// Program:  text_count.cpp
// Author:   Chuck Stewart
// Purpose:  Count the lines, characters (non-whitespace), and words
//    in a text file.  Count the occurrences of each letter and the
//    occurrences of each word.  Ignore punctuation in the words.

#include <cctype>
#include <cstddef>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <vector>

using namespace std;

// Number of per-letter counters: one for each of 'a' through 'z'.
const size_t NUM_LETTERS = 26;

unsigned int count_characters( const string& a_line );

void add_to_letter_counts( const string& a_line,
                           vector<int>& letter_counters );

vector<string> break_up_line( const string& a_line );

void add_to_word_counts( vector<string> const& line_words,
                         map<string, int>& word_counts );


// Read the text file named by argv[1] one line at a time, accumulate the
// statistics, and write a report to the file named by argv[2].
// Returns 0 on success and 1 on a usage error or a file error.
int main( int argc, char* argv[] )
{
  if ( argc != 3 ) {
    cerr << "Usage: " << argv[0] << " text-file results-file\n";
    return 1;
  }

  ifstream in_str( argv[1] );
  if ( !in_str ) {
    cerr << "Couldn't open " << argv[1] << " to read.\n";
    return 1;
  }

  ofstream out_str( argv[2] );
  if ( !out_str ) {
    cerr << "Couldn't open " << argv[2] << " to write the results.\n";
    return 1;
  }

  unsigned int character_count = 0;   // Non-whitespace characters seen
  unsigned int line_count = 0;        // Lines read
  // Counts for the individual letters; the counter for a letter such as
  // 'g' lives at index 'g' - 'a'.  Upper and lower case share a counter.
  vector<int> letter_counters( NUM_LETTERS, 0 );
  size_t all_word_counts = 0;         // Total number of words
  map<string, int> word_counts;       // Occurrence count for each word

  // Handle one line at a time...
  string a_line;
  while ( getline( in_str, a_line ) ) {
    ++line_count;
    character_count += count_characters( a_line );
    add_to_letter_counts( a_line, letter_counters );
    const vector<string> words_in_line = break_up_line( a_line );
    add_to_word_counts( words_in_line, word_counts );
    all_word_counts += words_in_line.size();
  }

  // Output char, word and line counters
  out_str << "\nHere are the statistics on the input text file:\n"
          << " char count = " << character_count << "\n"
          << " word count = " << all_word_counts << "\n"
          << " line count = " << line_count << "\n";

  // Output the letter counts
  out_str << "\nHere are the letter counts:\n";
  for ( size_t i = 0; i < letter_counters.size(); ++i ) {
    out_str << " " << static_cast<char>( 'a' + i ) << ": "
            << letter_counters[ i ] << "\n";
  }

  // Output word occurrences.  A map iterates in key order, so the words
  // come out sorted.
  out_str << '\n' << "Here are the word occurrence counts\n";
  for ( map<string, int>::const_iterator mp = word_counts.begin();
        mp != word_counts.end(); ++mp ) {
    out_str << mp->first << '\t' << mp->second << '\n';
  }

  // Make sure the report actually reached the file before claiming success.
  out_str.flush();
  if ( !out_str ) {
    cerr << "Error while writing the results to " << argv[2] << ".\n";
    return 1;
  }

  return 0;
}


// Return the number of non-whitespace characters on the line.
unsigned int count_characters( const string& a_line )
{
  unsigned int count = 0;
  for ( string::const_iterator i = a_line.begin(); i != a_line.end(); ++i ) {
    // The <cctype> functions require a value representable as unsigned
    // char (or EOF); passing a negative plain char is undefined behavior.
    if ( !isspace( static_cast<unsigned char>( *i ) ) ) {
      ++count;
    }
  }
  return count;
}


// For each letter seen on the line, add one to the matching counter in
// letter_counters.  Counting is case-insensitive: 'G' and 'g' both update
// index 'g' - 'a' == 6.  Characters that are not letters are skipped, and
// so is any letter whose index would fall outside letter_counters.
void add_to_letter_counts( const string& a_line,
                           vector<int>& letter_counters )
{
  for ( string::const_iterator i = a_line.begin(); i != a_line.end(); ++i ) {
    const unsigned char c = static_cast<unsigned char>( *i );
    if ( !isalpha( c ) ) {
      continue;
    }

    // Fold to lower case first; indexing with an upper-case letter minus
    // 'a' would produce a negative, out-of-range index.
    const int offset = tolower( c ) - 'a';
    if ( offset < 0
         || static_cast<size_t>( offset ) >= letter_counters.size() ) {
      continue;
    }
    ++letter_counters[ static_cast<size_t>( offset ) ];
  }
}


// Break up a string storing a line of input into a vector of strings
// storing the words from the line.  A word is a maximal run of alphabetic
// characters; every other character acts as a separator and is discarded.
//
// e.g.  "...Hello from New York!  Good-bye...later"
//   ==> "Hello" "from" "New" "York" "Good" "bye" "later"
vector<string> break_up_line( const string& a_line )
{
  vector<string> words;
  string current;   // Letters of the word being assembled, if any

  for ( string::const_iterator i = a_line.begin(); i != a_line.end(); ++i ) {
    if ( isalpha( static_cast<unsigned char>( *i ) ) ) {
      current.push_back( *i );
    } else if ( !current.empty() ) {
      // A non-letter ends the word in progress.
      words.push_back( current );
      current.clear();
    }
  }

  // A word that runs to the end of the line has no terminating separator,
  // so it has to be stored here.
  if ( !current.empty() ) {
    words.push_back( current );
  }

  return words;
}


// For each word from the input line add to the map, increasing the
// word's count by 1 (implicitly starting from 0 if the word was not
// in the map).
void add_to_word_counts( vector<string> const& line_words,
                         map<string, int>& word_counts )
{
  for ( vector<string>::const_iterator p = line_words.begin();
        p != line_words.end(); ++p ) {
    ++word_counts[ *p ];
  }
}