Source Code: Strmat C++
In general, a big component of what I look for in a work sample is attention to detail: does the solution meet or exceed the specification? Does it perform well, particularly at large scale or for edge case inputs? Is it efficient and fault-tolerant?
Anonymous
//
// $Id: strmat.cpp 13 2008-02-22 11:58:32Z david $
// (c) 2008, David Chaves
//
// DESCRIPTION
//
//   This command line utility finds which
//   input lines contain all of a set of match strings
//
//   For example,
//
//   |$ cat input-file.txt | strmat hello world
//   |"hello, world"
//   |the world says hello
//   |say hello to the worldly man
//
//   Any number of strings may be provided as arguments, and
//   only lines which match all strings will be returned.
//   Additionally, it implements a context argument,
//   which prints a number of lines of context before and
//   after the matched lines, just like `grep`:
//
//   |$ cat input-file.txt | strmat -C1 hello
//   |The robot said,
//   |"hello, world"
//   |after he
//   |---
//   |in return
//   |the world says hello
//   |to those who are so friendly
//
//   The integer argument tells it how many lines before and
//   after to return; "-C" is the same as "-C1"
//
// COMPILATION
//
//   g++ -o strmat -pedantic -Wall strmat.cpp
//
// KNOWN BUGS
//
//   This program loads the complete content of <stdin>
//   in memory before starting to filter it
//
//   It will fail on insufficient memory if the input is huge
//   (an error is reported when the failure surfaces as a
//   C++ exception)
//
//   In consequence, do not use this program
//   to filter /dev/random or `/usr/bin/yes |`
//
//   Unlike `grep`, the contexts of nearby matches are not merged:
//   every matching line prints its own context block, so a line
//   may be printed more than once
//

#include <cassert>
#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <iostream>
#include <string>
#include <vector>

typedef std::string string_t;            // syntactic sugar
typedef std::vector<string_t> vector_t;  // syntactic sugar

//-------------------------------------- Command Options

/// Pick the program name to show in diagnostics
///
/// @param argc from main()
/// @param argv from main()
///
/// @return argv[0], or "strmat" when argv[0] is not available
///
static char const *
get_program_name (int const argc, char const *const *const argv)
{
    // argv[0] is a null pointer when the program is started with argc == 0
    return (argc > 0 && argv != 0 && argv[0] != 0) ? argv[0] : "strmat";
}

/// Print the help message on the console
///
/// @param program_name_p is the same as argv[0]
///
/// @return always false
///
static bool
print_usage (char const *const program_name_p)
{
    std::cerr
        << "Usage: " << program_name_p << " [OPTION]... MATCH..."
        << std::endl
        << "Search for all MATCH(es) in standard input" << std::endl
        << "Miscellaneous options:" << std::endl
        << " --help display this help and exit" << std::endl
        << "Context control options:" << std::endl
        << " -C{NUM} print NUM lines of output context" << std::endl;

    return false;  // always fails
}

/// Convert the NUM part of a "-C{NUM}" option
///
/// A negative NUM is accepted and its magnitude is used.
///
/// @param text_p  the characters following "-C"
/// @param context (output) the parsed context size; untouched on failure
///
/// @return true if text_p is a complete, in-range decimal integer
///
static bool
parse_context_size (char const *const text_p, size_t & context)
{
    errno = 0;
    char *end_p = 0;
    long const value = std::strtol (text_p, &end_p, 10);

    // reject "no digits", trailing garbage and out-of-range values
    if (end_p == text_p || *end_p != '\0' || ERANGE == errno)
    {
        return false;
    }

    if (value < 0)
    {
        // -(value + 1) + 1 computes the magnitude without
        // overflowing on the most negative long
        context = static_cast<size_t> (-(value + 1)) + 1;
    }
    else
    {
        context = static_cast<size_t> (value);
    }
    return true;
}

/// Load the command line options and match strings
///
/// @param argc    from main()
/// @param argv    from main()
/// @param context (output) the context size option value;
///                only written when a "-C" option is present
/// @param matches (output) the list of match strings
///
/// @return true if parameters are ok, or false if must abort
///
static bool
load_options (int const argc,
              char const *const *const argv,
              size_t & context,
              vector_t & matches)
{
    char const *const program_name_p = get_program_name (argc, argv);

    if (argc <= 1)  // no options given
    {
        return print_usage (program_name_p);
    }

    bool already_got_end_of_options = false;

    for (int k = 1; k < argc; ++k)
    {
        char const *const arg_p = argv[k];

        if (already_got_end_of_options)
        {
            matches.push_back (arg_p);
        }
        else if (std::strcmp (arg_p, "--help") == 0)
        {
            return print_usage (program_name_p);
        }
        else if (std::strcmp (arg_p, "--") == 0)
        {
            // GNU-ish
            already_got_end_of_options = true;
        }
        else if (std::strcmp (arg_p, "-C") == 0)
        {
            // "-C" is the same as "-C1"
            context = 1;
        }
        else if ('-' == arg_p[0] && 'C' == arg_p[1])
        {
            if (!parse_context_size (&arg_p[2], context))
            {
                std::cerr << program_name_p << " error: "
                          << "invalid context length: " << arg_p
                          << std::endl;
                return print_usage (program_name_p);
            }
        }
        else if ('-' == arg_p[0])
        {
            std::cerr << program_name_p << " error: "
                      << "invalid option: " << arg_p << std::endl;
            return print_usage (program_name_p);
        }
        else
        {
            // the first non-option argument ends the options
            already_got_end_of_options = true;
            matches.push_back (arg_p);
        }
    }

    // check for a non-empty matches
    if (matches.empty ())
    {
        std::cerr << program_name_p
                  << " error: missing match strings" << std::endl;
        return print_usage (program_name_p);
    }

    return true;
}

//-------------------------------------- Input File

/// Load the stdin at once
///
/// @param input_lines (output) the lines read from stdin are appended
///
/// @return the number of lines loaded
///
static size_t
load_input_file (vector_t & input_lines)
{
    size_t const initial_size = input_lines.size ();

    string_t input_line;

    // speed-up: try to avoid some malloc's
    input_line.reserve (BUFSIZ);

    // the vector is left to grow on its own: its geometric growth is
    // amortized O(1) per line, whereas reserving a fixed extra chunk
    // every N lines makes it reallocate (and copy) again and again
    while (std::getline (std::cin, input_line, '\n'))
    {
        input_lines.push_back (input_line);
    }

    return input_lines.size () - initial_size;
}

/// Print a range of lines stored in input_lines
///
/// @param must_print_slashes if also must print slashes
/// @param line_no            the current line number to print
/// @param context            the specific context size
/// @param line_count         the number of lines in input_lines
/// @param input_lines        the array of lines to print from
///
/// @return void
///
static void
print_input_lines (bool const must_print_slashes,
                   size_t const line_no,
                   size_t const context,
                   size_t const line_count,
                   vector_t const & input_lines)
{
    assert (line_count <= input_lines.size ());

    // compute the context beginning (clamped to the first line)
    size_t const from_line_no = (context <= line_no) ? line_no - context : 0;

    // compute the context ending (clamped to the last line);
    // written without "line_no + context + 1" so that a huge
    // context can not overflow
    size_t to_line_no = line_count;
    if (line_no < line_count && context < line_count - line_no - 1)
    {
        to_line_no = line_no + context + 1;
    }

    // print slashes if necessary
    if (must_print_slashes)
    {
        std::cout << "---\n";
    }

    // print the matching line and the context it is in;
    // '\n' instead of std::endl avoids one flush per line
    for (size_t k = from_line_no; k < to_line_no; ++k)
    {
        std::cout << input_lines[k] << '\n';
    }
}

//-------------------------------------- Main Body

/// Does this line match the specific match strings?
///
/// @param input_line current line to match for
/// @param matches    the array of match strings to check for
///
/// @return true if all the matches are in input_line
///
static bool
line_matches (string_t const & input_line, vector_t const & matches)
{
    for (vector_t::const_iterator iter = matches.begin ();
         iter != matches.end ();
         ++iter)
    {
        if (input_line.find (*iter, 0) == std::string::npos)
        {
            // match not found in input_line
            return false;
        }
    }

    // input_line contains all the strings in matches
    return true;
}

/// The main body
///
/// @param argc
/// @param argv
///
/// @return EXIT_SUCCESS or EXIT_FAILURE
///
int
main (int argc, char **argv)
{
    char const *const program_name_p = get_program_name (argc, argv);

    try
    {
        // this program never mixes C stdio with iostreams,
        // so the synchronization overhead can be dropped
        std::ios_base::sync_with_stdio (false);

        // load command line options...
        size_t context = 0;
        vector_t matches;

        if (!load_options (argc, argv, context, matches))
        {
            std::cerr << std::endl;
            return EXIT_FAILURE;
        }

        // load the input file at once...
        //
        // this might look like a waste of memory, but
        // it makes easier handling contexts which
        // also contain matching lines inside
        vector_t input_lines;
        size_t const line_count = load_input_file (input_lines);

        if (std::cin.bad ())
        {
            std::cerr << program_name_p
                      << " error: cannot read standard input" << std::endl;
            return EXIT_FAILURE;
        }

        // main loop...
        bool must_print_slashes = false;  // not the first time

        for (size_t line_no = 0; line_no < line_count; ++line_no)
        {
            if (line_matches (input_lines[line_no], matches))
            {
                print_input_lines (must_print_slashes, line_no, context,
                                   line_count, input_lines);

                // after the first block, slashes only if contexts
                must_print_slashes = (0 < context);
            }
        }

        // the output is buffered: make sure it really got written
        std::cout.flush ();
        if (!std::cout)
        {
            std::cerr << program_name_p
                      << " error: cannot write to standard output"
                      << std::endl;
            return EXIT_FAILURE;
        }

        return EXIT_SUCCESS;
    }
    catch (std::exception const & error)
    {
        // typically std::bad_alloc on a huge input
        std::cerr << program_name_p << " error: " << error.what ()
                  << std::endl;
        return EXIT_FAILURE;
    }
}

//-------------------------------------- The End