Source Code: Strmat C++

In general, a big component of what I look for in a work sample is attention to detail: does the solution meet or exceed the specification? Does it perform well, particularly at large scale or for edge case inputs? Is it efficient and fault-tolerant?

Anonymous

compiler_complaint.png


//
// $Id: strmat.cpp 13 2008-02-22 11:58:32Z david $
// (c) 2008, David Chaves
//
// DESCRIPTION
//
//     This command line utility finds which
//     input lines contain all of a set of match strings
//
//     For example,
//
//        |$ cat input-file.txt | strmat hello world
//        |"hello, world"
//        |the world says hello
//        |say hello to the worldly man
//
//     Any number of strings may be provided as arguments, and
//     only lines which match all strings will be returned.
//     Additionally, it implements a context argument,
//     which prints a number of lines of context before and
//     after the matched lines, just like `grep`:
//
//        |$ cat input-file.txt | strmat -C1 hello
//        |The robot said,
//        |"hello, world"
//        |after he
//        |---
//        |in return
//        |the world says hello
//        |to those who are so friendly
//
//     The integer argument tells it how many lines before and
//     after to return; "-C" is the same as "-C1"
//
// COMPILATION
//
//     g++ -o strmat -pedantic -Wall strmat.cpp
//
// KNOWN BUGS
//
//     This program loads the complete content of <stdin>
//     into memory before it starts filtering it
//
//     It will crash on insufficient memory if the input is huge
//
//     In consequence, do not use this program
//     to filter /dev/random or `/usr/bin/yes |`
// 
 
#include <cassert>
#include <cstdlib>
#include <cstring>
 
#include <iostream>
#include <string>
#include <vector>
 
typedef std::string string_t;           // syntactic sugar: one input line or one match string
typedef std::vector<string_t> vector_t; // syntactic sugar: a list of input lines or match strings
 
//-------------------------------------- Command Options
 
/// Write the usage/help text to the standard error stream
///
/// @param program_name_p name shown after "Usage: " (normally argv[0])
///
/// @return always false, so that callers can write
///         `return print_usage (...)` on their failure paths
///
static bool
print_usage (char const *const program_name_p)
{
  // every line of the help text that follows the "Usage:" line
  static char const *const help_lines[] = {
    "Search for all MATCH(es) in standard input",
    "Miscellaneous options:",
    "  --help   display this help and exit",
    "Context control options:",
    "  -C{NUM}  print NUM lines of output context"
  };
  size_t const help_line_count = sizeof help_lines / sizeof help_lines[0];

  std::cerr << "Usage: " << program_name_p;
  std::cerr << " [OPTION]... MATCH..." << std::endl;

  for (size_t idx = 0; idx != help_line_count; ++idx)
    {
      std::cerr << help_lines[idx] << std::endl;
    }

  return false;                 // by contract: never succeeds
}
 
/// Parse the NUM part of a "-CNUM" option
///
/// Only plain decimal digits are accepted: no sign, no blanks and no
/// trailing garbage.  Values too large to be useful are saturated
/// instead of being rejected.
///
/// @param digits_p the text that follows "-C" (must not be null)
/// @param value (output) the parsed count; untouched on failure
///
/// @return true if digits_p is a non-empty run of decimal digits
///
static bool
parse_context_size (char const *digits_p, size_t & value)
{
  // Half of the size_t range is already more lines than can be held
  // in memory, and stopping there leaves head-room so that
  // "line number + context + 1" can never wrap around.
  size_t const limit = static_cast<size_t> (-1) / 2;

  if ('\0' == *digits_p)
    {
      return false;             // nothing to parse
    }

  size_t parsed = 0;
  for (; *digits_p != '\0'; ++digits_p)
    {
      if (*digits_p < '0' || '9' < *digits_p)
        {
          return false;         // sign, letter, blank, ...
        }

      size_t const digit = static_cast<size_t> (*digits_p - '0');
      if (parsed > (limit - digit) / 10)
        {
          parsed = limit;       // saturate, but keep validating
        }
      else
        {
          parsed = parsed * 10 + digit;
        }
    }

  value = parsed;
  return true;
}

/// Load the command line options and match strings
///
/// Recognised options are "--help", "--" (end of options), "-C"
/// (same as "-C1") and "-CNUM".  The first argument that does not
/// start with '-' ends the option list; it and everything after it
/// are taken as match strings.  When an option is repeated, the last
/// occurrence wins.
///
/// @param argc from main()
/// @param argv from main()
/// @param context (output) the context size option value;
///                left untouched when no -C option is given
/// @param matches (output) the match strings are appended here
///
/// @return true if parameters are ok, or false if must abort
///         (help requested, invalid option, or no match strings);
///         a diagnostic and/or the usage text has then been
///         written to the standard error stream
///
static bool
load_options (int const argc,
              char const *const *const argv,
              size_t & context,
              vector_t & matches)
{
  // argv[0] may legitimately be missing (e.g. exec with an empty
  // argument vector), so fall back to a fixed name for messages
  char const *const program_name_p =
    (0 < argc && argv[0] != 0) ? argv[0] : "strmat";

  if (argc <= 1)                // no options given
    {
      return print_usage (program_name_p);
    }

  bool already_got_end_of_options = false;

  for (int k = 1; k < argc; ++k)
    {
      char const *const arg_p = argv[k];

      if (already_got_end_of_options)
        {
          matches.push_back (arg_p);
        }
      else if (std::strcmp (arg_p, "--help") == 0)
        {
          return print_usage (program_name_p);
        }
      else if (std::strcmp (arg_p, "--") == 0)
        {
          // GNU-ish
          already_got_end_of_options = true;
        }
      else if (std::strncmp (arg_p, "-C", 2) == 0)
        {
          if ('\0' == arg_p[2])
            {
              // "-C" is the same as "-C1"
              context = 1;
            }
          else if (!parse_context_size (arg_p + 2, context))
            {
              // reject e.g. "-Cfoo" or "-C-3" instead of guessing
              std::cerr << program_name_p << " error: "
                "invalid context size: " << arg_p << std::endl;

              return print_usage (program_name_p);
            }
        }
      else if ('-' == arg_p[0])
        {
          std::cerr << program_name_p << " error: "
            "invalid option: " << arg_p << std::endl;

          return print_usage (program_name_p);
        }
      else
        {
          // first non-option argument: the rest are match strings
          already_got_end_of_options = true;
          matches.push_back (arg_p);
        }
    }

  // check for a non-empty matches

  if (matches.empty ())
    {
      // end the line here so the usage text starts on its own line
      std::cerr << program_name_p <<
        " error: missing match strings" << std::endl;

      return print_usage (program_name_p);
    }

  return true;
}
 
//-------------------------------------- Input File
 
/// Load the stdin at once
///
/// Reads '\n'-terminated lines from std::cin until end of input (or
/// a read error) and appends each one, without its terminator, to
/// input_lines.
///
/// @param input_lines (output) the lines read are appended here
///
/// @return the number of lines loaded by this call
///
static size_t
load_input_file (vector_t & input_lines)
{
  size_t const lines_before = input_lines.size ();

  // The same buffer is reused for every line, so it only has to
  // grow up to the length of the longest line.
  string_t input_line;

  // No manual reserve() on the vector: reserving a fixed number of
  // extra slots every N lines forces a full reallocation every N
  // lines, which makes loading quadratic.  push_back() already grows
  // the storage geometrically.
  while (std::getline (std::cin, input_line))
    {
      input_lines.push_back (input_line);
    }

  return input_lines.size () - lines_before;
}
 
/// Print a range of lines stored in input_lines
///
/// @param must_print_slashes if also must print slashes
/// @param line_no the current line number to print
/// @param context the specific context size
/// @param line_count the number of lines in input_lines
/// @param input_lines the array of lines to print from
///
/// @return void
///
static void
print_input_lines (bool const must_print_slashes,
                   size_t const line_no,
                   size_t const context,
                   size_t const line_count,
                   vector_t const & input_lines)
{
  // compute the context beginning
  size_t from_line_no = 0;
  if (context <= line_no)
    {
      from_line_no = line_no - context;
    }
 
  // compute the context ending
  size_t to_line_no = line_no + context + 1;
  if (to_line_no >= line_count)
    {
      to_line_no = line_count;
    }
 
  // print slashes if necessary
  if (must_print_slashes)
    {
      std::cout << "---" << std::endl;
    }
 
  // print the matching line and the context it is in
  for (size_t k = from_line_no; k < to_line_no; ++k)
    {
      std::cout << input_lines[k] << std::endl;
    }
}
 
//-------------------------------------- Main Body
 
/// Does this line contain every one of the match strings?
///
/// Each match string is searched for as a plain substring; the
/// search stops at the first one that is missing.  An empty list
/// of match strings matches every line.
///
/// @param input_line the line to examine
/// @param matches the match strings that must all be present
///
/// @return true if all the matches are in input_line
///
static bool
line_matches (string_t const & input_line,
              vector_t const & matches)
{
  vector_t::size_type const wanted = matches.size ();
  vector_t::size_type found = 0;

  // advance for as long as the next match string is present
  while (found < wanted
         && input_line.find (matches[found]) != string_t::npos)
    {
      ++found;
    }

  // stopping early means that matches[found] is not in input_line
  return found == wanted;
}
 
/// The main body
///
/// Parses the command line, loads all of stdin, then prints every
/// line that contains all the match strings together with the
/// requested context.  As with `grep`, context blocks that overlap
/// or touch each other are merged, so no input line is printed more
/// than once, and the "---" separator is only printed between
/// blocks that have skipped lines between them.
///
/// @param argc number of command line arguments
/// @param argv the command line arguments
///
/// @return EXIT_SUCCESS, or EXIT_FAILURE when load_options()
///         rejects the command line (this includes "--help")
///
int
main (int argc, char **argv)
{
  // load command line options...

  size_t context = 0;
  vector_t matches;

  if (!load_options (argc, argv, context, matches))
    {
      std::cerr << std::endl;
      return EXIT_FAILURE;
    }

  // load the input file at once...
  //
  // this might look like a waste of memory, but
  // it makes easier handling contexts which
  // also contain matching lines inside

  vector_t input_lines;
  size_t const line_count = load_input_file (input_lines);

  // main loop...

  size_t next_unprinted = 0;        // index of first line not yet printed
  bool printed_something = false;   // becomes true after the first block

  for (size_t line_no = 0; line_no < line_count; ++line_no)
    {
      if (!line_matches (input_lines[line_no], matches))
        {
          continue;
        }

      // the window of lines wanted for this match: [first, last)
      size_t first = (context <= line_no) ? line_no - context : 0;

      // line_no < line_count here, so the subtraction is safe and
      // the sum is only formed when it cannot exceed line_count
      size_t const last = (context < line_count - line_no)
        ? line_no + context + 1 : line_count;

      // lines were skipped between the previous block and this one
      bool const skipped_lines = (next_unprinted < first);

      // do not repeat lines already printed as part of an earlier block
      if (first < next_unprinted)
        {
          first = next_unprinted;
        }

      // after the first block, the separator only if contexts
      // are in use and this block does not continue the last one
      bool separator_due =
        printed_something && (0 < context) && skipped_lines;

      // a context of 0 makes print_input_lines() print exactly line k
      for (size_t k = first; k < last; ++k)
        {
          print_input_lines (separator_due, k, 0,
                             line_count, input_lines);
          separator_due = false;
        }

      next_unprinted = last;
      printed_something = true;
    }

  return EXIT_SUCCESS;
}
 
//-------------------------------------- The End
Unless otherwise stated, the content of this page is licensed under Creative Commons Attribution-ShareAlike 3.0 License