Program: tcgrep (Perl Cookbook, 2nd Edition)

Example 6-11. tcgrep

  #!/usr/bin/perl -w
  # tcgrep: tom christiansen's rewrite of grep
  # v1.0: Thu Sep 30 16:24:43 MDT 1993
  # v1.1: Fri Oct  1 08:33:43 MDT 1993
  # v1.2: Fri Jul 26 13:37:02 CDT 1996
  # v1.3: Sat Aug 30 14:21:47 CDT 1997
  # v1.4: Mon May 18 16:17:48 EDT 1998
  use strict;
                                    # globals
  our ($Me, $Errors, $Grand_Total, $Mult, %Compress, $Matches);
  my ($matcher, $opt);              # matcher - anon. sub to check for matches
                                    # opt - ref to hash w/ command-line options
  init( );                           # initialize globals
  ($opt, $matcher) = parse_args( );  # get command line options and patterns
  matchfile($opt, $matcher, @ARGV); # process files
  exit(2) if $Errors;
  exit(0) if $Grand_Total;
  exit(1);
  ###################################
  sub init {
      ($Me = $0) =~ s!.*/!!;        # get basename of program, "tcgrep"
      $Errors = $Grand_Total = 0;   # initialize global counters
      $Mult = "";                   # flag for multiple files in @ARGV
      $| = 1;                       # autoflush output
      %Compress = (                 # file extensions and program names
          z  => 'gzcat',            # for uncompressing
          gz => 'gzcat',
          Z  => 'zcat',
      );
  }
  ###################################
  sub usage {
          die << EOF
  usage: $Me [flags] [files]
  Standard grep options:
          i   case insensitive
          n   number lines
          c   give count of lines matching
          C   ditto, but >1 match per line possible
          w   word boundaries only
          s   silent mode
          x   exact matches only
          v   invert search sense (lines that DON'T match)
          h   hide filenames
          e   expression (for exprs beginning with -)
          f   file with expressions
          l   list filenames matching
  Specials:
          1   1 match per file
          H   highlight matches
          u   underline matches
          r   recursive on directories or dot if none
          t   process directories in 'ls -t' order
          p   paragraph mode (default: line mode)
          P   ditto, but specify separator, e.g. -P '%%\\n'
          a   all files, not just plain text files
          q   quiet about failed file and dir opens
          T   trace files as opened
  May use a TCGREP environment variable to set default options.
  EOF
  }
  ###################################
  sub parse_args {
      use Getopt::Std;
      my ($optstring, $zeros, $nulls, %opt, $pattern, @patterns, $match_code);
      my ($SO, $SE);
      if (my $opts = $ENV{TCGREP}) {   # get envariable TCGREP
          $opts =~ s/^([^\-])/-$1/;    # add leading - if missing
          unshift(@ARGV, $opts);       # add TCGREP opt string to @ARGV
      }
      $optstring = "incCwsxvhe:f:l1HurtpP:aqT";
      $zeros = 'inCwxvhelut';       # options to init to 0 
      $nulls = 'pP';                # options to init to "" 
      @opt{ split //, $zeros } = ( 0 )  x length($zeros);
      @opt{ split //, $nulls } = ( '' ) x length($nulls);
      getopts($optstring, \%opt)              or usage( );
      # handle option "-f patfile", for list of patterns
      if ($opt{f}) {                
          open(PATFILE, $opt{f})          or die "$Me: Can't open '$opt{f}': $!";
          # make sure each pattern in file is valid
          while ($pattern = <PATFILE>) {
              chomp $pattern;
              eval { 'foo' =~ /$pattern/, 1 } or
                  die "$Me: $opt{f}:$.: bad pattern: $@";
              push @patterns, $pattern;
          }
          close PATFILE;
      }
      else {                        # make sure pattern is valid
          $pattern = $opt{e} || shift(@ARGV) || usage( );
          eval { 'foo' =~ /$pattern/; 1 } or
              die "$Me: bad pattern: $@";
          @patterns = ($pattern);
      }
      # option -H is for highlight, option -u is for underline
      if ($opt{H} || $opt{u}) {     
          my $term = $ENV{TERM} || 'vt100';
          my $terminal;
          # eval{  } only to trap potential exceptions in function calls
          eval {                    # try to look up escapes for stand-out
              require POSIX;        # or underline via Term::Cap
              use Term::Cap;
              my $termios = POSIX::Termios->new( );
              $termios->getattr;
              my $ospeed = $termios->getospeed;
              $terminal = Tgetent Term::Cap { TERM=>undef, OSPEED=>$ospeed }
          };
          unless ($@) {             # if successful, get escapes for either
              local $^W = 0;        # stand-out (-H) or underlined (-u)
              ($SO, $SE) = $opt{H}
                  ? ($terminal->Tputs('so'), $terminal->Tputs('se'))
                  : ($terminal->Tputs('us'), $terminal->Tputs('ue'));
          }
          else {                    # if use of Term::Cap fails,
              ($SO, $SE) = $opt{H}  # use tput command to get escapes
                  ? (`tput -T $term smso`, `tput -T $term rmso`)
                  : (`tput -T $term smul`, `tput -T $term rmul`)
          }
      }
      # option -i makes all pattern case insensitive
      if ($opt{i}) {
          @patterns = map {"(?i)$_"} @patterns;
      }
      # option -p or -P is paragraph mode, so add /m
      if ($opt{p} || $opt{P}) {
          @patterns = map {"(?m)$_"} @patterns;
      }
      # option -p is standard paragraph mode
      $opt{p}   && ($/ = '');
      # option -p is user-defined paragraph mode
      $opt{P}   && ($/ = eval(qq("$opt{P}")));     # for -P '%%\n'
      # option -w is at word boundary only (XXX: is this always optimal?)
      $opt{w}   && (@patterns = map {'\b' . $_ . '\b'} @patterns);
      # option -x is for whole lines only
      $opt{'x'} && (@patterns = map {"^$_\$"} @patterns);
      # determine whether to emit file name in front of each match
      if (@ARGV) {
          $Mult = 1 if ($opt{r} || (@ARGV > 1) || -d $ARGV[0]) && !$opt{h};
      }
      # if just listing filenames, stop after first match
      $opt{1}   += $opt{l};                   # that's a one and an ell
      # this way only need look for -H
      $opt{H}   += $opt{u};
      # if we're doing a complete count, where doing some counting
      $opt{c}   += $opt{C};
      # if we're counting, keep track of status
      $opt{'s'} += $opt{c};
      # stop at first match if checking status but not counting
      $opt{1}   += $opt{'s'} && !$opt{c};     # that's a one
      # default args are cwd if recursive, stdin otherwise
      @ARGV = ($opt{r} ? '.' : '-') unless @ARGV;
      # we're recursive even w/o -r if all args are directories
      $opt{r} = 1 if !$opt{r} && grep(-d, @ARGV) =  = @ARGV;
      ######
      # now the hard part: build of the matching function as text to eval
      #
      $match_code  = '';
      $match_code .= 'study;' if @patterns > 5; # might speed things up a bit
      foreach (@patterns) { s(/)(\\/)g }
      # add the stand-out and end-stand-out sequences for highlight mode
      if ($opt{H}) {
          foreach $pattern (@patterns) {
              $match_code .= "\$Matches += s/($pattern)/${SO}\$1${SE}/g;";
          }
      }
      # option -v means to count a line if it *doesn't* match
      elsif ($opt{v}) {
          foreach $pattern (@patterns) {
              $match_code .= "\$Matches += !/$pattern/;";
          }
      }
      # do full count, multiple hits per line
      elsif ($opt{C}) {
          foreach $pattern (@patterns) {
              $match_code .= "\$Matches++ while /$pattern/g;";
          }
      }
      else {
          foreach $pattern (@patterns) {
              $match_code .= "\$Matches++ if /$pattern/;";
          }
      }
      # now compile as a closure, and grab function pointer
      $matcher = eval "sub { $match_code }";
      die if $@;
      return (\%opt, $matcher);
  }
  ###################################
  sub matchfile {
      $opt = shift;                 # reference to option hash
      $matcher = shift;             # reference to matching sub
      my ($file, @list, $total, $name);
      local($_);
      $total = 0;
  FILE: while (defined ($file = shift(@_))) {
          if (-d $file) {
              if (-l $file && @ARGV != 1) {
                  warn "$Me: \"$file\" is a symlink to a directory\n"
                      if $opt->{T};
                  next FILE;
              }
              if (!$opt->{r}) {
                  warn "$Me: \"$file\" is a directory, but no -r given\n"
                      if $opt->{T};
                  next FILE;
              }
              unless (opendir(DIR, $file)) {
                  unless ($opt->{'q'}) {
                      warn "$Me: can't opendir $file: $!\n";
                      $Errors++;
                  }
                  next FILE;
              }
              @list = ( );
              for (readdir(DIR)) {                # skip cwd and parent dir
                  push(@list, "$file/$_") unless /^\.{1,2}$/;
              }
              closedir(DIR);
              # option -t is sort by age, youngest first
              # use algorithm from Recipe 4.XXX, Sorting a List by Computable Field 
              if ($opt->{t}) {
                  @list = map  { $_->[0] }
                          sort { $a->[1] <=> $b->[1] } 
                          map  { [ $_, -M $_ ] } @list;
              }
              else {
                  @list = sort @list;
              }
              matchfile($opt, $matcher, @list);    # process files
              next FILE;
          }
          # avoid annoying situation of grep wanting to read from keyboard
          # but user not realizing this 
          if ($file eq '-') {
              warn "$Me: reading from stdin\n" if -t STDIN && !$opt->{'q'};
              $name = '<STDIN>';
          }
          else {
              $name = $file;
              unless (-e $file) {
                  warn qq($Me: file "$file" does not exist\n) unless $opt->{'q'};
                  $Errors++;
                  next FILE;
              }
              unless (-f $file || $opt->{a}) {
                  warn qq($Me: skipping non-plain file "$file"\n) if $opt->{T};
                  next FILE;
              }
              # could use File::Spec instead
              my ($ext) = $file =~ /\.([^.]+)$/;
              # check whether it's an extension whose contents we know 
              # how to convert to plain text via a filter program
              if (defined($ext) && exists($Compress{$ext})) {
                  $file = "$Compress{$ext} < $file |";
              }
              elsif (! (-T $file  || $opt->{a})) {
                  warn qq($Me: skipping binary file "$file"\n) if $opt->{T};
                  next FILE;
              }
          }
          warn "$Me: checking $file\n" if $opt->{T};
          unless (open(FILE, $file)) {
              unless ($opt->{'q'}) {
                  warn "$Me: $file: $!\n";
                  $Errors++;
              }
              next FILE;
          }
          $total = 0;
          $Matches = 0;
  LINE:  while (<FILE>) {
              $Matches = 0;
              ##############
              &{$matcher}( );        # do it! (check for matches)
              ##############
              next LINE unless $Matches;
              $total += $Matches;
              if ($opt->{p} || $opt->{P}) {
                s/\n{2,}$/\n/ if $opt->{p};
                chomp         if $opt->{P};
              }
              print("$name\n"), next FILE if $opt->{l};
              # The following commented out block is the 
              # expanded/legible version of the statement
              # that immediately follows it.  This is one 
              # of the few times we sacrifice readability
              # for execution speed: we carefully arrange 
              # that print( ) be called just once, not four times,
              # and we don't resort to a braced block either.
              # (note that $Mult must be "" not 0 for this to work)
              ########
              ## unless ($opt->{'s'}) {
              ##    print "$name:"                 if $Mult;
              ##    print "$.:"                    if $opt{n};
              ##    print;
              ##    print (('-' x 20) . "\n")         if $opt->{p} || $opt->{P};
              ## } 
              ########
              $opt->{'s'} || print $Mult && "$name:",
                  $opt->{n} ? "$.:" : "",
                  $_,
                  ($opt->{p} || $opt->{P}) && ('-' x 20) . "\n";
              next FILE if $opt->{1};                 # that's a one
          }
      }
      continue {
          # again, next block equivalent to line following
          #######
          ## if ($opt->{c}) {
          ##         print $name if $Mult;
          ##         print "$total\n";
          ## } 
          #######
          print $Mult && "$name:", "$total\n" if $opt->{c};
      }
      $Grand_Total += $total;
  }
6.22. Program: tcgrep

Example 6-11. tcgrep