extractwords.pl to HTML.

index -|- end

Generated: Tue Feb 2 17:54:31 2010 from extractwords.pl 2008/01/12 3.4 KB.

#!/perl -w
# NAME: extractwords.pl
# AIM: read a file, and extract the word within ...
# 12/01/2008 - geoff mclane - http://geoffair.net/mperl
use strict;
use warnings;
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";
# log file stuff
my ($LF);
my $pgmname = $0;
if ($pgmname =~ /\w{1}:\\.*/) {
   my @tmpsp = split(/\\/,$pgmname);
   $pgmname = $tmpsp[-1];
}
my $outfile = "temp.$pgmname.txt";
open_log($outfile);
prt( "$0 ... Hello, World ...\n" );
my %wordlist = ();
my $in_file = 'C:\Program Files\Microsoft Platform SDK for Windows Server 2003 R2\include\windows.h';
my $maxwrap = 8;
my $ignorecomm = 1;
my $ignerror = 1;
process_file($in_file);
show_words();
close_log($outfile,1);
exit(0);
sub is_hex_numb {
   my ($txt) = shift;
   if ($txt =~ /^0X/i) {
      $txt = substr($txt,2);
   }
   my $tl = length($txt);
   my ($t, $c);
   for ($t = 0; $t < $tl; $t++) {
      $c = substr($txt,$t,1);
      if ( !(($c =~ /\d/)||($c =~ /[A-F]/i)) ) {
         return 0;
      }
   }
   return 1;
}
# isallnums
sub is_all_nums {
   my ($txt) = shift;
   my $tl = length($txt);
   my ($t, $c);
   for ($t = 0; $t < $tl; $t++) {
      $c = substr($txt,$t,1);
      if ( !($c =~ /\d/) ) {
         return 0;
      }
   }
   return 1;
}
sub add_word {
   my ($wd) = shift;
   if ((length($wd) > 1) &&
      !is_all_nums($wd) &&
      !is_hex_numb($wd) ) {
      if (defined $wordlist{$wd}) {
         $wordlist{$wd}++;
      } else {
         $wordlist{$wd} = 1;
      }
   }
}
sub process_directive {
   my ($ln, $dr) = @_;
   my ($ind, $dl, $ll, $tag, $ch, $i);
   if ($ignerror) {
      #if ($ln =~ /^\s*#\s*error\s+/) {
      if ($dr eq 'error') {
         add_word('error');
         return;
       }
    }
   $dl = length($dr);
   $ind = index($ln, $dr);
   $tag = '';
   if ($ind > 0) {
      $ln = substr($ln,$ind+$dl);
   }
   $ll = length($ln);
   for ($i = 0; $i < $ll; $i++) {
      $ch = substr($ln,$i,1);
      if ($ch =~ /\w/) {
         $tag .= $ch;
      } else {
         add_word($tag) if length($tag);
         $tag = '';
      }
   }
}
sub process_file {
   my ($fil) = shift;
   my (@lines, $lc, $line, $i, $ll, $ch, $pch, $word, $incomm);
   if (open INF, "<$fil") {
      @lines = <INF>;
      close INF;
      $lc = scalar @lines;
      prt( "Processing $lc lines from $fil ...\n" );
      $word = '';
      $incomm = 0;
      foreach $line (@lines) {
         if ( !$incomm && ($line =~ /^\s*#\s*(\w+)\s+/)) {
            process_directive($line, $1);
            next;
         }
         $ll = length($line);
         for ($i = 0; $i < $ll; $i++) {
            $ch = substr($line,$i,1);
            if ($ignorecomm && $incomm) {
               if (($pch eq '*') && ($ch eq '/')) {
                  $incomm = 0;
               }
               $pch = $ch;
               next;
            }
            if ($ch =~ /\w/) {
               $word .= $ch;
            } else {
               add_word($word) if length($word);
               $word = '';
               if ($ignorecomm) {
                  if (($ch eq '*')&&
                     ($pch eq '/')) {
                     $incomm = 1;
                  } elsif (($ch eq '/')&&
                     ($pch eq '/')) {
                     $ch = ' ';
                     $i = $ll;
                  }
               }
            }
            $pch = $ch;
         }
      }
      add_word($word) if length($word);
   } else {
      prt( "WARNING: Failed to OPEN file [$fil] ...\n" );
   }
}
sub show_words {
   my ($wd, $cnt, $tot, $wrap, $wcnt);
   $tot = 0;
   $wrap = 0;
   $wcnt = scalar keys(%wordlist);
   prt( "Output of $wcnt words found ...\n" );
   foreach $wd (keys %wordlist) {
      $cnt = $wordlist{$wd};
      $tot += $cnt;
      prt( "$wd " );
      $wrap++;
      if ($wrap > $maxwrap) {
         prt("\n");
         $wrap = 0;
      }
   }
   prt("\n") if ($wrap);
   prt( "Done $wcnt, $tot total words ...\n" );
}
# eof - extractwords.pl

index -|- top

checked by tidy  Valid HTML 4.01 Transitional