p2hall01.pl to HTML.

Generated: Tue Feb 2 17:54:48 2010 from p2hall01.pl 2007/01/27 72.1 KB.
#!/Perl
###########################################################################
# p2hall01.pl - 26 October - Geoff McLane - http://geoffmclane.com/mperl/samples/p2all01.htm
#
# This is an update on a series p2h03.pl, p2h04.pl, etc ...
# Its purpose is to read ONE Perl script FILE, and CONVERT it
# into a HTML file ... See p2hall02.pl for processing a FOLDER
#
# 20070127 - fix for get_nn only
###########################################################################
use Time::HiRes qw(usleep ualarm gettimeofday tv_interval nanosleep );
use strict;
use File::Copy; # to copy from an existing background file - see $jpg_file
use File::stat; # to get the file date
# USER VARIABLES
# out folder - this FOLDER must exist - it will NOT be created
##my $out_folder = 'C:/HOMEPAGE/P26/mperl/samples'; # updated 2006.09.19
###my $out_folder = 'C:/HOMEPAGE/Max5/mperl/samples';   # set directly - done 2006.09.19
my $out_folder = 'P26';
###my $out_folder = 'Max5';
###my $out_folder = 'temp';
my $log_file = 'temp'.$0.'.txt'; # log file output
my $in_folder = '.'; # run in local folder
# setting reserved word and function arrays
my $use_local = 0; # set 1 to local internal lists, and NOT load the following file ...
my $perlstx = 'C:/Program Files/EditPlus 2/perl.stx'; # fix location - or use local list!
# a back ground file
my $jpg_file = 'cldsp.jpg';
# background SOURCE and DESTINATION of background file
my $jpg_src = "c:/HOMEPAGE/P26/mperl/$jpg_file";
my $jpg_des = "$out_folder/$jpg_file";
# validation file
my $v401_file = 'valid-html401.gif';
# validation SOURCE and DESTINATION of validation file
my $v401_src = "c:/HOMEPAGE/P26/mperl/$v401_file";
my $v401_des = "$out_folder/$v401_file";
my $indexhtm = 'index.htm';
my $write_index = 1; # set to WRITE index ...
my $no_index = 0; # is set to 1 if no old index found
my $dbgem = 0;
##my $emreg = '(\\w+\\@{1})(hotmail\\.com)';
my $emreg = '(geoff\\w+\\@{1})(hotmail\\.com)';
my $efix_cnt = 0;
my $m_doctype = '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"'."\n".
'"http://www.w3.org/TR/html4/loose.dtd">';
my $wrap = 5;
my $verb3 = 0;
my $verb4 = 0;
my %HFuncsFnd = ();   # set of FOUND builtin functions
my %HResWdFnd = (); # reserved words used
my @AFileNames = (); # for each output file, with hash of functions
my @AFileHashs = (); # for each output file, with hash of functions
# set the CLASS and COLOUR strings
my $a_class = 'a'; # built-in function (red)
my $b_class = 'b'; # comments (#006666)
my $c_class = 'c'; # reserved words (blue)
my $d_class = 'd'; # inside qw(...)
my $e_class = 'e'; # $scalar (#9400d3)
my $f_class = 'f'; # in <<EOF...EOF block (#666666)
my $o_class = 'o'; # @array  (#008b8b - was #FFA500)
my $v_class = 'v'; # %hash (#a52a2a - was #808000)
my $t_class = 't'; # quoted - single and double (#006600)
my $a_color = 'red';
my $b_color = '#006666';
my $c_color = 'blue';
###my $d_color = 'brown'; # does not exist!
my $d_color = '#a52a2a';
#my $e_color = '#00008B';
my $e_color = '#9400d3';
my $f_color = '#666666';
#my $o_color = '#FFA500';
my $o_color = '#008b8b';
#my $v_color = '#808000';
my $v_color = '#a52a2a';
my $t_color = '#006600';
# other USER variables
my $tab_space = '   '; # note tabs to 3 spaces - change if desired
# some USER OPTIONS
my $add_chart = 0; # add colour chart at end, with document stats
my $brown_qw = 1; # to process a qw(...);
# these a mutually exclusive - either or ...
my $add_table = 0; # use table to outline code
my $add_pre = 1; # use a <pre>...</pre> block
# this option REALLY adds weight to certain files
my $add_uvars = 1; # colour code user variables
# special DEBUG variables
my $debug_on = 0; # heavy DEBUG ONLY output
my $out_lists = 0; # output the lists in qw form
my $dbg1 = 0;
my $dbg2 = 1;
my $dbg3 = 0;
#####################
# PROGRAM VARIABLES #
#####################
my $out_file = ''; # out file for HTML
my $in_file = '';  # current in begin processed
my $in_date = '';
my $in_size = '';
# for log file
my ($LF, $OF);
# reserved words, and build-ins
my @ResWords = ();
my @BuiltIns = ();
my $perlcss = <<"PEOF";
/* Style Definitions - updated 2006.08.28 - 2006.07.13 */
body { 
   background-image:url('cldsp.jpg');
   margin: 0cm 1cm 0cm 1cm; }
hr {
   margin: 0px 0px 0px 0px;
   border-style: none;
   padding: 0px 0px 0px 0px; }
h1 {
 background:#efefef;
 border-style: solid solid solid solid;
 border-color:#d9e2e2;
 border-width:1px;
 padding:2px 2px 2px 2px;
 font-size:200%;
 text-align:center;
}
p.top {
   margin: 0; 
   border-style: none; 
   padding: 0; 
   text-align: center; }
p.nom {
margin:0cm;
margin-bottom:.0001pt;
color: red;
}
p.code {
   margin: 0cm 0.5cm 0cm 0.5cm;
   font-size:10.0pt;
   font-family:"Courier New";
}
.bld { font-weight: bold; }
.cn { font-family:"Courier New"; }
.ctr { text-align: center; }
.red { color:red; }
.blue { color:blue; }
.green { color:#006600 }
.brown { color:#a52a2a }
.a { color:red; }
.b { color:#006666; }
.c { color:blue; }
.d { color:#a52a2a; }
.e { color:#9400d3; }
.f { color:#666666; }
.o { color:#008b8b; }
.v { color:#a52a2a; }
.t { color:#006600; }
.cd {
  /* top, right, bottom, left */
  padding: 0px 10px 0px 10px;
  margin: 1px 10px 1px 10px;
  background: #f0f8ff;
  border-width: 1px;
  border-style: solid solid solid solid;
  border-color: #cccccc;
  width: 90%;
  font-family:"Courier New";
}
.out {
  padding: 0px 10px 0px 10px;
  margin: 1px 10px 1px 10px;
  background: #2f2f2f;
  color: #ffffff;
  border-width: 1px;
  border-style: solid solid solid solid;
  border-color: #cccccc;
  width: 90%;
  font-family:"Courier New";
}
 /* reserved words */
 .rw { color: #0000cd; }
 /* built-in functions */
.bif { color: #ff0000; }
/* scalar variables */
.sca { color: #9400d3; }
/* array variables */
.arr { color: #008b8b; }
/* hash variables */
.has { color: #a52a2a; }
/* comments after # */
.com { color: #008000; }
/* quoted items */
.qot { color: #009900; }
/* eof - perl.css */
PEOF
my @lines = (); # final output line gathered here
my $line = '';
my $date = '';
my $sz = '';
my $last_builtin = '';
my $last_resword = '';
my $doc_total = 0;
my $out_total = 0;
# these are really just DEBUG counters
my $a_cnt = 0;
my $b_cnt = 0;
my $c_cnt = 0;
my $d_cnt = 0;
my $e_cnt = 0;
my $f_cnt = 0;
my $o_cnt = 0;
my $v_cnt = 0;
my $q_cnt = 0;
# TIME VARIABLES
my ($t0, $t1, $elapsed);
my @in_files = ();   # set of perl files gathered from FOLDER search
my $ind_file = '';   # file name for INDEX list
my @ind_files = ();   # array of files for INDEX generation
my $sb;   # stat of current file
my $latest = 0;
my $earliest = time();
my $dbg20 = 0;
my $dbg21 = 0; # collect from alphabetic table
my $dbg22 = 0; # collect from alphabetic table
my $dbg23 = 0;
my $dbg24 = 0;
my $dbg25 = 0;
# get the old index.htm - D NOT lose information in update ...
my $in_index = "$out_folder/$indexhtm";   # = something line 'index.htm';
my $tbl_num = 1;   # want the first table
my @tbl_arr = ();
my $tacnt = 0;
my @tbl_set = ();
##   push(@tbl_set, [$hrf, $fil, $dt, $sz, $yr, $mt, $dy, 0]);
my $tbl_num3 = 3;   # and we want the third table
my @tbl_arr3 = ();
my $tacnt3 = 0;
my @tbl_set3 = ();
## push(@tbl_set3, [$bif, $files]);
my @hrefs = ();
my %HOldbifs = ();
#################
my $lncnt = 0;
my $tblcnt = 0;
my $indcnt = 0;
my @larr = ();
my @larr2 = ();
my $ln = '';
# get_existing_files( $out_folder );   # put existing in @existing
my @existing = (); # push(@existing, $dfile);
my @dir_list = ();   # if/when I want to be recursive...
my $tot_dirs = 0;
my $tot_files = 0;
#####################################################################
# This is the small MAIN part of the script
$t0 = [gettimeofday];
# logging file, if possible
my $out_log = 1;
if (open $LF, ">$log_file") {
   $out_log = 1;
   prt( "Output also being written to LOG file $log_file ... \n" );
} else {
   $out_log = 0;
   prt( "WARNING: Unable to create LOG file $log_file ... \n" );
}
load_stx_file( $perlstx );
prt( "Got ".scalar @ResWords." Reserved Words, and ".scalar @BuiltIns." Built-in functions ...\n" );
###get_existing_files( $out_folder );   # put existing in @existing
###get_input_files( $in_folder );      # find perl scripts, and put in @in_files
###push(@in_files, ['test4.pl', 'test4.pl']);
push(@in_files, ['test9.pl', 'test9.pl']);
my $fcnt = scalar @in_files;
my $skipped = 0;
my $newercnt = 0;
my $donecnt = 0;
if ($fcnt) {
   prt( "Processing $fcnt files from folder $in_folder ...\n" );
   for (my $i = 0; $i < $fcnt; $i++ ) {
      reset_variables();
      $in_file = $in_files[$i][0];
      $sb = stat($in_file);
      ###$in_date = YYYYMMDD($sb->mtime);
      ###$in_size = get_nn($sb->size);
      $in_size = $sb->size;
      $in_date = $sb->mtime; # keep DATE unchanged, so a SORT can be done
      if ($sb->mtime > $latest) { $latest = $sb->mtime; }
      if ($sb->mtime < $earliest) { $earliest = $sb->mtime; }
      $ind_file = my_file_name( $in_files[$i][1] ) . '.htm';
      push(@ind_files, [$ind_file,$in_date,$in_size]);   # array of files for INDEX generation
      $out_file = "$out_folder/$ind_file";
      process_file( $in_file ); # main processing of the file lines
      ##push(@AFileNames, [$ind_file, \%HFuncsFnd]); # store the functions used ...
      push(@AFileNames, $ind_file   );
      my %th = %HFuncsFnd;
      my @tar = keys %th;
      prt("Pushing HASH with ".scalar @tar." keys ...\n");
      push(@AFileHashs, \%th); # store the functions used ...
      ##if ( -f $out_file) {
      ##   my $sb2 = stat($out_file);
      ##   if ($sb->mtime < $sb2->mtime) {
      ##      prt( "Skipping [$out_file] since it already exists ...\n" ) if ($dbg3);
      ##      $out_file = ''; # kill the new output
      ##      $skipped++; # older or same
      ##   } else {
      ##      $newercnt++;
      ##   }
      ##}
      if (length($out_file)) {
         prt( "Putting ".scalar @lines." new lines to $out_file ...\n" );
         write_out_file( $out_file ); # write out results, using HTML format ...
         $donecnt++;
         if ($fcnt == 1) {
            my $nf = $out_file;
            $nf =~ s/\//\\/g;
            system($nf);
         }
      }
   }
   if ($skipped > 0) {
      prt( "Skipped $skipped already existing, and where time is not later ...\n" );
   }
   prt( "Processed $donecnt files, $newercnt were newer, ".($donecnt - $newercnt)." as new...\n" );
   ###get_old_index( $in_index );   # load HTML table, and get set of files ...
   ###generate_index() if (($write_index > 0)||($no_index > 0)); # output @ind_files - array of files to index.htm
} else {
   prt( "FAILED to find any perl files in [$in_folder] ...\n" );
}
if ($efix_cnt) {
   prt("Note: $efix_cnt email changes were made ...\n");
}
$t1 = [gettimeofday];
$elapsed = tv_interval ( $t0, $t1 );
prt( "$0 processing took $elapsed seconds ...\n" );
if ($out_log) {
   close($LF);
   system($log_file);
}
exit 0;
#####################################################################
#######################
### only subs below ###
#######################
sub mycmp_decend2 {
   my $off = 1;
   if (${$a}[$off] < ${$b}[$off]) {
      prt( "+[".${$a}[$off]."] < [".${$b}[$off]."]\n" ) if $verb3;
      return 1;
   }
   if (${$a}[$off] > ${$b}[$off]) {
      prt( "-[".${$a}[$off]."] < [".${$b}[$off]."]\n" ) if $verb3;
      return -1;
   }
   prt( "=[".${$a}[$off]."] == [".${$b}[$off]."]\n" ) if $verb3;
   return 0;
}
sub is_valid_link($) {
   my ($f_l) = shift;
   my $ff = $out_folder . "\\" . $f_l;
   if (($f_l =~ /\./) && ( -f $ff )) {
      return 1;
   }
   return 0;
}
sub has_valid_files($) {
   my ($tx) = shift;
   my @atmp = split(/\s/,$tx); # get a list
   my $f = '';
   foreach $f (@atmp) {
      if (is_valid_link($f)) {
         return 1;
      }
   }
   return 0;
}
# push(@AFileNames, $ind_file); store the file
# push(@AFileHashs, \%hr); # and store the functions used ...
sub add_jump_table($) {
   my ($oh) = shift;
   my $acnt = scalar @AFileNames;
   my $bcnt = scalar @AFileHashs;
   my ($itm, $bi, $b3, $msg);
   my %nh = ();
   my @b2 = ();
   %nh = %HOldbifs; # get any OLD, from the OLD index
   @b2 = keys %nh;
   $b3 = scalar @b2;
   if ($acnt != $bcnt) {
      prt("\nWARNING: THESE TWO COUNTS SHOULD BE THE SAME!!! $acnt vs $bcnt ???\n");
   }
   prt("Adding jump table for $acnt ($bcnt) new hashes ... plus $b3 from OLD index ...\n");
   for ($itm = 0; $itm < $acnt; $itm++) {
      my $fl = $AFileNames[$itm];
      my $hr = $AFileHashs[$itm];
      my $nfl = '';
      prt("Processing file [$fl] ...\n") if ($verb4);
      ###my @kys = keys %{$hr}; # get built-ins for this file
      my @kys = keys %{$AFileHashs[$itm]}; # get built-ins for this file
      if (@kys) {
         foreach my $ky (@kys) {
            $nfl = '';
            if (exists $nh{$ky} ) {
               $nfl = $nh{$ky};
            }
            if ( $nfl =~ /$fl/i ) {
               prt("$ky - File [$fl] aready in [$nfl] ...\n") if ($verb4);
            } else {
               if (length($nfl) && !($nfl =~ /\s$/) ) {
                  $nfl .= ' '; # add space
               }
               prt("$ky - Adding [$fl] to [$nfl] ...\n") if ($verb4);
               $nfl .= $fl; # add file with this built-in
               $nh{$ky} = $nfl; # store or create built-in with this, these files
            }
         }
      } else {
         prt("Failed ... NO KEYS for file [$fl] ... check ...\n");
      }
   }
   # done generation of a set of built in, with each file that contains that built-in
   @b2 = sort keys %nh;
   $b3 = scalar @b2;
   if ( @b2 && ($b3 > 0)) {
      my $allbi = ' '.join(' ', @BuiltIns).' '; # ensure begin and end with space
      prt("Adding 3rd table with $b3 built-ins ...\n");
      out_link_line($oh, 4); # avoid jumptable
      $msg = "<a name=\"jumptable\"></a>\n";
      $msg .= "<p>This is a link/jump table for built-in functions. ";
      $msg .= "The link is to file(s) using that function. Enjoy ;=))</p>\n";
      print $oh $msg;
      print $oh '<table width="100%" border="1" summary="jump index to Perl samples">'."\n";
      print $oh '<caption>Jump index to Perl samples</caption>'."\n";
      print $oh '<tr><th>Built-In</th><th>Jump file(s)</th></tr>'."\n";
      foreach $bi (@b2) {
         my $v = $nh{$bi};
         # check if the 'value' has any valid file to link to
         if (has_valid_files($v)) {
            $msg = "<tr>\n";
            $allbi =~ s/\s+$bi\s+/ /; # delete this entry
            $msg .= '<td class="bif">'.$bi."</td>\n";
            ###$msg .= '<td>'.$v."</td>\n";
            my @tmpa = split(/\s/,$v);
            $msg .= "<td>\n";
            ###foreach my $tmpf (@tmpa) {
            foreach my $tmpf (sort @tmpa) { # put jumps in alphabetic order
               if (is_valid_link($tmpf)) {
                  $msg .= " <a href=\"$tmpf\">$tmpf</a>\n";
               } else {
                  prt( "DISCARDED: [$bi] file [$tmpf] ...\n" );
               }
            }
            $msg .= "</td>\n";
            $msg .= "</tr>\n";
            print $oh $msg;
         } else {
            prt( "DISCARD: bif=[$bi] has no valid files [$v]!!!\n" );
         }
      }
      $allbi = trim_line($allbi);
      if (length($allbi)) {
         $msg = "<tr>\n";
         $msg .= "<td>missed</td>\n";
         $msg .= "<td class=\"bif\">$allbi</td>\n";
         $msg .= "</tr>\n";
         print $oh $msg;
      }
      print $oh "</table>\n";
   } else {
      prt( "Failed ... no keys in %nh ... NO JUMP TABLE DONE!\n" );
   }
}
#   out_link_line($OF, 1); # avoid adding top
#   out_link_line($OF, 2); # avoid adding alphabetc
#   out_link_line($oh, 3); # avoid dateindex
#   out_link_line($oh, 4); # avoid jumptable
#   out_link_line($OF, 0); # avoid none
sub out_link_line($$) {
   my ($oh, $num) = @_;
   print $oh '<p class="ctr">';
   print $oh ' [ <a href="#top">top</a> ] '."\n" if ($num != 1);
   print $oh ' [ <a href="#alphabetic">alphabetic table</a> ] '."\n" if ($num != 2);
   print $oh ' [ <a href="#dateindex">date table</a> ] '."\n" if ($num != 3);
   print $oh ' [ <a href="#jumptable">jump table</a> ] '."\n" if ($num != 4);
   print $oh ' [ <a href="../index.htm">Perl Index</a> ] '."\n";
   print $oh "</p>\n";
}
sub add_new_table($) {
   my ($oh) = shift;
   my @ind_sort = sort mycmp_decend2 @ind_files;
   my $icnt = scalar @ind_sort;
   my $cnt = 0;
   my $i = 0;
   my $line = '';
   my $date = '';
   my $sz = 0;
   my $msg = '';
   prt("Adding 2nd table ...\n");
   out_link_line($oh, 3); # avoid dateindex
   $msg = "<a name=\"dateindex\"></a>\n";
   $msg .= "<p>This is a repeated table in date order, with the latest, most recent listed first.";
   $msg .= " Enjoy ;=))</p>\n";
   print $oh $msg;
   print $oh '<table width="100%" border="1" summary="Date index to Perl samples">'."\n";
   print $oh '<caption>Date index to Perl samples</caption>'."\n";
   # actual output of SORTED generated lines
   $cnt = 0;
   for ($i = 0; $i < $icnt; $i++) {
      $line = $ind_sort[$i][0];
      $date = YYYYMMDD($ind_sort[$i][1]);
      $sz = get_nn($ind_sort[$i][2]);
      $msg = '';
      if ($cnt == 0) {
         $msg = "<tr>\n";
      }
      ###mark_old_index($line);
      $msg .= "<td align=\"center\">$date<br><a href=\"$line\">$line</a><br>$sz</td>\n";
      $cnt++;
      if ($cnt == $wrap) {
         $msg .= "</tr>\n";
         $cnt = 0;
      }
      print $oh $msg;
   }
   if ($cnt) {
      $msg = '';
      while ($cnt < $wrap) {
         $msg .= "<td>&nbsp; </td>";
         $cnt++;
      }
      $msg .= "\n</tr>\n";
      print $oh $msg;
   }
   print $oh "</table>\n";
}
sub my_file_name {
   my ($f) = shift;
   my @a = split(/\./, $f);
   my $cnt = scalar @a;
   if ($cnt > 1) {
      pop @a;
      return join( '.', @a);
   }
   return $f;
}
sub my_file_ext {
   my ($f) = shift;
   my @a = split(/\./, $f);
   my $cnt = scalar @a;
   if ($cnt > 1) {
      return $a[-1];
   }
   return '';
}
sub my_file_type {
   my ($f) = shift;
   my $ext = my_file_ext($f);
   if ($ext =~ /^pl$/i) {
      return 1;
   }
   return 0;
}
sub in_existing($) {
   my ($cf) = shift;
   my $ef = '';
   foreach $ef (@existing) {
      if ($ef eq $cf) {
         return 1;
      }
   }
   return 0;
}
sub get_existing_files {
   my ($dir) = shift;
   my $df = '';
   prt( "Getting list of EXISTING files in out folder [$dir] ...\n" );
   opendir( THEDIR, $dir ) or mydie( "ERROR: Unable to open folder [$dir] ...\n" );
   my @dfiles = readdir(THEDIR); # slurp in ALL directories, and files, (and . & ..!)
   closedir(THEDIR);
   my $fndcss = 0;
   my $fndjpg = 0;
   my $fndval = 0;
   my $fcnt = scalar @dfiles;
   foreach my $dfile (@dfiles) {
      $df = $dir . '/' . $dfile; # get full name
      if ($dir eq '.') {
         $df = $dfile;
      }
      if ( -d $df ) { # is directory?
         # if ($dfile eq '.' || $dfile eq '..') or
         if ($dfile =~ '^\.$' || $dfile =~ '^\.\.$') {
            # do nothing with DOT and DOUBLE DOT
         } else {
            push(@dir_list, $df); # save local DIRECTORY LIST
            $tot_dirs++;
         }
      } else { # it is a FILE
         $tot_files++;
         push(@existing, $dfile);
         if ($dfile =~ /^perl\.css$/i) {
            prt( "NOTE: [$df] already exists ...\n" );
            $fndcss = 1;
         } elsif ($dfile =~ /^$jpg_file$/i) {
            prt( "NOTE: [$df] already exists ...\n" );
            $fndjpg = 1;
         } elsif ($dfile =~ /^$v401_file$/i) {
            prt( "NOTE: [$df] already exists ...\n" );
            $fndval = 1;
         }
      }
   }
   if (!$fndcss) {
      $df = $dir.'/perl.css';
      prt( "NOTE: Creating [$df] ...\n" );
      write2file( $perlcss, $df );
   }
   if (!$fndjpg) {
      prt( "NOTE: Copying [$jpg_src] to [$jpg_des] ...\n" );
      copy( $jpg_src, $jpg_des ) or mydie("ERROR: Failed to COPY [$jpg_src]!\n");
   }
   if (!$fndval) {
      prt( "NOTE: Copying [$v401_src] to [$v401_des] ...\n" );
      copy( $v401_src, $v401_des ) or mydie("ERROR: Failed to COPY [$v401_src]!\n");
   }
}
# get_input_files( $in_folder );      # find perl scripts, and put in @in_files
sub get_input_files {
   my ($dir) = shift;
   prt( "Openning folder [$dir] ...\n" ) if ($dbg1);
   opendir( THEDIR, $dir ) or mydie( "ERROR: Unable to open folder [$dir] ...\n" );
   my @dfiles = readdir(THEDIR); # slurp in ALL directories, and files, (and . & ..!)
   closedir(THEDIR);
   prt( "Got ".scalar @dfiles." from folder [$dir] ...\n" ) if ($dbg1);
   foreach my $dfile (@dfiles) {
      my $df = $dir . '/' . $dfile; # get full name
      if ($dir eq '.') {
         $df = $dfile;
      }
      if ( -d $df ) { # is directory?
         # if ($dfile eq '.' || $dfile eq '..') or
         if ($dfile =~ '^\.$' || $dfile =~ '^\.\.$') {
            # do nothing with DOT and DOUBLE DOT
         } else {
            push(@dir_list, $df); # save local DIRECTORY LIST
            prt( "Added [$df] to folder list ...\n" ) if ($dbg1);
            $tot_dirs++;
         }
      } else { # it is a FILE
         $tot_files++;
         if (my_file_type($dfile)) {
            prt( "Added [$df] to list ...\n" ) if ($dbg1);
            push(@in_files, [$df, $dfile]); # store full, and base names
         } else {
            prt( "Skipping [$df] ...\n" ) if ($dbg1);
         }
      }
   }
}
sub reset_variables {
   # done at start of each file
   @lines = (); # no lines, yet
   $doc_total = 0;
   $out_total = 0;
   # these are really just DEBUG counters
   $a_cnt = 0;
   $b_cnt = 0;
   $c_cnt = 0;
   $d_cnt = 0;
   $e_cnt = 0;
   $f_cnt = 0;
   $o_cnt = 0;
   $v_cnt = 0;
   $q_cnt = 0;
   %HResWdFnd = ();
   %HFuncsFnd = ();
}
sub generate_index { # output @ind_files - array of files to index.htm
   my $icnt = scalar @ind_files;
   my $cnt = 0;
   my $msg = '';
   my $i = 0;
   my $dcnt = 0;
   my $ocnt = 0;
   my $acnt = 0; # added to index.htm
   if ($icnt == 0) {
      prt( "No index.htm generated - no files to list ...\n" );
      return;
   }
   my $slatest = YYYYMMDD($latest);
   my $searly = YYYYMMDD($earliest);
   my $of = $in_index; ## "$out_folder/$indexhtm";   # = something line 'index.htm';
   open $OF, ">$of" or mydie("ERROR: Unable to generate index file ...aborting ...\n");
   prt( "\nWriting [$of] HTML with $icnt files ...\n" );
   print $OF "$m_doctype\n";
   print $OF <<"EOF";
<html>
<head>
<title>Index to Perl HTML Samples</title>
<meta http-equiv="Content-Language" content="en-au">
<meta http-equiv="Content-Type" content="text/html; charset=windows-1252">
EOF
   add_metas($OF, 0);
   print $OF <<"EOF";
<link rel=stylesheet href="perl.css" type="text/css">
</head>
<body>
EOF
   print $OF "<h1>Index to Perl HTML Samples</h1>\n";
   out_link_line($OF, 1); # avoid adding top
   print $OF <<"EOF";
<a name="top"></a>
<p>This is a rather random sample of the Perl scripts I have generated over the 
last few years ($searly - $slatest). Some represent complete Perl applications, aimed at a particular 
purpose, while others are just samples, sometimes not functional! And some, like logfile.pl, are only 
'include' files, ie require 'logfile.pl'. A small amount of script has been scraped 
from various web site, to test some suggested functionality, but most are largely 
my own fun and games with Perl.</p>
<p>When there is a series numbered 01, 02, 03, etc, this usually means the latest is the largest 
number, but sometimes they are different samples. However, the date following each file name 
link is a further indication of the age of the sample. And the original file size, in bytes, follows 
that.</p>
<p>Each of these HTML files are generated from the Perl script, p2hall02.pl, with colour coding added, 
and, as can be read in the preamble to p2hall02.pl, this means sometimes a simple 
copy and paste will fail, due mainly to a 'translation' of certain characters. 
But most of the time it should be ok, or only require minor fixes.</p>
<p>As always, <font size="2" color="red"><b>*** USE AT OWN RISK ***</b></font>. These are in the 
'public domain' thus there is no 'licence' to worry about. Of course you MUST have a Perl runtime
installed, and in some special cases, additional Perl 'libraries' installed, to run those 
particular files.</p>
   
<a name="alphabetic"></a>
<p>The table is repeated. The first should be more or less in file alphabetic order, 
the second is in <a href="#dateindex"><b>date order</b></a> table, with the latest listed first. Then 
there is a <a href="#jumptable"><b>'jump'</b></a> table, where each Perl built-in function is list, 
with links to the file(s) that use that built-in. Enjoy ;=))</p>
EOF
   out_link_line($OF, 2); # avoid adding alphabetc
   print $OF '<table width="100%" border="0" summary="Alphabetic index to Perl samples">'."\n";
   print $OF '<caption>Alphabetic index to Perl samples</caption>'."\n";
   # actual output of generated lines
   $cnt = 0; # for $wrap
   #foreach $line (@ind_files) {
   for ($i = 0; $i < $icnt; $i++) {
      $line = $ind_files[$i][0];
      ####$date = $ind_files[$i][1];
      $date = YYYYMMDD($ind_files[$i][1]);
      $sz = get_nn($ind_files[$i][2]);
      $msg = '';
      if ($cnt == 0) {
         $msg = "<tr>\n";
      }
      mark_old_index($line);
      $msg .= "<td><a href=\"$line\">$line</a><br>$date<br>$sz</td>\n";
      $cnt++;
      if ($cnt == $wrap) {
         $msg .= "</tr>\n";
         $cnt = 0;
      }
      print $OF $msg;
      $acnt++; # bump added
   }
   my $tsc = scalar @tbl_set;
   $ocnt = 0;
   for ($i = 0; $i < $tsc; $i++ ) {
      if ($tbl_set[$i][7] == 0) {
         $ocnt++;
      }
   }
   prt("Checked $tsc files from old index, and found $ocnt NOT MARKED ...\n");
   #                 0     1     2    3    4    5    6    7
   # push(@tbl_set, [$hrf, $fil, $dt, $sz, $yr, $mt, $dy, 0]);
   for ($i = 0; $i < $tsc; $i++ ) {
      if ($tbl_set[$i][7] == 0) {
         $line = $tbl_set[$i][0];
         if (in_existing($line)) {
            $date = $tbl_set[$i][2];
            $sz   = $tbl_set[$i][3];
            $msg = '';
            if ($cnt == 0) {
               $msg = "<tr>\n";
            }
            ###mark_old_index($line);
            $tbl_set[$i][7] = 2;
            $msg .= "<td><a href=\"$line\">$line</a><br>$date<br>$sz</td>\n";
            $cnt++;
            if ($cnt == $wrap) {
               $msg .= "</tr>\n";
               $cnt = 0;
            }
            print $OF $msg;
            prt( "NOTE ADDED [$line][$date][$sz] from OLD index ...\n" );
            $dcnt++;
         } else {
            prt( "WARNING: File [$line] is NO LONGER IN FOLDER! Now dumped!!\n" );
         }
      }
   }
   if ($cnt) {
      $msg = '';
      while ($cnt < $wrap) {
         $msg .= "<td>&nbsp; </td>\n";
         $cnt++;
      }
      $msg .= "</tr>\n";
      print $OF $msg;
   }
   print $OF "</table>\n";
   prt( "Done primary table ".($acnt + $dcnt)." ... now to do date sorted table ...\n" );
   add_new_table($OF); # add new table sorted by time
   add_jump_table($OF); # put a jump table of build-in function
   out_link_line($OF, 0); # avoid none
   # add 4.01 validation ...
   print $OF <<"EOF";
<p>
<a href="http://validator.w3.org/check?uri=referer">
<img src="valid-html401.gif" alt="Valid HTML 4.01 Transitional" width="88" height="31">
</a>
</p>
EOF
   print $OF "</body>\n";
   $msg = "<!-- P26.".YYYYMMDD(time())." generated by $0 for geoffmclane.com/mperl/samples -->\n";
   print $OF $msg;
   print $OF "</html>\n";
   close($OF);
   prt( "Done file [$of] with $icnt files, plus $dcnt of $tsc from previous ...\n" );
}
##########################################################################
# The main file OUTPUT - that is the HTML file.
# It establishes the HTML header, which includes the CSS style
# information. then outputs each of the 'converted' lines ...
# this is what it is all about - to generate a HTML document
##########################################################################
sub write_out_file {
   my ($of) = shift;
   open $OF, ">$of" or mydie( "ERROR: Unable to create $of ... aborting ...\n" );
   print $OF "$m_doctype\n";
   print $OF <<"EOF";
<html>
<head>
<title>$in_file to HTML</title>
<meta http-equiv="Content-Language" content="en-gb">
<meta http-equiv="Content-Type" content="text/html; charset=windows-1252">
EOF
   add_metas($OF, 1);
   print $OF <<"EOF";
<link rel=stylesheet href="perl.css" type="text/css">
</head>
<body>
EOF
   print $OF "<h1>$in_file to HTML.</h1>\n";
   print $OF '<p class="top"><a href="'.$indexhtm.'">index</a></p>'."\n";
   print $OF '<p>Generated: ' . localtime(time()) . " from $in_file ";
   print $OF YYYYMMDD($in_date).' '.b2KMG($in_size).".</p>\n";
   if ($add_table) {
      print $OF '<table width="100%" border="1" summary="Simple HTML of $in_file"><tr><td>'."\n";
   } elsif ($add_pre) {
      print $OF '<pre class="cd">'."\n";
   }
   # actual output of generated lines
   foreach $line (@lines) {
      $out_total += length($line);
      print $OF $line;
   }
   if ($add_table) {
      print $OF '</td></tr></table>'."\n";
   } elsif ($add_pre) {
      print $OF '</pre>'."\n";
   }
   if ($add_chart) {
      # mainly only for DEBUG
     print $OF <<"EOF";
Chart of Colours Used<br>
<table border="1" summary="Table of colours, and count of times used">
<tr>
   <th>Class</th><th>Colour</th><th>Use</th><th>Count</th>
</tr>
<tr>
<td><span class="$a_class">class='$a_class'</span></td>
<td><span class="$a_class">$a_color RED</span></td>
<td><span class="$a_class">Built-in Functions</span></td>
<td><span class="$a_class">$a_cnt</span></td>
</tr>
<tr>
<td><span class="$b_class">class='$b_class'</span></td>
<td><span class="$b_class">$b_color BLUEGREEN</span></td>
<td><span class="$a_class">Comments (following #)</span></td>
<td><span class="$b_class">$b_cnt</span></td>
</tr>
<tr>
<td><span class="$c_class">class='$c_class'</span></td>
<td><span class="$c_class">$c_color BLUE</span></td>
<td><span class="$a_class">Reserved Words</span></td>
<td><span class="$c_class">$c_cnt</span></td>
</tr>
<tr>
<td><span class="$d_class">class='$d_class'</span></td>
<td><span class="$d_class">$d_color BROWN</span></td>
<td><span class="$a_class">Inside qw(...)</span></td>
<td><span class="$d_class">$d_cnt</span></td>
</tr>
<tr>
<td><span class="$e_class">class='$e_class'</span></td>
<td><span class="$e_class">$e_color DARKBLUE</span></td>
<td><span class="$a_class">Scalar Variables</span></td>
<td><span class="$e_class">$e_cnt</span></td>
</tr>
<tr>
<td><span class="$f_class">class='$f_class'</span></td>
<td><span class="$f_class">$f_color GREY</span></td>
<td><span class="$a_class">Inside <<EOF thingy</span></td>
<td><span class="$f_class">$f_cnt</span></td>
</tr>
<tr>
<td><span class="$o_class">class='$o_class'</span></td>
<td><span class="$o_class">$o_color ORANGE</span></td>
<td><span class="$a_class">Array Variables</span></td>
<td><span class="$o_class">$o_cnt</span></td>
</tr>
<tr>
<td><span class="$v_class">class='$v_class'</span></td>
<td><span class="$v_class">$v_color OLIVE</span></td>
<td><span class="$a_class">Hash Variables</span></td>
<td><span class="$v_class">$v_cnt</span></td>
</tr>
<tr>
<td><span class="$t_class">class='$t_class'</span></td>
<td><span class="$t_class">$t_color GREEN</span></td>
<td><span class="$a_class">Single and Double Quotes</span></td>
<td><span class="$t_class">$q_cnt</span></td>
</tr>
</table>
<br>End of chart<br>
EOF
      my $tot = ($a_cnt+$b_cnt+$c_cnt+$d_cnt+$e_cnt+$f_cnt+$o_cnt+$v_cnt+$q_cnt);
      my $diff = $out_total - $doc_total;
      print $OF "This use of $tot colour span sequences added $diff bytes. In=$doc_total Out=$out_total<br>\n";
   }
   print $OF '<p class="top"><a href="'.$indexhtm.'">index</a></p>'."\n";
   # add 4.01 validation ...
   print $OF <<"EOF";
<p>
<a href="http://validator.w3.org/check?uri=referer">
<img src="valid-html401.gif" alt="Valid HTML 4.01 Transitional" width="88" height="31">
</a>
</p>
EOF
   print $OF "</body>\n";
   print $OF "</html>\n";
   close($OF);
}
#########################################################
# A small set of 9 services which add in the CSS class,
# using <span class="???">.thetext.</span>
#
# Each one does a different class, and the class
# is extracted to variables set above. This means
# they can easily be adjusted to new, different
# values ...
#
# They also accumulate statistic information on how
# many time each is used ...
#########################################################
# built-in functions
sub add_red {
   my ($t) = shift;
   $a_cnt++;
   return ('<span class="'.$a_class.'">'.$t.'</span>');
}
# perl comments
sub add_class_b {
   my ($t) = shift;
   $b_cnt++;
   return ('<span class="'.$b_class.'">'.$t.'</span>');
}
# perl reserved words
sub add_blue {
   my ($t) = shift;
   $c_cnt++;
   return ('<span class="'.$c_class.'">'.$t.'</span>');
}
# perl qw set
sub add_class_d {
   my ($t) = shift;
   $d_cnt++;
   return ('<span class="'.$d_class.'">'.$t.'</span>');
}
sub add_class_e {
   my ($t) = shift;
   $e_cnt++;
   return ('<span class="'.$e_class.'">'.$t.'</span>');
}
sub add_class_f {
   my ($t) = shift;
   $f_cnt++;
   return ('<span class="'.$f_class.'">'.$t.'</span>');
}
sub add_class_o {
   my ($t) = shift;
   $o_cnt++;
   return ('<span class="'.$o_class.'">'.$t.'</span>');
}
sub add_class_v {
   my ($t) = shift;
   $v_cnt++;
   return ('<span class="'.$v_class.'">'.$t.'</span>');
}
sub add_quote {
   my ($t) = shift;
   $q_cnt++;
   return ('<span class="'.$t_class.'">'.$t.'</span>');
}
#########################################################
# search the @ResWord array for an entry
sub in_res_words {
   my ($t) = shift;
   foreach my $rw (@ResWords) {
      if ($t eq $rw) {
         $last_resword = $rw;
       if (exists $HResWdFnd{$rw}) {
         $HResWdFnd{$rw}++; # another count
       } else {
         $HResWdFnd{$rw} = 1; # start count
       }
         return 1;
      }
   }
   return 0;
}
# search the @BuiltIns array for an entry
sub is_built_in {
   my ($t) = shift;
   foreach my $rw (@BuiltIns) {
      if ($t eq $rw) {
         return 1;
      }
   }
   return 0;
}
sub in_built_in {
   my ($t) = shift;
   if (is_built_in($t)) {
      $last_builtin = $t;
     if (exists $HFuncsFnd{$t}) {
      ### prt ( "Bumped Funcs [$t] ...\n" );
      $HFuncsFnd{$t}++; # another count
    } else {
      ### prt ( "Created Funcs [$t] ...\n" );
      $HFuncsFnd{$t} = 1; # start count
    }
     return 1;
   }
   return 0;
}
sub in_built_in_ok {
   my ($t) = shift;
   foreach my $rw (@BuiltIns) {
      if ($t eq $rw) {
         $last_builtin = $rw;
       if (exists $HFuncsFnd{$rw}) {
         ### prt ( "Bumped Funcs $rw ...\n" );
         $HFuncsFnd{$rw}++; # another count
       } else {
         ### prt ( "Created Funcs $rw ...\n" );
         $HFuncsFnd{$rw} = 1; # start count
       }
         return 1;
      }
   }
   return 0;
}
sub is2lt {
   my $t = shift;
   $t =~ s/&lt;/</g;
   if ( (length($t) >= 2 ) && ( $t =~ /<<$/ ) ) {
      return 1;
   }
   return 0;
}
sub sans_quotes {
   my $t = shift;
   $t =~ s/"//g;
   $t =~ s/'//g;
   return $t;
}
######################################################
# Converting SPACES to '&nbsp;'
# Of course this could be done just using perl's
# powerful search and replace, but this handles
# any number of spaces, only converting the number
# minus 1 to &nbsp; ... not sure how to have
# this level of control with regex replacement
######################################################
sub conv_spaces {
   my $t = shift;
   my ($c, $i, $nt, $ln, $sc, $sp);
   $nt = ''; # accumulate new line here
   $ln = length($t);
   for ($i = 0; $i < $ln; $i++) {
      $c = substr($t,$i,1);
      if ($c eq ' ') {
         $i++; # bump to next 
         $sc = 0;
         $sp = '';
         for ( ; $i < $ln; $i++) {
            $c = substr($t,$i,1);
            if ($c ne ' ') {
               last; # exit
            }
            $sc++;
            $sp .= $c;
         }
         if ($sc) {
            $sp =~ s/ /&nbsp;/g;
            $nt .= $sp;
         }
         $i--; # back up one
         $c = ' '; # add back the 1 space
      }
      $nt .= $c;
   }
   prt( "conv_space: from [$t] to [$nt] ...\n" ) if $debug_on;
   return $nt;
}
###########################################################################
# VERY IMPORTANT SERVICE
# This converts the 'text' into HTML text, but only does a partial job!
# 1. Convert '&' to '&amp;' to avoid interpreting as replacement
# 2. Convert '<' to '&lt;' to avoid interpreting as HTML
# 3. Convert '"' to '&quot;'
# 4. Convert '\t' to SPACES
# 5. Finally, if there are double or more SPACES, convert to '&nbsp;'
###########################################################################
sub html_line {
   my $t = shift;
   my $ot = $t;
   $t =~ s/&/&amp;/g; # all '&' become '&amp;'
   $t =~ s/</&lt;/g; # make sure all '<' is/are swapped out
   $t =~ s/\"/&quot;/g; # and all quotes become &quot;
   $t =~ s/\t/$tab_space/g; # tabs to spaces
   if ($t =~ /\s\s/) { # if any two consecutive white space
      return conv_spaces($t);
   }
   prt( "html_line: from [$ot] to [$t] ...\n" ) if $debug_on;
   return $t;
}
##########################################################
# The following two functions 'convert' scalar variables
# to colour codes spans, in the print <<EOF = get_uform,
# and withing double quoted text "this $cnt ..." ...
# THESE ADD LOTS OF WEIGHT TO THE FILE
##########################################################
sub get_uform {
   my $ln = shift;
   my $tok = ''; # colour up the USER scalar variables within
   my $len = length($ln);
   my $nline = '';
   for (my $i = 0; $i < $len; $i++) {
      my $ch = substr($ln, $i, 1);
      if (($ch eq '$') && (($i + 1) < $len) && (substr($ln,$i+1,1) =~ /\w/) ) {
         $nline .= add_class_f(html_line($tok)) if (length($tok));
         $tok = $ch;
         $i++;
         for ( ; $i < $len; $i++) {
            $ch = substr($ln, $i, 1);
            if ( ! ($ch =~ /\w/) ) {
               # end of token
               $nline .= add_class_e(html_line($tok));
               $tok = '';
               last;
            }
            $tok .= $ch;
         }
      }
      $tok .= $ch;
    }
   $nline .= add_class_f(html_line($tok)) if (length($tok));
   return $nline;
}
sub add_quote2 {
   my ($ln) = shift;
   my $len = length($ln);
   my $ch = '';
   my $ch2 = '';
   my $pc = '';
   my $pc2 = '';
   my $nl = ''; # put the NEW line in here
   my $tok = ''; # colour up the USER scalar variables within DOUBLE quotes
   for (my $i = 0; $i < $len; $i++ ) {
      $ch = substr($ln, $i, 1);
      $ch2 = (($i + 1) < $len) ? substr($ln,$i+1,1) : '';
      # if a scalar variable, and not 'escaped', or the escape escaped and next is 'an_'
      if (($ch eq '$') && (($pc ne '\\')||(($pc eq '\\') && ($pc2 eq '\\'))) &&
         (($i + 1) < $len) && ($ch2 =~ /\w/) ) {
         $nl .= add_quote(html_line($tok)) if (length($tok));
         $tok = $ch;
         $i++;
         for ( ; $i < $len; $i++) {
            $ch = substr($ln, $i, 1);
            if ( ! ($ch =~ /\w/) ) {
               # end of token
               $nl .= add_class_e(html_line($tok));
               $tok = '';
               last; # exit
            }
            $tok .= $ch;
         }
      }
      $tok .= $ch;
      $pc2 = $pc;
      $pc = $ch;
   }
   $nl .= add_quote(html_line($tok)) if (length($tok));
   return $nl;   
}
sub add_2_lines {
   my $t = shift;
   if ( ! $add_pre ) {
      $t .= "<br>";
   }
    prt( "nline[$t]\n" ) if $debug_on;
   $t .= "\n";
   push(@lines, $t);
}
sub get_balance {
   my ($t) = shift;
   if ($t =~ /#/) {
      my $off = index($t, '#');
      if ($off != -1) {
         $t = substr($t,0,$off);
      }
   }
   return $t;
}
sub get_comment {
   my ($t) = shift;
   my $off = index($t, '#');
   if ($off != -1) {
      $t = substr($t,$off);
   } else {
      $t = '';
   }
   return $t;
}
#################################################################
# The MAIN file processing
# The input file is openned, and all the lines read
# into an array @lns, then each line is processed,
# cheracter by character ...
# It does it mainly via a state, $st
# $st == 0 - processing white space
# $st == 1 - processing alphanumeric, plus _
# $st == 2 - processing nither space nor alphanumeric, here
#            referred to as 'an_' ...
# $st == 3 - Locked in one of << thingies, until the end
#            token located, or until end of file ...
# $st == 4 - Processing a qw(...) function, of qw/.../ if 
# enabled.
#
# Generally the 'tokens' are stored in $tok, as the line
# is processed, added to the $nline at various change
# points, and finally the $nline is stored in the array
# @lines, for later output ...
#
# Setting $debug_on will give a BIG TRACE of where the
# code is handling something ...
#
# Setting $add_uvars to on will add colour code user variables
# but this adds a lot of extra weight to the file.
#################################################################
sub process_file {
   my ($in_file) = shift;
   my ($IF);
   my ($ch1,$ch2,$ch3,$ch4);
   open $IF, "<$in_file" or die "ERROR: Unable to open $in_file ... aborting ...\n";
   my @lns = <$IF>; # slurp into line array
   close($IF);
   prt( "\nGot ".scalar @lns." to process from $in_file ...\n" );
   my $st = 0; # current status
   my $nst = 0;
   my $pc = '';
   my $pc2 = '';
   my $ch = '';
   my $tok = '';
   my $ltok = ''; # last token
   my $ltok1 = '';
   my $ltok2 = '';
   my $qtok = ''; # print <<"EOF" or ANY <<'until_end', token
   my $end_qw = '/';
   my $i = 0;
   foreach my $ln (@lns) {
      $doc_total += length($ln);
      chomp $ln;
      $ln =~ s/\r$//; # and remove CR, if present
     $ln = fix_email($ln);
      my $len = length($ln);
      my $nline = '';
      prt( "\nline=[$ln] ...\n" ) if $debug_on;
      $pc = '';
     $pc2 = '';
      $tok = '';
      $ltok = ''; # last token
      $ltok1 = ''; # token stack
      $ltok2 = '';
      $i = 0;
      $nst = 0; # if fall through, next status is IN space
      if ($st == 3) {
         # locked in a 'print' string to end token
       if ($add_uvars) {
          $nline = get_uform( $ln );
       } else {
          $nline = add_class_f(html_line($ln));
       }
       add_2_lines($nline);
         if ($ln =~ /^$qtok/) {
            $st = 0;
         }
         next; # next LINE of file
      } elsif ($st == 4) {
         # processing a 'qw' block - only if $brown_qw is ON
         $tok = '';
         for ( ; $i < $len; $i++) {
            $ch = substr($ln, $i, 1);
            if ($ch eq $end_qw) { # either '/' or ')' depending on start
               $nline .= add_class_d(html_line($tok)) if (length($tok));
               $tok = '';
               last;
            }
            $tok .= $ch;
         }
         if ($i < $len) {
            $nst = 2; # fall through to continue line
         } else {
            $nline = add_class_d(html_line($ln));
         add_2_lines($nline);
            next;
         }
      }
      $st = $nst;
      for ( ; $i < $len; $i++) {
         $ch = substr($ln, $i, 1);
         # make a BIG exception of '&lt;' ...
         if (($ch eq '&') && (($i + 3) < $len)) {
            $ch1 = substr($ln, $i, 4);
         if ($ch1 eq '&lt;') {
            $tok .= $ch1;
            $i += 3;
            $st = 2;
            $pc = ';';
             next;
         }
       }
         if ($st == 0) {
            # IN white space territory
            if ($ch =~ /\S/) {
               prt( "IN ws, changed to NOT with [$ch] ".
                  "\$tok=[$tok] \$ltok[$ltok] \$ltok1[$ltok1] \$ltok2[$ltok2] html\n" ) if $debug_on; 
               $nline .= html_line($tok); # add any white space to new line
               $ltok2 = $ltok1;
               $ltok1 = $ltok;
               $ltok = $tok;
               $tok = '';
            # if NOT escape, or escaped escape character
               if ( ($pc ne '\\') || (($pc eq '\\') && ($pc2 eq '\\')) ){
                  if ($ch eq '#') {
                     # start of a COMMENT
                     prt( "start of a COMMENT [$ch] ".
                        "tok=[$tok] ltok[$ltok] ltok1[$ltok1] ltok2[$ltok2] html\n" ) if $debug_on; 
                     $tok = substr($ln, $i); 
                     $nline .= add_class_b(html_line($tok));
                     $tok = '';
                     $st = 0;
                     last;
                  } elsif (($ch eq '"')||($ch eq "'")) {
                     my $bch = $ch;
                     prt( "start of a QUOTE [$ch] ".
                        "tok=[$tok] ltok[$ltok] ltok1[$ltok1] ltok2[$ltok2]\n" ) if $debug_on; 
                     $tok = $ch;
                     $i++;
                $pc2 = '';
                     for ( ; $i < $len; $i++ ) {
                        $ch = substr($ln, $i, 1);
                  # if the PREVIOUS is NOT an ESCAPE, OR the previous and previous ARE
                  # that is a ESCAPED ESCAPE character, which is NOT an escape at all ;=))
                        if ( ($pc ne '\\') || (($pc eq '\\')&&($pc2 eq '\\')) ) {
                           if ($ch eq $bch) {
                              $tok .= $ch;
                              prt( "End of a QUOTE [$ch] ".
                                 "tok=[$tok] ltok[$ltok] ltok1[$ltok1] ltok2[$ltok2] html\n" ) if $debug_on; 
                       if ($add_uvars && ($bch eq '"')) {
                          $nline .= add_quote2($tok);
                       } else {
                          $nline .= add_quote(html_line($tok));
                       }
                              $tok = '';
                       $pc2 = $pc;
                              $pc = $ch;
                              last;
                           }
                        }
                        $tok .= $ch;
                  $pc2 = $pc;
                        $pc = $ch;
                     }
                     $pc = $ch;
                     next;
                  }
               }
               $tok = $ch;
               if ($ch =~ /\w/) {
               prt( "Start tok with $ch ... sw st [$st] to 1\n" ) if $debug_on;
                  $st = 1;
               } else {
               prt( "Start tok with $ch ... sw st [$st] to 2\n" ) if $debug_on;
                  $st = 2;
               }
            $pc2 = $pc;
               $pc = $ch;
               next;
            } else {
               # staying in white space
               $tok .= $ch;
            $pc2 = $pc;
               $pc = $ch;
               next;
            }
         } elsif ($st == 1) {
            # dealing with alphanumberic + _
            if ($ch =~ /\w/) {
               $tok .= $ch;
            $pc2 = $pc;
               $pc = $ch;
               next; # continue alphanumeric + _
            }
            prt( "IN an_, no longer an_ with [$ch] ... tok=[$tok]\n" ) if $debug_on;
            if (length($tok)) {
               if (in_res_words($tok) ) {
                  $nline .= add_blue(html_line($tok));
                  if ($brown_qw && (($ch eq '(')||($ch eq '/')) && ($last_resword eq 'qw')) {
                # entering a qw list
                $end_qw = '/';
                $end_qw = ')' if ($ch eq '(');
                     prt( "Excepting a qw list ... Begin $ch, End $end_qw ...\n" ) if $debug_on;
                     $i++;
                     $nline .= $ch;
                     $tok = ''; # no token
                     for ( ; $i < $len ; $i++) {
                        $ch = substr($ln,$i,1);
                        if ($ch eq $end_qw) { # end on '/' or ')' depending on start
                           $nline .= add_class_d(html_line($tok)) if (length($tok));
                           $nline .= $ch;
                           $tok = '';
                           last;
                        }
                        $tok .= $ch;
                     }
                     if ($i < $len) {
                        next; # get next character
                     } # else, we have ended the line, still in a 'qw' ...
                     $nline .= add_class_d(html_line($tok)) if (length($tok));
                     $tok = '';
                     $st = 4;
                     last; # end of THIS line
                  }
               } elsif (in_built_in($tok)) {
                  $nline .= add_red(html_line($tok));
               } else {
               if ($add_uvars) { # colour code user variables
                  $ch1 = substr($tok,0,1);
                 if ($ch1 eq '$') {
                    $nline .= add_class_e(html_line($tok));
                 } elsif ($ch1 eq '@') {
                    $nline .= add_class_o(html_line($tok));
                 } elsif ($ch1 eq '%') {
                    $nline .= add_class_v(html_line($tok));
                 } else {
                        $nline .= html_line($tok);
                 }
               } else {
                  $nline .= html_line($tok);
               }
               }
               $ltok2 = $ltok1;
               $ltok1 = $ltok;
               $ltok = $tok;
               $tok = '';
            }
            $tok = $ch;
            if ($ch =~ /\s/) {
               $st = 0; # goto SPACE mode
            } elsif ($ch =~ /\w/) {
               $st = 1; # goto AN_ mode
            } else {
               $st = 2; # goto NOT SPACE or AN_ mode
            }
         $pc2 = $pc;
            $pc = $ch;
            next;
         } elsif ($st == 2) {
            # not IN space or IN an_
            if ($ch =~ /\s/) {
               prt( "IN 2 - change back to space with [$ch] ... tok=[$tok]\n" ) if $debug_on;
               if ( is2lt($tok) ) {
                  $ch1 = get_balance(substr($ln,$i)); # get balance of line
                  $ch1 =~ s/\s+$//; # remove any trailing white space
                  ##if ( ($ch1 =~ /;$/) && ($ltok =~ /=/) ) { mod20060829 add ltok2 also
                  if ( ($ch1 =~ /;$/) && (($ltok =~ /=/)||($ltok1 =~ /=/)) ) {
                     $ch1 =~ s/^\s+//; # remove any leading spaces
                     $ch1 =~ s/;$//; # remove colon
                     $ch1 =~ s/\s+$//; # now again remove any trailing white space
                     if ( !($ch1 =~ /\s/) ) {
                        $qtok = sans_quotes($ch1); # STORE THE END MARKER !!!
                        prt( "Got <<EOH type tok[$tok] $ch1 ltok[$ltok] ltok1[$ltok1] ltok2[$ltok2]... qtok[$qtok]\n" ) if $debug_on;
                        $nline .= html_line($tok);
                        $tok = '';
                  $ch1 = substr($ln,$i);
                  if ($ch1 =~ /#/) {
                     $nline .= html_line(get_balance($ch1)); # add this part
                     $ch1 = get_comment($ch1);
                     if (length($ch1)) {
                        $nline .= add_class_b(html_line($ch1));
                     }
                  } else {
                     $nline .= html_line($ch1); # get balance of line
                  }
                        $st = 3;
                        last; # done this line
                     } else {
                        prt( "NOT 1 <<EOH type tok[$tok] $ch1 ltok[$ltok] ltok1[$ltok1] ltok2[$ltok2]... qtok[$qtok]\n" ) if $debug_on;
                }
                  } else {
                     prt( "NOT 2 <<EOH type tok[$tok] tbol=[$ch1] ltok[$ltok] ltok1[$ltok1] ltok2[$ltok2]... abol=[".
                   substr($ln,$i)."]\n" ) if $debug_on;
              }
               }
               $nline .= html_line($tok);
               $ltok2 = $ltok1;
               $ltok1 = $ltok;
               $ltok = $tok;
               $tok = $ch;
               $st = 0;
            $pc2 = $pc;
               $pc = $ch;
               next;
            } elsif ($ch =~ /\w/) { # alphanumeric, including _
               prt( "IN 2 - change back to an_ with [$ch] ... tok=[$tok]\n" ) if $debug_on;
               if ( is2lt($tok) ) {
                  $ch1 = get_balance(substr($ln,$i)); # get balance of line
                  $ch1 =~ s/\s+$//; # remove any trailing white space
                  ##if ( ($ch1 =~ /;$/) && ($ltok =~ /=/) ) { mod20060829 add ltok2 also
                  if ( ($ch1 =~ /;$/) && (($ltok =~ /=/)||($ltok1 =~ /=/)) ) {
                     $ch1 =~ s/^\s+//; # remove any leading spaces
                     $ch1 =~ s/;$//; # remove colon
                     $ch1 =~ s/\s+$//; # now again remove any trailing white space
                     if ( !($ch1 =~ /\s/) ) {
                        $qtok = sans_quotes($ch1); # STORE THE END MARKER !!!
                        prt( "Got <<EOH type tok[$tok] $ch1 ltok[$ltok] ltok1[$ltok1] ltok2[$ltok2]... qtok[$qtok]\n" ) if $debug_on;
                        $nline .= html_line($tok);
                        $tok = '';
                  $ch1 = substr($ln,$i);
                  if ($ch1 =~ /#/) {
                     $nline .= html_line(get_balance($ch1)); # add this part
                     $ch1 = get_comment($ch1);
                     if (length($ch1)) {
                        $nline .= add_class_b(html_line($ch1));
                     }
                  } else {
                     $nline .= html_line($ch1); # get balance of line
                  }
                        $st = 3;
                        last; # done this line
                     } else {
                        prt( "NOT 1 <<EOH type tok[$tok] $ch1 ltok[$ltok] ltok1[$ltok1] ltok2[$ltok2]... qtok[$qtok]\n" ) if $debug_on;
                }
                  } else {
                     prt( "NOT 2 <<EOH type tok[$tok] tbol=[$ch1] ltok[$ltok] ltok1[$ltok1] ltok2[$ltok2]... abol=[".
                   substr($ln,$i)."]\n" ) if $debug_on;
              }
               }
               if (($tok eq '$')||($tok eq '@')||($tok eq '%')) {
                  $tok .= $ch;
               } else {
               prt( "Not \$, \@, or \% - html\n" ) if $debug_on;
                  $nline .= html_line($tok);
                  $ltok2 = $ltok1;
                  $ltok1 = $ltok;
                  $ltok = $tok;
                  $tok = $ch;
               }
               $st = 1;
            $pc2 = $pc;
               $pc = $ch;
               next;
            }
         ## NOT space or alphanumeric, including _ ...
            ###if (($pc ne '\\') && (($ch eq '#') || ($ch eq '"') || ($ch eq "'"))) {
            if ((($pc ne '\\')||(($pc eq '\\')&&($pc2 eq '\\'))) &&
            ((($ch eq '#')&&($pc ne '$')) || ($ch eq '"') || ($ch eq "'"))) {
            prt( "add in current tok[$tok] ltok[$ltok] ltok1[$ltok1] ltok2[$ltok2] ...\n" ) if $debug_on;
               if ( is2lt($tok) ) {
                  $ch1 = get_balance(substr($ln,$i)); # get balance of line
                  $ch1 =~ s/\s+$//; # remove any trailing white space
                  ##if ( ($ch1 =~ /;$/) && ($ltok =~ /=/) ) { mod20060829 add ltok2 also
                  if ( ($ch1 =~ /;$/) && (($ltok =~ /=/)||($ltok1 =~ /=/)) ) {
                     $ch1 =~ s/^\s+//; # remove any leading spaces
                     $ch1 =~ s/;$//; # remove colon
                     $ch1 =~ s/\s+$//; # now again remove any trailing white space
                     if ( !($ch1 =~ /\s/) ) {
                        $qtok = sans_quotes($ch1); # STORE THE END MARKER !!!
                        prt( "Got <<EOH type tok[$tok] $ch1 ltok[$ltok] ltok1[$ltok1] ltok2[$ltok2]... qtok[$qtok]\n" ) if $debug_on;
                        $nline .= html_line($tok);
                        $tok = '';
                  $ch1 = substr($ln,$i);
                  if ($ch1 =~ /#/) {
                     $nline .= html_line(get_balance($ch1)); # add this part
                     $ch1 = get_comment($ch1);
                     if (length($ch1)) {
                        $nline .= add_class_b(html_line($ch1));
                     }
                  } else {
                     $nline .= html_line($ch1); # get balance of line
                  }
                        $st = 3;
                        last; # done this line
                     } else {
                        prt( "NOT 1 <<EOH type tok[$tok] $ch1 ltok[$ltok] ltok1[$ltok1] ltok2[$ltok2]... qtok[$qtok]\n" ) if $debug_on;
                }
                  } else {
                     prt( "NOT 2 <<EOH type tok[$tok] tbol=[$ch1] ltok[$ltok] ltok1[$ltok1] ltok2[$ltok2]... abol=[".
                   substr($ln,$i)."]\n" ) if $debug_on;
              }
               }
               $nline .= html_line($tok); # add in current token
               $ltok2 = $ltok1;
               $ltok1 = $ltok;
               $ltok = $tok;
               $tok = '';
               if ($ch eq '#') {
                  prt("# start of a COMMENT ...\n") if $debug_on;
                  $tok = substr($ln, $i); 
                  $nline .= add_class_b(html_line($tok));
                  $tok = '';
                  $st = 0;
                  last;
               } elsif (($ch eq '"')||($ch eq "'")) {
                  my $bch = $ch;
                  $tok = $ch;
                  $i++;
              $pc2 = '';
                  for ( ; $i < $len; $i++ ) {
                     $ch = substr($ln, $i, 1);
                     if ( ($pc ne '\\') || ( ($pc eq '\\') && ($pc2 eq '\\') ) ) {
                        if ($ch eq $bch) {
                           $tok .= $ch;
                           $qtok = sans_quotes($tok);
                     if ($add_uvars && ($bch eq '"')) {
                       $nline .= add_quote2($tok);
                     } else {
                       $nline .= add_quote(html_line($tok));
                     }
                           $tok = '';
                     $pc2 = $pc;
                           $pc = $ch;
                           last;
                        }
                     }
                     $tok .= $ch;
                $pc2 = $pc;
                     $pc = $ch;
                  }
                  # check for 'print ... <<"EOF";'
                  if (($i < $len) && 
                 ($last_builtin eq 'print') &&
                 (length($ltok) >= 2) && 
                 is2lt($ltok) && 
                 length($qtok) ) {
                $qtok = sans_quotes($qtok);   # strip any DOUBLE/SINGLE quotes
                     prt( "Got print [$last_builtin] ltok[$ltok] qtok[$qtok] ...\n" ) if $debug_on;
                     $i++;
                     $nline .= html_line(substr($ln,$i));
                     $tok = '';
                     $st = 3;
                     last; # done this line
                  }
              $pc2 = $pc;
                  $pc = $ch;
                  next;
               }
            }
         if ($add_uvars && (($ch eq '$')||($ch eq '@')||($ch eq '%'))) {
            prt( "In add_uvars and got \$\@\% [$ch] add tok 2 line ... reset tok\n" ) if $debug_on;
               $nline .= html_line($tok); # add in current token
               $ltok2 = $ltok1;
               $ltok1 = $ltok;
               $ltok = $tok;
               $tok = '';
         } else {
            prt( "NOT space or alphanumeric, including _, or special, or \$\@\% [$ch] add2tok ...\n" ) if $debug_on;
         }
            $tok .= $ch;
         }
       $pc2 = $pc;
         $pc = $ch;
      }
      $nline .= html_line($tok);
     add_2_lines($nline);   # push(@lines, $nline); after appending EOL
   }
}
####################################
# Reducing a line to bare bones
# Only presently used when loading
# the EditPlus 2 perl.stx file.
####################################
sub trim_line($) {
   my ($l) = shift;
   chomp $l; # remove LF
   $l =~ s/\r$//; # and remove CR, if present
   $l =~ s/\t/ /g; # tabs to a space
   $l =~ s/\s\s/ /g while ($l =~ /\s\s/); # duplicate space to single
   $l = substr($l,1) while ($l =~ /^\s/); # each off leading space
   $l = substr($l,0,length($l)-1) while (($l =~ /\s$/)&&(length($l))); # and trailing space
   return $l;
}
########################################
# Loading the reserved words, and
# perl built-in functions from a
# special EditPlus 2, perl.stx file,
# but there are arrays already included
# if you do not have this file.
########################################
sub load_stx_file {
   my ($infil) = shift;
   my ($IF);
   my @stx = ();
   my %dchk = ();
   open $IF, "<$infil" or mydie( "ERROR: Unable to open $infil ... aborting ...\n" );
   @stx = <$IF>; # slurp entire file into array
   close($IF);
   my $scnt = scalar @stx;
   prt( "Got $scnt lines in $infil to process ...\n" );
   my $st = 0;
   foreach my $ln (@stx) {
      my $tln = trim_line($ln);
      my $ll = length($tln);
      next if ($ll == 0);
      if( $tln =~ /^\#KEYWORD=Reserved words/ ) {
         $st = 1;
         next;
      } elsif ($tln =~ /^\#KEYWORD=Built-in functions/ ) {
         $st = 2;
         next;
      } elsif (($tln =~ /^\#/) || ($tln =~ /^;/)) {
         $st = 0;
         next;
      }
      if (exists $dchk{$tln}) {
         prt( "Warning: Avoiding duplicate of [$tln] ...\n" );
         next;
      }
      $dchk{$tln} = 1;
      if( $st == 1 ) {
         push(@ResWords, $tln);
      } elsif ($st == 2) {
         push(@BuiltIns, $tln);
      }
   }
   # this was ONLY used to get the internal list
   # so this file becomes unneccessary ...
   if ($out_lists) {
      my $max = 85;
      my $cnt = 20;
      prt( '@ResWords = qw(' );
      foreach my $ln (@ResWords) {
         prt( $ln.' ' );
         $cnt += length($ln);
         if ($cnt > $max) {
            prt("\n");
            $cnt = 0;
         }
      }
      prt( ");\n" );
      $cnt = 20;
      prt( '@BuiltIns = qw(' );
      foreach my $ln (@BuiltIns) {
         prt( $ln.' ' );
         $cnt += length($ln);
         if ($cnt > $max) {
            prt("\n");
            $cnt = 0;
         }
      }
      prt( ");\n" );
   }
}
################################################
# sadly, this is to mangle my email, so
# it does not 'appear' to web scrapers
################################################
sub fix_email {
   my ($eml) = shift;
   my $nem = $eml;
   if ($eml =~ /$emreg/i) {
      my $nm = $1.$2;
      my $sm = mangled_email($nm);
      my $ind = index($eml, $nm);
      if (!($ind == 1)) {
         $nem = substr($eml,0,$ind);
         $nem .= $sm;
         $nem .= substr($eml, $ind+length($nm));
         $efix_cnt++;
      }
      print "got [$nm] ... now [$sm] ... ind $ind ...\n" if $dbgem;
   } else {
      print "failed\n" if $dbgem;
   }
   return $nem;
}
sub mangled_email {
   my ($em) = shift;
   $em =~ s/geoffmclane/geoffair/i;
   $em =~ s/\./ _dot_ /;
   $em =~ s/\@/ _at_ /;
   return $em;
}
################################################
# My particular time 'translation'
sub YYYYMMDD {
   #  0    1    2     3     4    5     6     7     8
   my ($tm) = shift;
    my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($tm);
   $year += 1900;
   $mon += 1;
   my $ymd = "$year/";
   if ($mon < 10) {
      $ymd .= '0'.$mon.'/';
   } else {
      $ymd .= "$mon/";
   }
   if ($mday < 10) {
      $ymd .= '0'.$mday;
   } else {
      $ymd .= "$mday";
   }
   return $ymd;
}
##################################################
# My particular 'nice number'
sub get_nn { # perl nice number nicenum add commas
   my ($n) = shift;
   if (length($n) > 3) {
      my $mod = length($n) % 3;
      my $ret = (($mod > 0) ? substr( $n, 0, $mod ) : '');
      my $mx = int( length($n) / 3 );
      for (my $i = 0; $i < $mx; $i++ ) {
         if (($mod == 0) && ($i == 0)) {
            $ret .= substr( $n, ($mod+(3*$i)), 3 );
         } else {
            $ret .= ',' . substr( $n, ($mod+(3*$i)), 3 );
         }
      }
      return $ret;
   }
   return $n;
}
##################################################
# My particular bytes to K, M, G
sub b2KMG($) {
   my ($d) = shift;
   if ($d < 1000) {
      return $d;
   }
   my $oss;
   my $kss;
   my $lg = 0;
   my $ks = ($d / 1024); #// get Ks
   my $div = 1;
    if( $ks < 1000 ) {
      $div = 1;
      $oss = "KB";
    } elsif ( $ks < 1000000 ) {
     $div = 1000;
      $oss = "MB";
    } elsif ( $ks < 1000000000 ) {
      $div = 1000000;
      $oss = "GB";
    } else {
      $div = 1000000000;
      $oss = "TB";
    }
    $kss = $ks / $div;
    $kss += 0.05;
    $kss *= 10;
    $lg = int($kss);
    return( ($lg / 10) . " " . $oss );
   ###return( ($lg / 10) . $oss );
}
################################################
# A small 'print' service, that not only
# sends the output to STDOUT, but also
# directs it to a LOG file. I find it
# quite difficult to watch the console
# messages FLASH by ... Of course the
# output can be command line RE-DRIECTED,
# IF you are running it from the command
# line ... most of the time I run it
# from withing the Editor tool, thus thus
# provides a convenient look-back at what
# happend ... this is especially true when
# $debug_on is set ...
################################################
sub prt {
   my ($m) = shift;
   print $m;
   print $LF $m if $out_log;
}
sub mydie {
   my ($m) = shift;
   prt($m);
   die "Got above error ... aborting ...\n";
}
sub write2file {
   my ($txt,$fil) = @_;
   open WOF, ">$fil" or mydie("ERROR: Unable to open $fil!!!\n");
   print WOF $txt;
   close WOF;
}
#########################################################
######## keep the OLD index
### this is needed IF files have been DELETED ...
sub get_old_index($) {
   my ($ind) = shift;
   $tacnt = 0;
   $tacnt3 = 0;
   if (open IF, "<$ind") {
      @larr = <IF>; # slurp it all in ...
      close(IF);
      $lncnt = scalar @larr;
      prt( "Got $lncnt lines to process ... from [$ind]\n" );
      ###write2file( join('',@larr), 'tempout.txt');
      $ln = tag2newline( join('',@larr), 'td' );
      ###$ln = tag2newline( $ln, 'br' );
      @larr2 = split(/\n/, $ln);
      ###write2file( join("\n",@larr2), 'tempout3.txt');
      if (get_table_array()) {
         $tacnt = scalar @tbl_arr;
         $tacnt3 = scalar @tbl_arr3;
         prt( "Got $tacnt and $tacnt3 lines to process ... from [$ind]...\n" );
      } else {
         prt( "Failed to find table $tbl_num or $tbl_num3 ... in [$ind]...\n" );
      }
   } else {
      prt( "Warning: Failed to open $ind ...\n" );
      $no_index = 1;
   }
   if ($tacnt > 0) {
      my $cc = 0;
      for (my $i = 0; $i < $tacnt ; $i++) {
         $ln = $tbl_arr[$i]; # extract a line
         if ($ln =~ /<td.*>/i) {
            while ( !($ln =~ /<\/td>/i) ) {
               $i++;
               if ($i < $tacnt) {
                  $ln .= ' '.$tbl_arr[$i]; # extract a line
               } else {
                  last;
               }
            }
            # got begin and end of <td>...</td> block
            if ($ln =~ /(<td.*?>)(.*)(<\/td>)/i) {
               my $tds = $1;
               my $inb = $2;
               my $tde = $3;
               # like Line [<td><a href="adjrt01.htm">adjrt01.htm</a> <br>2006/05/23 <br>10,213</td>] = 
               # [<td>][<a href="adjrt01.htm">adjrt01.htm</a> <br>2006/05/23 <br>10,213][</td>] ...
               prt( "Line [$ln] = \nBlocks [$tds][$inb][$tde] ...\n" ) if ($dbg21);
               ###if ($inb =~ /<a\s*href=\"(.*)\">(.*)<\/a>/) {
               ##if ($inb =~ /<a\s*href=\"(.*)\">(.*)<\/a>\s*<br>(\d{4}\S*)\s*<br>/i) {
               #if ($inb =~ /<a\s*href=\"(.*)\">(.*)<\/a>\s*<br>(\d{4}\S*)\s*<br>(\d{1}\S*)/i) {
               if ($inb =~ /<a\s*href=\"(.*)\">(.*)<\/a>\s*<br>(\d{4}\S*)\s*<br>(\d{1}\S*)\s*/i) {
                  my $hrf = $1;
                  my $fil = $2;
                  my $dt = $3;
                  my $sz = $4;
                  my ($yr, $mt, $dy) = split(/\//,$dt);
                  ###$sz =~ s/,//g;
                  if ( !($hrf =~ /\./) || !($fil =~ /\./)) {
                     # no DOT!!!
                     prt( "NOTE tbl_arr: Discarding [$hrf] [$fil] ...\n" );
                  } else {
                     #               0     1     2    3    4    5    6    7
                     push(@tbl_set, [$hrf, $fil, $dt, $sz, $yr, $mt, $dy, 0]);
                  }
                  prt("href=[$hrf], file=[$fil], date=[$dt][$yr][$mt][$dy], size=[$sz]...\n") if ($dbg22);
               } else {
                  prt("HREF not found - CHECK!\n") if ($dbg22);
               }
            }
         }
      }
   }
   if ($tacnt3 > 0) {
      my $cc = 0;
      my $ff = 0; # since just two columns - flip flop
      my $bif = '';
      my $fil = '';
      for (my $i = 0; $i < $tacnt3 ; $i++) {
         $ln = $tbl_arr3[$i]; # extract a line
         if ($ln =~ /<td.*>/i) {
            $cc = length($ln);
            prt( "$i - Line [$ln] $cc...\n" ) if ($dbg24);
            while ( !($ln =~ /<\/td>/i) ) {
               $i++;
               if ($i < $tacnt3) {
                  $ln .= ' '.$tbl_arr3[$i]; # extract a line
               } else {
                  last;
               }
            }
            if ($cc != length($ln)) {
               $cc = length($ln);
               prt( "$i - Line [$ln] $cc...\n" ) if ($dbg24);
            }
            # got begin and end of <td>...</td> block
            # 2006.09.11 '?' added to STOP greedy parsing
            if ($ln =~ /(<td.*?>)(.*)(<\/td>)/i) {
               my $tds = $1;
               my $inb = $2;
               my $tde = $3;
               prt( "$i - td[$tds] in[$inb] te[$tde]...\n" ) if ($dbg24);
               if ($ff > 0) {
                  $fil = collecthrefs($inb, 1); # remove HREF
                  $fil = trim_line($fil);
                  if (is_built_in($bif)) {
                     push(@tbl_set3, [$bif, $fil, 0]);
                     prt( " push(\@tbl_set3, [$bif, $fil, 0]); ...\n" ) if ($dbg23);
                  } else {
                     prt( " Advice: Skipping [$bif] - NOT BUILT IN FUNCTION!\n" );
                  }
                  $ff = 0;
               } else {
                  $bif = $inb;
                  $bif =~ s/\[//;
                  $bif =~ s/\]//;
                  $bif =    trim_line($bif);
                  $ff = 1;
               }
            } else {
               prt( "CHECK ME: Missed <td> ... </td> \n");
            }
         }
      }
   }
   transfer_old_table3();
}
sub transfer_old_table3() {
   $tacnt3 = scalar @tbl_set3;
   if ($tacnt3 > 0) {
      prt( "Collected $tacnt3 in \@tbl_set3 ... need to intialise built-in hash ...\n" );
      ## load into my %HOldbifs = ();
      my $elimcnt = 0;
      my $elimcnt2 = 0;
      for (my $i = 0; $i < $tacnt3; $i++) {
         my $bif = $tbl_set3[$i][0];
         my $fss = $tbl_set3[$i][1];
         if (is_built_in($bif)) {
            # each new htm file written is kept in -
            # push(@AFileNames, $ind_file   );
            # and for each of these a new hash of built ins has been kept
            # push(@AFileHashs, \%th); # store the functions used ...
            # so these files can be (safely) eliminated, since they will be added later
            foreach my $nhf (@AFileNames) {
               if ($fss =~ /$nhf/i) {
                  $fss =~ s/$nhf//;
                  $elimcnt++;
               }
            }
            $fss = trim_line($fss);
            if (length($fss)) {
               if (exists $HOldbifs{$bif}) {
                  prt("\nWARNING: [$bif] appears DUPLICATED ...\n had=[".$HOldbifs{$bif}."\nadding [$fss]\n\n");
                  $HOldbifs{$bif} .= $fss;
               } else {
                  $HOldbifs{$bif} = $fss;
               }
            } else {
               $elimcnt2++;
            }
         } else {
            prt("WARNING: DISCARDING [$bif] - NOT BUILT-IN!\n");
         }
      }
      my $nwcnt = scalar keys %HOldbifs;
      if ($elimcnt > 0) {
         prt( "Elimated old files $elimcnt times, avoiding $elimcnt2 bifs being added...\n" );
      }
      prt( "Done $tacnt3 in \@tbl_set3 ... now $nwcnt in \%HOldbifs ...\n" );
   }
}
sub mark_old_index($) {
   my ($f) = shift;
   my $tsc = scalar @tbl_set;
   for (my $i = 0; $i < $tsc; $i++ ) {
      if ($tbl_set[$i][0] eq $f) {
         $tbl_set[$i][7] = 1;
         last;
      }
   }
}
sub get_table_array {
   my $fnd = 0;
   $lncnt = scalar @larr2;
   for (my $i = 0; $i < $lncnt ; $i++) {
      $ln = $larr2[$i]; # extract a line
      chomp $ln; # remove LF (\n)
      $ln =~ s/\r$//; # and remove CR, if present
      if ($ln =~ /<table.*>/i) {
         prt( "FOUND TABLE: [$ln] ...\n" );
         $tblcnt++; # bump table counter
         if ($tblcnt == $tbl_num) {
            prt( "Is my TABLE [$tblcnt] ...\n" ) if ($dbg20);
            push(@tbl_arr,$ln);
            if ( !($ln =~ /<\/table>/i) ) {
               $i++; # move to next line
               for ( ; $i < $lncnt; $i++) {
                  $ln = $larr2[$i]; # extract a line
                  chomp $ln; # remove LF (\n)
                  $ln =~ s/\r$//; # and remove CR, if present
                  if ( $ln =~ /<\/table>/i ) {
                     prt( "END TABLE $tbl_num: [$ln] ...\n" ) if ($dbg20);
                     push(@tbl_arr,$ln);
                     $fnd++;
                     last;
                  }
                  push(@tbl_arr,$ln);
               }
            }
         } elsif ($tblcnt == $tbl_num3) {
            prt( "Is also my TABLE [$tblcnt] ...\n" ) if ($dbg20);
            push(@tbl_arr3,$ln);
            if ( !($ln =~ /<\/table>/i) ) {
               $i++; # move to next line
               for ( ; $i < $lncnt; $i++) {
                  $ln = $larr2[$i]; # extract a line
                  chomp $ln; # remove LF (\n)
                  $ln =~ s/\r$//; # and remove CR, if present
                  if ( $ln =~ /<\/table>/i ) {
                     prt( "END TABLE $tbl_num3: [$ln] ...\n" ) if ($dbg20);
                     push(@tbl_arr3,$ln);
                     $fnd++;
                     last;
                  }
                  push(@tbl_arr3,$ln);
               }
            }
         }
      }
   }
   return $fnd;
}
###################################################################
# COPIED OUT OF htmltools.pl, since I do NOT want to include it, just now ...
sub tag2newline { # ($txt2,'td');
   my ($txt, $tag) = @_;
   my $len = length($txt);
   my $ntxt = '';
   my $i;
   my $ch = '';
   my $ft = '';
   my $lcnt = 0;
   for ($i = 0; $i < $len; $i++ ) {
      $ch = substr($txt,$i,1);
      if ($lcnt && ($ch eq '<')) {
         $ft = $ch;
         $i++;
         for ( ; $i < $len; $i++ ) {
            $ch = substr($txt,$i,1);
            $ft .= $ch;
            if ($ch eq '>') {
               if ($ft =~ /^<$tag/i) {
                  $ft = "\n".$ft;
               }
               last;
            }
         }
         $ntxt .= $ft;
      } else {
         $ntxt .= $ch;
         if ($ch eq "\n") {
            $lcnt = 0;
         } else {
            $lcnt++;
         }
      }
   }
   return $ntxt;
}
sub collecthrefs {
   my ($txt,$del) = @_;
   my $ntxt = '';
   my $len = length($txt);
   my $ch = '';
   my $hrf = '';
   my $i;
   for ($i = 0; $i < $len; $i++) {
      $ch = substr($txt,$i,1);
      if ($ch eq '<') {
         $hrf = $ch;
         $i++;
         for ( ; $i < $len; $i++) {
            $ch = substr($txt,$i,1);
            $hrf .= $ch;
            if ($ch eq '>') {
               last;
            }
         }
         if ($hrf =~ /^<a\s/i) {
            if ($del == 0) {
               $ntxt .= $hrf;
            }
            ### prt("Got [$hrf] ...\n");
            if ($hrf =~ /href=["'](\S+)["']./i) {
               $hrf = $1;
               push(@hrefs,$hrf);
               ### prt("Got [$hrf] ...\n");
            }
         } elsif ($hrf =~ /^<\/a>$/i) {
            if ($del == 0) {
               $ntxt .= $hrf;
            }
         } else {
            $ntxt .= $hrf;
         }
      } else {
         $ntxt .= $ch;
      }
   }
   return $ntxt;
}
###################################################################
sub add_metas($$) {
   my ($oh, $ad) = @_;
   my $m = '';
   my $m2 = '';
   prt( "Add metas to handle ...\n" );
   $m = '<meta name="author" content="geoff mclane">'."\n";
   $m .= '<meta name="keywords" content="geoff, mclane, geoffmclane, computer, consultant, programmer,'."\n";
   $m2 = 'perl, scripts, samples, examples';
   if ($ad) {
      foreach my $k (keys %HFuncsFnd) {
         if (length($m2) > 76) {
            $m2 .= ",\n";
            $m .= $m2;
            $m2 = $k;
         } else {
            $m2 .= ', '.$k;
         }
      }
   } else {
      my $bcnt = scalar @AFileHashs; # collection of HASHES from each file
      my $nkys = ' ';
      my $ky = '';
      my @kys = ();
      for (my $ih = 0; $ih < $bcnt; $ih++) { # for each HASH
         @kys = keys %{$AFileHashs[$ih]}; # get built-ins used for this file
         foreach $ky (@kys) {   # go through the keys
            if ( !($nkys =~ / $ky /) ) { # if NOT already in the list
               $nkys .= $ky.' '; # add it
            }
         }
      }
      @kys = split(/ /, $nkys); # split the list into an array
      foreach $ky (@kys) {   # and add each from the array
         if (length($ky)) {
            if (length($m2) > 76) {
               $m2 .= ",\n";
               $m .= $m2;
               $m2 = $ky;
            } else {
               $m2 .= ', '.$ky;
            }
         }
      }
   }
   $m .= $m2;
   $m .= ', free">'."\n";
   $m .= '<meta name="description" content="page of a computer programmer, with sample perl scripts">'."\n";
   print $oh $m;
   prt("$m") if ($dbg25);
}
# eof - p2hall02.pl
index -|- top