Perl to HTML

index

p2h03.pl to HTML.

#!/Perl
###########################################################################
# p2h02.pl - 21 April, 2006 - Geoff McLane
#
# Another attempt at 'converting' perl scripts to a colour coded HTML page.
# The previous attempt got too unwieldy - abandoned at p2html12.pl ...
# This works on a line by line, character by character, decode,
# and colour encode ... a modest file can grow to 4 or more times
# its original size ... adding colour coding COSTS!
#
# NOTE: While this conversion to coloured HTML produces a 'pretty
# picture' of the original perl script file, often it MAY NOT be copied
# exactly by others. Asside from some big spacing differences, entities
# such as $tok .= '&'; MAY NOT translate correctly. In a copy-and-
# paste operation, this MAY be 'translated' as $tok .= '&';, which
# produces ERRANT perl code! Other ERRORS are $ln =~ s/</&lt;/g;
# MAY become $ln =~ s/</</g; which does NOTHING!!
#
# BUT EVERY ATTEMPT HAS BEEN MADE TO FIX THIS TRANSLATION PROBLEM,
# BUT THERE ARE NO GUARANTEES ;=)) MAYBE I HAVE MISSED SOME CASES!!!
#
# If you want to SHARE your perl script, then you should also place the
# actual script, perhaps in a TEXT (.txt) file, on the web for example.
# I this perl utility mainly only to convert code fragments, with pretty
# colouring, on example web pages ... BE VERY WARNED, AND TAKE DUE CARE
#
# However, considerable effort has been made to ensuring a clipboard copy of
# the HTML page will 'translate' back to the original text faithfully. ;=))
# The main difference being the translation of TAB characters, mentioned
# below. A GNU 'diff', from - http://unxutils.sourceforge.net/ - with the
# -w (ignore white space) compare, will usually shows NO DIFFERENCE ...
#
# ALSO IT STILL HAS SOME OTHER FOIBLES ;=)) MAYBE ...
#
# Coding like $cond{$#value} can be taken as a comment from the #
# in SOME cases, but maybe most are fixed ... and other unescaped #
# can likewise go wrong ...
#
# The best thing is does is to try to correctly handle such things as
# print <<"EOF";, placing all the following text in one colour, until EOF
# and even my $help = <<EOH; is greyed until EOH but again I may have
# missed some case. Perl syntax can usually be done MANY WAYS ...
#
# There is presently some slight miss-indenting, as all tabs are converted
# to 3 spaces, so lines with a say 4 spaces, will be different to lines
# with tab ... $tab_space can be adjust below. The only auto-type
# solution would be to pre-process the lines, and try to make a tab-stop
# decision, but that is a lot of extra work ;=()
#
# It presently has NO input command - you have to manually adjust the
# $in_file variable to the file you want decode. Likewise with the
# htm $out_file, and $log_file ...
#
# The 'reserved words' and 'builtin functions' can come from the
# perl.stx file of EditPlus 2 - http://www.editplus.com/ - This makes the
# load flexible, as the perl.stx file can be adjusted as desired.
# Without this, you can define $use_local = 1; and the local list will
# be used.
#
# I have needlessly included Time::HiRes to give an indication of how
# long the processing took, but usually I can 'see' it takes longer
# than the very minimal time elapsed ... If you do not have this
# module, then these time references can be commented out.
#
# The CSS class names, and colours can be changed via the set of 'class'
# and 'color' variables, $a_class, $a_color, $b_class, $b_color, etc.
# And of course the head and ending of the HTML document can be modified
# as desired.
#
# There are a couple of DEBUG switch. The $debug_on immencely increases
# the output, but can often aid is 'seeing' and 'understanding' the code
# path taken ... $add_chart adds a colour chart at the end of the
# document, together with some stats on colour use ... and the increased
# size of the document.
#
###########################################################################
use Time::HiRes qw(usleep ualarm gettimeofday tv_interval nanosleep );
use strict;

# USER VARIABLES
my $vers = '03';
my $out_file = 'tempout'.$vers.'.htm'; # HTML output
my $log_file = 'tempp2h'.$vers.'.txt'; # log file output
my $in_file = "p2h03.pl"; # an INPUT file to convert
###my $in_file = "am2dsp5.pl";
###my $in_file = 'temptest.pl';

# setting reserved word and function arrays
my $use_local = 0; # set 1 to local internal lists, and NOT load the following file ...
my $perlstx = 'C:/Program Files/EditPlus 2/perl.stx'; # fix location - or use local list!

# set the CLASS and COLOUR strings
my $a_class = 'a';
my $b_class = 'b';
my $c_class = 'c';
my $d_class = 'd';
my $e_class = 'e';
my $f_class = 'f'; #{ color:#666666; }
my $o_class = 'o'; #{ color:#FFA500; }
my $v_class = 'v'; #{ color:#808000; }
my $t_class = 't'; #{ color:#006600; }
my $a_color = 'red';
my $b_color = '#006666';
my $c_color = 'blue';
my $d_color = 'brown';
my $e_color = '#00008B';
my $f_color = '#666666';
my $o_color = '#FFA500';
my $v_color = '#808000';
my $t_color = '#006600';

# other USER variables
my $tab_space = '   '; # note tabs to 3 spaces - change if desired
# some USER OPTIONS
my $add_chart = 1; # add colour chart at end, with document stats
my $brown_qw = 1; # to process a qw(...);
my $add_table = 1; # use table to outline code
# this option REALLY adds weight to certain files
my $add_uvars = 1; # colour code user variables
# this load the output result into a browser
my $load_html = 1; # load the final HTML

# special DEBUG variables
my $debug_on = 0; # heavy DEBUG ONLY output
my $out_lists = 0; # output the lists in qw form

#####################
# PROGRAM VARIABLES #
#####################
my ($LF, $OF);
my @ResWords = ();
my @BuiltIns = ();

# load perl.stx file, or use this local list
if ($use_local) {

@ResWords = qw/ continue do else elsif for foreach goto if last local lock map my next package redo
require return sub unless until use while STDIN STDOUT STDERR ARGV ARGVOUT ENV INC SIG TRUE FALSE __FILE__
__LINE__ __PACKAGE__ __END__ __DATA__ lt gt le ge eq ne cmp x not and or xor q qq qx qw $ @ % /;

@BuiltIns = qw(abs accept alarm atan2 bind binmode bless caller chdir chmod chomp chop chown chr
chroot close closedir connect cos crypt dbmclose dbmopen defined delete die dump each eof eval exec exists
exit exp fcntl fileno flock fork format formline getc getlogin getpeername getpgrp getppid getpriority
getpwnam getgrnam gethostbyname getnetbyname getprotobyname getpwuid getgrgid getservbyname gethostbyaddr
getnetbyaddr getprotobynumber getservbyport getpwent getgrent gethostent getnetent getprotoent
getservent setpwent setgrent sethostent setnetent setprotoent setservent endpwent endgrent endhostent
endnetent endprotoent endservent getsockname getsockopt glob gmtime grep hex import index int ioctl
join keys kill lc lcfirst length link listen localtime log lstat mkdir msgctl msgget msgsnd msgrcv no oct
open opendir ord pack pipe pop pos print printf prototype push quotemeta rand read readdir readlink recv
ref rename reset reverse rewinddir rindex rmdir scalar seek seekdir select semctl semget semop send setpgrp
setpriority setsockopt shift shmctl shmget shmread shmwrite shutdown sin sleep socket socketpair sort
splice split sprintf sqrt srand stat study substr symlink syscall sysopen sysread sysseek system syswrite
tell telldir tie tied time times truncate uc ucfirst umask undef unlink unpack untie unshift utime values
vec wait waitpid wantarray warn write );

}

my @lines = (); # final output line gathered here
my $line = '';
my $last_builtin = '';
my $last_resword = '';
my $doc_total = 0;
my $out_total = 0;
# these are really just DEBUG counters
my $a_cnt = 0;
my $b_cnt = 0;
my $c_cnt = 0;
my $d_cnt = 0;
my $e_cnt = 0;
my $f_cnt = 0;
my $o_cnt = 0;
my $v_cnt = 0;
my $q_cnt = 0;
# TIME VARIABLES
my ($t0, $t1, $elapsed);

#####################################################################
# This is the small MAIN part of the script
$t0 = [gettimeofday];
# logging file, if possible
my $out_log = 1;
if (open $LF, ">$log_file") {
   $out_log = 1;
   prt( "Output also being written to LOG file $log_file ... \n" );
} else {
   $out_log = 0;
   prt( "WARNING: Unable to create LOG file $log_file ... \n" );
}
if ( ! $use_local) { load_stx_file( $perlstx ); }
prt( "Got ".scalar @ResWords." Reserved Words, and ".scalar @BuiltIns." Built-in functions ...\n" );
process_file( $in_file ); # main processing of the file lines
prt( "Got ".scalar @lines." new lines out to $out_file ...\n" );
write_out_file(); # write out results, using HTML format ...
$t1 = [gettimeofday];
$elapsed = tv_interval ( $t0, $t1 );
prt( "$0 processing took $elapsed seconds ...\n" );
if ($load_html) {
   system( $out_file );
}
close($LF) if $out_log;
exit 0;
#####################################################################

#######################
### only subs below ###
#######################
##########################################################################
# The main file OUTPUT - that is the HTML file.
# It establishes the HTML header, which includes the CSS style
# information. then outputs each of the 'converted' lines ...
##########################################################################
sub write_out_file {
   # this is what it is all about - to generate a HTML document
   open $OF, ">$out_file" or die "ERROR: Unable to create $out_file ... aborting ...\n";

   print $OF <<"EOF";
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<title>$in_file - Generated HTML from Perl Script</title>
<meta http-equiv="Content-Language" content="en-au">
<meta http-equiv="Content-Type" content="text/html; charset=windows-1252">
<style type="text/css">
<!-- /* Style Definitions */
body {
margin-left:1cm;
margin-right:1cm;
margin-top:0cm;
margin-bottom:0cm;
font-family: Courier New;
font-size: 10pt;
}
.$a_class { color:$a_color; }
.$b_class { color:$b_color; }
.$c_class { color:$c_color; }
.$d_class { color:$d_color; }
.$e_class { color:$e_color; }
.$f_class { color:$f_color; }
.$o_class { color:$o_color; }
.$v_class { color:$v_color; }
.$t_class { color:$t_color; }
-->
</style>
</head>
<body>

EOF

   print $OF "<p>$in_file to HTML.<br>\n";

   if ($add_table) {
      print $OF '<table width="100%" border="4"><tr><td>'."\n";
   }
   # actual output of generated lines
   foreach $line (@lines) {
      $out_total += length($line);
      print $OF $line;
   }

   if ($add_table) {
      print $OF '</td></tr></table>'."\n";
   }

   if ($add_chart) {
      # mainly only for DEBUG
     print $OF <<"EOF";
Chart of Colours<br>
<span class="$a_class">class='$a_class'  - $a_color RED       $a_cnt</span><br>
<span class="$b_class">class='$b_class'  - $b_color BLUEGREEN $b_cnt</span><br>
<span class="$c_class">class='$c_class'  - $c_color BLUE      $c_cnt</span><br>
<span class="$d_class">class='$d_class'  - $d_color BROWN     $d_cnt</span><br>
<span class="$e_class">class='$e_class'  - $e_color DARKBLUE  $e_cnt</span><br>
<span class="$f_class">class='$f_class'  - $f_color GREY      $f_cnt</span><br>
<span class="$o_class">class='$o_class'  - $o_color ORANGE    $o_cnt</span><br>
<span class="$v_class">class='$v_class'  - $v_color OLIVE     $v_cnt</span><br>
<span class="$t_class">class='$t_class'  - $t_color GREEN     $q_cnt</span><br>
End of chart<br>
EOF
      my $tot = ($a_cnt+$b_cnt+$c_cnt+$d_cnt+$e_cnt+$f_cnt+$o_cnt+$v_cnt+$q_cnt);
      my $diff = $out_total - $doc_total;
      print $OF "This use of $tot colour span sequences added $diff bytes. In=$doc_total Out=$out_total<br>\n";
   }

   print $OF 'Generated: ' . localtime(time()) . " from $in_file.<br>\n";

   print $OF "</body>\n";
   close($OF);
}

#########################################################
# A small set of 9 services which add in the CSS class,
# using <span class="???">.thetext.</span>
#
# Each one does a different class, and the class
# is extracted to variables set above. This means
# they can easily be adjusted to new, different
# values ...
#
# They also accumulate statistic information on how
# many time each is used ...
#########################################################
sub add_red {
   my ($t) = shift;
   $a_cnt++;
   return ('<span class="'.$a_class.'">'.$t.'</span>');
}
sub add_class_b {
   my ($t) = shift;
   $b_cnt++;
   return ('<span class="'.$b_class.'">'.$t.'</span>');
}
sub add_blue {
   my ($t) = shift;
   $c_cnt++;
   return ('<span class="'.$c_class.'">'.$t.'</span>');
}
sub add_class_d {
   my ($t) = shift;
   $d_cnt++;
   return ('<span class="'.$d_class.'">'.$t.'</span>');
}
sub add_class_e {
   my ($t) = shift;
   $e_cnt++;
   return ('<span class="'.$e_class.'">'.$t.'</span>');
}
sub add_class_f {
   my ($t) = shift;
   $f_cnt++;
   return ('<span class="'.$f_class.'">'.$t.'</span>');
}
sub add_class_o {
   my ($t) = shift;
   $o_cnt++;
   return ('<span class="'.$o_class.'">'.$t.'</span>');
}
sub add_class_v {
   my ($t) = shift;
   $v_cnt++;
   return ('<span class="'.$v_class.'">'.$t.'</span>');
}
sub add_quote {
   my ($t) = shift;
   $q_cnt++;
   return ('<span class="'.$t_class.'">'.$t.'</span>');
}
#########################################################


# search the @ResWord array for an entry
sub in_res_words {
   my ($t) = shift;
   foreach my $rw (@ResWords) {
      if ($t eq $rw) {
         $last_resword = $rw;
         return 1;
      }
   }
   return 0;
}

# search the @BuiltIns array for an entry
sub in_built_in {
   my ($t) = shift;
   foreach my $rw (@BuiltIns) {
      if ($t eq $rw) {
         $last_builtin = $rw;
         return 1;
      }
   }
   return 0;
}

sub is2lt {
   my $t = shift;
   $t =~ s/&lt;/</g;
   if ( (length($t) >= 2 ) && ( $t =~ /<<$/ ) ) {
      return 1;
   }
   return 0;
}

sub sans_quotes {
   my $t = shift;
   $t =~ s/\"//g;
   return $t;
}

######################################################
# Converting SPACES to '&nbsp;'
# Of course this could be done just using perl's
# powerful search and replace, but this handles
# any number of spaces, only converting the number
# minus 1 to &nbsp; ... not sure how to have
# this level of control with regex replacement
######################################################
sub conv_spaces {
   my $t = shift;
   my ($c, $i, $nt, $ln, $sc, $sp);
   $nt = ''; # accumulate new line here
   $ln = length($t);
   for ($i = 0; $i < $ln; $i++) {
      $c = substr($t,$i,1);
      if ($c eq ' ') {
         $i++; # bump to next
         $sc = 0;
         $sp = '';
         for ( ; $i < $ln; $i++) {
            $c = substr($t,$i,1);
            if ($c ne ' ') {
               last; # exit
            }
            $sc++;
            $sp .= $c;
         }
         if ($sc) {
            $sp =~ s/ /&nbsp;/g;
            $nt .= $sp;
         }
         $i--; # back up one
         $c = ' '; # add back the 1 space
      }
      $nt .= $c;
   }
   prt( "conv_space: from [$t] to [$nt] ...\n" ) if $debug_on;
   return $nt;
}

###########################################################################
# VERY IMPORTANT SERVICE
# This converts the 'text' into HTML text, but only does a partial job!
# 1. Convert '&' to '&amp;' to avoid interpreting as replacement
# 2. Convert '<' to '&lt;' to avoid interpreting as HTML
# 3. Convert '"' to '&quot;'
# 4. Convert '\t' to SPACES
# 5. Finally, if there are double or more SPACES, convert to '&nbsp;'
###########################################################################
sub html_line {
   my $t = shift;
   my $ot = $t;
   $t =~ s/&/&amp;/g; # all '&' become '&amp;'
   $t =~ s/</&lt;/g; # make sure all '<' is/are swapped out
   $t =~ s/\"/&quot;/g; # and all quotes become &quot;
   $t =~ s/\t/$tab_space/g; # tabs to spaces
   if ($t =~ /\s\s/) { # if any two consecutive white space
      return conv_spaces($t);
   }
   prt( "html_line: from [$ot] to [$t] ...\n" ) if $debug_on;
   return $t;
}

##########################################################
# The following two functions 'convert' scalar variables
# to colour codes spans, in the print <<EOF = get_uform,
# and withing double quoted text "this $cnt ..." ...
# THESE ADD LOTS OF WEIGHT TO THE FILE
##########################################################
sub get_uform {
   my $ln = shift;
   my $tok = ''; # colour up the USER scalar variables within
   my $len = length($ln);
   my $nline = '';
   for (my $i = 0; $i < $len; $i++) {
      my $ch = substr($ln, $i, 1);
      if (($ch eq '$') && (($i + 1) < $len) && (substr($ln,$i+1,1) =~ /\w/) ) {
         $nline .= add_class_f(html_line($tok)) if (length($tok));
         $tok = $ch;
         $i++;
         for ( ; $i < $len; $i++) {
            $ch = substr($ln, $i, 1);
            if ( ! ($ch =~ /\w/) ) {
               # end of token
               $nline .= add_class_e(html_line($tok));
               $tok = '';
               last;
            }
            $tok .= $ch;
         }
      }
      $tok .= $ch;
    }
   $nline .= add_class_f(html_line($tok)) if (length($tok));
   return $nline;
}

sub add_quote2 {
   my ($ln) = shift;
   my $len = length($ln);
   my $ch = '';
   my $ch2 = '';
   my $pc = '';
   my $pc2 = '';
   my $nl = ''; # put the NEW line in here
   my $tok = ''; # colour up the USER scalar variables within DOUBLE quotes
   for (my $i = 0; $i < $len; $i++ ) {
      $ch = substr($ln, $i, 1);
      $ch2 = (($i + 1) < $len) ? substr($ln,$i+1,1) : '';
      # if a scalar variable, and not 'escaped', or the escape escaped and next is 'an_'
      if (($ch eq '$') && (($pc ne '\\')||(($pc eq '\\') && ($pc2 eq '\\'))) &&
         (($i + 1) < $len) && ($ch2 =~ /\w/) ) {
         $nl .= add_quote(html_line($tok)) if (length($tok));
         $tok = $ch;
         $i++;
         for ( ; $i < $len; $i++) {
            $ch = substr($ln, $i, 1);
            if ( ! ($ch =~ /\w/) ) {
               # end of token
               $nl .= add_class_e(html_line($tok));
               $tok = '';
               last; # exit
            }
            $tok .= $ch;
         }
      }
      $tok .= $ch;
      $pc2 = $pc;
      $pc = $ch;
   }
   $nl .= add_quote(html_line($tok)) if (length($tok));
   return $nl;  
}

#################################################################
# The MAIN file processing
# The input file is openned, and all the lines read
# into an array @lns, then each line is processed,
# cheracter by character ...
# It does it mainly via a state, $st
# $st == 0 - processing white space
# $st == 1 - processing alphanumeric, plus _
# $st == 2 - processing nither space nor alphanumeric, here
#            referred to as 'an_' ...
# $st == 3 - Locked in one of << thingies, until the end
#            token located, or until end of file ...
# $st == 4 - Processing a qw(...) function, of qw/.../ if
# enabled.
#
# Generally the 'tokens' are stored in $tok, as the line
# is processed, added to the $nline at various change
# points, and finally the $nline is stored in the array
# @lines, for later output ...
#
# Setting $debug_on will give a BIG TRACE of where the
# code is handling something ...
#
# Setting $add_uvars to on will add colour code user variables
# but this adds a lot of extra weight to the file.
#################################################################
sub process_file {
   my ($in_file) = shift;
   my ($IF);
   my ($ch1,$ch2,$ch3,$ch4);
   open $IF, "<$in_file" or die "ERROR: Unable to open $in_file ... aborting ...\n";
   my @lns = <$IF>; # slurp into line array
   close($IF);
   prt( "Got ".scalar @lns." to process from $in_file ...\n" );
   my $st = 0; # current status
   my $nst = 0;
   my $pc = '';
   my $pc2 = '';
   my $ch = '';
   my $tok = '';
   my $ltok = ''; # last token
   my $ltok1 = '';
   my $ltok2 = '';
   my $qtok = ''; # print <<"EOF" token
   my $end_qw = '/';
   my $i = 0;
   foreach my $ln (@lns) {
      $doc_total += length($ln);
      chomp $ln;
      $ln =~ s/\r$//; # and remove CR, if present
      my $len = length($ln);
      my $nline = '';
      prt( "\nline=[$ln] ...\n" ) if $debug_on;
      $pc = '';
     $pc2 = '';
      $tok = '';
      $ltok = ''; # last token
      $ltok1 = ''; # token stack
      $ltok2 = '';
      $i = 0;
      $nst = 0; # if fall through, next status is IN space
      if ($st == 3) {
         # locked in a 'print' string to end token
       if ($add_uvars) {
          $nline = get_uform( $ln );
       } else {
          $nline = add_class_f(html_line($ln));
       }
         $nline .= "<br>\n";
         push(@lines, $nline);
         if ($ln =~ /^$qtok/) {
            $st = 0;
         }
         next; # next LINE of file
      } elsif ($st == 4) {
         # processing a 'qw' block - only if $brown_qw is ON
         $tok = '';
         for ( ; $i < $len; $i++) {
            $ch = substr($ln, $i, 1);
            if ($ch eq $end_qw) { # either '/' or ')' depending on start
               $nline .= add_class_d(html_line($tok)) if (length($tok));
               $tok = '';
               last;
            }
            $tok .= $ch;
         }
         if ($i < $len) {
            $nst = 2; # fall through to continue line
         } else {
            $nline = add_class_d(html_line($ln));
            $nline .= "<br>\n";
            push(@lines, $nline);
            next;
         }
      }
      $st = $nst;
      for ( ; $i < $len; $i++) {
         $ch = substr($ln, $i, 1);
         # make a BIG exception of '&lt;' ...
         if (($ch eq '&') && (($i + 3) < $len)) {
            $ch1 = substr($ln, $i, 4);
         if ($ch1 eq '&lt;') {
            $tok .= $ch1;
            $i += 3;
            $st = 2;
            $pc = ';';
             next;
         }
       }
         if ($st == 0) {
            # IN white space territory
            if ($ch =~ /\S/) {
               prt( "IN ws, changed to NOT with [$ch] ".
                  "\$tok=[$tok] \$ltok[$ltok] \$ltok1[$ltok1] \$ltok2[$ltok2]\n" ) if $debug_on;
               $nline .= html_line($tok); # add any white space to new line
               $ltok2 = $ltok1;
               $ltok1 = $ltok;
               $ltok = $tok;
               $tok = '';
            # if NOT escape, or escaped escape character
               if ( ($pc ne '\\') || (($pc eq '\\') && ($pc2 eq '\\')) ){
                  if ($ch eq '#') {
                     # start of a COMMENT
                     prt( "start of a COMMENT [$ch] ".
                        "tok=[$tok] ltok[$ltok] ltok1[$ltok1] ltok2[$ltok2]\n" ) if $debug_on;
                     $tok = substr($ln, $i);
                     $nline .= add_class_b(html_line($tok));
                     $tok = '';
                     $st = 0;
                     last;
                  } elsif (($ch eq '"')||($ch eq "'")) {
                     my $bch = $ch;
                     prt( "start of a QUOTE [$ch] ".
                        "tok=[$tok] ltok[$ltok] ltok1[$ltok1] ltok2[$ltok2]\n" ) if $debug_on;
                     $tok = $ch;
                     $i++;
                $pc2 = '';
                     for ( ; $i < $len; $i++ ) {
                        $ch = substr($ln, $i, 1);
                  # if the PREVIOUS is NOT an ESCAPE, OR the previous and previous ARE
                  # that is a ESCAPED ESCAPE character, which is NOT an escape at all ;=))
                        if ( ($pc ne '\\') || (($pc eq '\\')&&($pc2 eq '\\')) ) {
                           if ($ch eq $bch) {
                              $tok .= $ch;
                              prt( "End of a QUOTE [$ch] ".
                                 "tok=[$tok] ltok[$ltok] ltok1[$ltok1] ltok2[$ltok2]\n" ) if $debug_on;
                       if ($add_uvars && ($bch eq '"')) {
                          $nline .= add_quote2($tok);
                       } else {
                          $nline .= add_quote(html_line($tok));
                       }
                              $tok = '';
                       $pc2 = $pc;
                              $pc = $ch;
                              last;
                           }
                        }
                        $tok .= $ch;
                  $pc2 = $pc;
                        $pc = $ch;
                     }
                     $pc = $ch;
                     next;
                  }
               }
               $tok = $ch;
               if ($ch =~ /\w/) {
                  $st = 1;
               } else {
                  $st = 2;
               }
            $pc2 = $pc;
               $pc = $ch;
               next;
            } else {
               # staying in white space
               $tok .= $ch;
            $pc2 = $pc;
               $pc = $ch;
               next;
            }
         } elsif ($st == 1) {
            # dealing with alphanumberic + _
            if ($ch =~ /\w/) {
               $tok .= $ch;
            $pc2 = $pc;
               $pc = $ch;
               next; # continue alphanumeric + _
            }
            prt( "IN an_, no longer an_ with [$ch] ... tok=[$tok]\n" ) if $debug_on;
            if (length($tok)) {
               if (in_res_words($tok) ) {
                  $nline .= add_blue(html_line($tok));
                  if ($brown_qw && (($ch eq '(')||($ch eq '/')) && ($last_resword eq 'qw')) {
                # entering a qw list
                $end_qw = '/';
                $end_qw = ')' if ($ch eq '(');
                     prt( "Excepting a qw list ... Begin $ch, End $end_qw ...\n" ) if $debug_on;
                     $i++;
                     $nline .= $ch;
                     $tok = ''; # no token
                     for ( ; $i < $len ; $i++) {
                        $ch = substr($ln,$i,1);
                        if ($ch eq $end_qw) { # end on '/' or ')' depending on start
                           $nline .= add_class_d(html_line($tok)) if (length($tok));
                           $nline .= $ch;
                           $tok = '';
                           last;
                        }
                        $tok .= $ch;
                     }
                     if ($i < $len) {
                        next; # get next character
                     } # else, we have ended the line, still in a 'qw' ...
                     $nline .= add_class_d(html_line($tok)) if (length($tok));
                     $tok = '';
                     $st = 4;
                     last; # end of THIS line
                  }
               } elsif (in_built_in($tok)) {
                  $nline .= add_red(html_line($tok));
               } else {
               if ($add_uvars) { # colour code user variables
                  $ch1 = substr($tok,0,1);
                 if ($ch1 eq '$') {
                    $nline .= add_class_e(html_line($tok));
                 } elsif ($ch1 eq '@') {
                    $nline .= add_class_o(html_line($tok));
                 } elsif ($ch1 eq '%') {
                    $nline .= add_class_v(html_line($tok));
                 } else {
                        $nline .= html_line($tok);
                 }
               } else {
                  $nline .= html_line($tok);
               }
               }
               $ltok2 = $ltok1;
               $ltok1 = $ltok;
               $ltok = $tok;
               $tok = '';
            }
            $tok = $ch;
            if ($ch =~ /\s/) {
               $st = 0; # goto SPACE mode
            } elsif ($ch =~ /\w/) {
               $st = 1; # goto AN_ mode
            } else {
               $st = 2; # goto NOT SPACE or AN_ mode
            }
         $pc2 = $pc;
            $pc = $ch;
            next;
         } elsif ($st == 2) {
            # not IN space or IN an_
            if ($ch =~ /\s/) {
               prt( "IN 2 - change back to space with [$ch] ... tok=[$tok]\n" ) if $debug_on;
               $nline .= html_line($tok);
               $ltok2 = $ltok1;
               $ltok1 = $ltok;
               $ltok = $tok;
               $tok = $ch;
               $st = 0;
            $pc2 = $pc;
               $pc = $ch;
               next;
            } elsif ($ch =~ /\w/) {
               prt( "IN 2 - change back to an_ with [$ch] ... tok=[$tok]\n" ) if $debug_on;
               if ( is2lt($tok) ) {
                  $ch1 = substr($ln,$i); # get balance of line
                  $ch1 =~ s/\s+$//; # remove any trailing white space
                  if ( ($ch1 =~ /;$/) && ($ltok =~ /=/) ) {
                     $ch1 =~ s/^\s+//; # remove any leading spaces
                     $ch1 =~ s/;$//; # remove colon
                     $ch1 =~ s/\s+$//; # now again remove any trailing white space
                     if ( !($ch1 =~ /\s/) ) {
                        $qtok = $ch1; # STORE THE END MARKER !!!
                        prt( "Got <<EOH type tok[$tok] $ch1 ltok[$ltok] ltok1[$ltok1] ltok2[$ltok2]... qtok[$qtok]\n" ) if $debug_on;
                        $nline .= html_line($tok);
                        $tok = '';
                        $nline .= html_line(substr($ln,$i)); # get balance of line
                        $st = 3;
                        last; # done this line
                     }
                  }
               }

               if (($tok eq '$')||($tok eq '@')||($tok eq '%')) {
                  $tok .= $ch;
               } else {
                  $nline .= html_line($tok);
                  $ltok2 = $ltok1;
                  $ltok1 = $ltok;
                  $ltok = $tok;
                  $tok = $ch;
               }
               $st = 1;
            $pc2 = $pc;
               $pc = $ch;
               next;
            }
            ###if (($pc ne '\\') && (($ch eq '#') || ($ch eq '"') || ($ch eq "'"))) {
            if ((($pc ne '\\')||(($pc eq '\\')&&($pc2 eq '\\'))) &&
            ((($ch eq '#')&&($pc ne '$')) || ($ch eq '"') || ($ch eq "'"))) {
               $nline .= html_line($tok); # add in current token
               $ltok2 = $ltok1;
               $ltok1 = $ltok;
               $ltok = $tok;
               $tok = '';
               if ($ch eq '#') {
                  # start of a COMMENT
                  $tok = substr($ln, $i);
                  $nline .= add_class_b(html_line($tok));
                  $tok = '';
                  $st = 0;
                  last;
               } elsif (($ch eq '"')||($ch eq "'")) {
                  my $bch = $ch;
                  $tok = $ch;
                  $i++;
              $pc2 = '';
                  for ( ; $i < $len; $i++ ) {
                     $ch = substr($ln, $i, 1);
                     if ( ($pc ne '\\') || ( ($pc eq '\\') && ($pc2 eq '\\') ) ) {
                        if ($ch eq $bch) {
                           $tok .= $ch;
                           $qtok = sans_quotes($tok);
                     if ($add_uvars && ($bch eq '"')) {
                       $nline .= add_quote2($tok);
                     } else {
                       $nline .= add_quote(html_line($tok));
                     }
                           $tok = '';
                     $pc2 = $pc;
                           $pc = $ch;
                           last;
                        }
                     }
                     $tok .= $ch;
                $pc2 = $pc;
                     $pc = $ch;
                  }
                  # check for 'print ... <<"EOF";'
                  if ( ($i < $len) && ($last_builtin eq 'print') && (length($ltok) >= 2) && is2lt($ltok) && length($qtok) ) {
                     prt( "Got $last_builtin ltok[$ltok] qtok[$qtok] ...\n" ) if $debug_on;
                     $i++;
                     $nline .= html_line(substr($ln,$i));
                     $tok = '';
                     $st = 3;
                     last; # done this line
                  }
              $pc2 = $pc;
                  $pc = $ch;
                  next;
               }
            }

         if ($add_uvars && (($ch eq '$')||($ch eq '@')||($ch eq '%'))) {
               $nline .= html_line($tok); # add in current token
               $ltok2 = $ltok1;
               $ltok1 = $ltok;
               $ltok = $tok;
               $tok = '';
         }
            $tok .= $ch;
         }
       $pc2 = $pc;
         $pc = $ch;
      }
      $nline .= html_line($tok);
      prt( "nline[$nline]\n" ) if $debug_on;
      $nline .= "<br>\n";
      push(@lines, $nline);
   }
}

####################################
# Reducing a line to bare bones
# Only presently used when loading
# the EditPlus 2 perl.stx file.
####################################
sub trim_line {
   my ($l) = shift;
   chomp $l;
   $l =~ s/\r$//; # and remove CR, if present
   $l =~ s/\t/ /g;
   $l =~ s/\s\s/ /g while ($l =~ /\s\s/);
   $l = substr($l,1) while ($l =~ /^\s/);
   $l = substr($l,0,length($l)-1) while (($l =~ /\s$/)&&(length($l)));
   return $l;
}

########################################
# Loading the reserved words, and
# perl built-in functions from a
# special EditPlus 2, perl.stx file,
# but there are arrays already included
# if you do not have this file.
########################################
sub load_stx_file {
   my ($in_file) = shift;
   my ($IF);
   my @stx = ();
   my %dchk = ();
   open $IF, "<$in_file" or die "ERROR: Unable to open $in_file ... aborting ...\n";
   @stx = <$IF>; # slurp entire file into array
   close($IF);
   my $scnt = scalar @stx;
   prt( "Got $scnt lines in $in_file to process ...\n" );
   my $st = 0;
   foreach my $ln (@stx) {
      my $tln = trim_line($ln);
      my $ll = length($tln);
      next if ($ll == 0);
      if( $tln =~ /^\#KEYWORD=Reserved words/ ) {
         $st = 1;
         next;
      } elsif ($tln =~ /^\#KEYWORD=Built-in functions/ ) {
         $st = 2;
         next;
      } elsif (($tln =~ /^\#/) || ($tln =~ /^;/)) {
         $st = 0;
         next;
      }

      if (exists $dchk{$tln}) {
         prt( "Warning: Avoiding duplicate of [$tln] ...\n" );
         next;
      }
      $dchk{$tln} = 1;

      if( $st == 1 ) {
         push(@ResWords, $tln);
      } elsif ($st == 2) {
         push(@BuiltIns, $tln);
      }
   }

   # this was ONLY used to get the internal list
   # so this file becomes unneccessary ...
   if ($out_lists) {
      my $max = 85;
      my $cnt = 20;
      prt( '@ResWords = qw(' );
      foreach my $ln (@ResWords) {
         prt( $ln.' ' );
         $cnt += length($ln);
         if ($cnt > $max) {
            prt("\n");
            $cnt = 0;
         }
      }
      prt( ");\n" );
      $cnt = 20;
      prt( '@BuiltIns = qw(' );
      foreach my $ln (@BuiltIns) {
         prt( $ln.' ' );
         $cnt += length($ln);
         if ($cnt > $max) {
            prt("\n");
            $cnt = 0;
         }
      }
      prt( ");\n" );
   }
}

################################################
# A small 'print' service, that not only
# sends the output to STDOUT, but also
# directs it to a LOG file. I find it
# quite difficult to watch the console
# messages FLASH by ... Of course the
# output can be command line RE-DRIECTED,
# IF you are running it from the command
# line ... most of the time I run it
# from withing the Editor tool, thus thus
# provides a convenient look-back at what
# happend ... this is especially true when
# $debug_on is set ...
################################################
sub prt {
   my ($m) = shift;
   print $m;
   print $LF $m if $out_log;
}

# eof - p2h03.pl
Chart of Colours
class='a' - red RED 104
class='b' - #006666 BLUEGREEN 287
class='c' - blue BLUE 389
class='d' - brown BROWN 18
class='e' - #00008B DARKBLUE 916
class='f' - #666666 GREY 98
class='o' - #FFA500 ORANGE 25
class='v' - #808000 OLIVE 1
class='t' - #006600 GREEN 277
End of chart
This use of 2115 colour span sequences added 86189 bytes. In=34467 Out=120656
Generated: Sat Apr 22 21:30:03 2006 from p2h03.pl.

index