fgfixfn.pl to HTML.

index -|- end

Generated: Mon Aug 16 14:14:15 2010 from fgfixfn.pl 2010/04/12 43.4 KB.

#!/perl -w
# NAME: fgfixfn.pl
# AIM: VERY SPECIFIC - read file, and fix footnotes - did a REASONABLE job, but needed some manual touchups
# 2010/04/06  - geoff mclane - http://geoffair.net/mperl/
use strict;
use warnings;
use File::Basename;  # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] )
use Cwd;
unshift(@INC, 'C:\GTools\perl');
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";
# log file stuff
my ($LF);
my $pgmname = $0;
if ($pgmname =~ /\w{1}:\\.*/) {
    my @tmpsp = split(/\\/,$pgmname);
    $pgmname = $tmpsp[-1];
}
my $perl_dir = 'C:\GTools\perl';
my $outfile = $perl_dir."\\temp.$pgmname.txt";
open_log($outfile);

# user variables
my $load_log = 1;
my $def_infile = 'C:\HOMEPAGE\FG\Docs\getstart\getstartch9.html';
my $in_file = '';
my $show_div_stack = 0;
my $show_para_stack = 0;

### program variables
my @warnings = ();
my $cwd = cwd();
my @g_all_elements = ();

my @tag_list = ("a","abbr","acronym","address","applet","area","b","base","basefont","bdo","big","blockquote","body",
"br","button","caption","center","cite","code","col","colgroup","dd","del","dfn","dir","div","dl","dt","em","fieldset",
"font","form","frame","frameset","h1","h2","h3","h4","h5","h6","head","hr","html","i","iframe","img","input","ins",
"isindex","kbd","label","legend","li","link","listing","map","menu","meta","noframes","noscript","object","ol",
"optgroup","option","p","param","plaintext","pre","q","rb","rbc","rp","rt","rtc","ruby","s","samp","script","select",
"small","span","strike","strong","style","sub","sup","table","tbody","td","textarea","tfoot","th","thead","title",
"tr","tt","u","ul","var","xmp","nextid","align","bgsound","blink","comment","embed","ilayer","keygen","layer",
"marquee","multicol","nobr","noembed","nolayer","nosave","server","servlet","spacer","wbr");

my @closed_tags = qw( meta link area base basefont br frame hr isindex param bgsound embed keygen img );

# tags which do NOT need a closing, like </p>, tag
my @opt_tags = ( "body", "colgroup", "dd", "dt", "head", "html", "li", "optgroup", "option",
"p", "tbody", "td", "tfoot", "th", "thead", "tr", "marquee" );

my $us_dir = "u:\\var\\www\\fg\\www\\Docs\\getstart\\";

# debug
my $dbg38 = 0; # prt( "[dbg38] Got [$hr2] = [$txt] [$fil]\n" ) if ($dbg38);
my $dbg39 = 0; # prt( "[dbg39] Got [$hr2] = [$txt] - no inverted commas! [$fil]\n" ) if ($dbg39);

sub pgm_exit($$) {
    my ($val,$msg) = @_;
    if (length($msg)) {
        $msg .= "\n" if (!($msg =~ /\n$/));
        prt($msg)
    }
    close_log($outfile,$load_log);
    exit($val);
}


sub prtw($) {
   my ($tx) = shift;
   $tx =~ s/\n$//;
   prt("$tx\n");
   push(@warnings,$tx);
}

sub show_warnings() {
   if (@warnings) {
      prt( "\nGot ".scalar @warnings." WARNINGS...\n" );
      foreach my $itm (@warnings) {
         prt("$itm\n");
      }
      prt("\n");
   } else {
      prt( "\nNo warnings issued.\n\n" );
   }
}

sub is_closed_tag($) {
    my ($tt) = shift;
    my $lctt = lc($tt);
    foreach my $tag (@closed_tags) {
        return 1 if ($tag eq $lctt);
    }
    return 0;
}

sub is_opt_tag($) {
    my ($tt) = shift;
    my $lctt = lc($tt);
    foreach my $tag (@opt_tags) {
        return 1 if ($tag eq $lctt);
    }
    return 0;
}

# $drop = can_find_this_tag($tag,\@elements);
sub can_find_this_tag($$) {
    my ($tag,$re) = @_;
    my $len = scalar @{$re};
    my $drop = 0;
    my $bu = -1;
    my $last = '';
    while ($len) {
        $drop++;    # can pop this one
        $last = ${$re}[$bu][0]; # get tag
        if ($last eq $tag) {    # if the desired tag
            return $drop;   # return drop value
        } elsif ( ! is_opt_tag($last) ) {
            return 0;   # oop, have a non-optional tag
        }
        $bu--;  # back up one more
        $len--; # and reduce available to check
    }
    return 0;
}

sub is_all_optional($) {
    my ($re) = @_;
    my $len = scalar @{$re};
    my $bu = -1;
    my ($last);
    while ($len) {
        $last = ${$re}[$bu][0]; # get tag
        if ( ! is_opt_tag($last) ) {
            return 0;   # oop, have a non-optional tag
        }
        $bu--;  # back up one more
        $len--; # and reduce available to check
    }
    return 1;   # ALL were optiona
}

sub show_stack_elements($$$) {
    my ($tag,$rele,$rlns) = @_;
    my $cnt = scalar @{$rele};
    my $lcnt = scalar @{$rlns};
    if ($cnt) {
        prt("The stack has $cnt elements... The current closing element is [$tag]\n");
        for (my $i = 0; $i < $cnt; $i++) {
            my $e = ${$rele}[$i][0];
            my $n = ${$rele}[$i][1];
            prt("$n: elelement [$e]");
            prt(" SAME as tag [$tag]!") if ($e eq $tag);
            if ($n <= $lcnt) {
                my $ln = trim_all(${$rlns}[$n-1]);
                prt(" line=[$ln]");
            }
            prt("\n");
        }
    } else {
        prt("The stack has NO elements... The current closing element is [$tag]\n");
    }
}

sub get_attribute_hash_ref($$$$) {
    my ($fank,$fil,$xml,$dbg) = @_;
    my %hash = ();
    my ($ank,$len,$i,$ch,$pc,$hr2,$txt);
    $ank = trim_all($fank);
    $len = length($ank);
    $ch = '';
    $hr2 = '';
    for ($i = 0; $i < $len; $i++) {
        $pc = $ch;
        $ch = substr($ank,$i,1);
        # if ($ch =~ /\w/) - this missed xml:link="abc"
        # and 'http-equiv="..."
        if ($ch =~ /(\w|:|-)/) {
            $hr2 .= $ch;   # accumulate \w chars - alphanumeric, including _
        } elsif (length($hr2)) {
            if (($ch ne '=') && ($ch =~ /\s/)) {
                $i++;
                for (; $i < $len; $i++) {
                    $ch = substr($ank,$i,1);
                    last if ($ch eq '=');
                    last if !($ch =~ /\s/);
                }
            }
            if ($ch eq '=') {
                # found our equal sign
                $i++; # move on...
                for (; $i < $len; $i++) {
                    $ch = substr($ank,$i,1);
                    last if ($ch =~ /('|")/);
                    last if !($ch =~ /\s/);
                }
                if (($ch eq '"')||($ch eq "'")) {
                    $pc = $ch;
                    $i++; # move on...
                    $txt = '';
                    for (; $i < $len; $i++) {
                        $ch = substr($ank,$i,1);
                        last if ($ch eq $pc);
                        $txt .= $ch;
                    }
                    if ($ch eq $pc) {
                        $hr2 = lc($hr2) if ($xml);
                        $hash{$hr2} = $txt;
                        prt( "[dbg38] Got [$hr2] = [$txt] [$fil]\n" ) if ($dbg38);
                    } else {
                        prtw("PROBLEM: got [$hr2]. At pos $i in [$ank], from [$fank], and NO END INVERTED COMMA! ($pc) [$fil]\n");
                        pgm_exit(1,"NEED CODE FIX!") if ($dbg);
                    }
                } else {
                    if (($ch =~ /\w/) && (($hr2 =~ /name/i)||($hr2 =~ /id/i))) {
                        # accept these WITHOUT inverted comma
                        $txt = $ch;
                        $i++; # MOVING ON
                        for (; $i < $len; $i++) {
                            $ch = substr($ank,$i,1);
                            last if !($ch =~ /\w/);
                            $txt .= $ch;
                        }
                        $hr2 = lc($hr2) if ($xml);
                        $hash{$hr2} = $txt;
                        prt( "[dbg39] Got [$hr2] = [$txt] - no inverted commas! [$fil]\n" ) if ($dbg39);
                    } else {
                        prtw("PROBLEM: got [$hr2]. At pos $i in [$ank], from [$fank], and NO START INVERTED COMMA! [$fil]\n");
                        pgm_exit(1,"NEED CODE FIX!") if ($dbg);
                    }
                }
            } else {
                prtw("PROBLEM: got [$hr2]. At pos $i in [$ank], from [$fank], and NO EQUAL SIGN! [$fil]\n");
                pgm_exit(1,"NEED CODE FIX!") if ($dbg);
            }
            $hr2 = '';
        }
    }
    return \%hash;
}

sub get_anchor_hash_ref($$$) {
   my ($fank,$fil,$xml,$dbg) = @_;
   my ($hr);
   my ($ank,$len,$i,$ch,$pc,$hr2,$txt);
   if ($fank =~ /<a\s+(.+)>$/) {
      $ank = trim_all($1);
      $hr = get_attribute_hash_ref($ank,$fil,$xml,$dbg);
   }
   return $hr;
}

sub get_img_hash_ref($$$$) {
   my ($fank,$fil,$xml,$dbg) = @_;
   my $hr = ();
   my ($ank,$len,$i,$ch,$pc,$hr2,$txt,$tail);
   # 2010/04/06 - add 'i' case insensitive; 's' to treat as single line;
   if ($fank =~ /^<img\s+(.+)>{1}(.*)$/is) {
      $ank = trim_all($1);
      $tail = $2;
      $ank =~ s/\/$//;  # 2010/04/06 - remove any trailing '/'
      $ank = trim_all($ank);    # and trim again
      $hr = get_attribute_hash_ref($ank,$fil,$xml,$dbg);
   } else {
       prtw("PROBLEM: [$fank] FAILED initial image test regex! [$fil]\n");
       pgm_exit(1,"get_img_hash_ref: FAILED!") if ($dbg);
   }
   ${$hr}{'_TRAILING_'} = $tail if (length($tail));
   return $hr;
}

sub get_table_element($$) {
    my ($txt,$rt) = @_;
    my $len = length($txt);
    my $ttx = '';
    my ($ch,$t);
    for ($t = 0; $t < $len; $t++) {
        $ch = substr($txt,$t,1);
        if ($ch eq '>') {
            $t++;
            last;
        }
        $ttx .= $ch;
    }
    ${$rt} = substr($txt,$t);
    return $ttx;
}

sub get_table_hash_ref($$$$) {
   my ($fank,$fil,$xml,$dbg) = @_;
   my ($hr);
   my ($ank,$len,$i,$ch,$pc,$hr2,$txt,$tail);
   # 2010/04/06 - add 'i' case insensitive; 's' to treat as single line;
   $ank = '';
   $tail = '';
   if ($fank =~ /^<table>(.*)$/is) {
       $tail = $1;
       $hr = get_attribute_hash_ref("",$fil,$xml,$dbg);
   } elsif ($fank =~ /^<table\s+>(.*)$/is) {
       $tail = $1;
       $hr = get_attribute_hash_ref("",$fil,$xml,$dbg);
   } elsif ($fank =~ /^<table\s+(.+)>{1}(.*)$/is) {
      #$ank = trim_all($1);
      #$tail = $2;
      get_table_element(substr($fank,6),\$tail);
      $ank =~ s/\/$//;  # 2010/04/06 - remove any trailing '/'
      $ank = trim_all($ank);    # and trim again
      $hr = get_attribute_hash_ref($ank,$fil,$xml,$dbg);
   } else {
       prtw("PROBLEM: [$fank] FAILED initial image test regex! [$fil]\n");
       pgm_exit(1,"get_table_hash_ref: FAILED! [4]") if ($dbg);
   }
   ${$hr}{'_TRAILING_'} = $tail if (length($tail));
   return $hr;
}


sub get_ufile_text($$) {
    my ($inf,$dbg) = @_;
    if (!open INF, "<$inf") {
        pgm_exit(1,"ERROR: Unable to open file [$inf]!\n");
    }
    my @lines = <INF>;
    close INF;
    my ($filtitle,$fildir) = fileparse($inf);
    $fildir = $cwd.'/' if ($fildir =~ /^\.(\\|\/)$/);
    my $lncnt = scalar @lines;
    prt("Got $lncnt lines, from $inf...\n") if ($dbg & 1);

    my ($i,$line,$ch,$tag,$len,$intag,$txt,$j,$pc,$ppc,$incdata,$hadsp,$attrs);
    my ($lnn,$last,$lln,$bgnlnn,$endlnn,$clnn,$elecnt);
    my ($bgnp,$endp,$bgna,$enda);
    my ($val,$msg,$inpara,$isclose,$ignore,$ind,$ff,$hrcnt);
    my ($missed,$bgndiv,$enddiv,$divfnt);
    my @msg_stack = ();
    my @missed_files = ();
    $tag = '';
    $attrs = '';
    $intag = 0;
    $incdata = 0;
    $hadsp = 0;
    $txt = '';
    $ch = '';
    $pc = '';
    my @elements = ();
    $lnn = 0;
    $inpara = 0;
    $isclose = 0;
    $ignore = 0;
    $hrcnt = 0;
    $missed = 0;
    $divfnt = 0;
    for ($i = 0; $i < $lncnt; $i++) {
        $line = $lines[$i];
        chomp $line;
        $len = length($line);
        $lnn++;
        $clnn = sprintf("%3d",$lnn);
        for ($j = 0; $j < $len; $j++) {
            $ppc = $pc;
            $pc = $ch;
            $ch = substr($line,$j,1);
            if ($incdata) {
                $tag .= $ch;
                if (($ch eq '>')&&($pc eq ']')&&($ppc eq ']')) {
                    $incdata = 0;
                    prt("$clnn: End CDATA\n");
                }
            } elsif ($intag) {
                if ($hadsp) {
                    $attrs .= $ch if !($ch eq '>');
                } elsif ($ch =~ /\s/) {
                    $hadsp = 1;
                } else {
                    $tag .= $ch if !($ch eq '>');
                }

                if ($ch eq '>') {
                    $intag = 0;
                    $endlnn = $lnn;
                } elsif (($ch eq '[')&&($pc eq 'A')&&($tag =~ /^<\!\[CDATA\[/)) {
                    $incdata = 1;
                    prt("\n$clnn: Begin CDATA\n");
                }
                # on EXIT a tag
                if (!$intag) {
                    $tag = trim_all($tag);
                    $msg = '';
                    if ($dbg & 1) {
                        $clnn = sprintf("%4d",$lnn);
                        $msg = "$clnn: ";
                    }
                    #$msg .= "Text [".trim_all($txt)."]\n    : " if (length($txt) && !($txt =~ /^\s+$/));
                    #$msg .= "End tag [$tag] ";
                    #$msg .= "Attrs [".trim_all($attrs)."] " if (length($attrs));
                    if ( length($txt) && !($txt =~ /^\s+$/) ) {
                        if ($dbg & 2) {
                            $msg .= "Text [$txt]\n$clnn: " if length($txt);
                        } else {
                            $msg .= "$txt\n";
                        }
                    }
                    if ($dbg & 2) {
                        $msg .= "Tag  [<$tag";
                    } else {
                        $msg .= "<$tag";
                    }
                    if (length($attrs)) {
                        $msg .= " $attrs>";
                    } else {
                        $msg .= ">";
                    }
                    if ($dbg & 2) {
                        $msg .= "]";
                    }
                    my $hr = get_attribute_hash_ref("",$inf,1,1);
                    if ($tag =~ /^(\!|\?)/) {
                        $msg .= " Special" if ($dbg & 4);
                    } else {
                        if (($attrs =~ /\/$/) || is_closed_tag($tag)) {
                            $msg .= " self-closed" if ($dbg & 4);
                        } elsif ($tag =~ /^\//) {
                            $tag = substr($tag,1);
                            $msg .= " Close" if ($dbg & 4);
                            $isclose = 1;
                            if ($tag =~ /^p$/i) {
                                $endp = $i;
                                $msg .= "($tag)$i:$bgnp:$endp" if ($dbg & 4);
                            } elsif ($tag =~ /^a$/i) {
                                $enda = $i;
                                $msg .= "($tag)$i:$bgna:$enda" if ($dbg & 4);
                            } elsif ($tag =~ /^div$/i) {
                                $enddiv = $i;
                            }
                            if (@elements) {
                                $last = $elements[-1][0]; 
                                $lln  = $elements[-1][1]; 
                                if ($last eq $tag) {
                                    pop @elements;
                                } else {
                                    #if (($last =~ /^p$/i)&&($elecnt > 1)&&($elements[-2][0] eq $tag)) {
                                    # Like a <p> does NOT need a close, so accept this
                                    my $drop = can_find_this_tag($tag,\@elements);
                                    if ($drop) {
                                        while($drop--) {
                                            pop @elements;
                                        }
                                    } else {
                                        prt("\nWARNING:Last [$last]$lln NE [$tag]$lnn line=[".trim_all($line)."]\n");
                                        show_stack_elements($tag,\@elements,\@lines);
                                        pgm_exit(1,"ERROR:[1]: It is useless to continue when the element stack is out of order!\n");
                                    }
                                }
                            } else {
                                prt("The stack has NO elements... The current closing element is [$tag]\n");
                                pgm_exit(1,"ERROR:[2]: It is useless to continue when the element stack is out of order! [2]\n");
                            }
                        } else {
                            $msg .= " Open" if ($dbg & 4);
                            push(@elements,[$tag,$bgnlnn,$endlnn]);
                            $elecnt = scalar @elements;
                            $hr = get_attribute_hash_ref($attrs,$inf,1,1);
                            if ($tag =~ /^p$/i) {
                                $bgnp = $i;
                                $msg .= " ($tag)$i" if ($dbg & 4);
                                $inpara++;
                                #prt("Bump para $inpara $lnn:[$msg] ln=[$line]\n");
                            } elsif ($tag =~ /^a$/i) {
                                $bgna = $i;
                                $msg .= " ($tag)$i" if ($dbg & 4);
                                if ($inpara) {
                                    if (defined ${$hr}{'href'}) {
                                        $val = ${$hr}{'href'};
                                        $msg .= " href=\"$val\"" if ($dbg & 4);
                                        if ($val =~ /^#/) {
                                            $ignore = 1;    # just an infile link
                                        } elsif ($val =~ /^http/i) {
                                            $ignore = 1;    # just a remote like
                                        } else {
                                            $ind = index($val,'#');
                                            if ($ind > 0) {
                                                $val = substr($val,0,$ind);
                                            }
                                            $msg .= " ($val)" if ($dbg & 4);
                                            $ff = $fildir.$val;
                                            if (-f $ff) {
                                                $ignore = 1;    # found this file
                                                $msg .= "ok"  if ($dbg & 4);
                                            } elsif (-d $ff) {
                                                $ignore = 1;    # found this directory
                                                $msg .= "okd" if ($dbg & 4);
                                            } else {
                                                $missed++;
                                                push(@missed_files, [$val,$i,0,0]);
                                            }
                                        }
                                        $hrcnt++;
                                    }
                                }
                            } elsif ($tag =~ /^div$/i) {
                                $bgndiv = $i;
                                if (defined ${$hr}{'class'}) {
                                    $val = ${$hr}{'class'};
                                    if ($val eq 'footnote-text') {
                                        $divfnt = 1;
                                        prt("$clnn: Entered footnote text div\n") if ($dbg & 8);
                                    }
                                }
                            }
                        }
                    }
                    if ($divfnt) {
                        push(@msg_stack,$msg);
                    }
                    # prt("$msg\n") 
                    if ($isclose && ($tag =~ /^div$/i)) {
                        if ($divfnt) {
                            $val = scalar @msg_stack;
                            if ($val && ($dbg & 1)) {
                                prt("\nCount $val, from msg_stack...hrcnt = $hrcnt\n");
                                foreach $msg (@msg_stack) {
                                    prt("$msg\n");
                                }
                            }
                            shift @msg_stack;   # remove first element - <div...> open
                            pop @msg_stack;     # and last element  </div> close
                            return \@msg_stack;
                        }
                    }

                    # reset
                    $txt = '';
                    $tag = '';
                    $attrs = '';
                    $hadsp = 0;
                    $isclose = 0;
                }
            } else {
                if ($ch eq '<') {
                    $tag = '';
                    $intag = 1;
                    $hadsp = 0;
                    $bgnlnn = $lnn;
                } else {
                    $txt .= $ch;
                }
            }
        } # reached end of line - get next
        $ch = ' ';
        $txt .= $ch if (length($txt) && !($txt =~ /\s$/));
        if ($hadsp) {
            $attrs .= $ch if (length($attrs) && !($attrs =~ /\s$/));
        } else {
            $tag .= $ch if (length($tag) && !($tag =~ /\s$/));
        }
        $ppc = $pc;
        $pc = $ch;
    }
    if (@elements && !is_all_optional(\@elements)) {
        show_stack_elements("At-end-of-file",\@elements,\@lines);
        pgm_exit(1,"ERROR:[3] It is useless to continue when the element stack is out of order!\n");
    }
    @msg_stack = ();
    return \@msg_stack;
}


sub process_file($) {
    my ($inf) = shift;
    if (!open INF, "<$inf") {
        pgm_exit(1,"ERROR: Unable to open file [$inf]!\n");
    }
    my @lines = <INF>;
    close INF;
    my ($filtitle,$fildir) = fileparse($inf);
    $fildir = $cwd.'/' if ($fildir =~ /^\.(\\|\/)$/);
    my $lncnt = scalar @lines;
    prt("Got $lncnt lines, from $inf...\n");
    my ($i,$line,$ch,$tag,$len,$intag,$txt,$j,$pc,$ppc,$incdata,$hadsp,$attrs);
    my ($lnn,$last,$lln,$bgnlnn,$endlnn,$clnn);
    my ($bgnp,$endp,$bgna,$enda);
    my ($val,$msg,$inpara,$isclose,$ignore,$ind,$ff,$hrcnt);
    my ($missed,$bgndiv,$enddiv,$divfnt);
    my ($bgndft,$enddft);
    my @msg_stack = ();
    my @missed_files = ();
    $tag = '';
    $attrs = '';
    $intag = 0;
    $incdata = 0;
    $hadsp = 0;
    $txt = '';
    $ch = '';
    $pc = '';
    my @elements = ();
    $lnn = 0;
    $inpara = 0;
    $isclose = 0;
    $ignore = 0;
    $hrcnt = 0;
    $missed = 0;
    $divfnt = 0;
    $bgndft = 0;
    $enddft = 0;
    $bgnp = -1;
    $endp = -1;
    for ($i = 0; $i < $lncnt; $i++) {
        $line = $lines[$i];
        chomp $line;
        $len = length($line);
        $lnn++;
        $clnn = sprintf("%3d",$lnn);
        for ($j = 0; $j < $len; $j++) {
            $ppc = $pc;
            $pc = $ch;
            $ch = substr($line,$j,1);
            if ($incdata) {
                $tag .= $ch;
                if (($ch eq '>')&&($pc eq ']')&&($ppc eq ']')) {
                    $incdata = 0;
                    prt("$clnn: End CDATA\n");
                }
            } elsif ($intag) {
                if ($hadsp) {
                    $attrs .= $ch if !($ch eq '>');
                } elsif ($ch =~ /\s/) {
                    $hadsp = 1;
                } else {
                    $tag .= $ch if !($ch eq '>');
                }

                if ($ch eq '>') {
                    $intag = 0;
                    $endlnn = $lnn;
                } elsif (($ch eq '[')&&($pc eq 'A')&&($tag =~ /^<\!\[CDATA\[/)) {
                    $incdata = 1;
                    prt("\n$clnn: Begin CDATA\n");
                }
                # on EXIT a tag
                if (!$intag) {
                    $tag = trim_all($tag);
                    $clnn = sprintf("%4d",$lnn);
                    $msg = "$clnn: ";
                    $msg .= "Text [".trim_all($txt)."]\n    : " if (length($txt) && !($txt =~ /^\s+$/));
                    $msg .= "End tag [$tag] ";
                    $msg .= "Attrs [".trim_all($attrs)."] " if (length($attrs));
                    my $hr = get_attribute_hash_ref("",$inf,1,1);
                    if ($tag =~ /^(\!|\?)/) {
                        $msg .= "Special";
                    } else {
                        if ($attrs =~ /\/$/) {
                            $msg .= "self-closed";
                        } elsif ($tag =~ /^\//) {
                            $tag = substr($tag,1);
                            $msg .= "Close";
                            $isclose = 1;
                            if ($tag =~ /^p$/i) {
                                $endp = $i;
                                $msg .= "($tag)$i:$bgnp:$endp";
                            } elsif ($tag =~ /^a$/i) {
                                $enda = $i;
                                $msg .= "($tag)$i:$bgna:$enda";
                            } elsif ($tag =~ /^div$/i) {
                                $enddiv = $i;
                                $msg .= "($tag)$i Div:$bgndiv:$enddiv Para:$bgnp:$endp";
                            }
                            if (@elements) {
                                $last = $elements[-1][0]; 
                                $lln  = $elements[-1][1]; 
                                if ($last eq $tag) {
                                    pop @elements;
                                } else {
                                    prt("\nWARNING:Last [$last]$lln NE [$tag]$lnn line=[".trim_all($line)."]\n");
                                    show_stack_elements($tag,\@elements,\@lines);
                                    pgm_exit(1,"ERROR:[1]: It is useless to continue when the element stack is out of order!\n");
                                }
                            } else {
                                prt("The stack has NO elements... The current closing element is [$tag]\n");
                                pgm_exit(1,"ERROR:[2]: It is useless to continue when the element stack is out of order! [2]\n");
                            }
                        } else {
                            $msg .= "Open";
                            push(@elements,[$tag,$bgnlnn,$endlnn]);
                            $hr = get_attribute_hash_ref($attrs,$inf,1,1);
                            if ($tag =~ /^p$/i) {
                                $bgnp = $i;
                                $msg .= "($tag)$i";
                                $inpara++;
                                #prt("Bump para $inpara $lnn:[$msg] ln=[$line]\n");
                            } elsif ($tag =~ /^a$/i) {
                                $bgna = $i;
                                $msg .= "($tag)$i";
                                if ($inpara) {
                                    if (defined ${$hr}{'href'}) {
                                        $val = ${$hr}{'href'};
                                        $msg .= " href=\"$val\"";
                                        if ($val =~ /^#/) {
                                            $ignore = 1;    # just an infile link
                                        } elsif ($val =~ /^http/i) {
                                            $ignore = 1;    # just a remote like
                                        } else {
                                            $ind = index($val,'#');
                                            if ($ind > 0) {
                                                $val = substr($val,0,$ind);
                                            }
                                            $msg .= " ($val)";
                                            $ff = $fildir.$val;
                                            if (-f $ff) {
                                                $ignore = 1;    # found this file
                                                $msg .= "ok";
                                            } elsif (-d $ff) {
                                                $ignore = 1;    # found this directory
                                                $msg .= "okd";
                                            } else {
                                                $missed++;
                                                #                    0    1  2 3 4  5 6 7
                                                push(@missed_files, [$val,$i,0,0,[],0,0,$hr]);
                                            }
                                        }
                                        $hrcnt++;
                                    }
                                }
                            } elsif ($tag =~ /^div$/i) {
                                $bgndiv = $i;
                                if (defined ${$hr}{'class'}) {
                                    $val = ${$hr}{'class'};
                                    if ($val eq 'footnote-text') {
                                        $divfnt = 1;
                                        $bgndft = $i;
                                        prt("\n$clnn: Entered footnote text div\n");
                                    }
                                }
                            }
                        }
                    }
                    if ($inpara || $divfnt) {
                        push(@msg_stack,$msg);
                    }
                    # prt("$msg\n") 
                    if ($isclose && ($tag =~ /^p$/i)) {
                        if (!$divfnt) {
                            if ($inpara) {
                                $inpara--;
                                $val = scalar @msg_stack;
                                if ((!$ignore || $missed) && $hrcnt && $val && $show_para_stack) {
                                    prt("\nCount $val, from msg_stack...hrcnt = $hrcnt\n");
                                    foreach $msg (@msg_stack) {
                                        prt("$msg\n");
                                    }
                                }
                                if ($missed) {
                                    #  0    1  2 3
                                    # [$val,$i,0,0,...]);
                                    $val = -1;
                                    while($missed) {
                                        $missed_files[$val][2] = $bgnp;
                                        $missed_files[$val][3] = $endp;
                                        $missed--;
                                        $val--;
                                    }
                                }
                                $ignore = 0;
                                $missed = 0;
                            }
                            @msg_stack = ();
                            $hrcnt = 0;
                        }
                    } elsif ($isclose && ($tag =~ /^div$/i)) {
                        if ($divfnt) {
                            $val = scalar @msg_stack;
                            if ($val && $show_div_stack) {
                                prt("\nCount $val, from msg_stack...hrcnt = $hrcnt\n");
                                foreach $msg (@msg_stack) {
                                    prt("$msg\n");
                                }
                            }
                            $enddft = $i;
                            $ignore = 0;
                            $missed = 0;
                            @msg_stack = ();
                            $hrcnt = 0;
                        }
                        $divfnt = 0;
                        $ignore = 0;
                        $missed = 0;
                        @msg_stack = ();
                        $hrcnt = 0;
                    }

                    push(@g_all_elements,[$tag,$bgnlnn,$endlnn,$txt,$attrs,$hr]);
                    # reset
                    $txt = '';
                    $tag = '';
                    $attrs = '';
                    $hadsp = 0;
                    $isclose = 0;
                }
            } else {
                if ($ch eq '<') {
                    $tag = '';
                    $intag = 1;
                    $hadsp = 0;
                    $bgnlnn = $lnn;
                } else {
                    $txt .= $ch;
                }
            }
        } # reached end of line - get next
        $ch = ' ';
        $txt .= $ch if (length($txt) && !($txt =~ /\s$/));
        if ($hadsp) {
            $attrs .= $ch if (length($attrs) && !($attrs =~ /\s$/));
        } else {
            $tag .= $ch if (length($tag) && !($tag =~ /\s$/));
        }
        $ppc = $pc;
        $pc = $ch;
    }
    if (@elements) {
        show_stack_elements("At-end-of-file",\@elements,\@lines);
        pgm_exit(1,"ERROR:[3] It is useless to continue when the element stack is out of order!\n");
    }
    my $minm = $lncnt;
    my $maxm = 0;
    if (@missed_files) {
        $val = scalar @missed_files;
        #                      0    1  2 3 4  5 6 7
        # push(@missed_files, [$val,$i,0,0,[],0,0,$hr]);
        prt("\nFound $val missed files...\n");
        $bgna = 0;
        $enda = 0;
        for ($j = 0; $j < $val; $j++) {
            $tag = $missed_files[$j][0];
            $ind = $missed_files[$j][1];
            $bgnp = $missed_files[$j][2];
            $endp = $missed_files[$j][3];
            $ff = $us_dir.$tag;
            $msg = '';
            $minm = $bgnp if ($bgnp < $minm);
            $maxm = $endp if ($endp > $maxm);
            if (-f $ff) {
                my $ha = get_ufile_text($ff,0);
                $msg .= "ok ".scalar @{$ha};
                $missed_files[$j][4] = $ha;
                my @arr = ();
                for ($i = $bgnp; $i < $endp; $i++) {
                    $line = $lines[$i];
                    chomp $line;
                    push(@arr,$line);
                }
                $missed_files[$j][5] = [@arr];
                # $msg .= "ok";
            } else {
                $msg .= "NF";
            }
            if (($bgnp == $bgna)&&($endp == $enda)) {
                $msg .= " Repeated BLOCK";
            }
            prt("Line $ind:$bgnp:$endp: $tag $msg\n");
            $bgna = $bgnp;
            $enda = $endp;
        }
    }
    my %hash = ();
    $hash{'file_name'} = $inf;
    $hash{'file_lines'} = [@lines];
    $hash{'file_missed'} = [@missed_files];
    $hash{'file_dfnt'} = [$bgndft,$enddft,$minm,$maxm];
    return \%hash;
}

sub fix_these_lines($$$$) {
    my ($off,$rl,$hr,$dbg) = @_# = \@arr, ${$rma}[$first][7]);
    my $cnt = scalar @{$rl};
    my $txt = '';
    my @nlines = ();
    my ($i,$line,$len,$ch,$intg,$hsp,$tag,$atts,$href,$chr);
    my ($j,$lnn,$ind,$tatts,$k,$v);
    my @fixes = ();
    my @divfnt_fix1 = ();
    my @divfnt_fix2 = ();
    $intg = 0;
    $hsp = 0;
    $tag = '';
    $atts = '';
    if (!defined ${$hr}{'href'}) {
        pgm_exit(1,"ERROR: Passed ref does NOT have a 'href' attribute!\n");
        return ($rl,undef,undef);
    }
    $href = ${$hr}{'href'};
    for ($i = 0; $i < $cnt; $i++) {
        $line = ${$rl}[$i];     # already chomped line
        $len = length($line);
        $lnn = $off + $i;
        for ($j = 0; $j < $len; $j++) {
            $ch = substr($line,$j,1);
            if ($intg) {
                if ($hsp) {
                    $atts .= $ch if ($ch ne '>');
                } elsif ($ch =~ /\s/) {
                    $hsp = 1;
                } else {
                    $tag .= $ch if ($ch ne '>');
                }
                if ($ch eq '>') {
                    prt("End TAG:$lnn:$j: [$tag] [$atts]\n") if ($dbg & 1);
                    $intg = 0;
                    if ((substr($tag,1,1) eq '!')||($tag =~ /^<(\!|\?)/)) {
                        # specials <!DOC, <!--, <?php
                        $txt .= $tag;
                        if (length($atts)) {
                            $txt .= ' '.$atts;
                        }
                        $txt .= '>';
                    } elsif (length($atts)) {
                        $tatts = trim_all($atts);
                        $tatts =~ s/\/$//;
                        $tatts = trim_all($tatts);
                        $tatts =~ s/>$//;
                        $tatts = trim_all($tatts);
                        my $rat = get_attribute_hash_ref($tatts,"<fix_these_lines>",1,1);
                        if (defined ${$rat}{'href'}) {
                            $chr = ${$rat}{'href'};
                            if ($chr eq $href) {
                                $ind = index($chr,'.');
                                if ($ind > 0) {
                                    $chr = substr($chr,0,$ind);
                                    prt("Found HREF to fix [$href] [$chr] ($i:$j)\n");
                                    ${$rat}{'href'} = '#'.$chr;
                                    push(@fixes,"     <a name=\"ret_".$chr."\"></a>");
                                    push(@divfnt_fix1,"     <a name=\"".$chr."\"></a>");
                                    push(@divfnt_fix2,"     <a href=\"#ret_".$chr."\">back</a>");
                                } else {
                                    prtw("WARNING: Found HREF to fix [$href] [$chr] ($i:$j) BUT COULD NOT FIX IT!\n");
                                }
                            }
                        }
                        $txt .= $tag;
                        $k = 0;
                        foreach $k (keys %{$rat}) {
                            $v = ${$rat}{$k};
                            $txt .= ' ' if ($k);
                            $txt .= "$k=\"$v\"";
                            $k++;
                        }
                        $txt .= '>';
                    } else {
                        $txt .= $tag.'>';
                    }
                    $atts = '';
                    $tag = '';
                }
            } else {
                if ($ch eq '<') {
                    $tag = $ch;
                    $intg = 1;
                    $hsp = 0;
                } else {
                    $txt .= $ch;
                }
            }
        }
        # done this line
        if (length($txt)) {
            push(@nlines,$txt);
            prt("New line [$txt]\n") if ($dbg & 2);
        }
        $txt = '';
    }
    if (length($txt)) {
        push(@nlines,$txt);
        prt("New line [$txt]\n") if ($dbg & 2);
    }
    if (@fixes) {
        foreach $txt (@fixes) {
            unshift(@nlines,$txt);  # add in NEW line(s) at top of para
        }
    }
    return \@nlines,\@divfnt_fix1,\@divfnt_fix2;
    # return $rl;
}

sub show_ref_hash($) {
    my ($rh) = @_;
    if (!defined ${$rh}{'file_name'} || !defined ${$rh}{'file_lines'} || !defined ${$rh}{'file_missed'}) {
        pgm_exit(1,"Ref HASH paseed is NOT compatible...");
    }
    my $fil = ${$rh}{'file_name'};
    my $rla = ${$rh}{'file_lines'};
    my $rma = ${$rh}{'file_missed'};
    my $rdft = ${$rh}{'file_dfnt'};
    my $lncnt = scalar @{$rla};
    my $mcnt = scalar @{$rma};
    my $bdft = ${$rdft}[0];
    my $edft = ${$rdft}[1];
    my $minm = ${$rdft}[2];
    my $maxm = ${$rdft}[3];
    prt("File [$fil], $lncnt lines, missed $mcnt, min $minm, max $maxm, dfn $bdft - $edft ");
    # some sanity checks
    if ($mcnt && ($minm < $maxm) && ($maxm < $lncnt) && ($maxm < $bdft) && ($edft < $lncnt)) {
        prt("ok\n");
        my @nlines = ();
        my ($i,$line,$first,$last,$j,$cnt);
        my ($nrlns,$fx1,$fx2,$k,$ha);
        my ($k2,$max,$max1,$max2,$min);
        #                    0    1  2 = minp
        #                               3 4  5 
        #                                     6=done flag
        #                                       7=missed hash ref of anchor to be changes
        $first = -1;
        for ($i = 0; $i < $mcnt; $i++) {
            ${$rma}[$i][6] = 0;
            if (${$rma}[$i][2] == $minm) {
                $first = $i;
                $last = ${$rma}[$i][3];
            }
        }
        pgm_exit(1,"ERROR: Did NOT find first!\n") if ($first == -1);
        # 1: Get to FIRST need for change
        for ($i = 0; $i < $minm; $i++) {
            $line = ${$rla}[$i];
            chomp $line;
            push(@nlines,$line);
        }
        # now fix the changes
        my @fixes = ();
        while ($first != -1) {
            ${$rma}[$first][6] = 1; # set as DONE
            $ha = ${$rma}[$first][4];   # get NEW lines for div fnt section
            my @arr = ();
            prt("Getting lines to change...\n");
            for (; $i <= $last; $i++) {
                $line = ${$rla}[$i];
                chomp $line;
                push(@arr,$line);
                prt("$i: [$line]\n");
            }
            ($nrlns,$fx1,$fx2) = fix_these_lines($minm,\@arr, ${$rma}[$first][7],0);
            push(@fixes,[$first,$fx1,$fx2]);
            $cnt = scalar @{$nrlns};
            for ($j = 0; $j < $cnt; $j++) {
                $line = ${$nrlns}[$j];
                push(@nlines,$line);
            }
            $first = -1;
            $min = $maxm;
            for ($j = 0; $j < $mcnt; $j++) {
                if (${$rma}[$j][6] == 0) {
                    $k = ${$rma}[$i][2];
                    if ($k < $min) {
                        $min = $k;
                        $first = $j;
                        $last = ${$rma}[$j][3];
                    }
                }
            }
        }

        for (; $i < $lncnt; $i++) {
            $line = ${$rla}[$i];
            chomp $line;
            last if ($i == $edft);
            push(@nlines,$line);
        }
        my $fcnt = scalar @fixes;
        for ($j = 0; $j < $fcnt; $j++) {
            $first = $fixes[$j][0];
            $fx1   = $fixes[$j][1];
            $fx2   = $fixes[$j][2];
            $ha = ${$rma}[$first][4];   # get NEW lines for div fnt section
            $max  = scalar(@{$ha});
            $max1 = scalar(@{$fx1});
            for ($k = 0; $k < $max1; $k++) {
                $line = ${$fx1}[$k];
                push(@nlines,$line);
            }
            $max2 = $max;
            $max2-- if ($max2);
            for ($k2 = 0; $k2 < $max2; $k2++) {
                $line = ${$ha}[$k2];
                push(@nlines,$line);
            }
            $max2 = scalar(@{$fx2});
            for ($k = 0; $k < $max2; $k++) {
                $line = ${$fx2}[$k];
                push(@nlines,$line);
            }
            # finish balance
            for ( ; $k2 < $max; $k2++) {
                $line = ${$ha}[$k2];
                push(@nlines,$line);
            }
        }

        for (; $i < $lncnt; $i++) {
            $line = ${$rla}[$i];
            chomp $line;
            push(@nlines,$line);
        }
        write2file(join("\n",@nlines)."\n",'tempnew.html');
        prt("Written to tempnew.html...\n");

    } else {
        prt("\nERROR: Somehting does not add up!\n");
        prt("NOT if ($mcnt && ($minm < $maxm) && ($maxm < $lncnt) && ($maxm < $bdft) && ($edft < $lncnt))! UGH!\n");
    }
}

#########################################
### MAIN ###
parse_args(@ARGV);
prt( "$pgmname: in [$cwd]: Process $in_file...\n" );
my $ref_hash = process_file($in_file);
show_ref_hash($ref_hash);
pgm_exit(0,"Normal exit(0)");
########################################

sub parse_args {
    my (@av) = @_;
    my ($arg);
    while (@av) {
        $arg = $av[0];
        $in_file = $arg;
        shift @av;
    }
    if (!length($in_file)) {
        $in_file = $def_infile;
    }
}

# eof - template.pl

index -|- top

checked by tidy  Valid HTML 4.01 Transitional