Generated: Mon Aug 16 14:14:15 2010 from fgfixfn.pl 2010/04/12 43.4 KB.
#!/perl -w # NAME: fgfixfn.pl # AIM: VERY SPECIFIC - read file, and fix footnotes - did a REASONABLE job, but needed some manual touchups # 2010/04/06 - geoff mclane - http://geoffair.net/mperl/ use strict; use warnings; use File::Basename; # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] ) use Cwd; unshift(@INC, 'C:\GTools\perl'); require 'logfile.pl' or die "Unable to load logfile.pl ...\n"; # log file stuff my ($LF); my $pgmname = $0; if ($pgmname =~ /\w{1}:\\.*/) { my @tmpsp = split(/\\/,$pgmname); $pgmname = $tmpsp[-1]; } my $perl_dir = 'C:\GTools\perl'; my $outfile = $perl_dir."\\temp.$pgmname.txt"; open_log($outfile); # user variables my $load_log = 1; my $def_infile = 'C:\HOMEPAGE\FG\Docs\getstart\getstartch9.html'; my $in_file = ''; my $show_div_stack = 0; my $show_para_stack = 0; ### program variables my @warnings = (); my $cwd = cwd(); my @g_all_elements = (); my @tag_list = ("a","abbr","acronym","address","applet","area","b","base","basefont","bdo","big","blockquote","body", "br","button","caption","center","cite","code","col","colgroup","dd","del","dfn","dir","div","dl","dt","em","fieldset", "font","form","frame","frameset","h1","h2","h3","h4","h5","h6","head","hr","html","i","iframe","img","input","ins", "isindex","kbd","label","legend","li","link","listing","map","menu","meta","noframes","noscript","object","ol", "optgroup","option","p","param","plaintext","pre","q","rb","rbc","rp","rt","rtc","ruby","s","samp","script","select", "small","span","strike","strong","style","sub","sup","table","tbody","td","textarea","tfoot","th","thead","title", "tr","tt","u","ul","var","xmp","nextid","align","bgsound","blink","comment","embed","ilayer","keygen","layer", "marquee","multicol","nobr","noembed","nolayer","nosave","server","servlet","spacer","wbr"); my @closed_tags = qw( meta link area base basefont br frame hr isindex param bgsound embed keygen img ); # tags which do NOT need a closing, like </p>, tag my @opt_tags = ( "body", "colgroup", "dd", "dt", "head", "html", "li", "optgroup", "option", "p", "tbody", "td", "tfoot", "th", "thead", "tr", "marquee" ); my $us_dir = "u:\\var\\www\\fg\\www\\Docs\\getstart\\"; # debug my $dbg38 = 0; # prt( "[dbg38] Got [$hr2] = [$txt] [$fil]\n" ) if ($dbg38); my $dbg39 = 0; # prt( "[dbg39] Got [$hr2] = [$txt] - no inverted commas! [$fil]\n" ) if ($dbg39); sub pgm_exit($$) { my ($val,$msg) = @_; if (length($msg)) { $msg .= "\n" if (!($msg =~ /\n$/)); prt($msg) } close_log($outfile,$load_log); exit($val); } sub prtw($) { my ($tx) = shift; $tx =~ s/\n$//; prt("$tx\n"); push(@warnings,$tx); } sub show_warnings() { if (@warnings) { prt( "\nGot ".scalar @warnings." WARNINGS...\n" ); foreach my $itm (@warnings) { prt("$itm\n"); } prt("\n"); } else { prt( "\nNo warnings issued.\n\n" ); } } sub is_closed_tag($) { my ($tt) = shift; my $lctt = lc($tt); foreach my $tag (@closed_tags) { return 1 if ($tag eq $lctt); } return 0; } sub is_opt_tag($) { my ($tt) = shift; my $lctt = lc($tt); foreach my $tag (@opt_tags) { return 1 if ($tag eq $lctt); } return 0; } # $drop = can_find_this_tag($tag,\@elements); sub can_find_this_tag($$) { my ($tag,$re) = @_; my $len = scalar @{$re}; my $drop = 0; my $bu = -1; my $last = ''; while ($len) { $drop++; # can pop this one $last = ${$re}[$bu][0]; # get tag if ($last eq $tag) { # if the desired tag return $drop; # return drop value } elsif ( ! is_opt_tag($last) ) { return 0; # oop, have a non-optional tag } $bu--; # back up one more $len--; # and reduce available to check } return 0; } sub is_all_optional($) { my ($re) = @_; my $len = scalar @{$re}; my $bu = -1; my ($last); while ($len) { $last = ${$re}[$bu][0]; # get tag if ( ! is_opt_tag($last) ) { return 0; # oop, have a non-optional tag } $bu--; # back up one more $len--; # and reduce available to check } return 1; # ALL were optiona } sub show_stack_elements($$$) { my ($tag,$rele,$rlns) = @_; my $cnt = scalar @{$rele}; my $lcnt = scalar @{$rlns}; if ($cnt) { prt("The stack has $cnt elements... The current closing element is [$tag]\n"); for (my $i = 0; $i < $cnt; $i++) { my $e = ${$rele}[$i][0]; my $n = ${$rele}[$i][1]; prt("$n: elelement [$e]"); prt(" SAME as tag [$tag]!") if ($e eq $tag); if ($n <= $lcnt) { my $ln = trim_all(${$rlns}[$n-1]); prt(" line=[$ln]"); } prt("\n"); } } else { prt("The stack has NO elements... The current closing element is [$tag]\n"); } } sub get_attribute_hash_ref($$$$) { my ($fank,$fil,$xml,$dbg) = @_; my %hash = (); my ($ank,$len,$i,$ch,$pc,$hr2,$txt); $ank = trim_all($fank); $len = length($ank); $ch = ''; $hr2 = ''; for ($i = 0; $i < $len; $i++) { $pc = $ch; $ch = substr($ank,$i,1); # if ($ch =~ /\w/) - this missed xml:link="abc" # and 'http-equiv="..." if ($ch =~ /(\w|:|-)/) { $hr2 .= $ch; # accumulate \w chars - alphanumeric, including _ } elsif (length($hr2)) { if (($ch ne '=') && ($ch =~ /\s/)) { $i++; for (; $i < $len; $i++) { $ch = substr($ank,$i,1); last if ($ch eq '='); last if !($ch =~ /\s/); } } if ($ch eq '=') { # found our equal sign $i++; # move on... for (; $i < $len; $i++) { $ch = substr($ank,$i,1); last if ($ch =~ /('|")/); last if !($ch =~ /\s/); } if (($ch eq '"')||($ch eq "'")) { $pc = $ch; $i++; # move on... $txt = ''; for (; $i < $len; $i++) { $ch = substr($ank,$i,1); last if ($ch eq $pc); $txt .= $ch; } if ($ch eq $pc) { $hr2 = lc($hr2) if ($xml); $hash{$hr2} = $txt; prt( "[dbg38] Got [$hr2] = [$txt] [$fil]\n" ) if ($dbg38); } else { prtw("PROBLEM: got [$hr2]. At pos $i in [$ank], from [$fank], and NO END INVERTED COMMA! ($pc) [$fil]\n"); pgm_exit(1,"NEED CODE FIX!") if ($dbg); } } else { if (($ch =~ /\w/) && (($hr2 =~ /name/i)||($hr2 =~ /id/i))) { # accept these WITHOUT inverted comma $txt = $ch; $i++; # MOVING ON for (; $i < $len; $i++) { $ch = substr($ank,$i,1); last if !($ch =~ /\w/); $txt .= $ch; } $hr2 = lc($hr2) if ($xml); $hash{$hr2} = $txt; prt( "[dbg39] Got [$hr2] = [$txt] - no inverted commas! [$fil]\n" ) if ($dbg39); } else { prtw("PROBLEM: got [$hr2]. At pos $i in [$ank], from [$fank], and NO START INVERTED COMMA! [$fil]\n"); pgm_exit(1,"NEED CODE FIX!") if ($dbg); } } } else { prtw("PROBLEM: got [$hr2]. At pos $i in [$ank], from [$fank], and NO EQUAL SIGN! [$fil]\n"); pgm_exit(1,"NEED CODE FIX!") if ($dbg); } $hr2 = ''; } } return \%hash; } sub get_anchor_hash_ref($$$) { my ($fank,$fil,$xml,$dbg) = @_; my ($hr); my ($ank,$len,$i,$ch,$pc,$hr2,$txt); if ($fank =~ /<a\s+(.+)>$/) { $ank = trim_all($1); $hr = get_attribute_hash_ref($ank,$fil,$xml,$dbg); } return $hr; } sub get_img_hash_ref($$$$) { my ($fank,$fil,$xml,$dbg) = @_; my $hr = (); my ($ank,$len,$i,$ch,$pc,$hr2,$txt,$tail); # 2010/04/06 - add 'i' case insensitive; 's' to treat as single line; if ($fank =~ /^<img\s+(.+)>{1}(.*)$/is) { $ank = trim_all($1); $tail = $2; $ank =~ s/\/$//; # 2010/04/06 - remove any trailing '/' $ank = trim_all($ank); # and trim again $hr = get_attribute_hash_ref($ank,$fil,$xml,$dbg); } else { prtw("PROBLEM: [$fank] FAILED initial image test regex! [$fil]\n"); pgm_exit(1,"get_img_hash_ref: FAILED!") if ($dbg); } ${$hr}{'_TRAILING_'} = $tail if (length($tail)); return $hr; } sub get_table_element($$) { my ($txt,$rt) = @_; my $len = length($txt); my $ttx = ''; my ($ch,$t); for ($t = 0; $t < $len; $t++) { $ch = substr($txt,$t,1); if ($ch eq '>') { $t++; last; } $ttx .= $ch; } ${$rt} = substr($txt,$t); return $ttx; } sub get_table_hash_ref($$$$) { my ($fank,$fil,$xml,$dbg) = @_; my ($hr); my ($ank,$len,$i,$ch,$pc,$hr2,$txt,$tail); # 2010/04/06 - add 'i' case insensitive; 's' to treat as single line; $ank = ''; $tail = ''; if ($fank =~ /^<table>(.*)$/is) { $tail = $1; $hr = get_attribute_hash_ref("",$fil,$xml,$dbg); } elsif ($fank =~ /^<table\s+>(.*)$/is) { $tail = $1; $hr = get_attribute_hash_ref("",$fil,$xml,$dbg); } elsif ($fank =~ /^<table\s+(.+)>{1}(.*)$/is) { #$ank = trim_all($1); #$tail = $2; get_table_element(substr($fank,6),\$tail); $ank =~ s/\/$//; # 2010/04/06 - remove any trailing '/' $ank = trim_all($ank); # and trim again $hr = get_attribute_hash_ref($ank,$fil,$xml,$dbg); } else { prtw("PROBLEM: [$fank] FAILED initial image test regex! [$fil]\n"); pgm_exit(1,"get_table_hash_ref: FAILED! [4]") if ($dbg); } ${$hr}{'_TRAILING_'} = $tail if (length($tail)); return $hr; } sub get_ufile_text($$) { my ($inf,$dbg) = @_; if (!open INF, "<$inf") { pgm_exit(1,"ERROR: Unable to open file [$inf]!\n"); } my @lines = <INF>; close INF; my ($filtitle,$fildir) = fileparse($inf); $fildir = $cwd.'/' if ($fildir =~ /^\.(\\|\/)$/); my $lncnt = scalar @lines; prt("Got $lncnt lines, from $inf...\n") if ($dbg & 1); my ($i,$line,$ch,$tag,$len,$intag,$txt,$j,$pc,$ppc,$incdata,$hadsp,$attrs); my ($lnn,$last,$lln,$bgnlnn,$endlnn,$clnn,$elecnt); my ($bgnp,$endp,$bgna,$enda); my ($val,$msg,$inpara,$isclose,$ignore,$ind,$ff,$hrcnt); my ($missed,$bgndiv,$enddiv,$divfnt); my @msg_stack = (); my @missed_files = (); $tag = ''; $attrs = ''; $intag = 0; $incdata = 0; $hadsp = 0; $txt = ''; $ch = ''; $pc = ''; my @elements = (); $lnn = 0; $inpara = 0; $isclose = 0; $ignore = 0; $hrcnt = 0; $missed = 0; $divfnt = 0; for ($i = 0; $i < $lncnt; $i++) { $line = $lines[$i]; chomp $line; $len = length($line); $lnn++; $clnn = sprintf("%3d",$lnn); for ($j = 0; $j < $len; $j++) { $ppc = $pc; $pc = $ch; $ch = substr($line,$j,1); if ($incdata) { $tag .= $ch; if (($ch eq '>')&&($pc eq ']')&&($ppc eq ']')) { $incdata = 0; prt("$clnn: End CDATA\n"); } } elsif ($intag) { if ($hadsp) { $attrs .= $ch if !($ch eq '>'); } elsif ($ch =~ /\s/) { $hadsp = 1; } else { $tag .= $ch if !($ch eq '>'); } if ($ch eq '>') { $intag = 0; $endlnn = $lnn; } elsif (($ch eq '[')&&($pc eq 'A')&&($tag =~ /^<\!\[CDATA\[/)) { $incdata = 1; prt("\n$clnn: Begin CDATA\n"); } # on EXIT a tag if (!$intag) { $tag = trim_all($tag); $msg = ''; if ($dbg & 1) { $clnn = sprintf("%4d",$lnn); $msg = "$clnn: "; } #$msg .= "Text [".trim_all($txt)."]\n : " if (length($txt) && !($txt =~ /^\s+$/)); #$msg .= "End tag [$tag] "; #$msg .= "Attrs [".trim_all($attrs)."] " if (length($attrs)); if ( length($txt) && !($txt =~ /^\s+$/) ) { if ($dbg & 2) { $msg .= "Text [$txt]\n$clnn: " if length($txt); } else { $msg .= "$txt\n"; } } if ($dbg & 2) { $msg .= "Tag [<$tag"; } else { $msg .= "<$tag"; } if (length($attrs)) { $msg .= " $attrs>"; } else { $msg .= ">"; } if ($dbg & 2) { $msg .= "]"; } my $hr = get_attribute_hash_ref("",$inf,1,1); if ($tag =~ /^(\!|\?)/) { $msg .= " Special" if ($dbg & 4); } else { if (($attrs =~ /\/$/) || is_closed_tag($tag)) { $msg .= " self-closed" if ($dbg & 4); } elsif ($tag =~ /^\//) { $tag = substr($tag,1); $msg .= " Close" if ($dbg & 4); $isclose = 1; if ($tag =~ /^p$/i) { $endp = $i; $msg .= "($tag)$i:$bgnp:$endp" if ($dbg & 4); } elsif ($tag =~ /^a$/i) { $enda = $i; $msg .= "($tag)$i:$bgna:$enda" if ($dbg & 4); } elsif ($tag =~ /^div$/i) { $enddiv = $i; } if (@elements) { $last = $elements[-1][0]; $lln = $elements[-1][1]; if ($last eq $tag) { pop @elements; } else { #if (($last =~ /^p$/i)&&($elecnt > 1)&&($elements[-2][0] eq $tag)) { # Like a <p> does NOT need a close, so accept this my $drop = can_find_this_tag($tag,\@elements); if ($drop) { while($drop--) { pop @elements; } } else { prt("\nWARNING:Last [$last]$lln NE [$tag]$lnn line=[".trim_all($line)."]\n"); show_stack_elements($tag,\@elements,\@lines); pgm_exit(1,"ERROR:[1]: It is useless to continue when the element stack is out of order!\n"); } } } else { prt("The stack has NO elements... The current closing element is [$tag]\n"); pgm_exit(1,"ERROR:[2]: It is useless to continue when the element stack is out of order! [2]\n"); } } else { $msg .= " Open" if ($dbg & 4); push(@elements,[$tag,$bgnlnn,$endlnn]); $elecnt = scalar @elements; $hr = get_attribute_hash_ref($attrs,$inf,1,1); if ($tag =~ /^p$/i) { $bgnp = $i; $msg .= " ($tag)$i" if ($dbg & 4); $inpara++; #prt("Bump para $inpara $lnn:[$msg] ln=[$line]\n"); } elsif ($tag =~ /^a$/i) { $bgna = $i; $msg .= " ($tag)$i" if ($dbg & 4); if ($inpara) { if (defined ${$hr}{'href'}) { $val = ${$hr}{'href'}; $msg .= " href=\"$val\"" if ($dbg & 4); if ($val =~ /^#/) { $ignore = 1; # just an infile link } elsif ($val =~ /^http/i) { $ignore = 1; # just a remote like } else { $ind = index($val,'#'); if ($ind > 0) { $val = substr($val,0,$ind); } $msg .= " ($val)" if ($dbg & 4); $ff = $fildir.$val; if (-f $ff) { $ignore = 1; # found this file $msg .= "ok" if ($dbg & 4); } elsif (-d $ff) { $ignore = 1; # found this directory $msg .= "okd" if ($dbg & 4); } else { $missed++; push(@missed_files, [$val,$i,0,0]); } } $hrcnt++; } } } elsif ($tag =~ /^div$/i) { $bgndiv = $i; if (defined ${$hr}{'class'}) { $val = ${$hr}{'class'}; if ($val eq 'footnote-text') { $divfnt = 1; prt("$clnn: Entered footnote text div\n") if ($dbg & 8); } } } } } if ($divfnt) { push(@msg_stack,$msg); } # prt("$msg\n") if ($isclose && ($tag =~ /^div$/i)) { if ($divfnt) { $val = scalar @msg_stack; if ($val && ($dbg & 1)) { prt("\nCount $val, from msg_stack...hrcnt = $hrcnt\n"); foreach $msg (@msg_stack) { prt("$msg\n"); } } shift @msg_stack; # remove first element - <div...> open pop @msg_stack; # and last element </div> close return \@msg_stack; } } # reset $txt = ''; $tag = ''; $attrs = ''; $hadsp = 0; $isclose = 0; } } else { if ($ch eq '<') { $tag = ''; $intag = 1; $hadsp = 0; $bgnlnn = $lnn; } else { $txt .= $ch; } } } # reached end of line - get next $ch = ' '; $txt .= $ch if (length($txt) && !($txt =~ /\s$/)); if ($hadsp) { $attrs .= $ch if (length($attrs) && !($attrs =~ /\s$/)); } else { $tag .= $ch if (length($tag) && !($tag =~ /\s$/)); } $ppc = $pc; $pc = $ch; } if (@elements && !is_all_optional(\@elements)) { show_stack_elements("At-end-of-file",\@elements,\@lines); pgm_exit(1,"ERROR:[3] It is useless to continue when the element stack is out of order!\n"); } @msg_stack = (); return \@msg_stack; } sub process_file($) { my ($inf) = shift; if (!open INF, "<$inf") { pgm_exit(1,"ERROR: Unable to open file [$inf]!\n"); } my @lines = <INF>; close INF; my ($filtitle,$fildir) = fileparse($inf); $fildir = $cwd.'/' if ($fildir =~ /^\.(\\|\/)$/); my $lncnt = scalar @lines; prt("Got $lncnt lines, from $inf...\n"); my ($i,$line,$ch,$tag,$len,$intag,$txt,$j,$pc,$ppc,$incdata,$hadsp,$attrs); my ($lnn,$last,$lln,$bgnlnn,$endlnn,$clnn); my ($bgnp,$endp,$bgna,$enda); my ($val,$msg,$inpara,$isclose,$ignore,$ind,$ff,$hrcnt); my ($missed,$bgndiv,$enddiv,$divfnt); my ($bgndft,$enddft); my @msg_stack = (); my @missed_files = (); $tag = ''; $attrs = ''; $intag = 0; $incdata = 0; $hadsp = 0; $txt = ''; $ch = ''; $pc = ''; my @elements = (); $lnn = 0; $inpara = 0; $isclose = 0; $ignore = 0; $hrcnt = 0; $missed = 0; $divfnt = 0; $bgndft = 0; $enddft = 0; $bgnp = -1; $endp = -1; for ($i = 0; $i < $lncnt; $i++) { $line = $lines[$i]; chomp $line; $len = length($line); $lnn++; $clnn = sprintf("%3d",$lnn); for ($j = 0; $j < $len; $j++) { $ppc = $pc; $pc = $ch; $ch = substr($line,$j,1); if ($incdata) { $tag .= $ch; if (($ch eq '>')&&($pc eq ']')&&($ppc eq ']')) { $incdata = 0; prt("$clnn: End CDATA\n"); } } elsif ($intag) { if ($hadsp) { $attrs .= $ch if !($ch eq '>'); } elsif ($ch =~ /\s/) { $hadsp = 1; } else { $tag .= $ch if !($ch eq '>'); } if ($ch eq '>') { $intag = 0; $endlnn = $lnn; } elsif (($ch eq '[')&&($pc eq 'A')&&($tag =~ /^<\!\[CDATA\[/)) { $incdata = 1; prt("\n$clnn: Begin CDATA\n"); } # on EXIT a tag if (!$intag) { $tag = trim_all($tag); $clnn = sprintf("%4d",$lnn); $msg = "$clnn: "; $msg .= "Text [".trim_all($txt)."]\n : " if (length($txt) && !($txt =~ /^\s+$/)); $msg .= "End tag [$tag] "; $msg .= "Attrs [".trim_all($attrs)."] " if (length($attrs)); my $hr = get_attribute_hash_ref("",$inf,1,1); if ($tag =~ /^(\!|\?)/) { $msg .= "Special"; } else { if ($attrs =~ /\/$/) { $msg .= "self-closed"; } elsif ($tag =~ /^\//) { $tag = substr($tag,1); $msg .= "Close"; $isclose = 1; if ($tag =~ /^p$/i) { $endp = $i; $msg .= "($tag)$i:$bgnp:$endp"; } elsif ($tag =~ /^a$/i) { $enda = $i; $msg .= "($tag)$i:$bgna:$enda"; } elsif ($tag =~ /^div$/i) { $enddiv = $i; $msg .= "($tag)$i Div:$bgndiv:$enddiv Para:$bgnp:$endp"; } if (@elements) { $last = $elements[-1][0]; $lln = $elements[-1][1]; if ($last eq $tag) { pop @elements; } else { prt("\nWARNING:Last [$last]$lln NE [$tag]$lnn line=[".trim_all($line)."]\n"); show_stack_elements($tag,\@elements,\@lines); pgm_exit(1,"ERROR:[1]: It is useless to continue when the element stack is out of order!\n"); } } else { prt("The stack has NO elements... The current closing element is [$tag]\n"); pgm_exit(1,"ERROR:[2]: It is useless to continue when the element stack is out of order! [2]\n"); } } else { $msg .= "Open"; push(@elements,[$tag,$bgnlnn,$endlnn]); $hr = get_attribute_hash_ref($attrs,$inf,1,1); if ($tag =~ /^p$/i) { $bgnp = $i; $msg .= "($tag)$i"; $inpara++; #prt("Bump para $inpara $lnn:[$msg] ln=[$line]\n"); } elsif ($tag =~ /^a$/i) { $bgna = $i; $msg .= "($tag)$i"; if ($inpara) { if (defined ${$hr}{'href'}) { $val = ${$hr}{'href'}; $msg .= " href=\"$val\""; if ($val =~ /^#/) { $ignore = 1; # just an infile link } elsif ($val =~ /^http/i) { $ignore = 1; # just a remote like } else { $ind = index($val,'#'); if ($ind > 0) { $val = substr($val,0,$ind); } $msg .= " ($val)"; $ff = $fildir.$val; if (-f $ff) { $ignore = 1; # found this file $msg .= "ok"; } elsif (-d $ff) { $ignore = 1; # found this directory $msg .= "okd"; } else { $missed++; # 0 1 2 3 4 5 6 7 push(@missed_files, [$val,$i,0,0,[],0,0,$hr]); } } $hrcnt++; } } } elsif ($tag =~ /^div$/i) { $bgndiv = $i; if (defined ${$hr}{'class'}) { $val = ${$hr}{'class'}; if ($val eq 'footnote-text') { $divfnt = 1; $bgndft = $i; prt("\n$clnn: Entered footnote text div\n"); } } } } } if ($inpara || $divfnt) { push(@msg_stack,$msg); } # prt("$msg\n") if ($isclose && ($tag =~ /^p$/i)) { if (!$divfnt) { if ($inpara) { $inpara--; $val = scalar @msg_stack; if ((!$ignore || $missed) && $hrcnt && $val && $show_para_stack) { prt("\nCount $val, from msg_stack...hrcnt = $hrcnt\n"); foreach $msg (@msg_stack) { prt("$msg\n"); } } if ($missed) { # 0 1 2 3 # [$val,$i,0,0,...]); $val = -1; while($missed) { $missed_files[$val][2] = $bgnp; $missed_files[$val][3] = $endp; $missed--; $val--; } } $ignore = 0; $missed = 0; } @msg_stack = (); $hrcnt = 0; } } elsif ($isclose && ($tag =~ /^div$/i)) { if ($divfnt) { $val = scalar @msg_stack; if ($val && $show_div_stack) { prt("\nCount $val, from msg_stack...hrcnt = $hrcnt\n"); foreach $msg (@msg_stack) { prt("$msg\n"); } } $enddft = $i; $ignore = 0; $missed = 0; @msg_stack = (); $hrcnt = 0; } $divfnt = 0; $ignore = 0; $missed = 0; @msg_stack = (); $hrcnt = 0; } push(@g_all_elements,[$tag,$bgnlnn,$endlnn,$txt,$attrs,$hr]); # reset $txt = ''; $tag = ''; $attrs = ''; $hadsp = 0; $isclose = 0; } } else { if ($ch eq '<') { $tag = ''; $intag = 1; $hadsp = 0; $bgnlnn = $lnn; } else { $txt .= $ch; } } } # reached end of line - get next $ch = ' '; $txt .= $ch if (length($txt) && !($txt =~ /\s$/)); if ($hadsp) { $attrs .= $ch if (length($attrs) && !($attrs =~ /\s$/)); } else { $tag .= $ch if (length($tag) && !($tag =~ /\s$/)); } $ppc = $pc; $pc = $ch; } if (@elements) { show_stack_elements("At-end-of-file",\@elements,\@lines); pgm_exit(1,"ERROR:[3] It is useless to continue when the element stack is out of order!\n"); } my $minm = $lncnt; my $maxm = 0; if (@missed_files) { $val = scalar @missed_files; # 0 1 2 3 4 5 6 7 # push(@missed_files, [$val,$i,0,0,[],0,0,$hr]); prt("\nFound $val missed files...\n"); $bgna = 0; $enda = 0; for ($j = 0; $j < $val; $j++) { $tag = $missed_files[$j][0]; $ind = $missed_files[$j][1]; $bgnp = $missed_files[$j][2]; $endp = $missed_files[$j][3]; $ff = $us_dir.$tag; $msg = ''; $minm = $bgnp if ($bgnp < $minm); $maxm = $endp if ($endp > $maxm); if (-f $ff) { my $ha = get_ufile_text($ff,0); $msg .= "ok ".scalar @{$ha}; $missed_files[$j][4] = $ha; my @arr = (); for ($i = $bgnp; $i < $endp; $i++) { $line = $lines[$i]; chomp $line; push(@arr,$line); } $missed_files[$j][5] = [@arr]; # $msg .= "ok"; } else { $msg .= "NF"; } if (($bgnp == $bgna)&&($endp == $enda)) { $msg .= " Repeated BLOCK"; } prt("Line $ind:$bgnp:$endp: $tag $msg\n"); $bgna = $bgnp; $enda = $endp; } } my %hash = (); $hash{'file_name'} = $inf; $hash{'file_lines'} = [@lines]; $hash{'file_missed'} = [@missed_files]; $hash{'file_dfnt'} = [$bgndft,$enddft,$minm,$maxm]; return \%hash; } sub fix_these_lines($$$$) { my ($off,$rl,$hr,$dbg) = @_; # = \@arr, ${$rma}[$first][7]); my $cnt = scalar @{$rl}; my $txt = ''; my @nlines = (); my ($i,$line,$len,$ch,$intg,$hsp,$tag,$atts,$href,$chr); my ($j,$lnn,$ind,$tatts,$k,$v); my @fixes = (); my @divfnt_fix1 = (); my @divfnt_fix2 = (); $intg = 0; $hsp = 0; $tag = ''; $atts = ''; if (!defined ${$hr}{'href'}) { pgm_exit(1,"ERROR: Passed ref does NOT have a 'href' attribute!\n"); return ($rl,undef,undef); } $href = ${$hr}{'href'}; for ($i = 0; $i < $cnt; $i++) { $line = ${$rl}[$i]; # already chomped line $len = length($line); $lnn = $off + $i; for ($j = 0; $j < $len; $j++) { $ch = substr($line,$j,1); if ($intg) { if ($hsp) { $atts .= $ch if ($ch ne '>'); } elsif ($ch =~ /\s/) { $hsp = 1; } else { $tag .= $ch if ($ch ne '>'); } if ($ch eq '>') { prt("End TAG:$lnn:$j: [$tag] [$atts]\n") if ($dbg & 1); $intg = 0; if ((substr($tag,1,1) eq '!')||($tag =~ /^<(\!|\?)/)) { # specials <!DOC, <!--, <?php $txt .= $tag; if (length($atts)) { $txt .= ' '.$atts; } $txt .= '>'; } elsif (length($atts)) { $tatts = trim_all($atts); $tatts =~ s/\/$//; $tatts = trim_all($tatts); $tatts =~ s/>$//; $tatts = trim_all($tatts); my $rat = get_attribute_hash_ref($tatts,"<fix_these_lines>",1,1); if (defined ${$rat}{'href'}) { $chr = ${$rat}{'href'}; if ($chr eq $href) { $ind = index($chr,'.'); if ($ind > 0) { $chr = substr($chr,0,$ind); prt("Found HREF to fix [$href] [$chr] ($i:$j)\n"); ${$rat}{'href'} = '#'.$chr; push(@fixes," <a name=\"ret_".$chr."\"></a>"); push(@divfnt_fix1," <a name=\"".$chr."\"></a>"); push(@divfnt_fix2," <a href=\"#ret_".$chr."\">back</a>"); } else { prtw("WARNING: Found HREF to fix [$href] [$chr] ($i:$j) BUT COULD NOT FIX IT!\n"); } } } $txt .= $tag; $k = 0; foreach $k (keys %{$rat}) { $v = ${$rat}{$k}; $txt .= ' ' if ($k); $txt .= "$k=\"$v\""; $k++; } $txt .= '>'; } else { $txt .= $tag.'>'; } $atts = ''; $tag = ''; } } else { if ($ch eq '<') { $tag = $ch; $intg = 1; $hsp = 0; } else { $txt .= $ch; } } } # done this line if (length($txt)) { push(@nlines,$txt); prt("New line [$txt]\n") if ($dbg & 2); } $txt = ''; } if (length($txt)) { push(@nlines,$txt); prt("New line [$txt]\n") if ($dbg & 2); } if (@fixes) { foreach $txt (@fixes) { unshift(@nlines,$txt); # add in NEW line(s) at top of para } } return \@nlines,\@divfnt_fix1,\@divfnt_fix2; # return $rl; } sub show_ref_hash($) { my ($rh) = @_; if (!defined ${$rh}{'file_name'} || !defined ${$rh}{'file_lines'} || !defined ${$rh}{'file_missed'}) { pgm_exit(1,"Ref HASH paseed is NOT compatible..."); } my $fil = ${$rh}{'file_name'}; my $rla = ${$rh}{'file_lines'}; my $rma = ${$rh}{'file_missed'}; my $rdft = ${$rh}{'file_dfnt'}; my $lncnt = scalar @{$rla}; my $mcnt = scalar @{$rma}; my $bdft = ${$rdft}[0]; my $edft = ${$rdft}[1]; my $minm = ${$rdft}[2]; my $maxm = ${$rdft}[3]; prt("File [$fil], $lncnt lines, missed $mcnt, min $minm, max $maxm, dfn $bdft - $edft "); # some sanity checks if ($mcnt && ($minm < $maxm) && ($maxm < $lncnt) && ($maxm < $bdft) && ($edft < $lncnt)) { prt("ok\n"); my @nlines = (); my ($i,$line,$first,$last,$j,$cnt); my ($nrlns,$fx1,$fx2,$k,$ha); my ($k2,$max,$max1,$max2,$min); # 0 1 2 = minp # 3 4 5 # 6=done flag # 7=missed hash ref of anchor to be changes $first = -1; for ($i = 0; $i < $mcnt; $i++) { ${$rma}[$i][6] = 0; if (${$rma}[$i][2] == $minm) { $first = $i; $last = ${$rma}[$i][3]; } } pgm_exit(1,"ERROR: Did NOT find first!\n") if ($first == -1); # 1: Get to FIRST need for change for ($i = 0; $i < $minm; $i++) { $line = ${$rla}[$i]; chomp $line; push(@nlines,$line); } # now fix the changes my @fixes = (); while ($first != -1) { ${$rma}[$first][6] = 1; # set as DONE $ha = ${$rma}[$first][4]; # get NEW lines for div fnt section my @arr = (); prt("Getting lines to change...\n"); for (; $i <= $last; $i++) { $line = ${$rla}[$i]; chomp $line; push(@arr,$line); prt("$i: [$line]\n"); } ($nrlns,$fx1,$fx2) = fix_these_lines($minm,\@arr, ${$rma}[$first][7],0); push(@fixes,[$first,$fx1,$fx2]); $cnt = scalar @{$nrlns}; for ($j = 0; $j < $cnt; $j++) { $line = ${$nrlns}[$j]; push(@nlines,$line); } $first = -1; $min = $maxm; for ($j = 0; $j < $mcnt; $j++) { if (${$rma}[$j][6] == 0) { $k = ${$rma}[$i][2]; if ($k < $min) { $min = $k; $first = $j; $last = ${$rma}[$j][3]; } } } } for (; $i < $lncnt; $i++) { $line = ${$rla}[$i]; chomp $line; last if ($i == $edft); push(@nlines,$line); } my $fcnt = scalar @fixes; for ($j = 0; $j < $fcnt; $j++) { $first = $fixes[$j][0]; $fx1 = $fixes[$j][1]; $fx2 = $fixes[$j][2]; $ha = ${$rma}[$first][4]; # get NEW lines for div fnt section $max = scalar(@{$ha}); $max1 = scalar(@{$fx1}); for ($k = 0; $k < $max1; $k++) { $line = ${$fx1}[$k]; push(@nlines,$line); } $max2 = $max; $max2-- if ($max2); for ($k2 = 0; $k2 < $max2; $k2++) { $line = ${$ha}[$k2]; push(@nlines,$line); } $max2 = scalar(@{$fx2}); for ($k = 0; $k < $max2; $k++) { $line = ${$fx2}[$k]; push(@nlines,$line); } # finish balance for ( ; $k2 < $max; $k2++) { $line = ${$ha}[$k2]; push(@nlines,$line); } } for (; $i < $lncnt; $i++) { $line = ${$rla}[$i]; chomp $line; push(@nlines,$line); } write2file(join("\n",@nlines)."\n",'tempnew.html'); prt("Written to tempnew.html...\n"); } else { prt("\nERROR: Somehting does not add up!\n"); prt("NOT if ($mcnt && ($minm < $maxm) && ($maxm < $lncnt) && ($maxm < $bdft) && ($edft < $lncnt))! UGH!\n"); } } ######################################### ### MAIN ### parse_args(@ARGV); prt( "$pgmname: in [$cwd]: Process $in_file...\n" ); my $ref_hash = process_file($in_file); show_ref_hash($ref_hash); pgm_exit(0,"Normal exit(0)"); ######################################## sub parse_args { my (@av) = @_; my ($arg); while (@av) { $arg = $av[0]; $in_file = $arg; shift @av; } if (!length($in_file)) { $in_file = $def_infile; } } # eof - template.pl