Generated: Tue Feb 2 17:54:57 2010 from table2hash.pl 2008/01/29 3.3 KB.
#!/perl -w # NAME: table2hash.pl # AIM: Read in a HTML table, and convert to a Perl HASH structure ... # 29/01/2008 - geoff mclane use strict; use warnings; require 'logfile.pl' or die "Unable to load logfile.pl ...\n"; # log file stuff my ($LF); my $pgmname = $0; if ($pgmname =~ /\w{1}:\\.*/) { my @tmpsp = split(/\\/,$pgmname); $pgmname = $tmpsp[-1]; } my $outfile = "temp.$pgmname.txt"; open_log($outfile); prt( "$0 ... Hello, World ...\n" ); my $in_file = '..\html\metar_abreviations.htm'; # debug my $dbg1 = 0; # show open/close of rows and columns my @tblitems = (); if (open IF, "<$in_file") { my @lines = <IF>; close IF; my $lcnt = scalar @lines; prt( "$lcnt lines to process ...\n" ); my $htxt = join(' ', @lines); $htxt =~ s/\n/ /gm; $htxt =~ s/\r/ /gm; $htxt =~ s/\t/ /gm; $htxt = trim_all($htxt); my $ccnt = length($htxt); prt( "Processing $ccnt characters ...\n" ); process_chars($htxt); } else { prt("ERROR: Can NOT open $in_file ...\n"); } if (@tblitems) { my $icnt = scalar @tblitems; prt( "Got $icnt table items ...\n" ); prt( "my \%acronyms = (\n" ); for (my $t = 0; $t < $icnt; $t++) { my $i1 = $tblitems[$t][0]; my $i2 = $tblitems[$t][1]; my $msg = "\t'$i1' => '$i2'"; $msg .= ',' if (($t + 1) < $icnt); prt( "$msg\n" ); } prt( ");\n" ); } close_log($outfile,1); exit(0); sub process_chars { my ($txt) = shift; my $tl = length($txt); my ($ch, $intbl, $tag, $row, $col, $attr, $item, $colcnt, $ftag); my ($itm1,$itm2,$itm3,$itm4); # we have FOUR columns $intbl = 0; $row = 0; $col = 0; $item = ''; $colcnt = 0; for (my $i = 0; $i < $tl; $i++) { $ch = substr($txt,$i,1); if ($ch eq '<') { if (length($item)) { if ($intbl && $col) { prt( "Table Item = [$item] ($colcnt)\n" ); if ($colcnt == 1) { $itm1 = $item; } elsif ($colcnt == 2) { $itm2 = $item; } elsif ($colcnt == 3) { $itm3 = $item; } elsif ($colcnt == 4) { $itm4 = $item; } } $item = ''; } $tag = ''; $attr = ''; $i++; for ( ; $i < $tl; $i++) { $ch = substr($txt,$i,1); if (($ch eq '>')||($ch =~ /\s/)) { if ($ch ne '>') { $i++; $attr = ''; for ( ; $i < $tl; $i++) { $ch = substr($txt,$i,1); if ($ch eq '>') { last; } $attr .= $ch; } } last; } $tag .= $ch; } $ftag = '<'.$tag; $ftag .= ' ' if (length($attr)); $ftag .= $attr.'>'; if ($tag =~ /^table$/i) { $intbl++; prt( "In table $ftag...($intbl)\n" ); } elsif ($tag =~ /^\/table$/) { $intbl-- if ($intbl); prt( "Out table $ftag ...($intbl)\n" ); } elsif ($tag =~ /^tr$/) { prt( "New row $ftag ... $row - clear column count ...\n" ) if ($dbg1); $colcnt = 0; } elsif ($tag =~ /^\/tr$/) { $row-- if ($row); prt( "End row $ftag ... $row \n" ) if ($dbg1); # end of row push(@tblitems, [$itm1,$itm2]); push(@tblitems, [$itm3,$itm4]); } elsif ($tag =~ /^td$/) { $col++; $colcnt++; prt( "New column $ftag ... $col \n" ) if ($dbg1); } elsif ($tag =~ /^\/td$/) { $col-- if ($col); prt( "End column $ftag ... $col \n" ) if ($dbg1); } } else { $item .= $ch; } } } # eof - table2hash.pl