table2hash.pl to HTML.

index -|- end

Generated: Tue Feb 2 17:54:57 2010 from table2hash.pl 2008/01/29 3.3 KB.

#!/perl -w
# NAME: table2hash.pl
# AIM: Read in a HTML table, and convert to a Perl HASH structure ...
# 29/01/2008 - geoff mclane
use strict;
use warnings;
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";
# log file stuff
my ($LF);
my $pgmname = $0;
if ($pgmname =~ /\w{1}:\\.*/) {
   my @tmpsp = split(/\\/,$pgmname);
   $pgmname = $tmpsp[-1];
}
my $outfile = "temp.$pgmname.txt";
open_log($outfile);
prt( "$0 ... Hello, World ...\n" );
my $in_file = '..\html\metar_abreviations.htm';
# debug
my $dbg1 = 0;   # show open/close of rows and columns
my @tblitems = ();
if (open IF, "<$in_file") {
   my @lines = <IF>;
   close IF;
   my $lcnt = scalar @lines;
   prt( "$lcnt lines to process ...\n" );
   my $htxt = join(' ', @lines);
   $htxt =~ s/\n/ /gm;
   $htxt =~ s/\r/ /gm;
   $htxt =~ s/\t/ /gm;
   $htxt = trim_all($htxt);
   my $ccnt = length($htxt);
   prt( "Processing $ccnt characters ...\n" );
   process_chars($htxt);
} else {
   prt("ERROR: Can NOT open $in_file ...\n");
}
if (@tblitems) {
   my $icnt = scalar @tblitems;
   prt( "Got $icnt table items ...\n" );
   prt( "my \%acronyms = (\n" );
   for (my $t = 0; $t < $icnt; $t++) {
      my $i1 = $tblitems[$t][0];
      my $i2 = $tblitems[$t][1];
      my $msg = "\t'$i1' => '$i2'";
      $msg .= ',' if (($t + 1) < $icnt);
      prt( "$msg\n" );
   }
   prt( ");\n" );
}
close_log($outfile,1);
exit(0);
sub process_chars {
   my ($txt) = shift;
   my $tl = length($txt);
   my ($ch, $intbl, $tag, $row, $col, $attr, $item, $colcnt, $ftag);
   my ($itm1,$itm2,$itm3,$itm4);   # we have FOUR columns
   $intbl = 0;
   $row = 0;
   $col = 0;
   $item = '';
   $colcnt = 0;
   for (my $i = 0; $i < $tl; $i++) {
      $ch = substr($txt,$i,1);
      if ($ch eq '<') {
         if (length($item)) {
            if ($intbl && $col) {
               prt( "Table Item = [$item] ($colcnt)\n" );
               if ($colcnt == 1) {
                  $itm1 = $item;
               } elsif ($colcnt == 2) {
                  $itm2 = $item;
               } elsif ($colcnt == 3) {
                  $itm3 = $item;
               } elsif ($colcnt == 4) {
                  $itm4 = $item;
               }
            }
            $item = '';
         }
         $tag = '';
         $attr = '';
         $i++;
         for ( ; $i < $tl; $i++) {
            $ch = substr($txt,$i,1);
            if (($ch eq '>')||($ch =~ /\s/)) {
               if ($ch ne '>') {
                  $i++;
                  $attr = '';
                  for ( ; $i < $tl; $i++) {
                     $ch = substr($txt,$i,1);
                     if ($ch eq '>') {
                        last;
                     }
                     $attr .= $ch;
                  }
               }
               last;
            }
            $tag .= $ch;
         }
         $ftag = '<'.$tag;
         $ftag .= ' ' if (length($attr));
         $ftag .= $attr.'>';
         if ($tag =~ /^table$/i) {
            $intbl++;
            prt( "In table $ftag...($intbl)\n" );
         } elsif ($tag =~ /^\/table$/) {
            $intbl-- if ($intbl);
            prt( "Out table $ftag ...($intbl)\n" );
         } elsif ($tag =~ /^tr$/) {
            prt( "New row $ftag ... $row - clear column count ...\n" ) if ($dbg1);
            $colcnt = 0;
         } elsif ($tag =~ /^\/tr$/) {
            $row-- if ($row);
            prt( "End row $ftag ... $row \n" ) if ($dbg1);
            # end of row
            push(@tblitems, [$itm1,$itm2]);
            push(@tblitems, [$itm3,$itm4]);
         } elsif ($tag =~ /^td$/) {
            $col++;
            $colcnt++;
            prt( "New column $ftag ... $col \n" ) if ($dbg1);
         } elsif ($tag =~ /^\/td$/) {
            $col-- if ($col);
            prt( "End column $ftag ... $col \n" ) if ($dbg1);
         }
      } else {
         $item .= $ch;
      }
   }
}
# eof - table2hash.pl

index -|- top

checked by tidy  Valid HTML 4.01 Transitional