loadtable.pl to HTML.

index -|- end

Generated: Tue Feb 2 17:54:44 2010 from loadtable.pl 2006/09/03 3.7 KB.

#!/Perl
# loadtable.pl
# AIM: Load an HTML table into a multidimensional array
use strict;
use warnings;
use File::Basename qw(basename);
require 'logfile.pl' or die "ERROR: Unable to load logfile.pl ...\n";
require 'htmltools.pl' or die "ERROR: Unable to load htmltools.pl ...\n";

# log file stuff
# Build the log name from the script's base name only: the raw $0 may carry
# a directory part (e.g. "./loadtable.pl"), which would otherwise produce a
# path such as "temp./loadtable.pl.txt" inside a non-existent directory.
my $outfile = 'temp'.basename($0).'.txt';
open_log($outfile);
prt( "$0 ... Hello, World ...\n" );

# debug switches
my $dbg21 = 0;     # when set, log how each <td>...</td> cell was split
my $dbg22 = 0;     # when set, log each record extracted from a cell

# configuration
my $tbl_num = 1;   # want the first table
my $in_index = 'P26\index.htm';
###my $in_index = 'tempind.htm';

# working state shared with the subs below
my @tbl_arr = ();  # lines of the selected table (filled by get_table_array)
my $tacnt = 0;     # number of lines in @tbl_arr
my $lncnt = 0;     # number of input lines currently being processed
my $tblcnt = 0;    # running count of <table> tags seen
my $indcnt = 0;    # number of records collected in @tbl_set
my @larr = ();     # raw lines of the input file
my @larr2 = ();    # input re-split into lines after tag2newline()
my $ln = '';       # scratch: the line currently being examined

# Result set: one array ref per table cell that matched, laid out as
##   push(@tbl_set, [$hrf, $fil, $dt, $sz, $yr, $mt, $dy, 0]);
my @tbl_set = ();

# main: load the index, report how many records were found, close the log
get_old_index( $in_index );
$indcnt = scalar @tbl_set;
prt( "Got $indcnt from [$in_index] ...\n" );
# NOTE(review): meaning of the second argument is defined in logfile.pl - confirm
close_log($outfile,1);
exit(0);
# get_old_index( $ind )
#
# Load the HTML index file $ind and collect one record per table cell into
# the file-level @tbl_set.
#
# Steps:
#   1. Slurp the file into @larr.
#   2. Pass the joined text through tag2newline(..., 'td') and re-split it
#      into @larr2 (presumably this puts each <td> on its own line - confirm
#      in htmltools.pl). The result is also dumped to 'tempout3.txt'.
#   3. Call get_table_array() to copy the lines of table number $tbl_num
#      into @tbl_arr.
#   4. For every <td>...</td> cell of the form
#         <a href="HREF">FILE</a> <br>YYYY/MM/DD <br>SIZE
#      push [HREF, FILE, DATE, SIZE, YEAR, MONTH, DAY, 0] onto @tbl_set,
#      with thousands separators stripped from SIZE.
#
# Parameters: $ind - path of the HTML file to read.
# Returns:    nothing meaningful; results are left in @tbl_set.
# A file that cannot be opened, or a missing table, is only logged via prt().
sub get_old_index {
   my ($ind) = @_;

   # Three-argument open with a lexical handle: the mode can no longer be
   # influenced by the content of $ind and no global bareword handle leaks.
   if (open my $fh, '<', $ind) {
      @larr = <$fh>; # slurp it all in ...
      close($fh);
      $lncnt = scalar @larr;
      prt( "Got $lncnt lines to process ...\n" );
      ###write2file( join('',@larr), 'tempout.txt');
      $ln = tag2newline( join('',@larr), 'td' );
      ###$ln = tag2newline( $ln, 'br' );
      @larr2 = split(/\n/, $ln);
      write2file( join("\n",@larr2), 'tempout3.txt');
      if (get_table_array()) {
         $tacnt = scalar @tbl_arr;
         prt( "Got $tacnt lines to process ...\n" );
      } else {
         prt( "Failed to find table $tbl_num ...\n" );
      }
   } else {
      prt( "Warning: Failed to open $ind ...\n" );
   }

   # $tacnt stays 0 when the file or the table was not found, so the loop
   # below is simply skipped in that case.
   for (my $i = 0; $i < $tacnt; $i++) {
      $ln = $tbl_arr[$i]; # extract a line
      next unless $ln =~ /<td\b[^>]*>/i;

      # A cell may span several lines: keep appending until the closing tag
      # shows up or the table lines run out.
      while ($ln !~ /<\/td>/i) {
         $i++;
         last if $i >= $tacnt;
         $ln .= ' '.$tbl_arr[$i];
      }

      # Split into opening tag, inner HTML and closing tag. The opening tag
      # pattern accepts attributes (<td class="x">); the former '<td*.>' was
      # a typo that only matched a bare <td> or unrelated tags such as <tt>.
      next unless $ln =~ /(<td\b[^>]*>)(.*)(<\/td>)/i;
      my ($tds, $inb, $tde) = ($1, $2, $3);
      # like Line [<td><a href="adjrt01.htm">adjrt01.htm</a> <br>2006/05/23 <br>10,213</td>] =
      # [<td>][<a href="adjrt01.htm">adjrt01.htm</a> <br>2006/05/23 <br>10,213][</td>] ...
      prt( "Line [$ln] = \n[$tds][$inb][$tde] ...\n" ) if ($dbg21);

      # Expect: link, <br>, a date starting with 4 digits, <br>, a size
      # starting with a digit.
      next unless $inb =~ /<a\s*href=\"(.*)\">(.*)<\/a>\s*<br>(\d{4}\S*)\s*<br>(\d{1}\S*)\s*/i;
      my ($hrf, $fil, $dt, $sz) = ($1, $2, $3, $4);
      my ($yr, $mt, $dy) = split(/\//,$dt);
      $sz =~ s/,//g; # "10,213" -> "10213"
      push(@tbl_set, [$hrf, $fil, $dt, $sz, $yr, $mt, $dy, 0]);
      prt("href=[$hrf], file=[$fil], date=[$dt][$yr][$mt][$dy], size=[$sz]...\n") if ($dbg22);
   }
   return;
}
# get_table_array()
#
# Scan @larr2 for the $tbl_num'th <table> tag and copy that table's lines,
# from the opening tag through the first following </table>, into @tbl_arr.
# Each line has its trailing LF/CR removed before it is examined or stored.
#
# Uses and updates the file-level $lncnt, $ln and $tblcnt; appends to
# @tbl_arr without clearing it first.
#
# Returns 1 when both the opening and the closing tag of the wanted table
# were seen, 0 otherwise. If the table is never closed, its lines are still
# left in @tbl_arr but 0 is returned.
#
# Limitation: the copy stops at the first </table>, so a table nested inside
# the wanted one ends the capture early.
sub get_table_array {
   my $fnd = 0;
   $lncnt = scalar @larr2;
   for (my $i = 0; $i < $lncnt; $i++) {
      $ln = $larr2[$i]; # extract a line
      chomp $ln; # remove LF (\n)
      $ln =~ s/\r$//; # and remove CR, if present
      next unless $ln =~ /<table\b[^>]*>/i;

      prt( "FOUND TABLE: [$ln] ...\n" );
      $tblcnt++; # bump table counter
      next unless $tblcnt == $tbl_num;

      prt( "Is my TABLE [$tbl_num] ...\n" );
      push(@tbl_arr,$ln);
      if ($ln =~ /<\/table>/i) {
         # Whole table sits on this one line: it is complete, so report
         # success (previously this case fell through and returned 0).
         $fnd = 1;
      } else {
         # Copy the following lines up to and including the closing tag.
         for ($i++; $i < $lncnt; $i++) {
            $ln = $larr2[$i]; # extract a line
            chomp $ln; # remove LF (\n)
            $ln =~ s/\r$//; # and remove CR, if present
            push(@tbl_arr,$ln);
            if ($ln =~ /<\/table>/i) {
               prt( "END TABLE: [$ln] ...\n" );
               $fnd = 1;
               last;
            }
         }
      }
      last; # only the requested table is wanted
   }
   return $fnd;
}
# eof - loadtable.pl

index -|- top

checked by tidy  Valid HTML 4.01 Transitional