Generated: Tue Feb 2 17:54:44 2010 from loadtable.pl 2006/09/03 3.7 KB.
#!/Perl
# loadtable.pl
# AIM: Load a HTML table into a multidimensional array
#
# Flow: read the index page, isolate table number $tbl_num, then turn each
# <td>...</td> cell of the form
#     <a href="FILE">NAME</a> <br>YYYY/MM/DD <br>SIZE
# into one row of @tbl_set.
#
# NOTE(review): prt/open_log/close_log are presumably supplied by logfile.pl
# and tag2newline/write2file by htmltools.pl - neither file is visible here.
use strict;
use warnings;

require 'logfile.pl'   or die "ERROR: Unable to load logfile.pl ...\n";
require 'htmltools.pl' or die "ERROR: Unable to load htmltools.pl ...\n";

# log file stuff
my ($LF);    # not referenced anywhere else in this file
my $outfile = 'temp'.$0.'.txt';
open_log($outfile);
prt( "$0 ... Hello, World ...\n" );

# debug switches: non-zero enables the matching diagnostic prt() call
my $dbg21 = 0;    # dump each raw <td> block and its three captured parts
my $dbg22 = 0;    # dump each parsed href/file/date/size record

my $tbl_num = 1;    # want the first table
my $in_index = 'P26\index.htm';
###my $in_index = 'tempind.htm';

# File-scoped state shared by the subs below
my @tbl_arr = ();    # lines of the selected table (filled by get_table_array)
my $tacnt   = 0;     # number of entries in @tbl_arr
my $lncnt   = 0;     # line count of whichever array is currently being scanned
my $tblcnt  = 0;     # running count of <table> tags seen so far
my $indcnt  = 0;     # number of rows collected in @tbl_set
my @larr    = ();    # raw lines of the input file
my @larr2   = ();    # input re-split after tag2newline()
my $ln      = '';    # current working line
my @tbl_set = ();    # result rows:
                     #   [$hrf, $fil, $dt, $sz, $yr, $mt, $dy, 0]

get_old_index( $in_index );
$indcnt = scalar @tbl_set;
prt( "Got $indcnt from [$in_index] ...\n" );

close_log($outfile,1);
exit(0);

# get_old_index( $file )
#
# Reads $file, re-splits its content via tag2newline(..., 'td'), extracts the
# wanted table with get_table_array(), and pushes one array ref per parsed
# cell onto @tbl_set.
#
# Param:   $file - path of the HTML index to load.
# Returns: nothing useful; results are left in @tbl_set.
# Side effects: writes the re-split text to 'tempout3.txt' through
#   write2file(); logs through prt(); updates the file-scoped
#   @larr, @larr2, $ln, $lncnt and $tacnt.
#
# The former ($) prototype was dropped: the only call site precedes this
# definition, so the prototype was never applied to it.
sub get_old_index {
    my ($ind) = shift;

    # Three-argument open with a lexical handle: the file name can no longer
    # be misread as an open mode or a pipe.
    if (open my $in_fh, '<', $ind) {
        @larr = <$in_fh>;    # slurp it all in ...
        close($in_fh);
        $lncnt = scalar @larr;
        prt( "Got $lncnt lines to process ...\n" );

        # presumably tag2newline() inserts line breaks around <td> tags so
        # that each cell can be scanned line by line - TODO confirm in
        # htmltools.pl
        $ln = tag2newline( join('',@larr), 'td' );
        @larr2 = split(/\n/, $ln);
        write2file( join("\n",@larr2), 'tempout3.txt');

        if (get_table_array()) {
            $tacnt = scalar @tbl_arr;
            prt( "Got $tacnt lines to process ...\n" );
        }
        else {
            prt( "Failed to find table $tbl_num ...\n" );
        }
    }
    else {
        prt( "Warning: Failed to open $ind ...\n" );
    }

    return if $tacnt <= 0;

    for (my $i = 0; $i < $tacnt; $i++) {
        $ln = $tbl_arr[$i];    # extract a line
        next unless $ln =~ /<td.*>/i;

        # A cell may continue over several lines: keep appending following
        # lines (advancing the outer index) until the closing </td> appears
        # or the table runs out.
        while ($ln !~ /<\/td>/i) {
            $i++;
            last if $i >= $tacnt;
            $ln .= ' '.$tbl_arr[$i];
        }

        # got begin and end of <td>...</td> block
        # [^>]* lets the opening tag carry attributes, in line with the
        # /<td.*>/ test above, without swallowing the cell content.
        next unless $ln =~ /(<td[^>]*>)(.*)(<\/td>)/i;
        my ($tds, $inb, $tde) = ($1, $2, $3);

        # like Line [<td><a href="adjrt01.htm">adjrt01.htm</a> <br>2006/05/23 <br>10,213</td>] =
        # [<td>][<a href="adjrt01.htm">adjrt01.htm</a> <br>2006/05/23 <br>10,213][</td>] ...
        prt( "Line [$ln] = \n[$tds][$inb][$tde] ...\n" ) if ($dbg21);

        # link, then <br> + a token starting with 4 digits (the date), then
        # <br> + a token starting with a digit (the size)
        if ($inb =~ /<a\s*href=\"(.*)\">(.*)<\/a>\s*<br>(\d{4}\S*)\s*<br>(\d{1}\S*)\s*/i) {
            my ($hrf, $fil, $dt, $sz) = ($1, $2, $3, $4);
            my ($yr, $mt, $dy) = split(/\//,$dt);
            $sz =~ s/,//g;    # "10,213" -> "10213"
            # NOTE(review): the meaning of the trailing 0 is not visible in
            # this file - confirm with the consumers of @tbl_set.
            push(@tbl_set, [$hrf, $fil, $dt, $sz, $yr, $mt, $dy, 0]);
            prt("href=[$hrf], file=[$fil], date=[$dt][$yr][$mt][$dy], size=[$sz]...\n") if ($dbg22);
        }
    }
    return;
}

# get_table_array()
#
# Scans @larr2 for table number $tbl_num (counted by opening <table> tags)
# and copies its lines, from the opening tag through the line holding
# </table>, into @tbl_arr. Trailing LF/CR are stripped from each line.
#
# Returns: 1 when the closing </table> of the wanted table was found,
#          0 otherwise (@tbl_arr may then hold a partial table).
# Side effects: updates the file-scoped $lncnt, $ln, $tblcnt and @tbl_arr;
#   logs through prt().
sub get_table_array {
    my $fnd = 0;
    $lncnt = scalar @larr2;

    for (my $i = 0; $i < $lncnt; $i++) {
        $ln = $larr2[$i];    # extract a line
        chomp $ln;           # remove LF (\n)
        $ln =~ s/\r$//;      # and remove CR, if present
        next unless $ln =~ /<table.*>/i;

        prt( "FOUND TABLE: [$ln] ...\n" );
        $tblcnt++;           # bump table counter
        next unless $tblcnt == $tbl_num;

        prt( "Is my TABLE [$tbl_num] ...\n" );
        push(@tbl_arr,$ln);

        if ($ln =~ /<\/table>/i) {
            # Whole table sits on this one line: that is still a success.
            $fnd = 1;
        }
        else {
            # Collect the following lines up to and including </table>.
            for ($i++; $i < $lncnt; $i++) {
                $ln = $larr2[$i];    # extract a line
                chomp $ln;           # remove LF (\n)
                $ln =~ s/\r$//;      # and remove CR, if present
                push(@tbl_arr,$ln);
                if ($ln =~ /<\/table>/i) {
                    prt( "END TABLE: [$ln] ...\n" );
                    $fnd = 1;
                    last;
                }
            }
        }
        last;    # only the requested table is wanted
    }
    return $fnd;
}

# eof - loadtable.pl