Generated: Tue Jun 8 17:26:59 2010 from htmimglist.pl 2010/03/19 2.2 KB.
#!/perl -w
# NAME: htmimglist.pl
# AIM: given a HTML file, list all the 'image' links in the file.
# 19/03/2010 - also report existence of file...
# 06/05/2009 geoff mclane http://geoffair.net/mperl
use strict;
use warnings;
use File::Basename; # split path into ($nm,$dr)=fileparse($ff); or ($nm,$dir,$ext)=fileparse($fil,qr/\.[^.]*/);

# Directory that holds the helper libraries; the log file is written here too.
my $perl_base = 'C:/GTools/perl';
unshift(@INC, $perl_base);
# NOTE(review): prt/open_log/close_log presumably come from logfile.pl and
# ret_imgs_array from htmltools.pl - neither file is visible here; confirm.
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";
require 'htmltools.pl' or die "Unable to load htmltools.pl ...\n";

# log file stuff
# Reduce $0 to its bare file name. Both '\' and '/' are accepted as separators
# so the log name stays clean however the script was invoked.
my $pgmname = $0;
$pgmname =~ s{\A.*[\\/]}{};
my $outfile = $perl_base."\\temp.$pgmname.txt";
open_log($outfile);

# Default input file; replaced by the first command line argument, if any.
my $in_file = 'C:/HOMEPAGE/GA/fg/gshhs-02.htm';
my $load_log = 0;   # passed through to close_log()

# debug
my $dbg3 = 1; # show processing...

#############################
# parse_file($fil)
# Read the HTML file $fil, collect its image links with ret_imgs_array() and
# print one line per link: the link, padded to the width of the longest one,
# followed by "ok" when the link, taken relative to the directory of $fil,
# names an existing plain file, or "NOT FOUND" otherwise.
# A file that cannot be opened is reported through prt() and nothing is listed.
sub parse_file {
    my ($fil) = @_;
    my (undef, $dr) = fileparse($fil); # get the directory

    # Three-argument open with a lexical handle: the file name can never be
    # misread as an open mode, and no global bareword handle is left behind.
    my $in_fh;
    if (!open($in_fh, '<', $fil)) {
        prt( "ERROR: failed to open $fil... $!\n" );
        return;
    }
    my @lines = <$in_fh>;
    close $in_fh;

    my $lncnt = scalar @lines;
    prt( "Processing $lncnt lines from $fil ...\n" ) if ($dbg3);
    my $text = join('', @lines);
    my @imgs = ret_imgs_array($text);

    # Width of the longest link, used to line up the status column.
    my $width = 0;
    foreach my $img (@imgs) {
        my $len = length($img);
        $width = $len if ($len > $width);
    }

    $dr .= '/' if ($dr !~ m{[\\/]\z});
    foreach my $img (@imgs) {
        my $ff  = $dr.$img;
        my $msg = (-f $ff) ? "ok" : "NOT FOUND";
        # '%-*s' left-justifies within $width without modifying @imgs itself.
        prt( sprintf("%-*s %s\n", $width, $img, $msg) );
    }
    return;
}

parse_args(@ARGV);
prt( "$pgmname: Getting images from [$in_file]...\n" );
parse_file($in_file);
close_log($outfile,$load_log);
exit(0);

###################################
# parse_args(@av)
# Take the first argument, when one is given, as the input file name.
# Any further arguments are ignored.
sub parse_args {
    my (@av) = @_;
    $in_file = $av[0] if (@av);
    return;
}

# eof - htmimglist.pl