Generated: Tue Feb 2 17:54:39 2010 from genalt.pl 2006/01/10 5.3 KB.
#!/Perl print "$0: Hello, World...\n"; # AIM - scan HTML file, and ADD an alt='$fn image', if none found my $in_dir = shift || die "ERROR: Must give input folder, or file ...\n"; my $out_file = 'tempout2.txt'; my $OH; open $OH, ">$out_file" || die "ERROR: Can NOT create output file ... aborting ...\n"; ###my $out_dir = $in_dir; my $out_dir = 'temp2'; # use another OUT folder, until SURE no mess-up!!! my @files = (); my $file; my $line; my $filcnt = 0; my @myfiles = (); my @fnames = (); if ( -f $in_dir) { prt( "Processing file $in_dir ...\n" ); push(@myfiles,$in_dir); $file = get_file_name($in_dir); push(@fnames, $file); $in_dir = get_path_name($in_dir); # GET THE DIRECTORY PART prt( "Processing file $file ... dir=$in_dir\n" ); } else { prt( "Processing directory $in_dir ...\n" ); opendir( DIR, $in_dir) || die "ERROR: Can NOT open $in_dir ... aborting ...\n"; @files = readdir(DIR); closedir DIR; prt( "Found ".scalar @files." items in the directory ...\n" ); foreach $file (sort @files) { if (($file eq '.')||($file eq '..')) { next; } my $ff = $in_dir . '/' . $file; if ( -d $ff ) { #prt( "Ignore Directory $file ...\n"); } else { if (is_my_file($file)) { push(@myfiles,$ff); push(@fnames,$file); $filcnt++; } else { #prt( "IGNORE $file ...\n" ); } } } } prt( "Found ".scalar @myfiles." files to scan ...\n"); $filcnt = 0; my @diff_files = (); foreach $file (@myfiles) { my $on = $out_dir . '/' . $fnames[$filcnt]; prt( "IN=$file OUT=$on\n" ); $filcnt++; open $IF, "<$file" or die "Can not OPEN $file! ... aborting ...\n"; my @lines = <$IF>; # slurp whole file, to an array of lines close($IF); # process, line by line my $lc = 0; my $diff_cnt = 0; foreach $line (@lines) { chomp $line; while( substr($line,0,1) eq ' ' ) { $line = substr($line,1); } ##if ($line =~ /<img (.*)>/i) { ##if ($line =~ /<img (.*)(>|\/>)/io) { if ($line =~ /document\.write/io) { # ignore these lines ##} elsif ($line =~ /<img (.*)(>|\/>)?/io) { } elsif ($line =~ /<img (.*)(>|\/>){1,}/io) { ###prt( "Line $lc: with IMG [$line] ...\n" ); ###prt( "Line $lc: [$1][$2] ...\n" ); my $pl = $1; # get the interesting part if ($pl =~ /alt=/) { ### prt( "With ALT Line $lc: [$pl] ...\n" ); } else { #if ($pl =~ />/) { if ($pl =~ /(.+)>/) { # multiple ... $pl = $1; # at least get this portion ### prt( "Line $lc: MULTIPLE - NO ALT [$pl] ...\n" ); } if ($pl =~ /(.+)>/) { # multiple ... prt( "Line $lc: STILL MULTIPLE - NO ALT [$1][$pl] ...\n" ); } else { if ($pl =~ /src=(.+)/) { my $src = $1; if (substr($src,0,1) eq '"') { $src = substr($src,1); my $pos = index($src,'"'); if ($pos > 0) { $src = substr($src,0,$pos); $src = get_file_name($src); $src =~ s/\./ /; ###my $npl = $pl.' alt="'.$src.' image"'; ###my $mnpl = quotemeta($npl); ###my $mpl = quotemeta($pl); ###$line =~ s/$mpl/$mnpl/o; my $nline = add_alt($line,$src); if (length($nline) > length($line)) { $diff_cnt++; $lines[$lc] = $nline; } prt( "Line $lc: Added ALT [$pl][$src] ...[$line] \n" ); } else { prt( "Line $lc: src?? NO ALT [$pl][$src] ...\n" ); } } else { prt( "Line $lc: SRC NO ALT [$pl][$src] ...\n" ); } } else { prt( "Line $lc: NO ALT [$pl] ...\n" ); } } } } $lc++; } if ($diff_cnt) { # now write it OUT open $OF, ">$on" or die "ERROR: Unable to open OUT file ... aborting ...\n"; foreach $line (@lines) { chomp $line; print $OF "$line\n"; } close $OF; } } sub add_alt { my ($ln,$sc) = @_; # extract line, and source my $len = length($ln); my $i; my $nln = ''; my $done_it = 0; my $si = 0; for ($i = 0; $i < $len; $i++) { my $ch = substr($ln,$i,1); if (($si == 4) && ($done_it == 0)) { if ($ch eq '>') { $nln .= ' alt="image '.$sc.'"'; $done_it = 1; $si++; } } if ($si == 0) { if (uc($ch) eq 'I') { $si++; } } elsif ($si == 1) { if (uc($ch) eq 'M') { $si++; } else { $si = 0; } } elsif ($si == 2) { if (uc($ch) eq 'G') { $si++; } else { $si = 0; } } elsif ($si == 3) { if ($ch eq ' ') { $si++; # set SUCCESS } else { $si = 0; } } $nln .= $ch; } return $nln; } sub is_my_file { my ($f) = @_; my $ret = 0; if ($f =~ /(.*)\.htm$/i) { $ret = 1; } elsif ($f =~ /(.*)\.html$/i) { $ret = 1; # } elsif ($f =~ /(.*)\.shtml$/i) { # $ret = 1; # } elsif ($f =~ /(.*)\.php$/i) { # $ret = 1; } # if ($ret) { # foreach my $f2 (@excl) { # if (uc($f2) eq uc($f)) { # $ret = 0; # last; # } # } # } return $ret; } sub dos_to_unix { my ($d) = @_; $d =~ s/\\/\//g; return $d; } sub get_file_name { my ($pf) = @_; $pf = dos_to_unix($pf); @parts = split('/',$pf); return $parts[$#parts]; } sub get_path_name { my ($pf) = @_; $pf = dos_to_unix($pf); @parts = split('/',$pf); pop( @parts ); # Pops off the last value of the array - the file name $pf = join( '\\', @parts ); return $pf; } sub prt { my ($m) = @_; print $m; print $OH $m; } # eof - genalt.pl