genalt.pl to HTML.

index -|- end

Generated: Tue Feb 2 17:54:39 2010 from genalt.pl 2006/01/10 5.3 KB.

#!/Perl
print "$0: Hello, World...\n";
# AIM - scan HTML file, and ADD an alt='$fn image', if none found
my $in_dir = shift || die "ERROR: Must give input folder, or file ...\n";
my $out_file = 'tempout2.txt';
my $OH;
open $OH, ">$out_file" || die "ERROR: Can NOT create output file ... aborting ...\n";
###my $out_dir = $in_dir;
my $out_dir = 'temp2'; # use another OUT folder, until SURE no mess-up!!!
my @files = ();
my $file;
my $line;
my $filcnt = 0;
my @myfiles = ();
my @fnames = ();
if ( -f $in_dir) {
   prt( "Processing file $in_dir ...\n" );
   push(@myfiles,$in_dir);
   $file = get_file_name($in_dir);
   push(@fnames, $file);
   $in_dir = get_path_name($in_dir);    # GET THE DIRECTORY PART
   prt( "Processing file $file ... dir=$in_dir\n" );
} else {
   prt( "Processing directory $in_dir ...\n" );
   opendir( DIR, $in_dir) || die "ERROR: Can NOT open $in_dir ... aborting ...\n";
   @files = readdir(DIR);
   closedir DIR;
   prt( "Found ".scalar @files." items in the directory ...\n" );
   foreach $file (sort @files) {
      if (($file eq '.')||($file eq '..')) {
         next;
      }
      my $ff = $in_dir . '/' . $file;
      if ( -d $ff ) {
         #prt( "Ignore Directory $file ...\n");
      } else {
         if (is_my_file($file)) {
            push(@myfiles,$ff);
            push(@fnames,$file);
            $filcnt++;
         } else {
            #prt( "IGNORE $file ...\n" );
         }
      }
   }
}
prt( "Found ".scalar @myfiles." files to scan ...\n");
$filcnt = 0;
my @diff_files = ();
foreach $file (@myfiles) {
   my $on = $out_dir . '/' . $fnames[$filcnt];
   prt( "IN=$file OUT=$on\n" );
   $filcnt++;
   open $IF, "<$file" or die "Can not OPEN $file! ... aborting ...\n";
   my @lines = <$IF>; # slurp whole file, to an array of lines
   close($IF);
   # process, line by line
   my $lc = 0;
   my $diff_cnt = 0;
   foreach $line (@lines) {
      chomp $line;
      while( substr($line,0,1) eq ' ' ) {
         $line = substr($line,1);
      }
      ##if ($line =~ /<img (.*)>/i) {
      ##if ($line =~ /<img (.*)(>|\/>)/io) {
      if ($line =~ /document\.write/io) {
         # ignore these lines
      ##} elsif ($line =~ /<img (.*)(>|\/>)?/io) {
      } elsif ($line =~ /<img (.*)(>|\/>){1,}/io) {
         ###prt( "Line $lc: with IMG [$line] ...\n" );
         ###prt( "Line $lc: [$1][$2] ...\n" );
         my $pl = $1; # get the interesting part
         if ($pl =~ /alt=/) {
            ### prt( "With ALT Line $lc: [$pl] ...\n" );
         } else {
            #if ($pl =~ />/) {
            if ($pl =~ /(.+)>/) {
               # multiple ...
               $pl = $1; # at least get this portion
               ### prt( "Line $lc: MULTIPLE - NO ALT [$pl] ...\n" );
            }
            if ($pl =~ /(.+)>/) {
               # multiple ...
               prt( "Line $lc: STILL MULTIPLE - NO ALT [$1][$pl] ...\n" );
            } else {
               if ($pl =~ /src=(.+)/) {
                  my $src = $1;
                  if (substr($src,0,1) eq '"') {
                     $src = substr($src,1);
                     my $pos = index($src,'"');
                     if ($pos > 0) {
                        $src = substr($src,0,$pos);
                        $src = get_file_name($src);
                        $src =~ s/\./ /;
                        ###my $npl = $pl.' alt="'.$src.' image"';
                        ###my $mnpl = quotemeta($npl);
                        ###my $mpl = quotemeta($pl);
                        ###$line =~ s/$mpl/$mnpl/o;
                        my $nline = add_alt($line,$src);
                        if (length($nline) > length($line)) {
                           $diff_cnt++;
                           $lines[$lc] = $nline;
                        }
                        prt( "Line $lc: Added ALT [$pl][$src] ...[$line] \n" );
                     } else {
                        prt( "Line $lc: src?? NO ALT [$pl][$src] ...\n" );
                     }
                  } else {
                     prt( "Line $lc: SRC NO ALT [$pl][$src] ...\n" );
                  }
               } else {
                  prt( "Line $lc: NO ALT [$pl] ...\n" );
               }
            }
         }
      }
      $lc++;
   }
   if ($diff_cnt) {
      # now write it OUT
      open $OF, ">$on" or die "ERROR: Unable to open OUT file ... aborting ...\n";
      foreach $line (@lines) {
         chomp $line;
         print $OF "$line\n";
      }
      close $OF;
   }
}
sub add_alt {
   my ($ln,$sc) = @_; # extract line, and source
   my $len = length($ln);
   my $i;
   my $nln = '';
   my $done_it = 0;
   my $si = 0;
   for ($i = 0; $i < $len; $i++) {
      my $ch = substr($ln,$i,1);
      if (($si == 4) && ($done_it == 0)) {
         if ($ch eq '>') {
            $nln .= ' alt="image '.$sc.'"';
            $done_it = 1;
            $si++;
         }
      }
      if ($si == 0) {
         if (uc($ch) eq 'I') {
            $si++;
         }
      } elsif ($si == 1) {
         if (uc($ch) eq 'M') {
            $si++;
         } else {
            $si = 0;
         }
      } elsif ($si == 2) {
         if (uc($ch) eq 'G') {
            $si++;
         } else {
            $si = 0;
         }
      } elsif ($si == 3) {
         if ($ch eq ' ') {
            $si++; # set SUCCESS
         } else {
            $si = 0;
         }
      }
      $nln .= $ch;
   }
   return $nln;
}
sub is_my_file {
   my ($f) = @_;
   my $ret = 0;
   if ($f =~ /(.*)\.htm$/i) {
      $ret = 1;
   } elsif ($f =~ /(.*)\.html$/i) {
      $ret = 1;
#   } elsif ($f =~ /(.*)\.shtml$/i) {
#      $ret = 1;
#   } elsif ($f =~ /(.*)\.php$/i) {
#      $ret = 1;
   }
#   if ($ret) {
#      foreach my $f2 (@excl) {
#         if (uc($f2) eq uc($f)) {
#            $ret = 0;
#            last;
#         }
#      }
#   }
   return $ret;
}
sub dos_to_unix {
   my ($d) = @_;
   $d =~ s/\\/\//g;
   return $d;
}
sub get_file_name {
   my ($pf) = @_;
   $pf = dos_to_unix($pf);
   @parts = split('/',$pf);
   return $parts[$#parts];
}
sub get_path_name {
   my ($pf) = @_;
   $pf = dos_to_unix($pf);
   @parts = split('/',$pf);
   pop( @parts ); # Pops off the last value of the array - the file name
   $pf = join( '\\', @parts );
   return $pf;
}
sub prt {
   my ($m) = @_;
   print $m;
   print $OH $m;
}
# eof - genalt.pl

index -|- top

checked by tidy  Valid HTML 4.01 Transitional