adjxml.pl to HTML.

index -|- end

Generated: Sat Oct 24 16:35:08 2020 from adjxml.pl 2020/05/18 6.3 KB. text copy

#!/perl -w
# NAME: adjxml.pl
# AIM: specalized - read a javascript text file, with array of image captions, and
# write the caption to a JetPhoto XML file as <info>...</info>
# 06/07/2008 geoff mclane http://geoffair.net/mperl - specialized!
use strict;
use warnings;
use XML::DOM;
my $os = $^O;
my $perl_dir = '/home/geoff/bin';
my $PATH_SEP = '/';
my $temp_dir = '/tmp';
if ($os =~ /win/i) {
    $perl_dir = 'C:\GTools\perl';
    $temp_dir = $perl_dir;
    $PATH_SEP = "\\";
}
unshift(@INC, $perl_dir);
####
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";
# log file stuff
my ($LF);
my $pgmname = $0;
if ($pgmname =~ /\w{1}:\\.*/) {
   my @tmpsp = split(/\\/,$pgmname);
   $pgmname = $tmpsp[-1];
}
my $outfile = "temp.$pgmname.txt";
open_log($outfile);
prt( "$0 ... Hello, World ...\n" );

my $in_js_file = 'C:\HOMEPAGE\GA\travel\maroc\content2.js';
my $in_xml_file = 'C:\Documents and Settings\Geoff McLane\Desktop\Maroc - June, 2008.flash\imagelist.xml';
my $out_xml_file = "tempxml.xml";

my @jslines = ();
my @xmllines = ();
my @warnings = ();
my $wmsg = '';

# DEBUG
my $dbg1 = 0;   # show "Finding $xi in $lnsjs javascript lines
my $dbg2 = 0;   # show "Text [$val] added ...

my ($lnsjs, $lnsxml, $j, $x, $jline, $xline);
if (open INF, "<$in_js_file") {
   @jslines = <INF>;
   close INF;
   ###if (open INF, "<$in_xml_file") {
   if (open INF, "<:utf8", "$in_xml_file") {
      @xmllines = <INF>;
      close INF;
      $lnsjs = scalar @jslines;
      $lnsxml = scalar @xmllines;
      prt( "Processing $lnsjs of javascript ($in_js_file) ...\n" );
      prt( "and $lnsxml of XML ($in_xml_file) ...\n" );
   } else {
      prt( "ERROR: Unable to open $in_xml_file ... check name, location ...\n" );
   }
} else {
   prt( "ERROR: Unable to open $in_js_file ... check name, location ...\n" );
}
if (@jslines && @xmllines) {
   ###process_lines();
   process_xml($in_xml_file);
}

if (@warnings) {
   prt( "WARNING: Got ".scalar @warnings." WARNING messages ...\n" );
   foreach $wmsg (@warnings) {
      prt( "$wmsg\n" );
   }
}

close_log($outfile,1);
exit(0);
#############################

# usage: $val = get_js_caption($ximg);
sub get_js_caption {
   my ($xi) = shift;
   my (@arr, $img, $siz, $txt);
   prt( "Finding $xi in $lnsjs javascript lines\n" ) if ($dbg1);
   for ($j = 0; $j < $lnsjs; $j++) {
      $jline = $jslines[$j];
      chomp $jline;
      # "maroc242.jpg|N|Neudorf flat - Simone, Yvonne, Jacquot and Annie.|http://maps.google.com/maps?f=q&hl=en&q=Neudorf,+Strasbourg,+Bas-Rhin,+Alsace,+France&ie=UTF8&lr=lang_en%7Clang_fr&cd=2&geocode=0,48.564566,7.760482&ll=48.560857,7.767479&spn=0.000801,0.001652&t=h&z=19",
      if (substr($jline,0,1) eq '"') {
         $jline = substr($jline,1);
         $jline = trim_js_line($jline);
         @arr = split(/\|/,$jline);
         if (scalar @arr > 2) {
            $img = $arr[0];
            if ($xi eq $img) {
               $siz = $arr[1];
               $txt = $arr[2];
               return $txt;
            }
         }
      }
   }
   $wmsg = "WARNING: FAILED to find $xi in $lnsjs javascript lines!";
   push(@warnings,$wmsg);
   prt( "$wmsg\n" );
   return "caption not found";
}

sub process_xml {
   my ($in_file) = shift;
   my $parser = XML::DOM::Parser->new();
   my $doc = $parser->parsefile($in_file);
   my $cnt = 0;
   my ($ntxt, @arr, $ximg, $val, $inf);
   if ($doc) {
      foreach my $images ($doc->getElementsByTagName('Image')) {
         $cnt++;
         $ntxt = $images->getElementsByTagName('size_0')->item(0)->getFirstChild->getNodeValue;
         @arr = split(/\//,$ntxt);
         if (scalar @arr == 2) {
            $ximg = $arr[1];
            $val = get_js_caption($ximg);
            #prt("size_0: $ximg\n");
            ##prt("size_2: ".$images->getElementsByTagName('size_2')->item(0)->getFirstChild->getNodeValue . "\n");
            ##prt("info:   ".$images->getElementsByTagName('info')->item(0)->getFirstChild->getNodeValue . "\n");
            ##my $inf = $images->getElementsByTagName('info');
            $inf = $images->getElementsByTagName('info')->item(0);
            ##my $inf = $images->getElementsByTagName('info')->item(0)->getFirstChild;
            if (defined $inf) {
               $inf->addText($val);
               prt( "Text [$val] added ...\n") if ($dbg2);
            } else {
               $wmsg = "ERROR: Failed to ADD TEXT! for $ximg ...";
               push(@warnings,$wmsg);
               prt( "$wmsg\n" );
            }
         } else {
            $wmsg = "WARNING: $ntxt did not split correctly!";
            push(@warnings,$wmsg);
            prt( "$wmsg\n" );
         }
      }
   }
   $ntxt = $doc->toString();
   write2file($ntxt, $out_xml_file);
   prt( "Written $out_xml_file ...\n" );
}


sub process_lines_failed {
   for ($j = 0; $j < $lnsjs; $j++) {
      $jline = $jslines[$j];
      chomp $jline;
      # "maroc242.jpg|N|Neudorf flat - Simone, Yvonne, Jacquot and Annie.|http://maps.google.com/maps?f=q&hl=en&q=Neudorf,+Strasbourg,+Bas-Rhin,+Alsace,+France&ie=UTF8&lr=lang_en%7Clang_fr&cd=2&geocode=0,48.564566,7.760482&ll=48.560857,7.767479&spn=0.000801,0.001652&t=h&z=19",
      if (substr($jline,0,1) eq '"') {
         $jline = substr($jline,1);
         $jline = trim_js_line($jline);
         my @arr = split(/\|/,$jline);
         if (scalar @arr > 2) {
            my $img = $arr[0];
            my $siz = $arr[1];
            my $txt = $arr[2];
            prt( "Finding $img in $lnsxml XML ... text[$txt]\n" );
            for ($x = 0; $x < $lnsxml; $x++) {
               $xline = $xmllines[$x];
               ##$xline = unpack("U*", $xline); # unpack Unicode characters
               ###$xline = unicode_substr($xline,0,length($xline));
               chomp $xline;
               prt( "$xline\n" );
               # <size_0>128x128/maroc704.jpg</size_0>
               if ($xline =~ /<size_0>128x128\/(.+)<\/size_0>/) {
                  my $xfil = $1;
                  prt( "Got XML file [$xfil] ...\n" );
                  if ($xfil eq $img) {
                     prt( "Found $img ...\n" );
                     last;
                  }
               }
            }
         }
      }
   }
}


sub unicode_substr {
    my($what,$where,$howmuch) = @_;
    return (unpack("x$where a$howmuch", $what));
}

# $jline = trim_js_line($jline);
sub trim_js_line {
   my ($jl) = shift;
   $jl =~ s/,*"\s*$//;
   $jl = substr($jl,0,length($jl)-1) while ($jl =~ /\s$/);
   $jl = substr($jl,0,length($jl)-1) while ($jl =~ /;$/);
   $jl = substr($jl,0,length($jl)-1) while ($jl =~ /\s$/);
   $jl = substr($jl,0,length($jl)-1) while ($jl =~ /\)$/);
   $jl = substr($jl,0,length($jl)-1) while ($jl =~ /\s$/);
   $jl = substr($jl,0,length($jl)-1) while ($jl =~ /,$/);
   $jl = substr($jl,0,length($jl)-1) while ($jl =~ /\s$/);
   $jl = substr($jl,0,length($jl)-1) while ($jl =~ /"$/);
   $jl = substr($jl,0,length($jl)-1) while ($jl =~ /\s$/);
   return $jl;
}

# eof = adjxml.pl

index -|- top

checked by tidy  Valid HTML 4.01 Transitional