Generated: Sat Oct 24 16:35:08 2020 from adjxml.pl 2020/05/18 6.3 KB. text copy
#!/perl -w # NAME: adjxml.pl # AIM: specalized - read a javascript text file, with array of image captions, and # write the caption to a JetPhoto XML file as <info>...</info> # 06/07/2008 geoff mclane http://geoffair.net/mperl - specialized! use strict; use warnings; use XML::DOM; my $os = $^O; my $perl_dir = '/home/geoff/bin'; my $PATH_SEP = '/'; my $temp_dir = '/tmp'; if ($os =~ /win/i) { $perl_dir = 'C:\GTools\perl'; $temp_dir = $perl_dir; $PATH_SEP = "\\"; } unshift(@INC, $perl_dir); #### require 'logfile.pl' or die "Unable to load logfile.pl ...\n"; # log file stuff my ($LF); my $pgmname = $0; if ($pgmname =~ /\w{1}:\\.*/) { my @tmpsp = split(/\\/,$pgmname); $pgmname = $tmpsp[-1]; } my $outfile = "temp.$pgmname.txt"; open_log($outfile); prt( "$0 ... Hello, World ...\n" ); my $in_js_file = 'C:\HOMEPAGE\GA\travel\maroc\content2.js'; my $in_xml_file = 'C:\Documents and Settings\Geoff McLane\Desktop\Maroc - June, 2008.flash\imagelist.xml'; my $out_xml_file = "tempxml.xml"; my @jslines = (); my @xmllines = (); my @warnings = (); my $wmsg = ''; # DEBUG my $dbg1 = 0; # show "Finding $xi in $lnsjs javascript lines my $dbg2 = 0; # show "Text [$val] added ... my ($lnsjs, $lnsxml, $j, $x, $jline, $xline); if (open INF, "<$in_js_file") { @jslines = <INF>; close INF; ###if (open INF, "<$in_xml_file") { if (open INF, "<:utf8", "$in_xml_file") { @xmllines = <INF>; close INF; $lnsjs = scalar @jslines; $lnsxml = scalar @xmllines; prt( "Processing $lnsjs of javascript ($in_js_file) ...\n" ); prt( "and $lnsxml of XML ($in_xml_file) ...\n" ); } else { prt( "ERROR: Unable to open $in_xml_file ... check name, location ...\n" ); } } else { prt( "ERROR: Unable to open $in_js_file ... check name, location ...\n" ); } if (@jslines && @xmllines) { ###process_lines(); process_xml($in_xml_file); } if (@warnings) { prt( "WARNING: Got ".scalar @warnings." WARNING messages ...\n" ); foreach $wmsg (@warnings) { prt( "$wmsg\n" ); } } close_log($outfile,1); exit(0); ############################# # usage: $val = get_js_caption($ximg); sub get_js_caption { my ($xi) = shift; my (@arr, $img, $siz, $txt); prt( "Finding $xi in $lnsjs javascript lines\n" ) if ($dbg1); for ($j = 0; $j < $lnsjs; $j++) { $jline = $jslines[$j]; chomp $jline; # "maroc242.jpg|N|Neudorf flat - Simone, Yvonne, Jacquot and Annie.|http://maps.google.com/maps?f=q&hl=en&q=Neudorf,+Strasbourg,+Bas-Rhin,+Alsace,+France&ie=UTF8&lr=lang_en%7Clang_fr&cd=2&geocode=0,48.564566,7.760482&ll=48.560857,7.767479&spn=0.000801,0.001652&t=h&z=19", if (substr($jline,0,1) eq '"') { $jline = substr($jline,1); $jline = trim_js_line($jline); @arr = split(/\|/,$jline); if (scalar @arr > 2) { $img = $arr[0]; if ($xi eq $img) { $siz = $arr[1]; $txt = $arr[2]; return $txt; } } } } $wmsg = "WARNING: FAILED to find $xi in $lnsjs javascript lines!"; push(@warnings,$wmsg); prt( "$wmsg\n" ); return "caption not found"; } sub process_xml { my ($in_file) = shift; my $parser = XML::DOM::Parser->new(); my $doc = $parser->parsefile($in_file); my $cnt = 0; my ($ntxt, @arr, $ximg, $val, $inf); if ($doc) { foreach my $images ($doc->getElementsByTagName('Image')) { $cnt++; $ntxt = $images->getElementsByTagName('size_0')->item(0)->getFirstChild->getNodeValue; @arr = split(/\//,$ntxt); if (scalar @arr == 2) { $ximg = $arr[1]; $val = get_js_caption($ximg); #prt("size_0: $ximg\n"); ##prt("size_2: ".$images->getElementsByTagName('size_2')->item(0)->getFirstChild->getNodeValue . "\n"); ##prt("info: ".$images->getElementsByTagName('info')->item(0)->getFirstChild->getNodeValue . "\n"); ##my $inf = $images->getElementsByTagName('info'); $inf = $images->getElementsByTagName('info')->item(0); ##my $inf = $images->getElementsByTagName('info')->item(0)->getFirstChild; if (defined $inf) { $inf->addText($val); prt( "Text [$val] added ...\n") if ($dbg2); } else { $wmsg = "ERROR: Failed to ADD TEXT! for $ximg ..."; push(@warnings,$wmsg); prt( "$wmsg\n" ); } } else { $wmsg = "WARNING: $ntxt did not split correctly!"; push(@warnings,$wmsg); prt( "$wmsg\n" ); } } } $ntxt = $doc->toString(); write2file($ntxt, $out_xml_file); prt( "Written $out_xml_file ...\n" ); } sub process_lines_failed { for ($j = 0; $j < $lnsjs; $j++) { $jline = $jslines[$j]; chomp $jline; # "maroc242.jpg|N|Neudorf flat - Simone, Yvonne, Jacquot and Annie.|http://maps.google.com/maps?f=q&hl=en&q=Neudorf,+Strasbourg,+Bas-Rhin,+Alsace,+France&ie=UTF8&lr=lang_en%7Clang_fr&cd=2&geocode=0,48.564566,7.760482&ll=48.560857,7.767479&spn=0.000801,0.001652&t=h&z=19", if (substr($jline,0,1) eq '"') { $jline = substr($jline,1); $jline = trim_js_line($jline); my @arr = split(/\|/,$jline); if (scalar @arr > 2) { my $img = $arr[0]; my $siz = $arr[1]; my $txt = $arr[2]; prt( "Finding $img in $lnsxml XML ... text[$txt]\n" ); for ($x = 0; $x < $lnsxml; $x++) { $xline = $xmllines[$x]; ##$xline = unpack("U*", $xline); # unpack Unicode characters ###$xline = unicode_substr($xline,0,length($xline)); chomp $xline; prt( "$xline\n" ); # <size_0>128x128/maroc704.jpg</size_0> if ($xline =~ /<size_0>128x128\/(.+)<\/size_0>/) { my $xfil = $1; prt( "Got XML file [$xfil] ...\n" ); if ($xfil eq $img) { prt( "Found $img ...\n" ); last; } } } } } } } sub unicode_substr { my($what,$where,$howmuch) = @_; return (unpack("x$where a$howmuch", $what)); } # $jline = trim_js_line($jline); sub trim_js_line { my ($jl) = shift; $jl =~ s/,*"\s*$//; $jl = substr($jl,0,length($jl)-1) while ($jl =~ /\s$/); $jl = substr($jl,0,length($jl)-1) while ($jl =~ /;$/); $jl = substr($jl,0,length($jl)-1) while ($jl =~ /\s$/); $jl = substr($jl,0,length($jl)-1) while ($jl =~ /\)$/); $jl = substr($jl,0,length($jl)-1) while ($jl =~ /\s$/); $jl = substr($jl,0,length($jl)-1) while ($jl =~ /,$/); $jl = substr($jl,0,length($jl)-1) while ($jl =~ /\s$/); $jl = substr($jl,0,length($jl)-1) while ($jl =~ /"$/); $jl = substr($jl,0,length($jl)-1) while ($jl =~ /\s$/); return $jl; } # eof = adjxml.pl