#!/usr/bin/perl -w # NAME: xmlparser.pl # AIM: 08/10/2013 -Experiments with XML::Parser # 28/02/2014 - More experiments, on some ICAO.xml from the LDS767 zip (C:\Users\user\Downloads\LDS767) # More successful, but STILL TOO MESSY ;=(( There MUST be another way to parse XML????????? # The parser inserts BLANK HASHES, followed by a strange '0', and adds whitespace text!!! use strict; use warnings; use File::Basename; # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] ) use XML::Parser; use Data::Dumper; use Cwd; my $os = $^O; my $perl_dir = '/home/geoff/bin'; my $PATH_SEP = '/'; my $temp_dir = '/tmp'; if ($os =~ /win/i) { $perl_dir = 'C:\GTools\perl'; $temp_dir = $perl_dir; $PATH_SEP = "\\"; } unshift(@INC, $perl_dir); require 'lib_utils.pl' or die "Unable to load 'lib_utils.pl' Check paths in \@INC...\n"; # log file stuff our ($LF); my $pgmname = $0; if ($pgmname =~ /(\\|\/)/) { my @tmpsp = split(/(\\|\/)/,$pgmname); $pgmname = $tmpsp[-1]; } my $outfile = $temp_dir.$PATH_SEP."temp.$pgmname.txt"; open_log($outfile); # user variables my $VERS = "0.0.1 2013-03-17"; my $load_log = 0; my $in_file = ''; my $verbosity = 0; my $out_file = $temp_dir.$PATH_SEP."tempxml.json"; # ### DEBUG ### my $debug_on = 1; my $def_file = 'C:\Users\user\Downloads\LDS767\KSFO.xml'; my $def_file3 = 'C:\Users\user\Downloads\LDS767\TNCM.xml'; my $def_file4 = 'C:\Users\user\Downloads\LDS767\KDFW.xml'; my $def_file2 = 'C:\Program Files (x86)\Airline Project 0.3.7\Data\addons\airports\africa.xml'; ### program variables my @warnings = (); my $cwd = cwd(); sub VERB1() { return $verbosity >= 1; } sub VERB2() { return $verbosity >= 2; } sub VERB5() { return $verbosity >= 5; } sub VERB9() { return $verbosity >= 9; } sub show_warnings($) { my ($val) = @_; if (@warnings) { prt( "\nGot ".scalar @warnings." WARNINGS...\n" ); foreach my $itm (@warnings) { prt("$itm\n"); } prt("\n"); } else { prt( "\nNo warnings issued.\n\n" ) if (VERB9()); } } sub pgm_exit($$) { my ($val,$msg) = @_; if (length($msg)) { $msg .= "\n" if (!($msg =~ /\n$/)); prt($msg); } show_warnings($val); close_log($outfile,$load_log); exit($val); } sub prtw($) { my ($tx) = shift; $tx =~ s/\n$//; prt("$tx\n"); push(@warnings,$tx); } ####################################### sub treestg() { my $txt = < 'DAAG', 'iata' => 'ALG', 'name' => 'Houari Boumediene Airport', 'type' => 'Long_Haul_International', 'season' => 'All_Year' }, 0, ' ', 'town', [ { 'country' => '142', 'DST' => '01:00:00', 'town' => 'Algier', 'GMT' => '01:00:00' } ], 0, ' ', 'coordinates', [ {}, 0, ' ', 'latitude', [ { 'value' => "036\x{b0}41'27''N" } ], 0, ' ', 'longitude', [ { 'value' => "003\x{b0}12'55''E" } ], 0, ' ' ], 0, ' ', 'size', [ { 'cargo' => 'Smallest', 'value' => 'Medium', 'cargovolume' => '0', 'pax' => '4480' } ], 0, ' ', 'terminals', [ {}, 0, ' ', 'terminal', [ { 'gates' => '16', 'name' => 'Terminal 1' } ], 0, ' ', 'terminal', [ { 'gates' => '7', 'name' => 'Terminal 2' } ], 0, ' ' ], 0, ' ', 'runways', [ {}, 0, ' ', 'runway', [ { 'surface' => 'Asphalt', 'length' => '3500', 'name' => '05/23' } ], 0, ' ', 'runway', [ { 'surface' => 'Asphalt', 'length' => '3500', 'name' => '09/27' } ], 0, ' ' ], 0, ' ' ], 0, ' ', 'airport', [ { 'icao' => 'DAUA', 'iata' => 'AZR', 'name' => 'Touat Cheikh Sidi Mohamed Belkebir Airport', 'type' => 'Domestic', 'season' => 'All_Year' EOF return $txt; } sub dump_view() { my $txt = < 'By Pedro Sousa, Wade Chafe, extracted by PROCIO 6a -Ian Mitchell'},0,' ', 'Airport', [ { 'ICAOcode' => 'TNCM' },0,' ', 'Approach', [ { 'Name' => 'VOR.DME.09' },0,' ', 'App_Waypoint', [ { 'ID' => '1' },0,' ', 'Name',[{},0,'PJM'],0,' ', 'Type',[{},0,'Normal'],0,' ', 'Latitude',[{},0,'18.038111'],0,' ', 'Longitude',[{},0,'-63.118281'],0,' ', 'Speed',[{},0,'0'],0,' ', 'Altitude',[{},0,'3500'],0,' ', 'AltitudeCons',[{},0,'0'],0,' ', 'AltitudeRestriction',[{},0,'above'],0,' ', 'Flytype',[{},0,'Fly-by'],0,' ', 'Sp_Turn',[{},0,'Auto'],0,' ' ],0,' ', 'App_Waypoint', [ {'ID' => '2'},0,' ', 'Name', EOF return $txt; } ####################################### sub is_a_double($) { my $val = shift; return 1 if ($val =~ /^-?\d+\.?\d*$/); # { print "is a real number\n" } return 1 if ($val =~ /^\d+$/); # { print "is a whole number\n" } return 1 if ($val =~ /^-?\d+$/); # { print "is an integer\n" } return 1 if ($val =~ /^[+-]?\d+$/); # { print "is a +/- integer\n" } return 1 if ($val =~ /^-?(?:\d+(?:\.\d*)?|\.\d+)$/); # { print "is a decimal number\n" } return 1 if ($val =~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/); # { print "a C float\n" } return 0 if ($val =~ /\D/); # { print "has nondigits\n" } return 0; } my $last_text = ''; sub enum_array($$$$); sub enum_array($$$$) { my ($ra,$ind,$txt,$lev) = @_; my $max = scalar @{$ra}; prt("$ind Enumerate array of $max items\n") if (VERB9()); my ($i,$ref,$val,$i2,@arr,$cnt,$key2,$val2,$add); my $text = ''; my $lctxt = lc($txt); for ($i = 0; $i < $max; $i++) { $i2 = $i + 1; $val = ${$ra}[$i]; $ref = ref($val); if ($ref && length($ref)) { if ($ref eq 'HASH') { @arr = keys %{$val}; $cnt = scalar @arr; if ($cnt) { prt("$ind $i2: $ref - $cnt keys - "); $add = 0; foreach $key2 (@arr) { $val2 = ${$val}{$key2}; prt("$key2=$val2 "); } prt("\n"); } $i++; # skip the '0' } elsif ($ref eq 'ARRAY') { @arr = @{$val}; $cnt = scalar @arr; prt("$ind $i2: $ref - $cnt items\n") if (VERB5()); enum_array(\@arr,$ind.' ',$text,$lev+1); $i++; # skip the '0' } else { prt("$ind $i2: uncased *FIX ME*\n"); } ###$i += 2; # skip '0' and newline text } else { $val = trim_all($val); if (length($val)) { if ($lev > 1) { if ($txt =~ /Waypoint/i) { prt("$ind $i2: wpt $val\n") if (VERB5()); } else { if (is_a_double($val)) { prt("$ind $i2: num \"$lctxt\":$val\n"); } else { prt("$ind $i2: txt \"$lctxt\":\"$val\"\n"); } } } else { if ($lev) { if ($val =~ /Waypoint/i) { prt("$ind $i2: Wpt $val\n"); } else { prt("$ind $i2: TXT $val\n") if (VERB5()); } } else { prt("$ind $i2: Txt $val\n"); } } $text = $val } } } } sub process_in_file($) { my ($inf) = @_; #my $p = new XML::Parser(Style => 'Debug'); # NO GOOD Nothing special is returned by parse my $p = new XML::Parser(Style => 'Tree'); # seems best #my $p = new XML::Parser(Style => 'Objects'); # Hmmm, bless Kids - seems difficulat #my $p = new XML::Parser(Style => 'Stream'); # GOT NOTHING NO GOOD #my $p = new XML::Parser(Style => 'Subs'); # NO GOOD Nothing special is returned by parse prt("Parsing file [$inf]\n"); my $xml = $p->parsefile($inf); # prt("Dumping return...\n"); # prt(Dumper($xml)); # $load_log = 1; #if (defined ${$xml}{ProceduresDB}) { # NOT a hash reference #if (defined $xml{ProceduresDB}) { # NO %xml exists #if (defined ${$xml}['ProceduresDB']) { # not numeric my $len = scalar @{$xml}; my ($i,$ref,$i2,@arr,$ra2,$cnt,$j,$ra3,$ref2,$j2,$hadarr,@arr2,$cnt2,$key,$val,$add); my $lctxt = ''; if (! defined ${$xml}[0]) { # not numeric prt("Do not have an array\n"); return; } if ($len < 2) { prt("Insufficient length\n"); return; } prt("ProceduresDB is defined. len $len\n"); my $ra = ${$xml}[1]; # extracts an ARRAY #prt($ra); $len = scalar @{$ra}; prt("Array len $len\n"); for ($i = 0; $i < $len; $i++) { $i2 = $i + 1; $ra2 = ${$ra}[$i]; $ref = ref($ra2); if ($ref && length($ref)) { if ($ref eq 'HASH') { @arr = keys %{$ra2}; $cnt = scalar @arr; prt("$i2: $ref - with $cnt keys - "); $cnt = 0; foreach $key (@arr) { $val = ${$ra2}{$key}; prt("$key=$val "); } prt("\n"); $i += 2; # skip the '0' and newline text } elsif ($ref eq 'ARRAY') { $cnt = scalar @{$ra2}; prt("$i2: $ref - with $cnt items\n"); for ($j = 0; $j < $cnt; $j++) { $j2 = $j + 1; $ra3 = ${$ra2}[$j]; $ref2 = ref($ra3); if ($ref2 && length($ref2)) { if ($ref2 eq 'HASH') { @arr2 = keys %{$ra3}; $cnt2 = scalar @arr2; prt(" $j2: $ref2 - with $cnt2 keys - "); $add = 0; foreach $key (@arr2) { $val = ${$ra3}{$key}; if ($key =~ /ICAO/) { $key = 'icao'; $val = uc($val); } else { $key = lc($key); $val = lc($val); } prt("$add: $key=$val "); } prt("\n"); } elsif ($ref2 eq 'ARRAY') { @arr2 = @{$ra3}; $cnt2 = scalar @{$ra3}; prt(" $j2: $ref2 - with $cnt2 items\n "); enum_array(\@arr2,' ',$last_text,0); } else { prt(" $j2: ref=$ref2 uncased *FIX ME*"); } $j += 2; } else { $ra3 = trim_all($ra3); if (length($ra3)) { $lctxt = lc($ra3); prt(" $j2: txt [$lctxt] "); $last_text = $ra3; } else { prt(" $j2: blank ") if (VERB9()); } } } prt("\n"); $i += 2; # skip the '0' and newline text } else { prt("$i2: $ref - not parsed\n"); } } else { # is text $ra2 = trim_all($ra2); if (length($ra2)) { $lctxt = lc($ra2); prt("$i2: txt $lctxt\n"); } else { prt("$i2: txt blank\n"); } } } $load_log = 1; } ######################################### ### MAIN ### parse_args(@ARGV); process_in_file($in_file); pgm_exit(0,""); ######################################## sub need_arg { my ($arg,@av) = @_; pgm_exit(1,"ERROR: [$arg] must have a following argument!\n") if (!@av); } sub parse_args { my (@av) = @_; my ($arg,$sarg); while (@av) { $arg = $av[0]; if ($arg =~ /^-/) { $sarg = substr($arg,1); $sarg = substr($sarg,1) while ($sarg =~ /^-/); if (($sarg =~ /^h/i)||($sarg eq '?')) { give_help(); pgm_exit(0,"Help exit(0)"); } elsif ($sarg =~ /^v/) { if ($sarg =~ /^v.*(\d+)$/) { $verbosity = $1; } else { while ($sarg =~ /^v/) { $verbosity++; $sarg = substr($sarg,1); } } prt("Verbosity = $verbosity\n") if (VERB1()); } elsif ($sarg =~ /^l/) { if ($sarg =~ /^ll/) { $load_log = 2; } else { $load_log = 1; } prt("Set to load log at end. ($load_log)\n") if (VERB1()); } elsif ($sarg =~ /^o/) { need_arg(@av); shift @av; $sarg = $av[0]; $out_file = $sarg; prt("Set out file to [$out_file].\n") if (VERB1()); } else { pgm_exit(1,"ERROR: Invalid argument [$arg]! Try -?\n"); } } else { $in_file = $arg; prt("Set input to [$in_file]\n") if (VERB1()); } shift @av; } if ($debug_on) { prtw("WARNING: DEBUG is ON!\n"); if ((length($in_file) == 0) && $debug_on) { $in_file = $def_file; prt("Set DEFAULT input to [$in_file]\n"); } $verbosity = 9; } if (length($in_file) == 0) { pgm_exit(1,"ERROR: No input files found in command!\n"); } if (! -f $in_file) { pgm_exit(1,"ERROR: Unable to find in file [$in_file]! Check name, location...\n"); } } sub give_help { prt("$pgmname: version $VERS\n"); prt("Usage: $pgmname [options] in-file\n"); prt("Options:\n"); prt(" --help (-h or -?) = This help, and exit 0.\n"); prt(" --verb[n] (-v) = Bump [or set] verbosity. def=$verbosity\n"); prt(" --load (-l) = Load LOG at end. ($outfile)\n"); prt(" --out (-o) = Write output to this file.\n"); } # eof - template.pl