xmlparse.pl to HTML.

index -|- end

Generated: Sun Mar 2 17:20:30 2014 from xmlparse.pl 2014/02/28 15.7 KB. text copy

#!/usr/bin/perl -w
# NAME: xmlparser.pl
# AIM: 08/10/2013 -Experiments with XML::Parser
# 28/02/2014 - More experiments, on some ICAO.xml from the LDS767 zip (C:\Users\user\Downloads\LDS767)
# More successful, but STILL TOO MESSY ;=(( There MUST be another way to parse XML?????????
# The parser inserts BLANK HASHES, followed by a strange '0', and adds whitespace text!!!
use strict;
use warnings;
use File::Basename;  # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] )
use XML::Parser;
use Data::Dumper;
use Cwd;
my $os = $^O;
my $perl_dir = '/home/geoff/bin';
my $PATH_SEP = '/';
my $temp_dir = '/tmp';
if ($os =~ /win/i) {
    $perl_dir = 'C:\GTools\perl';
    $temp_dir = $perl_dir;
    $PATH_SEP = "\\";
}
unshift(@INC, $perl_dir);
require 'lib_utils.pl' or die "Unable to load 'lib_utils.pl' Check paths in \@INC...\n";
# log file stuff
our ($LF);
my $pgmname = $0;
if ($pgmname =~ /(\\|\/)/) {
    my @tmpsp = split(/(\\|\/)/,$pgmname);
    $pgmname = $tmpsp[-1];
}
my $outfile = $temp_dir.$PATH_SEP."temp.$pgmname.txt";
open_log($outfile);

# user variables
my $VERS = "0.0.1 2013-03-17";
my $load_log = 0;
my $in_file = '';
my $verbosity = 0;
my $out_file = $temp_dir.$PATH_SEP."tempxml.json";

# ### DEBUG ###
my $debug_on = 1;
my $def_file = 'C:\Users\user\Downloads\LDS767\KSFO.xml';
my $def_file3 = 'C:\Users\user\Downloads\LDS767\TNCM.xml';
my $def_file4 = 'C:\Users\user\Downloads\LDS767\KDFW.xml';
my $def_file2 = 'C:\Program Files (x86)\Airline Project 0.3.7\Data\addons\airports\africa.xml';
### program variables
my @warnings = ();
my $cwd = cwd();

sub VERB1() { return $verbosity >= 1; }
sub VERB2() { return $verbosity >= 2; }
sub VERB5() { return $verbosity >= 5; }
sub VERB9() { return $verbosity >= 9; }

sub show_warnings($) {
    my ($val) = @_;
    if (@warnings) {
        prt( "\nGot ".scalar @warnings." WARNINGS...\n" );
        foreach my $itm (@warnings) {
           prt("$itm\n");
        }
        prt("\n");
    } else {
        prt( "\nNo warnings issued.\n\n" ) if (VERB9());
    }
}

sub pgm_exit($$) {
    my ($val,$msg) = @_;
    if (length($msg)) {
        $msg .= "\n" if (!($msg =~ /\n$/));
        prt($msg);
    }
    show_warnings($val);
    close_log($outfile,$load_log);
    exit($val);
}


sub prtw($) {
   my ($tx) = shift;
   $tx =~ s/\n$//;
   prt("$tx\n");
   push(@warnings,$tx);
}

#######################################
sub treestg() {
    my $txt = <<EOF;
\$VAR1 = [
          'airports',
          [ {}, 0, ' ',
            'airport',
            [ {
                'icao' => 'DAAG',
                'iata' => 'ALG',
                'name' => 'Houari Boumediene Airport',
                'type' => 'Long_Haul_International',
                'season' => 'All_Year'
              }, 0, ' ',
              'town',
              [ {
                  'country' => '142',
                  'DST' => '01:00:00',
                  'town' => 'Algier',
                  'GMT' => '01:00:00'
                }
              ], 0, ' ',
              'coordinates',
              [ {}, 0, ' ',
                'latitude',
                [ {
                    'value' => "036\x{b0}41'27''N"
                  }
                ], 0, ' ',
                'longitude',
                [ {
                    'value' => "003\x{b0}12'55''E"
                  }
                ], 0, ' '
              ], 0, ' ',
              'size',
              [ {
                  'cargo' => 'Smallest',
                  'value' => 'Medium',
                  'cargovolume' => '0',
                  'pax' => '4480'
                }
              ], 0, ' ',
              'terminals',
              [ {}, 0, ' ',
                'terminal',
                [ {
                    'gates' => '16',
                    'name' => 'Terminal 1'
                  }
                ], 0, ' ',
                'terminal',
                [ {
                    'gates' => '7',
                    'name' => 'Terminal 2'
                  }
                ], 0, ' '
              ], 0, ' ',
              'runways',
              [ {}, 0, ' ',
                'runway',
                [ {
                    'surface' => 'Asphalt',
                    'length' => '3500',
                    'name' => '05/23'
                  }
                ], 0, ' ',
                'runway',
                [ {
                    'surface' => 'Asphalt',
                    'length' => '3500',
                    'name' => '09/27'
                  }
                ], 0, ' '
              ], 0, ' '
            ], 0, ' ',
            'airport',
            [ {
                'icao' => 'DAUA',
                'iata' => 'AZR',
                'name' => 'Touat Cheikh Sidi Mohamed Belkebir Airport',
                'type' => 'Domestic',
                'season' => 'All_Year'
EOF
    return $txt;
}

sub dump_view() {
    my $txt = <<EOF;
\$VAR1 = [
  'ProceduresDB',
  [{ 'build' => 'By Pedro Sousa, Wade Chafe, extracted by PROCIO 6a -Ian Mitchell'},0,'   ',
    'Airport',
    [ { 'ICAOcode' => 'TNCM' },0,' ',
    'Approach',
    [ { 'Name' => 'VOR.DME.09' },0,' ',
    'App_Waypoint',
    [ { 'ID' => '1' },0,' ',
      'Name',[{},0,'PJM'],0,' ',
      'Type',[{},0,'Normal'],0,' ',
      'Latitude',[{},0,'18.038111'],0,' ',
      'Longitude',[{},0,'-63.118281'],0,' ',
      'Speed',[{},0,'0'],0,' ',
      'Altitude',[{},0,'3500'],0,' ',
      'AltitudeCons',[{},0,'0'],0,' ',
      'AltitudeRestriction',[{},0,'above'],0,' ',
      'Flytype',[{},0,'Fly-by'],0,' ',
      'Sp_Turn',[{},0,'Auto'],0,' '
     ],0,' ',
     'App_Waypoint',
     [ {'ID' => '2'},0,' ',
       'Name',

EOF
    return $txt;
}
#######################################
sub is_a_double($) {
    my $val = shift;
    return 1 if ($val =~ /^-?\d+\.?\d*$/); # { print "is a real number\n" }
    return 1 if ($val =~ /^\d+$/); # { print "is a whole number\n" }
    return 1 if ($val =~ /^-?\d+$/); # { print "is an integer\n" }
    return 1 if ($val =~ /^[+-]?\d+$/); # { print "is a +/- integer\n" }
    return 1 if ($val =~ /^-?(?:\d+(?:\.\d*)?|\.\d+)$/); # { print "is a decimal number\n" }
    return 1 if ($val =~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/); # { print "a C float\n" }
    return 0 if ($val =~ /\D/); # { print "has nondigits\n" }
    return 0;
}

my $last_text = '';

sub enum_array($$$$);
sub enum_array($$$$) {
    my ($ra,$ind,$txt,$lev) = @_;
    my $max = scalar @{$ra};
    prt("$ind Enumerate array of $max items\n") if (VERB9());
    my ($i,$ref,$val,$i2,@arr,$cnt,$key2,$val2,$add);
    my $text = '';
    my $lctxt = lc($txt);
    for ($i = 0; $i < $max; $i++) {
        $i2 = $i + 1;
        $val = ${$ra}[$i];
        $ref = ref($val);
        if ($ref && length($ref)) {
            if ($ref eq 'HASH') {
                @arr = keys %{$val};
                $cnt = scalar @arr;
                if ($cnt) {
                    prt("$ind $i2: $ref - $cnt keys - ");
                    $add = 0;
                    foreach $key2 (@arr) {
                        $val2 = ${$val}{$key2};
                        prt("$key2=$val2 ");
                    }
                    prt("\n");
                }
                $i++;   # skip the '0'
            } elsif ($ref eq 'ARRAY') {
                @arr = @{$val};
                $cnt = scalar @arr;
                prt("$ind $i2: $ref - $cnt items\n") if (VERB5());
                enum_array(\@arr,$ind.'   ',$text,$lev+1);
                $i++;   # skip the '0'
            } else {
                prt("$ind $i2: uncased *FIX ME*\n");
            }
            ###$i += 2;    # skip '0' and newline text
        } else {
            $val = trim_all($val);
            if (length($val)) {
                if ($lev > 1) {
                    if ($txt =~ /Waypoint/i) {
                        prt("$ind $i2: wpt $val\n") if (VERB5());
                    } else {
                        if (is_a_double($val)) {
                            prt("$ind $i2: num \"$lctxt\":$val\n");
                        } else {
                            prt("$ind $i2: txt \"$lctxt\":\"$val\"\n");
                        }
                    }
                } else {
                    if ($lev) {
                        if ($val =~ /Waypoint/i) {
                            prt("$ind $i2: Wpt $val\n");
                        } else {
                            prt("$ind $i2: TXT $val\n") if (VERB5());
                        }
                    } else {
                        prt("$ind $i2: Txt $val\n");
                    }
                }
                $text = $val
            }
        }
    }
}


sub process_in_file($) {
    my ($inf) = @_;
    #my $p = new XML::Parser(Style => 'Debug'); # NO GOOD Nothing special is returned by parse
    my $p = new XML::Parser(Style => 'Tree'); # seems best
    #my $p = new XML::Parser(Style => 'Objects'); # Hmmm, bless Kids - seems difficulat
    #my $p = new XML::Parser(Style => 'Stream'); # GOT NOTHING NO GOOD 
    #my $p = new XML::Parser(Style => 'Subs'); # NO GOOD Nothing special is returned by parse
    prt("Parsing file [$inf]\n");
    my $xml = $p->parsefile($inf);
    #    prt("Dumping return...\n");
    # prt(Dumper($xml));
    # $load_log = 1;
    #if (defined ${$xml}{ProceduresDB}) { # NOT a hash reference
    #if (defined $xml{ProceduresDB}) { # NO %xml exists
    #if (defined ${$xml}['ProceduresDB']) { # not numeric
    my $len = scalar @{$xml};
    my ($i,$ref,$i2,@arr,$ra2,$cnt,$j,$ra3,$ref2,$j2,$hadarr,@arr2,$cnt2,$key,$val,$add);
    my $lctxt = '';
    if (! defined ${$xml}[0]) { # not numeric
        prt("Do not have an array\n");
        return;
    }
    if ($len < 2) {
        prt("Insufficient length\n");
        return;
    }

    prt("ProceduresDB is defined. len $len\n");
    my $ra = ${$xml}[1];    # extracts an ARRAY
    #prt($ra);
    $len = scalar @{$ra};
    prt("Array len $len\n");
    for ($i = 0; $i < $len; $i++) {
        $i2 = $i + 1;
        $ra2 = ${$ra}[$i];
        $ref = ref($ra2);
        if ($ref && length($ref)) {
            if ($ref eq 'HASH') {
                @arr = keys %{$ra2};
                $cnt = scalar @arr;
                prt("$i2: $ref - with $cnt keys - ");
                $cnt = 0;
                foreach $key (@arr) {
                    $val = ${$ra2}{$key};
                    prt("$key=$val ");
                }
                prt("\n");
                $i += 2;    # skip the '0' and newline text
            } elsif ($ref eq 'ARRAY') {
                $cnt = scalar @{$ra2};
                prt("$i2: $ref - with $cnt items\n");
                for ($j = 0; $j < $cnt; $j++) {
                    $j2 = $j + 1;
                    $ra3 = ${$ra2}[$j];
                    $ref2 = ref($ra3);
                    if ($ref2 && length($ref2)) {
                        if ($ref2 eq 'HASH') {
                            @arr2 = keys %{$ra3};
                            $cnt2 = scalar @arr2;
                            prt("   $j2: $ref2 - with $cnt2 keys - ");
                            $add = 0;
                            foreach $key (@arr2) {
                                $val = ${$ra3}{$key};
                                if ($key =~ /ICAO/) {
                                    $key = 'icao';
                                    $val = uc($val);
                                } else {
                                    $key = lc($key);
                                    $val = lc($val);
                                }
                                prt("$add: $key=$val ");
                            }
                            prt("\n");
                        } elsif ($ref2 eq 'ARRAY') {
                            @arr2 = @{$ra3};
                            $cnt2 = scalar @{$ra3};
                            prt("   $j2: $ref2 - with $cnt2 items\n   ");
                            enum_array(\@arr2,'     ',$last_text,0);
                        } else {
                            prt("   $j2: ref=$ref2 uncased *FIX ME*");
                        }
                        $j += 2;
                    } else {
                        $ra3 = trim_all($ra3);
                        if (length($ra3)) {
                            $lctxt = lc($ra3);
                            prt("   $j2: txt [$lctxt] ");
                            $last_text = $ra3;
                        } else {
                            prt("   $j2: blank ") if (VERB9());
                        }
                    }
                }
                prt("\n");
                $i += 2;    # skip the '0' and newline text
            } else {
                prt("$i2: $ref - not parsed\n");
            }
        } else {
            # is text
            $ra2 = trim_all($ra2);
            if (length($ra2)) {
                $lctxt = lc($ra2);
                prt("$i2: txt $lctxt\n");
            } else {
                prt("$i2: txt blank\n");
            }
        }
    }
    $load_log = 1;
}

#########################################
### MAIN ###
parse_args(@ARGV);
process_in_file($in_file);
pgm_exit(0,"");
########################################

sub need_arg {
    my ($arg,@av) = @_;
    pgm_exit(1,"ERROR: [$arg] must have a following argument!\n") if (!@av);
}

sub parse_args {
    my (@av) = @_;
    my ($arg,$sarg);
    while (@av) {
        $arg = $av[0];
        if ($arg =~ /^-/) {
            $sarg = substr($arg,1);
            $sarg = substr($sarg,1) while ($sarg =~ /^-/);
            if (($sarg =~ /^h/i)||($sarg eq '?')) {
                give_help();
                pgm_exit(0,"Help exit(0)");
            } elsif ($sarg =~ /^v/) {
                if ($sarg =~ /^v.*(\d+)$/) {
                    $verbosity = $1;
                } else {
                    while ($sarg =~ /^v/) {
                        $verbosity++;
                        $sarg = substr($sarg,1);
                    }
                }
                prt("Verbosity = $verbosity\n") if (VERB1());
            } elsif ($sarg =~ /^l/) {
                if ($sarg =~ /^ll/) {
                    $load_log = 2;
                } else {
                    $load_log = 1;
                }
                prt("Set to load log at end. ($load_log)\n") if (VERB1());
            } elsif ($sarg =~ /^o/) {
                need_arg(@av);
                shift @av;
                $sarg = $av[0];
                $out_file = $sarg;
                prt("Set out file to [$out_file].\n") if (VERB1());
            } else {
                pgm_exit(1,"ERROR: Invalid argument [$arg]! Try -?\n");
            }
        } else {
            $in_file = $arg;
            prt("Set input to [$in_file]\n") if (VERB1());
        }
        shift @av;
    }

    if ($debug_on) {
        prtw("WARNING: DEBUG is ON!\n");
        if ((length($in_file) ==  0) && $debug_on) {
            $in_file = $def_file;
            prt("Set DEFAULT input to [$in_file]\n");
        }
        $verbosity = 9;
    }
    if (length($in_file) ==  0) {
        pgm_exit(1,"ERROR: No input files found in command!\n");
    }
    if (! -f $in_file) {
        pgm_exit(1,"ERROR: Unable to find in file [$in_file]! Check name, location...\n");
    }
}

sub give_help {
    prt("$pgmname: version $VERS\n");
    prt("Usage: $pgmname [options] in-file\n");
    prt("Options:\n");
    prt(" --help  (-h or -?) = This help, and exit 0.\n");
    prt(" --verb[n]     (-v) = Bump [or set] verbosity. def=$verbosity\n");
    prt(" --load        (-l) = Load LOG at end. ($outfile)\n");
    prt(" --out <file>  (-o) = Write output to this file.\n");
}

# eof - template.pl

index -|- top

checked by tidy  Valid HTML 4.01 Transitional