xml2json02.pl to HTML.

index -|- end

Generated: Sun Mar 2 17:20:30 2014 from xml2json02.pl 2014/03/01 12 KB. text copy

#!/usr/bin/perl -w
# NAME: xml2json02.pl
# AIM: Convert an XML file to a JSON file
# 01/03/2014 geoff mclane http://geoffair.net/mperl
# This version is bases on the oneliner from : http://blogs.perl.org/users/jhannah_mutation_grid/2010/09/one-liner-xmlperljson.html
use strict;
use warnings;
use File::Basename;  # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] )
use JSON::Any;
use XML::Simple;
use Cwd;
my $os = $^O;
my $perl_dir = '/home/geoff/bin';
my $PATH_SEP = '/';
my $temp_dir = '/tmp';
if ($os =~ /win/i) {
    $perl_dir = 'C:\GTools\perl';
    $temp_dir = $perl_dir;
    $PATH_SEP = "\\";
}
unshift(@INC, $perl_dir);
require 'lib_utils.pl' or die "Unable to load 'lib_utils.pl' Check paths in \@INC...\n";
# log file stuff
our ($LF);
my $pgmname = $0;
if ($pgmname =~ /(\\|\/)/) {
    my @tmpsp = split(/(\\|\/)/,$pgmname);
    $pgmname = $tmpsp[-1];
}
my $outfile = $temp_dir.$PATH_SEP."temp.$pgmname.txt";
open_log($outfile);

# user variables
my $VERS = "0.0.2 2014-01-13";
my $load_log = 0;
my $in_file = '';
my $verbosity = 0;
my $out_file = '';
my $do_pretty = 0;
my $indent = '  ';
my $fix_numbers = 0;
my $rem_trailing = 0;   # TODO: FIX THIS!!!
# ### DEBUG ###
my $debug_on = 0;
my $def_file = 'def_file';

### program variables
my @warnings = ();
my $cwd = cwd();

sub VERB1() { return $verbosity >= 1; }
sub VERB2() { return $verbosity >= 2; }
sub VERB5() { return $verbosity >= 5; }
sub VERB9() { return $verbosity >= 9; }

sub show_warnings($) {
    my ($val) = @_;
    if (@warnings) {
        prt( "\nGot ".scalar @warnings." WARNINGS...\n" );
        foreach my $itm (@warnings) {
           prt("$itm\n");
        }
        prt("\n");
    } else {
        prt( "\nNo warnings issued.\n\n" ) if (VERB9());
    }
}

sub pgm_exit($$) {
    my ($val,$msg) = @_;
    if (length($msg)) {
        $msg .= "\n" if (!($msg =~ /\n$/));
        prt($msg);
    }
    show_warnings($val);
    close_log($outfile,$load_log);
    exit($val);
}


sub prtw($) {
   my ($tx) = shift;
   $tx =~ s/\n$//;
   prt("$tx\n");
   push(@warnings,$tx);
}

sub pretty_json_simple($) {
    my $json = shift;
    $json =~ s/,/,\n /g;
    $json =~ s/\[/\[\n /g;
    $json =~ s/\{/\n {/g;
    $json =~ s/\n\n/\n/g;
    $json =~ s/\n \n/\n/g;
    $json = trim_leading($json);
    return $json;
}

sub pretty_json($) {
    my $json = shift;
    my $len = length($json);
    my $njson = '';
    my ($i,$ch,$olen,$alen,$ind);
    my @arrays = ();
    my @objs = ();
    my $inq = 0;
    my $hf = 0;
    $olen = 0;
    $ind = '';
    for ($i = 0; $i < $len; $i++) {
        $ch = substr($json,$i,1);
        if ($inq) {
            $njson .= $ch;
            $inq = 0 if ($ch eq '"');
        } else {
            next if ($ch =~ /\s/);
            if ($ch eq '"') {
                $inq = 1;
                $njson .= $ch;
                next;
            }
            if ($ch eq '{') {
                push(@objs,$ch);
                $olen = scalar @objs;
                $olen-- if ($olen);
                $ind = $indent x $olen;
                if ($hf) {
                    $njson .= "\n".$ind.$ch."\n";
                    $ind .= $indent;
                    $njson .= $ind;
                    $ch = '';
                } else {
                    $hf = 1;
                }
            } elsif ($ch eq '}') {
                $ind = $indent x $olen;
                $njson .= "\n".$ind;
                if (@objs) {
                    pop @objs;
                    $olen = scalar @objs;
                    $olen-- if ($olen);
                    $ind = $indent x $olen;
                }
            } elsif ($ch eq ',') {
                $njson .= $ch."\n".$ind;
                $ch = '';
            }
            $njson .= $ch;
        }
    }
    $njson =~ s/\n\s*\n/\n/g;
    $njson .= "\n" if ( !($njson =~ /\n$/g) );
    return $njson;
}

sub is_a_double($) {
    my $val = shift;
    return 1 if ($val =~ /^-?\d+\.?\d*$/); # { print "is a real number\n" }
    return 1 if ($val =~ /^\d+$/); # { print "is a whole number\n" }
    return 1 if ($val =~ /^-?\d+$/); # { print "is an integer\n" }
    return 1 if ($val =~ /^[+-]?\d+$/); # { print "is a +/- integer\n" }
    return 1 if ($val =~ /^-?(?:\d+(?:\.\d*)?|\.\d+)$/); # { print "is a decimal number\n" }
    return 1 if ($val =~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/); # { print "a C float\n" }
    return 0 if ($val =~ /\D/); # { print "has nondigits\n" }
    return 0;
}

sub fix_json_decimal($) {
    my $val = shift;
    ##$val = trim_all($val);
    $val = substr($val,1) if ($val =~ /^\+/); # remove any redundant leading '+'
    my $len = length($val);
    return $val if ($len == 1);
    ###prt("fix json decimal $val\n");
    my $nval = '';
    my ($i,$pp,$cp,$np,$i2,$hd,$hm,$hv);
    $cp = '';
    $hd = 0;
    $hm = 0;
    $hv = 0;
    for ($i = 0; $i < $len; $i++) {
        $i2 = $i + 1;
        $pp = $cp;
        $cp = substr($val,$i,1);
        $np = ($i2 < $len) ? substr($val,$i2,1) : '';
        if ($cp eq '.') {
            $hd = 1;
        } elsif ($cp eq '-') {
            $hm = 1;
        } elsif ($cp eq '0') {
            if (!$hd && !$hv) {
                if ($np =~ /\d/) {
                    $cp = '';   # kill this
                }
            }
        } elsif ($cp =~ /\d/) {
            $hv = 1;
        }
        $nval .= $cp;
    }

    if ($rem_trailing && ($nval =~ /\./)) {
        prt("Got a decimal $nval ");
        my $oval = $nval;
        my @arr = split(/\./,$nval);
        $len = scalar @arr;
        if ($len == 2) {
            my $tmp = $arr[1];
            prt(" test $tmp ");
            if ($tmp =~ /^\d+$/) {
                my $chg = 0;
                $len = length($tmp);
                prt(" len $len ");
                for ($i = ($len - 1); $i > 0; $i--) {
                    $cp = substr($tmp,$i,1);
                    if ($cp eq '0') {
                        ###$tmp = substr($tmp,0,length($tmp)-2);
                        $chg++;
                        prt(" chg $chg [$cp] ");
                    } else {
                        prt(" $i NZ [$cp] $chg ");
                        last;
                    }
                }
                if ($chg > 0) {
                    prt(" chg $chg len $len ");
                    $tmp = substr($tmp,0,($len - $chg - 1));
                    $nval = $arr[0].'.'.$tmp;
                } else {
                    prt(" chg=$chg ");
                }
            } else {
                prt(" not DIG ");
            }
        } else {
            prt("Split NOT 2 $len");
        }
        if ($nval == $oval) {
            prt(" NO CHANGE\n");
        } else {
            prt(" change to $nval\n");
        }
    }
    return $nval;
}

# take pure number out of quotes, and also lose an leading zeros
sub number_fix($) {
    my $json = shift;
    my $len = length($json);
    my ($i,$ch,$inq,$num);
    my $njson = '';
    $inq = 0;
    for ($i = 0; $i < $len; $i++) {
        $ch = substr($json,$i,1);
        if ($inq) {
            if ($ch eq '"') {
                if (is_a_double($num)) {
                    $njson .= fix_json_decimal($num);
                } else {
                    $njson .= "\"$num\"";
                }
                $inq = 0;
            } else {
                $num .= $ch;
            }
        } elsif ($ch eq '"') {
            $num = '';
            $inq = 1;
        } else {
            $njson .= $ch;
        }
    }
    return $njson;
}

sub process_in_file($) {
    my ($inf) = @_;
    if (! -f $inf ) {
        pgm_exit(1,"ERROR: Unable to find file [$inf]\n"); 
    }
    prt("Loading in file $in_file...\n");
    my $xml = XMLin($in_file);
    prt("Converting to JSON using JSON::Any...\n");
    my $json = JSON::Any->new()->objToJson($xml);
    if ($fix_numbers) {
        prt("Doing json number fix...\n");
        $json = number_fix($json);
    }
    if ($do_pretty) {
        prt("Doing pretty of json...\n");
        $json = pretty_json($json);
    }
    if (length($out_file)) {
        write2file($json,$out_file);
        prt("JSON written to [$out_file]\n");
    } else {
        prt("$json\n\n");
        prt("No -o out_file in command, so written to stdout\n");
    }
}

#########################################
### MAIN ###
parse_args(@ARGV);
process_in_file($in_file);
pgm_exit(0,"");
########################################

sub need_arg {
    my ($arg,@av) = @_;
    pgm_exit(1,"ERROR: [$arg] must have a following argument!\n") if (!@av);
}

sub parse_args {
    my (@av) = @_;
    my ($arg,$sarg);
    my $verb = VERB2();
    while (@av) {
        $arg = $av[0];
        if ($arg =~ /^-/) {
            $sarg = substr($arg,1);
            $sarg = substr($sarg,1) while ($sarg =~ /^-/);
            if (($sarg =~ /^h/i)||($sarg eq '?')) {
                give_help();
                pgm_exit(0,"Help exit(0)");
            } elsif ($sarg =~ /^v/) {
                if ($sarg =~ /^v.*(\d+)$/) {
                    $verbosity = $1;
                } else {
                    while ($sarg =~ /^v/) {
                        $verbosity++;
                        $sarg = substr($sarg,1);
                    }
                }
                $verb = VERB2();
                prt("Verbosity = $verbosity\n") if ($verb);
            } elsif ($sarg =~ /^l/) {
                if ($sarg =~ /^ll/) {
                    $load_log = 2;
                } else {
                    $load_log = 1;
                }
                prt("Set to load log at end. ($load_log)\n") if ($verb);
            } elsif ($sarg =~ /^o/) {
                need_arg(@av);
                shift @av;
                $sarg = $av[0];
                $out_file = $sarg;
                prt("Set out file to [$out_file].\n") if ($verb);
            } elsif ($sarg =~ /^p/) {
                $do_pretty = 1;
                prt("Set to pretty the json output.\n") if ($verb);
            } elsif ($sarg =~ /^n/) {
                $fix_numbers = 1;
                prt("Set to fix json numbers.\n") if ($verb);
            } elsif ($sarg =~ /^i/) {
                need_arg(@av);
                shift @av;
                $sarg = $av[0];
                if ($sarg =~ /^\d+$/) {
                    $indent = ' ' x $sarg;
                    prt("Set indent to [$indent].\n") if (VERB1());
                } else {
                    pgm_exit(1,"Expected integer following $arg, NOT [$sarg]\n");
                }
            } else {
                pgm_exit(1,"ERROR: Invalid argument [$arg]! Try -?\n");
            }
        } else {
            $in_file = $arg;
            prt("Set input to [$in_file]\n") if ($verb);
        }
        shift @av;
    }

    if ($debug_on) {
        prtw("WARNING: DEBUG is ON!\n");
        if (length($in_file) ==  0) {
            $in_file = $def_file;
            prt("Set DEFAULT input to [$in_file]\n");
        }
    }
    if (length($in_file) ==  0) {
        pgm_exit(1,"ERROR: No input files found in command!\n");
    }
    if (! -f $in_file) {
        pgm_exit(1,"ERROR: Unable to find in file [$in_file]! Check name, location...\n");
    }
}

sub give_help {
    prt("$pgmname: version $VERS\n");
    prt("Usage: $pgmname [options] in-file\n");
    prt("Options:\n");
    prt(" --help   (-h or -?) = This help, and exit 0.\n");
    prt(" --verb[n]      (-v) = Bump [or set] verbosity. def=$verbosity\n");
    prt(" --load         (-l) = Load LOG at end. ($outfile)\n");
    prt(" --pretty       (-p) = Pretty output. That is reline the json, inserting newlines, and indents.\n");
    prt(" --indent <num> (-i) = Number of indent spaces. (def=".length($indent).")\n");
    prt(" --number       (-n) = Fix json numbers. Default of JSON::Any is to quote them all.\n");
    prt(" --out <file>   (-o) = Write output to this file.\n");
}

# eof - xml2json02.pl

index -|- top

checked by tidy  Valid HTML 4.01 Transitional