unicode.pl to HTML.

index -|- end

Generated: Tue Feb 2 17:54:59 2010 from unicode.pl 2008/09/02 7.3 KB.

#!/perl -w
# NAME: unicode.pl
# AIM: Generate a BIG SEQUENCE of UNICODE values, and put
# it all in an HTML page.
# 02/09/2008 - put it in a table, and add jump links
# 07/09/2007 - geoff mclane - http://geoffair.net
use strict;
use warnings;
use File::Basename qw(basename);
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";
# log file stuff
# NOTE(review): $LF is not referenced anywhere else in this script; kept
# in case it is wanted later - confirm whether it can be dropped.
my ($LF);
# Name the output after the script itself, without any directory part.
# basename() follows the path rules of the current platform, so
# 'C:\dir\unicode.pl' on Windows and './unicode.pl' or '/path/unicode.pl'
# elsewhere all yield 'unicode.pl'.
my $pgmname = basename($0);
my $outfile = "temp.$pgmname.htm";
# open_log() is not defined in this file - presumably it comes from
# logfile.pl and makes $outfile the destination of prt() output (confirm).
open_log($outfile);
# features
my $addtable = 1;   # build it all into a table
my $wrap = 32;      # number of character references emitted per row
my $cnt = 0;        # references emitted so far in the current row
my $i = 0;          # current code value
my $msg = '';       # scratch: hexadecimal range text of a finished row
my $val = 0;        # scratch: first code value of a finished row
my $lncnt = 0;      # rows started since the last 'top/end' link row
my $topeach = 64;   # row-count threshold that triggers a 'top/end' link row
# Static HTML prologue: doctype, <head> metadata, style sheet link, the
# local '.cn' style rule and the opening <body> tag. The text contains no
# variables to interpolate. The <title> now reads 'Unicode Page', matching
# the <h1> heading printed right after it.
my $htm_head = <<EOF;
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<html>
 <head>
  <meta http-equiv="Content-Language"
        content="en">
  <meta http-equiv="Content-Type"
        content="text/html; charset=us-ascii">
  <meta name="Generator"
        content="EditPlus">
  <meta name="Author"
        content="Geoff McLane">
  <meta name="Keywords"
        content="geoff, mclane, UNICODE, character set">
  <meta name="Description"
        content="Generation of complete UNICODE character set">
  <title>
   Unicode Page
  </title>
  <link rel="stylesheet"
        type="text/css"
        href="ms.css">
  <style type="text/css">
  .cn { font-family : "Courier New"; }
  </style>
 </head>
 <body>
EOF
prt( $htm_head );
# Top of the page body: the 'top' anchor, the heading, the navigation bar
# and the opening sentence. Each string goes out through its own prt() call.
for my $line (
    qq{<a name="top"></a>\n},
    qq{<h1>Unicode Page</h1>\n},
    qq{<p class="ctr">\n},
    qq{ |- <a target="_self" href="index.htm">index</a> \n},
    qq{-|- <a target="_self" href="../home2.htm">home</a> \n},
    qq{-|- <a target="_self" href="#end">end</a> \n},
    qq{-|  \n},
    qq{</p>\n},
    qq{<p>A simple Perl generated list of UNICODE values, 0-65535, just to see what happens! And\n},
    qq{what is displayed.},
) {
    prt($line);
}
# Lead-in to the listing: in table mode close the paragraph and open the
# three-column table with its header row; otherwise describe the plain
# list layout and open the first listing paragraph.
for my $line (
    $addtable
    ? (
        qq{</p>\n},
        qq{<div class="cn">\n},
        qq{<table border="0" width="100%" cellpadding="0" cellspacing="0" summary="list of unicode">\n},
        qq{<tr>\n},
        qq{<th>\n},
        qq{ Decimal\n},
        qq{</th>\n},
        qq{<th>\n},
        qq{ Codes\n},
        qq{</th>\n},
        qq{<th>\n},
        qq{ Hexadecimal\n},
        qq{</th>\n},
        qq{</tr>\n},
      )
    : (
        qq{<br>\n},
        qq{List: Decimal Range - Values in form &amp;#nnnnn; - (Hexadecimal Range)</p>\n},
        qq{<p class="cn">Test 1 = 0 to 32767<br>\n},
      )
) {
    prt($line);
}
# prt_code_rows( $from, $to )
#
# Emit one numeric character reference (&#N;) for every code value from
# $from to $to inclusive, $wrap references per row. Each row starts with
# its decimal range and ends with the same range in hexadecimal. In table
# mode ($addtable) every row is a <tr> with three cells and a 'top/end'
# link row is inserted after every $topeach rows; otherwise a row is one
# line of text ended by <br>.
#
# The row state ($cnt, $lncnt) lives in the file-level variables, so a
# second call carries on exactly where the previous one stopped. Rows are
# only closed when they are full, so range sizes are expected to be whole
# multiples of $wrap (true for the two calls below).
sub prt_code_rows {
    my ($from, $to) = @_;
    for my $code ($from .. $to) {
        if ($cnt == 0) {
            # First reference of a row: print the decimal range label.
            $lncnt++;
            my $label = sprintf("%05d-%05d: ", $code, ($code + $wrap - 1) );
            if ($addtable) {
                if ($lncnt > $topeach) {
                    prt_top();
                    # The row opened right now is already the first row
                    # of the next group, so count it. Resetting to 0
                    # here made every group after the first one row
                    # longer than $topeach.
                    $lncnt = 1;
                }
                prt( "<tr>\n" );
                prt( "<td>\n" );
                prt( $label );
                prt( "</td>\n" );
                prt( "<td>\n" );
            } else {
                prt( $label );
            }
        }
        prt( "&#$code;" );
        $cnt++;
        next if $cnt < $wrap;
        # Row is full: append the hexadecimal range and close the row.
        my $hexrange = ' (' . dec2hex( $code - ($wrap - 1) ) . '-' . dec2hex( $code ) . ')';
        if ($addtable) {
            prt( "</td>\n" );
            prt( "<td>\n" );
            prt( "$hexrange\n" );
            prt( "</td>\n" );
            prt( "</tr>\n" );
        } else {
            prt( "$hexrange<br>\n" );
        }
        $cnt = 0;
    }
    return;
}

# First half of the range.
prt_code_rows( 0, 32767 );
if (!$addtable) {
    # The plain listing is split into two paragraphs; the table is not.
    prt( "End Set 1</p>\n" );
    prt( "<p class=\"cn\">Test 2 - 32768 to 65535<br>\n" );
}
# Second half of the range.
prt_code_rows( 32768, 65535 );
# Finish the listing: a last 'top/end' link row and the closing table
# wrapper in table mode, or the end of the second plain paragraph.
if ($addtable) {
    prt_top();
    for my $line ( qq{</table>\n}, qq{</div>\n} ) {
        prt($line);
    }
} else {
    prt( qq{End Set 2</p>\n} );
}
# Closing remarks, the 'end' anchor and the link back to the top.
for my $line (
    qq{<p>\n},
    qq{It also serves to show what 'code pages' your system presently supports! \n},
    qq{If the default 'square' is displayed, then that character set is not available \n},
    qq{for display, or is just not used by anyone. \n},
    qq{</p>\n},
    qq{<p>This page, <a target="_self" href="unicodet.htm">unicodet.htm</a>, is a \n},
    qq{<a target="_blank" href="http://geoffair.net/tidy/">HTML Tidied</a> version of this \n},
    qq{raw page, where quite a number of the &amp;#nnnn values have been replaced by their coding \n},
    qq{entity - that is say &amp;#33; has been replaced with '!', &amp;#128; has been replaced \n},
    qq{with &amp;euro;, etc ... but it should display the same ...</p>\n},
    qq{<a name="end"></a>\n},
    qq{<p class="top"><a target="_self" href="#top">top</a></p>\n},
) {
    prt($line);
}
# Generation stamps as HTML comments; the time is read when this line runs.
prt( sprintf( "<!-- generated by %s on %s for geoffair.net -->\n",
              $pgmname, scalar localtime(time()) ) );
for my $line (
    qq{<!-- first generated by unicode.pl on Fri Sep  7 12:57:48 2007 for geoffair.net -->\n},
    qq{</body>\n},
    qq{</html>\n},
    qq{\n},
) {
    prt($line);
}
# close_log() is not defined in this file; the meaning of its second
# argument cannot be seen from here.
close_log($outfile,1);
exit(0);
################
### subs
# Emit one table row spanning all three columns that holds the 'top' and
# 'end' jump links. Takes no arguments; callers ignore the return value.
sub prt_top {
    for my $line (
        qq{<tr>\n},
        qq{<td colspan="3">\n},
        qq{<p class="top">\n},
        qq{<a target="_self" href="#top">top</a> <a target="_self" href="#end">end</a>\n},
        qq{</p>\n},
        qq{</td>\n},
        qq{</tr>\n},
    ) {
        prt($line);
    }
}
# dec2hex( $decnum )
#
# Convert a non-negative integer to upper-case hexadecimal text, padded
# on the left with zeros to at least four digits:
#   0 -> '0000', 255 -> '00FF', 65535 -> 'FFFF', 65536 -> '10000'.
#
#   $decnum - the decimal value to convert (only its integer part is used)
# Returns the hexadecimal string.
sub dec2hex {
    my ($decnum) = @_;
    # '%04X' yields exactly what the former digit-by-digit loop built
    # for every non-negative integer, without the hand-rolled
    # remainder/divide steps.
    return sprintf('%04X', $decnum);
} # end sub
# eof - unicode.pl

index -|- top

checked by tidy  Valid HTML 4.01 Transitional