Generated: Tue Feb 2 17:54:56 2010 from sitemap02.pl 2009/10/16 7.9 KB.
#!\perl -w
#
# sitemap02.pl -- a command-line utility for building an HTML site map
# ===========================================
# Usage: perl sitemap02.pl /file/path/to/root/ ['xhtml, html']
#   arg 1: local root directory to scan   (default 'C:/homepage/GA')
#   arg 2: comma separated extension list (default 'htm, html, xhtml')
#
# Walks the root directory, extracts the <title> of every matching file,
# and writes an index page (table or ordered list) to $out_file.
#
# 2009/10/16 - revisited
# 22/07/2007 - some initial experiments
use strict;
use warnings;
use File::Basename;
use Cwd;
use File::stat;
use File::Find;
use Getopt::Long;

# NOTE(review): prt(), mydie(), open_log(), close_log() and trim_all() are not
# defined in this file; presumably logfile.pl supplies them -- confirm.
# require itself dies when the file cannot be loaded, so no 'or die' is needed.
require 'logfile.pl';

# debug switches
my $dbg01 = 1;    # show "[dbg01] Processing <directory> ..." messages
my $dbg2  = 0;    # show each file as it is processed
my $dbg3  = 0;    # trace the title extraction
my $dbg4  = 0;    # show per-file sizes and every pushed result
my $dbg05 = 0;    # show "[dbg05] Skipping <directory> ..." messages

# log file stuff
my $pgmname = $0;
if ($pgmname =~ /\w{1}:\\.*/) {
    # Looks like a DOS style full path: keep only the last component.
    my @tmpsp = split(/\\/, $pgmname);
    $pgmname = $tmpsp[-1];
}
my $outfile = "temp.$pgmname.txt";
open_log($outfile);
###prt( "$0 ... Hello, World ...\n" );

# features
my $use_table   = 1;    # output HTML results in a TABLE form (else an <ol>)
my $use_fn_only = 1;    # link text is the bare file name, not the relative path
my $out_file    = 'tempsm.htm';

# HTML stuff
my $m_doctype = '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"'."\n".
    '"http://www.w3.org/TR/html4/loose.dtd">';

my @results   = ();    # list of [relative file name, title] pairs
my @fpfolders = qw( _vti_cnf _vti_pvt _private _derived );

# parse the command line options:
##GetOptions( "localpath=s" => \$local_path,
##            "exts=s"      => \$file_exts);
# or JUST A SIMPLE SHIFT
my $local_path = shift || 'C:/homepage/GA';
push(@fpfolders, 'ok');
#$local_path = shift || 'C:/homepage/GeoffAir';
#$file_exts = shift || 'htm, html, xhtml, php';
my $file_exts = shift || 'htm, html, xhtml';
mydie( "Usage: perl sitemap02.pl /file/path/to/root/ ['xhtml, html']\n" )
    unless $local_path and $file_exts;

$local_path =~ s/\/$//;    # drop one trailing slash
my @file_exts = split(/\s?,\s?/, $file_exts);

my @months   = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
my @weekDays = qw(Sun Mon Tue Wed Thu Fri Sat Sun);

# get_date_time()
# Returns the current local time as "HH:MM:SS, Www Mmm D, YYYY".
# The time fields are zero padded (previously 17:04:06 came out as 17:4:6).
sub get_date_time {
    my ($second, $minute, $hour, $dayOfMonth, $month, $yearOffset, $dayOfWeek)
        = localtime();
    my $year    = 1900 + $yearOffset;
    my $theTime = sprintf('%02d:%02d:%02d', $hour, $minute, $second);
    my $theDate = "$weekDays[$dayOfWeek] $months[$month] $dayOfMonth, $year";
    return "$theTime, $theDate";
}

my %file_list = find_local_files(\@file_exts, [$local_path]);

##
# now, to the hunt...
#
DIR:
foreach my $directory (sort keys %file_list) {
    # Only the last path component is compared with the FrontPage folder list.
    my @arr = split('/', $directory);
    if (is_fp_folder($arr[-1])) {
        prt( "[dbg05] Skipping $directory ...\n" ) if ($dbg05);
        next DIR;
    }
    prt( "[dbg01] Processing $directory ...\n" ) if ($dbg01);

    FILE:
    foreach my $file (sort @{ $file_list{$directory} }) {
        # File name relative to the scanned root.
        my $fn = substr($file, length($local_path) + 1);
        prt( "Processing file $fn [$file] ...\n" ) if ($dbg2);

        my $inf;
        if (!open($inf, '<', $file)) {
            prt( "WARNING: Unable to open file [$file] ... $! ... \n" );
            next FILE;
        }
        my @lines = <$inf>;
        close $inf;

        my $line = join('', @lines);
        my $max  = length($line);
        prt( "Processing $fn, $max characters, in ".scalar(@lines)." lines ...\n" ) if ($dbg4);

        my $title = get_title($line);
        push(@results, [$fn, $title]);
        prt( "push(\@results, [$fn, $title])\n") if ($dbg4);
    }
}

# write_a_html_file($out, $rresa)
#   $out   - name of the HTML file to create
#   $rresa - reference to an array of [file name, title] pairs
# Writes the index page as a table ($use_table) or an ordered list, then
# hands the file name to system().  Nothing is written when the list is empty.
sub write_a_html_file {
    my ($out, $rresa) = @_;
    my $mcnt = scalar @{$rresa};
    prt( "Got $mcnt results ...\n" );
    return unless $mcnt;

    my $OF;
    if (!open($OF, '>', $out)) {
        prt( "WARNING: Unable to create file [$out] ... $! ... \n" );
        return;
    }

    write_html_head($OF);
    # Report the number of results (this used to print the character count
    # of the last file read).
    print {$OF} "<p>List of $mcnt files found ...</p>\n";

    if ($use_table) {
        print {$OF} "<table border=\"0\" cellpadding=\"2\" cellspacing=\"2\" summary=\"List of site pages\" align=\"center\">\n";
        print {$OF} " <tr>\n";
        print {$OF} " <th>Num</th><th>Link</th><th>Title</th>\n";
        print {$OF} " </tr>\n";
        my $cnt = 0;
        foreach my $row (@{$rresa}) {
            my ($fnm, $tit) = @{$row};
            $cnt++;
            # fileparse() returns (name, directory, suffix); only the name is shown.
            my $dnm = $use_fn_only ? (fileparse($fnm))[0] : $fnm;
            print {$OF} " <tr>\n";
            print {$OF} " <td align=\"right\">$cnt</td><td><a href=\"$fnm\">$dnm</a></td><td><b>$tit</b></td>\n";
            print {$OF} " </tr>\n";
        }
        print {$OF} "</table>\n";
    }
    else {
        print {$OF} "<ol>\n";
        foreach my $row (@{$rresa}) {
            my ($fnm, $tit) = @{$row};
            print {$OF} "<li><a href=\"$fnm\">$fnm</a> - <b>$tit</b></li>\n";
        }
        print {$OF} "</ol>\n";
    }

    write_html_tail($OF, $out);
    # Buffered write errors only surface at close time.
    close $OF or prt( "WARNING: Unable to close file [$out] ... $! ... \n" );

    # NOTE(review): presumably relies on the Windows file association to open
    # the page in a browser -- confirm on other platforms.
    system($out);
    return;
}

write_a_html_file( $out_file, \@results );

my $msg = "TBD";
prt( "$msg\n" );

close_log($outfile,1);
exit(0);

# utility subroutines

# get_title($txt)
# Returns the text between the first <title ...> tag (any case, attributes
# allowed) and the next '<', passed through trim_all().  Yields trim_all('')
# when the text holds no title tag.
sub get_title {
    my ($txt) = @_;
    my $len = length($txt);
    prt( "Get title from $len characters ...\n" ) if ($dbg3);

    my $tit = '';
    # [^<]* also crosses newlines, so multi-line titles are captured whole.
    if (my ($tag, $text) = $txt =~ m{(<title\b[^>]*>)([^<]*)}i) {
        prt( "Got $tag ...\n" ) if ($dbg3);
        $tit = $text;
    }
    return trim_all($tit);
}

# find_local_files(\@extensions, \@directories)
# Walks the given directories and returns a hash (as a list) that maps each
# directory name to an array ref of the full names of the files in it whose
# extension is in the list.  Directories and symbolic links are ignored.
sub find_local_files {
    my ($extensions, $directories) = @_;
    my %found = ();

    # quotemeta keeps regex metacharacters in an extension literal.
    my $extension_re = '(' . join('|', map { quotemeta } @{$extensions}) . ')';
    prt( "Finding files with extensions [$extension_re]...\n" );
    my $wanted_re = qr/\.$extension_re$/;

    my $wanted_files = sub {
        return if -d $_;
        return if -l $_;
        push(@{ $found{$File::Find::dir} }, $File::Find::name)
            if $File::Find::name =~ $wanted_re;
    };
    File::Find::find($wanted_files, @{$directories});

    my $cnt = scalar keys(%found);
    prt( "Done... returning file list of $cnt items...\n" );
    return %found;
}

# translate_path($old_path, $new_path, $file)
# Returns $file with the first occurrence of $old_path replaced by $new_path.
# $old_path is matched literally, not as a regular expression.
sub translate_path {
    my ($old_path, $new_path, $file) = @_;
    $file =~ s|\Q$old_path\E|$new_path|;
    return $file;
}

################################################
# ignore FRONTPAGE folders
################################################
# is_fp_folder($inf)
# Returns 1 when $inf equals, ignoring case, an entry of @fpfolders, else 0.
sub is_fp_folder {
    my ($inf) = @_;
    return 0 unless defined $inf;
    my $wanted = lc($inf);
    return (grep { lc($_) eq $wanted } @fpfolders) ? 1 : 0;
}

# write_html_head($OF)
# Prints the doctype and the fixed page header to the given file handle.
sub write_html_head {
    my ($f) = @_;
    print {$f} "$m_doctype\n";
    print {$f} <<"EOF";
<html>
<head>
<title>Site Index</title>
<meta http-equiv="Content-Language" content="en-au">
<meta http-equiv="Content-Type" content="text/html; charset=windows-1252">
</head>
<body>
<h1 align="center"><a name="bm_top" id="bm_top"></a>Site Index</h1>
EOF
    return;
}

# write_html_tail($OF, $filename)
# Prints the end-of-file marker naming $filename, the "generated by" and
# timestamp comments, and the closing body/html tags.
sub write_html_tail {
    my ($f, $of) = @_;
    # The heredoc used the undeclared $off, which does not compile under
    # 'use strict'; it now names the file and closes the anchor.
    print {$f} <<"EOF";
<p><a name="bm_end" id="bm_end"></a>EOF - $of </p>
EOF
    my $msg = "<!-- generated by $pgmname -->\n";
    $msg .= "<!-- ";
    $msg .= get_date_time();
    $msg .= " -->\n";
    print {$f} $msg;
    print {$f} "</body>\n";
    print {$f} "</html>\n";
    return;
}

# eof - sitemap02.pl