Generated: Tue Feb 2 17:54:52 2010 from pageviewer.pl 2006/07/04 7.5 KB.
#!/Perl
#
# AIM: Get info from net, and generate a page viewer ...
#
# Reads a Google results page (from a saved test file, or fetched live via
# Win32::Internet), collects the external http:// links grouped by host, and
# writes an HTML page ($htmfile) holding an IFRAME plus a drop-down that steps
# through the collected links.
#
# Logging (open_log, prt, mydie, close_log) comes from logfile.pl.
# NOTE(review): mydie() is assumed to log its message and terminate - confirm
# against logfile.pl.
use strict;
use warnings;

# Win32::Internet is only needed for a live fetch, so it is loaded on demand
# in the fetch branch below; file mode then also runs where it is not installed.
#use LWP::Simple;

my $HF;                             # handle of the generated HTML page
my $outfile = 'temp'.$0.'.txt';     # log file name handed to logfile.pl
my $htmfile = 'temppage2.htm';
require "logfile.pl" or die "Missing logfile.pl ...\n";
open_log($outfile);
open $HF, '>', $htmfile or mydie("Failed to create $htmfile ...\n" );

my $testfile = 'temphtm.htm';
prt( "$0 ... Hello, World...\n" );
my $url = 'http://www.google.com/search?q=javascript+iframe+scrollTo';
# Reference samples kept from the original (not used by the code):
#  page 2: http://www.google.com/search?q=javascript+iframe+scrollTo&num=100&hl=en&lr=&start=100&sa=N
#  page 3: http://www.google.com/search?q=javascript+iframe+scrollTo&num=100&hl=en&lr=&start=200&sa=N
#  result: Results <b>301</b> - <b>400</b> of about <b>12,400</b> for <b>javascript iframe scrollTo</b>. (<b>0.46</b> seconds) </font>

my $loadfile = 1;   # 1 = parse $testfile, 0 = fetch $url from the net
my @exclude = qw( www.google.com images.google.com groups.google.com news.google.com );

# --- get the input, as a list of text chunks ---
my @arr = ();
my $lc = 0;
if ($loadfile) {
    prt( "Loading file $testfile ...\n");
    if ( ! -f $testfile) {
        mydie( "ERROR: Unable to find $testfile ...\n" );
    }
    open my $in, '<', $testfile or mydie( "ERROR: Unable to open test file \n");
    @arr = <$in>;
    close( $in );
    $lc = scalar @arr;
    if ($lc) {
        prt( "Got $lc filest lines ...\n" );
    } else {
        # was mydir(...), an undefined sub
        mydie( "ERROR: Got no lines from file ...\n" );
    }
} else {
    prt( "Fetching $url ...\n");
    require Win32::Internet;
    my $conn = Win32::Internet->new();
    my $text2 = $conn->FetchURL("$url");
    ##my $text2 = get($url);
    if (!defined $text2) {
        mydie( "ERROR: Failed to fetch $url ...\n" );
        $text2 = '';    # only reached if mydie() returns
    }
    my $llen = length($text2);
    prt( "Got $llen characters ...\n" );
    prt( $text2 );
    # One chunk per tag, so each chunk carries at most one href.
    @arr = split( /</, $text2 );
    $lc = scalar @arr;
    prt( "Got $lc split lines ...\n" );
}

# --- collect links, grouped by host ---
my %hurl = ();      # host => array ref of distinct hrefs seen for that host
my $rline = '';     # accumulated "Results ..." summary text (logged only)
my $rbgn = 0;       # true while inside the summary, until a /font> chunk
foreach my $line (@arr) {
    chomp $line;
    # The leading greedy .* means the LAST href on the line is the one taken.
    if ($line =~ /.*href="(\S+)".*/i) {
        my $href = $1;
        if ($href =~ /http:\/\/(.*)/i) {
            my $st = $1;    # everything after the scheme
            if ($href =~ m|^.*(\d+\.\d+\.\d+\.\d+)/search|i) {
                # 1.2.3.4
                prt( "Discarded IP [$href][$st]\n" );
            } else {
                my ($nurl) = split( /\//, $st );    # host part
                $nurl = '' unless defined $nurl;
                if (in_exclude($nurl)) {
                    prt( "Discard ADD [$nurl]][$href][$st]\n" );
                } elsif (defined $hurl{$nurl}) {
                    my $known = $hurl{$nurl};
                    prt( "Repeat [$nurl]\n" );
                    if (grep { $_ eq $href } @{$known}) {
                        prt( "Discarded REPEAT [$nurl]][$href][$st]\n" );
                    } else {
                        push(@{$known}, $href);
                        prt( "Added [" . join( ' ', @{$known} ) . "]\n" );
                    }
                } else {
                    $hurl{$nurl} = [ $href ];
                }
            }
        } else {
            # Not an http:// link. The original also interpolated $st here,
            # but that variable is out of scope and always printed empty.
            prt( "Discarded [$href]\n" );
        }
    } elsif ($rbgn) {
        $rline .= ' ';
        $rline .= $line;
        if ($line =~ /\/font>/) {
            prt( "End RESULT line ...[$rline]\n" );
            $rbgn = 0;
        }
    } elsif ($line =~ /Result/) {
        $rbgn = 1;
        $rline = $line;
    }
}

### done it all
my $kc = keys %hurl;
prt( "Got $kc different sites ...\n" );

# --- write the page ---
out_htm_head();
print $HF '<script language="javascript" type="text/javascript">';
print $HF "\n";
print $HF "<!-- \n";
print $HF "var ma = new Array(\n";
my $cnt = 0;
# Sorted so the generated page is the same from run to run.
foreach my $host (sort keys %hurl) {
    my @links = @{ $hurl{$host} };
    prt( "Site $host, with ". scalar(@links) . " entries ...\n" );
    foreach my $link (@links) {
        if ($cnt) {
            print $HF ','."\n";
        }
        $cnt++;
        prt( " $link\n" );
        # The description is the host. The original re-derived it from the
        # URL and fell back to a placeholder date when the URL had no path.
        print $HF 'new item( "' . js_escape($link) . '", "' . js_escape($host)
            . '", "Page '.$cnt.'" )';
    }
}
print $HF ");\n";
add_form();
print $HF "// -->\n";
print $HF '</script>'."\n";
out_htm_tail();
close( $HF ) or mydie( "Failed to close $htmfile ...\n" );

if ($loadfile) {
    close_log($outfile,1);
} else {
    system( $htmfile );     # hand the page to the shell to open it
    close_log($outfile,0);
}
exit(0);

# Escape text for use inside a double-quoted JavaScript string literal that
# is itself embedded in an HTML <script> element.
sub js_escape {
    my ($text) = @_;
    $text =~ s/([\\"])/\\$1/g;      # backslash and double quote
    $text =~ s{</}{<\\/}g;          # keep "</script" from ending the element
    return $text;
}

# Write the page header to $HF: doctype, meta tags, the viewer IFRAME and the
# fixed JavaScript helpers (item, show_url, gone, nextPage).
sub out_htm_head {
    print $HF <<'EOF';
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<meta http-equiv="Content-Language" content="en">
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="Generator" content="EditPlus">
<meta name="Author" content="Geoff McLane">
<meta name="Keywords" content="">
<meta name="Description" content="">
<title>Page Viewer</title>
<p align="center">
<iframe id="IFrame1" frameborder="1" scrolling="auto" style="width:98%;height:500px;" src="http://geoffmclane.com/fg/index.htm">
IFRAME NOT SUPPORTED ...
</iframe>
</p>
<script language="javascript" type="text/javascript">
<!--
var displaymode = 0; // 1 to open new pages ...
var max = 0;
var curp = 0;
var curind = 0;
function item( lnk, dsc, fd ) {
  this.lnk = lnk;
  this.dsc = dsc;
  this.fd = fd;
}
function show_url( url ) {
  if ( document.getElementById && (displaymode==0) ) {
    //alert('Got (document.getElementById && (displaymode==0) ) ...');
    document.getElementById("IFrame1").src = url;
  } else if ( document.all && (displaymode==0) ) {
    //alert('Got (document.all && (displaymode==0) ) ...');
    document.all.IFrame1.src = url;
  } else {
    //alert( 'Not document.getElementById or document.all' );
    if (!window.win2||win2.closed){
      win2 = window.open( url );
    }else{
      //else if win2 already exists
      win2.location = url;
      win2.focus();
    }
  }
}
function gone(){
  curind = document.jumpy.example.selectedIndex;
  var selectedurl=document.jumpy.example.options[curind].value
  show_url( selectedurl );
  // view(selectedurl);
  curind++;
  if (curind >= max) {
    curind = 0;
  }
  document.jumpy.example.selectedIndex = curind; // select NEXT page
}
function nextPage() {
  curp++;
  cur = 0;
  if (curp >= max) {
    curp = 0;
  }
  var pg = ma[curp].lnk;
  document.jumpy.example.selectedIndex = curp;
  show_url(pg);
  // setTimeout( "nextPage();", mto );
}
// -->
</script>
</head>
<body>
EOF
}

# Write the JavaScript that builds the link selector form. It is emitted
# inside the script element opened by the main code, after the 'ma' array.
sub add_form {
    print $HF <<'EOF';
max = ma.length;
function set_form() {
  var n;
  document.writeln('<div align="right">');
  document.writeln('<form name="jumpy" action="">');
  document.writeln('<select name="example" size="1">');
  for( n = 0; n < max; n++ ) {
    var opt = '<option value="' + ma[n].lnk;
    if( 0 == n ) {
      opt += '" selected>';
    } else {
      opt += '">';
    }
    // opt += ma[n].fd; // + " (circa " + ma[n].dsc + " - " + ma[n].lnk + ")");
    opt += ma[n].fd + ' (' + ma[n].dsc + ')'; // + " - " + ma[n].lnk + ")");
    document.writeln(opt);
    document.writeln('</option>');
  }
  document.writeln('</select>');
  document.writeln('<input type="button" name="test" value="Go!" onClick="gone()">');
  document.writeln('</form>');
  document.writeln('</div>');
}
set_form();
EOF
}

# Write the closing body/html tags to $HF.
sub out_htm_tail {
    print $HF <<'EOF';
</body>
</html>
EOF
}

# Return 1 when the host $ad is one of the hosts in @exclude, else 0.
sub in_exclude {
    my ($ad) = @_;
    foreach my $s (@exclude) {
        return 1 if $s eq $ad;
    }
    return 0;
}

## eof - pageviewer.pl