#!/usr/local/bin/perl
#
# status.pl  v1.0  960413  Iain Lea (iain@sbs.de)
#
# ChangeLog
# 960413 IL
#
# Produces a HTML 'Search Engine Status' page with last 5 runs
# and 'Top 10' servers by #URLS indexed.
#
# Usage: status.pl [options]
#        -h       help
#        -F file  HTML footer
#        -H file  HTML header
#        -o file  HTML generated file
#        -v       verbose
#
# TODO
#
# NOTE(review): the stored copy of this script had been run through an
# HTML tag stripper: every heredoc body, every <HANDLE> read, the tail of
# ParseCmdLine and the head of ReadDataFiles were lost.  Spans rebuilt from
# the surviving text are marked NOTE(review) below - confirm them against
# a pristine copy before deploying.

# Time::Local and Getopt::Std are the core-module replacements for the old
# 'timelocal.pl' / 'getopts.pl' libraries, which current perls no longer ship.
use Time::Local;
use Getopt::Std;

# Site library; ReadRobotInfo and PrintFooterHTML are not defined in this
# file and are presumably provided here - verify.
require '/www/search.sbs.de/bin/sbs.pl';

$DataDir       = '/www/search.sbs.de/data/robot';
$RunTimeFile   = "$DataDir/current-runtime";
$RobotFile     = "$DataDir/current-robot";
$IndexFile     = '/www/search.sbs.de/test/db/db.wordlist';
$DefOutputFile = '/www/search.sbs.de/test/pub/status.html';
$TmpFile       = "/tmp/status.$$";
$DefFooter     = '';
$DefHeader     = '';
$Verbose       = 0;
$Top10Servers  = 10;

ParseCmdLine ();

print "Generating status.html...\n" if $Verbose;

ReadDataFiles ($RunTimeFile, $RobotFile, $IndexFile);
WriteStatus ($DataDir, $DefOutputFile, $DefHeader, $DefFooter);

# Success must exit 0 (the original exited 1 on the success path).
exit 0;

#############################################################################
# Subroutines
#

# ParseCmdLine - parse the command line switches into the script globals.
#
# Sets $DefFooter (-F), $DefHeader (-H), $DefOutputFile (-o) and $Verbose
# (-v); prints the usage text and exits for -h.
#
# NOTE(review): everything after the "-h" test was lost in the stored copy.
# The usage text is rebuilt from the file header and the option handling
# from the Getopts spec and the globals above - confirm, including the
# exit status used after printing the help text.
sub ParseCmdLine
{
    getopts ('F:hH:o:v');

    if ($opt_h) {
        print <<EOT;
Usage: status.pl [options]
       -h       help
       -F file  HTML footer
       -H file  HTML header
       -o file  HTML generated file
       -v       verbose
EOT
        exit 0;
    }

    $DefFooter     = $opt_F if defined ($opt_F) && $opt_F ne '';
    $DefHeader     = $opt_H if defined ($opt_H) && $opt_H ne '';
    $DefOutputFile = $opt_o if defined ($opt_o) && $opt_o ne '';
    $Verbose       = 1      if $opt_v;
}

# ParseDateStamp - split one line of date(1)-style output, e.g.
#
#     Sat Apr 13 12:57:52 MET DST 1996
#
# into (sec, min, hour, mday, mon, year): mon is 0-based and year has four
# digits, i.e. the list can be handed straight to timelocal().  Returns the
# empty list when the line is not a recognisable time stamp.
#
# Unlike the original inline pattern this accepts space-padded single digit
# days ("Apr  6") and any number of time zone words ("MET", "MET DST", none).
sub ParseDateStamp
{
    my ($Line) = @_;
    my %MonthNum = (
        Jan => 0, Feb => 1, Mar => 2, Apr => 3, May => 4,  Jun => 5,
        Jul => 6, Aug => 7, Sep => 8, Oct => 9, Nov => 10, Dec => 11,
    );

    return () unless defined ($Line) && $Line =~
        /^\S+\s+([A-Za-z]{3})\s+(\d{1,2})\s+(\d\d):(\d\d):(\d\d)\s+.*?(\d{4})\s*$/;

    # Copy the captures before anything else can clobber them.
    my ($MonName, $Day, $Hour, $Min, $Sec, $Year) = ($1, $2, $3, $4, $5, $6);

    return () unless exists ($MonthNum{$MonName});
    return ($Sec, $Min, $Hour, $Day, $MonthNum{$MonName}, $Year);
}

# ReadDataFiles - gather the statistics of the current robot run.
#
#   $RunTimeFile  first time stamp line = start of the run, second = end
#   $RobotFile    robot log with "htdig: <server> <n> documents" and
#                 "Read <n> words" lines
#   $IndexFile    index file whose size in bytes is reported
#
# Fills the global %Top10ByName ("<zero padded docs>|<server>" => docs),
# writes "$DataDir/<YYMMDD>-info" containing
#
#     RunDate:RunTime:IndexSize:NumWords:NumURLS:NumServers
#
# and re-points the "$DataDir/current-info" symlink at it.  Dies when an
# input file cannot be opened, when no start time stamp is found or when
# the info file cannot be written.
#
# NOTE(review): the sub header, argument unpacking and the open of the
# run-time file were lost in the stored copy; they are rebuilt from the
# call in the main program and the surviving loop body.
sub ReadDataFiles
{
    my ($RunTimeFile, $RobotFile, $IndexFile) = @_;
    my ($BegTime, $EndTime) = (0, 0);
    my ($RunDate, $RunTime) = ('', '');

    # RunDate : RunTime
    open (my $TimeFh, '<', $RunTimeFile) || die "Error: $RunTimeFile - $!\n";
    while (my $Line = <$TimeFh>) {
        chomp ($Line);

        # Skip anything that is not a time stamp rather than reusing the
        # captures of an earlier match.
        my @Stamp = ParseDateStamp ($Line);
        next unless @Stamp;

        if (! $BegTime) {
            $BegTime = timelocal (@Stamp);
            # YYMMDD; "% 100" keeps the year at two digits from 2000 on so
            # the file name still matches /^[0-9]{6}-info$/ in WriteStatus.
            $RunDate = sprintf ("%02d%02d%02d",
                                $Stamp[5] % 100, $Stamp[4] + 1, $Stamp[3]);
            print "BEG=[$Line] [$BegTime] [$RunDate]\n" if $Verbose;
        }
        elsif (! $EndTime) {
            $EndTime = timelocal (@Stamp);
            # Elapsed time stored as HHMM.
            my $Elapsed = $EndTime - $BegTime;
            $RunTime = sprintf ("%02d%02d",
                                $Elapsed / 3600, ($Elapsed % 3600) / 60);
            print "END=[$Line] [$EndTime] [$RunTime]\n" if $Verbose;
        }
    }
    close ($TimeFh);

    # Without a start date the info file cannot even be named.
    die "Error: $RunTimeFile - no start time stamp found\n"
        if $RunDate eq '';

    # IndexSize (bytes); 0 when the index file cannot be stat'ed
    my $IndexSize = (stat ($IndexFile))[7] || 0;
    print "SIZE=[$IndexSize]\n" if $Verbose;

    # NumWords : NumURLS : NumServers
    my ($NumWords, $NumURLS, $NumServers) = (0, 0, 0);
    %Top10ByName = ();

    open (my $RobotFh, '<', $RobotFile) || die "Error: $RobotFile - $!\n";
    while (my $Line = <$RobotFh>) {
        if ($Line =~ /^htdig:\s+(.*?)\s+([0-9]+)\s+documents\s*$/) {
            my ($Server, $Docs) = ($1, $2);

            $NumURLS += $Docs;
            $NumServers++;
            if ($Docs > 0) {
                # The zero padded count makes a plain string sort of the
                # keys order the servers by number of documents.
                my $Key = sprintf ("%07d|%s", $Docs, $Server);
                $Top10ByName{$Key} = $Docs;
            }
            print "SERVER=[$Server] DOCS=[$Docs]\n" if $Verbose;
        }
        elsif ($Line =~ /^Read\s+([0-9]+)\s+words\s*$/) {
            $NumWords = $1;
            print "WORDS=[$NumWords]\n" if $Verbose;
        }
    }
    close ($RobotFh);

    # Write data to YYMMDD-info file ($DataDir is the script global)
    my $InfoFile = "$DataDir/$RunDate-info";
    my $CurrFile = "$DataDir/current-info";
    my $InfoLine =
        "$RunDate:$RunTime:$IndexSize:$NumWords:$NumURLS:$NumServers";

    print "$InfoLine\n" if $Verbose;

    open (my $InfoFh, '>', $InfoFile) || die "Error: $InfoFile - $!\n";
    print $InfoFh "$InfoLine\n";
    close ($InfoFh) || die "Error: $InfoFile - $!\n";

    # Re-point current-info at the new file.  A failing unlink is expected
    # on the very first run; a failing symlink is reported but not fatal.
    unlink ($CurrFile);
    symlink ($InfoFile, $CurrFile) || warn "Warning: $CurrFile - $!\n";
}

# WriteStatus - generate the HTML status page.
#
#   $DataDir  directory holding the YYMMDD-info files and current-info
#   $OutFile  HTML file to (over)write
#   $Header   optional HTML header file ('' = built-in header)
#   $Footer   optional HTML footer file ('' = &PrintFooterHTML)
#
# The page holds the text returned by &ReadRobotInfo, a table of the most
# recent robot runs and a table of the $Top10Servers servers with the most
# indexed documents (taken from %Top10ByName, see ReadDataFiles).
#
# NOTE(review): all HTML markup in the heredocs below was stripped from the
# stored copy.  Captions, column titles and variable order are original;
# the tags themselves are a reconstruction - confirm.
sub WriteStatus
{
    my ($DataDir, $OutFile, $Header, $Footer) = @_;
    my $MaxRuns = 5;

    # $RobotInfo stays a package global, as in the original, in case the
    # helpers in the site library look at it.
    $RobotInfo = &ReadRobotInfo ("$DataDir/current-info");

    # HTML must remain a bareword handle: PrintBoilerPlate and the
    # externally defined PrintFooterHTML both print to it.
    open (HTML, '>', $OutFile) || die "Error: $OutFile - $!\n";

    PrintBoilerPlate ($Header, 1);

    print HTML <<EOT;
<P>
$RobotInfo
<P>
<TABLE BORDER=1>
<CAPTION>Table of last 5 robot runs.</CAPTION>
<TR><TH>Run Date</TH><TH>Run Time</TH><TH># Servers</TH><TH># URL's</TH><TH># Words</TH><TH>Index (MB)</TH></TR>
EOT

    # read YYMMDD-info files, newest first
    opendir (DIR, $DataDir) || die "Error: $DataDir - $!\n";
    my @InfoFiles = reverse (sort (grep (/^[0-9]{6}-info$/, readdir (DIR))));
    closedir (DIR);

    # Keep at most $MaxRuns entries.  The original fixed slice [0..4]
    # produced undef entries (and a fatal open) with fewer than 5 files.
    splice (@InfoFiles, $MaxRuns) if @InfoFiles > $MaxRuns;

    foreach my $Name (@InfoFiles) {
        my $File = "$DataDir/$Name";

        open (my $InfoFh, '<', $File) || die "Error: $File - $!\n";
        my $Line = <$InfoFh>;
        close ($InfoFh);

        next unless defined ($Line);    # empty info file
        chomp ($Line);

        my ($RunDate, $RunTime, $IndexSize, $NumWords, $NumURLS, $NumServers)
            = split (/:/, $Line);

        $IndexSize = sprintf ("%.1f", ($IndexSize || 0) / (1024*1024));

        # HHMM -> HH:MM; left untouched when it does not look like HHMM
        $RunTime = '' unless defined ($RunTime);
        $RunTime = "$1:$2" if $RunTime =~ /(..)(..)/;

        print HTML <<EOT;
<TR><TD>$RunDate</TD><TD>$RunTime</TD><TD>$NumServers</TD><TD>$NumURLS</TD><TD>$NumWords</TD><TD>$IndexSize</TD></TR>
EOT
    }

    print HTML <<EOT;
</TABLE>
<P>
<TABLE BORDER=1>
<CAPTION>Table of Top 10 servers listed by number of indexed documents.</CAPTION>
<TR><TH>Top 10 Servers</TH><TH># URL's</TH></TR>
EOT

    # Keys are "<zero padded docs>|<server>", so a reversed string sort
    # lists the servers with the most documents first.
    my $Shown = 0;
    foreach my $Key (reverse (sort (keys (%Top10ByName)))) {
        last if $Shown >= $Top10Servers;

        my $NumURLS  = $Top10ByName{$Key};
        my ($Server) = $Key =~ /^[0-9]*\|(.*)$/;
        next unless defined ($Server);

        $Server =~ s/:80$//;            # hide the default HTTP port
        $Shown++;

        print HTML <<EOT;
<TR><TD>$Server</TD><TD>$NumURLS</TD></TR>
EOT
    }

    print HTML "</TABLE>\n";

    PrintBoilerPlate ($Footer, 0);

    close (HTML) || die "Error: $OutFile - $!\n";
}

# PrintBoilerPlate - write the page header or footer to the HTML handle.
#
#   $File      file to copy verbatim; when it is '' or does not exist the
#              built-in header (or &PrintFooterHTML for the footer) is used
#   $IsHeader  true for the page header, false for the footer
#
# NOTE(review): the markup of the built-in header was stripped from the
# stored copy; only the text "Search Engine Status" (twice) survived, so
# the tags below are a reconstruction - confirm.
sub PrintBoilerPlate
{
    my ($File, $IsHeader) = @_;

    if ($File ne "" && -e $File) {
        open (my $BoilerFh, '<', $File) || die "Error: $File - $!\n";
        while (my $Line = <$BoilerFh>) {
            print HTML $Line;
        }
        close ($BoilerFh);
        return;
    }

    if ($IsHeader) {
        print HTML <<EOT;
<HTML>
<HEAD>
<TITLE>Search Engine Status</TITLE>
</HEAD>
<BODY>
<H1>Search Engine Status</H1>
<P>
EOT
    }
    else {
        # Call form kept exactly as in the original (shares @_ with the
        # callee), since PrintFooterHTML is defined outside this file.
        &PrintFooterHTML;
    }
}