#!/usr/local/bin/perl
##
## changehost.pl  (C) 1995 Andrew Scherpbier
##
## This program will change hostnames of URLs in the document database
## and index.
##
## usage:
##      changehost.pl database_base from to [output_base]
##
## example:
##      changehost.pl /opt/www/htdig/sdsu www.sdsu.edu www.northpole.net
##
## Two new databases will be created with a base of '/tmp/new' (or of
## output_base, when that optional fourth argument is given).
## These databases can then be used by htsearch.
##

use strict;
use warnings;
use GDBM_File;

##
## Inside a record hash the items of a list field (DESCRIPTIONS, ANCHORS)
## are held in one string, joined by this byte.  An empty separator cannot
## be used here: the items could never be split apart again.
##
use constant LIST_SEPARATOR => "\001";

##
## Number of bytes pack("i") produces on this platform.
##
use constant INT_SIZE => length(pack("i", 0));

##
## Layout of a serialized document record.  The array index is the
## one-byte tag that introduces the field; each entry is [name, kind]:
##   int     tag, native int value
##   string  tag, native int byte length, bytes
##   list    tag, native int item count, then (int length, bytes) per item
## Filled in a BEGIN block so the table is ready as soon as the subs are
## compiled, regardless of where the run-time code below starts.
##
my @FIELDS;
BEGIN {
    @FIELDS = (
        [ "ID",           "int"    ],    #  0
        [ "TIME",         "int"    ],    #  1
        [ "ACCESSED",     "int"    ],    #  2
        [ "STATE",        "int"    ],    #  3
        [ "SIZE",         "int"    ],    #  4
        [ "LINKS",        "int"    ],    #  5
        [ "IMAGESIZE",    "int"    ],    #  6
        [ "HOPCOUNT",     "int"    ],    #  7
        [ "URL",          "string" ],    #  8
        [ "HEAD",         "string" ],    #  9
        [ "TITLE",        "string" ],    # 10
        [ "DESCRIPTIONS", "list"   ],    # 11
        [ "ANCHORS",      "list"   ],    # 12
        [ "EMAIL",        "string" ],    # 13
        [ "NOTIFICATION", "string" ],    # 14
        [ "SUBJECT",      "string" ],    # 15
    );
}

## Run the conversion only when executed as a script, so that the record
## subs can be loaded (e.g. with require) without side effects.
main(@ARGV) unless caller;

######################################################################
##
## main(database_base, from, to [, output_base])
##   Validates the arguments and converts first the document database,
##   then the document index.  Dies on bad usage or on any tie failure.
##
sub main {
    my @args = @_;

    die "usage: $0 database_base from to [output_base]\n"
        if @args < 3 || @args > 4;

    my ($base, $from, $to, $out_base) = @args;
    $out_base = '/tmp/new' unless defined $out_base;

    # An empty hostname would make the pattern match every http URL.
    die "$0: the 'from' hostname must not be empty\n" if $from eq '';

    # \Q...\E: the hostname is literal text, so its dots must not act as
    # regex wildcards.
    my $from_re = qr{http://\Q$from\E}i;
    my $to_url  = "http://$to";

    convert_docdb("$base.docdb", "$out_base.docdb", $from_re, $to_url);
    convert_index("$base.docs.index", "$out_base.docs.index",
                  $from_re, $to_url);
    return;
}

##
## convert_docdb(dbfile, newfile, from_re, to_url)
##   Copies every entry of the document database dbfile into a freshly
##   created newfile.  Entries whose key matches from_re get the key and
##   the URL field of the record rewritten; each rewritten key is printed.
##
sub convert_docdb {
    my ($dbfile, $newfile, $from_re, $to_url) = @_;
    my (%docdb, %newdb);

    # Open the input first so a bad input path does not leave an empty
    # output database behind.
    tie(%docdb, 'GDBM_File', $dbfile, GDBM_READER(), 0)
        or die "$dbfile: $!";
    tie(%newdb, 'GDBM_File', $newfile, GDBM_NEWDB(), 0644)
        or die "$newfile: '$!'";

    while (my ($key, $value) = each %docdb) {
        if ($key =~ $from_re) {
            my %record = parse_ref_record($value);
            $key =~ s/$from_re/$to_url/;
            print "$key\n";

            # Only touch the URL field when the record really has one;
            # otherwise an empty URL field would be invented.
            $record{"URL"} =~ s/$from_re/$to_url/
                if defined $record{"URL"};

            $value = create_ref_record(%record);
        }
        $newdb{$key} = $value;
    }

    untie %newdb;
    untie %docdb;
    return;
}

##
## convert_index(dbfile, newfile, from_re, to_url)
##   Copies every entry of the document index dbfile into a freshly
##   created newfile, rewriting the first match of from_re in each value.
##
sub convert_index {
    my ($dbfile, $newfile, $from_re, $to_url) = @_;
    my (%docdb, %newdb);

    tie(%docdb, 'GDBM_File', $dbfile, GDBM_READER(), 0)
        or die "$dbfile: $!";
    tie(%newdb, 'GDBM_File', $newfile, GDBM_NEWDB(), 0644)
        or die "$newfile: '$!'";

    while (my ($key, $value) = each %docdb) {
        $value =~ s/$from_re/$to_url/;
        $newdb{$key} = $value;
    }

    untie %newdb;
    untie %docdb;
    return;
}

######################################################################
##
## create_ref_record(%rec)
##   Serializes a record hash (as returned by parse_ref_record) into the
##   binary record string.  Only keys that exist in %rec are written, in
##   ascending tag order.  List fields are split on LIST_SEPARATOR.
##   Returns the packed string ('' when no known key is present).
##
sub create_ref_record {
    my (%rec) = @_;
    my $packed    = '';
    my $separator = quotemeta(LIST_SEPARATOR);

    for my $tag (0 .. $#FIELDS) {
        my ($name, $kind) = @{ $FIELDS[$tag] };
        next unless exists $rec{$name};

        my $field = $rec{$name};

        if ($kind eq "int") {
            $packed .= pack("Ci", $tag, defined $field ? $field : 0);
            next;
        }

        $field = '' unless defined $field;

        if ($kind eq "string") {
            $packed .= pack("Ci", $tag, length($field)) . $field;
            next;
        }

        # List field.  Limit -1 keeps trailing empty items; the count is
        # the number of items, not the last index.
        my @items = split(/$separator/, $field, -1);
        $packed .= pack("Ci", $tag, scalar(@items));
        for my $item (@items) {
            $packed .= pack("i", length($item)) . $item;
        }
    }

    return $packed;
}

##
## parse_ref_record($value)
##   Decodes a binary record string into a hash keyed by field name.
##   Integer fields become numbers, string fields are returned byte for
##   byte (trailing blanks and NULs included), and list fields become one
##   string with the items joined by LIST_SEPARATOR.
##   Dies on an unknown field tag or when the record ends in mid-field,
##   rather than silently producing a damaged record.
##
sub parse_ref_record {
    my ($value) = @_;
    my %rec;

    $value = '' unless defined $value;

    while (length($value) > 0) {
        my $tag   = unpack("C", _take_bytes(\$value, 1));
        my $entry = $FIELDS[$tag];
        die "unknown field tag $tag in document record"
            unless defined $entry;

        my ($name, $kind) = @$entry;

        if ($kind eq "int") {
            $rec{$name} = _take_int(\$value);
        }
        elsif ($kind eq "string") {
            $rec{$name} = _take_bytes(\$value, _take_int(\$value));
        }
        else {
            my $count = _take_int(\$value);
            my @items;
            for (1 .. $count) {
                push @items, _take_bytes(\$value, _take_int(\$value));
            }
            $rec{$name} = join(LIST_SEPARATOR, @items);
        }
    }

    return %rec;
}

## Removes and returns one native integer from the front of $$buf_ref.
sub _take_int {
    my ($buf_ref) = @_;
    return unpack("i", _take_bytes($buf_ref, INT_SIZE));
}

## Removes and returns the first $count bytes of $$buf_ref; dies when the
## buffer is too short or the count is negative.
sub _take_bytes {
    my ($buf_ref, $count) = @_;
    die "truncated or corrupt document record"
        if $count < 0 || $count > length($$buf_ref);
    return substr($$buf_ref, 0, $count, '');
}

1;