#!@BASH@
#
# gen-collect 1.1
#
# Copyright (c) 1998-1999 The ht://Dig Group
# Distributed under the terms of the GNU General Public License (GPL)
#   version 2 or later.
# for the ht://Dig search system http://www.htdig.org/
# and the multidig script system http://www.htdig.org/contrib/scripts/
#
# Part of the "multidig script system"
# a system of shell scripts and some modified conf files
# that makes dealing with multiple databases easier for ht://Dig
#
# Syntax:
# gen-collect [-v]
#
# Merges multiple databases into ``collected'' db
# (This is done by multidig too, but this script lets you *just*
#  generate the collections.)
#
# Variables used below but not set in this script; they are expected to be
# provided by the sourced multidig.conf (or the environment):
#   COLLECT_LIST  file listing the names of the collections to generate
#   CONFIG_DIR    directory holding the <name>.conf files
#   DB_BASE       directory holding one sub-directory per database/collection
#   BINDIR        directory containing the htmerge binary
#   REPORT        file that htmerge output is appended to
#   BACKUPS       "true" to keep .bak copies of the previous collected db
#

# This is useful for debugging info
if [ "$1" = "-v" ]; then
    verbose=-v
fi

# You may need to set the following:
MULTIDIG_CONF="@CONFIG_DIR@/multidig.conf"

# Without the configuration every path below would be empty, so stop early
# instead of running cp/mv/htmerge against bogus locations.
if [ ! -r "$MULTIDIG_CONF" ]; then
    echo "gen-collect: cannot read $MULTIDIG_CONF" >&2
    exit 1
fi
source "$MULTIDIG_CONF"

# We may be called inside multidig, so we don't want to mess with the report.

# Word splitting of the list is intentional: one collection name per word.
for collect in $(cat "$COLLECT_LIST"); do
    # What's the conf file for this database?
    CONF=$CONFIG_DIR/$collect.conf
    collect_dir=$DB_BASE/$collect
    # The .collect file lists the member databases of this collection
    collect_file=$collect_dir/$collect.collect

    echo "Generating $collect at: $(date)"

    # We want to replace the old .work files with the first database
    # This ensures that we *only* get documents from the merged db
    # and not old ones left around in our previous collected db
    firstdb=$(head -n 1 "$collect_file")
    if [ -z "$firstdb" ]; then
        # Nothing to seed the collection with; leave the existing db untouched.
        echo "gen-collect: no databases listed in $collect_file; skipping $collect" >&2
        continue
    fi
    cp "$DB_BASE/$firstdb/db.docdb" "$collect_dir/db.docdb.work"
    cp "$DB_BASE/$firstdb/db.docs.index" "$collect_dir/db.docs.index.work"
    cp "$DB_BASE/$firstdb/db.wordlist.work" "$collect_dir/db.wordlist.work"
    cp "$DB_BASE/$firstdb/db.words.db" "$collect_dir/db.words.db.work"

    # Now merge in the remaining databases of the collection, i.e. every
    # entry from the second line on.  "tail -n +2" does not depend on the
    # line count, so a missing trailing newline cannot drop an entry.
    # Word splitting is intentional here as well.
    for db in $(tail -n +2 "$collect_file"); do
        if [ "$1" = "-v" ]; then
            echo "Merging db $db of collect $collect"
        fi
        MERGE_CONF=$CONFIG_DIR/$db.conf
        # There's a slight bug in the merge function.
        # It's looking for db.wordlist, not .work. So let's copy it temporarily
        cp "$DB_BASE/$db/db.wordlist.work" "$DB_BASE/$db/db.wordlist"

        # Do the merging, using -d and -w to prevent normal merging
        # (it would be a waste of time, we'd repeat it multiple times)
        # ${verbose:+...} passes -v only when verbose is set, and no empty
        # argument otherwise.
        "$BINDIR/htmerge" ${verbose:+"$verbose"} -s -d -w -m "$MERGE_CONF" -a -c "$CONF" >>"$REPORT"

        # And now remove the copy
        rm "$DB_BASE/$db/db.wordlist"
    done

    # Now after merging in all of those databases
    # we need to do the usual htmerge run
    "$BINDIR/htmerge" -a ${verbose:+"$verbose"} -s -c "$CONF" >>"$REPORT"

    if [ "$1" = "-v" ]; then
        echo "Moving files $collect at: $(date)"
    fi

    # If you don't have the space for backups, this step can be omitted
    # (BACKUPS is quoted so an unset/empty value simply means "no backups")
    if [ "$BACKUPS" = "true" ]; then
        cp "$collect_dir/db.docdb" "$collect_dir/db.docdb.bak"
        cp "$collect_dir/db.docs.index" "$collect_dir/db.docs.index.bak"
        # cp $DB_BASE/$collect/db.wordlist $DB_BASE/$collect/db.wordlist.bak
        cp "$collect_dir/db.words.db" "$collect_dir/db.words.db.bak"
    fi

    # Move them because we don't want .work files around
    # (Remember, we're generating using merging,
    #  so we want to make sure we don't have old stuff to gum up the works...)
    mv "$collect_dir/db.docdb.work" "$collect_dir/db.docdb"
    mv "$collect_dir/db.docs.index.work" "$collect_dir/db.docs.index"
    # mv $DB_BASE/$collect/db.wordlist.work $DB_BASE/$collect/db.wordlist
    mv "$collect_dir/db.words.db.work" "$collect_dir/db.words.db"

    # Make them world readable!
    chmod 644 "$collect_dir/db.docdb"
    chmod 644 "$collect_dir/db.docs.index"
    # chmod 644 $DB_BASE/$collect/db.wordlist
    chmod 644 "$collect_dir/db.words.db"

    if [ "$1" = "-v" ]; then
        echo "Done with $collect at: $(date)"
    fi
done

# That's it!