#!/bin/sh
# shar:	Shell Archiver  (v1.22)
#
#	Run the following text with /bin/sh to create:
#	  Readme
#	  mklist
#	  mylist.cover
#	  mylist.response
#	  srch.cgi
#
# How this archive works:
#   Each member is stored as a here-document whose lines are prefixed with
#   "X".  sed strips that prefix while writing the member to disk.  The
#   here-document delimiter is quoted ('SHAR_EOF'), so the shell performs no
#   expansion on the payload ($QUERY, $MATCHES, Perl variables, ... are
#   written literally).
#   The "sed ... && chmod ... || echo ..." chain is deliberate: the failure
#   message is printed if either the extraction or the chmod fails.
#
# NOTE(review): the HTML markup in mylist.cover, mylist.response and in the
#   strings printed by srch.cgi was reconstructed from a copy in which the
#   tags had been stripped.  The surrounding text is original; confirm the
#   exact tags (in particular <isindex>) against a pristine copy if one is
#   available.
#

# ---- Readme: package documentation ---------------------------------------
sed 's/^X//' << 'SHAR_EOF' > Readme &&
XTitleSrch - a CGI script to search for HTML titles or do grep searches
X=======================================================================
X
XOverview:
X
XThis package consists of two perl scripts and two customizable HTML
Xdocs (a coversheet and a response).  The first script, mklist,
Xproduces a file consisting of title/URL pairs (or for grep searches
Xtitle/URL/filepath).  This file can be edited or entirely built by
Xhand if you desire.  The CGI script, srch.cgi, searches the title
Xfield of all lines in this file and returns a response HTML document
Xin which it inserts an unordered list of all URLs matching the query
Xwhen it encounters a line containing only the special token $MATCHES.
XAlso the token $QUERY in the HTML response is replaced by the search
Xterm.  If the list file has three fields per line, the last being the
Xpath to the file, then a grep search of the file is done instead of
Xlooking for matches only in the title.
X
X
XHere are the steps to get srch.cgi running.
X
X1.  Edit the mklist script entering your hostname and data root
Xdirectory.  Then produce a list file with this script.  This is a
Xfile each line of which looks like
X
XTitle<tab>URL\n    (for title searches)
X
Xor
X
XTitle<tab>URL<tab>path2file\n     (for grep searches)
X
XBy default mklist produces (on standard output) a list file for title
Xsearches in the directory (relative to the data root) which is its
Xfirst argument.  If it is given the -r option it will recursively do
Xall subdirectories of that directory.  If a directory is not given
Xthe root is used.  To find the title it checks the first 5 lines of
Xevery text file looking for <title> or <TITLE>.  The beginning and
Xending title tags should be on the same line.
X
XExamples: "mklist > mylist"  -> list for all files in root dir
X          "mklist -r > mylist" -> list for all files in root and its subdirs
X          "mklist -r /dir/dir2 > mylist" -> list for all files in
X                        root/dir/dir2 and its subdirectories
X
X
XIf you use the "-g" option it produces the three field file which indicates
Xto srch.cgi to do a complete grep search of the files rather than just
Xsearch the titles.
X
X2.  Install the "mylist" file you made in step 1 in some directory on
Xyour server along with the mylist.cover and mylist.response files.
XYou will probably want to edit these two files to customize your
Xinformation.  There is nothing magic about the name "mylist", but if
Xthe listfile is named "foo" the cover and response files must be named
X"foo.cover" and "foo.response" and must be in the same directory.
X
X3.  Install srch.cgi in your cgi-bin.  The URL pointing to this script
Xshould then be
X
Xhttp://host/cgi-bin/srch.cgi/path2/mylist
X
Xwhere "/path2/mylist" is whatever your server will translate to the
Xcorrect path to the mylist file and place in the CGI environment
Xvariable PATH_TRANSLATED.
X
XThat's it.
X
X
XJohn Franks 	Dept of Math. Northwestern University
X		john@math.nwu.edu
X
X
X
SHAR_EOF
chmod 0644 Readme || echo "restore of Readme fails"

# ---- mklist: Perl script that builds the title/URL list file --------------
sed 's/^X//' << 'SHAR_EOF' > mklist &&
X#!/usr/net/bin/perl
X# Mklist version 0.1
X# Usage:  mklist [-rtbg] [-R rootdir] [-h host] dir
X#	-r means recursively do subdirectories
X#	-t include all text files
X#	-b include binary and text files
X#	-g full text grep search of files rather than title only
X#	dir is starting directory relative to root (if empty use root)
X
X# This program produces a file consisting of lines of the form
X# title<tab>url\n or title<tab>url<tab>path2file\n
X# if the -g option is used.  It assumes that the only address translation
X# from a URL to the file path is prepending the root directory.
X
X# This program has been placed in the public domain by its author
X# John Franks
X
X# You must edit the following fields filling in your root directory and host.
X
X$host="host.univ.edu";
X$rootdir="/data/root";
X
X##########################################################################
Xrequire "getopts.pl";
X&Getopts( "rtbgR:h:");
X$opt_t |= $opt_b;	#Option -b implies option -t
X
X$host= $opt_h ? $opt_h : $host;
X$rootdir= $opt_R ? $opt_R : $rootdir;
X$startdir=$ARGV[0];
X
X&dodir($startdir);
X
X# dodir( dir): emit list entries for the files in $rootdir$dir, descending
X# into subdirectories when -r was given.  Names starting with "." are skipped.
Xsub dodir {
X	local($dir) = @_;
X	local($path) = $rootdir.$dir;
X	local($file);
X	opendir( DIR, $path) || die "Can't open $path";
X	# Read every name and close the handle before recursing: DIR is a
X	# global handle and the recursive call reopens it.
X	local( @filenames) = readdir(DIR);
X	closedir(DIR);
X
X	foreach $file (@filenames) {
X		next if ($file =~ /^\./);
X		if ( $file =~ /\.html$/) {
X			&do_file( $dir, $file, "h");
X		} elsif ( $opt_t && (-T "$path/$file")) {
X			&do_file( $dir, $file, "t");
X		} elsif ( $opt_b && (-B "$path/$file")
X				&& !(-d "$path/$file")) {
X			&do_file( $dir, $file, "b");
X		}
X		&dodir( "$dir/$file") if (( -d "$path/$file") && $opt_r );
X
X	}
X}
X
X# do_file( dir, file, type): print one list line for a file.
X# type is "h" (.html file), "t" (other text file) or "b" (binary file).
X# An "h" file is listed only if a <title> is found in its first 5 lines;
X# "t" and "b" files are always listed, with a generated title if needed.
Xsub do_file {
X	local( $dir, $file, $type) = @_;
X	local( $filepath) = $rootdir.$dir."/".$file;
X
X	if ( $type eq "b" ) {
X		$title = "Binary file: $file";
X	} elsif ( $type =~ /[ht]/ ) {
X		open( TEXT_FILE, $filepath)
X			|| die "Can't open $filepath\n";
X		local( $line) = "";
X		local( $linenum) = 1;
X		$title = "Text file: $file";
X		$foundtitle = 0;
X		while ( $line = <TEXT_FILE>) {
X			last if ($linenum > 5);
X			$linenum++;
X			if ($line =~ s/^.*<title>//i ) {
X				chop( $line);
X				# Drop the closing tag and anything after it;
X				# both tags must be on the same line.
X				$line =~ s!</title>.*$!!i;
X				$title = $line;
X				$title =~ s/^\s*//;
X				$title =~ s/\s*$//;
X				$foundtitle = 1;
X				last;
X			}
X		}
X		close( TEXT_FILE);
X	}
X	if ( ($type =~ /[bt]/) || $foundtitle) {
X		if ( $opt_g ) {
X			printf( "%s\thttp://%s%s/%s\t%s\n",
X				$title, $host, $dir, $file, $filepath);
X		} else {
X			printf( "%s\thttp://%s%s/%s\n",
X				$title, $host, $dir, $file);
X		}
X	}
X}
SHAR_EOF
chmod 0755 mklist || echo "restore of mklist fails"

# ---- mylist.cover: page shown by srch.cgi when the query is empty ---------
sed 's/^X//' << 'SHAR_EOF' > mylist.cover &&
X<html>
X<head>
X<title>Search Titles</title>
X</head>
X<body>
X<h1>Search Titles on this server</h1>
X
X<isindex>
XThe search is case insensitive.  Any Perl regular expression is an
Xallowable search term.
X</body>
X</html>
SHAR_EOF
chmod 0644 mylist.cover || echo "restore of mylist.cover fails"

# ---- mylist.response: template for the result page ------------------------
# srch.cgi replaces $QUERY with the search term and replaces the line that
# starts with $MATCHES with the list of matching documents.
sed 's/^X//' << 'SHAR_EOF' > mylist.response &&
X<html>
X<head>
X<title>Document Search</title>
X</head>
X<body>
X<h1>Results of Title Search</h1>
X
XHere are the matches for the search term '$QUERY':
X<p>
X
X$MATCHES
X
XYou may repeat your search with a new search term.  The search is case
Xinsensitive.  Any Perl regular expression is an allowable search term.
X<isindex>
X</body>
X</html>
SHAR_EOF
chmod 0644 mylist.response || echo "restore of mylist.response fails"

# ---- srch.cgi: the CGI search script --------------------------------------
sed 's/^X//' << 'SHAR_EOF' > srch.cgi &&
X#!/usr/net/bin/perl
X# Srch.cgi version 0.1
X# This is a CGI script which receives a query and pathinfo specifying
X# a file consisting of lines of the form Title<tab>URL.  It prints a
X# response file to stdout replacing the token $QUERY with the query passed
X# to it and the line containing $MATCHES with an unordered list of URLs
X# for matching documents.  If the query is empty it prints a coversheet.
X# If the list file contains entries of the form Title<tab>URL<tab>filepath,
X# then the file referenced by filepath is grepped for the query string instead
X# of seeking a match in the title field.
X
X# This program has been placed in the public domain by its author
X# John Franks
X
X
X$pathinfo = $ENV{PATH_INFO};  # Get the argument from the URL
X$listfile = $ENV{PATH_TRANSLATED};  # Get the argument from the URL
X$coverfile = $listfile.".cover";
X$responsefile = $listfile.".response";
X$query = $ENV{QUERY_STRING};
X# Decode %XX escapes in the query string.
X$query =~ s/%([\da-f]{1,2})/pack(C,hex($1))/eig;
X
X# HTML-escaped copy of the query, used only when the search term is echoed
X# back into the response page.  The raw $query is still used for matching.
X$htmlquery = $query;
X$htmlquery =~ s/&/&amp;/g;
X$htmlquery =~ s/</&lt;/g;
X$htmlquery =~ s/>/&gt;/g;
X$htmlquery =~ s/"/&quot;/g;
X
X# No query: just send the coversheet.
Xif ( $query eq "") {
X	open (COVER, $coverfile)
X		|| &punt( "Couldn't open cover file $coverfile.\n");
X	print "Content-type: text/html\n\n";
X	while ( $line = <COVER>) {
X		print $line;
X	}
X	exit( 0);
X}
X
X# The query is used as a Perl regular expression.  Reject a malformed
X# pattern here, before any output, so the user gets an error page instead
X# of a response that is cut off when the match dies.
Xeval( '"" =~ /$query/; 1') || &punt( "Invalid search pattern.\n");
X
Xopen (LISTFILE, $listfile) || &punt( "Couldn't open list file: $listfile.\n");
Xopen (RESPONSE, $responsefile) || &punt( "Couldn't open response file.\n");
X
Xprint "Content-type: text/html\n\n";
X
X# Copy the response template, substituting the two special tokens.
Xwhile ( $line = <RESPONSE>) {
X	$line =~ s/\$QUERY/$htmlquery/;
X	if ( $line =~ /^\$MATCHES/) {
X		&matches;
X	}
X	else {
X		print $line;
X	}
X}
X
X# matches: read the list file and print an unordered list of the entries
X# that match $query (case insensitive).  Entries with a third field are
X# matched by searching the named file; others by their title.  Text from
X# "#" to the end of a list line is discarded and empty lines are skipped.
Xsub matches
X{
X	local( $foundmatch);
X	$foundmatch = 0;
X	LINE:
X	while ( $listline = <LISTFILE>) {
X		chop $listline;
X		$listline =~ s/\s*#.*$//;
X		next LINE if ( $listline eq "");
X
X		( $title, $url, $filepath ) = split( /\t/, $listline, 3);
X		if ( $filepath ) {
X			open( GREPFILE, $filepath)
X				|| &punt( "Couldn't open file to grep.\n");
X			while ( <GREPFILE>) {
X				if ($_ =~ /$query/i) {
X					if ( !$foundmatch) {
X						print "<ul>\n";
X						$foundmatch = 1;
X					}
X					printf( "<li><a href=\"%s\">%s</a>\n",
X						$url, $title);
X					# One hit is enough; go on to the
X					# next list entry.
X					next LINE;
X				}
X			}
X		}
X		else {
X			if ( $title =~ /$query/i) {
X				if ( !$foundmatch) {
X					print "<ul>\n";
X					$foundmatch = 1;
X				}
X				printf( "<li><a href=\"%s\">%s</a>\n",
X					$url, $title);
X			}
X		}
X	}
X	if ( $foundmatch) {
X		print "</ul>\n<p>\n";
X	} else {
X		print "<b>Sorry no matches were found.</b><p>\n";
X	}
X}
X
X# punt( message): print an HTML error page containing the message and exit
X# with status 1.
Xsub punt
X{
X	print "Content-type: text/html\n\n";
X	printf( "<html><head><title>500 %s</title></head>\n", @_);
X	printf( "<body><h1>Error code 500</h1>\n%s\n</body></html>\n", @_);
X	exit( 1);
X}
SHAR_EOF
chmod 0755 srch.cgi || echo "restore of srch.cgi fails"
# Stop here so that anything appended after the archive (mail signatures,
# news trailers) is never interpreted as shell input.
exit 0