#!/usr/bin/perl
#
# NAME
#   TuneBot - search the Web for ABC tunes
#
# SYNOPSIS
#   TuneBot  [options] ["<oldlist"] [URL|dir]...
#
# DESCRIPTION
#   This program is a web explorer.  It accepts a  list  of  starting
#   URLs,  either  on  the  command line or in an input document, and
#   fetches each URL in turn. Each is scanned for ABC tunes and URLs.
#   Any  URLs  discovered  are remembered for later use, and each ABC
#   tune produces a single line of output giving info about it.
#
#   We implement shell-like redirection ourselves, because the oldlist
#   and  newlist  files  may be URLs, but not many people have shells
#   that know how to redirect over the net.
#
#   The  URLs  may  be  directories  or  .abc  files.   If  they  are
#   directories, we read  the  directory  listing  and  (recursively)
#   extract a list of all the .abc files that they contain.
#
#   Because  of  the  difficulties  in preventing infinite loops with
#   URLs we implement two ways of  limiting  the  URLs  that  may  be
#   followed:   You  can  restrict the depth of recursion with the -D
#   option, and you can restrict the hostname(s) with the +H option.
#
# REQUIRES
#   This program no longer uses the LWP::Simple modules. I've found a
#   simpler approach.  But you'll have to download these modules, and
#   possibly change push to say where you put them:
#
	push(@INC,".","$ENV{HOME}/sh","$ENV{HOME}/pl");
	use abcCode;			# Calculates tune codes.
	require "HTTPcon.pm";	# Makes HTTP connection to server.
	require "URLdata.pm";	# Opens URL and returns file handle.
	require "URLhref.pm";	# Combines URL + HREF -> new URL.
	require "URLtrim.pm";	# Shrinks URLs.
	require "HTMLdir.pm";	# HTML directory listing.
#
#   They'll have to be in your @INC path; by default we add  $HOME/sh
#   and $HOME/pl to @INC, so those are good places to put them.
#
#   Oh, and one more thing: We use w3cat to fetch files from the web.
#   You should find it in the same directory.  This was done so  that
#   we  could  properly  time  out  zombie connections to some of the
#   broken web sites out there.  It turns out that you can only abort
#   a  connect()  with  sig('ALRM'),  and if you attempt to close the
#   socket, you die a horrible death. With that isolated in the w3cat
#   subprocess, we can continue to run past such disasters.
#
# ENVIRONMENT
#   We read the following from the environment:
#
#   V_TuneBot=<l><file>
#     If defined, this defines our "verbose" level and  output  file.
#     The  level  <l> is a number (which defaults to 0), the optional
#     <file> (which defaults  to  STDERR)  is  where  the  output  is
#     written.   Note that this variable's name consists of 'V_' plus
#     the program's name.  If you call this  program  by  some  other
#     name, you should of course use 'V_' plus that name.
#
# INPUT
#   We always read from stdin, so if you don't want to  provide  any
#   input, you'll need to redirect our input to /dev/null. The input
#   is scanned for URLs, and they are added to our starting list (at
#   depth 1).
#
#   As a special aid in limiting  searches,  the  input  may  contain
#   lines of these forms (with or without the colons):
#     done:   http://foo.bar.com/xyz
#     ignore  http://foo.bar.com/xyz
#     avoid:  http://foo.bar.com/xyz
#   These  are ways of telling TuneBot to ignore certain URLs.  The
#   "done" and "ignore" commands give specific URLs that are to be
#   avoided; this is implemented by simply listing them as "already
#   done".  With the "avoid" command, we extract the host name, and
#   URLs for that host will not be used.
#
# OUTPUT
#   The output is one line per ABC title discovered.  The  format  is
#   rather  simple  HTML, designed to be surrounded by <pre>...</pre>
#   to generate a simple table.  The fields are currently somewhat in
#   flux as I experiment ...
#
#   Note that the output is unsorted, and is in the  order  that  the
#   tunes  were discovered.  We've found that this is the best way to
#   do it, with subsequent sorting done by a separate process. One of
#   the reasons is that we've had a lot of problems with this program
#   either hanging or bombing while trying to connect.  There's a lot
#   of very flakey web software around. By producing our output as we
#   go, you at least get access to what was found. Also, this program
#   can take many hours to run, but our (partial) output is available
#   for use at any time.
#
# OPTIONS
#   Options start with '-' or '+' plus  a  letter,  with  possibly  a
#   parameter  (and no embedded spaces).  Some of the options take an
#   initial '+' to mean "enable" and  '-'  to  mean  "disable".   For
#   others,  the  '-' or '+' is not relevant.  If '+' is shown in the
#   list below, then it is significant. Capitalization of the option
#   letters  doesn't  matter (but it may matter in an argument string
#   if there is one).
#
#   -<n>
#     where <n> is an integer, means a timeout of <n>  seconds.   The
#     default is currently:
#
	$ABCtmout = 30;
#
#   -d<depth>
#     This restricts the depth of directory searches to <depth>. This
#     is mostly to avoid infinite loops. The default is 3. Experience
#     has shown that each depth level produces at least a  factor  of
#     10  increase  in  run time, so you should be careful with this.
#     It's much faster to have a shallow depth and  a  long  list  of
#     starting URLs.  One recommendation:  use the previous output as
#     input, so all the successes then will be re-scanned  (at  depth
#     2) in the current run.
#
#   +h<host>
#     Allow URLs for <host>. Default: All hosts allowed.  If there is
#     one or more +h options, then only these hosts are allowed.
#
#   -s
#   +s<n>
#     Skip over <n> URLs while searching.  This has the effect of not
#     making  a lot of requests in succession of a single server.  It
#     is implemented by moving n-1 URLs to the end of  the  URL  list
#     before each attempt to fetch a URL.
#
# EXAMPLES
#
# SIGNALS
#   There are various ways that this program may get hung up  because
#   of  misbehavior  (or  behavior  that  may  be  valid  but I don't
#   understand it) on the part of web servers.  You can  "kick"  this
#   program by sending it these signals:
#
#   CONT
#     Abandon the current URL by closing the connection.
#   INT or HUP
#     Abandon the search and write the output files.
#   USR1
#     Write a dump of the call stack to the verbose log.  Useful  for
#     diagnosing  hangups.   If  this program doesn't do anything for
#     more than about 2 minutes, you might send it a USR1 signal,  to
#     see  what  it  was  trying  to  do.   And since this causes the
#     "interrupted system call" error, it tends to  also  get  things
#     moving again.
#
# BUGS
#   This program is highly experimental,  in  alpha  state,  and  all
#   that.  Use it at your own risk.  (Not much risk, there, actually,
#   but I thought I'd give the usual friendly warnings.)  Just  don't
#   write the output back over the input, and check its output with a
#   browser or two, and there shouldn't be many problems.
#
#   Of course, there  are  constant  problems  with  slight  spelling
#   variations.   This  program  doesn't  even attempt to tackle this
#   issue.
#
# AUTHOR:
#   John Chambers <jc@trillian.mit.edu> http://trillian.mit.edu/~jc/music/

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Assorted initializations:
$| = 1;						# Autoflush STDOUT.
($me = $0) =~ s'.*/'';		# Our basename; used in messages and env-var names.
($myhost = `hostname`) =~ s/\s+$//;	# Local hostname, for URLs with no host part.

$abcCode = abcCode->new;	# Tune-code calculator.  (Was indirect-object "new abcCode" syntax.)

# Counters:
$abcfiles  = 0;	# Number of files that contain abc.
$abctunes  = 0;	# Number of X: lines discovered.
$abctlines = 0;	# Number of T: lines discovered.
$abctitles = 0;	# Number of distinct T: titles discovered.
$loadcount = 0;	# Number of files loaded.
$SCDkludge = 1; # Try to ignore SCD dance-form titles.

# Options:
$chkparentdirs = 0;	# Check for "parent dir" lines.
$listabchosts  = 1;	# Collect list of hosts with abc files.

# Alignment of various output fields (default = 'L')
$align{X}   = 'R';	# Right-align X fields.

# Initial widths of various output fields:
%max = (
	'K',    12,	# Pad K fields to this many bytes.
	'X',     4,	# Pad X fields to this many bytes.
	'orig', 40,	# Pad origin fields to this many bytes.
	'type',  8,	# Pad type fields to this many bytes.
);

# Recursion control:
$maxdepth = 3;	# Default depth limit for directories.
$abcdepth = 4;	# Depth limit for *.abc files.
$depth    = 1;	# The current depth in directories.

# Before this point should be only simple assignments of initial values.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Set the verbosity from various environment variables. The value may
# be a verbose level (1 digit), plus an optional output file name.
# The verbose log is written to filehandle V, which the whole program
# prints to; the default destination is the real STDERR stream.

$Vopt = $ENV{"D_$me"} || $ENV{"T_$me"} || $ENV{"V_$me"} || '1';
if ($Vopt =~ /^(\d)(.+)/) {
	$V = $1;
	$Vfil = $2;
	if (!open(V,">>$Vfil")) {
		my $err = $!;		# Save the failure reason before the dup below can overwrite $!.
		open(V,">&STDERR");	# Dup the real STDERR.  (Was ">>STDERR", which appends to a file literally named "STDERR".)
		print V "$me: Can't write \"$Vfil\" ($err)\n" if $V>0;	# Now reaches an open handle; it was printed before the fallback open.
	}
} else {
	$V = $Vopt;
	open(V,">&STDERR");	# Dup the real STDERR (was ">>STDERR": a file named "STDERR").
}
select V; $| = 1; select STDOUT; $| = 1;	# Autoflush both the log and stdout.
print V "$me started with V=$V ", `date` if $V>1;

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# @URLs is the work queue of URLs to process.  You can prime this
# list with hard-coded URLs if you wish (useful for testing), or
# let it be filled from stdin and the command line below.
@URLs = (
#	'./',		# Useful for testing.
#	'http://trillian.mit.edu/~jc/music/'	# My own music archive.
);
$URL = '';		# Current URL; also localized (dynamically scoped) in scan().

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Read in a list of URLs that need special treatment:
# Read URLs and control directives from stdin, one per line (see INPUT
# above).  Recognized directives: scan/search, done/ignore, avoid.
print V "$me: Read STDIN ...\n" if $V>3;
while (defined($line = <STDIN>)) {	# Line-by-line; for(<STDIN>) slurped the entire input into memory.
	$line =~ s/\s*$/ /;				# Exactly one space at end of line.
	print V "$me: line \"$line\"\n" if $V>3;
	if ($line =~ s"^\s*(http://\S+)\s"$1"i) {
		print V "$me: URL: $line\n" if $V>3;
		&URL($line,1);				# Add it as a level-1 URL to be examined.
	} elsif ($line =~ s"^(scan|search):*\s*(\S+)\s"$2"i) {
		print V "$me: SCAN $line\n" if $V>3;
		&URL($line,1);				# Add it as a level-1 URL to be examined.
	} elsif ($line =~ s"^(done|ignore):*\s*(\S+)/*\s"$2"i) {
		print V "$me: DONE $line\n" if $V>3;
		$Depth{$line} = 1;			# Mark this one as "already done".
	} elsif ($line =~ s"^(avoid):*\s*(\S+)/*\s"$2"i) {
		print V "$me: Avoid $line\n" if $V>2;
		if (($host,$rest) = ($line =~ m"http://([-\w.:]+)(.*)"i)) {
			$BadHost{$host} = 1;	# Mark this host as "to be avoided".
			print V "$me: Avoid \"$host\"\n" if $V>2;
		} elsif ($line =~ /^[-\w.:]+$/) {	# A bare hostname.
			$BadHost{$line} = 1;	# Mark this one as "to be avoided".
			print V "$me: Avoid \"$line\"\n" if $V>2;
		} else {
			print V "$me: AVOID $line IGNORED (can't parse).\n" if $V>2;
		}
	}
	# Anything else is a comment and is ignored.
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Here are some patterns to suppress URLs (if enabled):
#%ignore = (
#	'/peacebook/' => 1,	# Interesting, but HUGE!
#	'^http://lscftp.kgn.ibm.com/' => 1,	# No response.
#);

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Map file-name suffixes to content types.  Extendable from the command
# line via ".suf=type" arguments:
%suf = (
	'abc'  => 'abc',
	'html' => 'html',
);

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Global patterns:

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Signal dispositions (see SIGNALS above): CONT abandons the current
# URL, INT/HUP finish up and write the output files, USR1 dumps the
# call stack to the verbose log.
@SIG{qw(CONT INT HUP USR1)} = qw(sigcont sigdone sigdone showcalls);

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Scan the command-line arguments, processing them as we  go.   Input #
# files  are  read and used to build tables.  Any URLs discovered are #
# accumulated in @URLs.  Options are processed as read, so they  will #
# only  affect  things to their right, except for URLs, which we save #
# for last.                                                           #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
for $a (@ARGV) {
	print V "$me: Arg \"$a\"\n" if $V>3;
	if (($fl,$opt) = ($a =~ m'^([-+])(.*)'i)) {
		if ($opt =~ m'^art'i) {				# -art/+art: ignore/include articles in titles.
			$articles = $fl;
			print V "$me: " . ($articles eq '-' ? 'Ignoring' : 'Including') . " articles." if $V>1;
		} elsif ($opt =~ m'^D(\d*)$'i) {	# -d<depth>: directory recursion limit.
			$maxdepth  = ($fl eq '-') ? 0 : $1;
			print V "$me: maxdepth='$maxdepth'\n" if $V>1;
			$abcdepth = $maxdepth + 1;		# *.abc files may sit one level deeper.
		} elsif ($opt =~ m'^P$'i) {			# +p: promote parents of pages with abc.
			$Pflag  = ($fl eq '-') ? 0 : 1;	# Was "$1", but m'^P$' has no capture group, so $1 was stale.
			print V "$me: Pflag='$Pflag'\n" if $V>1;
		} elsif ($opt =~ m'^H(.*)$'i) {		# +h<host>: restrict the crawl to this host.
			$host{$h = $1} = ($fl eq '-') ? 0 : 1;
			print V "$me: host{$h}=$host{$h}\n" if $V>2;
		} elsif ($opt =~ m'^S(\d*)$'i) {	# -s/+s<n>: scatter requests across servers.
			$urlskip = ($fl eq '-') ? 1 : ($1 || 1);
			print V "$me: urlskip=$urlskip\n" if $V>2;	# Was a copy-pasted host{...} message.
		} elsif ($opt =~ s'^(\d+)$'') {		# -<n>: fetch timeout in seconds.
			$ABCtmout = $1;
			print V "$me: ABCtmout=$ABCtmout\n" if $V>2;
		}
	} elsif ($a =~ m'^\.(\w+)=(\w+)$') {	# ".suf=type": add a suffix-to-type mapping.
		$suf{$1} = $2;
	} else {								# Anything else is a starting URL.
		&URL($a,1);
	}
}
print V "$me:   articles=$articles.\n";
print V "$me:   maxdepth=$maxdepth.\n";
print V "$me:   abcdepth=$abcdepth.\n";
print V "$me: max{list} =$max{list}.\n";
print V "$me: max{K}    =$max{K}.\n";
print V "$me: max{X}    =$max{X}.\n";
print V "$me: max{table}=$max{table}.\n";

# We use our starting time CCYYMMDD as part of the name of our output
# files, to make it clear just when the search ran.

($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime(time);
$cymd = sprintf("%04d%02d%02d",1900+$year,1+$mon,$mday);	# gmtime gives years-since-1900 and 0-based months.

# O is the main (HTML) output file and S the summary file.  Both are
# bareword package filehandles because the rest of the program prints
# to them; each is set autoflushing so partial output survives a crash
# or a kill (see OUTPUT above).  If either open fails we log it and
# carry on; subsequent prints to the unopened handle fail silently.
$ofile = "$me.$cymd";
if (open(O,">$ofile")) {
	print V "$me: Writing to \"$ofile\"\n" if $V>1;
	select O; $| = 1; select STDOUT;	# Autoflush O, then restore the default output handle.
	link($ofile,"$ofile.html");	# Hard-link a .html alias; failure (e.g. the link already exists) is silently ignored.
} else {
	print V "$me: Can't write \"$ofile\" [$!]\n" if $V>0;
}

$sfile = "Summary.$cymd";
if (open(S,">$sfile")) {
	print V "$me: Writing to \"$sfile\"\n" if $V>1;
	select S; $| = 1; select STDOUT;	# Autoflush S as well.
} else {
	print V "$me: Can't write \"$sfile\" [$!]\n" if $V>0;
}

print O "<html><head><title>ABC tunes</title></head><body><pre>\n";

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Run through the list of URLs, reading each one.  If  the  data  looks #
# like a directory, we read it recursively. If the data is a .abc file, #
# we read it, extract the title(s), and add it  to  our  %U  table  for #
# later use.                                                            #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
$urlskip ||= 1;			# Default: process every URL.  (Unconditionally assigning 1 here clobbered the -s/+s<n> option parsed above.)
while (@URLs) {
	last if $finishup;		# set by INT and HUP signals.
	$u = shift @URLs;		# Next URL, please.
	if ($urlcount++ % $urlskip) {	# Scattering: defer all but every $urlskip'th URL.
		print V "     (\"$u\") [$depth]\n" if $V>3;
		push @URLs, $u;		# Throw it back to the end of the queue.
	} else {
		$urlsdone++;		# Count the URLs that we process.
		$depth = $Udepth{$u} || 1;
		print V "Next: \"$u\" [$depth]\n" if $V>3;
		&scan($u);
	}
}
print V "\n$me search done ", `date` if $V>1;

if ($V>0) {			# Summary statistics, written to the Summary.<date> file.
	print S "\n";
	print S "$loadcount files loaded.\n";
	print S "$abcfiles files contain abc.\n";
	print S "$abctunes ABC tunes found.\n";
	print S "$abctlines T: lines found.\n";
	print S "$abctitles distinct ABC titles found.\n";
	if ($loadcount > 0) {
		$i = $loadtime / $loadcount;
		print S "Loads: $loadcount took $loadtime sec ($i sec/file)\n";
	} else {
		print S "No successful loads.\n";
	}
	if ($fails) {
		$i = $failtime / $fails;
		print S "Fails: $fails took $failtime sec ($i sec/file)\n";
	} else {
		print S "No failed loads.\n";
	}
	# URLs (and hostnames) may contain '%' (percent-encoding), so pass
	# them as %s arguments rather than interpolating them into the
	# printf format string.
	print S "\nABC files linked to from:\n";
	for $x (sort keys %ABClink) {
		printf S "%8d < %s\n", $ABClink{$x}, $x;
	}
	print S "\nABC files found at:\n";
	for $x (sort keys %Tabcs) {
		printf S "%8d + %s\n", $Tabcs{$x}, $x;
	}
	print S "\nSites visited:\n";
	for $h (sort keys %Sites) {	# Was %Site, which is never populated; Open() records each visited site in %Sites.
		printf S "\t%8d : %s\n", $SiteN{$h}, $h;
	}
}

print O "</pre></body></html>\n";	# Close the HTML document opened above (the </html> was missing).
print V "$me exit ", `date` if $V>1;
exit 0;

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Finish off one tune: count it, compute its tune codes, and write one
# output line per accepted title.  NOTE dynamic scoping: this is called
# from abc(), and reads abc()'s local()s $K, $L, $M, $T, @ti and @tn
# directly; our own local()s $GBcode/$JCcode are in turn read by
# OStune(), which we call below.  Renaming any of these would silently
# break that contract.
#   $name - canonical name of the last-accepted title ('' if none).
#   $n    - instance count of that title (0 if no titles were seen).
#   $t    - number of titles accepted for this tune.
#   @_    - remaining args: the tune's music lines.
sub TuneEnd {
	local($name) = shift;
	local($n)    = shift;
	local($t)    = shift;
	local($GBcode,$JCcode,$lines);	# $GBcode/$JCcode are read dynamically by OStune().
	local($id) = "$me/TuneEnd";
	$abctunes++ if $n >0;	# Count the tunes.
	$lines = @_;			# Count the lines in this tune.
	print V "$id: Tune has $lines lines; K=\"$K\" L=\"$L\" M=\"$M\" .\n" if $V>3;
	if (@_) {
		($GBcode,$JCcode) = $abcCode->abcCode($K,$L,$M,@_);
		print V "$id: GBcode=\"$GBcode\" JCcode=\"$JCcode\" T=\"$T\"\n" if $V>3;
	}
	if ($name) {
		for ($i=0; $i<=$t; $i++) {	# Titles live at 1..$t; the empty slot 0 is skipped by the $tn[$i] guard.
			print V "$id: Tune $i: ti=$ti[$i] tn=\"$tn[$i]\"\n" if $V>3;
			OStune($ti[$i],$tn[$i]) if $tn[$i];
		}
	}
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Given a tune name and  index,  this  routine  writes  its  info  in #
# "short"  html  form  to  file  O,  which must be open.  We use this #
# routine to produce the intermediate "trace" file  as  we  load  the #
# info. We also call this from abclist to write to the current output #
# html file at the end.                                               #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# NOTE dynamic scoping: $GBcode and $JCcode printed below are local()s
# of our caller TuneEnd(); they are visible here only through Perl's
# dynamic scoping of local().  &pad is defined elsewhere in this file;
# presumably it pads its first arg to the width in $max{<field>} --
# verify against its definition.
sub OStune {
	local($i,$name) = @_;
	local($nuna,$r,$key,$seq,$titl,$tabc,);
	$nuna = "$i:$name";						# "<instance>:<canonical name>" key into the title tables.
	$titl = &abc2html($Title{$nuna});		# Title with abc escapes converted to HTML entities.
	$tabc = &findURL('abc',$i,$name,$titl);	# Best-known URL for the abc source.
	$seq  = &pad($TX{$nuna}, ' ', 'X');		# The tune's X: index, padded.
	$key  = &pad($TK{$nuna}, ' ', 'K');		# The tune's K: key signature, padded.
	print V "$me: No title for \"$nuna\"\n" if ($V>0 && !$titl);
	$r = 0;									# Count of links emitted (informational only).
	print O "<tt><!-- $name $i -->";
	if ($tabc) {print O "<A HREF=\"$tabc\">abc</A> "; $r++} else {print O '___ '}
	print O "$seq ";
	print O "$GBcode ";
	print O "$JCcode ";
	print O "$key ";
	print O '</tt>';
	print O "$titl\n";
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Parse a chunk of abc code. The arg is the X: or T:  line that triggered the #
# call.   We  process  lines  from  <DOC>  until the first blank line or EOF, #
# whichever comes first. We return the number of lines we ate. This tells the #
# caller how much of the input we actually used up. Note that, for files with #
# multiple abc tunes, this routine must be called repeatedly, once  for  each #
# new tune.  It's the caller's job to skip over junk between tunes.           #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# Parse one abc tune (see the banner above).  NOTE dynamic scoping: the
# local()s $K, $L, $M, $T, @ti and @tn declared here are read directly
# by TuneEnd() (and through it by OStune()) via Perl's dynamic scoping
# of local().  Additional lines are pulled from the current document
# with &docline (defined elsewhere in this file).  Returns the number
# of input lines consumed, counting the arg line as the first.
sub abc {
	local($line,$prnt) = @_;
	local($n,$name,$nuna,@ti,@tn,$K,$L,$M,$O,$T,$TT,@tune);
	local($t) = 0;	# Count the titles.
	local($X) = 1;	# In case X: line is missing.
	local($l) = 1;	# Count the arg as first line.
	local($n) = 0;	# Tune instance count.  (Also declared in the list above; the re-local is harmless.)
	local($id) = "$me/abc";
line:
	for (;$line;$line = &docline) {	# The first time we process the arg.
		$line =~ s/\s+$//;	# Discard CRs and LFs.
		if (!$line) {		# This is abc's end-of-tune.
			print V "$id: Blank line $l is end of \"$nuna\" ($n tunes found).\n" if $V>3;
			TuneEnd($name,$n,$t,@tune);
			return $l;
		}
		++ $l;
		print V "$id: ABC \"$line\"\n" if $V>4;
		if ($line =~ /^X:\s*(\d+)/) {
			$X = $1;
		} elsif ($line =~ /^%/) {				# Ignore comments and directives.
			next line;
		} elsif (($line =~ /^T:\s*(.*)$/)		# Title line.
		|| (!$T && ($line =~ /^P:\s*(.*)$/))	# P: line if no titles seen.
		) {
			$T = $1;					# One title found.
			if ($SCDkludge && ($T =~ /^[Xx_\d]+[HJMRSW]\d*$/)) {	# Special Scottish dance-form kludge.
				print V "$id: Ignore \"$T\" (SCD kludge)\n" if $V>3;
				$T = $TT;				# Remember previous title, if any.
				next line;				# Otherwise ignore numeric titles.
			}
			next line if ($T =~ /^[-\d_]+$/);	# Ignore if just these.
			print V "      \"$URL\" T: $T\n" if $V>2;
			if (!($name = &canon($T))) {	# Canonicalize the tune's name.
				print V "$id: Reject title \"$T\"\n" if $V>1;
				next line
			}
			++$t;						# Count the titles.
			$TT = $T;					# Remember last-accepted title.
			$n = ++$Tcount{$name};		# Number of instances of this tune.
			print V "$id: Instance $n of \"$name\"\n" if $V>4;
			$abctitles++ if $n == 1;	# Count the distinct titles.
			$abctlines++;				# Count the T: lines.
			$nuna = "$n:$name";			# Num+name for this tune.
			$TX{$nuna} = $TX{$t} = $X;	# Remember the tune's X: index.
			if ($prnt) {
				++$ABClink{$prnt};	# Count the ABC tunes linked to by the parent.
				if ($Pflag && $Udepth{$prnt} > 1) {
					print V "depth \"$prnt\" was $Udepth{$prnt} now 1.\n" if $V>1;
					$Udepth{$prnt} = 1;	# Promote parent.
				}
			} else {
				print V "$id: \"$URL\" has no parent.\n" if $V>3;
			}
			print V "$id: TX{$nuna}=$TX{$nuna}\n" if $V>4;
			$ti[$t] = $n; $tn[$t] = $name;	# Note $n:$name pairs in this tune.
			print V "$id: ti[$t]=$ti[$t] tn[$t]=$tn[$t] TX{$nuna}=$TX{$nuna}\n" if $V>3;
			$U{"abc:$name"} .= "$URL ";	# Add to list of possible URLs.
			$U{"abc:$nuna"} .= "$URL ";	# Add to list of specific URLs.
			&ABCcount($URL) if $listabchosts;
			$abcfiles++ 				# Number of ABC files found.
				if !($Turls{$URL}++);	# List of URLs that we found abc in.
			$SiteN{$currsite}++;		# Count of ABC tunes per site.
			$Title{$nuna} = $T;			# Remember this instance's title.
		} elsif ($line =~ /^O:\s*(.*)/) {	# Origin line.
			$O = $1;
			for ($i=1; $i<=$t; $i++) {		# Search thru tunes in this file.
				if ($TX{$i} == $X) {		# If this is the current tune,
					$nuna = "$ti[$i]:$tn[$i]";
					$TX{$nuna} = $X if $X;	# Remember X and O fields.
					print V "$id: TX{$nuna}=$TX{$nuna}\n" if $V>5;
				}
			}
		} elsif (!$K && ($line =~ /^K:\s*([\^=_\s\w]+)/)) {	# First K: line only.
			$K = $1;					# One key signature found.
			print V "$id: K='$K' t=$t\n" if $V>3;
			for ($i=1; $i<=$t; $i++) {	# It's the key for all the titles.
				print V "$id: TX{$i}='$TX{$i}'\n" if $V>5;
				if ($TX{$i} == $X) {	# Only for the current tune.
					$nuna = "$ti[$i]:$tn[$i]";
					$TX{$nuna} = $X if $X;
					if (length($X) > $max{X}) {
						print V "$id: New max{X} length($X) > $max{X}\n" if $V>3;
						$max{X} = &Max($max{X},length($X));
					}
					$max{X} = &Max($max{X},length($X));	# NOTE(review): duplicates the assignment just above; harmless but redundant.
					print V "$id: TX{$nuna}=$TX{$nuna}\n" if $V>5;
					$TK{$nuna} = $K;
					print V "$id: TK{$nuna}=$TK{$nuna}\n" if $V>3;
				} else {
					print V "$id: TX{$i} is '$TX{$i}' not '$X'\n" if $V>5;
				}
			}
		} elsif ($line =~ /^L:\s*(\d+)\/(\d+)/) {	# Default note length; only the numerator is kept.
			$L = $1;
			print V "$id: L=\"$L\"\n" if $V>3;
		} elsif ($line =~ /^M:\s*(\d+)\/(\d+)/) {	# Meter; only the numerator is kept.
			$M = $1;
			unless ($L) {($L = $M) =~ s/^\d+/1/}	# NOTE(review): $M holds only the numerator here, so this always yields L='1'; looks like it expected the full "n/m" -- verify.
			print V "$id: M=\"$M\" L=\"$L\"\n" if $V>3;
		} elsif ($line =~ /^M:\s*[Cc]/) {			# M:C / M:C| = common time.
			$M = '4/4';
			$L = '1/4' unless $L;
			print V "$id: M=\"$M\" L=\"$L\"\n" if $V>3;
		} elsif ($line =~ /^([A-Za-z]):\s*(.*)/) {	# Any other header field.
			print V "$id: $1: line ignored.\n" if $V>3;
		} else {									# Not a header line: music.
			print V "$id: Music: \"$line\"\n" if $V>3;
			push @tune, $line;
		}
	}
	print V "$id: EOF at line $l is end of \"$nuna\".\n" if $V>3;
	TuneEnd($name,$n,$t,@tune);
	$l;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Given a URL that contains an ABC tune, we strip  away  fields  from #
# its  URL  one  at  a  time,  and  increment  the ABC count for each #
# resulting (partial) URL.  The end result is a count of how many ABC #
# titles are found under each web directory.                          #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Credit one ABC hit to every ancestor directory of the given URL.
# E.g. http://a.b/x/y/t.abc bumps the counts for http://a.b/x/y/,
# http://a.b/x/ and http://a.b/, building the per-directory totals
# reported in the summary.
sub ABCcount {
	my ($url) = @_;
	while ($url =~ m'^([a-z]+)://(.+)/([^/]*)$'i) {
		my $dir = "$1://$2/";
		++$Tabcs{$dir};
		printf(V "%6d $dir\n", $Tabcs{$dir}) if $V>3;
		$url = "$1://$2";	# Peel off the last path component and repeat.
	}
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Given a tune name and index, this routine writes its info in "long" #
# html  form  to  file O, which must be open.  We use this routine to #
# produce the intermediate "trace" file as we load the info.          #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# Write one "long"-form HTML output line for tune instance $i of tune
# $name to filehandle O (which must be open).  &pad is defined
# elsewhere in this file; presumably it pads its first arg to the
# width recorded in $max{<field>} -- verify against its definition.
sub OLtune {
	local($i,$name) = @_;
	local($nuna,$r,$seq,$ndx,$tabc,$titl,$tkey,$turl);
	local($id) = "$me/OLtune";
	$nuna = "$i:$name";							# "<instance>:<canonical name>" key.
	$ndx  = $TX{$nuna};							# The tune's X: index.
	$tkey = &pad($TK{$nuna}, '.', 'key');		print V "$id: tkey=\"$tkey\"\n" if $V>3;
	$titl = &abc2html($Title{$nuna});			# Title with abc escapes converted to HTML.
	$turl = &findURL('abc',$i,$name,$titl);		# Best-known URL for the abc source.
#	$turl =~ s"/www.irishfest.com/oneills/dev/"/www.irishfest.com/oneills/___/";
	print V "$id: turl=\"$turl\"\n" if $V>4;
	$tabc = &TuneLink('abc',$ndx,$name,$nuna,$turl);	# CGI anchor (or a run of dashes).
	$seq  = &pad($ndx, '_', 'X');
	print V "$id: No title for \"$nuna\"\n" if ($V>0 && !$titl);
	$r = 0;										# Count of links emitted (informational only).
	print O "<tt><!-- $name $i -->";
	if ($turl) {print O "<A HREF=\"$turl\">File</A> "; $r++} else {print O '____ '}
	if ($tkey) {print O "$tkey "} else {print O '______ '}
	if ($tabc) {print O $tabc} else {print O '___ '}
	print O "$titl\n";
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Build the HTML anchor that links tune $nuna (X: index $ndx) to the
# tune-fetching CGI for the given $url.  With no URL, return a run of
# dashes the same width as the type code, so columns still line up.
sub TuneLink {
	my ($typ,$ndx,$name,$nuna,$url) = @_;
	my $TYP = uc($typ);
	my $id  = "$me/TuneLink";
	my $lnk;
	print V "$id: typ=$typ ndx=$ndx name=$name nuna=$nuna url=\"$url\"\n" if $V>4;
	unless ($url) {
		$lnk = ('-' x length($typ)) . ' ';	# Placeholder of matching width.
		print V "$id: NUL \"$lnk\"\n" if $V>4;
		return $lnk;
	}
	$lnk = "<a href=\"/cgi/abc/Tune?F=$TYP&X=$ndx&U=$url\">$TYP</a> ";
	print V "$id: URL \"$lnk\"\n" if $V>4;
	return $lnk;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Convert the abc escape sequences to HTML.
# Translate the abc backslash escape sequences for accented letters
# into their HTML character entities (e.g. \'e -> &eacute;, \"o ->
# &ouml;, \aa -> &aring;).  Text with no escapes passes through
# unchanged.
sub abc2html {
	my ($str) = @_;
	$str =~ s#\\(o)#\&${1}slash;#ig;	# \o  -> slashed o
	$str =~ s#\\a(a)#\&${1}ring;#ig;	# \aa -> a-ring
	$str =~ s#\\"(\w)#\&${1}uml;#ig;	# \"x -> umlaut
	$str =~ s#\\'(\w)#\&${1}acute;#ig;	# \'x -> acute accent
	$str =~ s#\\`(\w)#\&${1}grave;#ig;	# \`x -> grave accent
	$str =~ s#\\,(\w)#\&${1}cedille;#ig;	# \,x -> cedilla
	$str =~ s#\\~(\w)#\&${1}tilde;#ig;	# \~x -> tilde
	return $str;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Canonicalize a name.  We upper-case everything, and strip  out  all #
# funny  chars.   If  $articles  is  enabled,  we  look  for articles #
# initially and after a comma, and delete them.                       #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Canonicalize a tune name: upper-case it, strip all non-word chars,
# de-htmlize entities, and (when -art was given) delete leading and
# post-comma articles (English "the/a/an", French "le/la/les/l'").
# Returns '' when the name contains no letters at all.
sub canon {
	local($name) = @_;
	local($lcs,$ucs);
	($ucs = $name) =~ s/[^A-Z]+//g;	# Extract upper-case letters.
	($lcs = $name) =~ s/[^a-z]+//g;	# Extract lower-case letters.
	print V "canon: name=\"$name\" lcs=\"$lcs\" ucs=\"$ucs\"\n" if  $V>5;
	if ($ucs && $lcs) {			# Both cases used.
		$name =~ s/^[^A-Z]+//;	# Strip stuff before first upper-case letter.
		print V "canon: Mixed-case \"$name\"\n" if $V>5;
	} elsif (!$ucs && $lcs) {	# All lower case
		print V "canon: Lower-case \"$name\" curious\n" if $V>5;
	} elsif ($ucs && !$lcs) {	# All upper case
		print V "canon: Upper-case \"$name\" suspect.\n" if $V>5;
	} else {					# No letters at all.
		print V "canon: Name \"$name\" with no letters rejected.\n" if $V>5;
		return '';
	}
	if ($articles eq '-') {
		$name =~ s/^the\s+//i;
		$name =~ s/^an?\s+//i;
		$name =~ s/^l[ae]?s?\s+//i;
		# The post-comma patterns said ",s*" (a literal run of 's');
		# per the header comment they mean "after a comma", i.e. ",\s*".
		$name =~ s/,\s*the\s+//i;
		$name =~ s/,\s*an?\s+//i;
		$name =~ s/,\s*l[ae]?s?\s+//i;
	}
	$name = uc($name);	# Upper-case everything.
	$name =~ s"&(\w)\w*;"$1"g;	# De-htmlize the name (e.g. &eacute; -> E).
#	$name =~ s/,.*//;	# Discard everything after a comma.
	$name =~ s"\W+""g;	# Delete non-alpha chars.
#	$Tname{$name} = 1;	# Note that we've seen the name.
	return $name;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# This looks in several arrays for the "right" URL to produce  for  a #
# specific tune, given the name, number, and type codes.  If we can't #
# find a usable URL, we return a null string.                         #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Look in the %U table for the "right" URL for a tune of type $typ,
# trying the instance-specific key "<typ>:<num>:<nam>" first, then the
# generic "<typ>:<nam>" (each value is a space-separated URL list).
# As a last resort, a title of the form "Words: Rest" is retried with
# the canonicalized remainder.  Returns '' if nothing usable is found.
sub findURL {
	local($typ,$num,$nam,$ttl) = @_;
	local($u,$lis,@lis);	# Localized (they leaked as globals before), so the recursion below can't clobber a caller's loop state.
	for $lis ($U{"$typ:$num:$nam"}, $U{"$typ:$nam"}) {
		@lis = split ' ', $lis;
		for $u (@lis) {
			return $u if $u;
		}
	}
	if ($ttl =~ /^(\w*):\s*(.*)/) {		# "Prefix: Rest" title -- retry on the rest.
		return $u if ($u = &findURL($typ,$num,&canon($2),$2));
	}
	return '';
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Given a URL or local file name, this routine attempts  to  open  it #
# and  return  with  F containing the file handle.  If we succeed, we #
# return 1; a return of 0 means that we can't read the object.        #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Given a URL or local file name, try to open it and return with *F
# holding the open handle.  Returns 1 on success, undef (or an empty
# return) on failure.  Side effects: records the site in %Sites/%SiteN,
# enforces the +h host restrictions, and updates the load/fail
# counters and timing totals used by the summary.
sub Open {
	local(*F,$name) = @_;
	local($h,$stat,$p,$path,$t,$t0);
	my $id = "$me/Open";
	if ($V>3) {
		local($p,$c,$l) = caller;
		print V "$id: \"$name\" (from $p/$c/$l)\n";
	} elsif ($V>1) {
		print V "$id: \"$name\" [$depth]\n";	# The old "if $V>3" guard here could never be true inside this elsif branch.
	}
	($path = $name) =~ s/^[<\s]+//;		# Strip any "<" redirection prefix and leading blanks.
	if (($p,$h) = ($path =~ m'^([a-z]+)://([-a-z0-9:._]+?)/'i)) {	# Hostname?
		print V "$id: URL contains host \"$h\"\n" if $V>6;
		$currsite = "$p://$h";
		$Sites{$currsite} = 1;			# Record the site for the final summary.
		$SiteN{$currsite} = 0;
	} else {
		$h = $myhost;
		print V "$id: URL contains no host, using \"$h\"\n" if $V>6;
	}
	if (%host) {			# Are there host restrictions (+h options)?
		print V "$id: Testing  host \"$h\" ...\n" if $V>6;
		if ($host{$h}) {
			print V "$id: Accepted host \"$h\"\n" if $V>6;
		} else {
			print V "$id: Rejected host \"$h\"\n" if $V>3;
			return;
		}
	} else {
		print V "$id: Accepted host \"$h\" (no host restrictions)\n" if $V>6;
	}
	if (-d $path) {				# A local directory is special.
		require "HTMLdir.pm";	# Dir-to-HTML module.
		if (HTMLdir(*F,$path)) {
			$loadcount ++;
			return 1;
		}
		print V "$id: Can't read local dir \"$path\" ($!)\n" if $V>0;
		close F;
		return undef;
	}
	if (open(*F,$name))	{		# Is it a local file?  (2-arg open kept on purpose: "<file" forms still work.)
		print V "$id: Opened local file \"$name\"\n" if $V>3;
		$loadcount ++;
		return 1;
	}				# Is it a URL?
	if ($name !~ m"^(http|file|ftp):"i) {
		local($p,$c,$l) = caller;
		print V "$id: \"$name\" ignored (from $p/$c/$l)\n";
		return undef;
	}
	$t0 = time;
	if ($directopen) {
		$stat = &URLdata(*F,$path);	# Try a web connection to the URL.
		$tt = time - $t0;
		print V "$id: \"$path\" returned in $tt sec. ($!)\n" if $V>4;
	} else {
		# NOTE(review): $path is interpolated into a shell pipeline; a
		# URL containing shell metacharacters could inject commands here.
		$cmd = "w3cat -T$ABCtmout +TH $path |";
		print V "$id: \"$cmd\"\n" if $V>3;
		if ($stat = open(F,$cmd)) {
			print V "$id: \"$cmd\" running.\n" if $V>3;
			$URLhdr = 1;
		} else {
			print V "$id: \"$cmd\" failed ($!).\n" if $V>1;
		}
		$tt = time - $t0;	# This branch never set $tt before, so the stats below used a stale value from an earlier call.
	}
	if (!$stat) {
		print V "$id: \"$path\" failed in $tt sec. ($!)\n" if $V>0;
		$fails ++;
		$failtime += $tt;
		close F;
		return undef;
	}
	$loadcount ++;
	$loadtime += $tt;
	print V "$id: \"$path\" $tt sec.\n" if (($V>1) && ($tt>0));
	return 1;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Register a file under its name and type.  For example, if  we  note #
# the file 'http://foo.bar/qux.gif', we call                          #
#   &File('gif','http://foo.bar/qux','http://foo.bar/qux.gif')        #
# This will leave behind global information:                          #
#   $U{'gif:qux'] = 'http://foo.bar/qux.gif'                          #
# This tells us how to find a gif file for the name 'qux'.            #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Register a file under its name and type.  E.g. noting the file
# 'http://foo.bar/qux.gif' as &File('gif','http://foo.bar/qux',$url)
# appends that URL to $U{'gif:QUX'} (the name is canonicalized), which
# is how findURL() later locates a gif for the tune named 'QUX'.
sub File {
	my ($ext,$pth,$url) = @_;
	print V "File: \"$url\"\n" if $V>1;
	(my $base = $pth) =~ s'.*/'';		# Basename of the path.
	$U{"$ext:" . &canon($base)} .= "$url ";
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Given an href, we decide here how to handle it.   The  caller  must #
# pass  us  the  URL from the href, and the string (item) between the #
# '>' and the </a>, in case we need to check what's there.  The  main #
# use  we  make  of  the item is to check for and reject "parent dir" #
# references. We also look at a few other suffixes and decide whether #
# we  should  load  them  and  scan  their  contents.   If the URL is #
# accepted, we pass it to &URL() for later processing.  For  rejected #
# URLs, we just return.                                               #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

sub href {
	# Decide how to handle one href discovered in the current document.
	# Accepted URLs are queued via &URL() for later scanning; rejected
	# ones just return.
	local(
		$prnt,		# Parent URL to current, if any.
		$curr,		# Current URL.
		$href,		# URL pointed to by $curr.
		$item)		# HTML text associated with $href.
			= @_;
	local($a,$l,$p,$s,$u);	# ($a,$p,$s are only used by the disabled suffix code below.)
	# The /^\$/ test forces a diagnostic for hrefs starting with a literal
	# '$' -- presumably unexpanded template variables; TODO confirm.
	print V "href: path=\"$curr\" href=\"$href\" item=\"$item\"\n" if $V>3 || ($href =~ /^\$/);
	return if $href =~ /^(mailto|file|ftp):/;	# Protocols we never follow.
	if ($href =~ /^cgi\b/i) {	# CGI links: dynamic content, skip.
		print V "href: Ignore href=\"$href\" (/cgi)\n" if $V>4;
		return;
	}
	if ($href =~ '/$') {	# If final '/', treat as directory.
		print V "href: Treat href=\"$href\" as directory.\n" if $V>4;
		if ($chkparentdirs && ($item =~ /Parent Dir/i)) {	# "Parent Directory" links would loop upward.
			print V "href: Ignore href=\"$href\" item=\"$item\"\n" if $V>4;
			return;
		} elsif ($href =~ /^\w*:/) {	# Full URL
			print V "href: \"$href\" read at depth $depth.\n" if $V>3;
			&URL($href,$depth+1,$prnt);
		} else {						# Relative URL.
			$u = &URLhref($curr,$href);	# Resolve relative to current URL.
			print V "href: \"$u\" read at depth $depth.\n" if $V>3;
			&URL($u,$depth+1,$prnt);
		}
		return;
	}
	# No final '/' on HREF:
	print V "href: Treat href=\"$href\" as non-directory.\n" if $V>4;
	if ($href =~ m'#') {	# Fragment reference within a document: skip.
		print V "href: \"$curr\" href=\"$href\" ignored (#).\n" if $V>3;
#	} elsif ($href =~ m'(.*)\.abc$'i) {
#		$u = &URLhref($curr,$href);
#		print V "href: \"$u\" abc at depth $depth.\n" if $V>3;
#		$a = &URL($u,$depth+1,$prnt);
#	} elsif (($p,$s) = ($href =~ m'(.*)\.(\w+)$'i)) {
#		print V "href: \"$href\" suffix \"$s\"\n" if $V>3;
#		if ($suf{$s} eq 'abc') {		# Is this a possible abc file?
#			$u = &URLhref($curr,$href);
#			print V "href: \"$u\" at depth $depth.\n" if $V>3;
#			&URL($u,$depth+1,$prnt);
#		} else {
#			print V "href: \"$href\" suffix \"$s\" unknown.\n" if $V>3;
#		}
	} else {	# Anything else: resolve it and queue it for scanning.
		$u = &URLhref($curr,$href);
		print V "href: \"$u\" URL at depth $depth.\n" if $V>3;
		&URL($u,$depth+1,$prnt);
	}
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# Return the numerically largest of the arguments.
# BUG FIX: the original shifted @_ inside "for (@_)", shrinking the list
# while foreach iterated it, so elements were skipped (e.g. Max(1,2,5)
# returned 2).  The stray shift has been removed.
sub Max {local($n) = shift; for (@_) {$n = $_ if $_ > $n} return $n}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
sub outfooter {
	# Emit the saved @footer lines to the output file O, preceded by a
	# <hr> if the header pass didn't already emit one.  Non-HTML lines
	# are entity-encoded and bare e-mail addresses turned into mailto
	# links.
	local($nfoot);
	$nfoot = scalar(@footer);
	print V "outfooter: Produce $nfoot footer lines.\n" if $V>5;
	print O "<hr>\n" unless $gothr;
	# NB: the loop variable aliases each element, so the encoding below
	# rewrites @footer in place (as the original code did).
	for $fl (@footer) {
		++$inHTML if !$inHTML && $fl =~ /<\w*>/;
		unless ($inHTML) {
			$fl = HTML::Entities::encode_entities($fl);
			$fl =~ s#\&lt;(\w+@[-\w.]+)\&gt;#<a href="mailto:$1">$&</a>#;
		}
		print V "outfooter: \"$fl\"\n" if $V>5;
		print O "$fl\n";
	}
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
sub outheader {
	# Emit the document <TITLE> and the saved @header lines to output
	# file O.  Sets $gothr if a <hr> was seen, else appends one, then
	# emits the optional $outhdr string.
	local($nhead,$sfx);
	$nhead = scalar(@header);
	print V "outheader: Produce $nhead header lines.\n" if $V>5;
	if ($doctitle) {
		print O "<TITLE>$doctitle</TITLE>\n";
	} else {
		$sfx = "for $outlttr" if $outlttr;	# Empty when no output letter.
		print O "<TITLE> Tune list $sfx </TITLE>\n";
	}
	# NB: the loop variable aliases each element, so entity-encoding
	# rewrites @header in place (matching the original behaviour).
	for $hl (@header) {
		++$inHTML if !$inHTML && $hl =~ /<\w*>/;
		$hl = HTML::Entities::encode_entities($hl) unless $inHTML;
		print V "outheader: \"$hl\"\n" if $V>5;
		print O "$hl\n";
		++$gothr if $hl =~ /<hr>/i;
	}
	print O "<hr>\n" unless $gothr;
	print O "$outhdr\n" if $outhdr;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# This pads a field to $max{$fld} chars. The pad arg is the pad char;
# if null, we don't do any padding.
sub pad {
	# Pad $str to the width of field $fld ($lim{$fld} or $max{$fld}),
	# using $pad as the pad character; right-align if $align{$fld} is 'R',
	# else left-align and clip to the field width.
	# BUG FIX: the whitespace normalization below was applied to $fld
	# (the field NAME) instead of $str (the value being padded), which
	# left the value untouched and could corrupt the $align{$fld} lookup.
	# It now normalizes $str, before the padding count is computed.
	local($str,$pad,$fld) = @_;
	local($l,$m,$n,$p,$v);
	$str =~ s/^\s+//;			# Trim initial white stuff.
	$str =~ s/\s+$//;			# Trim trailing white stuff.
	$str =~ s/\s+/ /g;			# Convert internal white stuff to single space.
	$str =~ s/\s/_/g if $pad eq '_';	# Convert internal spaces to underscores.
	$m = $max{$fld} || 1;		# Max actual field length.
	$l = ($lim{$fld} || $m);	# Limit to field length.
	$n = ($m < $l) ? $m : $l;	# Min of actual and limit (diagnostics only).
	$p = $l - length($str);		# Padding needed (after normalization).
	print V "pad: str='$str' pad='$pad' fld='$fld' l=$l m=$m n=$n p=$p.\n" if $V>5;
	if ($align{$fld} eq 'R') {	# Right-aligned field?
		$v = ($pad x $p) . $str;
	} else {					# Left-aligned: pad, then clip to $l.
		$v = substr($str .  ($pad x $p), 0, $l);
	}
	print V "pad: m=$m l=$l n=$n p=$p for fld=$fld\t\"$str\" => \"$v\"\n" if $V>5;
	return $v;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
sub closeDOC {
	# Close the current document handle and clear the open flag.
	$DOCopen = 0;		# Mark it closed (independent of the close itself).
	close(DOC);
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Here's an alarm handler for reads from DOC. When a timeout happens,
# we close the DOC file and return, which should cause abandonment of
# the current document.
sub HTTPtmout {
	# SIGALRM handler for reads from DOC: flag the scan loop to abandon
	# the current document and close the handle, logging how long the
	# connection and the close took.
	local($elapsed) = time - $HTTPcontime;
	print V "ALARM (HTTPtmout) after $elapsed sec.\n" if $V>1;
	$closeDOC = 1;		# Tell the scan loop to give up on this document.
	print V "HTTPtmout: Close \"$URL\" (tmout) ...\n" if $V>1;
	$HTTPclosetime = time;
	&closeDOC if $DOCopen;	# Close now if the handle is still open.
	$HTTPclosedtime = time - $HTTPclosetime;
	print V "HTTPtmout: Closed \"$URL\" in $HTTPclosedtime sec.\n" if $V>1;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Grovel through a file, looking for  hyperlinks  or  pieces  of  abc #
# code.   check  out  each  of  the  files listed.  Directories cause #
# recursive traversal.  Files with interesting suffixes are read.     #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
sub scan {
	# Fetch one URL and grovel through it line by line, harvesting
	# hyperlinks (handed to &href) and ABC tune headers (handed to &abc).
	# Returns 1 on success or benign skip, 0 on failure.
	local($URL) = @_;
	local(*DOC,$closeDOC,$DOCopen,$DOCdtype,$DOCstype);	# Localize the DOC file.
	local($flag,$nhrefs,$isdir,$inHTML,$prnt,$tunes);
	local($b,$d1,$d2,$dpth,$h,$href,$item,$l,$line,$lurl,$n,$p,$s,$surl,$t,$ttl,$u);
	local($id) = "scan";
	$DOCopen = 0;
	$tunes = 0;
	$lurl = &URLtrim($URL);	# Long URL may have final '/'.
	($surl = $lurl) =~ s"/+$"";	# Short URL lacks final '/'.
	$prnt = $Parent{$lurl} || $Parent{$surl};	# Look up parent under either form.
#	print V "$id: \"$lurl\" <- \"$prnt\" (lurl)\n" if $V>5 && $Parent{$lurl};
#	print V "$id: \"$surl\" <- \"$prnt\" (surl)\n" if $V>5 && $Parent{$surl};
	print V "\n====> \"$lurl\" [$depth]	<- \"$prnt\"\n" if $V>1;
	if ($surl eq '') {		# Shouldn't happen.
		local($p,$c,$l) = caller;
		print V "scan: \"$URL\" (from $p/$c/$l)\n";
		return 0;
	}
	# Skip a URL already scanned at this depth or shallower.
	if (($dpth = $Depth{$surl}) && ($dpth <= $depth)) {
		print V "scan: \"$URL\" already scanned at depth $dpth.\n" if $V>2;
		return 1;
	}
	$Depth{$surl} = $depth;	# Note that we've done this URL.
	$inHTML = 0;			# Not (yet) known to be HTML format.
	print V "scan: \"$lurl\"\n" if $V>3;
	if (!&Open(*DOC,$lurl)) {
		$DOCopen = 0;
		print V "scan: \"$lurl\" not accessible.\n" if $V>3;
		return 0;
	}
	$DOCopen = 1;
	print V "scan: \"$lurl\" ...\n" if $V>3;
buffer:
	# Main read loop: runs until EOF, a timeout alarm, or a forced close.
	while (!$closeDOC && !$HTTPalrm) {
		if (!($b = &docline)) {	# Read one line from document.
			print V "scan: EOF\n" if $V>3;
			last buffer;
		}
#		print V "scan: \"$line$b\"\n" if $V>2 && $line;
		$line .= $b;		# Add to leftover from last line.
		$line =~ s/\s+$//;	# Discard trailing white stuff.
		if (!$line) {		# Null line -
			$URLhdr = 0;	# Terminates headers.
			next buffer;	# Otherwise ignore it.
		}
		print V "scan: \"$line\"\n" if $V>3;
		if ($URLhdr) {		# Still inside the HTTP response headers.
			print V "HEAD: \"$line\"\n" if $V>3;
			# Only text/* and application/* documents are worth scanning.
			if (($DOCdtype,$DOCstype) = ($line =~ m"Content-Type:\s*(.*)/(.*)\s*$")) {
				$dt = lc($DOCdtype);
				if ($dt ne 'text' && $dt ne 'application') {
					print V "scan: Non-text type \"$DOCdtype/$DOCstype\" ignored.\n" if $V>2;
					&closeDOC;
					return 1;
				}
				if ($DOCstype =~ m"html"i) {
					$inHTML = 1;
				} else {
					$inHTML = 0;
				}
			}
			$line = '';
			next buffer;
		}
		if (!$ttl && !$inHTML) {	# Code to save the first <title>.
			print V "scan: Check for <TITLE>\n" if $V>5;
			if (($ttl) = ($line =~ m"<TITLE>(.*)</TITLE>"i)) {
				print V "scan: Found <TITLE> ...\n" if $V>4;
				++$inHTML;
				# "Index of ..." titles mark server directory listings.
				if ($ttl =~ m"Index of "i) {
					print V "scan: Found Index of ...\n" if $V>4;
					$ttl =~ s"/*$"/";
					$isdir = 1;
					$lurl =~ s"/*$"/";	# Make sure long url has final '/'
				}	# We have removed the title from the line.
			}
		}
		$nhrefs = 0;
		# Extract every complete <a href=...>...</a> anchor on the line.
		$Apat = '<a\s+.*?href="*([^"]+?)"*>(.*?)</a>';
		while (($href,$item) = ($line =~ m/$Apat/i)) {
			print V "scan: Matched href=\"$href\" item=\"$item\"\n" if $V>3;
			if (!$href) {		# Is this a perl bug?
				print V "scan: Matched \"$line\" with null href!!!\n" if $V>3;
				$line = '';		# Abandon the line.
				next buffer;
			}
			$line =~ s/$Apat//i;
			$inHTML = 1;		# Note it's HTML.
			$nhrefs ++;
			&href($lurl,$lurl,$href,$item);	# Handle this href later.  NOTE(review): $lurl is passed as both parent and current -- confirm intended.
		}
		if ($line =~ m/<a\s/i) {	# Unclosed anchor?  Wait for more input.
			print V "scan: Unclosed <A in \"$line\"\n" if $V>3;
			next buffer;
		}
		if ($nhrefs) {
			print V "scan: Drop \"$line\" after $nhrefs hrefs removed.\n" if $V>3;
			$line = '';
			next buffer;
		}
		print V "scan: No hrefs.\n" if $V>5;
		if ($line =~ /^<a\s/i) {
			print V "scan: HTML anchor not terminated.\n" if $V>5;
			$inHTML = 1;		# Note it's HTML.
			next buffer;		# Append another line.
		}
		# ABC tune header: X: (index) or T: (title) line.
		if ($line =~ /^([XT]):/) {
			$flag = $1;
			++$tunes;			# Count the (possible) ABC tunes.
			print V "scan: \"$line\" may be ABC.\n" if $V>3;
			$d1 = $Udepth{$prnt};
			$n = &abc($line,$prnt);
			print V "scan: $n lines of ABC found in tune $tunes.\n" if $V>3;
			$d2 = $Udepth{$prnt};
			print V "scan: Parent \"$prnt\" promoted from level $d1 to $d2.\n" if $V>2 && $d1!=$d2;
			$line = '';
			next buffer;
		}
		print V "scan: No X or T line.\n" if $V>5;
		if ($line =~ s"^(<.*>)\s*"") {	# Strip a leading HTML tag and retry.
			$inHTML = 1;
			print V "scan: HTML tag \"$1\" deleted.\n" if $V>5;
		}
		next buffer if !$line;
		if (-d $line) {		# The whole line names a local directory.
			print V "scan: \"$line\" is local directory.\n" if $V>3;
			$line =~ s"/*$"/";			# Make sure it has final '/'.
			&URL($line,$depth+1,$URL);	# Add to list of URLs to process.
			next buffer;
		}
		if (-f $line) {		# The whole line names a local file.
			print V "scan: \"$line\" is local file.\n" if $V>3;
			&URL($line,$depth+1,$URL);
			next buffer;
		}
		print V "scan: Can't parse \"$line\"\n" if $V>4;
		if ($inHTML) {		# Drop leading non-tag text; keep any tag remnant.
			$line =~ s"^([^<]+)"";
			print V "scan: Drop \"$1\" from HTML.\n" if $V>3;
		} else {
			print V "scan: Drop \"$line\"\n" if $V>3;
			$line = '';
		}
		print V "scan: \"$line\"\n" if $V>6;
	}
	if ($closeDOC || $HTTPalrm) {	# Some disaster detected.
		$t = time - Max($HTTPcontime,$HTTPreadtime);
		print V "Close \"$URL\" (timeout alarm after $t sec.)\n" if $V>1;
		$closeDOC = $HTTPalrm = 0;
	} else {
		print V "scan: EOF on DOC file.\n" if $V>3;
	}
	&closeDOC if $DOCopen;
	if ($tunes>0 && $V>1) {	# ABC line count.
		($ss,$mm,$hh,$DD,$MM,$YY) = gmtime(time); ++$MM; $YY += 1900;
		$s = ($tunes > 1) ? 'tunes' : 'tune';
		print V "      \"$lurl\" ==== $tunes abc $s ==== $YY/$MM/$DD $hh:$mm:$ss\n";
	}
	alarm 0; $SIG{ALRM} = 0;	# Cancel any pending read timeout.
	print V "scan: Set alarm 0.\n" if $V>3;
	return 1;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Return one line of input from the current document.  Here is  where
# we  try  to  deal with incoming HTML, by splitting it into lines on
# any strings of \r or \n, and stripping out tags.

sub docline {
	# Return one line of input from the current document (handle DOC),
	# buffering partial chunks in the global $DOCline.  In HTML mode we
	# split on runs of \r/\n and strip common layout tags; otherwise we
	# hand back the chunk as-is.  Returns '' at EOF.
	local($line);
	if (!$DOCline) {	# Buffer empty: refill.  NOTE(review): a literal "0" chunk also looks empty here -- confirm harmless.
		$DOCline = <DOC>;	# Next chunk of input.
		print V ":DOC: \"$DOCline\"\n" if $V>4;
		return '' if !$DOCline;
	}
	if ($inHTML) {			# HTML: return one "line".
		if ($DOCline) {		# Any input left?
			if ($DOCline =~ s"^(.*?)[\r\n]+"") {
				$line = $1;
				print V "line: $line\n" if $V>4;
				# Strip layout-only tags (breaks, rules, images, list markup).
				$line =~ s"</*(bl|br|hr|img|li|dl|p|pre|ul)\b.*?>""ig;
			} else {		# No line terminator: take the whole buffer.
				$line = $DOCline;
				$DOCline = '';
			}
		}
	} else {				# Not HTML.
		$line = $DOCline;
		$DOCline = '';
	}
	$line =~ s/\s*$/\n/;	# Normalize trailing whitespace to one newline.
	print V "LINE: $line" if $V>3;
	return $line;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
sub sigcont {
	# SIGCONT handler: close the document handle if it is open, then
	# dump the call stack for diagnosis.
	local($wasopen) = $DOCopen;	# &closeDOC clears $DOCopen, so remember it.
	print V "$me/sigcont: close DOC ...\n" if $V>3;
	if ($wasopen) {
		&closeDOC;
		print V "$me/sigcont: closed DOC.\n" if $V>3;
	}
	&showcalls();
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
sub sigdone {
	# Termination handler: flag shutdown, show where we were, and exit.
	$finishup = 1;		# Mostly moot, since we exit just below.
	&showcalls();
	exit(1);
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
sub showcalls {
	# Dump the current Perl call stack to the V log, one frame per line.
	local($lvl,$pkg,$file,$lineno,$subname);
	print V "$me: Call stack:\n" if $V>0;
	for ($lvl = 0; (($pkg,$file,$lineno,$subname) = caller($lvl)); ++$lvl) {
		printf V "\tLevel %3d line %5d $file\tin $subname\n",$lvl,$lineno if $V>0;
	}
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Register a new URL for later scanning.  We can do some weeding  out #
# here  if  we  so desire.  We return 0 if we reject the URL; 1 if we #
# accept it, though callers don't yet use this info.  We implement  a #
# special  ABC  kludge here:  If the URL ends with .abc, we accept it #
# even if it's beyond the maximum depth.                              #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

sub URL {
	# Register a new URL for later scanning; returns 1 if accepted, 0 or
	# undef if rejected.  ABC kludge (see block comment above): a URL
	# ending in .abc is accepted even beyond $maxdepth, as long as it is
	# not beyond $abcdepth as well.
	local($url,		# New URL.
		$dpth,		# Depth in search.
		$prnt)		# Parent URL.
			= @_;
	local($F) = ':URL';
	print V "$F: \"$url\" d=$dpth.\n" if $V>4;
	if ($dpth > $maxdepth) {
		# BUG FIX: the original rejected the URL in BOTH branches here,
		# so the documented .abc kludge never fired (and the ">->->"
		# trace below was unreachable).  Now a .abc URL within $abcdepth
		# falls through and is accepted.
		if ($url =~ /\.abc$/i) {
			if ($dpth > $abcdepth) {
				print V "$F: \"$url\" ignored (depth $dpth > $abcdepth && .abc file)\n" if $V>3;
				return 0;
			}
		} else {
			print V "$F: \"$url\" ignored (depth $dpth >= $maxdepth)\n" if $V>3;
			return 0;
		}
	}
	print V ">->-> \"$url\" [$depth]\n" if $dpth>$maxdepth && $V>2;
	$url = &URLtrim($url);	# Shorten the URL if possible.
	print V "$F: u=\"$url\" after URLtrim\n" if $V>4;
	if ($prnt) {
		unless ($Parent{$url}) {
			# BUG FIX: was "$PRNT" (an all-caps typo), which stored undef.
			$Parent{$url} = $prnt;
			print V "$F: \"$url\" <- \"$prnt\"\n" if $V>3;
		} else {
			print V "$F: \"$url\" parent already known.\n" if $V>3;
		}
	} else {
		print V "$F: \"$url\" parent unknown.\n" if $V>3;
	}
#	$url =~ s"/www.irishfest.com/oneills/___/"/www.irishfest.com/oneills/dev/";
	print V "----> \"$url\" [$dpth] <- \"$prnt\"\n" if $V>1;
	if ($url !~ /^(http|file):/i) {	# Accept only these protocols.
		print V "$F: \"$url\" ignored (http|file rule)\n" if $V>2;
		return undef;
	}
	if ($url =~ m"\bbin/"i) {		# Don't try to fetch from bin/
		print V "$F: \"$url\" ignored (bin rule)\n" if $V>2;
		return undef;
	}
	if ($url =~ m"\bcgi\b"i) {		# Don't try to fetch from cgi
		print V "$F: \"$url\" ignored (cgi rule)\n" if $V>2;
		return undef;
	}
	if (($url =~ m"^http://([-\w.:]+)/"i) && $BadHost{$1}) {
		print V "$F: \"$url\" ignored (bad host \"$1\")\n" if $V>2;
		return undef;
	}
	if ($Udepth{$url} < 1) {	# First sighting: queue it.
		push @URLs, $url;
		$Udepth{$url} = $dpth;
		print V "$F: \"$url\" new at depth $dpth.\n" if $V>3;
		$Parent{$url} = $prnt if $prnt;
	} else {
		if ($Udepth{$url} > $dpth) {	# Eventually this shouldn't happen.
			print V "$F: \"$url\" new depth $dpth (was $Udepth{$url}).\n" if $V>1;
			$Udepth{$url} = $dpth;
		} else {
			print V "$F: \"$url\" old at depth $Udepth{$url}.\n" if $V>3;
		}
	}
	return 1;
}

