#!/usr/bin/perl -T
# (-T enables taint mode: perl distrusts data from the environment and argv.)
	$version = '20160105';	# Which version we claim to be
	($P = $0) =~ s"^.*/"";	# This program's actual name (basename of $0)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#NAME
#  webcat - catenate web pages and files to standard output
#
#SYNOPSIS
#  webcat [ URL | path ]...
#
#DESCRIPTION
#  This is a web version of the Unix cat(1) command, coded in perl 5.
#
#  Given a list of URLs and files, this program reads them one at a time, and
#  writes their contents, catenated into one long string, to standard output.
#
#  Directories are output in a simplified HTML format,  somewhat  similar  to
#  what the apache web server produces.
#
#  If you want to learn how to do this stuff, you can study this program.  It
#  is  useful as a starting point for writing other simple web clients.  It's
#  not nearly as difficult as people would like  you  to  believe.   But  the
#  socket  stuff  uses  several  magical  incantations that "you just have to
#  know"; see the required perl module files for this socket magic.
#
#OPTIONS
#  The default setup is to deliver only the data  (contents)  of  a  URL  and
#  discard  the  header  and  tracing  information.   Here is the list of our
#  current options.  The options may be combined into  a  single  string,  as
#  usual,  with  the  qualification  that options which have an arg (O and P)
#  must be the last in the string.  Options may be in any order, and apply to
#  all subsequent URLs unless canceled by another option.
#
#  +D
#    Output the data [default].
#  -D
#    Don't output the data.  -D +H returns just the HTTP headers.
#
### Disabled:
#  -E
#    Don't extract text; deliver the data as-is. (default)
#  +E
#    Extract the text.  This is a simple conversion from HTML to plain  text.
#    Assorted  line-end  tags  are  converted  to  \n or \n\n as appropriate.
#    Hyperlinks are converted to <URL:...>, which is recognized by a  lot  of
#    non-HTML software, including ABCbot.  The formatting is rudimentary.  If
#    you want good-looking, formatted text, it's better to use something like
#    lynx that has a full HTML rendering capability.
#
#  +H
#    Include the HTTP header info in the output.
#  -H
#    Don't include the HTTP header info in the output [default].
#
#
#  -I
#    Don't send agent identification (default).
#  +I"agentid"
#    Send the quoted string as the agent identification. Some web sites won't
#    talk to you unless you pretend to be an acceptable browser.  If there is
#    no string, we send the following string, which seems  to  convince  most
#    servers  that  we're  either  netscape  or  IE  (which  pretends  to  be
#    netscape), plus a bogus system name to confuse those servers that  won't
#    serve  to linux or BSD users.  Feel free to use your own AgentID string.
#    But you should know that there is server software out  there  that  will
#    refuse service to any client that doesn't call itself "Mozilla".
#
#  +M
#    Macintosh kludge: convert \r to \n (if not followed by \n).  This may be
#    obsolescent,  now  that  we  have  code to automatically split the input
#    apart on \r, \n or \r\n.
#
#  -O<file>
#    Write the data to <file>.  Default is stdout.
#
#  +P<proxy>
#    Proxy gateway.  If you are hidden behind a proxy, put the proxy hostname
#    (and  optionally  :port)  in  a  -P  option, and we will try to indirect
#    through the proxy server.
#  -P<noproxy>
#    Proxy exception. The <noproxy> string should be a perl pattern. If a URL
#    matches this pattern, the proxy gateway isn't used.
#
#  -R
#    Ignore redirects (default).
#  +R
#    Follow HTTP "Location:" redirects. If you want to know that the redirect
#    happened,  you  must  use the +H option.  Without +H, you'll get no clue
#    that the URL was redirected; webcat will follow the redirection and give
#    the data with no clue where it came from.
#
# +S<file>
#    Write the incoming data stream (all of it) to <file>.  This writes every
#    byte  received, including the HTTP headers and HTTP "chunk" sizes to the
#    file.  This is intended as a debugging aid, so that you can compare what
#    the  server  actually  sent with what this program writes as the output.
#    Some problems with HTTP chunking have been debugged with this.
# -S
#    Default:  Don't write the data stream; just write the (reconstructed)
#    data file to stdout.
#
#  +T
#    Enable WWW tracing. This sets a global flag that causes various routines
#    to produce lines of the form:
#      <!--name:  ...  some message ...  -->
#    These look like both HTTP header lines and HTML comments. Some WWW tools
#    (such as the "H" html viewer) can show these to tell you which stage  of
#    a GET operation we have reached.
#  -T
#    Disable WWW tracing [default].
#
#  -T<n>
#    Timeout of <n> seconds.  The default is no  timeout,  meaning  that  the
#    underlying system's connect() will determine the timeout, if any.
#
#  +T<n>
#    Retry <N> times on dropped connections. This is to handle a failure mode
#    seen  on some servers:  The server accepts the TCP connection, and drops
#    the connection after receiving the GET request, without even sending  an
#    error code. We retry such connections $tries times, with a default of 3,
#    with 1-second delays between tries.  This  option  selects  a  different
#    limit to the number of retries.  (See home1.swipnet.se for examples.)
#
#  +V<version>
#  -V<version>
#    The HTTP version to use. The default is -V1.0. For -V1.1, an extra Host:
#    header is sent, since some servers require it.
#
#  -X
#    Abandon the URL on timeout (default).
#  +X
#    Exit on timeout, ignoring the rest of the URLs and file names.  On  some
#    systems,  there is a bug in the connect() system call that can result in
#    hanging indefinitely. This option is a last-resort "solution" that works
#    if you are only trying to get one URL.
#
#ENVIRONMENT
#  We use the following from the environment:
#
#  W3PROXY
#    The name (or address) and an optional :port for a proxy  gateway.   URLs
#    that  don't  match  the  W3NOPROXY  will  be  fetched indirectly via the
#    proxy's web  server.   If  not  defined,  we  will  attempt  direct  TCP
#    connections for all URLs.
#
#  W3NOPROXY
#    A pattern which is applied to URLs, and if they match, no proxy is used.
#    That is, any URL that matches W3NOPROXY is considered local, and we will
#    access it directly. If not defined, we will use W3PROXY for all URLs (if
#    it exists).
#
#LIMITATIONS
#  So far only the http://  protocol  is  implemented;  ftp://,  file://  and
#  others  may appear if I need them.  If someone feels like adding FTP code,
#  you might send me a copy.
#
#  HTTP  "redirection"  (the  "Location:" HTTP header) is implemented now via
#  the +R option.  By default, it is disabled and  must  be  handled  by  the
#  caller, if desired.  This mainly means that if you omit the final '/' on a
#  directory name, we will fail.  This is not  considered  a  bug,  so  it'll
#  probably never be fixed.
#
#DEBUGGING
#  You can use "perl -dw", of course. Or you can do the following:
#    setenv V_webcat 5/tmp/webcat.out	# csh or tcsh users.
#    export V_webcat=5/tmp/webcat.out	# ksh or bash users.
#  This will turn on the "print V" lines for $V in the range 0-5,  and  write
#  the verbose output to /tmp/webcat.out. Note that the default verbose level
#  is 1; setting it to 0 should turn off all verbose messages.
#
#BUGS
#  Despite many attempts to detect failure, we still don't  optimally  handle
#  all  the myriad things that can go wrong.  In particular, on some systems,
#  the connect() system call can hang indefinitely and can't be killed by  an
#  ALARM. There does not appear to be any known solution to this problem. (No
#  amount of clever code will help if your process doesn't get any cpu time.)
#
#SEE ALSO
#  wget(1), which is more general and recursive
#
#FIXES
# 2009-6-3
#  Added "Accept */*" HTTP header line, because www.thursdaycontra.com didn't
#  accept  any  requests that lacked this line.  Also added a firefox agentid
#  string to the list of builtin agentids, but it's not the default (yet).
#
#AUTHOR
#   <a href="mailto:jc@trillian.mit.edu">John Chambers</a>
# Feel free to use this program for any purpose.  If you add any interesting
# new features, please send me a copy.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

use Socket;

&LocalSetup(1);		# Figure out localization stuff (defined elsewhere in this program)
$| = 1;				# Don't buffer output
$exitstat =   0;	# Set this to get a failure exit status.
&Vopt($ENV{"V_$P"} || $ENV{"D_$P"} || $ENV{"T_$P"} || '1');	# Set verbose level $V and open handle V (see DEBUGGING above)
print V "$P: Started $ymd $hms\n" if $V>1;	# $ymd/$hms presumably set by LocalSetup -- confirm

# Pieces of the current URL, filled in as each one is parsed:
$Prot = '';
$PROT = '';
$Host = '';
$File = '';

#bufsiz =    10;	# Size of input buffer, small for testing
$bufsiz = 10000;	# Size of input buffer, large for routine use

# Option flags, set from the command line (see OPTIONS above):
$W302    = 1;		# Follow 302 redirects (value is hop limit)
$W3hdrs  = 0;		# Whether to output header lines
$W3data  = 1;		# Whether to output data
$W3trace = 0;		# Whether to output tracing info
$kludge1_1_404 = 0;	# HTTP/1.1 GET required (set when a 1.0 GET gets a 4xx)

# Some more initializations to prevent warnings:
$savsig = '';		# For remembering signal settings
$HTTPopentime = 0;
$W3proxy = '';
$streamflg  = 0;	# Whether to write data stream to file (+S option)
$streamfile = '';	# File name for output data-stream file
$dfltstream = 'datastream.txt';	# Default name of streamfile
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Global data for handling "chunked" HTTP Transfer-Encoding.  These are
# maintained by &ChkSiz() and the input routines:

$isChunked = 0;		# Is the data "chunked"?
$inChunked = 0;		# Are we in chunked data now?
$ChnkLeft  = 0;		# Bytes left in current chunk
$ChnkNeed  = 0;		# Bytes needed to fill current chunk
$ChnkSize  = 0;		# Size of current chunk
$UbufSize  = 0;		# Bytes left in current $Ubuf

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Agent ID strings:

# Pick a default ID string (uncomment exactly one):
#	$dflagentid = "";
	$dflagentid = "$P/$version [en]";
#	$dflagentid = "Mozilla/4.5 [en] (compatible; I; $P)";
#	$dflagentid = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)";
#	$dflagentid = "Mozilla/5.0 (X11; U; Linux i686; fr; rv:1.8.1.19) Gecko/20081213 SUSE/2.0.0.19-0.1 Firefox/2.0.0.19";
$W3agentid = $dflagentid;	# What's actually sent as our ID string

# Agent ID strings that we might want to deliver.  A +I<key> option whose
# key appears here is replaced by the full value string.
# (Fixed: the Mozilla/5.0 value was missing the ';' after "X11", unlike the
# identical UA string in the default list above.)
%agentids = (
	"null"        => "",
	"$P/$version" => "$P/$version [en]",
	"$P"          => "$P/$version [en]",
	"Mozilla/4.5" => "Mozilla/4.5 [en] (compatible; I; $P)",
	"Mozilla/4.0" => "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)",
	"Mozilla"     => "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)",
	"Mozilla/5.0" => "Mozilla/5.0 (X11; U; Linux i686; fr; rv:1.8.1.19) Gecko/20081213 SUSE/2.0.0.19-0.1 Firefox/2.0.0.19",
);

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
%escmap = (	# Map whitespace control chars to printable escapes, for &esc()
	"\t" => '\\t',
	"\n" => '\\n',
	"\r" => '\\r',
);

#if ($V>1) {			# Test esc()
#	print "$P: Testing esc ...\n";
#	$foo = "a\tb\rc\n";
#	$bar = &esc($foo);
#	print "$P: esc('$foo') gave '$bar'\n";
#	print "$P: Tested  esc.\n";
#}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Here's our table for converting HTML's symbolic entities to characters. The
# value  is  the  decimal code for the character in 8859-1.  How to deal with
# other character sets is still not very clearly defined.

%htmlsym = (
	# (Fixed: most of the comments below were off by one entry -- each
	# described the NEXT Latin-1 character, apparently because alternating
	# rows of a fuller table were deleted.  Those missing standard entities
	# have been restored, and 'amp' added; all pre-existing values were
	# already correct and are unchanged.)
	'lt'     =>  60,	# less-than sign
	'gt'     =>  62,	# greater-than sign
	'quot'   =>  34,	# quotation mark (double quote)
	'amp'    =>  38,	# ampersand
	'nbsp'   => 160,	# no-break space = non-breaking space, U+00A0 ISOnum
	'iexcl'  => 161,	# inverted exclamation mark, U+00A1 ISOnum
	'cent'   => 162,	# cent sign, U+00A2 ISOnum
	'pound'  => 163,	# pound sign, U+00A3 ISOnum
	'curren' => 164,	# currency sign, U+00A4 ISOnum
	'yen'    => 165,	# yen sign = yuan sign, U+00A5 ISOnum
	'brvbar' => 166,	# broken bar = broken vertical bar, U+00A6 ISOnum
	'sect'   => 167,	# section sign, U+00A7 ISOnum
	'uml'    => 168,	# diaeresis = spacing diaeresis, U+00A8 ISOdia
	'copy'   => 169,	# copyright sign, U+00A9 ISOnum
	'ordf'   => 170,	# feminine ordinal indicator, U+00AA ISOnum
	'laquo'  => 171,	# left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum
	'not'    => 172,	# not sign, U+00AC ISOnum
	'shy'    => 173,	# soft hyphen = discretionary hyphen, U+00AD ISOnum
	'reg'    => 174,	# registered sign = registered trade mark sign, U+00AE ISOnum
	'macr'   => 175,	# macron = spacing macron = overline = APL overbar, U+00AF ISOdia
	'deg'    => 176,	# degree sign, U+00B0 ISOnum
	'plusmn' => 177,	# plus-minus sign = plus-or-minus sign, U+00B1 ISOnum
	'sup2'   => 178,	# superscript two = superscript digit two = squared, U+00B2 ISOnum
	'sup3'   => 179,	# superscript three = superscript digit three = cubed, U+00B3 ISOnum
	'acute'  => 180,	# acute accent = spacing acute, U+00B4 ISOdia
	'micro'  => 181,	# micro sign, U+00B5 ISOnum
	'para'   => 182,	# pilcrow sign = paragraph sign, U+00B6 ISOnum
	'middot' => 183,	# middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum
	'cedil'  => 184,	# cedilla = spacing cedilla, U+00B8 ISOdia
	'sup1'   => 185,	# superscript one = superscript digit one, U+00B9 ISOnum
	'ordm'   => 186,	# masculine ordinal indicator, U+00BA ISOnum
	'raquo'  => 187,	# right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum
	'frac14' => 188,	# vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum
	'frac12' => 189,	# vulgar fraction one half = fraction one half, U+00BD ISOnum
	'frac34' => 190,	# vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum
	'iquest' => 191,	# inverted question mark = turned question mark, U+00BF ISOnum
	'Agrave' => 192,	# latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1
	'Aacute' => 193,	# latin capital letter A with acute, U+00C1 ISOlat1
	'Acirc'  => 194,	# latin capital letter A with circumflex, U+00C2 ISOlat1
	'Atilde' => 195,	# latin capital letter A with tilde, U+00C3 ISOlat1
	'Auml'   => 196,	# latin capital letter A with diaeresis, U+00C4 ISOlat1
	'Aring'  => 197,	# latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1
	'AElig'  => 198,	# latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1
	'Ccedil' => 199,	# latin capital letter C with cedilla, U+00C7 ISOlat1
	'Egrave' => 200,	# latin capital letter E with grave, U+00C8 ISOlat1
	'Eacute' => 201,	# latin capital letter E with acute, U+00C9 ISOlat1
	'Ecirc'  => 202,	# latin capital letter E with circumflex, U+00CA ISOlat1
	'Euml'   => 203,	# latin capital letter E with diaeresis, U+00CB ISOlat1
	'Igrave' => 204,	# latin capital letter I with grave, U+00CC ISOlat1
	'Iacute' => 205,	# latin capital letter I with acute, U+00CD ISOlat1
	'Icirc'  => 206,	# latin capital letter I with circumflex, U+00CE ISOlat1
	'Iuml'   => 207,	# latin capital letter I with diaeresis, U+00CF ISOlat1
	'ETH'    => 208,	# latin capital letter ETH, U+00D0 ISOlat1
	'Ntilde' => 209,	# latin capital letter N with tilde, U+00D1 ISOlat1
	'Ograve' => 210,	# latin capital letter O with grave, U+00D2 ISOlat1
	'Oacute' => 211,	# latin capital letter O with acute, U+00D3 ISOlat1
	'Ocirc'  => 212,	# latin capital letter O with circumflex, U+00D4 ISOlat1
	'Otilde' => 213,	# latin capital letter O with tilde, U+00D5 ISOlat1
	'Ouml'   => 214,	# latin capital letter O with diaeresis, U+00D6 ISOlat1
	'times'  => 215,	# multiplication sign, U+00D7 ISOnum
	'Oslash' => 216,	# latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1
	'Ugrave' => 217,	# latin capital letter U with grave, U+00D9 ISOlat1
	'Uacute' => 218,	# latin capital letter U with acute, U+00DA ISOlat1
	'Ucirc'  => 219,	# latin capital letter U with circumflex, U+00DB ISOlat1
	'Uuml'   => 220,	# latin capital letter U with diaeresis, U+00DC ISOlat1
	'Yacute' => 221,	# latin capital letter Y with acute, U+00DD ISOlat1
	'THORN'  => 222,	# latin capital letter THORN, U+00DE ISOlat1
	'szlig'  => 223,	# latin small letter sharp s = ess-zed, U+00DF ISOlat1
	'agrave' => 224,	# latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1
	'aacute' => 225,	# latin small letter a with acute, U+00E1 ISOlat1
	'acirc'  => 226,	# latin small letter a with circumflex, U+00E2 ISOlat1
	'atilde' => 227,	# latin small letter a with tilde, U+00E3 ISOlat1
	'auml'   => 228,	# latin small letter a with diaeresis, U+00E4 ISOlat1
	'aring'  => 229,	# latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1
	'aelig'  => 230,	# latin small letter ae = latin small ligature ae, U+00E6 ISOlat1
	'ccedil' => 231,	# latin small letter c with cedilla, U+00E7 ISOlat1
	'egrave' => 232,	# latin small letter e with grave, U+00E8 ISOlat1
	'eacute' => 233,	# latin small letter e with acute, U+00E9 ISOlat1
	'ecirc'  => 234,	# latin small letter e with circumflex, U+00EA ISOlat1
	'euml'   => 235,	# latin small letter e with diaeresis, U+00EB ISOlat1
	'igrave' => 236,	# latin small letter i with grave, U+00EC ISOlat1
	'iacute' => 237,	# latin small letter i with acute, U+00ED ISOlat1
	'icirc'  => 238,	# latin small letter i with circumflex, U+00EE ISOlat1
	'iuml'   => 239,	# latin small letter i with diaeresis, U+00EF ISOlat1
	'eth'    => 240,	# latin small letter eth, U+00F0 ISOlat1
	'ntilde' => 241,	# latin small letter n with tilde, U+00F1 ISOlat1
	'ograve' => 242,	# latin small letter o with grave, U+00F2 ISOlat1
	'oacute' => 243,	# latin small letter o with acute, U+00F3 ISOlat1
	'ocirc'  => 244,	# latin small letter o with circumflex, U+00F4 ISOlat1
	'otilde' => 245,	# latin small letter o with tilde, U+00F5 ISOlat1
	'ouml'   => 246,	# latin small letter o with diaeresis, U+00F6 ISOlat1
	'divide' => 247,	# division sign, U+00F7 ISOnum
	'oslash' => 248,	# latin small letter o with stroke = latin small letter o slash, U+00F8 ISOlat1
	'ugrave' => 249,	# latin small letter u with grave, U+00F9 ISOlat1
	'uacute' => 250,	# latin small letter u with acute, U+00FA ISOlat1
	'ucirc'  => 251,	# latin small letter u with circumflex, U+00FB ISOlat1
	'uuml'   => 252,	# latin small letter u with diaeresis, U+00FC ISOlat1
	'yacute' => 253,	# latin small letter y with acute, U+00FD ISOlat1
	'thorn'  => 254,	# latin small letter thorn, U+00FE ISOlat1
	'yuml'   => 255,	# latin small letter y with diaeresis, U+00FF ISOlat1
);

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# That's all the global setup. Now we run thru the args, attempt to read each #
# one, and write the contents to stdout:                                      #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
arg:
for $u (@ARGV) {
	$moved = 0;
	$URLerr = "Don't know why";				# Set by openURL on failure.
	if (($pfx,$opt) = ($u =~ /^([-+])(.*)/)) {	# This arg is an option, not a URL.
		while ($opt) {						# Each time MUST remove at least one char.
			if ($opt =~ s/^D//i) {			# +D or -D (whether to produce data)
				$W3data = ($pfx eq '+') ? 1 : 0;
				print V ($W3data ? "Do" : "Don't"), " produce data.\n" if $V>1;
			} elsif ($opt =~ s/^E//i) {		# E option disabled in this version
				#
			} elsif ($opt =~ s/^H//i) {		# +H or -H (whether to produce headers)
				$W3hdrs = ($pfx eq '+') ? 1 : 0;
				print V ($W3hdrs ? "Do" : "Don't"), " produce headers.\n" if $V>1;
			} elsif ($opt =~ s/^I//i) {		# +I<agentid> or -I
				if ($pfx eq '-') {
					$W3agentid = '';
					print V "$P: No agent identification.\n" if $V>1;	# (Fixed: was missing \n)
				} else {
					$W3agentid = $opt || $dflagentid;	# Rest of string is id.
					print V "$P: Agent \"$W3agentid\"\n" if $V>1;
					# (Fixed: this looked up %agentid, but the alias table
					# is named %agentids, so aliases never matched.)
					if (defined($id = $agentids{$W3agentid})) {
						$W3agentid = $id;
						print V "$P: Agent \"$W3agentid\"\n" if $V>1;
					}
					$opt = '';
				}
			} elsif ($opt =~ s/^O//i) {		# +O or -O (output file)
				$outfile = $opt;			# Rest of string is file name.
				$opt = '';
				print V "$P: Output to \"$outfile\"\n" if $V>1;
			} elsif ($opt =~ s/^P//i) {		# +P<proxy> or -P<noproxy>
				if ($pfx eq '-') {
					$W3nopxy = $opt;		# Rest of string is pattern.
					print V "$P: Proxy exceptions are /$W3nopxy/\n" if $V>1;
				} else {
					$W3proxy = $opt;		# Rest of string is proxy host.
					print V "$P: Proxy server is $W3proxy.\n" if $V>1;	# (Fixed: printed $W3nopxy)
				}
				$opt = '';
			} elsif ($opt =~ s/^R//i) {		# +R or -R (whether to follow 302 redirects)
				$W302 = ($pfx eq '+') ? $W302+1 : 0;
				print V ($W302 ? "Do" : "Don't"), " follow 302 redirects.\n" if $V>1;
			} elsif ($opt =~ s/^(S)(.*)//i) {		# +S or -S (whether to write the data stream)
				$streamflg = ($pfx eq '+') ? 1 : 0;	# (Fixed: was $W302+1, a copy-paste from +R)
				$streamfile = $2 || $dfltstream;
				if ($streamflg) {
					print V "$P: Write data stream to '$streamfile'\n" if $V>1;
					if (open(STREAM,">$streamfile")) {
						print V "$P: Writing data stream to '$streamfile'\n" if $V>1;
					} else {
						print V "$P: ### Can't write '$streamfile' [$!]\n" if $V>1;
						$streamflg = 0;
						print V "$P: ### Ignoring \"$pfx$1$2\" option.\n" if $V>1;
					}
				} else {
					print V "$P: Don't write data stream.\n" if $V>1;
				}
			} elsif ($opt =~ s/^T//i) {		# +T or -T (WWW tracing) or -T<n> (timeout)
				if ($opt =~ s/^(\d+)//) {	# T with number:
					if ($pfx eq '+') {
						$tries = $1;		# +T\d+ is limit to tries.
						print V "tries = $tries.\n" if $V>1;
					} else {
						$HTTPtimeout = $1;	# -T\d+ is timeout.
						print V "HTTPtimeout = $HTTPtimeout sec.\n" if $V>1;
					}
				} else {					# T without number:
					if ($pfx eq '+') {
						$W3trace = 1;		# +T enables tracing.
						print V "Do produce WWW tracing.\n" if $V>1;
					} else {
						$W3trace = 0;		# -T disables tracing.
						print V "Don't produce WWW tracing.\n" if $V>1;
					}
				}
			} elsif ($opt =~ s/^V//i) {				# +V has two forms:
				if ($opt =~ s/^(\d+\.\d+)//)	{	# +V<HTTPversion>
					$HTTPvopt = $HTTPversion = $1;
					$opt = '';
					print V "$P: HTTP version '$HTTPvopt'\n" if $V>1;
				} elsif ($opt =~ s/^(\d+)//) {		# +V<debuglvl>
					$V = int($1);
					print V "$P: Verbose level V=$V\n" if $V>1;
				} else {
					print V "$P: V$opt not understood\n" if $V>1;
				}
			} elsif ($opt =~ s/^X//i) {		# +X
				$HTTPtimexit = ($pfx eq '+') ? 1 : 0;
				print V ($HTTPtimexit ? "Do" : "Don't"), " exit on timeout.\n" if $V>1;
			} else {
				print V "$P: unknown option \"$opt\" ignored.\n" if $V>0;
				$opt =~ s/.//;				# Discard this option char.
			}
		}
		next arg;
	}
	$try = $hdrlines = 0;		# Count the opens and header lines.
	$tries = 3 unless $tries;	# Default retry limit (see +T<n>).
try:
	while ($try++ < $tries && $hdrlines == 0) {
		print V "<!--$P: Try $try -->\n" if $V>0 && $try>1 && $W3trace;	# (Fixed: interpolated $F, never set here)
		sleep 1 if $try>1;	# Don't hit the server too hard
		unless (&openURL(*U,$u)) {
			print V "$P: Can't open \"$u\" ($URLerr)\n" if $V>1;
			$exitstat = 1;
			next arg;
		}
		print V "$P: Opened \"$u\"\n" if $V>1;
		if ($HTTPtimeout > 0) {
			alarm $HTTPtimeout;
			$savsig = $SIG{ALRM};
			$SIG{ALRM} = 'READalarm';
			print V "$P: Set alarm after $HTTPtimeout sec.\n" if $V>2;
		}
		$isChunked = $inChunked = 0;	# HTTP headers aren't chunked
#		$inTEXT  = $URLhdr ? 1 : 0;
#		$inHTML  =		# Is it HTML?
#		$inPRE   =		# Within a <pre>...</pre> section?
		$statmax = 0;	# Max status code seen
		$Ubuf    = '';	# Input buffer
		$staterr = '';	# Last error message
		# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
		# Here's where we read the data from one URL and write it  to #
		# standard output.  If you want to do something else with the #
		# data, you should rewrite this loop:                         #
		# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
		if ($outfile && !$outopen) {	# Do we need to open the output?
			print V "$P: Open \"$outfile\"\n" if $V>1;
			if (open(O,">$outfile")) {	# Try to open it for writing.
				print V "$P: Writing \"$outfile\" [$!]\n" if $V>1;
			} else {
				print V "$P: Can't write \"$outfile\" [$!]\n" if $V>0;
				$outfile = '';			# Fall back to stdout.
			}
			$outopen = 1;
		}
		print V "$P: Headers (URLhdr=$URLhdr) ...\n" if $V>1 && $W3trace;
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# In all cases, we must first read through the HTTP headers.  We look #
# for a few of them, and set global variables to match what we see.   #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
hdr:	while ($URLhdr && ($b = &Line())) {
			print "<!--$P: Next header line-->\n"  if $V>1 && $W3trace;
			$statcode = 0;
#			$inTEXT = 1;
			$b =~ s/\s+$//;				# Discard trailing white stuff
			print "URLhdr=$URLhdr W3hdrs=$W3hdrs W3trace=$W3trace outfile=$outfile.\n" if $V>1;
			if ($W3hdrs) {
				if ($outfile) {print O "$b\n"} else {print "$b\n"}
				sleep 1 if $V>1;
			}
			if (length($b) > 0) {		# Anything left in the buffer?
				++$hdrlines;
				$statcode = 0;
				if ($b =~ /^HTTP\/([\d.]+)\s+(\d+)\s+(.*)/) {	# Status line
					$httpvrs = $1;
					$statcode = $2;
					$statmsg = $3;
					print "<!--$P: httpvrs=$httpvrs statcode=$statcode -->\n" if $V>1 && $W3trace;
					if ($statcode > $statmax) {$statmax = $statcode; $staterr = $statmsg}
					if ($W302 > 0 && $statcode == 302) {
						print "<!--$P: MOVED -->\n" if $V>1 && $W3trace;
						$moved = 1;
						--$W302;	# Decrease the redirect hop count
					}
					if ($httpvrs eq '1.1' && $statcode >= 400) {
						if ($HTTPversion ne '1.1') {
							print "<!--$P: HTTP/$HTTPversion => HTTP/1.1 404 kludge -->\n" if $W3trace && $V>2;
							$kludge1_1_404 = 1;
							$HTTPversion = '1.1';
							redo arg;	# Retry same URL as HTTP/1.1
						} else {
							print "<!--$P: HTTP/1.1 $statcode $statmsg (probably real) \"$u\" -->\n" if $W3trace && $V>1;
						}
					}
				} elsif ($b =~ m"^Transfer-Encoding:\s*(.*)$"i) {
					if ($1 eq 'chunked') {
						print "<!--$P: $b \"$u\"-->\n" if $V>1 && $W3trace;
						$isChunked = 1;	# We have to handle chunked encoding
						$inChunked = 0;	# HTTP headers aren't chunked
					}
#				} elsif ($b =~ m"^Content-type:\s*(.*)/(.*)$"i) {
#					$doctype = lc($1);
#					$subtype = lc($2);
#					print "<!--$P: Content-type: \"$doctype/$subtype\" -->\n" if $V>1 && $W3trace;
#					if ($doctype eq 'text') {
#						$inTEXT = 1;
#						$inHTML = ($subtype eq 'html') ? 1 : 0;
#					} else {
#						$inTEXT = $inHTML = 0;
#					}
#					print "<!--$P: inTEXT=$inTEXT inHTML=$inHTML -->\n" if $V>1 && $W3trace;
				} elsif ($moved && ($b =~ /^Location:\s*(.*)$/)) {
					print "<!--$P: New URL \"$1\" -->\n" if $V>1 && $W3trace;
					$u = &URL($1);
					redo arg;	# Follow the redirect from the top
				}
				if ($statcode >= 400) {
					print "<!--$P: Fail with HTTP err $statcode ($statmsg)-->\n" if $W3trace && $V>0;
					print "<!--$P: CAN'T GET \"$u\"-->\n" if $W3trace && $V>0;
				}
			} else {
				$URLhdr = 0;		# Blank line ends headers
			}
			print "<!--$P: End of header line-->\n"  if $V>1 && $W3trace;
		}
		print "<!--$P: End of headers-->\n"  if $V>1 && $W3trace;
	}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# We should now be positioned just after the double  CRLF  that  ends #
# HTTP header lines.  The rest should be the contents of the file.    #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	if ($statmax >= 400) {	# Did we get a fatal error code?
		print V "<!--$P: Exit 1 because HTTP status = $statmax -->\n" if $V>2;
		exit 1;
	}
	$/ = undef;				# Slurp mode for the data phase
	if ($HTTPtimeout > 0) {alarm $HTTPtimeout}
	print V "$P: Headers done (URLhdr=$URLhdr)\n" if $V>1;	# (Fixed: printed "$P/$P:")
	if (!$W3data) {			# Data not wanted?
		print V "$P: Data not wanted, quitting.\n" if $V>1;
		close(U);			# Close this connection.
		next arg;			# Go on to next URL.
	}
#	if ($W3hdrs) {
#		print V "$P: Writing NL\n" if $V>1;
#		if ($outfile) {print O "\n"} else {print "\n"}
#	}
# Now we copy the data, doing any needed processing to each line.
data:
	while ($b = &Line()) {
		print V "$P: Got: \"$b\"\n" if $V>5;
		if (!$W3hdrs && $URLhdr) {	# Suppressing header lines.
			print V "$P: HDR check in \"$b\"\n" if $V>1;
			if ($b =~ s/^.*\r\n\r\n//s) {
				$URLhdr = 0;		# Found \n\n separator.
			} else {
				next;				# No separator, discard it all.
			}
		}
#		if ($MACfl) {$b =~ s"\r\n?"\n"g}
		if ($outfile) {print O $b} else {print $b}
		if ($HTTPtimeout) {alarm $HTTPtimeout}	# Reset the watchdog per line
	}
	unless (defined $b) {
		print V "\n$P: Error reading \"$u\" ($tries tries; reason: \"$!\", $errmsg)\n" if $V>0;
		$exitstat = $?;
	}
}

exit $exitstat;
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

sub ChkSiz {my $F = 'ChkSiz';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Remove a chunk size from the start of $Ubuf and set  the  global  variables #
# $ChnkSize,  $ChnkLeft  and  $ChnkNeed  to handle the new chunk.  Try to get #
# enough data to complete the chunk.                                          #
#                                                                             #
# Globals used: $Ubuf/$UbufSize (input buffer), handle U (the HTTP socket),   #
# $bufsiz (read size), STREAM/$streamflg (+S raw-stream copy).                #
# Returns the new chunk size, or 0 if $Ubuf is empty after the refill.        #
# NOTE(review): if no hex size is found, $ChnkSize keeps its PREVIOUS value   #
# (the resets below are deliberately commented out) and that stale value is   #
# returned -- confirm callers expect this.                                    #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	local($n,$buf,$x);
#	$ChnkSize = 0;
#	$ChnkLeft = 0;
	$Ubuf =~ s/^[\r\s]+//;		# Strip off any initial white stuff
	$UbufSize = length($Ubuf);
	print V "$P/$F: Ubuf has $UbufSize bytes.\n" if $V>1;
	# First refill: make sure we have enough bytes to hold a hex size line.
	while ($UbufSize < 10) {
		print V "$P/$F: Ubuf needs more data.\n" if $V>1;
		if (($n = sysread(U,$buf,$bufsiz)) > 0) {
			print V "$P/$F: Got $n bytes from URL read\n" if $V>2;
			$Ubuf .= $buf;
			print STREAM $buf if $streamflg;	# +S: copy raw bytes to stream file
		} elsif ($n == 0) {		# EOF
			print V "$P/$F: Got EOF from URL read\n" if $V>1;
			last;
		} else {
			print V "$P/$F: Got $n from URL read ($!)\n" if $V>0;
			last;
		}
		$UbufSize = length($Ubuf);
		print V "$P/$F: Ubuf has $UbufSize bytes.\n" if $V>1;
	}
	print V "$P/$F: Ubuf=\"$Ubuf\"\n" if $V>4;
	return 0 unless $Ubuf;
	if ($V>1) {
		$x = &esc(substr($Ubuf,0,20));		# Show initial 20 bytes in buffer
		print V "$P/$F: x=[$x]\n";
	}
	if ($Ubuf =~ s/^[\r\n]*([0-9a-fA-F]+)\s*\r*\n//) {	# Match hex chunk size + \s\r\n terminator
		print V "$P/$F: Found '$1'\n" if $V>1;
		$ChnkSize = hex($1);
		$UbufSize = length($Ubuf);
		$ChnkLeft = $ChnkSize;
		# ChnkNeed = how many more bytes must still arrive to complete the chunk.
		$ChnkNeed = ($UbufSize < $ChnkSize) ? $ChnkSize - $UbufSize : 0;
	} else {		# Chunk size not found here
		if ($V>0) {	# Loud diagnostic block: a missing size means we lost sync.
			local($xxx) = '#' x 32;
			print V "\n$xxx\n";
			$x = &esc(substr($Ubuf,0,20));		# Show initial 20 bytes in buffer
			print V "$P/$F: Ubuf: $x ...\n";
			print V "$P/$F: No hex number where chunk size expected!\n";
			print V "$P/$F: URL =\"$u\"\n";
			print V "$P/$F: Ubuf=\"$Ubuf\"\n";
			print V "$xxx\n";
		}
	}
	print V "$P/$F: ChnkSize=$ChnkSize ChnkLeft=$ChnkLeft ChnkNeed=$ChnkNeed UbufSize=$UbufSize.\n" if $V>2;
	# Second refill: keep reading until the whole chunk is in $Ubuf (or EOF/error).
	while ($ChnkSize > $UbufSize) {
		print V "$P/$F: Ubuf needs $ChnkSize bytes, has only $UbufSize.\n" if $V>1;
		if (($n = sysread(U,$buf,$bufsiz)) > 0) {
			print V "$P/$F: Got $n bytes from URL read\n" if $V>2;
			$Ubuf .= $buf;
			print STREAM $buf if $streamflg;	# +S: copy raw bytes to stream file
		} elsif ($n == 0) {		# EOF
			print V "$P/$F: Got EOF from URL read\n" if $V>1;
			last;
		} else {
			print V "$P/$F: Got $n from URL read ($!)\n" if $V>0;
			last;
		}
		$UbufSize = length($Ubuf);
		print V "$P/$F: Ubuf now has $UbufSize bytes.\n" if $V>1;
	}
	return $ChnkSize;
}

sub HTMLdir {my $F = 'HTMLdir';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# This routine reads a  directory  and  produces  a  simplified  html #
# listing. But you might want to reformat it in some other way that's #
# more useful for your application. We do the work in a child process #
# and return with F open to the pipe from the child.                  #
#                                                                     #
# Args:    *F = glob for the read handle; $d = directory path.        #
# Returns: 1 in the parent (F open for reading); the child never      #
#          returns -- it prints the listing and exits.                #
# Side effects: sets global $URLhdr=1 so the caller skips the fake    #
# HTTP headers the child emits; uses global $u in the page title.     #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	local(*F,$d) = @_;
	local(@files,$f,$parent,@st,$mt,*DIR);
	$HTTPversion = '1.0' if !$HTTPversion;
	# open(F,'-|') forks: parent gets a read pipe from the child's stdout.
	if (open(F,'-|')) {	# Parent.
		$URLhdr = 1;	# Warn caller of HTML headers.
		return 1;		# Parent returns the open file.
	}
	# Child reads the directory.
	if (opendir(DIR,$d)) {
		print V "HTMLdir: Producing HTML headers.\n" if $V>2;
		# Fake an HTTP response so the caller can treat us like a server:
		print "HTTP/$HTTPversion 200 OK\n";
		print "Server: HTMLdir.pm\n";
		if (@st = stat($d)) {
			$mt = gmtime($st[9]);	# st[9] = mtime
			print "Last-Modified: $mt GMT\n";
		}
		print "Content-Type: text/html\n\n";
		print "<HEAD><TITLE>Index of $u</TITLE></HEAD><BODY>\n";
		print "<PRE>\n";
		print "<IMG SRC=\"/icons/blank.xbm\" ALT=\"     \"> Name\n";
		@files = readdir(DIR);
		closedir DIR;
		for $f (sort @files) {
			if (-d "$d/$f") {
				if ($f eq '.') {		# Ignore self reference.
				} elsif ($f eq '..') {	# Parent reference.
					($parent = $d) =~ s"/[^/]+/+$"/";
					print "<!-- parent=\"$parent\" -->\n";
					print "<IMG SRC=\"/icons/back.gif\" ALT=\"[DIR]\"> <A HREF=\"../\">Parent directory</A>\n";
				} else {
					print "<IMG SRC=\"/icons/menu.gif\" ALT=\"[DIR]\"> <A HREF=\"$f/\">$f/</A>\n";
				}
			} else {
				print "<IMG SRC=\"/icons/text.gif\" ALT=\"[___]\"> <A HREF=\"$f\">$f</A>\n";
			}
		}
		exit 0;		# Child is done; EOF appears on F in the parent.
	}
	print V "HTMLdir: Can't open directory \"$d\" ($!)\n" if $V>0;
	exit $!;	# Child exits with the errno as its status.
}

sub HTTPalarm {my $F='HTTPalarm';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# SIGALRM handler armed by conHTTP while a connect() is pending.  Instead of  #
# killing the process it just records a 'timeout' error for the caller.       #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	my $elapsed = time - $conHTTPtime;	# Seconds since the connect began
	if ($W3trace) {
		print "<!--$F: ALARM after $elapsed sec -->\n";
	}
#	exit -1;
	$errmsg = 'timeout';	# Let the caller discover what happened
}

sub conHTTP {my $F='conHTTP';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# NAME                                                                #
#   conHTTP - make HTTP connection.                                   #
#                                                                     #
# SYNOPSIS                                                            #
#   $stat = &conHTTP(*F,'fubar.com:1234');                            #
#                                                                     #
# DESCRIPTION                                                         #
#   This  accepts a URL's host:port portion, and attempts to make the #
#   connection.  If successful, we return 1 with F open  to  the  TCP #
#   socket.   If  we fail, we return 0, and F may or may not be open. #
#   (Maybe we should close it.)  On failure, $errmsg or $exitstat  is #
#   set to describe what went wrong.                                  #
#                                                                     #
# TIMEOUTS                                                            #
#   I've added a timeout kludge:  If $HTTPtimeout is nonzero, we will #
#   exit after that many seconds. This is drastic, but it seems to be #
#   the only solution to the hung-connect problem.   This  is  mostly #
#   used  in webcat, which is used as a subprocess by other programs. #
#   If you call "webcat -T15 ...", it will exit after 15  seconds  if #
#   the connection can't be made, and you can go about your business. #
#                                                                     #
# AUTHOR                                                              #
#   <a href="mailto:jc@trillian.mit.edu">John Chambers</a>            #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	local(*HTTPsock,$hp) = @_;
	local($a,$b,$c,$d);
	local(@addrs,$host,$port,$savsig,$t,$this,$that,$This,$That);
	# Give every global we touch a sane default.
	$HTTPtimeout = 30   unless defined $HTTPtimeout;
	$W3trace     =  0   unless defined $W3trace;
	$HTTPalrm    =  0   unless defined $HTTPalrm;	# Alarm needs to be handled
	$conHTTPs    =  0   unless defined $conHTTPs;	# Count of connections
	$conHTTPtime = time unless defined $conHTTPtime;
	$savsig      = ''   unless defined $savsig;
	# Split "host:port"; a bare host defaults to port 80.
	if (($host,$port) = ($hp =~ m"^(.*):(\d+)$")) {
		print V "$P/$F: host=\"$host\" port=\"$port\"\n" if $V>5;
	} else {
		$host = $hp;
		$port = 80;
		print V "$P/$F: host=\"$host\" port=$port.\n" if $V>5;
	}
	# Old-school socket constants and sockaddr template, hard-coded rather
	# than pulled from Socket.pm.
	$AF_INET = 2;
	$SOCK_STREAM = 1;
	$sockaddr = 'S n a4 x8';
	($name,$aliases,$proto) = getprotobyname('tcp');
	($name,$aliases,$port) = getservbyname($port,'tcp')
		unless $port =~ /^\d+$/;	# Resolve a symbolic service name
	$thisaddr = "\0\0\0\0";		# INADDR_ANY for our end
	print "<!--$F: Get address for \"$host\" -->\n" if $W3trace;
	($name,$aliases,$type,$len,@addrs) = gethostbyname($host);
	print V "$P/$F: type=$type len=$len.\n" if $V>5;
	if (!@addrs) {
		$errmsg = "No address for \"$host\"";
		return 0;
	}
	$thataddr = $addrs[0];	# Use the first address returned
	$this = pack($sockaddr, $AF_INET, 0, $thisaddr);
	$that = pack($sockaddr, $AF_INET, $port, $thataddr);
	# Dotted-quad forms of both ends, used only in trace messages.
	($a,$b,$c,$d) = unpack('C4',$thisaddr);
	$This = "$a.$b.$c.$d:0";
	($a,$b,$c,$d) = unpack('C4',$thataddr);
	$That = "$a.$b.$c.$d:$port";
	print V "<!--$F: That=\"$That\"-->\n" if $V>1;
	if (socket(HTTPsock,$AF_INET,$SOCK_STREAM,$proto)) {
		print V "<!--$F: Got socket-->\n" if $V>1;
	} else {
		print V "<!--$F: Can't get socket ($!)-->\n" if $V>0;
		$exitstat = $!;
		return 0;
	}
	if (bind(HTTPsock,$this)) {	# Is this actually needed?
		$t = time - $conHTTPtime;
		print V "$P/$F: Bind to \"$This\" succeeded in $t sec.\n" if $V>5;
	} else {
		$t = time - $conHTTPtime;
		print V "$P/$F: Bind to \"$This\" failed in $t sec ($!)\n" if $V>2;
		$exitstat = $!;
		return 0;
	}
	++$conHTTPs;
	$conHTTPtime = time;
	if ($HTTPtimeout > 0) {		# Arm the hung-connect timeout.
		print "<!--$F: HTTPtimeout=$HTTPtimeout -->\n" if $W3trace;
		alarm $HTTPtimeout;
		$savsig = $SIG{ALRM};
		$SIG{ALRM} = 'HTTPalarm';
		print V "$P/$F: Set alarm after $HTTPtimeout sec.\n" if $V>3;
	}
	print V "<!--$F: That='$That' -->\n" if $V>1;
	print "<!--$F: Connecting to \"$That\" -->\n" if $W3trace || $V>1;
	# NOTE(review): this rebuilds $that with Socket's sockaddr_in(),
	# overriding the hand-packed value above; the pack() result only
	# survives if sockaddr_in() fails.  Presumably Socket.pm is loaded
	# elsewhere in this file -- confirm.
	if ($that = sockaddr_in($port,$thataddr)) {
		print V "<!--$F: sockaddr_in() succeeded -->\n" if $V>2;
	} else {
		print V "<!--$F: ### sockaddr_in() failed for port='$port' thataddr='$thataddr' ###-->\n" if $V>1;
	}
	if (connect(HTTPsock,$that)) {
		$t = time - $conHTTPtime;
		print V "$P/$F: Connect $conHTTPs to \"$That\" succeeded in $t sec.\n" if $V>5;
		print "<!--$F: Connected to \"$That\" -->\n" if $W3trace;
		if ($HTTPtimeout > 0) {		# Disarm the timeout.
			alarm 0;
			$SIG{ALRM} = $savsig;
			print V "$P/$F: Set alarm 0.\n" if $V>5;
		}
	} else {
		$t = time - $conHTTPtime;
		print "<!--$F: Can't connect to \"$That\" in $t sec. ($!)\n" if $W3trace;
		print V "$P/$F: Connect $conHTTPs to \"$That\" failed ($!) after $t sec.\n" if $V>2;
		$exitstat = $!;
		if ($HTTPtimeout > 0) {		# Disarm the timeout.
			alarm 0;
			$SIG{ALRM} = $savsig;
			print V "$P/$F: Set alarm 0.\n" if $V>5;
		}
		return 0;
	}
	# NOTE(review): $HTTPalrm is forced to 0 above and nothing visible in
	# this file sets it, so this cleanup branch appears to be dead code --
	# confirm before removing.
	if ($HTTPalrm) {
		close HTTPsock;
		alarm 0;
		$SIG{ALRM} = $savsig;
		print V "$P/$F: Set alarm 0.\n" if $V>5;
	} else {
		select(HTTPsock); $| = 1; select(STDOUT);	# Unbuffer the socket
	}
	return 1;
}

sub LocalSetup { $Vtest = shift;
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Figure out some stuff about where we're running The end result of  this  is #
# to initialize a list of global variables.  The return value of this routine #
# is no longer used for anything.                                             #
#                                                                             #
# Arg: $Vtest - minimum verbosity to force when the request comes from the    #
# author's home machine or home network (see the $RA checks below).           #
# Globals set: $P, $V, $RA, $ymd, $hms, $hostname, $host (plus PATH/umask/$|).#
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	$ENV{PATH} = '/bin:/sbin:/usr/bin:/usr/sbin';	# Safe PATH for taint mode (-T)
	push @INC, '.';
	$testing = '0';	# Set to 1 while testing
	umask 0002;		# Output files must be group writable
	$| = 1;			# Unbuffered STDOUT
	$" = ',';		# Used in verbose messages
	($P = $0 || 'webcat') =~ s".*/"";	# Basename of this program
	# First digit found in V_<prog> or D_<prog> becomes the verbose level;
	# the appended ' 1' supplies a digit when neither variable has one.
	# NOTE(review): with $testing = '0', an unset environment yields the
	# string "0 1", so the default level is 0, not 1 -- confirm intended.
	(($ENV{"V_$P"} || $ENV{"D_$P"} || $testing) . ' 1') =~ /(\d+)/;	# Verbose level
	$V = $1;
	($ENV{REMOTE_ADDR} || '0.0.0.0') =~ m/^\s*([\d.]+)\s*$/;	# Untaint client address
	$RA = $1;
	if ($RA eq '207.172.223.184') {		# My home machine
		$V = $Vtest if $V<$Vtest;
	} elsif (substr($RA,0,8) eq '192.168.') {	# My home network
		$V = $Vtest if $V<$Vtest;
	}
	local($ss,$mm,$hh,$DD,$MM,$YY) = gmtime(time);		# Current date/time
	$ymd = sprintf("%d-%02d-%02d",1900+$YY,1+$MM,$DD);	# CCYY-MM-DD
	$hms = sprintf("%02d:%02d:%02d",$hh,$mm,$ss);		# HH:MM:SS
	$hostname = `/bin/hostname`;		# What does this machine call itself?
	$hostname =~ s/^\s*([-.\w]*)([\r\s]*)$/$1/;		# Trim surrounding whitespace/CRs
	($host = $1) =~ s/\..*//;			# Extract first field of name (strip domain)
	print "$P: V=$V on $host at $ymd $hms\n" if $V>1;
#	($cwd = `/bin/pwd`) =~ s/[\r\s]+$//;
#	$hstloc = $host . "-cgilocal.pm";
#	print "$P: hstloc=\"$hstloc\" cwd=\"$cwd\"\n" if $V>1;
#	$cgiloc = (-f $hstloc) ? $hstloc : 'cgilocal.pm';
#	print "$P: cgiloc=\"$cgiloc\"\n" if $V>1;
#	return $cgiloc;
}

sub sendHTTP {my $F='sendHTTP';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Write each argument string to the server socket F.  A copy with \r, \n and #
# \t spelled out as escapes is used for the trace messages, so they stay on  #
# one line.                                                                   #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	local($line,$shown);
	foreach $line (@_) {
		($shown = $line) =~ s"\r"\\r"g;	# Make the control chars visible
		$shown =~ s"\n"\\n"g;
		$shown =~ s"\t"\\t"g;
		print "<!--$F: Send server \"$shown\" -->\n" if $W3trace;
		print V "$P/$F: Send \"$shown\"\n" if $V>4;
		print F $line;			# The real bytes go to the socket untouched
		print V "$P/$F: Sent \"$shown\"\n" if $V>5;
	}
}

sub Hsym {my $F = 'Hsym';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Map an HTML entity name (the "foo" of "&foo;") to its 8859-1 character via #
# the global %htmlsym table.  Names not in the table are handed back in the  #
# original "&foo;" encoding so the caller can pass them through untouched.   #
# Eventually we will have to deal with the Unicodization of the unix world.  #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	local($name) = @_;
	local($code) = $htmlsym{$name};
	return defined($code) ? chr($code) : "&$name;";
}

sub Line {my $F='Line';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Return one chunk from the URL file U.  While in the HTTP  headers  ($URLhdr #
# true),  we  treat the data as text and return it line at a time, regardless #
# of settings. When we reach the data, we then start looking at how we are to #
# process the data.                                                           #
#                                                                             #
# Globals used: $Ubuf (carry-over input buffer), $URLhdr (true while still in #
# the headers), $isChunked/$inChunked/$ChnkSize/$ChnkLeft (chunked  transfer- #
# coding state maintained with &ChkSiz), $bufsiz (read size), and STREAM,     #
# which gets a raw copy of all input when $streamflg is set.                  #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	local($buf,$c,$C,$s,$n,$Lbuf);
loop: while (1) {
		print "<!--$P: Line-->\n"  if $V>1 && $W3trace;
		unless ($Ubuf) {	# Buffer is empty; try a refill first.
			print "<!--$P: Fill buffer with $bufsiz bytes-->\n"  if $V>3 && $W3trace;
			if (defined($n = sysread(U,$buf,$bufsiz)) && ($n > 0)) {
				$Ubuf .= $buf;
				print STREAM $buf if $streamflg;
			}
		}
		if ($URLhdr) {
			$Lbuf = length($Ubuf);
			print "<!--$P: In headers, $Lbuf bytes in Ubuf-->\n"  if $V>3 && $W3trace;
			print "<!--$P: $Ubuf=\"$Ubuf\"-->\n"  if $V>3 && $W3trace;
			if ($Ubuf =~ s/^([^\r\n]*)([\r\n])//) {	# Read headers line by line
				print "<!--$P: Found line -->\n"  if $V>1 && $W3trace;
				$s = $1;	# The header line proper
				$c = $2;	# Its first line-end character
				$C = ($c eq "\n") ? '\n' : ($c eq "\r") ? '\r' : $c;	# Printable form
				print V "$P/$F: Line $s$C\n" if $V>3;
				if ($c eq "\r")  {		# CRLF kludge (DOS, HTTP)
					unless ($Ubuf) {	# Refill so we can see the next char
						if (($n = sysread(U,$buf,$bufsiz)) > 0) {
							$Ubuf .= $buf;
							print STREAM $buf if $streamflg;
						}
					}
					$Ubuf =~ s/^\n//;	# remove \n after \r
					$c = "\n";
				}
				if ($c eq "\n")  {		# LFCR kludge (VMS)
					unless ($Ubuf) {
						if (($n = sysread(U,$buf,$bufsiz)) > 0) {
							$Ubuf .= $buf;
							print STREAM $buf if $streamflg;
						}
					}
					$Ubuf =~ s/^\r//;	# remove \r after \n
				}
				unless ($s) {	# Null line ends HTTP headers
					print V "$P/$F: End of headers, isChunked=$isChunked.\n" if $V>1;
					$URLhdr = 0;
					if ($isChunked) {	# Is the data chunked?
						$inChunked = 1;	# We are in chunked data now
						&ChkSiz();		# Get size of first chunk
					}
				}
				return "$s$c";			# Return header line
			} else {
				print "<!--$P: No line in Ubuf-->\n"  if $V>1 && $W3trace;
				if (($n = sysread(U,$buf,$bufsiz)) > 0) {
					$Ubuf .= $buf;
					print STREAM $buf if $streamflg;
					$Lbuf = length($Ubuf);
					print "<!--$P: Now $Lbuf bytes in Ubuf-->\n"  if $V>3 && $W3trace;
				} elsif ($n == 0) {
					# NOTE(review): EOF while still in the headers just keeps
					# looping (busy wait) instead of returning -- presumably
					# for streaming input that may grow; confirm intended.
					print "<!--$P: EOF-->\n"  if $V>3 && $W3trace;
				} else {
					print "<!--$P: Input error ($!)-->\n"  if $V>3 && $W3trace;
					return $Ubuf ? $Ubuf : undef;	# Hand back any residue
				}
			}
			print "<!--$P: No line in Ubuf-->\n"  if $V>1 && $W3trace;
		} else {		# We're in the data now
			print V "$P/$F: Data ...\n" if $V>4;
			$UbufSize = length($Ubuf) unless $UbufSize;
			print V "$P/$F: ChnkSize=$ChnkSize ChnkLeft=$ChnkLeft UbufSize=$UbufSize.\n" if $V>3;
			if ($inChunked) {
				print V "$P/$F: Data is chunked.\n" if $V>4;
				if ($ChnkLeft <= $UbufSize) {	# Rest of the chunk is already buffered
					print V "$P/$F: Ubuf has enough data.\n" if $V>4;
					$buf = substr($Ubuf,0,$ChnkLeft,'');
					$UbufSize -= $ChnkLeft;	# Deduct the bytes removed (before zeroing)
					$ChnkLeft = 0;			# This chunk is fully delivered
				} else {	# Deliver what we have; the chunk continues
					print V "$P/$F: Ubuf has less than a chunk.\n" if $V>4;
					$buf = $Ubuf;
					$Ubuf = '';
					$ChnkLeft -= $UbufSize;	# That much of this chunk still to come
					$UbufSize = 0;
				}
				# NOTE(review): ChkSiz is called even when $ChnkLeft > 0
				# (chunk not yet exhausted); presumably it returns early in
				# that case -- confirm against ChkSiz itself.
				&ChkSiz();		# Get size of next chunk
			} else {
				print V "$P/$F: Data not chunked.\n" if $V>4;
				unless ($Ubuf) {
					$n = sysread(U,$Ubuf,$bufsiz);	# Read in the next chunk of data
					print V "$P/$F: Read returned $n bytes.\n" if $V>1;
					if ($n > 0) {print STREAM $Ubuf if $streamflg}
					if (!defined($n)) {		# Read error
						print V "$P/$F: Read failed; n=$n ($!)\n" if $V>1;
						$staterr =  "$!";		# Remember the error message
						$buf = $Ubuf;			# Whatever is left
						$Ubuf = undef;
					} elsif ($n == 0) {	# No data yet 
						sleep 1;		# Avoid busy wait
				#	} else {
				#		print V "$P/$F: Read failed; n=$n ($!)\n" if $V>1;
				#		$staterr =  "$!";		# Remember the error message
				#		$buf = $Ubuf;			# Whatever is left
				#		$Ubuf = ($n < 0) ? undef : '';
					}
				}
				$buf = $Ubuf;
				$Ubuf = '';
			}
			return $buf;
		}
		print "<!--$P: Loop end -->\n"  if $V>1 && $W3trace;
	}
	print V "$P/$F: Can't get here.\n" if $V>0;
}

sub READalarm {my $F='READalarm';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# SIGALRM handler for a read that has taken too long: report it and bail out #
# of the whole program, since the read is never going to finish.             #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	my $waited = time - $HTTPopentime;	# How long we sat on the read
	if ($W3trace || $V>0) {
		print "<!--$P/$F: ALARM after $waited sec -->\n";
	}
	exit -1;
}

sub URL {my $F = 'URL'; local($u) = shift;
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Massage a URL into its general form: a full http:// URL passes through     #
# unchanged, a bare absolute path is qualified with the global $Host, and    #
# anything else is reported (when tracing) and returned as-is.               #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	local($full);
	return $u if $u =~ m"^(http)://([^/]+)(/.*)$";	# Already in general form
	if ($u =~ m"^/+(.*)$") {		# Absolute path: attach our host
		$full = "http://$Host/$1";
		print "<!--$P/$F: URL = \"$full\"\n" if $W3trace || $V>0;
		return $full;
	}
	print "<!--$P/$F: Can't parse \"$u\"\n" if $W3trace || $V>0;
	return $u;
}

sub URLalarm {my $F = 'URLalarm';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# SIGALRM handler armed by openURL: if the open hangs this long, give up and #
# exit so any parent process can get on with its business.                   #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	my $waited = time - $HTTPopentime;	# How long the open has been pending
	if ($W3trace) {
		print "<!--$P/$F: ALARM after $waited sec -->\n";
	}
	exit -1;
}

sub URLalarmoff {my $F = 'URLalarmoff';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Disarm the open-timeout: cancel any pending SIGALRM and clear the handler. #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	my $remained = alarm(0);	# Cancel the pending alarm, if any
	$SIG{ALRM} = '';			# Was $savsig; now simply cleared
	if ($V>2) {
		print V "$P/$F: Set alarm 0.\n";
	}
}

sub openURL {my $F='openURL';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#
# NAME
#   openURL - Open HTTP connection and request a URL
#
# SYNOPSIS
#   &openURL(*HANDLE,$URL,$OP)
#
# DESCRIPTION
#   This routine accepts a URL and attempts  to  open  it  for  reading.   If
#   successful,  the  return  value  is  1,  and  FD will be the open file (a
#   socket, actually).  The caller can read the data from it.  If you're  not
#   going  to  exit after the EOF, it's a good idea to close it when you're
#   done, to prevent the connection from hanging around.
#
#   The third parameter, the "operation", defaults to GET.  You may also pass
#   'POST' as the third arg, and we will send it, and not send the extra \n.
#
#   We send a "$OP $URL HTTP/$HTTPversion\n\n" request, and the  server  will
#   send back first the HEAD information for the URL, then \r\n\r\n, then the
#   data.  Note that \r\n\r\n is specified in the HTTP specs, and so far  not
#   even Microsoft has seen fit to violate this.
#
#   If the attempt to parse the URL fails, we will try to open it as a  local
#   file,  and  if  this succeeds, we will return success.  So any local file
#   whose name doesn't look like a URL can be used as a "remote" file.
#
# ENVIRONMENT
#   We use several global variables:
#
#   $W3proxy  is host name (or IP address) and port for HTTP proxy.
#   $W3nopxy  is perl pattern for local (non-proxied) hosts.
#   $URLhdr   is set to 1 indicating that we're in the header 
#   $URLerr   is set to an error message if we fail.
#
#   We also set $W3proxy and $W3nopxy from the environment variables  W3PROXY
#   and  W3NOPROXY,  if  the latter are defined and the former aren't, so the
#   caller should probably not worry about this in general.
#
# BUGS
#   At present, we only do the HTTP:// protocol.  Maybe eventually ...
#
# AUTHOR
#   John Chambers <jc@trillian.mit.edu>
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	local(*F,$url,$OP) = @_;
	local($t);
	local($savsig);
	# Defaults for the globals we rely on.
	$HTTPopens    = 0     unless defined $HTTPopens;
	$HTTPopentime = 0     unless defined $HTTPopentime;
	$HTTPversion  = '1.0' unless defined $HTTPversion;
	$OP = 'GET' if !$OP;	# Default operation
	$HTTPtimeout = 30 unless defined $HTTPtimeout;
	print V "$P/$F: \"$url\"\n" if $V>4;
	print "<!--$F: $url -->\n" if $W3trace && $V>5;
	$W3proxy = $ENV{W3PROXY}   if !$W3proxy;	# Pick up proxy config from env
	$W3nopxy = $ENV{W3NOPROXY} if !$W3nopxy;
	if ($HTTPtimeout > 0) {		# Arm the hung-open timeout.
		alarm $HTTPtimeout;
		$savsig = $SIG{ALRM};
		$SIG{ALRM} = 'URLalarm';
		print V "$P/$F: Set alarm after $HTTPtimeout sec.\n" if $V>2;
	}
	++$HTTPopens;
	$HTTPopentime = time;
	# Split the URL into protocol, host and file path.  If it doesn't look
	# like a URL at all, leave $Prot empty and fall through to the local
	# file/directory code below, as documented above.
	if (($Prot,$Host,$File) = ($url =~ m'^(\w+)://+([-_.:\w]+)(/.*)')) {
		;	# Full URL with a path.
	} elsif (($Prot,$Host) = ($url =~ m'^(\w+)://+([-_.:\w]+)$')) {
		$File = '/';	# Bare "prot://host"; default the path.
	} else {
		print V "$P/$F: Can't parse \"$url\"; trying it as a local path.\n" if $V>0;
		$Prot = '';		# Not a URL; handled as a local path below.
	}
	if ($Prot) {
		$url = "$Prot://$Host$File";	# Canonical form of the URL
		if ($W3proxy) {
			if ($W3nopxy && ($Host =~ $W3nopxy)) {
				print V "$P/$F: host \"$Host\" matches \"$W3nopxy\"\n" if $V>2;
			} else {
				print V "$P/$F: pxy=\"$W3proxy\" url=\"$url\"\n" if $V>2;
				print "<!--$F: Connecting to proxy \"$W3proxy\" -->\n" if $W3trace;
				if (&conHTTP(*F,$W3proxy)) {
					# A proxy wants the full URL on the request line.
					&sendHTTP("$OP $url HTTP/$HTTPversion\r\n");
					&sendHTTP("Accept: */*\r\n");
					if ($HTTPversion eq '1.1') {
						&sendHTTP("Host: $Host\r\n");
						&sendHTTP("Connection: Close\r\n");
					}
					print V "$P/$F: Sent \"$OP $url HTTP/$HTTPversion\\r\\n\"\n" if $V>5;
					if ($W3agentid) {
						&sendHTTP("User-agent: $W3agentid\r\n");
					}
					if ($OP eq 'GET') {
						&sendHTTP("\r\n");	# End of request headers
					}
					$URLhdr = 1;    # Note we're in the header.
					&URLalarmoff() if ($HTTPtimeout > 0);
					return 1;
				}
				$t = time - $HTTPopentime;
				$URLerr = "$F: Can't connect to proxy \"$W3proxy\"";
				print "<!--$F: $URLerr in $t sec ($!) -->\n" if $W3trace;
				print V "$P/$URLerr\n" if $V>0;
				&URLalarmoff() if ($HTTPtimeout > 0);
				return 0;
			}
		}
		print V "$P/$F: \"$url\" Prot=\"$Prot\" Host=\"$Host\" File=\"$File\"\n" if $V>2;
		($PROT = $Prot) =~ tr/a-z/A-Z/;	# Protocol name, uppercased
		if ($PROT eq 'HTTP') {
			print V "$P/$F: \"$url\" HTTP protocol\n" if $V>2;
			print "<!--$F: Connecting to server \"$Host\" -->\n" if $W3trace;
			if (&conHTTP(*F,$Host)) {
				print V "$P/$F: Connected to \"$Host\"\n" if $V>2;
				&sendHTTP("$OP $File HTTP/$HTTPversion\r\n");
				&sendHTTP("Accept: */*\r\n");
				if ($HTTPversion eq '1.1') {
					&sendHTTP("Host: $Host\r\n");
					&sendHTTP("Connection: Close\r\n");
				}
				if ($W3agentid) {
					&sendHTTP("User-agent: $W3agentid\r\n");
				}
				if ($OP eq 'GET') {
					&sendHTTP("\r\n");	# End of request headers
				}
				$URLhdr = 1;    # Note we're in the header.
				&URLalarmoff() if ($HTTPtimeout > 0);
				return 1;
			}
			$t = ($now = time) - $HTTPopentime;
			$URLerr = "$P/$F: Can't connect to host \"$Host\" in $t sec ($!)";
			$localtime = localtime($now);
			print "<!--$URLerr -->\n" if $W3trace;
			print V "[$localtime] $URLerr\n" if $V>0;
			&URLalarmoff() if ($HTTPtimeout > 0);
			return 0;
		}
		$URLerr = "Can't do protocol \"$Prot\"";
		print "<!--$P/$F: Can't open \"$url\" $URLerr -->\n" if $W3trace;
		print V "$P/$F: Can't open \"$url\" ($URLerr)\n" if $V>0;
		$exitstat = 254;
		&URLalarmoff() if ($HTTPtimeout > 0);
		return 0;
	}
	# Not a URL; try it as a local directory, then as a local file.
	if (-d $url) {
		print V "$P/$F: Directory \"$url\" ...\n" if $V>4;
		&URLalarmoff() if ($HTTPtimeout > 0);
		return &HTMLdir(*F,$url);
	}
	if (open(F,$url)) {
		print V "$P/$F: Local file \"$url\" opened.\n" if $V>4;
		$URLhdr = 0;    # No headers for local files (yet).
		&URLalarmoff() if ($HTTPtimeout > 0);
		return 1;
	}
	$URLerr = "$!";
	print "<!--$F: Can't read \"$url\" ($URLerr) -->\n" if $V>0;
	print V "$P/$F: Can't read \"$url\" ($URLerr)\n" if $V>0;
	$exitstat = int($!);
	&URLalarmoff() if ($HTTPtimeout > 0);
	return 0;
}

sub esc {my($s) = @_;
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Encode special chars as escape sequences.  We should probably include a few #
# more chars, but this is only used for debugging, and this is what we need.  #
# The mapping comes from the global %escmap table.                            #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	local($encoded) = $s;
	$encoded =~ s/([\n\r\t])/$escmap{$1}/g;	# Swap each control char for its escape
	print V "esc: \"$s\" -> \"$encoded\"\n" if $V>5;
	return $encoded;
}

sub Vopt {my $F = 'Vopt';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Set the verbosity from various environment variables. The value may #
# be  a  verbose  level (1 digit), plus an optional output file name. #
# The file V is opened to the file, if any, or STDERR by default. The #
# default  value  for the verbosity level is 1, which generally means #
# to produce only serious error messages.                             #
#                                                                     #
# Here's how this routine is typically called:                        #
#    ($P = $0) =~ s'.*/'' unless defined($P);                         #
#    &Vopt($ENV{"V_$P"} || $ENV{"D_$P"} || $ENV{"T_$P"} || '1');      #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	$Vopt = shift;
	$V = 1 unless defined $V;	# Provisional level for the messages below
	print "<br>Vopt: Vopt=\"$Vopt\"<br>\n" if $V>1;
	if ($Vopt =~ /^(\d+)(.+)$/) {	# Digits followed by a log file name
		$V = int($1);
		$Vfil = $2;
		if (!open(V,">>$Vfil")) {	# Append to the named log file
			# NOTE(review): V is not open yet when this append fails, so
			# this message likely goes nowhere -- confirm intended.
			print V "$P	Can't write \"$Vfil\" ($!)\n" if $V>0;
			open(V,">>&STDERR");	# Fall back to logging on STDERR
		}
	} else {			# Just a level (possibly empty); log to STDERR
		$V = $Vopt;
		open(V,">>&STDERR");
	}
	select V; $| = 1; select STDOUT; $| = 1;	# Unbuffer both V and STDOUT
	print V "$P started with V=$V $ymd $hms\n" if $V>1;
}
1;
