#!/usr/bin/perl -T
# (-T enables taint mode: perl distrusts data from the environment and argv.)
	$version = '20160105';	# Which version we claim to be
	($P = $0) =~ s"^.*/"";	# This program's actual name (basename of $0)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#NAME
#  webcat - catenate web pages and files to standard output
#
#SYNOPSIS
#  webcat [ URL | path ]...
#
#DESCRIPTION
#  This is a web version of the Unix cat(1) command, coded in perl 5.
#
#  Given a list of URLs and files, this program reads them one at a time, and
#  writes their contents, catenated into one long string, to standard output.
#
#  Directories are output in a simplified HTML format,  somewhat  similar  to
#  what the apache web server produces.
#
#  If you want to learn how to do this stuff, you can study this program.  It
#  is  useful as a starting point for writing other simple web clients.  It's
#  not nearly as difficult as people would like  you  to  believe.   But  the
#  socket  stuff  uses  several  magical  incantations that "you just have to
#  know"; see the required perl module files for this socket magic.
#
#OPTIONS
#  The default setup is to deliver only the data  (contents)  of  a  URL  and
#  discard  the  header  and  tracing  information.   Here is the list of our
#  current options.  The options may be combined into  a  single  string,  as
#  usual,  with  the  qualification  that options which have an arg (O and P)
#  must be the last in the string.  Options may be in any order, and apply to
#  all subsequent URLs unless canceled by another option.
#
#  +D
#    Output the data [default].
#  -D
#    Don't output the data.  -D +H returns just the HTTP headers.
#
### Disabled:
#  -E
#    Don't extract text; deliver the data as-is. (default)
#  +E
#    Extract the text.  This is a simple conversion from HTML to plain  text.
#    Assorted  line-end  tags  are  converted  to  \n or \n\n as appropriate.
#    Hyperlinks are converted to <URL:...>, which is recognized by a  lot  of
#    non-HTML software, including ABCbot.  The formatting is rudimentary.  If
#    you want good-looking, formatted text, it's better to use something like
#    lynx that has a full HTML rendering capability.
#
#  +H
#    Include the HTTP header info in the output.
#  -H
#    Don't include the HTTP header info in the output [default].
#
#
#  -I
#    Don't send agent identification (default).
#  +I"agentid"
#    Send the quoted string as the agent identification. Some web sites won't
#    talk to you unless you pretend to be an acceptable browser.  If there is
#    no string, we send the following string, which seems  to  convince  most
#    servers  that  we're  either  netscape  or  IE  (which  pretends  to  be
#    netscape), plus a bogus system name to confuse those servers that  won't
#    serve  to linux or BSD users.  Feel free to use your own AgentID string.
#    But you should know that there is server software out  there  that  will
#    refuse service to any client that doesn't call itself "Mozilla".
#
#  +M
#    Macintosh kludge: convert \r to \n (if not followed by \n).  This may be
#    obsolescent,  now  that  we  have  code to automatically split the input
#    apart on \r, \n or \r\n.
#
#  -O<file>
#    Write the data to <file>.  Default is stdout.
#
#  +P<proxy>
#    Proxy gateway.  If you are hidden behind a proxy, put the proxy hostname
#    (and  optionally  :port)  in  a  -P  option, and we will try to indirect
#    through the proxy server.
#  -P<noproxy>
#    Proxy exception. The <noproxy> string should be a perl pattern. If a URL
#    matches this pattern, the proxy gateway isn't used.
#
#  -R
#    Ignore redirects (default).
#  +R
#    Follow HTTP "Location:" redirects. If you want to know that the redirect
#    happened,  you  must  use the +H option.  Without +H, you'll get no clue
#    that the URL was redirected; webcat will follow the redirection and give
#    the data with no clue where it came from.
#
# +S<file>
#    Write the incoming data stream (all of it) to <file>.  This writes every
#    byte  received, including the HTTP headers and HTTP "chunk" sizes to the
#    file.  This is intended as a debugging aid, so that you can compare what
#    the  server  actually  sent with what this program writes as the output.
#    Some problems with HTTP chunking have been debugged with this.
# -S
#    Default:  Don't write the data stream; just write the (reconstructed)
#    data file to stdout.
#
#  +T
#    Enable WWW tracing. This sets a global flag that causes various routines
#    to produce lines of the form:
#      <!--name:  ...  some message ...  -->
#    These look like both HTTP header lines and HTML comments. Some WWW tools
#    (such as the "H" html viewer) can show these to tell you which stage  of
#    a GET operation we have reached.
#  -T
#    Disable WWW tracing [default].
#
#  -T<n>
#    Timeout of <n> seconds.  The default is no  timeout,  meaning  that  the
#    underlying system's connect() will determine the timeout, if any.
#
#  +T<n>
#    Retry <N> times on dropped connections. This is to handle a failure mode
#    seen  on some servers:  The server accepts the TCP connection, and drops
#    the connection after receiving the GET request, without even sending  an
#    error code. We retry such connections $tries times, with a default of 3,
#    with 1-second delays between tries.  This  option  selects  a  different
#    limit to the number of retries.  (See home1.swipnet.se for examples.)
#
#  +V<version>
#  -V<version>
#    The HTTP version to use. The default is -V1.0. For -V1.1, an extra Host:
#    header is sent, since some servers require it.
#
#  -X
#    Abandon the URL on timeout (default).
#  +X
#    Exit on timeout, ignoring the rest of the URLs and file names.  On  some
#    systems,  there is a bug in the connect() system call that can result in
#    hanging indefinitely. This option is a last-resort "solution" that works
#    if you are only trying to get one URL.
#
#ENVIRONMENT
#  We use the following from the environment:
#
#  W3PROXY
#    The name (or address) and an optional :port for a proxy  gateway.   URLs
#    that  don't  match  the  W3NOPROXY  will  be  fetched indirectly via the
#    proxy's web  server.   If  not  defined,  we  will  attempt  direct  TCP
#    connections for all URLs.
#
#  W3NOPROXY
#    A pattern which is applied to URLs, and if they match, no proxy is used.
#    That is, any URL that matches W3NOPROXY is considered local, and we will
#    access it directly. If not defined, we will use W3PROXY for all URLs (if
#    it exists).
#
#LIMITATIONS
#  So far only the http://  protocol  is  implemented;  ftp://,  file://  and
#  others  may appear if I need them.  If someone feels like adding FTP code,
#  you might send me a copy.
#
#  HTTP  "redirection"  (the  "Location:" HTTP header) is implemented now via
#  the +R option.  By default, it is disabled and  must  be  handled  by  the
#  caller, if desired.  This mainly means that if you omit the final '/' on a
#  directory name, we will fail.  This is not  considered  a  bug,  so  it'll
#  probably never be fixed.
#
#DEBUGGING
#  You can use "perl -dw", of course. Or you can do the following:
#    setenv V_webcat 5/tmp/webcat.out	# csh or tcsh users.
#    export V_webcat=5/tmp/webcat.out	# ksh or bash users.
#  This will turn on the "print V" lines for $V in the range 0-5,  and  write
#  the verbose output to /tmp/webcat.out. Note that the default verbose level
#  is 1; setting it to 0 should turn off all verbose messages.
#
#BUGS
#  Despite many attempts to detect failure, we still don't  optimally  handle
#  all  the myriad things that can go wrong.  In particular, on some systems,
#  the connect() system call can hang indefinitely and can't be killed by  an
#  ALARM. There does not appear to be any known solution to this problem. (No
#  amount of clever code will help if your process doesn't get any cpu time.)
#
#SEE ALSO
#  wget(1), which is more general and recursive
#
#FIXES
# 2009-6-3
#  Added "Accept */*" HTTP header line, because www.thursdaycontra.com didn't
#  accept  any  requests that lacked this line.  Also added a firefox agentid
#  string to the list of builtin agentids, but it's not the default (yet).
#
#AUTHOR
#   <a href="mailto:jc@trillian.mit.edu">John Chambers</a>
# Feel free to use this program for any purpose.  If you add any interesting
# new features, please send me a copy.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

use Socket;

&LocalSetup(1);		# Figure out localization stuff (defined elsewhere in this program)
$| = 1;				# Don't buffer output
$exitstat =   0;	# Set this to get a failure exit status.
&Vopt($ENV{"V_$P"} || $ENV{"D_$P"} || $ENV{"T_$P"} || '1');	# Set verbose level $V and open handle V (see DEBUGGING above)
print V "$P: Started $ymd $hms\n" if $V>1;	# $ymd/$hms presumably set by LocalSetup -- confirm

# Pieces of the current URL, filled in as each one is parsed:
$Prot = '';
$PROT = '';
$Host = '';
$File = '';

#bufsiz =    10;	# Size of input buffer, small for testing
$bufsiz = 10000;	# Size of input buffer, large for routine use

# Option flags, set from the command line (see OPTIONS above):
$W302    = 1;		# Follow 302 redirects (value is hop limit)
$W3hdrs  = 0;		# Whether to output header lines
$W3data  = 1;		# Whether to output data
$W3trace = 0;		# Whether to output tracing info
$kludge1_1_404 = 0;	# HTTP/1.1 GET required (set when a 1.0 GET gets a 4xx)

# Some more initializations to prevent warnings:
$savsig = '';		# For remembering signal settings
$HTTPopentime = 0;
$W3proxy = '';
$streamflg  = 0;	# Whether to write data stream to file (+S option)
$streamfile = '';	# File name for output data-stream file
$dfltstream = 'datastream.txt';	# Default name of streamfile
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Global data for handling "chunked" HTTP Transfer-Encoding.  These are
# maintained by &ChkSiz() and the input routines:

$isChunked = 0;		# Is the data "chunked"?
$inChunked = 0;		# Are we in chunked data now?
$ChnkLeft  = 0;		# Bytes left in current chunk
$ChnkNeed  = 0;		# Bytes needed to fill current chunk
$ChnkSize  = 0;		# Size of current chunk
$UbufSize  = 0;		# Bytes left in current $Ubuf

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Agent ID strings:

# Pick a default ID string (uncomment exactly one):
#	$dflagentid = "";
	$dflagentid = "$P/$version [en]";
#	$dflagentid = "Mozilla/4.5 [en] (compatible; I; $P)";
#	$dflagentid = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)";
#	$dflagentid = "Mozilla/5.0 (X11; U; Linux i686; fr; rv:1.8.1.19) Gecko/20081213 SUSE/2.0.0.19-0.1 Firefox/2.0.0.19";
$W3agentid = $dflagentid;	# What's actually sent as our ID string

# Agent ID strings that we might want to deliver.  A +I<key> option whose
# key appears here is replaced by the full value string.
# (Fixed: the Mozilla/5.0 value was missing the ';' after "X11", unlike the
# identical UA string in the default list above.)
%agentids = (
	"null"        => "",
	"$P/$version" => "$P/$version [en]",
	"$P"          => "$P/$version [en]",
	"Mozilla/4.5" => "Mozilla/4.5 [en] (compatible; I; $P)",
	"Mozilla/4.0" => "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)",
	"Mozilla"     => "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)",
	"Mozilla/5.0" => "Mozilla/5.0 (X11; U; Linux i686; fr; rv:1.8.1.19) Gecko/20081213 SUSE/2.0.0.19-0.1 Firefox/2.0.0.19",
);

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
%escmap = (	# Map whitespace control chars to printable escapes, for &esc()
	"\t" => '\\t',
	"\n" => '\\n',
	"\r" => '\\r',
);

#if ($V>1) {			# Test esc()
#	print "$P: Testing esc ...\n";
#	$foo = "a\tb\rc\n";
#	$bar = &esc($foo);
#	print "$P: esc('$foo') gave '$bar'\n";
#	print "$P: Tested  esc.\n";
#}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Here's our table for converting HTML's symbolic entities to characters. The
# value  is  the  decimal code for the character in 8859-1.  How to deal with
# other character sets is still not very clearly defined.

%htmlsym = (
	# (Fixed: most of the comments below were off by one entry -- each
	# described the NEXT Latin-1 character, apparently because alternating
	# rows of a fuller table were deleted.  Those missing standard entities
	# have been restored, and 'amp' added; all pre-existing values were
	# already correct and are unchanged.)
	'lt'     =>  60,	# less-than sign
	'gt'     =>  62,	# greater-than sign
	'quot'   =>  34,	# quotation mark (double quote)
	'amp'    =>  38,	# ampersand
	'nbsp'   => 160,	# no-break space = non-breaking space, U+00A0 ISOnum
	'iexcl'  => 161,	# inverted exclamation mark, U+00A1 ISOnum
	'cent'   => 162,	# cent sign, U+00A2 ISOnum
	'pound'  => 163,	# pound sign, U+00A3 ISOnum
	'curren' => 164,	# currency sign, U+00A4 ISOnum
	'yen'    => 165,	# yen sign = yuan sign, U+00A5 ISOnum
	'brvbar' => 166,	# broken bar = broken vertical bar, U+00A6 ISOnum
	'sect'   => 167,	# section sign, U+00A7 ISOnum
	'uml'    => 168,	# diaeresis = spacing diaeresis, U+00A8 ISOdia
	'copy'   => 169,	# copyright sign, U+00A9 ISOnum
	'ordf'   => 170,	# feminine ordinal indicator, U+00AA ISOnum
	'laquo'  => 171,	# left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum
	'not'    => 172,	# not sign, U+00AC ISOnum
	'shy'    => 173,	# soft hyphen = discretionary hyphen, U+00AD ISOnum
	'reg'    => 174,	# registered sign = registered trade mark sign, U+00AE ISOnum
	'macr'   => 175,	# macron = spacing macron = overline = APL overbar, U+00AF ISOdia
	'deg'    => 176,	# degree sign, U+00B0 ISOnum
	'plusmn' => 177,	# plus-minus sign = plus-or-minus sign, U+00B1 ISOnum
	'sup2'   => 178,	# superscript two = superscript digit two = squared, U+00B2 ISOnum
	'sup3'   => 179,	# superscript three = superscript digit three = cubed, U+00B3 ISOnum
	'acute'  => 180,	# acute accent = spacing acute, U+00B4 ISOdia
	'micro'  => 181,	# micro sign, U+00B5 ISOnum
	'para'   => 182,	# pilcrow sign = paragraph sign, U+00B6 ISOnum
	'middot' => 183,	# middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum
	'cedil'  => 184,	# cedilla = spacing cedilla, U+00B8 ISOdia
	'sup1'   => 185,	# superscript one = superscript digit one, U+00B9 ISOnum
	'ordm'   => 186,	# masculine ordinal indicator, U+00BA ISOnum
	'raquo'  => 187,	# right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum
	'frac14' => 188,	# vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum
	'frac12' => 189,	# vulgar fraction one half = fraction one half, U+00BD ISOnum
	'frac34' => 190,	# vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum
	'iquest' => 191,	# inverted question mark = turned question mark, U+00BF ISOnum
	'Agrave' => 192,	# latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1
	'Aacute' => 193,	# latin capital letter A with acute, U+00C1 ISOlat1
	'Acirc'  => 194,	# latin capital letter A with circumflex, U+00C2 ISOlat1
	'Atilde' => 195,	# latin capital letter A with tilde, U+00C3 ISOlat1
	'Auml'   => 196,	# latin capital letter A with diaeresis, U+00C4 ISOlat1
	'Aring'  => 197,	# latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1
	'AElig'  => 198,	# latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1
	'Ccedil' => 199,	# latin capital letter C with cedilla, U+00C7 ISOlat1
	'Egrave' => 200,	# latin capital letter E with grave, U+00C8 ISOlat1
	'Eacute' => 201,	# latin capital letter E with acute, U+00C9 ISOlat1
	'Ecirc'  => 202,	# latin capital letter E with circumflex, U+00CA ISOlat1
	'Euml'   => 203,	# latin capital letter E with diaeresis, U+00CB ISOlat1
	'Igrave' => 204,	# latin capital letter I with grave, U+00CC ISOlat1
	'Iacute' => 205,	# latin capital letter I with acute, U+00CD ISOlat1
	'Icirc'  => 206,	# latin capital letter I with circumflex, U+00CE ISOlat1
	'Iuml'   => 207,	# latin capital letter I with diaeresis, U+00CF ISOlat1
	'ETH'    => 208,	# latin capital letter ETH, U+00D0 ISOlat1
	'Ntilde' => 209,	# latin capital letter N with tilde, U+00D1 ISOlat1
	'Ograve' => 210,	# latin capital letter O with grave, U+00D2 ISOlat1
	'Oacute' => 211,	# latin capital letter O with acute, U+00D3 ISOlat1
	'Ocirc'  => 212,	# latin capital letter O with circumflex, U+00D4 ISOlat1
	'Otilde' => 213,	# latin capital letter O with tilde, U+00D5 ISOlat1
	'Ouml'   => 214,	# latin capital letter O with diaeresis, U+00D6 ISOlat1
	'times'  => 215,	# multiplication sign, U+00D7 ISOnum
	'Oslash' => 216,	# latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1
	'Ugrave' => 217,	# latin capital letter U with grave, U+00D9 ISOlat1
	'Uacute' => 218,	# latin capital letter U with acute, U+00DA ISOlat1
	'Ucirc'  => 219,	# latin capital letter U with circumflex, U+00DB ISOlat1
	'Uuml'   => 220,	# latin capital letter U with diaeresis, U+00DC ISOlat1
	'Yacute' => 221,	# latin capital letter Y with acute, U+00DD ISOlat1
	'THORN'  => 222,	# latin capital letter THORN, U+00DE ISOlat1
	'szlig'  => 223,	# latin small letter sharp s = ess-zed, U+00DF ISOlat1
	'agrave' => 224,	# latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1
	'aacute' => 225,	# latin small letter a with acute, U+00E1 ISOlat1
	'acirc'  => 226,	# latin small letter a with circumflex, U+00E2 ISOlat1
	'atilde' => 227,	# latin small letter a with tilde, U+00E3 ISOlat1
	'auml'   => 228,	# latin small letter a with diaeresis, U+00E4 ISOlat1
	'aring'  => 229,	# latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1
	'aelig'  => 230,	# latin small letter ae = latin small ligature ae, U+00E6 ISOlat1
	'ccedil' => 231,	# latin small letter c with cedilla, U+00E7 ISOlat1
	'egrave' => 232,	# latin small letter e with grave, U+00E8 ISOlat1
	'eacute' => 233,	# latin small letter e with acute, U+00E9 ISOlat1
	'ecirc'  => 234,	# latin small letter e with circumflex, U+00EA ISOlat1
	'euml'   => 235,	# latin small letter e with diaeresis, U+00EB ISOlat1
	'igrave' => 236,	# latin small letter i with grave, U+00EC ISOlat1
	'iacute' => 237,	# latin small letter i with acute, U+00ED ISOlat1
	'icirc'  => 238,	# latin small letter i with circumflex, U+00EE ISOlat1
	'iuml'   => 239,	# latin small letter i with diaeresis, U+00EF ISOlat1
	'eth'    => 240,	# latin small letter eth, U+00F0 ISOlat1
	'ntilde' => 241,	# latin small letter n with tilde, U+00F1 ISOlat1
	'ograve' => 242,	# latin small letter o with grave, U+00F2 ISOlat1
	'oacute' => 243,	# latin small letter o with acute, U+00F3 ISOlat1
	'ocirc'  => 244,	# latin small letter o with circumflex, U+00F4 ISOlat1
	'otilde' => 245,	# latin small letter o with tilde, U+00F5 ISOlat1
	'ouml'   => 246,	# latin small letter o with diaeresis, U+00F6 ISOlat1
	'divide' => 247,	# division sign, U+00F7 ISOnum
	'oslash' => 248,	# latin small letter o with stroke = latin small letter o slash, U+00F8 ISOlat1
	'ugrave' => 249,	# latin small letter u with grave, U+00F9 ISOlat1
	'uacute' => 250,	# latin small letter u with acute, U+00FA ISOlat1
	'ucirc'  => 251,	# latin small letter u with circumflex, U+00FB ISOlat1
	'uuml'   => 252,	# latin small letter u with diaeresis, U+00FC ISOlat1
	'yacute' => 253,	# latin small letter y with acute, U+00FD ISOlat1
	'thorn'  => 254,	# latin small letter thorn, U+00FE ISOlat1
	'yuml'   => 255,	# latin small letter y with diaeresis, U+00FF ISOlat1
);

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# That's all the global setup. Now we run thru the args, attempt to read each #
# one, and write the contents to stdout:                                      #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
arg:
for $u (@ARGV) {
	$moved = 0;
	$URLerr = "Don't know why";				# Set by openURL on failure.
	if (($pfx,$opt) = ($u =~ /^([-+])(.*)/)) {	# This arg is an option, not a URL.
		while ($opt) {						# Each time MUST remove at least one char.
			if ($opt =~ s/^D//i) {			# +D or -D (whether to produce data)
				$W3data = ($pfx eq '+') ? 1 : 0;
				print V ($W3data ? "Do" : "Don't"), " produce data.\n" if $V>1;
			} elsif ($opt =~ s/^E//i) {		# E option disabled in this version
				#
			} elsif ($opt =~ s/^H//i) {		# +H or -H (whether to produce headers)
				$W3hdrs = ($pfx eq '+') ? 1 : 0;
				print V ($W3hdrs ? "Do" : "Don't"), " produce headers.\n" if $V>1;
			} elsif ($opt =~ s/^I//i) {		# +I<agentid> or -I
				if ($pfx eq '-') {
					$W3agentid = '';
					print V "$P: No agent identification.\n" if $V>1;	# (Fixed: was missing \n)
				} else {
					$W3agentid = $opt || $dflagentid;	# Rest of string is id.
					print V "$P: Agent \"$W3agentid\"\n" if $V>1;
					# (Fixed: this looked up %agentid, but the alias table
					# is named %agentids, so aliases never matched.)
					if (defined($id = $agentids{$W3agentid})) {
						$W3agentid = $id;
						print V "$P: Agent \"$W3agentid\"\n" if $V>1;
					}
					$opt = '';
				}
			} elsif ($opt =~ s/^O//i) {		# +O or -O (output file)
				$outfile = $opt;			# Rest of string is file name.
				$opt = '';
				print V "$P: Output to \"$outfile\"\n" if $V>1;
			} elsif ($opt =~ s/^P//i) {		# +P<proxy> or -P<noproxy>
				if ($pfx eq '-') {
					$W3nopxy = $opt;		# Rest of string is pattern.
					print V "$P: Proxy exceptions are /$W3nopxy/\n" if $V>1;
				} else {
					$W3proxy = $opt;		# Rest of string is proxy host.
					print V "$P: Proxy server is $W3proxy.\n" if $V>1;	# (Fixed: printed $W3nopxy)
				}
				$opt = '';
			} elsif ($opt =~ s/^R//i) {		# +R or -R (whether to follow 302 redirects)
				$W302 = ($pfx eq '+') ? $W302+1 : 0;
				print V ($W302 ? "Do" : "Don't"), " follow 302 redirects.\n" if $V>1;
			} elsif ($opt =~ s/^(S)(.*)//i) {		# +S or -S (whether to write the data stream)
				$streamflg = ($pfx eq '+') ? 1 : 0;	# (Fixed: was $W302+1, a copy-paste from +R)
				$streamfile = $2 || $dfltstream;
				if ($streamflg) {
					print V "$P: Write data stream to '$streamfile'\n" if $V>1;
					if (open(STREAM,">$streamfile")) {
						print V "$P: Writing data stream to '$streamfile'\n" if $V>1;
					} else {
						print V "$P: ### Can't write '$streamfile' [$!]\n" if $V>1;
						$streamflg = 0;
						print V "$P: ### Ignoring \"$pfx$1$2\" option.\n" if $V>1;
					}
				} else {
					print V "$P: Don't write data stream.\n" if $V>1;
				}
			} elsif ($opt =~ s/^T//i) {		# +T or -T (WWW tracing) or -T<n> (timeout)
				if ($opt =~ s/^(\d+)//) {	# T with number:
					if ($pfx eq '+') {
						$tries = $1;		# +T\d+ is limit to tries.
						print V "tries = $tries.\n" if $V>1;
					} else {
						$HTTPtimeout = $1;	# -T\d+ is timeout.
						print V "HTTPtimeout = $HTTPtimeout sec.\n" if $V>1;
					}
				} else {					# T without number:
					if ($pfx eq '+') {
						$W3trace = 1;		# +T enables tracing.
						print V "Do produce WWW tracing.\n" if $V>1;
					} else {
						$W3trace = 0;		# -T disables tracing.
						print V "Don't produce WWW tracing.\n" if $V>1;
					}
				}
			} elsif ($opt =~ s/^V//i) {				# +V has two forms:
				if ($opt =~ s/^(\d+\.\d+)//)	{	# +V<HTTPversion>
					$HTTPvopt = $HTTPversion = $1;
					$opt = '';
					print V "$P: HTTP version '$HTTPvopt'\n" if $V>1;
				} elsif ($opt =~ s/^(\d+)//) {		# +V<debuglvl>
					$V = int($1);
					print V "$P: Verbose level V=$V\n" if $V>1;
				} else {
					print V "$P: V$opt not understood\n" if $V>1;
				}
			} elsif ($opt =~ s/^X//i) {		# +X
				$HTTPtimexit = ($pfx eq '+') ? 1 : 0;
				print V ($HTTPtimexit ? "Do" : "Don't"), " exit on timeout.\n" if $V>1;
			} else {
				print V "$P: unknown option \"$opt\" ignored.\n" if $V>0;
				$opt =~ s/.//;				# Discard this option char.
			}
		}
		next arg;
	}
	$try = $hdrlines = 0;		# Count the opens and header lines.
	$tries = 3 unless $tries;	# Default retry limit (see +T<n>).
try:
	while ($try++ < $tries && $hdrlines == 0) {
		print V "<!--$P: Try $try -->\n" if $V>0 && $try>1 && $W3trace;	# (Fixed: interpolated $F, never set here)
		sleep 1 if $try>1;	# Don't hit the server too hard
		unless (&openURL(*U,$u)) {
			print V "$P: Can't open \"$u\" ($URLerr)\n" if $V>1;
			$exitstat = 1;
			next arg;
		}
		print V "$P: Opened \"$u\"\n" if $V>1;
		if ($HTTPtimeout > 0) {
			alarm $HTTPtimeout;
			$savsig = $SIG{ALRM};
			$SIG{ALRM} = 'READalarm';
			print V "$P: Set alarm after $HTTPtimeout sec.\n" if $V>2;
		}
		$isChunked = $inChunked = 0;	# HTTP headers aren't chunked
#		$inTEXT  = $URLhdr ? 1 : 0;
#		$inHTML  =		# Is it HTML?
#		$inPRE   =		# Within a <pre>...</pre> section?
		$statmax = 0;	# Max status code seen
		$Ubuf    = '';	# Input buffer
		$staterr = '';	# Last error message
		# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
		# Here's where we read the data from one URL and write it  to #
		# standard output.  If you want to do something else with the #
		# data, you should rewrite this loop:                         #
		# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
		if ($outfile && !$outopen) {	# Do we need to open the output?
			print V "$P: Open \"$outfile\"\n" if $V>1;
			if (open(O,">$outfile")) {	# Try to open it for writing.
				print V "$P: Writing \"$outfile\" [$!]\n" if $V>1;
			} else {
				print V "$P: Can't write \"$outfile\" [$!]\n" if $V>0;
				$outfile = '';			# Fall back to stdout.
			}
			$outopen = 1;
		}
		print V "$P: Headers (URLhdr=$URLhdr) ...\n" if $V>1 && $W3trace;
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# In all cases, we must first read through the HTTP headers.  We look #
# for a few of them, and set global variables to match what we see.   #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
hdr:	while ($URLhdr && ($b = &Line())) {
			print "<!--$P: Next header line-->\n"  if $V>1 && $W3trace;
			$statcode = 0;
#			$inTEXT = 1;
			$b =~ s/\s+$//;				# Discard trailing white stuff
			print "URLhdr=$URLhdr W3hdrs=$W3hdrs W3trace=$W3trace outfile=$outfile.\n" if $V>1;
			if ($W3hdrs) {
				if ($outfile) {print O "$b\n"} else {print "$b\n"}
				sleep 1 if $V>1;
			}
			if (length($b) > 0) {		# Anything left in the buffer?
				++$hdrlines;
				$statcode = 0;
				if ($b =~ /^HTTP\/([\d.]+)\s+(\d+)\s+(.*)/) {	# Status line
					$httpvrs = $1;
					$statcode = $2;
					$statmsg = $3;
					print "<!--$P: httpvrs=$httpvrs statcode=$statcode -->\n" if $V>1 && $W3trace;
					if ($statcode > $statmax) {$statmax = $statcode; $staterr = $statmsg}
					if ($W302 > 0 && $statcode == 302) {
						print "<!--$P: MOVED -->\n" if $V>1 && $W3trace;
						$moved = 1;
						--$W302;	# Decrease the redirect hop count
					}
					if ($httpvrs eq '1.1' && $statcode >= 400) {
						if ($HTTPversion ne '1.1') {
							print "<!--$P: HTTP/$HTTPversion => HTTP/1.1 404 kludge -->\n" if $W3trace && $V>2;
							$kludge1_1_404 = 1;
							$HTTPversion = '1.1';
							redo arg;	# Retry same URL as HTTP/1.1
						} else {
							print "<!--$P: HTTP/1.1 $statcode $statmsg (probably real) \"$u\" -->\n" if $W3trace && $V>1;
						}
					}
				} elsif ($b =~ m"^Transfer-Encoding:\s*(.*)$"i) {
					if ($1 eq 'chunked') {
						print "<!--$P: $b \"$u\"-->\n" if $V>1 && $W3trace;
						$isChunked = 1;	# We have to handle chunked encoding
						$inChunked = 0;	# HTTP headers aren't chunked
					}
#				} elsif ($b =~ m"^Content-type:\s*(.*)/(.*)$"i) {
#					$doctype = lc($1);
#					$subtype = lc($2);
#					print "<!--$P: Content-type: \"$doctype/$subtype\" -->\n" if $V>1 && $W3trace;
#					if ($doctype eq 'text') {
#						$inTEXT = 1;
#						$inHTML = ($subtype eq 'html') ? 1 : 0;
#					} else {
#						$inTEXT = $inHTML = 0;
#					}
#					print "<!--$P: inTEXT=$inTEXT inHTML=$inHTML -->\n" if $V>1 && $W3trace;
				} elsif ($moved && ($b =~ /^Location:\s*(.*)$/)) {
					print "<!--$P: New URL \"$1\" -->\n" if $V>1 && $W3trace;
					$u = &URL($1);
					redo arg;	# Follow the redirect from the top
				}
				if ($statcode >= 400) {
					print "<!--$P: Fail with HTTP err $statcode ($statmsg)-->\n" if $W3trace && $V>0;
					print "<!--$P: CAN'T GET \"$u\"-->\n" if $W3trace && $V>0;
				}
			} else {
				$URLhdr = 0;		# Blank line ends headers
			}
			print "<!--$P: End of header line-->\n"  if $V>1 && $W3trace;
		}
		print "<!--$P: End of headers-->\n"  if $V>1 && $W3trace;
	}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# We should now be positioned just after the double  CRLF  that  ends #
# HTTP header lines.  The rest should be the contents of the file.    #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	if ($statmax >= 400) {	# Did we get a fatal error code?
		print V "<!--$P: Exit 1 because HTTP status = $statmax -->\n" if $V>2;
		exit 1;
	}
	$/ = undef;				# Slurp mode for the data phase
	if ($HTTPtimeout > 0) {alarm $HTTPtimeout}
	print V "$P: Headers done (URLhdr=$URLhdr)\n" if $V>1;	# (Fixed: printed "$P/$P:")
	if (!$W3data) {			# Data not wanted?
		print V "$P: Data not wanted, quitting.\n" if $V>1;
		close(U);			# Close this connection.
		next arg;			# Go on to next URL.
	}
#	if ($W3hdrs) {
#		print V "$P: Writing NL\n" if $V>1;
#		if ($outfile) {print O "\n"} else {print "\n"}
#	}
# Now we copy the data, doing any needed processing to each line.
data:
	while ($b = &Line()) {
		print V "$P: Got: \"$b\"\n" if $V>5;
		if (!$W3hdrs && $URLhdr) {	# Suppressing header lines.
			print V "$P: HDR check in \"$b\"\n" if $V>1;
			if ($b =~ s/^.*\r\n\r\n//s) {
				$URLhdr = 0;		# Found \n\n separator.
			} else {
				next;				# No separator, discard it all.
			}
		}
#		if ($MACfl) {$b =~ s"\r\n?"\n"g}
		if ($outfile) {print O $b} else {print $b}
		if ($HTTPtimeout) {alarm $HTTPtimeout}	# Reset the watchdog per line
	}
	unless (defined $b) {
		print V "\n$P: Error reading \"$u\" ($tries tries; reason: \"$!\", $errmsg)\n" if $V>0;
		$exitstat = $?;
	}
}

exit $exitstat;
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

sub ChkSiz {my $F = 'ChkSiz';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Remove a chunk size from the start of $Ubuf and set  the  global  variables #
# $ChnkSize,  $ChnkLeft  and  $ChnkNeed  to handle the new chunk.  Try to get #
# enough data to complete the chunk.                                          #
#                                                                             #
# Globals used: $Ubuf/$UbufSize (input buffer), handle U (the HTTP socket),   #
# $bufsiz (read size), STREAM/$streamflg (+S raw-stream copy).                #
# Returns the new chunk size, or 0 if $Ubuf is empty after the refill.        #
# NOTE(review): if no hex size is found, $ChnkSize keeps its PREVIOUS value   #
# (the resets below are deliberately commented out) and that stale value is   #
# returned -- confirm callers expect this.                                    #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	local($n,$buf,$x);
#	$ChnkSize = 0;
#	$ChnkLeft = 0;
	$Ubuf =~ s/^[\r\s]+//;		# Strip off any initial white stuff
	$UbufSize = length($Ubuf);
	print V "$P/$F: Ubuf has $UbufSize bytes.\n" if $V>1;
	# First refill: make sure we have enough bytes to hold a hex size line.
	while ($UbufSize < 10) {
		print V "$P/$F: Ubuf needs more data.\n" if $V>1;
		if (($n = sysread(U,$buf,$bufsiz)) > 0) {
			print V "$P/$F: Got $n bytes from URL read\n" if $V>2;
			$Ubuf .= $buf;
			print STREAM $buf if $streamflg;	# +S: copy raw bytes to stream file
		} elsif ($n == 0) {		# EOF
			print V "$P/$F: Got EOF from URL read\n" if $V>1;
			last;
		} else {
			print V "$P/$F: Got $n from URL read ($!)\n" if $V>0;
			last;
		}
		$UbufSize = length($Ubuf);
		print V "$P/$F: Ubuf has $UbufSize bytes.\n" if $V>1;
	}
	print V "$P/$F: Ubuf=\"$Ubuf\"\n" if $V>4;
	return 0 unless $Ubuf;
	if ($V>1) {
		$x = &esc(substr($Ubuf,0,20));		# Show initial 20 bytes in buffer
		print V "$P/$F: x=[$x]\n";
	}
	if ($Ubuf =~ s/^[\r\n]*([0-9a-fA-F]+)\s*\r*\n//) {	# Match hex chunk size + \s\r\n terminator
		print V "$P/$F: Found '$1'\n" if $V>1;
		$ChnkSize = hex($1);
		$UbufSize = length($Ubuf);
		$ChnkLeft = $ChnkSize;
		# ChnkNeed = how many more bytes must still arrive to complete the chunk.
		$ChnkNeed = ($UbufSize < $ChnkSize) ? $ChnkSize - $UbufSize : 0;
	} else {		# Chunk size not found here
		if ($V>0) {	# Loud diagnostic block: a missing size means we lost sync.
			local($xxx) = '#' x 32;
			print V "\n$xxx\n";
			$x = &esc(substr($Ubuf,0,20));		# Show initial 20 bytes in buffer
			print V "$P/$F: Ubuf: $x ...\n";
			print V "$P/$F: No hex number where chunk size expected!\n";
			print V "$P/$F: URL =\"$u\"\n";
			print V "$P/$F: Ubuf=\"$Ubuf\"\n";
			print V "$xxx\n";
		}
	}
	print V "$P/$F: ChnkSize=$ChnkSize ChnkLeft=$ChnkLeft ChnkNeed=$ChnkNeed UbufSize=$UbufSize.\n" if $V>2;
	# Second refill: keep reading until the whole chunk is in $Ubuf (or EOF/error).
	while ($ChnkSize > $UbufSize) {
		print V "$P/$F: Ubuf needs $ChnkSize bytes, has only $UbufSize.\n" if $V>1;
		if (($n = sysread(U,$buf,$bufsiz)) > 0) {
			print V "$P/$F: Got $n bytes from URL read\n" if $V>2;
			$Ubuf .= $buf;
			print STREAM $buf if $streamflg;	# +S: copy raw bytes to stream file
		} elsif ($n == 0) {		# EOF
			print V "$P/$F: Got EOF from URL read\n" if $V>1;
			last;
		} else {
			print V "$P/$F: Got $n from URL read ($!)\n" if $V>0;
			last;
		}
		$UbufSize = length($Ubuf);
		print V "$P/$F: Ubuf now has $UbufSize bytes.\n" if $V>1;
	}
	return $ChnkSize;
}

sub HTMLdir {my $F = 'HTMLdir';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# This routine reads a  directory  and  produces  a  simplified  html #
# listing. But you might want to reformat it in some other way that's #
# more useful for your application. We do the work in a child process #
# and return with F open to the pipe from the child.                  #
#                                                                     #
# Args:    *F = glob for the read handle; $d = directory path.        #
# Returns: 1 in the parent (F open for reading); the child never      #
#          returns -- it prints the listing and exits.                #
# Side effects: sets global $URLhdr=1 so the caller skips the fake    #
# HTTP headers the child emits; uses global $u in the page title.     #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	local(*F,$d) = @_;
	local(@files,$f,$parent,@st,$mt,*DIR);
	$HTTPversion = '1.0' if !$HTTPversion;
	# open(F,'-|') forks: parent gets a read pipe from the child's stdout.
	if (open(F,'-|')) {	# Parent.
		$URLhdr = 1;	# Warn caller of HTML headers.
		return 1;		# Parent returns the open file.
	}
	# Child reads the directory.
	if (opendir(DIR,$d)) {
		print V "HTMLdir: Producing HTML headers.\n" if $V>2;
		# Fake an HTTP response so the caller can treat us like a server:
		print "HTTP/$HTTPversion 200 OK\n";
		print "Server: HTMLdir.pm\n";
		if (@st = stat($d)) {
			$mt = gmtime($st[9]);	# st[9] = mtime
			print "Last-Modified: $mt GMT\n";
		}
		print "Content-Type: text/html\n\n";
		print "<HEAD><TITLE>Index of $u</TITLE></HEAD><BODY>\n";
		print "<PRE>\n";
		print "<IMG SRC=\"/icons/blank.xbm\" ALT=\"     \"> Name\n";
		@files = readdir(DIR);
		closedir DIR;
		for $f (sort @files) {
			if (-d "$d/$f") {
				if ($f eq '.') {		# Ignore self reference.
				} elsif ($f eq '..') {	# Parent reference.
					($parent = $d) =~ s"/[^/]+/+$"/";
					print "<!-- parent=\"$parent\" -->\n";
					print "<IMG SRC=\"/icons/back.gif\" ALT=\"[DIR]\"> <A HREF=\"../\">Parent directory</A>\n";
				} else {
					print "<IMG SRC=\"/icons/menu.gif\" ALT=\"[DIR]\"> <A HREF=\"$f/\">$f/</A>\n";
				}
			} else {
				print "<IMG SRC=\"/icons/text.gif\" ALT=\"[___]\"> <A HREF=\"$f\">$f</A>\n";
			}
		}
		exit 0;		# Child is done; EOF appears on F in the parent.
	}
	print V "HTMLdir: Can't open directory \"$d\" ($!)\n" if $V>0;
	exit $!;	# Child exits with the errno as its status.
}

sub HTTPalarm {my $F='HTTPalarm';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# SIGALRM handler armed by conHTTP while a connect() is pending.  Instead of  #
# killing the process it just records a 'timeout' error for the caller.       #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	my $elapsed = time - $conHTTPtime;	# Seconds since the connect began
	if ($W3trace) {
		print "<!--$F: ALARM after $elapsed sec -->\n";
	}
#	exit -1;
	$errmsg = 'timeout';	# Let the caller discover what happened
}

sub conHTTP {my $F='conHTTP';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# NAME                                                                #
#   conHTTP - make HTTP connection.                                   #
#                                                                     #
# SYNOPSIS                                                            #
#   $stat = &conHTTP(*F,'fubar.com:1234');                            #
#                                                                     #
# DESCRIPTION                                                         #
#   This  accepts a URL's host:port portion, and attempts to make the #
#   connection.  If successful, we return 1 with F open  to  the  TCP #
#   socket.   If  we fail, we return 0, and F may or may not be open. #
#   (Maybe we should close it.)  On failure, $errmsg or $exitstat  is #
#   set to describe what went wrong.                                  #
#                                                                     #
# TIMEOUTS                                                            #
#   I've added a timeout kludge:  If $HTTPtimeout is nonzero, we will #
#   exit after that many seconds. This is drastic, but it seems to be #
#   the only solution to the hung-connect problem.   This  is  mostly #
#   used  in webcat, which is used as a subprocess by other programs. #
#   If you call "webcat -T15 ...", it will exit after 15  seconds  if #
#   the connection can't be made, and you can go about your business. #
#                                                                     #
# AUTHOR                                                              #
#   <a href="mailto:jc@trillian.mit.edu">John Chambers</a>            #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	local(*HTTPsock,$hp) = @_;
	local($a,$b,$c,$d);
	local(@addrs,$host,$port,$savsig,$t,$this,$that,$This,$That);
	# Give every global we touch a sane default.
	$HTTPtimeout = 30   unless defined $HTTPtimeout;
	$W3trace     =  0   unless defined $W3trace;
	$HTTPalrm    =  0   unless defined $HTTPalrm;	# Alarm needs to be handled
	$conHTTPs    =  0   unless defined $conHTTPs;	# Count of connections
	$conHTTPtime = time unless defined $conHTTPtime;
	$savsig      = ''   unless defined $savsig;
	# Split "host:port"; a bare host defaults to port 80.
	if (($host,$port) = ($hp =~ m"^(.*):(\d+)$")) {
		print V "$P/$F: host=\"$host\" port=\"$port\"\n" if $V>5;
	} else {
		$host = $hp;
		$port = 80;
		print V "$P/$F: host=\"$host\" port=$port.\n" if $V>5;
	}
	# Old-school socket constants and sockaddr template, hard-coded rather
	# than pulled from Socket.pm.
	$AF_INET = 2;
	$SOCK_STREAM = 1;
	$sockaddr = 'S n a4 x8';
	($name,$aliases,$proto) = getprotobyname('tcp');
	($name,$aliases,$port) = getservbyname($port,'tcp')
		unless $port =~ /^\d+$/;	# Resolve a symbolic service name
	$thisaddr = "\0\0\0\0";		# INADDR_ANY for our end
	print "<!--$F: Get address for \"$host\" -->\n" if $W3trace;
	($name,$aliases,$type,$len,@addrs) = gethostbyname($host);
	print V "$P/$F: type=$type len=$len.\n" if $V>5;
	if (!@addrs) {
		$errmsg = "No address for \"$host\"";
		return 0;
	}
	$thataddr = $addrs[0];	# Use the first address returned
	$this = pack($sockaddr, $AF_INET, 0, $thisaddr);
	$that = pack($sockaddr, $AF_INET, $port, $thataddr);
	# Dotted-quad forms of both ends, used only in trace messages.
	($a,$b,$c,$d) = unpack('C4',$thisaddr);
	$This = "$a.$b.$c.$d:0";
	($a,$b,$c,$d) = unpack('C4',$thataddr);
	$That = "$a.$b.$c.$d:$port";
	print V "<!--$F: That=\"$That\"-->\n" if $V>1;
	if (socket(HTTPsock,$AF_INET,$SOCK_STREAM,$proto)) {
		print V "<!--$F: Got socket-->\n" if $V>1;
	} else {
		print V "<!--$F: Can't get socket ($!)-->\n" if $V>0;
		$exitstat = $!;
		return 0;
	}
	if (bind(HTTPsock,$this)) {	# Is this actually needed?
		$t = time - $conHTTPtime;
		print V "$P/$F: Bind to \"$This\" succeeded in $t sec.\n" if $V>5;
	} else {
		$t = time - $conHTTPtime;
		print V "$P/$F: Bind to \"$This\" failed in $t sec ($!)\n" if $V>2;
		$exitstat = $!;
		return 0;
	}
	++$conHTTPs;
	$conHTTPtime = time;
	if ($HTTPtimeout > 0) {		# Arm the hung-connect timeout.
		print "<!--$F: HTTPtimeout=$HTTPtimeout -->\n" if $W3trace;
		alarm $HTTPtimeout;
		$savsig = $SIG{ALRM};
		$SIG{ALRM} = 'HTTPalarm';
		print V "$P/$F: Set alarm after $HTTPtimeout sec.\n" if $V>3;
	}
	print V "<!--$F: That='$That' -->\n" if $V>1;
	print "<!--$F: Connecting to \"$That\" -->\n" if $W3trace || $V>1;
	# NOTE(review): this rebuilds $that with Socket's sockaddr_in(),
	# overriding the hand-packed value above; the pack() result only
	# survives if sockaddr_in() fails.  Presumably Socket.pm is loaded
	# elsewhere in this file -- confirm.
	if ($that = sockaddr_in($port,$thataddr)) {
		print V "<!--$F: sockaddr_in() succeeded -->\n" if $V>2;
	} else {
		print V "<!--$F: ### sockaddr_in() failed for port='$port' thataddr='$thataddr' ###-->\n" if $V>1;
	}
	if (connect(HTTPsock,$that)) {
		$t = time - $conHTTPtime;
		print V "$P/$F: Connect $conHTTPs to \"$That\" succeeded in $t sec.\n" if $V>5;
		print "<!--$F: Connected to \"$That\" -->\n" if $W3trace;
		if ($HTTPtimeout > 0) {		# Disarm the timeout.
			alarm 0;
			$SIG{ALRM} = $savsig;
			print V "$P/$F: Set alarm 0.\n" if $V>5;
		}
	} else {
		$t = time - $conHTTPtime;
		print "<!--$F: Can't connect to \"$That\" in $t sec. ($!)\n" if $W3trace;
		print V "$P/$F: Connect $conHTTPs to \"$That\" failed ($!) after $t sec.\n" if $V>2;
		$exitstat = $!;
		if ($HTTPtimeout > 0) {		# Disarm the timeout.
			alarm 0;
			$SIG{ALRM} = $savsig;
			print V "$P/$F: Set alarm 0.\n" if $V>5;
		}
		return 0;
	}
	# NOTE(review): $HTTPalrm is forced to 0 above and nothing visible in
	# this file sets it, so this cleanup branch appears to be dead code --
	# confirm before removing.
	if ($HTTPalrm) {
		close HTTPsock;
		alarm 0;
		$SIG{ALRM} = $savsig;
		print V "$P/$F: Set alarm 0.\n" if $V>5;
	} else {
		select(HTTPsock); $| = 1; select(STDOUT);	# Unbuffer the socket
	}
	return 1;
}

sub LocalSetup { $Vtest = shift;
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Figure out some stuff about where we're running The end result of  this  is #
# to initialize a list of global variables.  The return value of this routine #
# is no longer used for anything.                                             #
#                                                                             #
# Arg: $Vtest - minimum verbosity to force when the request comes from the    #
# author's home machine or home network (see the $RA checks below).           #
# Globals set: $P, $V, $RA, $ymd, $hms, $hostname, $host (plus PATH/umask/$|).#
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	$ENV{PATH} = '/bin:/sbin:/usr/bin:/usr/sbin';	# Safe PATH for taint mode (-T)
	push @INC, '.';
	$testing = '0';	# Set to 1 while testing
	umask 0002;		# Output files must be group writable
	$| = 1;			# Unbuffered STDOUT
	$" = ',';		# Used in verbose messages
	($P = $0 || 'webcat') =~ s".*/"";	# Basename of this program
	# First digit found in V_<prog> or D_<prog> becomes the verbose level;
	# the appended ' 1' supplies a digit when neither variable has one.
	# NOTE(review): with $testing = '0', an unset environment yields the
	# string "0 1", so the default level is 0, not 1 -- confirm intended.
	(($ENV{"V_$P"} || $ENV{"D_$P"} || $testing) . ' 1') =~ /(\d+)/;	# Verbose level
	$V = $1;
	($ENV{REMOTE_ADDR} || '0.0.0.0') =~ m/^\s*([\d.]+)\s*$/;	# Untaint client address
	$RA = $1;
	if ($RA eq '207.172.223.184') {		# My home machine
		$V = $Vtest if $V<$Vtest;
	} elsif (substr($RA,0,8) eq '192.168.') {	# My home network
		$V = $Vtest if $V<$Vtest;
	}
	local($ss,$mm,$hh,$DD,$MM,$YY) = gmtime(time);		# Current date/time
	$ymd = sprintf("%d-%02d-%02d",1900+$YY,1+$MM,$DD);	# CCYY-MM-DD
	$hms = sprintf("%02d:%02d:%02d",$hh,$mm,$ss);		# HH:MM:SS
	$hostname = `/bin/hostname`;		# What does this machine call itself?
	$hostname =~ s/^\s*([-.\w]*)([\r\s]*)$/$1/;		# Trim surrounding whitespace/CRs
	($host = $1) =~ s/\..*//;			# Extract first field of name (strip domain)
	print "$P: V=$V on $host at $ymd $hms\n" if $V>1;
#	($cwd = `/bin/pwd`) =~ s/[\r\s]+$//;
#	$hstloc = $host . "-cgilocal.pm";
#	print "$P: hstloc=\"$hstloc\" cwd=\"$cwd\"\n" if $V>1;
#	$cgiloc = (-f $hstloc) ? $hstloc : 'cgilocal.pm';
#	print "$P: cgiloc=\"$cgiloc\"\n" if $V>1;
#	return $cgiloc;
}

sub sendHTTP {my $F='sendHTTP';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Write each argument string to the server socket F.  A copy with \r, \n and #
# \t spelled out as escapes is used for the trace messages, so they stay on  #
# one line.                                                                   #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	local($line,$shown);
	foreach $line (@_) {
		($shown = $line) =~ s"\r"\\r"g;	# Make the control chars visible
		$shown =~ s"\n"\\n"g;
		$shown =~ s"\t"\\t"g;
		print "<!--$F: Send server \"$shown\" -->\n" if $W3trace;
		print V "$P/$F: Send \"$shown\"\n" if $V>4;
		print F $line;			# The real bytes go to the socket untouched
		print V "$P/$F: Sent \"$shown\"\n" if $V>5;
	}
}

sub Hsym {my $F = 'Hsym';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Map an HTML entity name (the "foo" of "&foo;") to its 8859-1 character via #
# the global %htmlsym table.  Names not in the table are handed back in the  #
# original "&foo;" encoding so the caller can pass them through untouched.   #
# Eventually we will have to deal with the Unicodization of the unix world.  #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	local($name) = @_;
	local($code) = $htmlsym{$name};
	return defined($code) ? chr($code) : "&$name;";
}

sub Line {my $F='Line';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Return one chunk from the URL file U.  While in the HTTP  headers  ($URLhdr #
# true),  we  treat the data as text and return it line at a time, regardless #
# of settings. When we reach the data, we then start looking at how we are to #
# process the data.                                                           #
#                                                                             #
# Globals used: $Ubuf (carry-over input buffer), $URLhdr (true while still in #
# the headers), $isChunked/$inChunked/$ChnkSize/$ChnkLeft (chunked  transfer- #
# coding state maintained with &ChkSiz), $bufsiz (read size), and STREAM,     #
# which gets a raw copy of all input when $streamflg is set.                  #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	local($buf,$c,$C,$s,$n,$Lbuf);
loop: while (1) {
		print "<!--$P: Line-->\n"  if $V>1 && $W3trace;
		unless ($Ubuf) {	# Buffer is empty; try a refill first.
			print "<!--$P: Fill buffer with $bufsiz bytes-->\n"  if $V>3 && $W3trace;
			if (defined($n = sysread(U,$buf,$bufsiz)) && ($n > 0)) {
				$Ubuf .= $buf;
				print STREAM $buf if $streamflg;
			}
		}
		if ($URLhdr) {
			$Lbuf = length($Ubuf);
			print "<!--$P: In headers, $Lbuf bytes in Ubuf-->\n"  if $V>3 && $W3trace;
			print "<!--$P: $Ubuf=\"$Ubuf\"-->\n"  if $V>3 && $W3trace;
			if ($Ubuf =~ s/^([^\r\n]*)([\r\n])//) {	# Read headers line by line
				print "<!--$P: Found line -->\n"  if $V>1 && $W3trace;
				$s = $1;	# The header line proper
				$c = $2;	# Its first line-end character
				$C = ($c eq "\n") ? '\n' : ($c eq "\r") ? '\r' : $c;	# Printable form
				print V "$P/$F: Line $s$C\n" if $V>3;
				if ($c eq "\r")  {		# CRLF kludge (DOS, HTTP)
					unless ($Ubuf) {	# Refill so we can see the next char
						if (($n = sysread(U,$buf,$bufsiz)) > 0) {
							$Ubuf .= $buf;
							print STREAM $buf if $streamflg;
						}
					}
					$Ubuf =~ s/^\n//;	# remove \n after \r
					$c = "\n";
				}
				if ($c eq "\n")  {		# LFCR kludge (VMS)
					unless ($Ubuf) {
						if (($n = sysread(U,$buf,$bufsiz)) > 0) {
							$Ubuf .= $buf;
							print STREAM $buf if $streamflg;
						}
					}
					$Ubuf =~ s/^\r//;	# remove \r after \n
				}
				unless ($s) {	# Null line ends HTTP headers
					print V "$P/$F: End of headers, isChunked=$isChunked.\n" if $V>1;
					$URLhdr = 0;
					if ($isChunked) {	# Is the data chunked?
						$inChunked = 1;	# We are in chunked data now
						&ChkSiz();		# Get size of first chunk
					}
				}
				return "$s$c";			# Return header line
			} else {
				print "<!--$P: No line in Ubuf-->\n"  if $V>1 && $W3trace;
				if (($n = sysread(U,$buf,$bufsiz)) > 0) {
					$Ubuf .= $buf;
					print STREAM $buf if $streamflg;
					$Lbuf = length($Ubuf);
					print "<!--$P: Now $Lbuf bytes in Ubuf-->\n"  if $V>3 && $W3trace;
				} elsif ($n == 0) {
					# NOTE(review): EOF while still in the headers just keeps
					# looping (busy wait) instead of returning -- presumably
					# for streaming input that may grow; confirm intended.
					print "<!--$P: EOF-->\n"  if $V>3 && $W3trace;
				} else {
					print "<!--$P: Input error ($!)-->\n"  if $V>3 && $W3trace;
					return $Ubuf ? $Ubuf : undef;	# Hand back any residue
				}
			}
			print "<!--$P: No line in Ubuf-->\n"  if $V>1 && $W3trace;
		} else {		# We're in the data now
			print V "$P/$F: Data ...\n" if $V>4;
			$UbufSize = length($Ubuf) unless $UbufSize;
			print V "$P/$F: ChnkSize=$ChnkSize ChnkLeft=$ChnkLeft UbufSize=$UbufSize.\n" if $V>3;
			if ($inChunked) {
				print V "$P/$F: Data is chunked.\n" if $V>4;
				if ($ChnkLeft <= $UbufSize) {	# Rest of the chunk is already buffered
					print V "$P/$F: Ubuf has enough data.\n" if $V>4;
					$buf = substr($Ubuf,0,$ChnkLeft,'');
					$UbufSize -= $ChnkLeft;	# Deduct the bytes removed (before zeroing)
					$ChnkLeft = 0;			# This chunk is fully delivered
				} else {	# Deliver what we have; the chunk continues
					print V "$P/$F: Ubuf has less than a chunk.\n" if $V>4;
					$buf = $Ubuf;
					$Ubuf = '';
					$ChnkLeft -= $UbufSize;	# That much of this chunk still to come
					$UbufSize = 0;
				}
				# NOTE(review): ChkSiz is called even when $ChnkLeft > 0
				# (chunk not yet exhausted); presumably it returns early in
				# that case -- confirm against ChkSiz itself.
				&ChkSiz();		# Get size of next chunk
			} else {
				print V "$P/$F: Data not chunked.\n" if $V>4;
				unless ($Ubuf) {
					$n = sysread(U,$Ubuf,$bufsiz);	# Read in the next chunk of data
					print V "$P/$F: Read returned $n bytes.\n" if $V>1;
					if ($n > 0) {print STREAM $Ubuf if $streamflg}
					if (!defined($n)) {		# Read error
						print V "$P/$F: Read failed; n=$n ($!)\n" if $V>1;
						$staterr =  "$!";		# Remember the error message
						$buf = $Ubuf;			# Whatever is left
						$Ubuf = undef;
					} elsif ($n == 0) {	# No data yet 
						sleep 1;		# Avoid busy wait
				#	} else {
				#		print V "$P/$F: Read failed; n=$n ($!)\n" if $V>1;
				#		$staterr =  "$!";		# Remember the error message
				#		$buf = $Ubuf;			# Whatever is left
				#		$Ubuf = ($n < 0) ? undef : '';
					}
				}
				$buf = $Ubuf;
				$Ubuf = '';
			}
			return $buf;
		}
		print "<!--$P: Loop end -->\n"  if $V>1 && $W3trace;
	}
	print V "$P/$F: Can't get here.\n" if $V>0;
}

sub READalarm {my $F='READalarm';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# SIGALRM handler for a read that has taken too long: report it and bail out #
# of the whole program, since the read is never going to finish.             #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	my $waited = time - $HTTPopentime;	# How long we sat on the read
	if ($W3trace || $V>0) {
		print "<!--$P/$F: ALARM after $waited sec -->\n";
	}
	exit -1;
}

sub URL {my $F = 'URL'; local($u) = shift;
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Massage a URL into its general form: a full http:// URL passes through     #
# unchanged, a bare absolute path is qualified with the global $Host, and    #
# anything else is reported (when tracing) and returned as-is.               #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	local($full);
	return $u if $u =~ m"^(http)://([^/]+)(/.*)$";	# Already in general form
	if ($u =~ m"^/+(.*)$") {		# Absolute path: attach our host
		$full = "http://$Host/$1";
		print "<!--$P/$F: URL = \"$full\"\n" if $W3trace || $V>0;
		return $full;
	}
	print "<!--$P/$F: Can't parse \"$u\"\n" if $W3trace || $V>0;
	return $u;
}

sub URLalarm {my $F = 'URLalarm';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# SIGALRM handler armed by openURL: if the open hangs this long, give up and #
# exit so any parent process can get on with its business.                   #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	my $waited = time - $HTTPopentime;	# How long the open has been pending
	if ($W3trace) {
		print "<!--$P/$F: ALARM after $waited sec -->\n";
	}
	exit -1;
}

sub URLalarmoff {my $F = 'URLalarmoff';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Disarm the open-timeout: cancel any pending SIGALRM and clear the handler. #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	my $remained = alarm(0);	# Cancel the pending alarm, if any
	$SIG{ALRM} = '';			# Was $savsig; now simply cleared
	if ($V>2) {
		print V "$P/$F: Set alarm 0.\n";
	}
}

sub openURL {my $F='openURL';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#
# NAME
#   openURL - Open HTTP connection and request a URL
#
# SYNOPSIS
#   &openURL(*HANDLE,$URL,$OP)
#
# DESCRIPTION
#   This routine accepts a URL and attempts  to  open  it  for  reading.   If
#   successful,  the  return  value  is  1,  and  FD will be the open file (a
#   socket, actually).  The caller can read the data from it.  If you're  not
#   going  to  exit after the EOF, it's a good idea to close it when you're
#   done, to prevent the connection from hanging around.
#
#   The third parameter, the "operation", defaults to GET.  You may also pass
#   'POST' as the third arg, and we will send it, and not send the extra \n.
#
#   We send a "$OP $URL HTTP/$HTTPversion\n\n" request, and the  server  will
#   send back first the HEAD information for the URL, then \r\n\r\n, then the
#   data.  Note that \r\n\r\n is specified in the HTTP specs, and so far  not
#   even Microsoft has seen fit to violate this.
#
#   If the attempt to parse the URL fails, we will try to open it as a  local
#   file,  and  if  this succeeds, we will return success.  So any local file
#   whose name doesn't look like a URL can be used as a "remote" file.
#
# ENVIRONMENT
#   We use several global variables:
#
#   $W3proxy  is host name (or IP address) and port for HTTP proxy.
#   $W3nopxy  is perl pattern for local (non-proxied) hosts.
#   $URLhdr   is set to 1 indicating that we're in the header 
#   $URLerr   is set to an error message if we fail.
#
#   We also set $W3proxy and $W3nopxy from the environment variables  W3PROXY
#   and  W3NOPROXY,  if  the latter are defined and the former aren't, so the
#   caller should probably not worry about this in general.
#
# BUGS
#   At present, we only do the HTTP:// protocol.  Maybe eventually ...
#
# AUTHOR
#   John Chambers <jc@trillian.mit.edu>
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	local(*F,$url,$OP) = @_;
	local($t);
	local($savsig);
	# Defaults for the globals we rely on.
	$HTTPopens    = 0     unless defined $HTTPopens;
	$HTTPopentime = 0     unless defined $HTTPopentime;
	$HTTPversion  = '1.0' unless defined $HTTPversion;
	$OP = 'GET' if !$OP;	# Default operation
	$HTTPtimeout = 30 unless defined $HTTPtimeout;
	print V "$P/$F: \"$url\"\n" if $V>4;
	print "<!--$F: $url -->\n" if $W3trace && $V>5;
	$W3proxy = $ENV{W3PROXY}   if !$W3proxy;	# Pick up proxy config from env
	$W3nopxy = $ENV{W3NOPROXY} if !$W3nopxy;
	if ($HTTPtimeout > 0) {		# Arm the hung-open timeout.
		alarm $HTTPtimeout;
		$savsig = $SIG{ALRM};
		$SIG{ALRM} = 'URLalarm';
		print V "$P/$F: Set alarm after $HTTPtimeout sec.\n" if $V>2;
	}
	++$HTTPopens;
	$HTTPopentime = time;
	# Split the URL into protocol, host and file path.  If it doesn't look
	# like a URL at all, leave $Prot empty and fall through to the local
	# file/directory code below, as documented above.
	if (($Prot,$Host,$File) = ($url =~ m'^(\w+)://+([-_.:\w]+)(/.*)')) {
		;	# Full URL with a path.
	} elsif (($Prot,$Host) = ($url =~ m'^(\w+)://+([-_.:\w]+)$')) {
		$File = '/';	# Bare "prot://host"; default the path.
	} else {
		print V "$P/$F: Can't parse \"$url\"; trying it as a local path.\n" if $V>0;
		$Prot = '';		# Not a URL; handled as a local path below.
	}
	if ($Prot) {
		$url = "$Prot://$Host$File";	# Canonical form of the URL
		if ($W3proxy) {
			if ($W3nopxy && ($Host =~ $W3nopxy)) {
				print V "$P/$F: host \"$Host\" matches \"$W3nopxy\"\n" if $V>2;
			} else {
				print V "$P/$F: pxy=\"$W3proxy\" url=\"$url\"\n" if $V>2;
				print "<!--$F: Connecting to proxy \"$W3proxy\" -->\n" if $W3trace;
				if (&conHTTP(*F,$W3proxy)) {
					# A proxy wants the full URL on the request line.
					&sendHTTP("$OP $url HTTP/$HTTPversion\r\n");
					&sendHTTP("Accept: */*\r\n");
					if ($HTTPversion eq '1.1') {
						&sendHTTP("Host: $Host\r\n");
						&sendHTTP("Connection: Close\r\n");
					}
					print V "$P/$F: Sent \"$OP $url HTTP/$HTTPversion\\r\\n\"\n" if $V>5;
					if ($W3agentid) {
						&sendHTTP("User-agent: $W3agentid\r\n");
					}
					if ($OP eq 'GET') {
						&sendHTTP("\r\n");	# End of request headers
					}
					$URLhdr = 1;    # Note we're in the header.
					&URLalarmoff() if ($HTTPtimeout > 0);
					return 1;
				}
				$t = time - $HTTPopentime;
				$URLerr = "$F: Can't connect to proxy \"$W3proxy\"";
				print "<!--$F: $URLerr in $t sec ($!) -->\n" if $W3trace;
				print V "$P/$URLerr\n" if $V>0;
				&URLalarmoff() if ($HTTPtimeout > 0);
				return 0;
			}
		}
		print V "$P/$F: \"$url\" Prot=\"$Prot\" Host=\"$Host\" File=\"$File\"\n" if $V>2;
		($PROT = $Prot) =~ tr/a-z/A-Z/;	# Protocol name, uppercased
		if ($PROT eq 'HTTP') {
			print V "$P/$F: \"$url\" HTTP protocol\n" if $V>2;
			print "<!--$F: Connecting to server \"$Host\" -->\n" if $W3trace;
			if (&conHTTP(*F,$Host)) {
				print V "$P/$F: Connected to \"$Host\"\n" if $V>2;
				&sendHTTP("$OP $File HTTP/$HTTPversion\r\n");
				&sendHTTP("Accept: */*\r\n");
				if ($HTTPversion eq '1.1') {
					&sendHTTP("Host: $Host\r\n");
					&sendHTTP("Connection: Close\r\n");
				}
				if ($W3agentid) {
					&sendHTTP("User-agent: $W3agentid\r\n");
				}
				if ($OP eq 'GET') {
					&sendHTTP("\r\n");	# End of request headers
				}
				$URLhdr = 1;    # Note we're in the header.
				&URLalarmoff() if ($HTTPtimeout > 0);
				return 1;
			}
			$t = ($now = time) - $HTTPopentime;
			$URLerr = "$P/$F: Can't connect to host \"$Host\" in $t sec ($!)";
			$localtime = localtime($now);
			print "<!--$URLerr -->\n" if $W3trace;
			print V "[$localtime] $URLerr\n" if $V>0;
			&URLalarmoff() if ($HTTPtimeout > 0);
			return 0;
		}
		$URLerr = "Can't do protocol \"$Prot\"";
		print "<!--$P/$F: Can't open \"$url\" $URLerr -->\n" if $W3trace;
		print V "$P/$F: Can't open \"$url\" ($URLerr)\n" if $V>0;
		$exitstat = 254;
		&URLalarmoff() if ($HTTPtimeout > 0);
		return 0;
	}
	# Not a URL; try it as a local directory, then as a local file.
	if (-d $url) {
		print V "$P/$F: Directory \"$url\" ...\n" if $V>4;
		&URLalarmoff() if ($HTTPtimeout > 0);
		return &HTMLdir(*F,$url);
	}
	if (open(F,$url)) {
		print V "$P/$F: Local file \"$url\" opened.\n" if $V>4;
		$URLhdr = 0;    # No headers for local files (yet).
		&URLalarmoff() if ($HTTPtimeout > 0);
		return 1;
	}
	$URLerr = "$!";
	print "<!--$F: Can't read \"$url\" ($URLerr) -->\n" if $V>0;
	print V "$P/$F: Can't read \"$url\" ($URLerr)\n" if $V>0;
	$exitstat = int($!);
	&URLalarmoff() if ($HTTPtimeout > 0);
	return 0;
}

sub esc {my($s) = @_;
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Encode special chars as escape sequences.  We should probably include a few #
# more chars, but this is only used for debugging, and this is what we need.  #
# The mapping comes from the global %escmap table.                            #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	local($encoded) = $s;
	$encoded =~ s/([\n\r\t])/$escmap{$1}/g;	# Swap each control char for its escape
	print V "esc: \"$s\" -> \"$encoded\"\n" if $V>5;
	return $encoded;
}

sub Vopt {my $F = 'Vopt';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Set the verbosity from various environment variables. The value may #
# be  a  verbose  level (1 digit), plus an optional output file name. #
# The file V is opened to the file, if any, or STDERR by default. The #
# default  value  for the verbosity level is 1, which generally means #
# to produce only serious error messages.                             #
#                                                                     #
# Here's how this routine is typically called:                        #
#    ($P = $0) =~ s'.*/'' unless defined($P);                         #
#    &Vopt($ENV{"V_$P"} || $ENV{"D_$P"} || $ENV{"T_$P"} || '1');      #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	$Vopt = shift;
	$V = 1 unless defined $V;	# Provisional level for the messages below
	print "<br>Vopt: Vopt=\"$Vopt\"<br>\n" if $V>1;
	if ($Vopt =~ /^(\d+)(.+)$/) {	# Digits followed by a log file name
		$V = int($1);
		$Vfil = $2;
		if (!open(V,">>$Vfil")) {	# Append to the named log file
			# NOTE(review): V is not open yet when this append fails, so
			# this message likely goes nowhere -- confirm intended.
			print V "$P	Can't write \"$Vfil\" ($!)\n" if $V>0;
			open(V,">>&STDERR");	# Fall back to logging on STDERR
		}
	} else {			# Just a level (possibly empty); log to STDERR
		$V = $Vopt;
		open(V,">>&STDERR");
	}
	select V; $| = 1; select STDOUT; $| = 1;	# Unbuffer both V and STDOUT
	print V "$P started with V=$V $ymd $hms\n" if $V>1;
}
1;
