#!/usr/bin/perl
#
# This runs through all the hosts in the hst/ directory and tells us
# which have a robots.txt file.  The output is the list of URLs for
# the robots.txt files.
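#
# The hst/ directory is assumed to hold one entry per hostname (the
# filenames themselves are the hosts), e.g.:
#
#	hst/www.example.com
#	hst/ftp.example.org
#
# which would produce output such as:
#
#	http://www.example.com/robots.txt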


$| = 1;		# flush output after every print
$V = 1;		# verbosity; failures are reported on STDERR only when $V > 1

for $h (`ls hst`) {	# each entry in hst/ names one host
	chomp $h;
	$url = "http://${h}/robots.txt";
	$cmd = "webcat +THV1.1 $url";
	unless (open(P,"$cmd |")) {
		print STDERR "$0: Can't run \"$cmd\" ($!)\n" if $V>1;
		next;
	}
	while ($line = <P>) {
		$line =~ s/\s+$//;	# strip trailing whitespace, including CR
		# Accept either a plain status line ("HTTP/1.1 200 OK") or the
		# ERR-prefixed form ("HTTP/1.1 ERR 404 ..."); $2 is the status code.
		if (($line =~ m"^HTTP/([\d.]+)\s+(\d+)\s(.*)"i)
		||	($line =~ m"^HTTP/([\d.]+)\s+ERR\s+(\d+)\s(.*)"i)) {
			print "http://$h/robots.txt\n" if $2 < 400;
			last;	# only the first status line matters
		}
	}
	close P;
}
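
# Typical use: run from the directory containing hst/ and redirect the
# URL list, e.g. "./findrobots > robots-urls" (the names are illustrative).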
