#!/usr/bin/perl
#
# This runs through all the hosts in the hst/ directory and tells us
# which have a robots.txt file.  The output is the list of URLs for
# the robots.txt files.
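#
# The hst/ directory is assumed to hold one entry per hostname (the
# filenames themselves are the hosts), e.g.:
#
#	hst/www.example.com
#	hst/ftp.example.org
#
# which would produce output such as:
#
#	http://www.example.com/robots.txt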


$| = 1;		# flush output after every print
$V = 1;		# verbosity; failures are reported on STDERR only when $V > 1

for $h (`ls hst`) {	# each entry in hst/ names one host
	chomp $h;
	$url = "http://${h}/robots.txt";
	$cmd = "webcat +THV1.1 $url";
	unless (open(P,"$cmd |")) {
		print STDERR "$0: Can't run \"$cmd\" ($!)\n" if $V>1;
		next;
	}
	while ($line = <P>) {
		$line =~ s/\s+$//;	# strip trailing whitespace, including CR
		# Accept either a plain status line ("HTTP/1.1 200 OK") or the
		# ERR-prefixed form ("HTTP/1.1 ERR 404 ..."); $2 is the status code.
		if (($line =~ m"^HTTP/([\d.]+)\s+(\d+)\s(.*)"i)
		||	($line =~ m"^HTTP/([\d.]+)\s+ERR\s+(\d+)\s(.*)"i)) {
			print "http://$h/robots.txt\n" if $2 < 400;
			last;	# only the first status line matters
		}
	}
	close P;
}
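
# Typical use: run from the directory containing hst/ and redirect the
# URL list, e.g. "./findrobots > robots-urls" (the names are illustrative).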
