#!/usr/bin/perl -w
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#NAME
#  Hosts - Multi-Process abcbot controller
#
#SYNOPSIS
#  Hosts -m<M> -n<N> [host..]
#
#REQUIRES
#
#DESCRIPTION
#  This is a program for a simple controller for multiple invocations of  the
#  abcbot search program.  We start N copies of abcbot, each with a different
#  host, and when one dies,  we  start  another,  until  we  have  started  M
#  processes and all have exited.
#
#  If no hosts are listed on the command line, we make  a  list  of  all  the
#  hosts in the add/ and hst/ directories, and use them.
#
#  We also have a few heuristics for skipping over hosts.   One  is  that  if
#  there's  a lck/ file for the host, we assume that the last abcbot for that
#  host is either still running or bombed for some  reason.   We  produce  an
#  error message and skip it.
#
#  Also, if the host's file in the hst/ directory is too new,  we  skip  over
#  it. This is controlled by the $hostdelay variable, which can be set by the
#  HOSTDELAY environment variable (in seconds), and defaults to five days.
#
#OPTIONS
#
#  -m<M>
#    is the maximum number of processes to start.  The default is  1000,  or
#    the value of the <P>_maxprocs environment variable, where  <P>  is  the
#    name of this program.  -m0 means that there is no limit.
#
#  -n<N>
#    is the max number of subprocesses we have alive at the same  time.   The
#    default is 3, or the value of the  <P>_numprocs  environment  variable,
#    where <P> is the name of this program.
#
#REQUIRES
	push @INC, '.', 'lib', 'sh';
	require "Vopt.pm";		# Verbosity output stuff
	require "DT.pm";		# Date/time stuff
	require "Backup.pm";	# File-backup routine
#
#EXAMPLES
#
#FILES
#
#BUGS
#
#SEE ALSO
#
#AUTHOR
#  John Chambers <jc@trillian.mit.edu> April 2009
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

$| = 1;				# Unbuffered output on the currently selected handle
$exitstat = 0;		# Status used by the final exit after the main loop
$P = $0;			# Program name ...
$P =~ s".*/"";		# ... with any leading directory part removed
&Vopt($ENV{"V_$P"} || $ENV{"V"} || '2');	# Verbose level.

# Time-unit constants, used to express delays in seconds.
$spm = 60;						# Seconds per minute
$mph = 60;						# Minutes per hour
$hpd = 24;						# Hours per day
$spd = $hpd * $mph * $spm;		# Seconds per day; should be 86400

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Tunables; each may be overridden from the environment.  An unset, empty or
# zero environment value falls back to the built-in default.

# Avoid a host unless its hst/ file is at least this many seconds old
$hostdelay = $ENV{'HOSTDELAY'} || (5 * $spd);
# Quit after starting this many subprocesses
$maxprocs  = $ENV{"${P}_maxprocs"} || 1000;
# Number of subprocesses running at the same time
$numprocs  = $ENV{"${P}_numprocs"} || 3;

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Global info that controls subprocesses:

$bot      = 'abcbot';	# The command to run in each process
$lastpid  = 0;			# The pid of the last process started
#maxsleep = 10;			# Max param to pass to the sleeper command
$maxslot  = 0;			# The highest-numbered process slot used so far
$pdone    = 0;			# Number of processes that have finished
$pstarted = 0;			# Number of processes we've started so far
$sdelay   = 1;			# Delay (seconds) between initial startups

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Globals for the host list.  The flags select which directories are scanned
# when no hosts are named on the command line.

($adds, $hsts, $news) = (1, 1, 0);	# Scan add/ and hst/, but not new/
@add = ();		# Hosts found in the add directory
@hst = ();		# Hosts found in the hst directory
@new = ();		# Hosts found in the new directory
%dup = ();		# Notes duplicate host names
#dups = 1;

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Globals for the multi-process code.  Arrays are indexed by slot number.

@pst = ();	# When the process in each slot started
@s2p = ();	# Maps slot number to process id
@s2c = ();	# Maps slot number to command
@s2h = ();	# Maps slot number to hostname
#s2a = ();	# Maps slot number to argument
%p2s = ();	# Maps process id to slot number

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Chew up the command-line args, looking for host names

# Each argument is either an option string (leading '-' or '+', possibly
# holding several options run together, e.g. -m10n3v) or a host name.  Host
# names are normalized and collected in @lst, with duplicates dropped.
print V "$P: Process command-line args ...\n" if $V>1;
foreach my $word (@ARGV) {
	print V "$P: Arg \"$word\" ...\n" if $V>2;

	unless ($word =~ /^([-+])(.*)/) {	# Not an option, so it's a host
		($hst = $word) =~ s/\.LCK$//;	# Drop a lock-file suffix (upper case only)
		$hst = lc($hst);
		for ($hst) {
			s{^\w*/+}{};	# Drop a leading directory name such as "hst/"
			s/-+$//;		# Trim trailing hyphens
			s/-add$//;		# Drop an "-add" suffix
		}
		print V "$P: Arg '$word' is host '$hst'\n" if $V>1;
		push(@lst, $hst) unless $dup{$hst};
		$dup{$hst}++;
		next;
	}

	my ($sign, $rest) = ($1, $2);
	print V "$P:	Option \"$sign$rest\"\n" if $V>1;
	until ($rest eq '') {	# Consume the option string piece by piece
		if ($rest =~ s/^M(\d+)//i) {			# -m<M>: total process limit
			$maxprocs = int($1);
			print V "$P: maxprocs=$maxprocs\n" if $V>1;
		}
		elsif ($rest =~ s/^N(\d+)//i) {			# -n<N>: concurrent process limit
			$numprocs = int($1);
			print V "$P: numprocs=$numprocs\n" if $V>1;
		}
		elsif ($rest =~ s/^V(\d*)//i) {			# -v[<level>]: verbosity
			my $level = $1;
			if ($level eq '') {
				$V = $V + 1;		# Bare -v bumps the level by one
			} else {
				$V = int($level);
			}
			print V "$P: V=$V\n" if $V>0;
		}
		else {
			print V "$P: Option \"$rest\" not recognized.\n" if $V>0;
			$rest =~ s/^.//;		# Skip one character and try again
		}
	}
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Dump various stuff to the verbose log

print V "$P:	maxprocs: $maxprocs numprocs: $numprocs\n" if $V>1;

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Build the final host list.  If hosts were named on the command line (@lst),
# use those; otherwise collect the file names found in the hst/ and add/
# directories.  (We used to include the new/ directory, but don't now.)

@hst = @add = @new = ();

if (!@lst) {
	$" = ',';		# Separator used when arrays are interpolated in messages
	@add = glob("add/*") if $adds;		# Files in the add/ directory
	print V "$P	add=(@add)\n" if $adds && $V>4;
	@hst = glob("hst/*") if $hsts;		# Files in the hst/ directory
	print V "$P	hst=(@hst)\n" if $hsts && $V>4;
	@new = glob("new/*") if $news;		# Files in the new/ directory
	print V "$P	new=(@new)\n" if $news && $V>4;
	# Leave out names that end with a hyphen or with ".LCK"
	@lst = grep { !/(-|\.LCK)\s*$/ } (@hst, @add, @new);
}

# Merge everything into the %hosts set, keyed by bare host name.
foreach my $path (@lst) {
	(my $name = $path) =~ s{^.*/}{};	# Strip away the directory name
	$name =~ s/-+$//;					# Trim trailing hyphens
	$hosts{$name}++;					# List of hosts to test
}

@lst = sort(keys(%hosts));
print  V "$P:	lst=(@lst)\n" if $V>4;
printf V "$P:	We have %d hosts.\n", scalar(@lst); # if $V>1;

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Start up the initial batch of bots:

# Start up to $numprocs processes, one per host taken from the front of @lst.
# $s is the next free slot number; it only advances when startproc() reports
# that a process was really started, so skipped hosts don't use up a slot.
# The total-process limit $maxprocs is honored here too (0 means no limit),
# so that e.g. -m2 -n5 starts two processes and not five.
$s = 0;
while (@lst && ($pstarted < $numprocs)
		&& (($maxprocs < 1) || ($pstarted < $maxprocs))) {
	my $started = 0;
	print V "$P: Slot $s initial process ...\n" if $V>1;
	if ($hst = shift @lst) {
		print V "$P: HOST: \"$hst\"\n" if $V>2;
		$started = &startproc($s,$bot,$hst);
		$s += $started;
		print V "$P: We've started pstarted=$pstarted initial processes (s=$s)\n" if $V>2;
	} else {
		print V "$P: Bad host name \"$hst\"\n" if $V>0;
	}
	# $sdelay is the delay between startups, so don't waste time sleeping
	# after a host that was skipped.
	sleep $sdelay if $started && $sdelay > 0;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# Main loop: reap finished children, and refill each freed slot with the next
# usable host from @lst, until no children are left.  Timestamps in messages
# use $cymd and $hms, which are presumably refreshed by dt() (from DT.pm).
waiting:
while ($p = wait) {		# Wait for a process to exit
	&dt();				# Note the time
	if ($p < 0) {		# No child processes are left
		my $status;
		print V "$cymd $hms: wait failed with status $p.\n" if $V>0;
		print V "\n$cymd $hms: No processes alive.\n" if $V>0;
		if ($maxprocs && ($pdone >= $maxprocs)) {
			print V "$P: $pdone of $maxprocs done; exiting.\n" if $V>0;
			$status = $pdone - $maxprocs;	# Nonzero exit if too many finished
		} else {
			$pleft = $maxprocs ? ($maxprocs - $pdone) : 0;
			print V "$P: pstarted=$pstarted pdone=$pdone pleft=$pleft maxprocs=$maxprocs\n" if $V>0;
			$status = $pleft;
		}
		# Exit statuses are truncated to 8 bits by the system, so a count
		# such as 256 would look like success.  Clamp it instead.
		exit(($status > 255) ? 255 : $status);
	} elsif ($p == 0) {	# "There are processes running" [on some OSs]
		# Note: the while() test above ends the loop on a zero return, so
		# this branch is only a safeguard.
		print V "\n$cymd $hms: wait returned zero.\n" if $V>0;
	} else {			# A specific process just died
		print V "\n" if $V>2;
		$s = $p2s{$p};
		unless (defined $s) {
			# Not a process that startproc() recorded.  Without this check
			# the undefined slot would be treated as slot 0, and we would
			# wipe out and reuse a slot whose process may still be running.
			print V "$cymd $hms: Process $p is not one of ours; ignored.\n" if $V>0;
			next waiting;
		}
		delete $p2s{$p};	# The pid is gone; the system may reuse the number
		$h = $s2h[$s];
		print V "$cymd $hms\tProcess $p for host $h in slot $s finished.\n" if $V>2;
		Backup("add/$h") if -e "add/$h";
		++$pdone;		# Count the processes that finish.
		$ptime = time - $pst[$s];	# How many seconds the process lived
		$dhms = &dhms($ptime);
		print V "$cymd $hms: Slot $s pid $p [" . $s2c[$s] . "] done ($dhms = $ptime sec)\n" if $V>1;
		&initslot($s);	# Wipe out the info about this slot
		# Try hosts until one actually starts in the freed slot, or until
		# we run out of hosts or reach the $maxprocs limit (0 = no limit).
		while (@lst && (($maxprocs < 1) || ($pstarted < $maxprocs))) {	# Any more to start?
			&dt();
			print V "$cymd $hms: Slot $s start a new process ...\n" if $V>2;
			if ($host = shift @lst) {
				print V "$P: Start process for host $host in slot $s ...\n" if $V>2;
				$started = &startproc($s,$bot,$host);
				&dumpslots() if $V>2;
				if ($started > 0) {
					print V "$P: ========================================\n" if $V>3;
					next waiting;
				} else {
					print V "$P: Failed to start process for host $host in slot $s ...\n" if $V>0;
				}
			} else {
				print V "$P: Ignored host \"$host\"\n" if $V>0;
				last unless @lst;
			}
		}
		print V "$P: No more processes to start.\n" if $V>1;
	}
	&dumpslots() if $V>2;
}

# Only reached if wait returns zero.
print V "$P: Exit with status $exitstat.\n" if $V>1;
exit $exitstat;

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

sub dhms {my $F='dhms';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Convert a second count to days, hours, minutes and seconds.
#
# Arg:     a number of seconds.  A fraction is dropped; undef counts as 0.
# Returns: a string like "1d2h3m4s" with the leading zero-valued units left
#          off, so 65 gives "1m5s", 3600 gives "1h0m0s", and 0 gives "0s".
#          A negative count (the clock was set back while a process was
#          running) is shown with a leading '-', as in "-5s".
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	my ($ptime) = @_;
	$ptime = 0 unless defined $ptime;
	my $sign = ($ptime < 0) ? '-' : '';
	$ptime = int(abs($ptime));		# Work with a whole, non-negative count
	my $s = $ptime % 60; $ptime = int($ptime / 60);
	my $m = $ptime % 60; $ptime = int($ptime / 60);
	my $h = $ptime % 24;
	my $d = int($ptime / 24);
	my $val = sprintf("%dd%dh%dm%ds",$d,$h,$m,$s);
	$val =~ s/^[0hmds]*//;			# Strip the leading zero-valued units
	return '0s' if $val eq '';		# Everything was zero
	return $sign . $val;
}

sub dumpslots {my $F='dumpslots';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Dump every slot, from slot 0 through the highest slot used so far
# ($maxslot), by calling dumpslot() on each one in turn.  Takes no args.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	for my $slot (0 .. $maxslot) {
		&dumpslot($slot);
	}
}

sub dumpslot {my $F='dumpslot';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Write one line to the V (verbose) file handle that shows what we have
# recorded for a slot: its pid (s2p), start time (pst) and command (s2c).
#
# Arg: the slot number.
#
# The program name and slot number are passed as printf arguments rather
# than interpolated into the format, so that a '%' in the program name can't
# garble the output.  Entries that were never set are shown as 0 or ''.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	my ($s) = @_;
	printf V "%s: SLOT %s s2p=%d pst=%d s2c='%s'\n",
		$P, $s,
		(defined $s2p[$s] ? $s2p[$s] : 0),
		(defined $pst[$s] ? $pst[$s] : 0),
		(defined $s2c[$s] ? $s2c[$s] : '');
}

sub initslot {my $F='initslot';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Wipe out the info about one slot, marking it as unused.
#
# Arg: the slot number.
#
# The pid-to-slot entry for the slot's old process is removed as well, so
# that %p2s doesn't keep growing, and so that a stale entry can't be taken
# for a live process if the system later reuses the pid.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	my ($slot) = @_;
	my $oldpid = $s2p[$slot];
	if ($oldpid && defined($p2s{$oldpid}) && ($p2s{$oldpid} == $slot)) {
		delete $p2s{$oldpid};	# Only if it still points at this slot
	}
	$s2p[$slot] = 0;	# Maps slot to pid
	$s2c[$slot] = '';	# Maps slot to command
	$s2h[$slot] = '';	# Maps slot to hostname
#	$s2a[$slot] = '';	# Maps slot to argument
	$pst[$slot] = 0;	# When the process started
}

sub startproc {my $F='startproc';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Start one bot process for a host, unless the host should be skipped.
#
# Args:
#   $slot     the slot number to record the new process in
#   $botname  the bot command; it may include arguments, separated by spaces
#   $hstname  the host name, which is passed to the bot as its last argument
#
# A host is skipped if its name ends with '-', if it has a lck/ file, or  if
# its hst/ file is less than $hostdelay seconds old.  In the last case,  its
# add/ file (if any) is handed to Backup().
#
# On success, the slot tables (@s2p, @s2c, @s2h, @pst, %p2s), $lastpid,
# $pstarted and $maxslot are updated.  $logfil and $ENV{V_abcbot} are set
# before the fork, so that the child inherits the environment value.
#
# Return value: 1 if we started a process, 0 otherwise.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	my ($slot,$botname,$hstname) = @_;
	my ($pid,$cmd,$hstfile,$lckfile,$mtime,$t);
	my $afile = "add/$hstname";
	print V "$F: Called with slot=$slot botname=\"$botname\" hstname=\"$hstname\"\n" if $V>3;
	if ($hstname =~ /-$/) {
		print V "$F: #### Host $hstname ignored, final '-'\n" if $V>0;
		return 0;
	}
	if (-f ($lckfile = "lck/$hstname")) {
		print V "$F: #### Host $hstname ignored, $lckfile exists.\n" if $V>0;
		return 0;
	}
	if (-f ($hstfile = "hst/$hstname")) {
		$mtime = (stat($hstfile))[9];	# Last-modified time
		# If the file vanished since the -f test, treat the host as old enough.
		if (defined($mtime) && (($t = time - $mtime) < $hostdelay)) {
			print V "$F: Host $hstname ignored, hst file is only $t sec old.\n" if $V>0;
			Backup($afile) if -e $afile;
			return 0;
		}
	}
	$logfil = "log/$hstname";
	$ENV{"V_abcbot"} = "$V$logfil";	# Pass our verbose level to abcbot
#	$cmd = "$botname +CURLs $hstname";
	$cmd = "$botname $hstname";		# Used for messages and the slot table
	print V "$F: Slot $slot cmd [$cmd]\n" if $V>2;

	$pid = fork;
	unless (defined $pid) {
		# fork returns undef on failure, not a negative number.  This has to
		# be caught before the parent/child test, or else the parent would
		# take the child branch and replace itself with the bot.
		my $err = "$!";		# Save the message; dt() might disturb $!
		&dt();
		print V "$cymd $hms: ### Can't start a new process [$err]\n" if $V>0;
		return 0;
	}

	if ($pid == 0) {		# Child (new process)
		my $errno = 0;
		# Run the bot directly, without a shell, so that characters in a
		# host name (which comes from a file name or the command line)
		# can't be interpreted as shell syntax.
		my @argv = (split(' ', $botname), $hstname);
		print V "$F: Child process $$ slot $slot [$cmd]\n" if $V>2;
		exec { $argv[0] } @argv or $errno = $! + 0;
		warn "\n$F: Child process $$ bombed: $!\n" if $V>0;
		exit($errno || 1);	# exec only returns if it failed
	}

	# Parent: record the new process in its slot.
	&dt();
	print V "$cymd $hms: Slot $slot pid $pid [$cmd] started <<<<<<\n\n" if $V>1;
	$p2s{$lastpid = $pid} = $slot;
	$s2p[$slot] = $pid;
	$s2c[$slot] = $cmd;
	$s2h[$slot] = $hstname;
#	$s2a[$slot] = join(' ',@cmd);
	$pst[$slot] = $t = time;	# Note when we start the process
	print V "$F: pst[$slot] is $t [$cmd]\n" if $V>2;
	++ $pstarted;	# Count the processes that we actually start
	$maxslot = $slot if $slot > $maxslot;	# Note the max slot number used
	&dumpslots() if $V>2;
	return 1;		# Tell caller that we started a process
}
