#!/usr/bin/perl
#
#NAME
#  CompareIndexes - Show URLs that differ between index directories.
#
#SYNOPSIS
#  CompareIndexes
#
#DESCRIPTION
#
#OPTIONS
#
#BUGS
#
#SEE ALSO
#
#AUTHOR
#  John Chambers <jc@trillian.mit.edu>

# Unbuffer STDOUT and derive the program name from the last path
# component of $0.
$| = 1;
($me = $0) =~ s".*/"";

# Verbose-output setup.
#   The setting is the first true value among the environment variables
#   D_<name>, T_<name> and V_<name> (default '1').
#   A value of the form <digit><rest> sets the level $V to <digit> and
#   appends the verbose output to the file named by <rest>; any other
#   value is used as the level itself and the output goes to STDERR.
$Vopt = $ENV{"D_$me"} || $ENV{"T_$me"} || $ENV{"V_$me"} || '1';
if ($Vopt =~ /^(\d)(.+)/) {
	$V = $1;
	$Vfil = $2;
	# The two-argument open is kept deliberately, so that values such as
	# "&STDOUT" keep the meaning they have always had here.
	if (!open(V,">>$Vfil")) {
		$Verr = $!;	# Save the reason; the next open may change $!.
		# Attach V to STDERR *before* complaining.  The complaint used to
		# be printed to V while V was still unopened, so it was lost.
		open(V,">>&STDERR");
		print V "$me: Can't write \"$Vfil\" ($Verr)\n" if $V>0;
	}
} else {
	$V = $Vopt;
	open(V,">>&STDERR");
}
# V is left as the selected handle, so a bare "print" later in the script
# writes to V rather than to STDOUT.
select V; $| = 1;	# select STDOUT; $| = 1;

# Main comparison.
#
# Usage:  <program> Idir [idir]
#   Idir  the index directory to compare against (required)
#   idir  the index directory being checked (default "ndx")
#
# For every two-character page name ??.html found in either directory,
# the pages Idir/<name> and idir/<name> are scanned for <A HREF="..."> links
# (at most one per line, matched case-insensitively) and every URL that
# occurs in only one of the two pages is reported.
#
# Previously $Ifile was never assigned, so every page was skipped and
# nothing was ever compared.  The name of the second directory cannot be
# recovered from this file, which is why it has to be given as an argument.
#
# Uses $me, $V and the handle V, which are set up by the start-up code
# earlier in the file.
$Idir = $ARGV[0];
$idir = (defined($ARGV[1]) && $ARGV[1] ne '') ? $ARGV[1] : 'ndx';
unless (defined($Idir) && $Idir ne '') {
	print STDERR "Usage: $me other-index-dir [index-dir]\n";
	exit 2;
}

# Collect the page names from both directories, so that a page missing
# on either side is noticed.  (glob splits its pattern on white space, so
# directory names containing blanks are not supported.)
%flist = ();
for $d ($Idir, $idir) {
	for $f (glob("$d/??.html")) {
		$f =~ s".*/"";
		$flist{$f} = 1;
	}
}

for $x (sort keys %flist) {
	$Ifile = "$Idir/$x";
	$ifile = "$idir/$x";
	if (-f $Ifile && -f $ifile) { print V "$x:\n" if $V>0}
	elsif (-f $Ifile) {print V "$x: $ifile doesn't exist.\n" if $V>0}
	elsif (-f $ifile) {print V "$x: $Ifile doesn't exist.\n" if $V>0}
	# A page that cannot be read on either side is skipped entirely.
	unless (open(FI, '<', $Ifile)) {
		print V "\tCan't read $Ifile ($!)\n" if $V>4;
		next;
	}
	unless (open(Fi, '<', $ifile)) {
		print V "\tCan't read $ifile ($!)\n" if $V>4;
		close FI;	# Don't leave the first page open when skipping.
		next;
	}
	# $URLI/$URLi and $EOFI/$EOFi are only needed by getI/geti and the
	# disabled merge code below; they are reset here for each page.
	$URLI = $URLi = '';
	$EOFI = $EOFi = 0;
	# %URLI and %URLi count the URLs seen in each page; %URLs counts both.
	%URLI = (); %URLi = (); %URLs = ();
	while (defined($l = <FI>)) {
		if ($l =~ /<A HREF="([^"]+)">/i) {
			$U = $1;
			$URLI{$U} ++;
			$URLs{$U} ++;
			print V "$Ifile: $U\t$URLI{$U}\t$URLs{$U}.\n" if $V>2;
		}
	}
	while (defined($l = <Fi>)) {
		if ($l =~ /<A HREF="([^"]+)">/i) {
			$U = $1;
			$URLi{$U} ++;
			$URLs{$U} ++;
			print V "$ifile: $U\t$URLi{$U}\t$URLs{$U}.\n" if $V>2;
		}
	}
	for $U (sort keys %URLs) {
		# URLs under these paths are never reported.
		next if $U =~ m"/abc/Scotland/";
		next if $U =~ m"/book/oneills/";
		next if $U =~ m"/book/RJ/";
		print V "$U\t$URLI{$U}\t$URLi{$U}.\n" if $V>2;
		# These bare prints go to the currently selected output handle.
		print "$U in $Ifile only.\n" unless $URLi{$U};
		print "$U in $ifile only.\n" unless $URLI{$U};
	}
# Disabled earlier implementation: a merge-style walk over both pages
# using getI/geti.  Kept for reference.
#URL:
#	while (!$EOFI && !$EOFi) {
#		if ($EOFI) {
#			unless ($URLi = &geti) {$EOFi = 1}
#			$URLi =~ s/.*<A HREF="([^"]+)".*$/$1/;
#		} elsif ($EOFi) {
#			unless ($URLI = &getI) {$EOFI = 1}
#			$URLI =~ s/.*<A HREF="([^"]+)".*$/$1/;
#		} elsif ($URLI lt $URLi) {
#			unless ($URLI = &getI) {$EOFI = 1}
#			$URLI =~ s/.*<A HREF="([^"]+)".*$/$1/;
#		} elsif ($URLI gt $URLi) {
#			unless ($URLi = &geti) {$EOFi = 1}
#			$URLi =~ s/.*<A HREF="([^"]+)".*$/$1/;
#		} else {
#			unless ($URLI = &getI) {$EOFI = 1}
#			unless ($URLi = &geti) {$EOFi = 1}
#			$URLI =~ s/.*<A HREF="([^"]+)".*$/$1/;
#			$URLi =~ s/.*<A HREF="([^"]+)".*$/$1/;
#		}
#		print V "URLI=\"$URLI\"\n" if $V>2;
#		print V "URLi=\"$URLi\"\n" if $V>2;
#		if ($URLI && $URLi) {
#			if ($URLI lt $URLi) {
#				print "$URLI in $Ifile only.\n" if $URLI !~ m"/book/oneills/";
#			} elsif ($URLI gt $URLi) {
#				print "$URLi in $ifile only.\n" if $URLi !~ m"/book/oneills/";
#			}
#		} elsif (!$URLI && $URLi) {
#			print "$URLi in $ifile only.\n";
#		} elsif ($URLI && !$URLi) {
#			print "$URLI in $Ifile only.\n";
#		}
#	}
	close FI;
	close Fi;
}

# getI - return the next link target found on the global handle FI.
#   Reads FI line by line until a line contains <A HREF="...">
#   (matched case-insensitively) and returns the quoted URL.
#   When a read yields a false value (normally end of file) the global
#   flag $EOFI is set to 1 and undef is returned.  Once $EOFI is set,
#   no further reads are attempted and undef is returned at once.
sub getI {
	my $line;
	until ($EOFI) {
		unless ($line = <FI>) {
			$EOFI = 1;
			last;
		}
		return $1 if $line =~ /<A HREF="([^"]+)">/i;
	}
	return undef;
}

# geti - return the next link target found on the global handle Fi.
#   Reads Fi line by line until a line contains <A HREF="...">
#   (matched case-insensitively) and returns the quoted URL.
#   When a read yields a false value (normally end of file) the global
#   flag $EOFi is set to 1 and undef is returned.  Once $EOFi is set,
#   no further reads are attempted and undef is returned at once.
sub geti {
	my $line;
	until ($EOFi) {
		unless ($line = <Fi>) {
			$EOFi = 1;
			last;
		}
		return $1 if $line =~ /<A HREF="([^"]+)">/i;
	}
	return undef;
}
