#!/usr/bin/perl
#
#NAME
#  purgebkups - remove backups if identical with primary file
#SYNOPSIS
#  purgebkups file|dir ..
#
#DESCRIPTION
#  Run thru the files or directories looking for  backup  files  that
#  are identical to their primaries, and delete them.  At present, we
#  only recognize '*-" files as backups.
#
#  What we do is:  For each file named on the command line, we  check
#  to see if there is a "backup" file that has the same name with one
#  or more hyphens appended.  If so, we compare any such backup  file
#  with  the  primary  file, and if they are identical, we delete the
#  backup file.
#
#  If we encounter a directory name on the command line, we  scan  it
#  (recursively) for files, and treat them as the primary files.
#
#  Note that we don't have any defaults. If there are no directory or
#  file names on the command line, we just exit.
#
#  Our exit status will be 0 if we encounter no file-access problems.
#  If  we fail to unlink any backup file, our exit status will be the
#  errno code for the last such failure.
#
#AUTHOR
#  John Chambers <jc@trillian.mit.edu>

$| = 1;
($me = $0) =~ s".*/"";
$V = $ENV{"V_$me"} || $ENV{"D_$me"} || 1;
print STDERR "$me: Called.\n" if $V>1;
$bufsiz = 1000;
$exitstat = 0;
$recurse = 1;

for $x (@ARGV) {
	if (($oflg,$ostr) = ($x =~ /^([-+])(.*)/)) {
		while (($ochr,$ostr) = ($ostr =~ /(.)(.*)/)) {
			$OC = uc($ochr);	# Upper-case option char
			if ($OC eq 'R') {	# Recurse through directories
				$recurse = ($oflg eq '+') ? 1 : 0;
				print STDERR "$me: Recursion " . ($recurse?'on':'off') . "\n" if $V>1;
			} elsif ($OC eq 'V' || $OC eq 'D') {	# Verbose/Debug
				if ($ostr =~ s/\d+//) {
					$V = int($1);
				} else {
					if ($oflg eq '-') {$V--} else {$V++}
				}
				print STDERR "$me: Verbose level $V.\n" if $V>1;
			} elsif ($OC eq '') {	# End of string
			} else {
				print STDERR "$me: Unknown option '$ochr' in \"$x\" ignored.\n" if $V>0;
			}
		}
	} elsif (-d $x) {
		&onedir($x);
	} elsif (-f $x) {
		if ($x =~ /^(.*)-+/) {
			&onename($1,$x);
		}
	} else {
	}
}

exit $exitstat;

sub delete {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Delete one or more files.   This  is  a  subroutine  mostly  for  debugging #
# purposes,  though  it  also lets us omit a loop through multiple files.  We #
# don't actually use this looping in this program.                            #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	for $f (@_) {
		print "$me: DEL \"$f\"\n" if $V>0;
		unlink $f;
	}
}

sub onedir {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Process one directory.  This just runs through the files in the directory,  #
# and passes each name to a routine to handle it.                             #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	local($dir) = @_;
	print STDERR "$me: dir \"$dir\"\n" if $V>1;
	if (!opendir(DIR,$dir)) {
		print STDERR "$me: Can't read directory \"$dir\" [$!]\n";
		return 0;
	}
	local(@files) = readdir(DIR);
	closedir DIR;
	foreach $x (sort @files) {
		print STDERR "$me: nam \"$x\"\n" if $V>2;
		next if substr($x,0,1) eq '.';
		if (-d "$dir/$x") {
			&onedir("$dir/$x") if $recurse;
		} elsif ($x =~ /^(.*?)-+$/) {
			&onename("$dir/$1","$dir/$x");
		} else {
			print STDERR "$me: --- \"$x\"ignored.\n" if $V>2;
		}
	}
}

sub onename {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Compare one file with its backup.  The second arg should be the  first  arg #
# with one or more hypens appended.  The return value is ignored.             #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	local($name,$bkup) = @_;
	print STDERR "$me: name=\"$name\"\n" if $V>3;
	print STDERR "$me: bkup=\"$bkup\"\n" if $V>3;
	unless (($bdev,$bino,undef,undef,undef,undef,undef,$bsize) = stat($bkup)) {
		print STDERR "$me: Can't access \"$bkup\" [$!]\n";
		print STDERR "$me: name=\"$name\"\n";
		print STDERR "$me: bkup=\"$bkup\"\n";
		$exitstat = $!;
#		exit $exitstat;	# Exit at first failure.
		return 0;
	}
	unless (($ndev,$nino,undef,undef,undef,undef,undef,$nsize) = stat($name)) {
		print STDERR "$me: CAN'T ACCESS \"$name\" [$!]\n";
		$exitstat = $!;
		return 0;
	}
	if ($bsize != $nsize) {
		print "$me: \"$nsize\" and \"$bsize\" are different sizes.\n" if $V>2;
		return 0;
	}
	if ($bdev == $ndev && $bino == $nino) {
		print "$me: \"$name\" and \"$bkup\" are the same file.\n" if $V>2;
		&delete($bkup);
		return 1;
	}
	if (&same($name,$bkup)) {
		print "$me: \"$nsize\" and \"$bsize\" have the same contents.\n" if $V>2;
		&delete($bkup);
		return 1;
	}
}

sub same {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Compare two files' contents.  Return 0 if they're different, 1  if  they're #
# the same.                                                                   #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	local($f1,$f2) = @_;
	if (!open(F1,$f1)) {
		print STDERR "$me: Can't read \"$f1\" [$!]\n";
		return 0;
	}
	if (!open(F2,$f2)) {
		print STDERR "$me: Can't read \"$f2\" [$!]\n";
		return 0;
	}
	while (1) {
		if (!defined ($n1 = read(F1,$b1,$bufsiz))) {
			print STDERR "$me: Can't read \"$f1\" [$!]\n" if $V>1;
			return 0; 
		}
		if (!defined ($n2 = read(F2,$b2,$bufsiz))) {
			print STDERR "$me: Can't read \"$f2\" [$!]\n" if $V>1;
			return 0; 
		}
		if ($n1 == 0 && $n2 == 0) {
			print "$me: EOF on both files.\n" if $V>3;
			return 1;
		}
		if ($n1 != $n2) {
			print STDERR "$me: Got different sizes for \"$f1\" and  \"$f2\".\n" if $V>1;
			return 0; 
		}
		if ($b1 ne $b2) {
			print STDERR "$me: Got different data for \"$f1\" and  \"$f2\".\n" if $V>1;
			return 0; 
		}
		print "$me: Got $n1 identical bytes.\n" if $V>3;
	}
}
