#!/usr/bin/perl
#
#NAME
#  PurgeBackups - remove backups if identical with primary file
#SYNOPSIS
#  PurgeBackups file|dir ..
#
#DESCRIPTION
#  Run thru the files or  directories  looking  for  backup  files  that  are
#  identical to their primaries, and delete them.
#
#  What we do is: For each file named on the command line, we check to see if
#  there  is  a "backup" file that has the same name with one or more hyphens
#  appended.  If so, we compare any such backup file with the  primary  file,
#  and if they are identical, we delete the backup file.
#
#  If  we  encounter  a  directory  name  on  the  command  line,  we scan it
#  (recursively) for files, and treat them as the primary files.
#
#  Note that we don't have any defaults.  If there are no directory  or  file
#  names on the command line, we just exit.
#
#  Our  exit status will be 0 if we encounter no file-access problems.  If we
#  fail to unlink any backup file, our exit status will be the errno code for
#  the last such failure.
#
#BUGS
#  At present, we only recognize '*-" files as backups.  We should recognize
#  the BACKUP environment variable, and use whatever format it says.
#
#SEE ALSO
#  Rm, Backup.pm
#
#AUTHOR
#  John Chambers <jc@trillian.mit.edu>

$| = 1;
($P = $0) =~ s".*/"";	# Our name minus directory
$V = $ENV{"V_$P"} || $ENV{"D_$P"} || 1;	# Verbose level

$bufsiz = 1000;
$exitstat = 0;
$recurse = 1;

for $x (@ARGV) {
	if (($oflg,$ostr) = ($x =~ /^([-+])(.*)/)) {
		while (($ochr,$ostr) = ($ostr =~ /(.)(.*)/)) {
			$OC = uc($ochr);	# Upper-case option char
			if ($OC eq 'R') {	# Recurse through directories
				$recurse = ($oflg eq '+') ? 1 : 0;
				print STDERR "$P: Recursion " . ($recurse?'on':'off') . "\n" if $V>1;
			} elsif ($OC eq 'V' || $OC eq 'D') {	# Verbose/Debug
				if ($ostr =~ s/\d+//) {
					$V = int($1);
				} else {
					if ($oflg eq '-') {$V--} else {$V++}
				}
				print STDERR "$P: Verbose level $V.\n" if $V>1;
			} elsif ($OC eq '') {	# End of string
			} else {
				print STDERR "$P: Unknown option '$ochr' in \"$x\" ignored.\n" if $V>0;
			}
		}
	} elsif (-d $x) {
		&onedir($x);
	} elsif (-f $x) {
		if ($x =~ /^(.*)-+/) {
			&onename($1,$x);
		}
	} else {
		print STDERR "$P: Unrecogized arg \"$x\"\n" if $V>0;
	}
}

exit $exitstat;

sub delete {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Delete one or more files.   This  is  a  subroutine  mostly  for  debugging #
# purposes,  though  it  also lets us omit a loop through multiple files.  We #
# don't actually use this looping in this program.                            #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	for $f (@_) {
		print "$P: DEL \"$f\"\n" if $V>0;
		unlink $f;
	}
}

sub onedir {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Process one directory.  This just runs through the files in the directory,  #
# and passes each name to a routine to handle it.                             #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	local($dir) = @_;
	print STDERR "$P: dir \"$dir\"\n" if $V>1;
	if (!opendir(DIR,$dir)) {
		print STDERR "$P: Can't read directory \"$dir\" [$!]\n";
		return 0;
	}
	local(@files) = readdir(DIR);
	closedir DIR;
	foreach $x (sort @files) {
		print STDERR "$P: nam \"$x\"\n" if $V>2;
		next if substr($x,0,1) eq '.';
		if (-d "$dir/$x") {
			&onedir("$dir/$x") if $recurse;
		} elsif ($x =~ /^(.*?)-+$/) {
			&onename("$dir/$1","$dir/$x");
		} else {
			print STDERR "$P: --- \"$x\"ignored.\n" if $V>2;
		}
	}
}

sub onename {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Compare one file with its backup.  The second arg should be the  first  arg #
# with one or more hypens appended.  The return value is ignored.             #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	local($name,$bkup) = @_;
	print STDERR "$P: name=\"$name\"\n" if $V>3;
	print STDERR "$P: bkup=\"$bkup\"\n" if $V>3;
	unless (($bdev,$bino,undef,undef,undef,undef,undef,$bsize) = stat($bkup)) {
		$exitstat = $!;
		print STDERR "$P: Can't access \"$bkup\" [$!]\n";
		print STDERR "$P: name=\"$name\"\n";
		print STDERR "$P: bkup=\"$bkup\"\n";
#		exit $exitstat;	# Exit at first failure (for testing)
		return 0;
	}
	unless (($ndev,$nino,undef,undef,undef,undef,undef,$nsize) = stat($name)) {
		print STDERR "$P: CAN'T ACCESS \"$name\" [$!]\n" if $V>0;
		$exitstat = $!;
		return 0;
	}
	if ($bsize != $nsize) {
		print "$P: \"$nsize\" and \"$bsize\" are different sizes.\n" if $V>1;
		return 0;
	}
	if ($bdev == $ndev && $bino == $nino) {
		print "$P: \"$name\" and \"$bkup\" are the same file.\n" if $V>2;
		&delete($bkup);
		return 1;
	}
	if (&same($name,$bkup)) {
		print "$P: \"$nsize\" and \"$bsize\" have the same contents.\n" if $V>2;
		&delete($bkup);
		return 1;
	}
}

sub same {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Compare two files' contents.  Return 0 if they're different, 1  if  they're #
# the same.                                                                   #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	local($f1,$f2) = @_;
	if (!open(F1,$f1)) {
		print STDERR "$P: Can't read \"$f1\" [$!]\n";
		return 0;
	}
	if (!open(F2,$f2)) {
		print STDERR "$P: Can't read \"$f2\" [$!]\n";
		return 0;
	}
	while (1) {
		if (!defined ($n1 = read(F1,$b1,$bufsiz))) {
			print STDERR "$P: Can't read \"$f1\" [$!]\n" if $V>1;
			return 0; 
		}
		if (!defined ($n2 = read(F2,$b2,$bufsiz))) {
			print STDERR "$P: Can't read \"$f2\" [$!]\n" if $V>1;
			return 0; 
		}
		if ($n1 == 0 && $n2 == 0) {
			print "$P: EOF on both files.\n" if $V>3;
			return 1;
		}
		if ($n1 != $n2) {
			print STDERR "$P: Got different sizes for \"$f1\" and  \"$f2\".\n" if $V>1;
			return 0; 
		}
		if ($b1 ne $b2) {
			print STDERR "$P: Got different data for \"$f1\" and  \"$f2\".\n" if $V>1;
			return 0; 
		}
		print "$P: Got $n1 identical bytes.\n" if $V>3;
	}
}
