#!/usr/bin/perl
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#NAME
# PurgeBackups - remove backups if identical with primary file
#SYNOPSIS
# PurgeBackups host|file|dir ..
#
#DESCRIPTION
# Run thru the files  or  directories  looking  for  backup  files  that  are
# identical to their primaries, and delete them.
#
# What  we do is: For each file named on the command line, we check to see if
# there is a "backup" file that has the same name with one  or  more  hyphens
# appended. If so, we compare any such backup file with the primary file, and
# if they are identical, we delete the backup file.
#
# If we  encounter  a  directory  name  on  the  command  line,  we  scan  it
# (recursively) for files, and treat them as the primary files.
#
# Note  that  we  don't have any defaults.  If there are no directory or file
# names on the command line, we just exit.
#
# Our exit status will be 0 if we encounter no file-access problems.   If  we
# fail  to unlink any backup file, our exit status will be the errno code for
# the last such failure.
#
#OPTIONS
# Options  start  with  '-'  (for  'off'  or  'disable')  or '+' (for 'on' or
# 'enable').  Either may be the default, depending  on  how  I'm  using  this
# program at the moment.
#
#
# The current options are:
#
# -A
# +A
#   controls whether all '*-" backup files are deleted (+A), or whether  only
#   backup files the same as the primary file are deleted (-A).
#
# -ABC
#   Default: Disables the +ABC action.  Default
# +ABC
#   Deletes all files that lack the ".abc" suffix.  Use with care! This  gets
#   rid  of  most  of the source files, as well as previous files that used a
#   different naming convention for the single-tune cache files.
#
# -I
# +I
#  Informative.  Nothing is deleted; a list of what would be deleted is written
#  to stdout instead.
#
#BUGS
# At present, we only recognize '*-" files as backups.  We  should  recognize
# the BACKUP environment variable, and use whatever format it says. Maby I'll
# implement this someday when I need it.
#
#SEE ALSO
# Rm, Backup.pm
#
#AUTHOR
# John Chambers <jc@trillian.mit.edu>  2008
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

$| = 1;
($P = $0) =~ s".*/"";	# Our name minus directory
$V = $ENV{"V_$P"} || $ENV{"D_$P"} || 1;	# Verbose level

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# If you don't like the defaults, set them yourself:
	$abconly  =    0;	# Delete most files, leaving only *.abc files
	$all      =    1;	# Purge all backups, even if different from current version
	$bufsiz   = 1000;	# Small for testing, big for routine use
	$exitstat =    0;	# Set to nonzero for serious problems
	$inform   =    0;	# Informative only; don't delete anything
	$recurse  =    1;	# Delve into subdirectories

&setinformtxt();

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Run thru the args, processing each one.  Note that we  process  a  file  or
# directory as soon as we find it, so only early options apply to each.

for $x (@ARGV) {
	if (($oflg,$ostr) = ($x =~ /^([-+])(.*)/)) {
		while (($ochr,$ostr) = ($ostr =~ /(.)(.*)/)) {
			$OC = uc($ochr);	# Upper-case option char
			print "$P: $oflg$OC option.\n" if $V>1;
			if ($OC eq 'A') {	# Remove all backup-like files (*-)
				if ($ostr =~ s/^bc//i) {	# Was it 'abc'?
					$abconly = ($oflg eq '+' ? 1 : 0);
					print "$P: ABConly " . ($abconly?'on':'off') . ".\n" if $V>1;
				} else {
					$all = ($oflg eq '+') ? 1 : 0;
					print "$P: All " . ($all?'on':'off') . "\n" if $V>1;
				}
			} elsif ($OC eq 'I') {	# Informative; no deletion
				$inform = ($oflg eq '+') ? 1 : 0;
				print "$P: Informative " . ($inform?'on':'off') . ".\n" if $V>1;
				&setinformtxt();
			} elsif ($OC eq 'R') {	# Recurse through directories
				$recurse = ($oflg eq '+') ? 1 : 0;
				print "$P: Recursion " . ($recurse?'on':'off') . ".\n" if $V>1;
			} elsif ($OC eq 'V' || $OC eq 'D') {	# Verbose/Debug
				if ($ostr =~ s/(\d+)//) {
					$V = int($1);
				} else {
					if ($oflg eq '-') {$V--} else {$V++}
				}
				print "$P: Verbose level $V.\n" if $V>0;
			} elsif ($OC eq '') {	# End of string
				print "$P: $oflg without option letter.\n" if $V>0;
			} else {
				print "$P: Unknown option '$ochr' in \"$x\" ignored.\n" if $V>0;
			}
		}
	} elsif (-d $x) {
		&onedir($x);
	} elsif (-d "http/$x") {
		&onedir("http/$x");
	} elsif (-f $x) {
		if ($x =~ /^(.*)-+/) {
			&onename($1,$x);
		}
	} else {
		print "$P: Unrecogized arg \"$x\"\n" if $V>0;
	}
}

exit $exitstat;
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

sub delete {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Delete one or more files.   This  is  a  subroutine  mostly  for  debugging #
# purposes,  though  it  also lets us omit a loop through multiple files.  We #
# don't actually use this looping in this program.                            #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	for $f (@_) {
		print "$P: $pDEL \"$f\"\n" if $V>0;
		unlink $f unless $inform;
	}
}

sub onedir {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Process one directory.  This just runs through the files in the directory,  #
# and passes each name to a routine to handle it.                             #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	local($dir) = @_;
	local($filessaved,$pth);
	print "$P: $pdir \"$dir\"\n" if $V>1;
	if (!opendir(DIR,$dir)) {
		print "$P: Can't read directory \"$dir\" [$!]\n";
		return 0;
	}
	$filessaved = 0;		# For counting the *.abc files
	local(@files) = readdir(DIR);
	closedir DIR;
	foreach $x (sort @files) {
		print "$P: nam \"$x\"\n" if $V>2;
		next if substr($x,0,1) eq '.';
		$pth = "$dir/$x";
		if (-d $pth) {
			&onedir($pth) if $recurse;
			++$filessaved if -e $pth;
		} elsif ($abconly) {	# Deleting everything except *.abc files
			if ($x =~ /\.abc$/) {
				print "$P: +++ \"$x\" saved.\n" if $V>2;
				++$filessaved;	# Count the files saved.
			} else {
				&delete($pth);
				print "$P: --- \"$x\" deleted.\n" if $V>2;
			}
		} elsif ($x =~ /^(.*?)-+$/) {
			&onename("$dir/$1",$pth);
			++$filessaved if -f $pth;	# Note which files still exist
		} else {
			print "$P: --- \"$x\" ignored.\n" if $V>2;
			++$filessaved;
		}
	}
	print "$P: $filessaved files left in '$dir'\n" if $V>1;
	if ($abconly && !$filessaved) {	# Did we empty the directory?
		print "$P: $pDEL '$dir' ..\n" if $V>1;
		system rmdir $dir unless $inform;	# Get rid of empty directory
		print "$P: $pdir '$dir' deleted.\n" if $V>1 && !-e $dir;
	}
}

sub onename {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Compare one file with its backup.  The second arg should be the  first  arg #
# with one or more hypens appended.  The return value is ignored.             #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	local($name,$bkup) = @_;
	print "$P: name=\"$name\"\n" if $V>3;
	print "$P: bkup=\"$bkup\"\n" if $V>3;
	if ($all) {
		print "$P: RM: \"$bkup\"\n" if $V>1;
		unlink $bkup unless $inform;
		return 1;
	}
	unless (($bdev,$bino,undef,undef,undef,undef,undef,$bsize) = stat($bkup)) {
		$exitstat = $!;
		print "$P: Can't access \"$bkup\" [$!]\n";
		print "$P: name=\"$name\"\n";
		print "$P: bkup=\"$bkup\"\n";
#		exit $exitstat;	# Exit at first failure (for testing)
		return 0;
	}
	unless (($ndev,$nino,undef,undef,undef,undef,undef,$nsize) = stat($name)) {
		print "$P: CAN'T ACCESS \"$name\" [$!]\n" if $V>0;
		$exitstat = $!;
		return 0;
	}
	if ($bsize != $nsize) {
		print "$P: \"$nsize\" and \"$bsize\" are different sizes.\n" if $V>1;
		return 0;
	}
	if ($bdev == $ndev && $bino == $nino) {
		print "$P: \"$name\" and \"$bkup\" are the same file.\n" if $V>2;
		&delete($bkup);
		return 1;
	}
	if (&same($name,$bkup)) {
		print "$P: \"$nsize\" and \"$bsize\" have the same contents.\n" if $V>2;
		&delete($bkup);
		return 1;
	}
}

sub same {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Compare two files' contents.  Return 0 if they're different, 1  if  they're #
# the same.                                                                   #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	local($f1,$f2) = @_;
	if (!open(F1,$f1)) {
		print "$P: Can't read \"$f1\" [$!]\n";
		return 0;
	}
	if (!open(F2,$f2)) {
		print "$P: Can't read \"$f2\" [$!]\n";
		return 0;
	}
	while (1) {
		if (!defined ($n1 = read(F1,$b1,$bufsiz))) {
			print "$P: Can't read \"$f1\" [$!]\n" if $V>1;
			return 0; 
		}
		if (!defined ($n2 = read(F2,$b2,$bufsiz))) {
			print "$P: Can't read \"$f2\" [$!]\n" if $V>1;
			return 0; 
		}
		if ($n1 == 0 && $n2 == 0) {
			print "$P: EOF on both files.\n" if $V>3;
			return 1;
		}
		if ($n1 != $n2) {
			print "$P: Got different sizes for \"$f1\" and  \"$f2\".\n" if $V>1;
			return 0; 
		}
		if ($b1 ne $b2) {
			print "$P: Got different data for \"$f1\" and  \"$f2\".\n" if $V>1;
			return 0; 
		}
		print "$P: Got $n1 identical bytes.\n" if $V>3;
	}
}

sub setinformtxt {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Some verbosity strings vary to indicate informative-only mode.  This should #
# be called whenever $inform is changed.                                      #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	if ($inform) {
		$pdir = '[dir]';
		$pDEL = '[DEL]';
	} else {
		$pdir = 'dir';
		$pDEL = 'DEL';
	}
}
