#!/usr/bin/perl
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#NAME
#  find1 - find one copy of each file
#
#SYNOPSIS
#  find1 [dir].. [pat]..
#
#REQUIRES
#
#DESCRIPTION
# Search the directories listed for files that match the patterns, and write
# their paths to stdout.  For multiply-linked files, only the first name is
# produced.  The purpose is to give a list of the "distinct" files, and not
# list multiply-linked files more than once.
#
#OPTIONS
# None yet, but there may be some soon.
#
#EXAMPLES
#
#FILES
#
#BUGS
#
#SEE ALSO
#
#AUTHOR
#  John Chambers <jc@trillian.mit.edu>
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

$| = 1;				# Unbuffered stdout
$exitstat = 0;			# Exit status; nothing below changes it
($P = $0) =~ s{.*/}{};		# Program name with leading directories removed

# Verbose level.  The first of $V_<prog> / $D_<prog> that is set to a
# non-empty value in the environment wins; otherwise the level is 1.  An
# explicit 0 is honoured, which silences the "if $V>0" messages.
$V = 1;
for my $name ("V_$P", "D_$P") {
	if (defined $ENV{$name} && length $ENV{$name}) {
		$V = $ENV{$name};
		last;
	}
}

# These stay package variables (no "my"): onefile() reads @pats directly.
@dirs = ();			# Directories to scan
@pats = ();			# Patterns that file paths are matched against

# Run thru the args, classifying them into dirs and pats.  Anything that
# is an existing directory is a directory; everything else is a pattern.
for my $arg (@ARGV) {
	if (-d $arg) {
		print "$P: '$arg' is a directory.\n" if $V>1;
		push @dirs, $arg;
	} else {
		print "$P: '$arg' is a pattern.\n" if $V>1;
		push @pats, $arg;
	}
}
@dirs = ('.') unless @dirs;	# Default: scan the current directory
@pats = ('.*') unless @pats;	# Default: a pattern that matches every path

# Run thru the directories, adding up the matching files under each:
$files = 0;
for my $top (@dirs) {
	print "DIR:\t$top\n";
	$files += onedir($top);
}

print "Total: $files files found.\n" if $V>0;
print "$P: Exit with status $exitstat.\n" if $V>1;
exit $exitstat;

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

sub onedir {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Scan one directory, looking for files and subdirectories.                   #
#                                                                             #
# Argument: the path of the directory to scan.                                #
# Returns:  the number of files (not counting directories) for which          #
#           onefile() returned a nonzero count, in this directory and in all  #
#           of its subdirectories.                                            #
#                                                                             #
# Non-directory entries are passed to onefile() as they are met, in sorted    #
# order.  Subdirectories are saved and scanned recursively only after every   #
# entry of this directory has been handled.  Each directory is recorded in    #
# the global %found (keyed by device and inode) so that it is scanned once.   #
# Directories that are symlinks are ignored.                                  #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	my ($dir) = @_;
	my $count = 0;		# Matching files found so far
	my @subdirs;		# Subdirectories saved for the scan at the end

	# Test for a symlink BEFORE recording the inode.  stat() follows
	# symlinks, so recording first would mark the link's target as already
	# scanned and the real directory would then be skipped.
	return 0 if -l $dir;	# Ignore directories that are symlinks

	my ($dev, $ino) = stat($dir);
	unless (defined $ino) {	# stat failed; don't record an empty key
		print STDERR "$P: Can't stat $dir/ [$!]\n" if $V>0;
		return 0;
	}
	# Inode numbers are only unique within one device, so key on both.
	if ($found{"$dev:$ino"}++ > 0) {	# Spot directories reached twice
		print "$P: We have scanned $dir/ already.\n" if $V>1;
		return 0;
	}

	if (opendir(my $dh, $dir)) {
		my @entries = readdir($dh);
		closedir($dh);		# All names are in memory; release the handle
		for my $entry (sort @entries) {
			next if $entry eq '.' || $entry eq '..';
			(my $path = "$dir/$entry") =~ s{//+}{/}g;	# Collapse repeated slashes
			if (-d $path) {
				push @subdirs, $path;		# Save for later scan
			} else {
				$count += onefile($path);	# Produce info about the file
			}
		}
		for my $subdir (@subdirs) {	# Now scan the subdirectories
			$count += onedir($subdir);	# Count the files they contain
		}
	} else {
		print STDERR "$P: Can't read $dir/ [$!]\n" if $V>0;
	}
	print "$count files under $dir\n" if $V>1;
	return $count;	# Return the number of files found
}

sub onefile {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Do the processing for one file.                                             #
#                                                                             #
# Argument: the path of the file.                                             #
# Returns:  1 if the file was listed on stdout, 0 otherwise.                  #
#                                                                             #
# Symlinks are ignored.  The path is then checked against every pattern in    #
# the global @pats; if none matches we return 0.  For a matching file we look #
# it up in the global %found (keyed by device and inode).  If it is already   #
# there, this is another link to a file we have listed, and we return 0.      #
# Otherwise we print its inode number and path and return 1.  Either way its  #
# entry in %found is incremented.  (Only whether the count is nonzero is      #
# used, but the count is kept in case it becomes useful.)                     #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	my ($pth) = @_;
	return 0 if -l $pth;	# Ignore symlinks

	my $cnt = 0;		# Number of patterns that matched
	for my $pat (@pats) {	# Check against the patterns for acceptability
		if ($pth =~ /$pat/) {
			print "$P: YES '$pth' =~ '$pat'\n" if $V>1;
			++$cnt;		# Count the matches
		} else {
			print "$P: NO: '$pth' =~ '$pat'\n" if $V>1;
		}
	}
	return 0 if $cnt < 1;

	my ($dev, $ino) = stat($pth);
	unless (defined $ino) {
		# stat failed (e.g. the file vanished).  Without this check every
		# such file would share one empty %found key.
		print STDERR "$P: Can't stat $pth [$!]\n" if $V>0;
		return 0;
	}
	# Inode numbers are only unique within one device, so key on both.
	my $lnks = $found{"$dev:$ino"}++;	# How many times we've seen this file
	if ($lnks > 0) {
		print "$P: We have $lnks links to $pth already.\n" if $V>1;
		return 0;		# Don't count files we've seen already
	}
	print "$ino\t$pth\n";	# Output the inode number and path
	return 1;			# Count this file as a match
}
