#!/usr/bin/perl
#
#NAME
#  abcsplit - extract ABC tunes from files.
#
#SYNOPSIS
#  abcsplit [file | URL] ..
#
#DESCRIPTION
#  This program reads thru its input, looking for ABC music.  When it finds a
#  chunk  of  music, it creates a file derived from the title, and writes the
#  music to the file.
#
#  Input is from STDIN if there are no command-line URLs or files.  If  there
#  are  things  named on the command line, we will first attempt to open them
#  as local files, and if that fails, we then try to open them as  URLs.   At
#  present, only http:// URLs work.
#
#  If  there  is already a file by the given name, we add '_' and a number to
#  the name.
#
#  We recognize a tune when we encounter an X: or a T: line. We will generate
#  an X:0 line for tunes that lacked an X:  line.  A tune ends with the first
#  blank line (and a line that contains spaces and/or tabs  is  considered  a
#  blank line).
#
#REQUIRES
#  The following modules are needed for web access.  They should  be  in  the
#  same place that you found this script.
#
	push @INC,"$ENV{HOME}/sh",'sh';
	require "URLopen.pm";	# Parses URL and returns file handle.
	require "HTTPcon.pm";	# Makes HTTP connection, sends GET.
	require "HTMLdir.pm";	# Produces HTML listing of directory.
	require "outtune.pm";	# Produce output tune file.
	require "Backup.pm";	# For moving files to backup name
#
#OPTIONS
#  Options are args that start with '-' or '+', which disable or enable  some
#  feature, respectively.  The options are:
#
#  +N<n>
#     Number to append to the title to get the filename.  If the tune's  name
#     is  "Foo  Bar",  the default file name will be FooBar.abc, but with the
#     +N5 option, it will be FooBar_5.abc.   With  -O,  the  number  will  be
#     incremented if the file already exists.
#
#  +O
#     Overwrite existing files.  If there are two tunes with the same  title,
#     the second will wipe out the first.
#  -O
#     Don't overwrite existing files (default). Instead, '_' and a number are
#     added to the tune name, and a new file named for the tune is created.
#     Default.
#
#  +S<style>
#     File-name style.  The default is to capitalize each initial letter. The
#     style +S_ instead uses underscore as a word separator, and doesn't make
#     any changes to the capitalization.
#  -S
#     No style to file names; just run the title together and add ".abc".
#
#  +X
#     Generate X.abc files, where X is the tune's index  number.   The  X.abc
#     file will be a link to the file named for the title.
#  -X
#     Don't generate the X.abc files. Default.
#
#  For  the O'Neill's project files, where the tunes have the number from the
#  book, commands like this are used:
#      abcsplit +ox ../files/1176-1275B.abc
#  Then the Title.abc and X.abc files are moved to another directory.
#
#SEE ALSO
#  abcjoin
#
#BUGS
#  Each time this is run, an entirely new set of files will be created. Maybe
#  we  should  compare  each  tune  to  the  existing  file,  and if they are
#  identical, not write anything.  But that's for a future release.
#
#  ABC embedded in HTML files will probably not work sensibly.
#
#  We extract only the usascii letters [A-Za-z] to generate  the  file  name.
#  Perhaps  we should also recognize the Latin-1 letters.  Some day we'll all
#  convert to Unicode and this won't matter.
#
#  I wonder if there are any ABC tools that can't handle X:0 lines. The Arabs
#  taught  us  about  zero  many  centuries  ago,  but  it  seems  that  some
#  programmers still haven't caught on to the concept.
#
#AUTHOR
#  John  Chambers  <jc@trillian.mit.edu>  You may use this program freely for
#  any purposes, as long as you give me credit for it (and  take  credit  for
#  any changes you make).
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

$| = 1;
($me = $0) =~ s'.*/'';
$V = $ENV{"V_$me"} || 2;
$overwrite = 0;		# If true, overwrite existing files
$ONEILLS = 0;		# Kludge for unpacking O'Neill's tunes
$namestyle = ($ENV{"S_$me"}) || '_';	# Naming style (default is InitCaps)

&inittune;

for $f (@ARGV) {
	if (($flg,$Opts) = ($f =~ /^([-+])(.*)$/)) {
		while ($Opts =~ s/(.)//) {
			$OPT = uc($Opt = $1);
			if ($OPT eq 'N') {			# Number the output files?
				if ($flg eq '-') {
					$filnum = '';		# This is the default
					print STDERR "$me: No file numbers.\n" if $V>2;
				} elsif ($Opts =~ s/^(\d+)//) {	# Starting number?
					$filnum = $1;
					print STDERR "$me: File number = \"_$filnum\".\n" if $V>2;
				} else {
					$filnum = 1;
					print STDERR "$me: File number = _$filnum.\n" if $V>2;
				}
			} elsif ($OPT eq 'O') {		# Overwrite existing files
				$overwrite = ($flg eq '+' ? 1 : 0);
				print STDERR "$me: Overwriting existing files.\n" if $V>2;
			} elsif ($OPT eq 'S') {		# Style of file naming
				if ($flg eq '+') {
					if ($Opts =~ s/(.)//) {
						$namestyle = $1;
					} else {
						$namestyle = '_';
					}
				} else {
					$namestyle = '';
				}
				print STDERR "$me: Name style is '$namestyle'.\n" if $V>2;
			} elsif ($OPT eq 'X') {	# Kludge for producing numbered files
				$Xname = ($flg eq '+' ? 1 : 0);
				print STDERR "$me: Linking to X-index names.\n" if $V>2;
			} else {
				print STDERR "$me: Unknown option $flg$Opt ignored.\n" if $V>0;
			}
		}
	} elsif (open(FIL,$f)) {
		&onefile('FIL');
		++$files;
		print "$me: File $files \"$f\" read.\n" if $V>2;
	} elsif (&URLopen(*URL,$f)) {
		&onefile('URL');
		++$files;
		print "$me: URL $files \"$f\" read.\n" if $V>2;
	}
	&outtune('',$overwrite,$Xname,$filnum) if $lines > 1;
}
unless ($files) {
	print "$me: Reading STDIN.\n" if $V>2;
	&onefile('STDIN');
	++$files;
	print "$me: File $files STDIN read.\n" if $V>2;
}
&outtune('',$overwrite,$Xname,$filnum) if $lines > 1;
print "$me: $files read.\n" if $V>2;
exit 0;

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Initialization for a new tune.
sub inittune {
	@tune = ();
	$lines = 0;
	$T = '';
	$X = 0;
}

sub onefile { my $F='onefile';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Extract the tunes from one open file. We are passed the name of the
# file handle.
	local($f) = @_;
	@ttl = ();
	for $l (<$f>) {
		$l =~ s"[ \t\n\r]+$"";
		if (!$l) {
			print "Got blank line.\n" if $V>2;
			&outtune('',$overwrite,$Xname,$filnum) if $lines > 1;
			next;
		}
		print "Line $l\n" if $V>3;
		if ($l =~ /^X:\s*(\d+)/) {
			print "Got X: $1.\n" if $V>2;
			$X = $1;
			&outtune('',$overwrite,$Xname,$filnum) if $lines > 1;
			@ttl = ();
			$tune[$lines++] = "$l\n";
			next;
		}
		if ($l =~ /^T:[\s"]*(.+)[\r\s"]*$/) {
			print "Got T: \"$1\"\n" if $V>2;
			push @ttl, $1;
			if (!$T) {
				$T = $1;
				print "$F: T=\"$T\"\n" if $V>2;
				$T =~ s"\\(ae|c|o)"$1"g;	# Elide \ae, \c, \o
				print "$F: T=\"$T\" [specials]\n" if $V>2;
				$T =~ s"\\\W*""g;			# Elide escaped accents
				print "$F: T=\"$T\" [escapes]\n" if $V>2;
				$T =~ s#['"/.,v]+##g;		# Elide posessives, quotes, abbr, and Gaelic accents
				print "$F: T=\"$T\" [poss/abbr]\n" if $V>2;
				$T =~ s"\\\W*""g;			# Elide escaped accents
				print "$F: T=\"$T\" [escapes]\n" if $V>2;
				$T =~ s"^the\s+""i;			# Delete initial definite article
				print "$F: T=\"$T\" [def article]\n" if $V>2;
				$T =~ s"^an?\s+""i;			# Delete initial indefinite article
				print "$F: T=\"$T\" [ind article]\n" if $V>2;
				if ($namestyle) {
					$T =~ s"[\s_]"$namestyle"g;		# Convert white space to style separator
					print "$F: T=\"$T\" [style='$namestyle']\n" if $V>2;
				} else {
					$T =~ s"\b([a-z])"\u$1"g;	# Uppercase first letters
					print "$F: T=\"$T\" [style=default]\n" if $V>2;
				}
				$T =~ s"[^_A-Za-z0-9$namestyle]""g;	# Delete non-alphanum chars
				print "$F: T=\"$T\" [AN]\n" if $V>2;
			}
			$tune[$lines++] = "$l\n";
			next;
		}
		if ($lines > 0) {
			print "Line $lines is \"$l\"\n" if $V>2;
			$tune[$lines++] = "$l\n";
		}
	}
}
