#!/usr/bin/perl
#
#NAME
# abcsplit - extract ABC tunes from files.
#
#SYNOPSIS
# abcsplit [file | URL] ..
#
#DESCRIPTION
# This program reads thru its input, looking for ABC music.  When it finds a
# chunk  of  music, it creates a file derived from the title, and writes the
# music to the file.
#
# Input is from STDIN if there are no command-line URLs or files.  If  there
# are  things  named on the command line, we will first attempt to open them
# as local files, and if that fails, we then try to open them as  URLs.   At
# present, only http:// URLs work.
#
# If  there  is already a file by the given name, we add '_' and a number to
# the name.
#
# We recognize a tune when we encounter an X: or a T: line. We will generate
# an X:0 line for tunes that lacked an X:  line.  A tune ends with the first
# blank line (and a line that contains spaces and/or tabs  is  considered  a
# blank line).
#
#REQUIRES
# The following modules are needed for web access.  They should  be  in  the
# same place that you found this script.
#
	push @INC,"$ENV{HOME}/sh",'sh';
	require "URLopen.pm";	# Parses URL and returns file handle.
	require "HTTPcon.pm";	# Makes HTTP connection, sends GET.
	require "HTMLdir.pm";	# Produces HTML listing of directory.
	require "outtune.pm";	# Produce output tune file.
	require "Backup.pm";	# For moving files to backup name
#
#OPTIONS
# Options are args that start with '-' or '+', which disable or enable  some
# feature, respectively.  The options are:
#
# +N<n>
#    Number to append to the title to get the filename.  If the tune's title
#    is  "Foo  Bar",  the default file name will be FooBar.abc, but with the
#    +N5 option, it will be FooBar_5.abc.   With  -O,  the  number  will  be
#    incremented if the file already exists.
#
# +O
#    Overwrite existing files.  If there are two tunes with the same  title,
#    the second will wipe out the first.
# -O
#    Don't overwrite existing files (default). Instead, '_' and a number are
#    added to the tune name, and a new file named for the tune is created.
#    Default.
#
# +RJ
#    Special kludge for Roaring Jelly sets:  The tunes are examined for the
#    classification id numbers (R-78, J-42, ...), and this id is prepended
#    to the file name.
#
# +S<style>
#    File-name style.  The default is to capitalize each initial letter. The
#    style +S_ instead uses underscore as a word separator, and doesn't make
#    any changes to the capitalization.
# -S
#    No style to file names; just run the title together and add ".abc".
#
# +X
#    Generate X.abc files, where X is the tune's index  number.   The  X.abc
#    file will be a link to the file named for the title.
# -X
#    Don't generate the X.abc files. Default.
#
# +X<n>T
#    Prepend a tune's X: index number to the title to get the file name.  The
#    <n> number is how many digits the number should be; it will have initial
#    zeroes to pad it to <n> chars.  The +S_ "style" option is  usually  used
#    with this, and the delimiter will be also used after the index number in
#    the file name. Thus, +S_ +X4T options would cause a tune with "X:15" and
#    "T:Foo's Reel" to be named "0014_Foos_Reel.abc".
#
# For  the O'Neill's project files, where the tunes have the number from the
# book, commands like this are used:
#     abcsplit +ox ../files/1176-1275B.abc
# Then the Title.abc and X.abc files are moved to another directory.
#
#SEE ALSO
# abcjoin
#
#BUGS
# Each time this is run, an entirely new set of files will be created. Maybe
# we  should  compare  each  tune  to  the  existing  file,  and if they are
# identical, not write anything.  But that's for a future release.
#
# ABC embedded in HTML files will probably not work sensibly.
#
# We extract only the usascii letters [A-Za-z] to generate  the  file  name.
# Perhaps  we should also recognize the Latin-1 letters.  Some day we'll all
# convert to Unicode and this won't matter.
#
# I wonder if there are any ABC tools that can't handle X:0 lines. The Arabs
# taught  us  about  zero  many  centuries  ago,  but  it  seems  that  some
# programmers still haven't caught on to the concept.
#
#AUTHOR
# John  Chambers  <jc@trillian.mit.edu>  You may use this program freely for
# any purposes, as long as you give me credit for it (and  take  credit  for
# any changes you make).
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

$| = 1;
($me = $0) =~ s'.*/'';
$V = $ENV{"V_$me"} || 2;
$overwrite = 0;		# If true, overwrite existing files
$ONEILLS = 0;		# Kludge for unpacking O'Neill's tunes
$namestyle = ($ENV{"S_$me"}) || '';	# Naming style (default is InitCaps)

$RJ = 0;		# RJ kludge
$filnum = '';	# Append file count to file names
$overwrite = 0;	# Overwrite existing files

&inittune;

for $f (@ARGV) {
	if (($flg,$Opts) = ($f =~ /^([-+])(.*)$/)) {
		while ($Opts =~ s/(.)//) {
			$OPT = uc($Opt = $1);
			if ($OPT eq 'N') {			# Number the output files?
				if ($flg eq '-') {
					$filnum = '';		# This is the default
					print STDERR "$me: No file numbers.\n" if $V>2;
				} elsif ($Opts =~ s/^(\d+)//) {	# Starting number?
					$filnum = $1;
					print STDERR "$me: File number = \"_$filnum\".\n" if $V>2;
				} else {
					$filnum = 1;
					print STDERR "$me: File number = _$filnum.\n" if $V>2;
				}
			} elsif ($OPT eq 'O') {		# Overwrite existing files
				$overwrite = ($flg eq '+' ? 1 : 0);
				print STDERR "$me: Overwriting existing files.\n" if $V>2;
			} elsif ($OPT eq 'R') {		# RJ kludge
				if ($Opts =~ s/^J//i) {
					$RJ = ($flg eq '+' ? 1 : 0);
					print STDERR "$me: RJ kludge.\n" if $V>0;
				} else {
					print STDERR "$me: Unknown opt $flg$Opt ignored.\n" if $V>0;
				}
			} elsif ($OPT eq 'S') {		# Style of file naming
				if ($flg eq '+') {
					if ($Opts =~ s/(.)//) {
						$namestyle = $1;
					} else {
						$namestyle = '_';
					}
				} else {
					$namestyle = '';
				}
				print STDERR "$me: Name style is '$namestyle'.\n" if $V>2;
			} elsif ($OPT eq 'X') {	# Kludge for producing numbered files
				if ($Opts =~ s/^(\d+)T//) {	# X3T means 3-digit index number + title as filename
					$X_T = int($1);	# Use the number length >0 as the flag to create this format
					print STDERR "$me: X_T=$X_T.\n" if $V>0;
				} else {
					$Xname = ($flg eq '+' ? 1 : 0);
					print STDERR "$me: Linking to X-index names.\n" if $V>2;
				}
			} else {
				print STDERR "$me: Unknown option $flg$Opt ignored.\n" if $V>0;
			}
		}
	} elsif (open(FIL,$f)) {
		&onefile('FIL');
		++$files;
		print "$me: File $files \"$f\" read.\n" if $V>0;
	} elsif (&URLopen(*URL,$f)) {
		&onefile('URL');
		++$files;
		print "$me: URL $files \"$f\" read.\n" if $V>0;
	}
	&outtune('',$overwrite,$Xname,$filnum,$prefix) if $lines > 1;
}
unless ($files) {
	print "$me: Reading STDIN.\n" if $V>2;
	&onefile('STDIN');
	++$files;
	print "$me: File $files STDIN read.\n" if $V>2;
}
&outtune('',$overwrite,$Xname,$filnum,$prefix) if $lines > 1;
print "$me: $files read.\n" if $V>2;
exit 0;

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Initialization for a new tune.
#sub inittune {
#	@tune = ();
#	$lines = 0;
#	$T = '';
#	$X = 0;
#}

sub onefile { my $F='onefile';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Extract the tunes from one open file. We are passed the name of the
# file handle.
	local($f) = @_;
#	&inittune();	# Discard any previous tune
	for $l (<$f>) {
		$l =~ s"[ \t\n\r]+$"";
		if (!$l) {
			print "Got blank line.\n" if $V>2;
			&outtune('',$overwrite,$Xname,$filnum,$prefix) if $lines > 1;
			next;
		}
		print "Line $l\n" if $V>3;
		if ($l =~ /^X:\s*(\d+)/) {
			print "Got X: $1\n" if $V>2;
			$ndx = $1;
			print "Use ndx: $ndx\n" if $V>2;
			&outtune('',$overwrite,$Xname,$filnum,$prefix) if $lines > 1;
			&inittune();
			$X = $ndx;
			$tune[$lines++] = "X:$ndx\n";
			next;
		}
		if ($RJ) {	# RJ kludge: Try to find a prefix type & number
			if (!$prefix) {
				if ($l =~ /^[CT]:/) {		# C and T lines can have "(R-123)"
					if ($l =~ /^[CT]:\s*([A-Z])-(\d+)/) {
						$prefix = sprintf("$1%03d_",$2);
						print "$F: I prefix=\"$prefix\"\n" if $V>2;
					} elsif ($l =~ /^C:\s*(S)-([A-Z])/) {
						$prefix = sprintf("$1000_",$X);
						print "$F: I prefix=\"$prefix\"\n" if $V>0;
					}
				} elsif ($l =~ /^I:/) {
					if ($l =~ /\s([A-Z])-(\d+)/) {
						$prefix = sprintf("$1%03d_",$2);
						print "$F: I prefix=\"$prefix\"\n" if $V>2;
					} elsif ($l =~ /\s(S)-([A-Z])\b/) {
						$prefix = sprintf("Sq_%s_",$2);
						print "$F: I prefix=\"$prefix\"\n" if $V>2;
					}
				} elsif ($l =~ /^%%text/) {
					if ($l =~ /\s([A-Z])-(\d+)/) {
						$prefix = sprintf("$1%03d_",$2);
						print "$F: I prefix=\"$prefix\"\n" if $V>2;
					}
				}
			}
		}
		if ($l =~ /^T:\s*(.*)\s*$/) {
			print "Got T: \"$1\"\n" if $V>2;
			if (!$T) {
				$T = $1;
				print "$F: T=\"$T\"\n" if $V>2;
				if ($RJ) {	# RJ kludge
					if ($T =~ /(.*)\s+\((.*)\)\s*(.*)/) {	# Strip off parenthesised stuff
						$Tcomment = $2;
						$T = "$1 $3";
						print "$F: Tcomment=\"$Tcomment\"\n" if $V>2;
						print "$F: T=\"$T\"\n" if $V>2;
						if ($Tcomment =~ /\b([A-Z])-(\d+)/) {
							$prefix = sprintf("$1%03d_",$2);
							print "$F: T prefix=\"$prefix\"\n" if $V>2;
						}
					}
				}
				push @ttl, $T;
				$T =~ s"['/]+""g;			# Elide posessives, abbr, and Gaelic accents
				print "$F: T=\"$T\" [poss/abbr]\n" if $V>2;
				$T =~ s"\\\W*""g;			# Elide escaped accents
				print "$F: T=\"$T\" [escapes]\n" if $V>2;
				$T =~ s"^the\s+""i;			# Delete initial definite article
				print "$F: T=\"$T\" [def article]\n" if $V>2;
				$T =~ s"^an?\s+""i;			# Delete initial indefinite article
				print "$F: T=\"$T\" [ind article]\n" if $V>2;
				if ($namestyle) {
					$T =~ s"[\s_]"$namestyle"g;		# Convert white space to style separator
					print "$F: T=\"$T\" [style='$namestyle']\n" if $V>2;
				} else {
					$T =~ s"\b([a-z])"\u$1"g;	# Uppercase first letters
					print "$F: T=\"$T\" [style=default]\n" if $V>2;
				}
				$T =~ s"[^_A-Za-z0-9$namestyle]""g;	# Delete non-alphanum chars
			#	if ($X_T > 0)	{	# Are we prepending the index number to the title?
			#		$T = sprintf(("%0$X_T" . 'd'), $X) . $namestyle . $T; 
			#		print "$F: T=\"$T\" [X_T=$X_T]\n" if $V>0;
			#	}
				print "$F: T=\"$T\" [AN]\n" if $V>2;
			}
			$tune[$lines++] = "$l\n";
			next;
		}
		if ($lines > 0) {
			print "Line $lines is \"$l\"\n" if $V>2;
			$tune[$lines++] = "$l\n";
		}
	}
}
