#!/usr/bin/perl

# NAME
#   ListSplit - Split tune list into files by name.

# SYNOPSIS
#   ListSplit ListBot.*

# DESCRIPTION
#   This reads the HTML tune list generated by TuneList, and writes a
#   series  of files XX.html, where the XX is the initial two letters
#   of the title.

#   Note that if there are several tunes with the same canonical name
#   and  URL, we remember only the last one.  This is consistent with
#   our scheme for TuneBot, which produces a  file  ending  with  the
#   date.  If we combine several such files from different runs, each
#   tune+URL seen last will be the one used.

# AUTHOR
#   John Chambers <jc@trillian.mit.edu>

# Flush the selected output handle after every write.
$| = 1;

# Diagnostics: $P is a pattern matched against each input line for tracing,
# and $V is the verbosity level tested by the diagnostic prints.
($P, $V) = ('xxyyzz', 2);

# Number of chars to use for file name.  A first command-line argument made
# up only of digits replaces the default and is removed from @ARGV.
$l = 2;
$l = shift @ARGV if $ARGV[0] =~ /^\d+$/;

# Column widths, in characters, used when an output line is formatted.
($Xmax, $Kmax, $C1max, $C2max) = (6, 12, 16, 15);

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# First,  read in all the data.  We match several line formats, some of which
# are historical, so that we get all the info produced by several versions of
# TuneBot. The data will be in an essentially random order in which the tunes
# were discovered.

# Scan the input one line at a time (files named on the command line, or
# STDIN).  Each recognised tune line is stored in %T under "TITLE:URL", so a
# later line with the same title and URL replaces an earlier one.  Lines that
# match the trace pattern $P are counted in $found; those that are also
# recognised are counted in $match and echoed to STDERR.
while (defined(my $line = <>)) {
	# Test the trace pattern first so that $1.. below belong to the tune match.
	my $traced = ($line =~ m"$P") ? 1 : 0;
	++$found if $traced;
	if ($line =~ m#<!-- *(\w+) (\d+) *-->.*href="(.*)">abc</a> *_*(\d+)\s(\d+)\s(\d+)\s(.*)</tt> *(.*)#i) {
		# Format with X, two numeric codes, the key, and then the title.
		$T{"$1:$3"} = "$4:$5:$6:$7:$8";	# TITLE:URL -> X:C1:C2:K:Title
		if ($traced) {++$match; print STDERR "Match1: $line"}
	} elsif ($line =~ m#<!-- *(\w+) (\d+) *-->.*href="(.*)">abc</a> *_*(\d+) (.*)</tt> *(.*)#i) {
		# Format without the two codes.  Every line of the form
		# "X <anything></tt> Title" is accepted here, so no further
		# X-only pattern is needed after this one.
		$T{"$1:$3"} = "$4:::$5:$6";	# TITLE:URL -> X:::K:Title
		if ($traced) {++$match; print STDERR "Match2: $line"}
	} else {
		print STDERR "$0: Ignore $line" if $V>2;
	}
}
print STDERR "$0: $found found $match matched with \"$P\"\n" if $V>1 && $match>0;

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# All the data has been read into the %T array.  We now  run  through  it  in
# lexical order, and output the data.  At present, we use the first two chars
# of the (upper-case) TTL field to decide which output file to produce. Every
# time these two chars change, we create a new output file.

# Length of the file-name prefix: the value of $l (default 2, or the leading
# numeric command-line argument).  Fall back to 2 if it is not a positive
# integer, since an empty prefix would give a meaningless file name.
my $width = (defined($l) && $l =~ /^\d+$/ && $l > 0) ? $l : 2;

# Group the keys by prefix before writing anything.  A title shorter than
# the prefix is padded with '_', which can give it the same prefix as a
# longer title that really contains '_'; those keys are not adjacent in
# sorted order, so writing files "whenever the prefix changes" could open
# the same file twice and truncate the first part.
my %keys_for;
for my $key (sort keys %T) {
	next unless $key =~ m"^(\w+):(.*)$";
	my $prefix = substr($1 . ('_' x $width), 0, $width);
	push @{ $keys_for{$prefix} }, $key;
}

# Write one ndx/PREFIX.html file per prefix, in lexical order.
for my $prefix (sort keys %keys_for) {
	my $ndxfile = "ndx/$prefix.html";
	if (!open(O, '>', $ndxfile)) {
		# Best effort: report the failure and go on to the next file
		# rather than printing to an unopened handle.
		print STDERR "$0: Can't write \"$ndxfile\" [$!]\n";
		next;
	}
	print "$ndxfile\n" if $V>1;
	print O "<html><head><title>ABC tunes starting with $prefix</title></head><body><pre>\n";
	line('','','ABC','X','Key','Code 1','Code 2','Title');
	for my $key (@{ $keys_for{$prefix} }) {
		my ($TTL, $URL) = ($key =~ m"^(\w+):(.*)$");
		# Only the title may contain ':', so the first four fields are
		# matched as colon-free and the title takes the remainder.
		my @field = ($T{$key} =~ /^(\d+):([^:]*):([^:]*):([^:]*):(.*)$/);
		next unless @field;
		my ($X, $C1, $C2, $K, $Title) = @field;
		line($TTL, $URL, 'ABC', $X, $K, $C1, $C2, $Title);
	}
	print O "</pre></body></html>\n";
	# Buffered write errors only show up when the file is closed.
	close(O) or print STDERR "$0: Error closing \"$ndxfile\" [$!]\n";
}
exit 0;

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Write one line to the current output file.  The first time this  is  called #
# for  a  new  file, the args will be constants to produce the column titles, #
# with no URL.  The rest of the calls will be with variable args to  generate #
# one tune reference.                                                         #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
sub line {
	# Arguments: canonical title (written as an HTML comment), URL (empty
	# for the column-heading line), link text, X number, key, the two
	# codes, and the display title.  Output goes to the filehandle O; the
	# column widths come from $Xmax, $Kmax, $C1max and $C2max.
	my ($TTL,$URL,$F,$X,$K,$C1,$C2,$Title) = @_;
	# Left-justify the codes and the key, padding or cutting each to
	# exactly its column width.
	$C1 = sprintf('%-*.*s', $C1max, $C1max, $C1);
	$C2 = sprintf('%-*.*s', $C2max, $C2max, $C2);
	$K  = sprintf('%-*.*s', $Kmax,  $Kmax,  $K);
	# Right-justify X but never cut it: dropping leading digits would show
	# a different tune number.  An over-long X widens its column instead.
	$X  = sprintf('%*s', $Xmax, $X);
	if (defined($URL) && length($URL)) {
		print O "<BR><TT><!--$TTL--><A HREF=\"$URL\">$F</A> $X $C1 $C2 $K </TT>$Title\n";
	} else {
		print O "<BR><TT><!--$TTL-->$F $X $C1 $C2 $K </TT>$Title\n";
	}
}
