ABC tunes starting with $CC

#!/usr/bin/perl
#
#NAME
#	Hosts2html - convert the hst/* files to alphabetized HTML files
#
#SYNOPSIS
#	Hosts2html &
#
#REQUIRES
#	These should be in the same directory; if not, you'll have to set
#	@INC to say where to find them.
#
require "Vopt.pm";
require "Backup.pm";
require "CTitle.pm";
#
#DESCRIPTION
#	The hst/ directory is scanned for host files, and for each, the
#	information about ABC tunes is extracted and written to a set of
#	HTML files. We create the output files in the ndx/ directory,
#	which must exist.
#
#	This is a post-processor for the abcbot program, which creates the
#	per-host ABC info files in the hst/ directory. The data there is
#	in a random order, and not HTML. The ndx/* files that we create
#	may be used directly to locate tunes by name. They may also be
#	searched via the findtune.html web page, which provides lookup via
#	perl patterns.
#
#OPTIONS
#	None, so far.
#
#ENVIRONMENT
#	We use the Vopt.pm module to set up verbose-mode output. This
#	means that we use the environment variable V_Hosts2html, whose
#	value should consist of a numeric verbose level followed by a
#	file name. The default output is STDERR. You can set the verbose
#	level as follows:
#
#	setenv V_Hosts2html 3/tmp/Hosts2html.out	# csh or tcsh users.
#	export V_Hosts2html=3/tmp/Hosts2html.out	# ksh or bash users.
#
#	The variable H_Hosts2html, if set to a positive number, limits
#	the number of host files that are processed (for debugging).
#
#EXIT STATUS
#	0 normally; 1 if any ndx/*.html output file could not be created.
#
#BUGS
#	We don't attempt to create any needed directories.
#
#	NOTE(review): the copy of this script that was reviewed appears to
#	have passed through an HTML stripper. The strings written to the
#	output files contain no markup, and the URL of each tune is computed
#	but never written. The strings are preserved here exactly as found;
#	restore the markup from a pristine copy.
#
#SEE ALSO
#
#AUTHOR
#	John Chambers

$| = 1;
$exitstat = 0;
($P = $0) =~ s".*/"";
# Vopt() comes from Vopt.pm; presumably it opens the V filehandle and sets
# the verbose level $V -- confirm against that module.
&Vopt($ENV{"V_$P"} || $ENV{"D_$P"} || $ENV{"T_$P"} || '1');
$Hlimit = $ENV{"H_$P"} || 0;	# Host limit, for debugging.
print V "$P: Started with H=$Hlimit V=$V.\n" if $V>1;

$articles = '-';	# Suppress articles in titles
$Xmax  =  6;		# Max width of index
$Kmax  = 12;		# Max width of key
$Hmax  =  6;		# Max width of header list
$Mmax  =  5;		# Max width of meter
$C1max = 15;		# Max width of Code 1
$C2max = 15;		# Max width of Code 2
$Oopen =  0;		# True while an ndx/*.html file is open on O
%reduce = (			# URL reductions.
	'http://localhost/~jc/'        => '/~jc/',
	'http://dmz.atsbank.com/~jc/'  => '/~jc/',
	'http://trillian.mit.edu/~jc/' => '/~jc/',
);
@suppress = (		# Not referenced anywhere in this file.
	'jc/.*/Scotland/.*\.hdr$'
);

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Extract the list of hosts from the hst/ directory. We ignore any
# file with '-' or '.LCK' on the end of its name.

@hosts = grep(!/(-|\.LCK)\s*$/,glob("hst/*"));
$hosts = int(@hosts);
print V "$P: $esep\n" if $V>2;

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Now run through the host list, and for each host, read its file,
# leaving behind information about ABC titles in the %T table.

print V "$P: We have $hosts hosts.\n" if $V>1;
host:
foreach $hstfil (@hosts) {
	print V "$P: host file \"$hstfil\"\n" if $V>2;
	if (($host) = ($hstfil =~ m".*/([-_:.\w]+)\s*$")) {
		print V "$P: $hsep\n" if $V>2;
		print V "$P: HOST \"$host\"\n" if $V>1;
		++$Hcount;
		if ($Hlimit > 0 && $Hcount > $Hlimit) {
			print V "$P: Past host limit of $Hlimit; quitting.\n" if $V>1;
			last host;
		}
		# Three-argument open, so that odd characters in a file name can't
		# be taken as an open mode.
		unless (open(H,'<',$hstfil)) {
			print V "$P: Can't read \"$hstfil\" [$!]\n" if $V>0;
			next host;
		}
	line:
		while (defined($line = <H>)) {
			print V "$P: Line $line" if $V>5;
			$line =~ s/[\r\n]+$//;
			unless ($line) {
				print V "$P: End of chunk.\n" if $V>3;
				# Do we want to forget anything here?
				next line;
			}
			print V "$P: line $line\n" if $V>5;
##			# Assorted line formats that have been used:
##			if (($line =~ m'^(\d+)(\s+)([-#\w])\s+D:(\d+)\s+B:(\d*)\s+L:([\d/.]*)\s+X:([\d/.]*)\s+T:(\d*)\s+(.*)$')) {
##			#	$dt = $1;
##			#	$sp = $2;
##			#	$tp = $3;
##			#	$Dn = $4;
##			#	$Bn = $5;
##			#	$Ln = $6;
##			#	$Xn = $7;
##			#	$Tn = $8;
##				$path = $9;
##				$bytes = sprintf "%06d",$5;
##				print V "$P: Chunk ts=$1 dt=$2 fl='$3' D:$4 \"$path\"\n" if $V>2;
##				next line;
##			}
##			if (($line =~ m'^(\d+)(\s+)([-#\w])\s+D:(\d+)\s+B:(\d*)\s+X:([\d/.]*)\s+T:(\d*)\s+(.*)$')) {
##			#	$dt = $1;
##			#	$sp = $2;
##			#	$tp = $3;
##			#	$Dn = $4;
##			#	$Bn = $5;
##			#	$Xn = $6;
##			#	$Tn = $7;
##				$path = $8;
##				$bytes = sprintf "%06d",$5;
##				print V "$P: Chunk ts=$1 dt=$2 fl='$3' D:$4 \"$path\"\n" if $V>2;
##				next line;
##			}
##			if (($line =~ m'^(\d+)(\s+)([-#\w])\s+D:(\d+)\s+B:(\d+)\s+(.*)$')) {
##			#	$dt = $1;
##			#	$sp = $2;
##			#	$tp = $3;
##			#	$Dn = $4;
##			#	$Bn = $5;
##				$path = $6;
##				$bytes = sprintf "%06d",$5;
##				print V "$P: Chunk ts=$1 dt=$2 fl='$3' D:$4 \"$path\"\n" if $V>2;
##				next line;
##			}
			# General code to split the line apart one field at a time:
			# First, pick off the initial timestamp, which may have two forms,
			# depending on whether abcbot was run with the debugging turned on.
			# A line of this kind starts a new chunk (one file on the host);
			# we remember its path for the tune lines that follow.
			if (($line =~ m'^(\d+)(\s+)([-#\w])\s+D:(\d+)\s+(.*)$')
			||  ($line =~ m'^(\d+)=(\d+)\s+([-#\w])\s+D:(\d+)\s+(.*)$')) {
				$path  = $5;
				# NOTE(review): the byte count is no longer extracted from the
				# chunk line (see the disabled formats above), so every entry
				# gets size 0 and the size part of the sort key is a no-op.
				$bytes = 0;
				print V "$P: Chunk ts=$1 dt=$2 fl='$3' D:$4 \"$path\"\n" if $V>2;
				next line;
			}
			print V "$P: ---- $line\n" if $V>5;
			$X = $H = $K = $M = $C1 = $C2 = $T = '';
##	975765312 X:33 M:9/8 K:F H:"BCLOQZ C1=067663066067664 C2=ddddududddduddu T:Some Title
			if ($line =~ s"^(\d+)\s+X:([\d/.]+)\s+"") {
				%V = ();			# Values of various index fields
				$V{X} = $2;
				while ($line) {		# Break up the line one field at a time
					if ($line =~ s"^([TPN]):(.*)$"") {	# The title comes last
						$V{':'} = $1;					# [TPN] flag for title
						$V{CT}  = &CTitle($V{T} = $2);	# Canonical and original title
						print V "$P: 1 V{:}=$V{':'} V{CT}=$V{CT} V{T}=\"$V{T}\"\n" if $V>2;
					} elsif ($line =~ s"^\s*(\w+)[:=]([^:=]*?)\s+(\w+)[:=]"$3:") {
						# "name:value" followed by another field; the value
						# contains no ':' or '='. The next field's name is
						# put back, with ':' as its separator.
						$V{$1} = $2;
						print V "$P: 2 V{$1}='$V{$1}'\n" if $V>2;
					} elsif ($line =~ s"^\s*(\w+)[:=](.*?)\s+(\w+)[:=]"$3:") {
						# Same, but the value may contain ':' or '='.
						$V{$1} = $2;
						print V "$P: 3 V{$1}='$V{$1}'\n" if $V>2;
					} else {
						print V "$P: ### Can't parse \"$line\"\n" if $V>0;
						# Pop off one field. If nothing can be removed, give
						# up on the line rather than loop forever.
						last unless $line =~ s"^\s*\S+\s*"";
					}
				}
				print V "$P: CT=$V{CT} B=$bytes\n" if $V>3;
				unless ($ct = $V{CT}) {			# Canonical title.
					print V "$P: Null CTitle for \"$V{T}\"\n" if $V>2;
					next line;
				}
				$url = "http://$host$path";		# Generate HREF URL
				$cn  = ++$CTN{$ct};				# Counter to distinguish duplicates
				$bc  = sprintf "%06d",$bytes;	# Byte count, fixed length for sorting
				$key = "${ct}:${bc}:${url}:${cn}";	# Sort key for entry
				$val = "$V{X}:$V{C1}:$V{C2}:$V{M}:$V{K}:$V{H}:$V{C}:$V{V}:$V{':'}:$V{T}";	# Fields we want now.
				print V "$P: key '$key' ==> '$val'\n" if $V>2;
				$T{$key} = $val;	# TITLE:SIZE:URL:N -> X:C1:C2:M:K:H:C:V:flag:Title
				next line;
			}
		}
		close(H);
	}
}
print V "$P: $esep\n" if $V>2;

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# All the data has been read into the %T array. We now run through it in
# lexical order, and output the data.
#
# At present, we use the first two chars of the (upper-case) TTL field to
# decide which output file to produce. Every time these two chars change, we
# create a new output file.
#
# Note that the sort key starts with the canonicalized title, followed by the
# 6-digit file size. This causes the entries for a single title to be sorted
# with smaller files first. This is intentional, because I've found that
# users tend to fetch the first version of a title first. This way, they will
# ask for the smaller files first, minimizing the network load.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
print V "$P: Input phase done; processing data.\n" if $V>1;
for $key (sort keys %T) {
	$val = $T{$key};
	print V "$P: key '$key' val '$val'\n" if $V>2;
	if (($TTL,$SZ,$URL,$CN) = ($key =~ m"^(\w+):(\d*):(.*):(\d*)$")) {
		print V "$P: T='$TTL' U='$URL' SZ=$SZ N=$CN.\n" if $V>2;
		$CC = substr($TTL . '__',0,2);
		# The value has ten fields. The eighth (V:) goes into $Vf, not $V,
		# because $V is the verbose level. The ninth is the [TPN] title flag,
		# and the title itself comes last and may contain colons.
		if (($X,$C1,$C2,$M,$K,$H,$C,$Vf,$f,$Title) = ($val =~ m"^([\d/.]+):([^:]*):([^:]*):([^:]*):([^:]*):([^:]*):([^:]*):([^:]*):([^:]*):(.*)$")) {
			print V "$P: === X=$X C1='$C1' C2='$C2' M='$M' H='$H' K='$K' T='$f:$Title'\n" if $V>2;
			if ($CC ne $CX) {
				print V "$P: $hsep\n" if $V>2;
				print V "$P: Closing '$CX' and opening '$CC'.\n" if $V>2;
				if ($Oopen) {		# Finish the previous file, if there is one.
					print O "\n";	# NOTE(review): markup probably stripped here.
					close O;
					$Oopen = 0;
				}
				if (open(O,'>',"ndx/$CC.html")) {
					$Oopen = 1;
					# NOTE(review): markup probably stripped from this string.
					print O "ABC tunes starting with $CC\n\n\n";
					&line('','','ABC','X','Meter','Key','Headers','Code 1','Code 2','Title');
					&line('','','---','-','-----','---','-------','------','------','-----');
					print V "$CC.html\n" if $V>1;
				} else {
					print V "$0: Can't write '$CC.html' [$!]\n";
					$exitstat = 1;
				}
				$CX = $CC;	# Set even on failure, so we don't retry for every tune.
				%dup = ();	# Record of duplicate lines.
			}
			&line("$TTL"   ,$URL,'ABC',$X,$M,$K,$H,$C1,$C2,$f,$Title,$SZ) if $Oopen;
		} else {
			print V "$P: Can't parse \"$val\"\n" if $V>1;
		}
	}
}
print V "$P: $esep\n" if $V>2;
if ($Oopen) {
	print O "\n\n\n";	# NOTE(review): markup probably stripped from this string.
	close O;
}
exit $exitstat;

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
sub line {
	my $F = 'line';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Write one line to the current output file (filehandle O). The first time
# this is called for a new file, the args will be constants to produce the
# column titles, with no URL. The rest of the calls will be with variable
# args to generate one tune reference. A line identical to one already
# recorded in %dup is not written again.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	my($TTL,	# Canonical title, all upper case (not used here)
	   $URL,	# Full URL, or URI for local files
	   $Fmt,	# Format (always 'ABC' now)
	   $X,		# Index number
	   $M,		# Meter
	   $K,		# Key
	   $H,		# Header lines
	   $C1,		# JC code (GB code difference)
	   $C2,		# USD code
	   $f,		# Header flag ([TPN]) for title
	   $Title,	# Title in original form
	   $Size	# File size, in bytes (not used here)
	) = @_;
	my($p,$r,$u,$XX,$out);
	$Title = '' unless defined $Title;	# The column-title calls pass no title.
	# Pad (and, except for H, truncate) each column to its fixed width.
	$C1 = substr(($C1 . (' ' x $C1max)),0,$C1max);
	$C2 = substr(($C2 . (' ' x $C2max)),0,$C2max);
	$K  = substr($K . (' ' x $Kmax), 0, $Kmax);
	$H  = substr($H . (' ' x $Hmax), 0, $Hmax) if length($H) < $Hmax;	# Long header lists are kept whole.
	$M  = substr($M . (' ' x $Mmax), 0, $Mmax);
	$XX = substr((' ' x $Xmax) . $X, -$Xmax, $Xmax);	# Right-justified.
	$f  = 'T' unless $f;		# Title [TPN] flag
	print V "$F: f='$f' T=\"$Title\"\n" if $V>2;
	# Shorten URLs that start with one of our own hosts. The prefix is quoted
	# so that its dots match only dots.
	for $p (keys %reduce) {
		$r = $reduce{$p};
		print V "$P: p='$p' r='$r'\n" if $V>4;
		$u = $URL;
		if ($u =~ s"\Q$p\E"$r") {
			print V "$P: '$URL' => '$u'\n" if $V>4;
			$URL = $u;
		}
	}
	# NOTE(review): the original had separate branches for "URL" and "no URL"
	# that were identical in the copy reviewed; the URL branch presumably
	# wrapped part of the line in a link to $URL. As it stands, $URL is not
	# written to the output.
	$out = "$Fmt $XX C1:$C1 C2:$C2 M:$M K:$K H:$H $f:$Title";
	print O "$out\n" unless $dup{$out};
	$dup{$out} = 1;
}