#!/usr/bin/perl
#
# NAME
#   TuneList - rewrite a list of tunes to include hyperlinks.
#
# SYNOPSIS
#   TuneList ["<oldlist"] [URL|dir]...
#
# DESCRIPTION
#   This  program  reads  a  tune  list  in  any of several formats, and
#   produces a new list that has its hyperlinks updated.
#
#   We implement shell-like redirection ourself, because the oldlist and
#   newlist files may be URLs, but not many people have shells that know
#   how to redirect over the net.
#
#   The URLs may be directories or .abc files.  If they are directories,
#   we  read  the  directory listing and (recursively) extract a list of
#   all the .abc files that they contain.
#
# OPTIONS
#   Options  start  with  '-'  or  '+'  plus  a  letter, with possibly a
#   parameter (and no embedded spaces).  Some of  the  options  take  an
#   initial '+' to mean "enable" and '-' to mean "disable".  For others,
#   the '-' or '+' is not relevant.  If '+' is shown in the list below,
#   then it is significant.
#
#   -a
#     Produce a list of all tunes.  In this case, no input old  list  is
#     needed,  just  one  or  more URLs or directories.  (However, if no
#     input files are given, we will still read STDIN, so the parent  or
#     user must give an EOF or we won't terminate.)
#
#   -d<depth>
#     This restricts the depth of directory searches to <depth>. This is
#     mostly to avoid infinite loops.  The default is 2.
#
#   +t Produce an html table rather than a list.
#
#   -type   (default: on)
#   +key    (default: off)
#   +seq    (default: off)
#   +time   (default: off)
#   -orig   (default: on)
#   -abc    (default: on)
#   -ps     (default: on)
#   -gif    (default: on)
#   -tune   (default: on)
#     These enable or disable the production of various columns  in  the
#     output.   The  defaults  are  chosen  for  personal, idiosyncratic
#     reasons, and you might want to  modify  them  to  match  your  own
#     personal preferences.
#
# BUGS
#   This program is highly experimental, in alpha state, and  all  that.
#   Use  it  at  your  own risk.  (Not much risk, there, actually, but I
#   thought I'd give the usual friendly warnings.) Just don't write  the
#   output  back  over the input, and check its output with a browser or
#   two, and there shouldn't be many problems.
#
#   Of  course,  there  are  constant  problems  with  slight   spelling
#   variations.  This program doesn't even attempt to tackle this issue.
#
# REQUIRES
#   When given a URL, this file uses the LWP::Simple  module,  available
#   at  any  <a href="http://www.perl.org/CPAN/CPAN.html">CPAN</a> site.
#   We require this only when given a URL; this program  should  run  ok
#   for local files with just the basic perl5.
#
# AUTHOR:
#   John Chambers <jc@trillian.mit.edu> "TuneList/~jc/music/"

use LWP::Simple;
use HTML::Entities;

# $me is this program's basename; it picks the debug environment variable
# and (later) the default output format -- a name containing "tabl" or
# "list" selects table or list output.
($me = $0) =~ s'.*/'';
# Debug level from a D_<name>, T_<name> or V_<name> environment variable;
# 0 means quiet.
$D = $ENV{"D_$me"} || $ENV{"T_$me"} || $ENV{"V_$me"} || 0;

@URL = ('TuneList/~jc/music/abc/Intl/');	# Default URL.
$max{orig} = 10;	# We pad origin fields to this many bytes.
$max{type} =  2;	# We pad type fields to this many bytes.
$dirdepth  =  0;	# The current depth in directories.
$maxdepth  =  2;	# Default depth limit for directories.

#%ignore = (		# Initial words to ignore in titles.
#	'a' => 1, 'an' => 1, 'the' => 1,
#	'das' => 1, 'der' => 1, 'die' => 1,
#	'l' => 1, 'la' => 1,
#);
# Here are flags for the various columns of output:
$want{type} = 1;
$want{orig} = 1;
$want{abc}  = 1;
$want{ps}   = 1;
$want{gif}  = 1;
$want{tune} = 1;
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Scan the command-line arguments, processing them  as  we  go.   Input #
# files  are  read  and appended to @doc; URLs are accumulated in @URL. #
# Options are processed as read, so they will  only  affect  things  to #
# their right, except for URLS which we save for last.                  #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
$infiles = $urls = 0;	# Input files read so far / URLs collected so far.
for $a (@ARGV) {
	if (($fl,$opt) = ($a =~ m'^([-+])(.*)'i)) {
		# An option: -a (all tunes), -d<n> (depth limit), +/-t (table),
		# or any other word, stored as a +/- flag in %want.
		if ($opt =~ m'^A$'i) { $alltunes = 1;
		} elsif ($opt =~ m'^D(\d+)$'i) { $maxdepth = $1;
		} elsif ($opt =~ m'^T$'i)      { $want{table} = ($fl eq '-') ? 0 : 1;
		} else                         { $want{$opt} = ($fl eq '-') ? 0 : 1}
	} elsif ($a =~ m'^<') {
		# "<file" form: we do the input redirection ourselves, so the
		# old list may itself be a URL (see the header comment).
		print STDERR "$me: Add \"$a\" to doc ...\n" if $D>2;
		push @doc, &load($a);
		print STDERR "$me: Added \"$a\" to doc.\n" if $D>2;
		++$infiles;
	} elsif ($a =~ m'\|$') {
		# "command |" form: read the old list from a shell command.
		print STDERR "$me: Add \"$a\" to doc ...\n" if $D>2;
		push @doc, &load($a);
		print STDERR "$me: Added \"$a\" to doc.\n" if $D>2;
		++$infiles;
	} else { $URL[$urls++] = $a}	# Anything else is a URL or directory.
}
# With no explicit input, fall back to reading STDIN (see -a in OPTIONS).
if (!$infiles) {
	print STDERR "$me: Add STDIN to doc ...\n" if $D>2;
	@doc = <STDIN>;
	print STDERR "$me: Added STDIN to doc.\n" if $D>2;
}
# If neither table nor list output was requested, guess the format from
# the name this program was invoked under.
if (!($want{table} || $want{list})) {
	if ($me =~ /tabl/i) {$want{table} = 1} elsif ($me =~ /list/i) {$want{list} = 1}
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Read the original tune list, and extract what we can from  it.   This #
# gets  a  bit  tricky,  due  to  the  variety of input formats that we #
# expect, because the input was  generally  typed  by  humans  with  no #
# regard for the needs of dumb programs.                                #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
chomp @doc;
$lines = $matches = 0;	# Non-blank lines seen / tune lines recognized.
line:
	for $line (@doc) {
		next line if !$line;	# Skip blank lines.
		++$lines;
		print STDERR "$me: l=\"$line\"\n" if $D>2;
		# Remember the document's <title> so the output can keep it.
		# NOTE(review): $titl is never assigned at this scope (it is a
		# local of ttline/abclist), so this guard is always true here --
		# presumably $doctitle was intended; confirm.
		if (!$titl && ($line =~ m/\s*<title>(.*)/i)) {
			$doctitle = $1;
			$doctitle =~ s"</title>.*""i;
			print STDERR "$me: doctitle=\"$doctitle\"\n" if $D>2;
			next line;
		}
		# "<tt>..." lines are this program's own earlier output format.
		# A line without the closing <br> continues on following lines.
		if ($line =~ m"^\s*<tt>") {
			if ($line =~ m"^\s*<tt>.*<br>") {
				&ttline($line);
			} else {
				$ttfl = 1;	# Inside a continued <tt> line;
				$ttline = $line;	# start accumulating it.
			}
			++$matches; @footer = (); next line;
		}
		# Continuation of a "<tt>" line started above; hand the whole
		# accumulated line to ttline() once we see the <br>.
		if ($ttfl) {
			$ttline .= " $line";
			if ($line =~ "<br>") {
				&ttline($ttline);
				$ttfl = 0;
			}
			++$matches; @footer = (); next line;
		}
		# Plain-text input: "type Origin Title" columns.  Fake a <tt>
		# line so ttline() only has to parse one format.
		if (!$html && ($line =~ m"^(\S+)\s+([A-Z]\S*)\s+([A-Z].*)$")) {
			&ttline("<tt>$1 $2</tt> $3<br>");
			++$matches; @footer = (); next line;
		}
		if ($matches < 1) {
			push @header, $line;	# Everything before the first matched line.
		} else {
			push @footer, $line;	# Everything after the last matched line.
		}
	}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Run through the list of URLs, reading each one.  If  the  data  looks #
# like a directory, we read it recursively. If the data is a .abc file, #
# we read it, extract the title(s), and add it  to  our  %U  table  for #
# later use.                                                            #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Scan each URL: .abc files get their headers extracted; anything else is
# treated as a directory listing.
for $u (@URL) {
	print STDERR "$me: URL=\"$u\"\n" if $D>2;
	if ($u =~ m"\.abc/*$") {
		&scanabc($u);	# Extract abc header.
	} else {
		&htmldir($u);		# Extract html directory.
	}

}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Now we produce the output document.  First, we print any  accumulated #
# header  info  (unmatched lines at the start of the input), and a <hr> #
# Then either a list or table of tunes,  followed  by  whatever  footer #
# stuff was present in the input after the last matched line.           #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
if ($doctitle) {
	print "<TITLE>$doctitle</TITLE>\n";
} else {
	print "<TITLE>Tune table</TITLE>\n";
}
# Echo the saved header lines.  Plain-text lines are html-escaped; once
# we see a tag, the rest is assumed to already be html.
for $l (@header) {
	++$ishtml if (!$ishtml && ($l =~ /<\w*>/));
	$l = HTML::Entities::encode_entities($l) if (!$ishtml);
	print "$l\n";
	++$gothr if ($l =~ /<hr>/i);	# Did the header supply its own <hr>?
}
print "<hr>\n" if !$gothr;

# List output is the default when no format was chosen.
if ($want{table}) {
	&abctable;
} elsif ($want{list}) {
	&abclist;
} else {
	&abclist;
}

# NOTE(review): $gothr reflects the header's <hr>, so a header <hr> also
# suppresses the one before the footer -- confirm that's intended.
print "<hr>\n" if !$gothr;
for $l (@footer) {
	++$ishtml if (!$ishtml && ($l =~ /<\w*>/));
	if (!$ishtml) {
		$l = HTML::Entities::encode_entities($l);
		# Turn a plain <user@host> into a mailto: link.
		$l =~ s#\&lt;(\w+@[\w.]+)\&gt;#<a href="mailto:$1">$&</a>#;
	}
	print "$l\n"
}
exit 0;

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# This looks in several arrays for the "right" URL to produce  for  a #
# specific tune, given the name, number, and type codes.  If we can't #
# find a usable URL, we return a null string.                         #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Find the "right" URL for a tune, given its type code ('abc', 'ps' or
# 'gif'), instance number, canonical name, and display title.  We try the
# instance-specific %U key first, then the name-only key, and return the
# first non-empty URL found.  Titles of the form "Word: Rest" get one
# retry keyed on the part after the colon.  Returns '' on failure.
sub findURL {
	local($typ,$num,$nam,$ttl) = @_;
	local($u,$cand);
	for $cand ($U{"$typ:$num:$nam"}, $U{"$typ:$nam"}) {
		for $u (split ' ', $cand) {
			return $u if $u;
		}
	}
	if ($ttl =~ /^(\w*):\s*(.*)/) {
		$u = &findURL($typ,$num,&canon($2),$2);
		return $u if $u;
	}
	return '';
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Produce the new listing.  At present, we just write to stdout,  but #
# maybe eventually we'll try pushing the result out to a web page. We #
# output only those things in the %Tname list, which should  have  an #
# entry for each line in the original input file.                     #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Write the tune list (one "<tt>type origin</tt> links Title<br>" line per
# tune) to stdout.  Only names flagged in %Tout appear; the list shows only
# the first instance ("1:<name>") of each tune.  Reads the global %Tname,
# %Tout, %Title, %Torig, %Ttype and %U tables built by the scanning phase.
sub abclist {
	local($orig,$name,$nuna,$tabc,$tgif,$titl,$tps,$type,$u);
	if (1) {	# Produce a header line?
		$Tname{ '' } = 'title';	# Empty key sorts to first place in list.
		$Title{'1:'} = 'Title';	# Set up header info ...
		$Torig{'1:'} = substr(('Origin' . ('_' x $max{orig})),0,$max{orig});
		$Ttype{'1:'} = substr(('Type' . ('_' x $max{type})),0,$max{type});
	}
	for $name (sort keys %Tname) {
		next if !$Tout{$name};	# Only tunes selected for output.
		$nuna = "1:$name";
		$orig = &pad($Torig{$nuna}, '_', 'orig');
		$titl = &abc2html($Title{$nuna});
		$tabc = &findURL('abc',1,$name,$titl);
		$tps  = &findURL('ps',1,$name,$titl);
		$tgif = &findURL('gif',1,$name,$titl);
		$type = &pad($Ttype{$nuna},'_','type');
		if (!$titl) {
			print STDERR "$me:  No title for \"$nuna\"\n" if $D>0;
		}
		if ($want{type}) {print "<tt>$type "}
		if ($want{orig}) {print "$orig</tt> "}
		if ($want{abc} && $tabc)  {print "<A HREF=\"$tabc\">abc</A> "} else {print "___ "}
		# Bug fix: the href used to be "$tps " with a trailing blank,
		# which yields a broken URL in some browsers.
		if ($want{ps}  && $tps )  {print "<A HREF=\"$tps\">ps</A> "} else {print "__  "}
		if ($want{gif} && $tgif)  {print "<A HREF=\"$tgif\">gif</A> "} else {print "___ "}
		if ($want{tune}) {print "$titl<br>\n"}
	}
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Produce the new table.  At present, we just  write  to  stdout,  but
# maybe eventually we'll try pushing the result out to a web page.  We
# output only those things in the %Tune table, which  should  have  an
# entry for each line in the original input file.
# Write the tune table to stdout: one header row plus one row per instance
# of every name flagged in %Tout.  Columns are enabled/disabled by %want.
# Reads the global %Tname, %Tcount, %Tout, %Title, %Torig, %Ttype, %Tkey,
# %Tseq and %U tables built by the scanning phase.
sub abctable {
	# Bug fix: the old local() list declared an unused @ps while $tps and
	# the loop index $i leaked as globals; both are now localized.
	local($orig,$i,$n,$name,$nuna,$tps,$tabc,$tgif,$titl,$tkey,$tseq,$type,$u);
	print "<table border>\n<tr>\n";
	# Header row.  (Bug fix: <th> cells used to be closed with </td>;
	# they are now closed with matching </th> tags.)
	if ($want{type}) {print "\t<th>type</th>"}
	if ($want{orig}) {print "\t<th>origin</th>"}
	if ($want{abc})  {print "\t<th>abc</th>\n"}
	if ($want{ps})   {print "\t<th>ps</th>\n"}
	if ($want{gif})  {print "\t<th>gif</th>\n"}
	if ($want{key})  {print "\t<th>key</th>\n"}
	if ($want{tune}) {print "\t<th align=left>tune</th>\n"}
	print "</tr>\n";
	for $name (sort keys %Tname) {
		$n = $Tcount{$name};
		next if !$Tout{$name};
		print "<!-- $n copies of $name found -->\n";
		# One row for each instance ("$i:$name") of this tune.
		for ($i=1; $i <= $n; $i++) {
			$nuna = "$i:$name";
			$titl = &abc2html($Title{$nuna});
			if (!$titl) {print STDERR "$me:  No title for \"$nuna\"\n" if $D>0}
			print "<tr>\n";
			if ($want{type}) {
				$type = $Ttype{$nuna};
				print "\t<td>$type</td>"
			}
			if ($want{orig}) {
				$orig = $Torig{$nuna};
				print "\t<td>$orig</td>"
			}
			if ($want{abc})  {
				$tabc = &findURL('abc',$i,$name,$titl);
				print "\t<td>";
				if ($tabc) {
					print "<A HREF=\"$tabc\">abc</A>";
					$tseq = $Tseq{$nuna};	# X: number within the file.
					print " $tseq" if $tseq;
				} else {
					print '___';
				}
				print("</td>\n");
			}
			if ($want{ps})   {
				$tps  = &findURL('ps',$i,$name,$titl);
				print "\t<td>";
				if ($tps)  {print "<A HREF=\"$tps\">ps</A> "} else {print '__'}
				print("</td>\n");
			}
			if ($want{gif})  {
				$tgif = &findURL('gif',$i,$name,$titl);
				print "\t<td>";
				if ($tgif) {print "<A HREF=\"$tgif\">gif</A> "} else {print '___'}
				print("</td>\n");
			}
			if ($want{key})  {
				$tkey = $Tkey{$nuna};
				print "\t<td>";
				if ($tkey) {print $tkey} else {print '___'}
				print("</td>\n");
			}
			if ($want{tune}) {print "\t<td align=left>$titl</td>\n"}
			print "</tr>\n";
		}
	}
	print "</table>\n";
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Convert the abc escape sequences to HTML.
# Convert abc accent escapes to HTML entities, e.g. \"u -> &uuml;,
# \'e -> &eacute;, \o -> &oslash;, \aa -> &aring;.  The /i flag makes the
# case of the entity follow the case of the letter.  Returns the string.
sub abc2html {
	local($s) = @_;
	$s =~ s#\\(o)#\&${1}slash;#ig;
	$s =~ s#\\a(a)#\&${1}ring;#ig;
	$s =~ s#\\"(\w)#\&${1}uml;#ig;
	$s =~ s#\\'(\w)#\&${1}acute;#ig;
	$s =~ s#\\`(\w)#\&${1}grave;#ig;
	# Bug fix: the entity name is "cedil" (&ccedil;), not "cedille".
	$s =~ s#\\,(\w)#\&${1}cedil;#ig;
	$s =~ s#\\~(\w)#\&${1}tilde;#ig;
	$s;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# This pads an orig field to $max{orig} chars.  The pad arg is  the  pad
# char; if null, we don't do any padding.
# Pad $str with $pad characters up to the width recorded in $max{$fld}.
# An empty pad char means "no padding"; an empty string becomes a full
# run of pad chars.  Strings already at or over the width pass through
# unchanged (the repeat count goes nonpositive, yielding no padding).
sub pad {
	local($str,$pad,$fld) = @_;
	local($m);
	$m = $max{$fld};
	$m = 1 unless $m;			# Unknown field: width 1.
	return $str unless $pad;		# No pad char: hands off.
	return ($pad x $m) unless $str;		# Empty value: all padding.
	return $str . ($pad x ($m - length($str)));
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Here's a routine to get either a local file, a local directory, or a
# web  page.   If  the  arg is the name of a directory, we read it and
# convert its contents to html, so that later code has only one format
# for directories.  With web pages, there's a minor kludge in that the
# LWP::Simple packet's get routine doesn't do  array  context,  so  we
# have to do the split ourself. This routine will also work if the arg
# is a shell command, ending with '|'.
# Fetch the contents of a local file, local directory, shell command
# ("cmd |"), redirection ("<file"), or URL, returning it as a list of
# lines.  Directories are converted to a fake html listing so later code
# only has to handle one directory format.  LWP::Simple::get doesn't do
# array context, so we split the URL case ourself.
sub load {
	local($name) = @_;
	local($txt,@doc,$path);
	print STDERR "$me/load: $name\n" if $D>1;
	($path = $name) =~ s/^[<\s]+//;	# Strip any "<" redirection prefix.
	if (-d $path) {
		if (opendir(DOC,$path)) {
			@doc = grep !/^\./, readdir DOC;
			closedir DOC;	# Bug fix: the handle used to be leaked.
			for $d (@doc) {$d =~ s#(.*)#<a href="$path/$1">$1</a>#}
		} else {
			print STDERR "$me: Can't read \"$path\" ($!)\n";
			@doc = ();
		}
	} elsif (open(DOC,$name)) {
		# Note: 2-arg open is deliberate; it implements the "<file" and
		# "command |" forms described in the header comment.
		@doc = <DOC>;
		close DOC;
	} else {
		# Not a local file or directory: assume it's a URL.
		$txt = LWP::Simple::get($path);
		@doc = split(/\n/, $txt);
	}
	# Bug fix: the debug message used $#doc (last index), which is one
	# less than the number of lines.
	print STDERR "$me/load: Got " . scalar(@doc) . " lines from $path\n" if $D>4;
	@doc;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Register a file under its name and type. For example, if we note the
# file 'http://foo.bar/qux.gif', we call
#   &file('gif','http://foo.bar/qux','http://foo.bar/qux.gif')
# This will leave behind global information:
#   $U{'gif:qux'] = 'http://foo.bar/qux.gif'
# This tells us how to find a gif file for the name 'qux'.
# Register the URL $url of a file of type $ext (e.g. 'gif' or 'ps') whose
# path-minus-extension is $pth.  The key is the canonicalized basename,
# so $U{"gif:qux"} accumulates the gif URLs known for the name "qux".
sub file {
	local($ext,$pth,$url) = @_;
	local($nam);
	($nam = $pth) =~ s'.*/'';	# Basename of the path.
	print STDERR "$me/file: $url\n" if $D>1;
	$nam = &canon($nam);
	$U{"$ext:$nam"} .= "$url ";
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Grovel through an html directory listing, and check out each of  the
# files  listed.   Directories  cause recursive traversal.  Files with
# interesting suffixes are read.
# Grovel through a directory listing (html or the fake one that load()
# builds for local directories).  Subdirectories recurse up to $maxdepth;
# .abc files are scanned, .gif/.ps files are registered in %U.
sub htmldir {
	local($path) = @_;
	local($item,$href,$line,@data);
	@data = &load($path);
	$html = 0;				# Not (yet) known to be HTML format.
	print STDERR "$me: path=\"$path\"\n" if $D>2;
	++$dirdepth;	# Recursion depth, bounded by $maxdepth below.
	$path =~ s"/+$"";	# Drop trailing slashes before appending names.
	for $line (@data) {
		next if !$line;
		if (($href,$item) = ($line =~ m#<a .*href="([^"]+)">(.*)</a>#i)) {
			$html = 1;		# It looks like HTML format.
			# A trailing slash marks a subdirectory.
			if ($href =~ '/$') {
				if (!($item =~ /Parent Dir/i)) {	# Ignore parent directory.
					&htmldir("$path/$href")		# Recurse into directory.
						if ($dirdepth < $maxdepth);
				}
			} elsif ($href =~ m'(.*)\.abc$'i) { &scanabc("$path/$href")
			} elsif ($href =~ m'(.*)\.gif$'i) { &file('gif',$1,"$path/$href");
			} elsif ($href =~ m'(.*)\.ps$'i ) { &file('ps',$1,"$path/$href")}
		} elsif (-d $line) {
			# Non-html listing: the line itself names a local entry.
			&htmldir("$path/$line") if ($dirdepth < $maxdepth);
		} elsif (-f $line) {
			if ($line =~ m'.*/(.*)\.abc$'i) {&scanabc("$path/$line")}
		} else {
			print STDERR "$me/htmldir: Can't parse \"$line\"\n" if $D>2;
		}
	}
	--$dirdepth;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Return the numerically largest of the arguments.
# Bug fix: the old loop did "shift" inside "for (@_)", mutating the list
# being iterated, so with three or more arguments every other element was
# skipped (e.g. Max(1,2,9) returned 2).
sub Max {
	local($n) = shift;
	for (@_) {$n = $_ if $_ > $n}
	return $n;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# This routine reads an abc source file and extracts whatever info  we
# are  currently  looking for there.  The file's title line(s) are the
# most interesting; we might also want the tunes' sequence numbers and
# keys for later output.
#
# Suppose we find that URL/foo.abc contains the tunes:
#   X: 1
#   T: George's Jig
#   T: Sam's Tune
#   ...
#   X: 2
#   T: Fred's Folly
#   T: George's Jig
#   ...
# We will leave behind the following global data:
#   $Tcount{GeorgesJig}    = 2
#   $U{abc:1:GeorgesJig} = 'URL/foo.abc'
#   $U{abc:1:SamsTune}   = 'URL/foo.abc'
#   $U{abc:1:FredsFolly} = 'URL/foo.abc'
#   $U{abc:2:GeorgesJig} = 'URL/foo.abc'
#   $Tunes{URL/foo.abc}  = ('1:GeorgesJig', '1:SamsTune', '1:FredsFolly', '2:GeorgesJig')
# Read the abc file at $URL and record its tunes in the global tables
# (%Tcount, %Title, %U, %Tseq, %Torig, %Tkey) as described in the big
# comment above.  Only X:, T:, O: and K: header lines are examined.
sub scanabc {
	local($URL) = @_;
	local(@a,$line,$name,$n,$K,$O,$T,$t,$h,$X);
	@a = &load($URL);
	$t = 0;				# Count the tunes.
line:
	for $line (@a) {	# Read thru the abc file.
		if ($line =~ m/^(X):\s*(\d+)/) {
			$X = $2;	# Current tune's index number.
		} elsif ($line =~ m/^(T):\s*(.*)/) {
			$T = $2;
			# A "title" starting with a digit isn't a real title; an
			# "NxM<letter>" form is treated as a type code.
			# NOTE(review): $nuna is a global still holding the key of
			# the previous title (possibly from a previous file), so a
			# leading type line lands on that tune -- confirm intended.
			if ($T =~ /^\d/) {
				if ($T =~ /^\d+x\d+\w/) {
					$Ttype{$nuna} = $T;
					$max{type} = Max($max{type},length($T));
				}
				next line;
			}
			++$t;
			$name = &canon($T);			# Canonicalize the tune's name.
			$n = ++$Tcount{$name};		# Number of instances of this tune.
			$TX[$t] = $X;				# Remember the tune's X: index.
			$nuna = "$n:$name";			# Num+name for this tune.
			$U{"abc:$name"} .= "$URL ";	# Add to list of possible URLs.
			$U{"abc:$nuna"} .= "$URL ";	# Add to list of specific URLs.
#			$Tunes{$URL} .= "$nuna ";	# Mapping from URL -> num+name.
			$Title{$nuna} = $T;			# Remember this instance's title.
			$Tout{$name} = 1			# Force it into output,
				if ($alltunes);			# if caller asked for everything.
		} elsif ($line =~ m/^(O):\s*(.*)/) {
			$O = $2;
			# NOTE(review): @TX is global and never cleared between
			# files, so stale entries can match here -- verify.
			for ($i=1; $i<=$t; $i++) {	# Search thru tunes in this file.
				if ($TX[$i] == $X) {	# If this is the current tune,
					$Tseq{$nuna} = $X if $X;	# Remember X and O fields.
					$Torig{$nuna} = $O if $O;
					$max{orig} = Max($max{orig},length($O));
				}
			}
		} elsif ($line =~ m/^(K):\s*([\^=_\s\w]+)/) {
			$K = $2;	# Key signature, e.g. "G" or "A mix".
			for ($i=1; $i<=$t; $i++) {
				if ($TX[$i] == $X) {
					$Tseq{$nuna} = $X if $X;
					$Tkey{$nuna} = $K if $K;
				}
			}
		}
	}
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# This routine parses the "<tt>...</tt>..." lines that  are  found  in
# previous TuneList output files. This routine will need more cases as
# we proceed with feeding it  newer  (and  more  complex)  lines  from
# various listing.  Stay tuned ...
# Parse one "<tt>type orig</tt> links Title<br>" line from a previous
# TuneList output file, caching the tune info via &cache.  Returns the
# unconsumed remainder of the line ('' on full success).
sub ttline {
	local($line) = @_;
	# Bug fix: $orig appeared twice in this local() list.
	local($titl,$type,$orig,$name,$ltyp,$href,%hrefs,$n);
	# Peel off the leading "<tt>type orig</tt>" columns, if present.
	if ($line =~ s#^\s*<tt>(.*)\s+(.*)</tt>\s*##) {
		$type = $1;
		$orig = $2;
		$orig =~ s"[.\s]+$"";	# Trim trailing dots and blanks.
	}
	# Consume the rest of the line one field at a time.
	while ($line) {
		print STDERR "$me/ttline: Field \"$line\"\n" if $D>5;
		# A bare lower-case word (a dead "abc"/"ps" label): drop it.
		if ($line =~ s#^\s*([a-z]+)\s*##) {
			print STDERR "$me/ttline: Drop: \"$1\"\n" if $D>4;
			next;
		}
		# '<a href="...">abc</a>' etc: remember the URL by link type.
		if ($line =~ s#^\s*<a href="([^"]+)">([a-z]+)</a>##i) {
			$href = $1;
			$ltyp = $2;
			print STDERR "$me/ttline: Link type $ltyp \"$href\"\n" if $D>3;
			$hrefs{$ltyp} = $href;
			next;
		}
		# The capitalized remainder is the tune's title.
		if (!$html && ($line =~ s#\s*([A-Z].*)##)) {
			$titl = $1;
			while ($titl =~ s#(<[^<]*>)##) {	# Strip embedded tags.
				print STDERR "$me/ttline: Removed \"$1\" from tune.\n" if $D>6;
			}
			$name = &canon($titl);
			print STDERR "$me/ttline: Tune name \"$name\" title \"$titl\"\n" if $D>3;
			$Tout{$name} = 1;		# Include it in output.
			&cache($name,$type,$orig,$titl,$hrefs{abc},$hrefs{ps},$hrefs{gif});
			# A "Word: Rest" title is also cached under the part after
			# the colon, so either spelling can be found later.
			if ($titl =~ m#^(\w+)\s*:\s*(.*)\s*#) {	# "Word:" deleted.
				$titl = $2;
				$name = &canon($titl);
				$Tout{$name} = 1;		# Include it in output.
				&cache($name,$type,$orig,$titl,$hrefs{abc},$hrefs{ps},$hrefs{gif});
			}
			next;
		}
		# Bug fix: nothing matched.  The old code spun forever here on a
		# non-empty remainder; give up on the unparsable fragment instead.
		print STDERR "$me/ttline: Can't parse \"$line\"\n" if $D>2;
		last;
	}
	return $line;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#   &cache(name,type,orig,tune,abc,ps,gif)
# Remember an assortment of info about a tune.
#   &cache(name,type,orig,tune,abc,ps,gif)
# Record one instance of a tune: assign it the next instance number for
# its canonical name, remember its type/origin/title, and file each URL
# that was supplied under both the numbered and the plain %U keys.
sub cache {
	local($name,$type,$orig,$titl,$abc,$ps,$gif) = @_;
	local($n,$nuna,%link,$kind);
	$n = ++$Tcount{$name};		# One more instance of this name.
	$nuna = "$n:$name";
	$Ttype{$nuna} = $type;
	$Torig{$nuna} = $orig;
	$Title{$nuna} = $titl;
	%link = ('abc', $abc, 'gif', $gif, 'ps', $ps);
	for $kind ('abc', 'gif', 'ps') {
		next unless $link{$kind};
		$U{"$kind:$nuna"} .= "$link{$kind} ";
		$U{"$kind:$name"} .= "$link{$kind} ";
	}
	# Track the widest type/origin seen, for later column padding.
	$max{type} = Max($max{type},length($type));
	$max{orig} = Max($max{orig},length($orig));
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Canonicalize a name. We do InterCaps, and strip out all funny chars.
# Canonicalize a tune name: lower-case it, collapse html entities to
# their bare letter, drop everything from the first comma on, and strip
# all remaining non-word characters.  Also records the result in the
# global %Tname table, and returns it.
sub canon {
	local($name) = @_;
	$name = lc($name);		# Fold to lower case.
	$name =~ s"&(\w)\w*;"$1"g;	# &eacute; and friends become "e".
	$name =~ s/,.*//;		# Keep only the part before any comma.
	$name =~ s"\W""g;		# Strip the remaining funny chars.
	$Tname{$name} = 1;		# Note that this name exists.
	return $name;
}
