# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# This is the makefile for JC's abc web-search robot. The default "all" entry
# does everything, and will likely take several days, so be sure to run it in
# the background.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Use bash for all recipe lines.  Simple (:=) assignment is used so each
# value is expanded exactly once at parse time.
SHELL := /bin/bash

# The two hosts that get a dedicated scan (see the mainhosts entry below).
Host1 := trillian.mit.edu
Host2 := john-chambers.us

# Common commands; the -f flags keep repeated runs from prompting or failing.
LC := Lc
RM := /bin/rm -f
MV := /bin/mv -f
CP := /bin/cp -fp
LN := /bin/ln -f

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# Default entry: print usage only, since a real run takes days.
# (Fixed: the "make run" line had an unterminated quote, which made the
# shell fail on this recipe, and the typo "indexx".)
all:
	@echo There is no default make here.  You must say what to make.
	@echo '	"make run"     does a full search, scan, and rebuilds the index.'
	@echo '	"make search"  queries google and alltheweb for abc sites."'
	@echo '	"make scan"    scans known hosts and rebuild index files.'
#	@echo '	"make install" copies index files to ../ndx/ .'
	@echo '	"make clean"   removes log files and old host files.'

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# run and scan are command entries, not files: declare them phony so a
# stale "run"/"scan" stamp file (clean removes them) can never make them
# look up to date.  The commented-out touch lines are left from when they
# were stamp files.
.PHONY: run scan

# run: full cycle -- search for new sites first, then scan them.
run: search init hosts # index stats tags
#	touch run

# scan: rescan the known hosts and rebuild index, stats, and tags.
scan: init hosts index stats tags
#	touch scan

# search: crawl abcnotation.com for abc sites.  Newly found host files
# land in new/ and are moved to add/ for pickup by the next scan.
# The leading "-" on the mv ignores the error raised when new/ is empty.
search:
#	nice ABCsearch >ABCsearch.log 2>&1	# This doesn't seem to work any more.
#	zapdir new
	nice URL http://abcnotation.com/ >abcbot.log 2>&1
	-mv new/* add/
#	touch search

# install: currently just requires that ../ndx exists; the actual copy
# step is disabled (per the note in the ndx rule, ndx/ is now one
# directory plus a symlink, so nothing needs copying).
install: ../ndx
#	touch install

# markbadsites: feed the site list produced by showbadsites to the Avoid
# script.  NOTE(review): "^2009" is passed straight to showbadsites --
# presumably it selects entries whose lines start with 2009; confirm
# against the showbadsites script.
markbadsites:
	Avoid `showbadsites ^2009 | sed 's/^.* at //'`

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# We start a special abcbot for our main machine.  (This is often done from a
# different  machine for various reasons.) It's probably a good idea to first
# go to the  $HOME/w/music/abc  directory,  do  a  "purgenewdirs"  and  "make
# clean",  which  will  clear  out temporary files that we'd rather not scan.
# Note that by default abcbot skips hosts that have  a  lockfile,  so  if  we
# start this scan the general scan will skip over $(Host1) and/or $(Host2).

# mainhosts: start dedicated scans of both main hosts.
mainhosts: Host1 Host2

# Host1 and Host2 shared a line-for-line duplicated recipe; they now share
# one rule.  "$($@)" is a computed variable reference: for target Host1 it
# expands $(Host1), for Host2 it expands $(Host2).  The Ln.sh lines link
# the lock/log files to names outside lck/ and log/ so they survive a
# "make clean" for debugging.
Host1 Host2:
	U $($@) &
#	touch lck/$($@)	# Sometimes, we just block a scan of this host
	echo Pause to start scan of $($@)
	sleep 10
	Ln.sh lck/$($@) lck.$($@)	# Protect lockfile for debugging
	Ln.sh log/$($@) log.$($@)	# Protect logfile for debugging

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Here are our cleanups:

# neat: remove backup files (*.bak plus names ending in "-" or "~").
# The find now uses -print0 | xargs -0, matching the null-safe form used
# in clean, so names with spaces or newlines can't break the removal.
neat:
	$(RM) *.bak
	find . -name '*[-~]' -print0 |xargs -0 $(RM)
# clean: remove logs, stamp files, and per-run work directories.  Each
# directory is renamed to *.del and deleted in the background so a new
# run can start immediately.  Uses $(MV) (= /bin/mv -f) like the other
# command variables, so a leftover *.del from an interrupted clean
# cannot make the mv prompt or fail.
clean: neat
	$(RM) *.log *.out cache*.txt init hosts index install run scan search
	$(MV) lck lck.del; mkdir lck; $(RM) -r lck.del &
	$(MV) log log.del; mkdir log; $(RM) -r log.del &
	$(MV) del del.del; mkdir del; $(RM) -r del.del &
	$(MV) all all.del; mkdir all; $(RM) -r all.del &
	$(MV) new new.del; mkdir new; $(RM) -r new.del &
	find cache -name '%%src*' -print0 |xargs -0 $(RM)
	find cache -name '*-' -print0 |xargs -0 $(RM)
# clobber: like clean, but also discard the accumulated old/, nul/, and
# ndx/ data directories (a later full scan rebuilds them).  Uses $(MV)
# (= /bin/mv -f) for consistency with the command variables above.
clobber: clean
	$(MV) old old.del; mkdir old; $(RM) -r old.del &
	$(MV) nul nul.del; mkdir nul; $(RM) -r nul.del &
	$(MV) ndx ndx.del; mkdir ndx; $(RM) -r ndx.del &

# relink: run the relink script with +rv flags over the cache.
# NOTE(review): "V_relink=1relink.log" is an environment variable for the
# relink script; presumably it names a log file, and the leading "1" may
# be a typo for "relink.log" -- confirm against the relink script.
relink:
	V_relink=1relink.log relink +rv

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# tarkit: pack this directory up as ../bot.tar.gz after cleaning it.
# The recursive invocation uses $(MAKE) rather than a literal "make" so
# command-line flags (-n, -j, overrides) propagate correctly.
tarkit: ../bot.tar.gz
../bot.tar.gz: ../bot.tar
	gzip ../bot.tar
../bot.tar: *
	$(MAKE) clean
	tar cf ../bot.tar .

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Here's how we make a tags file from our perl programs.
# There is no need to sort the output, as pttags does a sort.

# The perl sources that feed the tags file.  Make expands the globs when
# $P is used in the prerequisite list; the shell expands them in the recipe.
P=abcbot webcat *.pm pm/*.pm

# tags: build an editor tags file from our perl programs.
# There is no need to sort the output, as pttags does a sort.
tags: $P
	pttags $P >tags

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

# init: run abcbot over the URLs file to set up the starting URLs before
# a scan.  NOTE(review): "+CURLs" is presumably the +C option applied to
# the URLs file -- confirm against abcbot's option handling.
init: URLs
	nice abcbot +CURLs >abcbot.log 2>&1
#	touch init

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# The hosts entry updates the per-host files in  hst/,  which  takes  several
# days.   This involves running abcbot first to set up the starting URLS, and
# then running it again per host via the scanhosts script.

# hosts: update the per-host files in hst/ via sh/scanhosts (takes days),
# then rebuild the statistics and index files.  The recursive invocations
# use $(MAKE) rather than literal "make" so flags and -n propagate.
hosts: hst/*
#	abcbot +CURLs >abcbot.log 2>&1
#	nice -n 10 scanhosts >scanhosts.log 2>&1
	sh/scanhosts >scanhosts.log 2>&1		# There are other "scanhosts" scripts around
#	nice relink +r cache &
#	touch hosts
	$(MAKE) stats
	$(MAKE) index
#	make TuneInfo

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Make the TuneInfo* files.  These are made from the cache/ files.  We  first
# write the raw data, one tune per line, to TuneInfoRandom.txt.  We then sort
# this and write the results to TuneInfoSorted.txt.

#TuneInfo: TuneInfoSorted.txt

#TuneInfoSorted.txt: TuneInfoRandom.txt
#	sort <TuneInfoRandom.txt >TuneInfoSorted_`dt`.txt
#	/bin/ln -f TuneInfoSorted_`dt`.txt TuneInfoSorted.txt
#TuneInfoRandom.txt: hst/*
#	httpTuneInfo >TuneInfoRandom_`dt`.txt
#	/bin/ln -f TuneInfoRandom_`dt`.txt TuneInfoRandom.txt

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Make the ndx/*.html files. These are built from the hst/* files.  You can
# make this while the above hosts make is running, but it may miss files from
# the host that is currently being scanned, so it's best to do this when  the
# hosts entry is not being made.

# index: rebuild the ndx/*.html pages, then update the "index" stamp file.
index:   ndx;   touch index
indexes: index	# Old name when we had several indexing schemes

# ndx: the per-letter index pages, all written by one Hosts2ndx run.
# All six pages are now targets of the Hosts2ndx rule; previously only
# AB/MA/ZW were listed, so a missing CO/MI/PO page had no rule at all
# and "make ndx" would fail with "no rule to make target".
ndx: ndx/AB.html ndx/CO.html ndx/MA.html ndx/MI.html ndx/PO.html ndx/ZW.html
ndx/AB.html ndx/CO.html ndx/MA.html ndx/MI.html ndx/PO.html ndx/ZW.html: ./Hosts2ndx hst/*
#	-rm ndx/*.html
	./Hosts2ndx >Hosts2ndx.log 2>&1
#	/bin/ln -f ndx/*.html ../ndx/	# These are now one directory plus a symlink.
#	touch ndx

# count/titlecount: report how many <TT> entries (presumably one per tune
# title -- confirm against Hosts2ndx's output format) the index pages hold.
count: titlecount
titlecount: index
	grep '<TT>' ndx/??.html | wc

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Here's an entry to extract statistics.  The old version spelled the date
# suffix as -`dt` in the target and prerequisite names, but make performs
# no shell command substitution there, so those were literal file names
# (containing backtick characters) that never existed -- which is why the
# time check always failed and the targets were always rebuilt.  Running
# the dt script once at parse time with $(shell dt) yields real file names
# that make can check.  The recipes write to $@ so target and output agree.

DT := $(shell dt)

stats: HostStatsData-$(DT) HostStatDiffs-$(DT)
HostStatsData: HostStatsData-$(DT)
HostStatsData-$(DT): hst/*
	./HostStats >$@ # 2>&1
#	touch HostStatsData
HostStatDiffs:  HostStatDiffs-$(DT)
HostStatDiffs-$(DT): HostStatsData-*
	./HostStatDiffs `ls HostStatsData-* | tail -3` >$@ 2>&1
#	touch HostStatDiffs

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Miscellaneous entries:

# TuneList.txt: strip the HTML tags out of TuneList.html.
# NOTE(review): the bare TuneList prerequisite has no rule in this file;
# presumably a script or file maintained elsewhere -- confirm.
TuneList.txt: TuneList TuneList.html
	sed <TuneList.html >TuneList.txt -e 's/<[^>]*>//g'

# TuneList1.html: blank out any line mentioning an http://localhost/ URL.
# (sed uses " as the s/// delimiter so the URL's slashes need no escaping.)
TuneList1.html: TuneList.html
	sed <TuneList.html >TuneList1.html \
		-e 's".*http://localhost/.*""'

# TuneList2.html: sort by the second field, then numerically by the third.
# (Fixed: the obsolete "+1 +2n" key syntax was dropped from POSIX and is
# rejected by modern sort; "-k 2 -k 3n" is the equivalent modern form.)
TuneList2.html: TuneList1.html
	sort -k 2 -k 3n <TuneList1.html >TuneList2.html

# TuneList3.html: drop adjacent duplicate lines from the sorted list.
TuneList3.html: TuneList2.html
	uniq <TuneList2.html >TuneList3.html

# TuneBotList.txt: strip the HTML tags out of TuneBot.html.
# (Fixed: the sed input was misspelled "Tunebot.html", which doesn't match
# the TuneBot.html prerequisite and fails on case-sensitive filesystems.)
TuneBotList.txt: TuneBot TuneBot.html
	sed <TuneBot.html >TuneBotList.txt -e 's/<[^>]*>//g'

# TuneBot1.html: blank out any line mentioning an http://localhost/ URL.
# (sed uses " as the s/// delimiter so the URL's slashes need no escaping.)
TuneBot1.html: TuneBot.html
	sed <TuneBot.html >TuneBot1.html \
		-e 's"^.*http://localhost/.*""'

# TuneBot2.html: sort by the second field, then numerically by the third.
# (Fixed: the obsolete "+1 +2n" key syntax was dropped from POSIX and is
# rejected by modern sort; "-k 2 -k 3n" is the equivalent modern form.)
TuneBot2.html: TuneBot1.html
	sort -k 2 -k 3n <TuneBot1.html >TuneBot2.html

# TuneBot3.html: drop adjacent duplicate lines from the sorted list.
TuneBot3.html: TuneBot2.html
	uniq <TuneBot2.html >TuneBot3.html

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
