#!/bin/bash
#This is the complete training script. It starts from the
#two-column data files and list.codes.

#-------------------------------------------------------------------
# Site and job settings. Variables such as docroot and linkroot are not
# assigned anywhere in this script, so they are presumably defined by
# these two files (or exported by the caller).
source ../config.sh
source env_adv.sh
#-------------------------------------------------------------------

# With docroot empty every "$docroot/..." path below would point at the
# filesystem root, so stop here instead of scattering files there.
: "${docroot:?docroot must be set by ../config.sh or env_adv.sh}"

mkdir -p -- "$docroot"
mkdir -p data2

# Placeholder results page: it reloads itself every 30 seconds until the
# end of the script replaces it with the real list of links.
{
  echo "<html><head> <meta http-equiv=\"refresh\" content=\"30\" > </head> "
  echo "Links to results will be placed here, as soon as the calculations are completed."
  echo "This page will be automatically refreshed after 30 seconds, or you might refresh it manually."
  echo "</html>"
} > "$docroot/index.html"

# Acknowledgement written to stdout, pointing at the page created above.
echo "Your data is received and request submitted. Click <a href=$linkroot/index.html target=results>here</a> to go to the results page. " 

# Whitespace-separated dataset codes; each code names a file data/<code>.
list=$(cat data/list.codes)

# Names of the per-residue feature files (data2/<code>.<name>) that make up
# the input columns. The list does not depend on the dataset, so it is read
# once here rather than once per file.
listuse=$(cat list.use)

#First create fasta files, 21 bit codes, neighbor data, and composition vectors.
# NOTE(review): this makes data/ world-writable; presumably needed by the
# web server user - tighten if that turns out not to be the case.
chmod 777 data
rm -f data/train.dat data/test.dat data/all.dat

# Word splitting of $list is intentional: one iteration per code.
for file in $list; do
  # Normalise the upload to two columns: 2-column rows are kept as they are,
  # 3-column rows lose their first column, rows of any other width are dropped.
  awk '{if(NF==2)print $0;if(NF==3)print $2,$3}' "data/$file" > "data/$file.tmp"
  mv "data/$file.tmp" "data/$file"

  get-20bits -i "data/$file" -f "data/$file.fasta" -compo "data/$file.compo" -21 "data/$file.21"
  # The 21-bit encoding doubles as neighbour file number 0.
  cp "data/$file.21" nbr0

  if [[ ! -f "data/$file.pssm" ]]; then
    echo "<br>$file.pssm is not found in the uploaded data. Creating PSSM. <br>" >> "$docroot/index.html"
    # $blastpgp is left unquoted on purpose: the configured value may carry
    # extra arguments. Its stdout goes to tmp.pssm; the PSSM itself is the
    # file named after -Q.
    $blastpgp -i "data/$file.fasta" -d /data/bio/db/blast/nr -j 2 -Q "data/$file.pssm" > tmp.pssm
  else
    echo "<br>$file.pssm is assumed to be a pssm file of $file.<br>" >> "$docroot/index.html"
  fi

  # Expected to leave pssm0..pssm8 (and presumably nbr1..nbr8) in the current
  # directory, since the loop below collects exactly those - TODO confirm.
  pssm2nbrs "data/$file.pssm"

  # The composition file is the same for all nine neighbour sets: copy it once.
  cp "data/$file.compo" "data2/$file.compo"
  for ((i = 0; i < 9; i++)); do
    mv "pssm$i" "data2/$file.pssm$i"
    cp "nbr$i" "data2/$file.nbr$i"
  done

  # Second column of the dataset is the target value.
  awk '{print $2}' "data/$file" > "data/$file.o"

  echo "<br>"

  # Collect the selected feature files for this code. pastestring.w is still
  # written in its historical " path " format in case something else reads it,
  # but the paste command itself now takes the paths from an array.
  rm -f pastestring.w
  feature_files=()
  for prop in $listuse; do
    feature_files+=("data2/$file.$prop")
    printf ' %s ' "data2/$file.$prop" >> pastestring.w
  done

  # Feature columns followed by the target column; the per-code file is built
  # once and then appended to the pooled data set.
  paste "${feature_files[@]}" "data/$file.o" > "data/$file.dat"
  cat "data/$file.dat" >> data/all.dat
done

	# apply_validation_scheme SCHEME
	#
	# Splits data/all.dat into data/train.dat and data/test.dat and prepares
	# the data/list.* files that the list-driven assembly step further down
	# looks for.
	#
	#   (any value) every third pattern goes to test.dat, the rest to train.dat
	#   trainall    train.dat and test.dat are both the full data set
	#   pat3out     the default pattern-level split, with any list files removed
	#   prot3out    split by dataset code: every third code in data/list.codes
	#               is listed for testing, the others for training
	#
	# Any other SCHEME (including an empty one) keeps the default split and
	# leaves existing data/list.* files untouched.
	apply_validation_scheme() {
		local scheme=$1

		awk 'NR % 3 == 0' data/all.dat > data/test.dat
		awk 'NR % 3 != 0' data/all.dat > data/train.dat

		case "$scheme" in
			trainall)
				cp data/all.dat data/train.dat
				cp data/all.dat data/test.dat
				rm -f data/list.train data/list.test data/list.val
				;;
			pat3out)
				rm -f data/list.train data/list.test data/list.val
				;;
			prot3out)
				rm -f data/list.train data/list.test data/list.val
				# The lists must live in data/ (that is where they are read
				# from, and where list.codes is), and the held-out third is
				# the test set, mirroring the pattern-level split above.
				awk 'NR % 3 != 0' data/list.codes > data/list.train
				awk 'NR % 3 == 0' data/list.codes > data/list.test
				;;
		esac
	}

	# Quoted so that a missing or empty valtype.dat simply selects the
	# default split instead of breaking the comparison.
	valtype=$(cat valtype.dat)
	apply_validation_scheme "$valtype"
	

# assemble_split NAME
#
# If data/list.NAME exists, rebuilds data/NAME.dat as the concatenation of
# data/<code>.dat for every code in that list, in list order. Without a list
# file data/NAME.dat is left exactly as it is.
assemble_split() {
  local name=$1
  local listfile="data/list.$name"
  local outfile="data/$name.dat"
  local code

  [[ -f "$listfile" ]] || return 0

  rm -f "$outfile"
  # Word splitting is intentional: the list is whitespace-separated codes.
  for code in $(cat "$listfile"); do
    cat "data/$code.dat" >> "$outfile"
  done
}

# Explicit lists, when present, override the modulo-based split.
assemble_split train
assemble_split test
assemble_split val

	# Working directories for the training run. -p keeps a re-run in the
	# same directory from failing on directories that already exist.
	# NOTE(review): world-writable; presumably so the web server user can
	# write here - confirm.
	mkdir -p params res log predict
	chmod 777 params res log predict

	# Creation options: the user-supplied file if there is one, otherwise
	# the complete set of site defaults.
	if [[ -f create_opt.dat ]]; then
		cp create_opt.dat params
	else
		cp "$topdir"/params_default/* params/
	fi

	# Training options: a user-supplied tropt.dat is installed under the
	# name tr_opt.dat, otherwise the default tr_opt.dat is used.
	if [[ -f tropt.dat ]]; then
		cp tropt.dat params/tr_opt.dat
	else
		cp "$topdir/params_default/tr_opt.dat" params/
	fi
	# write_netarch
	#
	# Generates params/netarch.dat for a three-layer network:
	#   - input units  = number of columns in the first line of
	#                    data/train.dat minus one (the last column is the
	#                    target appended when the pattern files were pasted)
	#   - hidden units = second field of the "hidunits" line in ./netarch.dat
	#   - output units = 1
	# followed by every "activation" line of ./netarch.dat.
	# The values are passed to printf as arguments, never as part of the
	# format string, so a stray '%' or backslash cannot corrupt the file.
	write_netarch() {
		local numunits hidunits
		numunits=$(awk 'NR == 1 { print NF - 1; exit }' data/train.dat)
		hidunits=$(awk '/hidunits/ { print $2 }' netarch.dat)
		{
			echo "num_layers  3"
			printf 'num_units_each \t %s %s 1 \n' "$numunits" "$hidunits"
			grep activation netarch.dat
		} > params/netarch.dat
	}

	write_netarch
#	echo "activation_hidden atan"   >> params/netarch.dat
#	echo "activation_output sigmoidal"   >> params/netarch.dat

	# nnbench.rc records the absolute path of the parameter directory
	# (presumably read by nnbench to locate its parameters).
	echo "$PWD/params" > nnbench.rc

	# Add any default parameter file that is still missing. Touching the
	# existing files first makes them newer than the defaults, so cp -u
	# leaves them alone and only copies files that are absent.
	touch params/*
	cp -u "$topdir"/params_default/* params/

# Training algorithm: second field of the "algorithm" line in tr_opt.dat.
# The name tralgo is kept because the plotting step later on reads it too.
tralgo=$(grep algorithm params/tr_opt.dat | awk '{print $2}')

# Train the network; stdout and stderr both go to log.txt.
nnbench train > log.txt 2>&1

# The learning-curve lines are the "Step" lines for netasa and the "Epoch"
# lines for every other algorithm. Quoting tralgo keeps the comparison valid
# when no algorithm line was found.
if [[ "$tralgo" == "netasa" ]]; then
  grep Step log.txt > lcurve.txt
else
  grep Epoch log.txt > lcurve.txt
fi

# These must be extracted now: the evaluation step overwrites log.txt.
grep Feature log.txt > features.txt
tail -n 10 log.txt > summary.txt

	# Pick the evaluation set: the validation data when it exists,
	# otherwise the test data, otherwise the complete data set.
	if [ -f "data/val.dat" ]; then
		evalset=data/val.dat
	elif [ -f "data/test.dat" ]; then
		evalset=data/test.dat
	else
		evalset=data/all.dat
	fi

	# Only stdout is captured; this replaces the training log in log.txt.
	nnbench eval $evalset > log.txt

	# Split the evaluation log into its ROC, prediction and cut-off lines.
	grep ROC log.txt > roc.txt
	grep Prediction log.txt > results.txt
	grep "Best Cut" log.txt > bestcut.txt

	# Each plot follows the same recipe: reduce a text file to bare columns,
	# run the matching Octave script (presumably it reads that column file
	# and writes the .ps - confirm against the .m files), then rotate the
	# PostScript by 90 degrees into a PNG. Octave's output lands in dum.txt,
	# which is overwritten every time.
	# $octave is left unquoted on purpose: the configured value may carry
	# command-line options.

	# ROC curve: the even-numbered fields 4..18 of every ROC line.
	awk '{print $4,$6,$8,$10,$12,$14,$16,$18}' roc.txt > roc
	$octave < "$SCPATH/plot-roc.m" > dum.txt 2>&1
	convert -rotate 90 roc.ps roc.png

	# Feature correlations: field 6 of every Feature line.
	awk '{print $6}' features.txt > correlations
	$octave < "$SCPATH/plot-correl.m" > dum.txt 2>&1
	convert -rotate 90 correlations.ps correlations.png

	# Learning curve: the columns differ between netasa logs and the other
	# algorithms, whose logs also contain "Maximum ..." lines to be skipped.
	# case does not word-split, so an empty tralgo falls through to the
	# default branch without an error.
	case "${tralgo:-}" in
		netasa)
			awk '{print $7,$9}' lcurve.txt > lcurve
			;;
		*)
			awk '{if($1!="Maximum")print $6+0,$10}' lcurve.txt > lcurve
			;;
	esac

	$octave < "$SCPATH/plot-lcurve.m" > dum.txt 2>&1
	convert -rotate 90 lcurve.ps lcurve.png
	


#Copying data to html directory

# Files that are published under their own name.
for artefact in \
  roc.ps roc.png roc.txt \
  summary.txt results.txt features.txt lcurve.txt \
  correlations.ps correlations.png \
  lcurve.png lcurve.ps
do
  cp "$artefact" "$docroot/$artefact"
done

# The pattern files are published with a .txt extension.
cp data/train.dat "$docroot/train.txt"
cp data/test.dat "$docroot/test.txt"

# The query CGI for this predictor is installed in the working directory.
cp "$topdir/query_adv.cgi" query.cgi

# Query page: the shared top and bottom templates around a FORM tag that
# posts to this job's query.cgi.
{
  cat "$topdir/templates/form-top.html"
  echo "<FORM ACTION=$formroot/query.cgi METHOD=POST>"
  cat "$topdir/templates/form-bottom.html"
} > "$docroot/query.html"

# Package the trained predictor for download.

# The query page on its own.
tar -czvf port-html.tar.gz "$docroot/query.html"

# The whole working directory (which at this point still contains
# port-html.tar.gz, as before). The archive is written straight to its final
# location: creating it inside "." while tar is walking "." makes tar run
# into its own half-written output. The --exclude covers the case where
# $docroot itself lies below the working directory.
tar -czvf "$docroot/port-cgi.tar.gz" --exclude=port-cgi.tar.gz .

mv port-html.tar.gz "$docroot/"


# write_results_index FILE
#
# Replaces FILE with the final results page: links to the text results, the
# plots, and the generated query page and archives, all relative to the
# global $linkroot. The separator is a real <hr> (the former <hline> is not
# an HTML element) and the page is closed with </html>.
write_results_index() {
  local page=$1
  cat > "$page" <<EOF
<html>
Your calculations are completed!<br>
Click on the following links to get your results.<br>
<a href=$linkroot/summary.txt>Summary</a><br>
<a href=$linkroot/roc.txt>ROC</a><br>
<a href=$linkroot/results.txt>Predicted versus expected values.</a><br>
<a href=$linkroot/features.txt>Features summary</a><br>
<a href=$linkroot/train.txt>Pattern Vectors (training data)</a><br>
<a href=$linkroot/test.txt>Pattern Vectors (validation data, used to stop training)</a><br>
<a href=$linkroot/lcurve.txt>Learning history data</a><br>
<hr><br>
<br>Graphical Results may be accessed from the following links:<br>
<a href=$linkroot/roc.ps>ROC Curve (Postscript file)</a><br>
<a href=$linkroot/roc.png>ROC Curve (PNG within HTML)</a><br>
<a href=$linkroot/correlations.ps>Correlation between feature vectors and target property (Postscript file)</a><br>
<a href=$linkroot/correlations.png>Correlation between feature vectors and target property (PNG within HTML)</a><br>
<a href=$linkroot/lcurve.ps>Learning curve (Postscript file)</a><br>
<a href=$linkroot/lcurve.png>Learning curve (PNG within HTML)</a><br>
<br>Links to the web server and binaries of the predictor created for your data. <br>
<a href=$linkroot/query.html>Query page. </a><br>
<a href=$linkroot/port-html.tar.gz>Trained server for local installation (HTML). </a><br>
<a href=$linkroot/port-cgi.tar.gz>CGI of the trained server </a><br>
</html>
EOF
}

# Overwrites the auto-refreshing placeholder page.
write_results_index "$docroot/index.html"






# log_job_completed LOGFILE
#
# Appends "Job completed ( <queryid> ): <current date> " to LOGFILE, preceded
# by a newline. The job id and the date are passed as printf arguments, not
# as the format string, so a '%' or backslash in either is written literally.
log_job_completed() {
  local logfile=$1
  printf '\nJob completed ( %s ): %s ' "$queryid" "$(date)" >> "$logfile"
}

log_job_completed ../logs/lock.log

