#Dinucleotide score calculation for a given protein sequence, using a neural network predictor
#trained with the two-neighbour PSSM feature (pssm2) and global amino acid composition.
#The script requires as input a PSSM output file of the queried protein sequence; the output file is a vector of scores for the 16 dinucleotides.
#Usage:
#sh dinuScoreProb.sh pssmFileName

# Root of the RBP-dinuc-pred installation. The historical location is kept as
# the default; set RBP_DINUC_PRED_HOME in the environment to run from another
# checkout without editing this file.
installdir="${RBP_DINUC_PRED_HOME:-/home/shandar/myprogs/RBP-dinuc-pred}"
# Sub-directories of the installation used by the rest of the script.
Scriptpath="${installdir}/SCRIPTS"   # helper shell scripts
Binpath="${installdir}/BINS"         # compiled helper programs
Rpath="${installdir}/RSCRIPTS"       # R scripts

# Workspace preparation.
#   $1 - path to the PSSM file to score (required).
# A private copy of the input is made as ./pssmfile so that later steps can
# refer to a fixed name; TMP/ is recreated from scratch on every run.
if [ "$#" -lt 1 ] || [ ! -r "$1" ]; then
  echo "Usage: sh $0 pssmFileName" >&2
  echo "  pssmFileName must be an existing, readable PSSM file" >&2
  exit 1
fi

rm -rf TMP
mkdir TMP || exit 1

# cp refuses to copy a file onto itself (input already named ./pssmfile);
# that case is harmless, so only give up when no readable copy exists.
cp -- "$1" pssmfile
if [ ! -r pssmfile ]; then
  echo "$0: could not create working copy ./pssmfile from '$1'" >&2
  exit 1
fi
PSSMfile=pssmfile
#sequenceRNA=$2
mkdir -p RESULTS

### PSSM neighbour features
# PSSM2nbrs is run on the working copy of the PSSM; it is expected to leave a
# file called "pssm2" in the current directory, which is then filed under TMP/.
"$Binpath/./PSSM2nbrs" "$PSSMfile"
mv pssm2 TMP/pssm2

### Global composition
# Strip the last 6 and the first 3 lines of the PSSM file and keep column 2
# (presumably the residue letter of each row - confirm against the PSSM format).
# Note: "head -n-6" relies on GNU head.
head -n-6 "$PSSMfile" \
  | tail -n+4 \
  | awk '{print $2}' > TMP/fastatmp
# get-20bits reads the extracted column and writes the composition table to
# TMP/compo; "ghostfile" and "binarycode" are side outputs removed at the end.
"$Binpath/./get-20bits" -i TMP/fastatmp -f ghostfile -21 binarycode -compo TMP/compo

###Generating network test pattern

# build_patterns FEATURES COMPO
#   Joins line j of FEATURES with line j of COMPO and appends 16 placeholder
#   target values (all 0), writing one pattern row per line to stdout.
#   - Fields inside each input line are re-joined with single spaces.
#   - The number of rows equals the number of non-blank lines in COMPO; a
#     missing FEATURES line contributes nothing to its row.
#   Done in a single awk pass: the previous per-row `sed -n Np` lookups were
#   quadratic and relied on the bash-only `let`, which left the row index
#   stuck at 1 when the script was run with a POSIX sh such as dash.
build_patterns() {
  awk -v featfile="$1" '
    # Collapse runs of blanks/tabs and trim both ends.
    function squeeze(s) {
      gsub(/[ \t]+/, " ", s)
      sub(/^ /, "", s)
      sub(/ $/, "", s)
      return s
    }
    BEGIN { rows = 0 }
    { comp[NR] = $0; if (NF > 0) rows++ }
    END {
      for (j = 1; j <= rows; j++) {
        feat = ""
        if ((getline feat < featfile) <= 0) feat = ""
        row = squeeze(feat)
        c = squeeze(comp[j])
        if (row != "" && c != "") row = row " " c
        else if (c != "") row = c
        if (row != "") row = row " "
        # Placeholder targets keep their leading blank, as before.
        print row " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0"
      }
    }
  ' "$2"
}

ls TMP/compo
build_patterns TMP/pssm2 TMP/compo > TMP/joinPat
cp TMP/joinPat  TMP/test.dat

# Input units = total columns of the first pattern row minus the output units.
cols=$(awk 'NR == 1 { print NF; exit }' TMP/test.dat)
outunits=16
inunits=$(( ${cols:-0} - $outunits ))
echo Adding header to test data
sh "$Scriptpath/header-adding.sh" TMP/test.dat "$inunits" "$outunits"
echo Done!

###Neural network prediction
# Everything from here until the later "cd .." runs inside TMP/. Abort if the
# directory cannot be entered, otherwise the rm/cp calls below would operate
# on the caller's working directory.
cd TMP || { echo "$0: cannot enter TMP" >&2; exit 1; }

# The batchman program does not depend on the network being evaluated (each
# network is copied to the fixed name mynet.net), so it is written once:
# load test.pat, load mynet.net, run the test and save the result as test.res.
cat > batchman.snns <<'EOF'
test_name  := "test"
net_name   := "mynet"
test_file  := test_name  + ".pat"
test_log   := test_name  + ".log"
ave_log    := net_name   + ".log"
net_work   := net_name   + ".net"
res_name   := net_name   + ".res"
res_test   := test_name  + ".res"
loadPattern(test_file)
loadNet(net_work)
setPattern(test_file)
testNet()
saveResult(res_test, 1, PAT, FALSE, FALSE, "create")
EOF

for f in "$installdir"/NETWORK/*.net
do
        # An unmatched glob is left as the literal pattern; skip it.
        [ -e "$f" ] || continue

        rm -f mynet.net test.res
        echo "$f"
        # File name of the network without its directory. The former
        # `cut -d/ -f3` picked a fixed path component, which for an absolute
        # install path is a directory name shared by every network, so all
        # predictions were written to the same .pred file.
        netName=${f##*/}
        echo "$netName"
        cp "$f" mynet.net

        "$Binpath/batchman" -f batchman.snns > network-prediction.log
        # One flattened prediction file per network, kept in TMP/.
        sh "$Scriptpath/flatten-snns.sh" test.res > "../TMP/${netName}.pred"
done

###Dinucleotide affinity scores per residue

# Runs with TMP/ as the working directory (entered before the network loop);
# the R script is expected to leave www.pred there - TODO confirm in the R code.
#R CMD BATCH ~/work/Rscripts/predAnalysis.R
R CMD BATCH "$Rpath/probabilityPAllMod.R"
cd ..

# First two columns of every 44-field row of the PSSM copy; these label the
# rows of the final table (presumably residue number and residue letter).
awk 'NF == 44 { print $1, $2 }' pssmfile > names.txt

# result.txt = header line followed by the labels pasted next to the scores.
{
  echo "res# res AA AC AG AT CA CC CG CT GA GC GG GT TA TC TG TT"
  paste names.txt TMP/www.pred
} > result.txt



#cp predMean $1-${2}Average
#cp predStdev $1-${2}Stdev
#mv TMP/maxperDiNu RESULTS/$1-DiNuScores
#mv TMP/maxperDiNuPerResidue RESULTS/$1-DiNuScoresPerResidue
#cp predStandardAve $1-${2}StandardAve
#cp $sequenceRNA RNA.seq
#R --save < $myRpath/RNAprofileGenerator.R
#cp RNAseq.profile $1-$sequenceRNA.profile
# Remove intermediate files left in the working directory (the PSSM working
# copy, helper-program side outputs and *tmp scratch files).
# NOTE(review): the globs are matched against the current directory, so any
# unrelated file named pssm*, nbr*, binarycode* or *tmp is removed as well -
# including an input file that lives here under such a name.
rm -f pssm* nbr* binarycode* ghostfile *tmp
