#Affinity-like profile calculation for a given RNA sequence towards a given protein sequence, according to a neural
#network predictor trained with the two-neighbours PSSM feature (pssm2) and global amino acid composition.
#The script requires as inputs a PSSM output file of the target protein sequence and the FASTA file of the RNA sequence; the per-nucleotide profile (RNAseq.profile) is reported in final-score-dinuc.txt.
#Usage:
# sh RNA-Protein_AffinityProfile.sh pssmFileName RNAsequenceFile
#
# Locations of the installed predictor. The install root may be overridden
# through the RBP_DINUC_PRED_HOME environment variable; when that variable is
# unset or empty the original hard-coded location is used.
installdir="${RBP_DINUC_PRED_HOME:-/home/shandar/myprogs/RBP-dinuc-pred}"
Scriptpath="$installdir/SCRIPTS"
Binpath="$installdir/BINS"
Rpath="$installdir/RSCRIPTS"

# Both inputs are mandatory: $1 is the PSSM file, $2 the RNA FASTA file.
# Stop early instead of running the whole pipeline on missing data.
if [ "$#" -lt 2 ]; then
  echo "Usage: sh $0 pssmFileName RNAsequenceFile" >&2
  exit 2
fi
for input in "$1" "$2"; do
  if [ ! -r "$input" ]; then
    echo "Error: cannot read input file '$input'" >&2
    exit 1
  fi
done

# Working directories; intermediates left over from a previous run are removed.
mkdir -p RESULTS TMP || exit 1
rm -f TMP/*

# Work on copies with fixed names in the current directory. A cp failure is
# not fatal here: it also happens when an input already is the target file.
cp -- "$1" sequence_pssm
cp -- "$2" sequence_rna

PSSMfile="$PWD/sequence_pssm"
sequenceRNA="$PWD/sequence_rna"
###Calculating pssm features
# PSSM2nbrs is run on the PSSM copy; the file pssm2 found afterwards in the
# current directory (presumably its output - confirm) is moved into TMP.
"$Binpath/PSSM2nbrs" "$PSSMfile"
mv pssm2 TMP/pssm2

###Calculating global composition
# pssm_residue_column FILE
# Prints the second whitespace-separated field of every line of FILE except
# the first 3 and the last 6 lines. Portable replacement for
# `head -n-6 FILE | tail -n+4 | awk '{print $2}'`: negative counts for head
# are a GNU extension.
pssm_residue_column() {
  awk '
    # held[] is a 6-slot ring buffer: at line NR the slot NR % 6 still holds
    # field 2 of line NR-6, which is past the 3 header lines once NR > 9.
    NR > 9 { print held[NR % 6] }
    { held[NR % 6] = $2 }
  ' "$1"
}
pssm_residue_column "$PSSMfile" > TMP/fastatmp
"$Binpath/./get-20bits" -i TMP/fastatmp -f ghostfile -21 binarycode -compo TMP/compo

###Generating network test pattern
# build_patterns FEATURES COMPOSITION OUTPUT
# Appends one pattern row to OUTPUT for every non-empty line of COMPOSITION:
# row j is line j of FEATURES, then line j of COMPOSITION, then 16 zeros
# (the same count as outunits below).
build_patterns() {
  j=1
  for instances in $(awk '{print $1}' "$2"); do
    lineData=$(sed -n "${j}p" "$1")
    lineComp=$(sed -n "${j}p" "$2")
    # Left unquoted on purpose: word splitting collapses the whitespace of
    # both lines into single blanks.
    # shellcheck disable=SC2086
    echo $lineData $lineComp " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0" >> "$3"
    # POSIX arithmetic instead of `let`: `let` is a bash builtin that is
    # missing from sh/dash, where the counter would never advance and every
    # row would repeat line 1.
    j=$((j + 1))
  done
}
ls TMP/compo
build_patterns TMP/pssm2 TMP/compo TMP/joinPat
cp TMP/joinPat TMP/test.dat

# Number of input units = columns of the first pattern row minus output units.
cols=$(awk '{if(NR==1)print NF}' TMP/test.dat)
outunits=16
inunits=$(echo $cols $outunits | awk '{print $1-$2}')
echo Adding header to test data
sh "$Scriptpath/header-adding.sh" TMP/test.dat "$inunits" "$outunits"
echo Done!

###Neural network prediction
# Everything below works on relative names inside TMP; stop if it cannot be
# entered so that the rm/cp/redirects never touch the caller's directory.
cd TMP || { echo "Error: cannot enter TMP directory" >&2; exit 1; }

# The batchman program does not depend on the network being evaluated (it
# always loads mynet.net and test.pat), so it is written once before the loop.
{
  echo "test_name  := \"test\" 					"
  echo "net_name   := \"mynet\" 					"
  echo "test_file  := test_name  + \".pat\" 			"
  echo "test_log   := test_name  + \".log\" 			"
  echo "ave_log    := net_name   + \".log\" 			"
  echo "net_work   := net_name   + \".net\" 			"
  echo "res_name   := net_name   + \".res\" 			"
  echo "res_test   := test_name  + \".res\" 			"
  echo "loadPattern(test_file) 					"
  echo "loadNet(net_work) 					"
  echo "setPattern(test_file) 					"
  echo "testNet()							"
  echo "saveResult(res_test, 1, PAT, FALSE, FALSE, \"create\") 	"
} > batchman.snns

# Run every trained network on the test pattern; the flattened result of each
# one is stored as <network file name>.pred inside TMP.
for f in "$installdir"/NETWORK/*.net; do
  # An unmatched glob stays literal; skip it instead of copying a bogus path.
  [ -e "$f" ] || continue
  rm -f mynet.net test.res
  echo "$f"
  netName=${f##*/}
  echo "$netName"
  cp "$f" mynet.net

  "$Binpath/batchman" -f batchman.snns > network-prediction.log
  sh "$Scriptpath/flatten-snns.sh" test.res > "$netName.pred"
done

###Dinucleotide affinity scores per residue

# Write the R driver script in one go: it sources triNucMaxProfile.r, calls
# triNucMaxProfile on "maxperDiNu" and "RNA.seq", prints the resulting profile
# and saves it with write.table as "RNAseq.profile".
printf '%s\n' \
  " source(\"$installdir/RSCRIPTS/triNucMaxProfile.r\") " \
  " profile<-triNucMaxProfile(\"maxperDiNu\",\"RNA.seq\" ) " \
  " print(profile)  " \
  " write.table(profile,\"RNAseq.profile\") " \
  > "$Rpath/RNAprofileGenerator.R"



# Make the network directory reachable as ./NETWORK from inside TMP
# (presumably read by probabilityPAll.R - confirm against that script).
ln -s "$installdir/NETWORK" .
R CMD BATCH "$Rpath/probabilityPAll.R"
cp maxperDiNu ../RESULTS/DiNuScores.txt
cp maxperDiNuPerResidue ../RESULTS/DiNuScoresPerResidue.txt

# rna_positions
# Reads sequence lines on stdin and prints one "<nucleotide> <position>" line
# per character. awk's substr() is 1-based, so positions run from 1 to the
# line length; the sequence is printed as data, never used as a printf format.
rna_positions() {
  awk '{ n = length($0); for (i = 1; i <= n; i++) print substr($0, i, 1), i }'
}
# Only the second line of the FASTA file is used as the RNA sequence.
sed -n 2p "$sequenceRNA" | rna_positions > RNA.seq

R CMD BATCH "$Rpath/RNAprofileGenerator.R"
# Back to the directory the script was started from. The recursive delete
# below uses relative names, so do not continue if the cd failed.
cd .. || exit 1

# Keep the second field of the two-field lines of the saved profile, then
# pair each value with its nucleotide and position as
# "<nucleotide>:<position>: <first 5 characters of the score>".
awk '{if(NF==2)print $2}' TMP/RNAseq.profile > RESULTS/final-profile.txt
paste TMP/RNA.seq RESULTS/final-profile.txt | awk '{print $1":"$2":",substr($3,1,5)}' > final-score-dinuc.txt

# Intermediate files and logs are discarded; the message now says so instead
# of pointing at directories that no longer exist.
rm -rf TMP RESULTS
echo "Final results are written in final-score-dinuc.txt"
echo "Intermediate TMP and RESULTS directories have been removed."
