# Computes an affinity-like profile of a given RNA sequence towards a given protein sequence, using a neural
# network predictor trained on two-neighbours PSSM features (pssm2) and global amino acid composition.
# The script requires as inputs a PSSM output file of the target protein sequence and the FASTA file of the
# RNA sequence; the output is a .profile file.
# Usage:
#   sh RNA-Protein_AffinityProfile.sh pssmFileName RNAsequenceFile
#
# Directories holding the helper shell scripts, the binaries and the R scripts.
# All of them are relative to the directory the script is started from.
Scriptpath="SCRIPTS"
Binpath="BINS"
Rpath="RSCRIPTS"

# Start from an empty scratch directory. It is created when missing (the later
# steps write into it), and -f keeps rm quiet when there is nothing to delete.
mkdir -p TMP
rm -f TMP/*

# Positional arguments: $1 = PSSM file of the protein, $2 = RNA FASTA file.
PSSMfile=$1
sequenceRNA=$2
printf '%s\n' "$sequenceRNA"
###Calculating pssm features
# PSSM2nbrs is expected to leave its result in a file named pssm2 in the
# current directory; that file is then moved into the scratch directory.
"$Binpath"/./PSSM2nbrs "$PSSMfile"

# Stop here when no feature file was produced: every later step would otherwise
# run on missing data. The output file is checked rather than the exit status,
# because the exit code convention of the binary is not known from this script.
if [ ! -f pssm2 ]; then
	echo "PSSM2nbrs produced no pssm2 file for '$PSSMfile'" >&2
	exit 1
fi
mv pssm2 TMP/pssm2
mkdir -p RESULTS

###Calculating global composition

# pssm_residue_column FILE
# Prints the second whitespace-separated field of every line of FILE except the
# first three and the last six lines. This is the same selection as
# `head -n-6 FILE | tail -n+4 | awk '{print $2}'`, but done in one awk pass and
# without the negative line count of head, which is a GNU extension.
# Returns non-zero when FILE is empty, instead of silently reading stdin.
pssm_residue_column() {
	if [ -z "$1" ]; then
		echo "pssm_residue_column: missing file name" >&2
		return 1
	fi
	awk '
		NR > 3 { field[NR] = $2 }
		END { for (i = 4; i <= NR - 6; i++) print field[i] }
	' "$1"
}

# NOTE(review): presumably the skipped lines are the PSSM header/trailer and
# column 2 holds the residue letter - confirm against the PSSM format in use.
pssm_residue_column "$PSSMfile" > TMP/fastatmp
"$Binpath"/./get-20bits -i TMP/fastatmp -f ghostfile -21 binarycode -compo TMP/compo

###Generating network test pattern

# build_test_patterns FEATURES COMPOSITION OUTPUT [NOUT]
# Appends one pattern line to OUTPUT for every non-blank line of COMPOSITION:
# the FEATURES line with the same line number, then the COMPOSITION line, then
# NOUT (default 16) zero placeholders for the network output units.
# Fields are joined by single spaces, with two spaces before the first zero,
# which is the layout the former per-line echo produced.
# Everything happens in one awk pass. The former loop started two sed processes
# per line and advanced its index with `let`, which plain sh implementations
# such as dash do not provide, so the index could stay stuck on line 1.
build_test_patterns() {
	awk -v n="${4:-16}" '
		BEGIN { for (i = 0; i < n; i++) pad = pad " 0" }
		FILENAME == ARGV[1] { feat[FNR] = $0; next }
		NF == 0 { next }
		{
			$0 = feat[FNR] " " $0
			$1 = $1    # rebuild the record with single-space separators
			print $0, pad
		}
	' "$1" "$2" >> "$3"
}

# Number of dummy output columns appended to every pattern.
outunits=16
ls TMP/compo
build_test_patterns TMP/pssm2 TMP/compo TMP/joinPat "$outunits"
cp TMP/joinPat TMP/test.dat

# Every column that is not an output column is an input unit.
cols=$(awk 'NR == 1 { print NF; exit }' TMP/test.dat)
inunits=$((${cols:-0} - outunits))
echo Adding header to test data
sh "$Scriptpath"/header-adding.sh TMP/test.dat "$inunits" "$outunits"
echo Done!

###Neural network prediction
# The prediction runs inside TMP. A failed cd must stop the script, because the
# following steps create and delete files relative to the current directory.
cd TMP || exit 1

# The batchman program is identical for every network (each network is copied
# to mynet.net before its run), so it is written once instead of per iteration.
{
	echo "test_name  := \"test\" 					"
	echo "net_name   := \"mynet\" 					"
	echo "test_file  := test_name  + \".pat\" 			"
	echo "test_log   := test_name  + \".log\" 			"
	echo "ave_log    := net_name   + \".log\" 			"
	echo "net_work   := net_name   + \".net\" 			"
	echo "res_name   := net_name   + \".res\" 			"
	echo "res_test   := test_name  + \".res\" 			"
	echo "loadPattern(test_file) 					"
	echo "loadNet(net_work) 					"
	echo "setPattern(test_file) 					"
	echo "testNet()							"
	echo "saveResult(res_test, 1, PAT, FALSE, FALSE, \"create\") 	"
} > batchman.snns

# Run every trained network on the test patterns; each one yields <net>.pred.
for f in ../NETWORK/*.net
do
	# An unmatched glob stays literal; skip it when no network file exists.
	[ -e "$f" ] || continue
	rm -f mynet.net test.res
	echo "$f"
	# File name of the network without its directory part.
	netName=${f##*/}
	echo "$netName"
	cp "$f" mynet.net

	../"$Binpath"/batchman -f batchman.snns > network-prediction.log
	sh ../"$Scriptpath"/flatten-snns.sh test.res > ../TMP/"$netName".pred
done

###Dinucleotide affinity scores per residue

# rna_sequence_rows FASTA
# Prints one "<character> <position>" row for the characters of the second line
# of FASTA (only that line is read). Rows are emitted with print, so a stray
# '%' in the sequence is never interpreted as a printf format.
# NOTE(review): the loop stops one short of the line length, so the last
# character gets no row - presumably because scores are per dinucleotide;
# confirm against RNAprofileGenerator.R before changing the bound.
rna_sequence_rows() {
	sed -n 2p "$1" |
		awk '{ for (i = 1; i < length($0); i++) print substr($0, i, 1) " " i }'
}

#R CMD BATCH ~/work/Rscripts/predAnalysis.R
R CMD BATCH ../"$Rpath"/probabilityPAllMod.R
#R --save <../$Rpath/probabilityPAllMod.R
#cp predMean $1-${2}Average
#cp predStdev $1-${2}Stdev

# Result files are named after the PSSM file without its directory part, so a
# PSSM given as dir/file still lands directly inside RESULTS.
pssm_name=${1##*/}
cp maxperDiNu ../RESULTS/"$pssm_name"-DiNuScores
cp maxperDiNuPerResidue ../RESULTS/"$pssm_name"-DiNuScoresPerResidue
#cp predStandardAve $1-${2}StandardAve

# The current directory is TMP here: relative FASTA paths are resolved against
# the parent directory, absolute ones are used unchanged.
case $sequenceRNA in
	/*) rna_fasta=$sequenceRNA ;;
	*) rna_fasta=../$sequenceRNA ;;
esac
rna_sequence_rows "$rna_fasta" > RNA.seq
#cp ../RESULTS/$1-DiNuScores maxperDiNu
R CMD BATCH ../"$Rpath"/RNAprofileGenerator.R
rm -f maxperDiNu maxperDiNuPerResidue
# write_profile SEQ_ROWS PROFILE OUTPUT
# Writes OUTPUT with each row of SEQ_ROWS followed by a tab and the second
# column of the matching PROFILE row; the first line of PROFILE is skipped.
# The column is piped straight into paste, so no intermediate file is needed.
write_profile() {
	awk 'NR > 1 { print $2 }' "$2" | paste "$1" - > "$3"
}

# Back to the start directory; stop if that fails, since the following paths
# and deletions are relative to it.
cd .. || exit 1

# The directory parts of both arguments are stripped from the result name, so
# inputs given as dir/file still produce a file directly inside RESULTS.
write_profile TMP/RNA.seq TMP/RNAseq.profile \
	"RESULTS/${1##*/}-${sequenceRNA##*/}.profile"
rm -f TMP/RNAseq.profile

# Delete leftover intermediate files from the current directory, one rm call
# per pattern.
for leftover in 'pssm*' 'nbr*' 'binarycode*' 'ghostfile' '*tmp'
do
	# Intentionally unquoted: the pattern has to glob-expand here.
	# shellcheck disable=SC2086
	rm -f $leftover
done
