#!/bin/bash

# This script should be called with the first argument being the path of the
# directory where the executables, such as pssm2nbrs.bin, are located.
# The second argument is the query protein PSSM and the third argument is the
# probe protein PSSM (the PSSM files should be in the current directory).
# The fourth, optional argument is the path of the R executable.
# Directory that receives every generated feature file.
mkdir -p ../data

#######################################
# Store the positional arguments in the globals used by the rest of the script.
# Globals:   bins, querypssm, probepssm, R (all written)
# Arguments: $1 - directory containing the helper executables
#            $2 - query protein PSSM file name
#            $3 - probe protein PSSM file name
#            $4 - optional R executable; "R" is used when missing or empty
#######################################
parse_args() {
  bins=$1
  querypssm=$2
  probepssm=$3
  # ${4:-R} keeps a path containing whitespace intact; the former unquoted
  # `[ ! -z $R ]` test errored on such a value and fell back to plain "R".
  R=${4:-R}
}

parse_args "$@"

#######################################
# Features for probe protein
#
# Build and cache the feature files of the probe protein. Nothing is done
# when ../data/<probe>.summary already exists. Otherwise pssm2nbrs.bin and
# get-20bits.bin are run on the probe PSSM, the R script my.R is written and
# executed, and the results are stored under ../data/ with the prefix
# "<probe>.". pssm0, nbr0 and my.R are left in the current directory.
#
# Arguments: $1 - directory containing pssm2nbrs.bin and get-20bits.bin
#            $2 - probe PSSM file name (read from the current directory)
#            $3 - R executable (defaults to "R")
# Returns:   0 on success or when the cached summary exists; 1 when no probe
#            name was given or R produced an empty summary
#######################################
build_probe_features() {
  local bin_dir=$1 probe=$2 r_bin=${3:-R}
  local data_dir=../data
  local k rc=0

  # An empty name would turn the "$probe".* glob below into ".*".
  if [ -z "$probe" ]; then
    echo "build_probe_features: no probe PSSM given" >&2
    return 1
  fi

  # The summary file doubles as the marker that the features are cached.
  if [ -f "$data_dir/$probe.summary" ]; then
    return 0
  fi

  "$bin_dir/pssm2nbrs.bin" "$probe"
  cp pssm0 "$probe.pssm0"
  # Copy of pssm2 with every value multiplied by 0.1. It is moved to ../data
  # after the raw pssm2 (see the loop below) and therefore replaces it.
  awk '{for(i=1;i<=NF;i++)printf 0.1*$i" ";printf "\n"}' pssm2 > "$probe.pssm2"

  # Create a dummy target vector file
  awk '{if(NF==44 && $1+0==$1)print $2,"1"}' "$probe" > "$probe.o"
  awk '{print $2}' "$probe.o" > "$probe.o1"
  awk '{print $1 NR}' "$probe.o" > "$probe.lheader.txt"
  "$bin_dir/get-20bits.bin" -i "$probe.o" -f "$probe.fasta" \
    -21 "$probe.21" -compo "$probe.compo"
  mv "$probe.21" nbr0

  # R script printing 0.1 * column means of pssm0 followed by 0.1 * the
  # maxima of its first 20 columns.
  cat > my.R <<'EOF'
options(digits=3);
options(width=5000);
pssm<-read.table("pssm0");
x0=0.1*colMeans(pssm)
x=c(x0,0.1*max(pssm[,1]), 0.1*max(pssm[,2]),0.1*max(pssm[,3]),0.1*max(pssm[,4]),0.1*max(pssm[,5]), 0.1*max(pssm[,6]),0.1*max(pssm[,7]),0.1*max(pssm[,8]),0.1*max(pssm[,9]), 0.1*max(pssm[,10]),0.1*max(pssm[,11]),0.1*max(pssm[,12]),0.1*max(pssm[,13]), 0.1*max(pssm[,14]),0.1*max(pssm[,15]),0.1*max(pssm[,16]),0.1*max(pssm[,17]), 0.1*max(pssm[,18]),0.1*max(pssm[,19]),0.1*max(pssm[,20]))
x
EOF

  for ((k = 1; k <= 8; k++)); do
    mv "pssm$k" "$data_dir/$probe.pssm$k"
    mv "nbr$k" "$data_dir/$probe.nbr$k"
  done

  # Drop the echoed "> ..." command lines and keep the last printed row.
  "$r_bin" -f my.R | grep -v ">" | tail -n 1 > "$data_dir/$probe.summary"
  mv "$probe".* "$data_dir/"
  cp pssm0 "$data_dir/$probe.pssm0"
  cp nbr0 "$data_dir/$probe.nbr0"

  # The redirection creates the summary even when R fails; an empty file
  # would be taken for a valid cache entry on every later run.
  if [ ! -s "$data_dir/$probe.summary" ]; then
    echo "build_probe_features: R produced no summary for $probe" >&2
    rm -f -- "$data_dir/$probe.summary"
    rc=1
  fi
  return "$rc"
}

build_probe_features "$bins" "$probepssm" "$R"


##############################
#Features of query protein
##############################
#######################################
# Build and cache the feature files of the query protein. Nothing is done
# when ../data/<query>.summary already exists. Otherwise pssm2nbrs.bin and
# get-20bits.bin are run on the query PSSM, the R script my.R is written and
# executed, and the results are stored under ../data/ with the prefix
# "<query>.".
#
# Arguments: $1 - directory containing pssm2nbrs.bin and get-20bits.bin
#            $2 - query PSSM file name (read from the current directory)
#            $3 - R executable (defaults to "R")
# Returns:   0 on success or when the cached summary exists; 1 when no query
#            name was given or R produced an empty summary
#######################################
build_query_features() {
  local bin_dir=$1 query=$2 r_bin=${3:-R}
  local data_dir=../data
  local k rc=0

  # An empty name would turn the "$query".* glob below into ".*".
  if [ -z "$query" ]; then
    echo "build_query_features: no query PSSM given" >&2
    return 1
  fi

  # The summary file doubles as the marker that the features are cached.
  if [ -f "$data_dir/$query.summary" ]; then
    return 0
  fi

  "$bin_dir/pssm2nbrs.bin" "$query"
  cp pssm0 "$query.pssm0"
  # Copy of pssm2 with every value multiplied by 0.1. It is moved to ../data
  # after the raw pssm2 (see the loop below) and therefore replaces it.
  awk '{for(i=1;i<=NF;i++)printf 0.1*$i" ";printf "\n"}' pssm2 > "$query.pssm2"

  # Create a dummy target vector file
  awk '{if(NF==44 && $1+0==$1)print $2,"1"}' "$query" > "$query.o"
  awk '{print $2}' "$query.o" > "$query.o1"
  awk '{print $1 NR}' "$query.o" > "$query.lheader.txt"
  "$bin_dir/get-20bits.bin" -i "$query.o" -f "$query.fasta" \
    -21 "$query.21" -compo "$query.compo"
  cp "$query.21" "$data_dir/$query.nbr0"

  for ((k = 1; k <= 8; k++)); do
    mv "pssm$k" "$data_dir/$query.pssm$k"
    mv "nbr$k" "$data_dir/$query.nbr$k"
  done

  # Write the R script here as well: it used to be created only while the
  # probe features were built, so a cached probe combined with a fresh
  # working directory left this step without my.R.
  cat > my.R <<'EOF'
options(digits=3);
options(width=5000);
pssm<-read.table("pssm0");
x0=0.1*colMeans(pssm)
x=c(x0,0.1*max(pssm[,1]), 0.1*max(pssm[,2]),0.1*max(pssm[,3]),0.1*max(pssm[,4]),0.1*max(pssm[,5]), 0.1*max(pssm[,6]),0.1*max(pssm[,7]),0.1*max(pssm[,8]),0.1*max(pssm[,9]), 0.1*max(pssm[,10]),0.1*max(pssm[,11]),0.1*max(pssm[,12]),0.1*max(pssm[,13]), 0.1*max(pssm[,14]),0.1*max(pssm[,15]),0.1*max(pssm[,16]),0.1*max(pssm[,17]), 0.1*max(pssm[,18]),0.1*max(pssm[,19]),0.1*max(pssm[,20]))
x
EOF

  # Drop the echoed "> ..." command lines and keep the last printed row.
  "$r_bin" -f my.R | grep -v ">" | tail -n 1 > "$data_dir/$query.summary"
  mv "$query".* "$data_dir/"

  # The redirection creates the summary even when R fails; an empty file
  # would be taken for a valid cache entry on every later run.
  if [ ! -s "$data_dir/$query.summary" ]; then
    echo "build_query_features: R produced no summary for $query" >&2
    rm -f -- "$data_dir/$query.summary"
    rc=1
  fi
  return "$rc"
}

build_query_features "$bins" "$querypssm" "$R"

        #######################################
        # Prepare the per-residue probe columns that are later pasted next to
        # the query features. Writes, in the current directory:
        #   lheader.txt   - copy of ../data/<query>.lheader.txt
        #   probe.summary - the probe summary line, once per query pssm0 row
        #   tmp.length    - "<query rows>/200 <probe rows>/200", once per
        #                   query pssm0 row
        # Both generated files are removed first; they are not recreated when
        # the query pssm0 has no rows.
        # Arguments: $1 - query PSSM name, $2 - probe PSSM name (both are
        #            prefixes of files under ../data/)
        #######################################
        replicate_probe_rows() {
          local query=$1 probe=$2
          local data_dir=../data
          local query_rows probe_rows lengths summary row
          local -a summary_fields=()

          query_rows=$(wc -l < "$data_dir/$query.pssm0")
          probe_rows=$(wc -l < "$data_dir/$probe.pssm0")
          # A missing pssm0 counts as zero rows; this also strips the padding
          # some wc implementations put in front of the number.
          query_rows=$((${query_rows:-0}))
          probe_rows=$((${probe_rows:-0}))

          lengths=$(awk -v q="$query_rows" -v p="$probe_rows" \
            'BEGIN { print q / 200, p / 200 }')

          # Split and re-join the summary so its fields are separated by
          # single spaces, as the former unquoted echo did.
          read -r -a summary_fields < "$data_dir/$probe.summary"
          summary="${summary_fields[*]}"

          cp "$data_dir/$query.lheader.txt" lheader.txt

          rm -f probe.summary tmp.length
          for ((row = 1; row <= query_rows; row++)); do
            printf '%s\n' "$summary" >> probe.summary
            printf '%s\n' "$lengths" >> tmp.length
          done
        }

        replicate_probe_rows "$querypssm" "$probepssm"

#######################################
# Append probe features to query protein features.
#
# For every pair of pssm index j and nbr index i in -1..5, paste together
#   ../data/<query>.nbr<i>   (when that file exists)
#   ../data/<query>.pssm<j>  (when that file exists)
#   probe.summary, tmp.length, ../data/<query>.o1
# into pats.pssm<j>-nbr<i> in the current directory. Pairs for which neither
# feature file exists produce no output file. Nothing in this script creates
# files with index -1, so that index presumably stands for "leave this
# feature group out" - confirm against the consumers of the pats.* files.
# Arguments: $1 - query PSSM name (prefix of the files under ../data/)
#######################################
write_pattern_files() {
  local query=$1
  local data_dir=../data
  local pssm_idx nbr_idx
  local -a feature_files

  for ((pssm_idx = -1; pssm_idx <= 5; pssm_idx++)); do
    for ((nbr_idx = -1; nbr_idx <= 5; nbr_idx++)); do
      # Column order is fixed: nbr features first, then pssm features.
      feature_files=()
      if [ -f "$data_dir/$query.nbr$nbr_idx" ]; then
        feature_files+=("$data_dir/$query.nbr$nbr_idx")
      fi
      if [ -f "$data_dir/$query.pssm$pssm_idx" ]; then
        feature_files+=("$data_dir/$query.pssm$pssm_idx")
      fi
      if [ "${#feature_files[@]}" -eq 0 ]; then
        continue
      fi

      paste "${feature_files[@]}" probe.summary tmp.length \
        "$data_dir/$query.o1" > "pats.pssm${pssm_idx}-nbr${nbr_idx}"
    done
  done
}

write_pattern_files "$querypssm"

