#!/bin/bash

# This script should be called with the first argument being the path of the directory
# where executables, such as pssm2nbrs.bin, are located.
# The second argument is the query protein PSSM and the third argument is the probe protein PSSM.
# The fourth, optional argument is the path of the R executable.

# Positional arguments: executable directory, query PSSM file, probe PSSM file.
bins=$1
querypssm=$2
probepssm=$3

# Optional fourth argument: the R executable to run. When it is missing or
# empty, fall back to plain "R". The parameter expansion below never
# word-splits, so an R path containing spaces is kept intact instead of
# breaking the test and silently falling back to the default.
R=${4:-R}

# (Re)create my.R in the current directory. The R script:
#   - sets the print options digits=3 and width=5000 (presumably so the whole
#     vector is printed on a single line),
#   - reads the table probe.pssm0,
#   - builds x = 0.1 * column means followed by 0.1 * maximum of each of
#     columns 1..20,
#   - prints x.
# The here-document delimiter is quoted so the text is written literally.
cat > my.R <<'R_SCRIPT'
options(digits=3);
options(width=5000);
pssm<-read.table("probe.pssm0");
x0=0.1*colMeans(pssm)
x=c(x0,0.1*max(pssm[,1]), 0.1*max(pssm[,2]),0.1*max(pssm[,3]),0.1*max(pssm[,4]),0.1*max(pssm[,5]), 0.1*max(pssm[,6]),0.1*max(pssm[,7]),0.1*max(pssm[,8]),0.1*max(pssm[,9]), 0.1*max(pssm[,10]),0.1*max(pssm[,11]),0.1*max(pssm[,12]),0.1*max(pssm[,13]), 0.1*max(pssm[,14]),0.1*max(pssm[,15]),0.1*max(pssm[,16]),0.1*max(pssm[,17]), 0.1*max(pssm[,18]),0.1*max(pssm[,19]),0.1*max(pssm[,20]))
x
R_SCRIPT

# Features for probe protein
# =============================
# All expansions of $bins, $probepssm and $R are quoted so that paths
# containing spaces or glob characters reach the commands as single arguments.

# Run the neighbour extractor on the probe PSSM. The rest of this section
# consumes pssm0..pssm8 and nbr1..nbr8 from the current directory
# (presumably written by pssm2nbrs.bin -- confirm against that tool).
"$bins/pssm2nbrs.bin" "$probepssm"
cp pssm0  probe.pssm0

# Write every field of pssm2, multiplied by 0.1, to probe.pssm2.
# NOTE(review): the loop at the end of this section moves pssm2 onto
# probe.pssm2, replacing this scaled copy -- confirm which one is intended.
awk '{for(i=1;i<=NF;i++)printf 0.1*$i" ";printf "\n"}' pssm2 > probe.pssm2

# Create a dummy target vector file: for every 44-field line whose first
# field is numeric, emit the second field followed by a constant "1".
awk '{if(NF==44 && $1+0==$1)print $2,"1"}' "$probepssm" > probe.o
# probe.o1 keeps only the second column of probe.o (the constant target).
awk '{print $2}' probe.o > probe.o1

# get-20bits.bin is invoked with probe.o as -i and the probe.fasta, probe.21
# and probe.compo file names; probe.21 is then used as the index-0 neighbour
# feature file.
"$bins/get-20bits.bin" -i probe.o -f probe.fasta -21 probe.21 -compo probe.compo
mv probe.21  probe.nbr0

# Run my.R, drop every output line containing ">" and keep only the last
# remaining line as the probe summary.
"$R" -f my.R | grep -v ">" | tail -n 1  > probe.summary

# Give the remaining per-index feature files a "probe." prefix.
for ((i = 1; i <= 8; i++)); do
  mv "pssm$i" "probe.pssm$i"
  mv "nbr$i" "probe.nbr$i"
done

# Features of query protein
# All expansions of $bins, $querypssm and $R are quoted so that paths
# containing spaces or glob characters reach the commands as single arguments.

# Run the neighbour extractor on the query PSSM. The rest of this section
# consumes pssm0..pssm8 and nbr1..nbr8 from the current directory
# (presumably written by pssm2nbrs.bin -- confirm against that tool).
"$bins/pssm2nbrs.bin" "$querypssm"
cp pssm0  query.pssm0

# Write every field of pssm2, multiplied by 0.1, to query.pssm2.
# NOTE(review): the loop at the end of this section moves pssm2 onto
# query.pssm2, replacing this scaled copy -- confirm which one is intended.
awk '{for(i=1;i<=NF;i++)printf 0.1*$i" ";printf "\n"}' pssm2 > query.pssm2

# Create a dummy target vector file: for every 44-field line whose first
# field is numeric, emit the second field followed by a constant "1".
awk '{if(NF==44 && $1+0==$1)print $2,"1"}' "$querypssm" > query.o
# query.o1 keeps only the second column of query.o (the constant target).
awk '{print $2}' query.o > query.o1
# lheader.txt: first column of query.o immediately followed by the line number.
awk '{print $1 NR}' query.o > lheader.txt

# get-20bits.bin is invoked with query.o as -i and the query.fasta, query.21
# and query.compo file names; query.21 is then used as the index-0 neighbour
# feature file.
"$bins/get-20bits.bin" -i query.o -f query.fasta -21 query.21 -compo query.compo
mv query.21  query.nbr0

# Run my.R, drop every output line containing ">" and keep only the last
# remaining line.
# NOTE(review): my.R as generated by this script reads probe.pssm0, not
# query.pssm0, so query.summary is computed from the probe matrix -- confirm
# whether that is intended.
"$R" -f my.R | grep -v ">" | tail -n 1  > query.summary

# Give the remaining per-index feature files a "query." prefix.
for ((i = 1; i <= 8; i++)); do
  mv "pssm$i" "query.pssm$i"
  mv "nbr$i" "query.nbr$i"
done


        # Extend probe.summary and tmp.length so that each holds one line per
        # line of query.pssm0, which lets them be paste(1)d next to the
        # per-line query feature files.
        #
        # Reads:   query.pssm0, probe.pssm0, probe.summary (current directory)
        # Writes:  tmp.length (recreated), probe.summary (appended to)
        # Globals: numresquery, numresprobe, length, summary are set, as in the
        #          original flat code.
        replicate_probe_rows() {
          local i
          local -a summary_fields=()

          # Line counts of the two pssm0 files; a missing or unreadable file
          # counts as 0 instead of producing an empty operand further down.
          numresquery=$(( $(wc -l < query.pssm0) ))
          numresprobe=$(( $(wc -l < probe.pssm0) ))

          # First line of tmp.length: both line counts divided by 200.
          awk -v q="$numresquery" -v p="$numresprobe" \
            'BEGIN { print q/200, p/200 }' > tmp.length
          length=$(cat tmp.length)

          # Split the current summary into whitespace-separated fields and
          # re-join them with single spaces. This gives the same text an
          # unquoted echo would print, but without pathname expansion, so a
          # field such as "*" or "[1]" can never turn into file names.
          # (read returns non-zero at end of file; the array is still filled.)
          read -r -d '' -a summary_fields < probe.summary
          summary="${summary_fields[*]}"

          # The first line of each file already exists; append the remaining
          # numresquery-1 copies.
          for ((i = 1; i < numresquery; i++)); do
            printf '%s\n' "$summary" >> probe.summary
            printf '%s\n' "$length" >> tmp.length
          done
        }
        replicate_probe_rows

# Append the probe columns to the query features.
#
# For every pair (j, i) with j and i running from -1 to 5, paste together, in
# this order: query.nbr<i> (if it exists), query.pssm<j> (if it exists),
# probe.summary, tmp.length and query.o1. The result goes to
# pats.pssm<j>-nbr<i>. When neither query file exists, nothing is written for
# that pair. No query.pssm-1 or query.nbr-1 is created by this script, so
# index -1 presumably selects the "nbr only" / "pssm only" combinations.
for ((j = -1; j <= 5; j++)); do
  for ((i = -1; i <= 5; i++)); do
    # Collect the query-side inputs that are present, neighbour file first.
    inputs=()
    if [ -f "query.nbr$i" ]; then
      inputs+=("query.nbr$i")
    fi
    if [ -f "query.pssm$j" ]; then
      inputs+=("query.pssm$j")
    fi

    if [ "${#inputs[@]}" -gt 0 ]; then
      paste "${inputs[@]}" probe.summary tmp.length query.o1 > "pats.pssm$j-nbr$i"
    fi
  done
done

# Remove the intermediate query files whose suffix is a single character
# after "query.nbr" / "query.pssm".
rm query.nbr? query.pssm?
