#This script reads a list of two-column files: the first column holds the amino acid name
#and the second holds some class information about it, e.g. secondary structure.
#It detects how many distinct class patterns are present in these files and then
#creates a binary (one-hot) representation for each of these classes.

# ---- Setup -----------------------------------------------------------------
# Usage: <script> DATADIR FILEEXT
#   DATADIR  directory holding one input file per entry (<name>.<FILEEXT>)
#   FILEEXT  extension of the input files; also selects codes.<FILEEXT>
#
# nnbench.rc (read from the current directory) contains the path of the
# parameter directory; that directory must contain list.codes, the
# whitespace-separated list of entry names to process.

if [ $# -lt 2 ]; then
  echo "Usage: $0 DATADIR FILEEXT" >&2
  exit 2
fi

if [ ! -r nnbench.rc ]; then
  echo "Error: nnbench.rc not found or unreadable in $(pwd)." >&2
  exit 1
fi
params=$(cat nnbench.rc)

if [ ! -r "$params/list.codes" ]; then
  echo "Error: cannot read $params/list.codes." >&2
  exit 1
fi
list=$(cat "$params/list.codes")

datadir=$1
fileext=$2
outdir=prop-bins

if [ ! -d "$datadir" ]; then
  echo "Error: data directory $datadir does not exist." >&2
  exit 1
fi

# -p makes this a no-op when the directory is already there.
mkdir -p "$outdir" || exit 1

# Start from a clean scratch file; later steps may append to it.
rm -f tmp.col2

# ---- Pattern code table ----------------------------------------------------
# Creates $params/codes.$fileext when it does not exist yet. Every distinct
# value of column 2 across the input files becomes one "::value::" pattern and
# is assigned a one-hot code.

# write_onehot_codes OUTFILE PATTERN...
# Writes one line per PATTERN to OUTFILE: the pattern followed by one 0/1 flag
# per pattern, the single 1 sitting in the column of the pattern's own
# position. Every field, including the last one, is followed by one space.
# Returns 1 without touching OUTFILE when no PATTERN is given.
write_onehot_codes() {
  local outfile=$1
  shift
  [ $# -gt 0 ] || return 1

  local total=$# hot=0 col row pat
  for pat in "$@"; do
    row="$pat "
    for ((col = 0; col < total; col++)); do
      if ((col == hot)); then
        row+="1 "
      else
        row+="0 "
      fi
    done
    # The pattern is passed as data, never as the printf format, so
    # characters such as % or \ in a class name are written literally.
    printf '%s\n' "$row"
    hot=$((hot + 1))
  done > "$outfile"
}

if [ ! -f "$params/codes.$fileext" ]; then
  echo "File codes.$fileext not found. Generating new pattern codes."

  # Gather the sorted, de-duplicated "::class::" tokens of all input files.
  # $list is intentionally unquoted so that it splits into entry names.
  patterns=()
  while IFS= read -r pattern; do
    patterns+=("$pattern")
  done < <(
    for name in $list; do
      awk '{print "::"$2"::"}' "$datadir/$name.$fileext"
    done | sort | uniq
  )

  numclass=${#patterns[@]}
  echo "$numclass"

  if [ "$numclass" -eq 0 ]; then
    echo "Warning: no class patterns found; codes.$fileext was not written." >&2
  else
    write_onehot_codes "$params/codes.$fileext" "${patterns[@]}"
  fi
else
  echo "Using existing pattern code file codes.$fileext. If you want to generate new ones, delete this file and run the script again."
fi



# ---- Per-entry encoding ----------------------------------------------------
# For every entry in $list, translate column 2 of $datadir/<name>.$fileext
# into its code from $params/codes.$fileext and store the result, one code
# per input line, in $outdir/<name>.$fileext.

# encode_class_column CODES_FILE DATA_FILE
# Prints, for each line of DATA_FILE, the code assigned to "::<column 2>::"
# in CODES_FILE. Lookup is an exact match on the first field of CODES_FILE,
# so class names containing regex characters (., *, [ ...) are safe. Each
# code is printed with a leading space before every flag and no trailing
# space (the established output format). A class without a code yields an
# empty line, keeping output lines aligned with input lines, and is reported
# once on stderr.
encode_class_column() {
  awk '
    BEGIN {
      # Load the code table ourselves and hide it from the main input loop.
      table = ARGV[1]
      ARGV[1] = ""
      while ((getline row < table) > 0) {
        n = split(row, cell, " ")
        if (n == 0)
          continue
        bits = ""
        for (i = 2; i <= n; i++)
          bits = bits " " cell[i]
        code[cell[1]] = bits
      }
      close(table)
    }
    {
      key = "::" $2 "::"
      if (!(key in code))
        printf("Warning: no code for class \"%s\" (%s line %d)\n", $2, FILENAME, FNR) > "/dev/stderr"
      print code[key]
    }
  ' "$1" "$2"
}

codes_file=$params/codes.$fileext
if [ ! -r "$codes_file" ]; then
  echo "Error: pattern code file $codes_file not found; nothing was encoded." >&2
else
  # $list is intentionally unquoted so that it splits into entry names.
  for name in $list; do
    echo "Processing $name"
    infile=$datadir/$name.$fileext
    if [ ! -r "$infile" ]; then
      echo "Warning: cannot read $infile; skipping." >&2
      continue
    fi
    encode_class_column "$codes_file" "$infile" > "$outdir/$name.$fileext"
  done
fi

# ---- Header file and cleanup -----------------------------------------------

# write_pattern_header CODES_FILE HEADER_FILE
# Writes the first field of every CODES_FILE line, with all ':' characters
# removed, to HEADER_FILE (one class name per line). Returns 1 and leaves
# HEADER_FILE untouched when CODES_FILE cannot be read.
write_pattern_header() {
  [ -r "$1" ] || return 1
  awk '{ label = $1; gsub(/:/, "", label); print label }' "$1" > "$2"
}

if ! write_pattern_header "$params/codes.$fileext" "$params/header.$fileext"; then
  echo "Warning: could not write $params/header.$fileext from $params/codes.$fileext." >&2
fi

# Remove scratch files left in the current directory.
rm -f tmp.1 tmp.2 tmp.col2


