# Protein identifiers: the first column of the table stored in
# "list.human-sp-clean", taken after converting the table to a matrix.
proteins <- as.matrix(
  read.table("list.human-sp-clean")
)[, 1]

# Build the single-row feature matrix for one protein.
#
# Reads the table "<pred_dir><name>-preds-all.txt" (first line is a header)
# and condenses it into 41 numeric features, in this column order:
#   q3<lig>       75th percentile of a prediction column
#   top5<lig>     mean of the 5 highest values of a prediction column
#   top10<lig>    same for the 10 highest values
#   top25<lig>    same for the 25 highest values
#   count<X>      fraction of rows whose first column contains letter X
#   sqsize        log10 of the row count, with the count floored at 10
# <lig> is dna, carb, atp, ppi, rna, read from the file columns DNA, Carb,
# ATP, PPPI and RNA respectively.
# NOTE(review): the first column presumably holds one-letter residue codes and
# "PPPI" is presumably the spelling used in the prediction files - confirm.
#
# Args:
#   name:     protein identifier; used in the file name and as the row name.
#   pred_dir: directory prefix of the prediction files, including the trailing
#             separator. Defaults to the global `dir`, which is the variable
#             this function read before the parameter existed.
#
# Returns:
#   A 1 x 41 numeric matrix whose row name is `name`. Any NA/NaN feature
#   (for example from a table with no rows) is replaced by 0.
scoreit <- function(name, pred_dir = dir) {
  preds <- read.table(paste0(pred_dir, name, "-preds-all.txt"), header = TRUE)
  n_rows <- nrow(preds)

  # Feature-name suffix -> column name in the prediction file.
  ligand_cols <- c(
    dna = "DNA", carb = "Carb", atp = "ATP", ppi = "PPPI", rna = "RNA"
  )
  ligands <- names(ligand_cols)
  amino_acids <- c(
    "A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
    "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"
  )
  top_sizes <- c(5L, 10L, 25L)

  # Third quartile of every prediction column.
  q3 <- vapply(
    ligand_cols,
    function(col) quantile(preds[, col], probs = 0.75, names = FALSE),
    numeric(1)
  )

  # Sort each prediction column once, then average its k largest values.
  # When the table has fewer than k rows the available values are averaged;
  # indexing [1:k] directly would pad with NA and the feature would end up
  # as 0 after the NA replacement below.
  sorted_desc <- lapply(
    ligand_cols,
    function(col) sort(preds[, col], decreasing = TRUE)
  )
  top_means <- unlist(lapply(top_sizes, function(k) {
    vapply(
      sorted_desc,
      function(s) mean(s[seq_len(min(k, length(s)))]),
      numeric(1)
    )
  }))

  # Fraction of rows whose first column contains each letter.
  residues <- as.character(preds[, 1])
  aa_fraction <- vapply(
    amino_acids,
    function(aa) sum(grepl(aa, residues, fixed = TRUE)),
    integer(1)
  ) / n_rows

  # Size feature: log10 of the row count, never below log10(10) = 1.
  sqsize <- log10(max(10, n_rows))

  feature_names <- c(
    paste0("q3", ligands),
    paste0(rep(paste0("top", top_sizes), each = length(ligands)), ligands),
    paste0("count", amino_acids),
    "sqsize"
  )

  values <- c(q3, top_means, aa_fraction, sqsize)
  values[is.na(values)] <- 0

  matrix(
    values,
    nrow = 1,
    dimnames = list(as.character(name), feature_names)
  )
}


# ---- Driver: score every protein and write the combined feature table ----

# Number of proteins to process.
dsize <- length(proteins)

# Directory prefix (with trailing slash) of the per-protein prediction files.
# scoreit() picks this global up when building each file name.
dir <- "bs-preds/"

# Collect one single-row matrix per protein and bind them once at the end;
# calling rbind() inside the loop would re-copy the growing matrix each time.
score_rows <- vector("list", dsize)
for (pid in seq_len(dsize)) {
  name <- proteins[pid]
  # Progress line on stdout; print() is base R and formats a character
  # vector exactly as show() does.
  print(paste(pid, "of", dsize, ":", name))
  score_rows[[pid]] <- scoreit(name)
}
scores <- do.call(rbind, score_rows)

# One row per protein, labelled with its identifier.
rownames(scores) <- proteins
write.table(scores, "feature-set-all.txt", quote = FALSE)
