/******************************************************************************
 The computer software and associated documentation called ASSP hereinafter
 referred to as the WORK which is more particularly identified and described in 
 Appendix A of the file LICENSE.  Conditions and restrictions for use of
 this package are also in this file.

 The WORK was developed by: 
	Robert B. Russell and Geoffrey J. Barton
	Laboratory of Molecular Biophysics
	University of Oxford
	Rex Richards Building
	South Parks Road
	Oxford OX1 3QU U.K.
	Tel:  (+44) 865-275379
	FAX:  (+44) 865-510454
	INTERNET: rbr@bioch.ox.ac.uk
	JANET:    rbr@uk.ac.ox.bioch

 The WORK is Copyright (1993) University of Oxford
	Administrative Offices
	Wellington Square
	Oxford OX1 2JD U.K.

 All use of the WORK must cite: 
 R.B. Russell and G.J. Barton, "The Limits of Protein Secondary Structure Prediction
  Accuracy from Multiple Sequence Alignment",
  Journal of Molecular Biology, 234, 951 - 957, 1993.
*****************************************************************************/
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "assp.h"
#define AAS " ________________________________ABCDEFGHI_KLMN_PQRST_VWXYZ________c"

#define MBIG 1000000000
#define MSEED 161803398
#define MZ 0
#define FAC (1.0/MBIG)
#define lastmod "15 January 1993"

/* Reads in alignment in BLOCK file format.
 * Then calculates %C and returns the expected best
 *   secondary structure prediction accuracy given
 *   the alignment length and the data from the
 *   JMB paper */

main(argc,argv)
int argc;
char *argv[];
{
	char c;
	
	int i,j,k;
	int nblock,nblock2,newnblock;
	int max;
	int repeat,seed,nseq;
	int align_len,t_cons,n_pos,n_cons;
	int min_L,max_L,min_C_length;
	int gave_block_file;
	int L,n_omit;

	long int ncomb,count;

	int *cons;
	int *v;
	int *ignore;

	int **combs;

	float C,Lower,Upper;
	float min_C,max_C;
	float min_Q,max_Q;

	FILE *BLOCK,*MAT;

	struct parameters *parms;
	struct slist pmatrix;
	struct seqdat *block;
	struct seqdat *newblock;
	struct seqdat *sub_block;

	parms=(struct parameters*)malloc(sizeof(struct parameters));

	if(argc<3) exit_error();

	gave_block_file=0;

	/* default parameters */
	strcpy(&parms[0].property_file[0],"mpt.mat");
	parms[0].align_size=5;
	parms[0].Nmax=400;
	parms[0].window=1;
	parms[0].min_n_prop=7;
	parms[0].n_gaps_tolerated=0;
	parms[0].fraction_ignored=0.0;
	parms[0].display_parameters=0;
	parms[0].omit=(int*)malloc(sizeof(int));
	parms[0].q_only=0;

	n_omit=0;

        for(i=1; i<argc; ++i) {
           if(argv[i][0]!='-') exit_error();
           if(argv[i][1]=='f') {
	      if(i+1>=argc) exit_error();
              /* get and open block file */
              if((BLOCK=fopen(argv[i+1],"r"))==NULL) {
                printf("error: file %s does not exist\n",argv[i+1]);
                exit(-1);
              }
	      strcpy(&parms[0].align_file[0],argv[i+1]);
              i++;
	      gave_block_file=1;
            } else if(argv[i][1]=='s') {
	      if(i+1>=argc) exit_error();
	      sscanf(argv[i+1],"%d",&parms[0].align_size);
	      i++;
	   } else if(argv[i][1]=='l') {
	      if(i+1>=argc) exit_error();
	      sscanf(argv[i+1],"%d",&parms[0].Nmax);
	      i++;
	   } else if(argv[i][1]=='p') {
	     if(i+1>=argc) exit_error();
	     sscanf(argv[i+1],"%d",&parms[0].min_n_prop);
	     i++;
	   } else if(argv[i][1]=='w') {
	     if(i+1>=argc) exit_error();
	     sscanf(argv[i+1],"%d",&parms[0].window);
	     i++;
	   } else if(argv[i][1]=='g') { 
	     if(i+1>=argc) exit_error();
	     sscanf(argv[i+1],"%d",&parms[0].n_gaps_tolerated);
	     i++;
	   } else if(argv[i][1]=='i') {
	     if(i+1>=argc) exit_error();
	     sscanf(argv[i+1],"%f",&parms[0].fraction_ignored);
	     i++;
	   } else if(argv[i][1]=='m') {
	     if(i+1>=argc) exit_error();
	     strcpy(&parms[0].property_file[0],argv[i+1]);
	     i++;
	   } else if(argv[i][1]=='r') { 
	     if(i+1>=argc) exit_error();
	     sscanf(argv[i+1],"%d",&parms[0].initial_seed);
	     i++;
	   } else if(argv[i][1]=='P') {
	     parms[0].display_parameters=1;
	   } else if(argv[i][1]=='q') {
	     parms[0].q_only=1;
	   } else if(argv[i][1]=='o') {
	     /* given sequences to omit */
	     if(i+1>=argc) exit_error();
	     i++;
	     while(i<argc && argv[i][0]!='-') {
	       sscanf(argv[i],"%d",&parms[0].omit[n_omit]);
	       n_omit++;
	       parms[0].omit=(int*)realloc(parms[0].omit,(n_omit+1)*sizeof(int));
	       i++;
	     }
	     i--;
	   } else { 
	     exit_error();
	   }
	}

	if(!gave_block_file) {
	  printf("error: you must specify a block file with the -f option\n");
	  exit(-1);
	}
	
	if(parms[0].q_only==0) {
	
          printf("-------------------------------------------------------------------------------\n");
          printf("                                A S S P \n\n");
          printf("                 Accuracy of Secondary Structure Prediction  \n");
	  printf("\n");
          printf("            A program to predict the accuracy of secondary structure \n");
	  printf("                prediction from multiple protein sequence alignment.\n");
          printf("                  By Robert B. Russell and Geoffrey J. Barton\n");
          printf("                   Last Modified: %s\n",lastmod);
          printf("\n\n Please cite RB Russell and GJ Barton, J. Mol. Biol., 234, 951-957, 1993\n");
          printf("-------------------------------------------------------------------------------\n\n");

	  if(parms[0].display_parameters) {
	    printf("PARAMETERS:\n\n");
	    printf("DEFAULT PARAMETERS WILL GIVE RESULTS SIMILAR TO THE PAPER\n\n");
	    printf("Alignment to be read from file %s\n",parms[0].align_file);
	    printf("Property types to be read in from %s\n",parms[0].property_file);
	    printf("\n");
	    printf("Sub-alignment size: %d\n",parms[0].align_size);
	    printf("Maximum number of combinations allowed: %d\n",parms[0].Nmax);
	    printf(" (if there are more than %d possible sub-alignments of \n",parms[0].Nmax,parms[0].align_size);
	    printf("  sequences, then a random sample of %d sub-alignments\n",parms[0].Nmax);
	    printf("  will be generated)\n");
	    printf("\n");
	    printf("Minimum number of shared properties to define conserved\n");
	    printf("   positions: %d\n",parms[0].min_n_prop);
	    printf("Number of gaps tolerated to define a conserved positions: %d\n",parms[0].n_gaps_tolerated);
	    printf("Smoothing window for defining a conserved position: %d \n",parms[0].window);
	    printf("Fraction of residues which may be ignored to define conserved\n");
	    printf("   positions: %5.2f\n",
	     parms[0].fraction_ignored);
	    printf("\n");
	    printf("Initial seed for random numbers: %d\n",parms[0].initial_seed);
	    if(n_omit>0) {
	       printf("Sequences: ");
	       for(i=0; i<n_omit; ++i) printf("%2d ",parms[0].omit[i]);
	       printf("are to be ignored.\n");
	    }
	    printf("-------------------------------------------------------------------------------\n\n");
	  }

	  printf("Starting ASSP...\n\n");

	  /* read in block file */
	  printf("Reading alignment from %s\n",parms[0].align_file);
	}
	nblock=0;
	while((c=getc(BLOCK))!=(char)EOF) {
	  if(c=='>') nblock++;
	}
	block=(struct seqdat*)malloc((nblock+1)*sizeof(struct seqdat));
	newblock=(struct seqdat*)malloc((nblock+1)*sizeof(struct seqdat));
	rewind(BLOCK);
	Agetbloc(BLOCK,block,&nblock2);
	if(nblock!=nblock2) {
	   printf("error: something funny with the block file\n");
	   exit(-1);
	}

	/* picking sequences to ignore */
	ignore=(int*)malloc(nblock*sizeof(int));
	for(i=0; i<nblock; ++i) ignore[i]=0;

	/* user specified */
	for(i=0; i<n_omit; ++i)
	   ignore[parms[0].omit[i]-1]=1;
	
	/* other sequences known not be be sequence data */
	for(i=0; i<nblock; ++i) {
	   for(j=0; j<strlen(block[i+1].id); ++j)
	     if(block[i+1].id[j]=='\n') block[i+1].id[j]='\0';
	   rmsp(block[i+1].id);
	   if(strcmp(&block[i+1].id[strlen(block[i+1].id)-3],"str")==0 ||
	      strcmp(&block[i+1].id[strlen(block[i+1].id)-4],"dssp")==0 ||
	      strcmp(&block[i+1].id[strlen(block[i+1].id)-2],"rk")==0 ||
	      strcmp(&block[i+1].id[strlen(block[i+1].id)-6],"define")==0 ||
	      strncmp(block[i+1].id,"space",5)==0) ignore[i]=1;
	   if(ignore[i]==1 && parms[0].q_only==0) 
	     printf("Ignoring: sequence %d, %s\n",i+1,block[i+1].id);
	}




	if((MAT=fopen(parms[0].property_file,"r"))==NULL) {
	   printf("error: file %s does not exist\n",parms[0].property_file);
	   exit(-1);
	}



	sub_block=(struct seqdat*)malloc((parms[0].align_size+1)*sizeof(struct seqdat));

	/* Decide whether to use all possible combinations or just a random
	 *  sample of parms[0].Nmax */
	
	/* First we create a new block structure that does not contain
	 *  any of the ignored sequences */
	newblock[0]=block[0];
	newnblock=0;
	for(i=0; i<nblock; ++i) {
	   if(!ignore[i]) {
	      newblock[newnblock+1]=block[i+1];
	      newnblock++;
	   }
	}

	
	ncomb=nCr(newnblock,parms[0].align_size);

	if(parms[0].q_only==0) {
	  printf("\nThe alignment contains %d sequences\n",newnblock);
	  printf("Total number of sub-alignments of %d sequences is %d\n",parms[0].align_size,ncomb);
	}
	if(ncomb>parms[0].Nmax) {  /* random sample */
	   if(parms[0].q_only==0) 
	      printf("Generating a random sample of %d sub-alignments\n",parms[0].Nmax);
	   seed=parms[0].initial_seed;
	   max=parms[0].Nmax;
	   combs=(int**)malloc(max*sizeof(int*));
	   for(i=0; i<parms[0].Nmax; ++i) {
	      combs[i]=(int*)malloc(parms[0].align_size*sizeof(int));
	      for(j=0; j<parms[0].align_size; ++j) {
		repeat=1;
	        while(repeat>0) { 
		   repeat=0; 
		   combs[i][j]=(int)(ran3(&seed)*(float)newnblock)+1;
		   for(k=0; k<j; ++k) 
		      if(combs[i][k]==combs[i][j]) repeat++;
		   if(combs[i][j]>newnblock || combs[i][j]<1) repeat++;
		}
	     }
	  }
        } else { 
	   max=ncomb;
	   if(parms[0].q_only==0) printf("Will consider all %d possible sub-alignments\n",max);
	   combs=(int**)malloc(sizeof(int*));
	   v=(int*)malloc((parms[0].align_size+1)*sizeof(int));
	   count=0;
	   combs=comb(&parms[0].align_size,1,&newnblock,v,combs,&count);
	}
	min_C=110.0;
	max_C=-10.0;
	max_L=-1;
	min_L=1000000;

	pmatrix=readprop(MAT);
	fclose(MAT);
	for(i=0; i<max; ++i) {
	   for(j=0; j<parms[0].align_size; ++j) { 
	     sub_block[j+1].seq=newblock[combs[i][j]].seq;
	   }
	   cons=cons_test(pmatrix,sub_block,parms[0].align_size,
		 parms[0].min_n_prop,parms[0].window,
		 parms[0].n_gaps_tolerated,parms[0].fraction_ignored,
		 &t_cons,&n_cons,&n_pos,&align_len);
	   C=100*(float)n_cons/(float)align_len;
	   if(C<min_C) {
	     min_C=C;
	     min_C_length=align_len;
	   }
	   if(C>max_C) max_C=C;
	   if(align_len<min_L) min_L=align_len;
	   if(align_len>max_L) max_L=align_len;
	   free(cons); 
	}
	if(parms[0].q_only==0) {
	  printf("\nObserved %%C range: %6.2f - %6.2f %%\n",min_C,max_C);
	  printf("Observed L range %d - %d positions\n",min_L,max_L);
	}

	/* Now to predict the expected accuracy of an alignment based
	 *  prediction */
	
	max_Q=100.0;
	C=min_C;
	L=min_C_length;
	if(L<=50) {
	   /* two lines 
	    * first slope  = 0.0552 int = 68.0 
	    * second slope = 0.395  int = 70.1 */
	  if(C<46.3) {
	    min_Q = 0.0552 * C + 68.0;
 	  } else { 
	    min_Q = 0.395 * C + 70.1;
	  }
	} else if(L>50 && L<=100) {
	  /* two lines
	   * first slope  = 0.1044 int = 69.0 
	   * second slope = 0.4937 int = 75.4
	   */
	  if(C<68.8) {
	    min_Q = 0.1044 * C + 69.0;
	  } else {
	    min_Q = 0.4937 * C + 75.4;
	  }
	} else if(L>100 && L<=150) {
	  /* one line
	   * slope = 0.1765 int = 71.8
	   */
	  min_Q = 0.1765 * C + 71.8;
	} else if(L>150) { 
	  /* one line
	   *  slope = 0.0800 int = 81.3 */
	  min_Q = 0.0800 * C + 81.3;
	}
	
	if(parms[0].q_only==0) {
	  printf("\nMinimum %%C = %6.2f for a length of %d residues\n",C,L);
	  if(C<13.0) {
	    printf("WARNING: the alignment appears to contain very dissimilar sequences\n");
	    printf("   This may mean that the alignment is inaccurate\n");
	  }
	  if(C>70.0) {
	    printf("WARNING: the alignment appears to contain only ver similar sequences\n");
	    printf("   This may limit the ability of the alignment to improve the prediction\n");
	    printf("    of secondary structure accurately\n");
	  }
	  if(L>300) {
	    printf("WARNING: the alignment length is quite long, it may be more sensible\n");
	    printf("   to split the alignment into smaller regions (ie.  corresponding to separate\n");
	    printf("   domains), and run the program again\n");
	    printf("   The original study was performed mostly on proteins smaller than this.\n");
	  }
	  printf("\nThe expected range for Q3ave: %6.2f - %6.2f %%\n",min_Q,max_Q);
	  printf(" (from Figure 2a-d, Russell and Barton, \n");
	  printf("   J. Mol. Biol., 234, 951-957, 1993)\n");
	  printf("\n");
	  printf("This means that the expected accuracy for comparing a \n");
	  printf(" *perfect* prediction based on this alignment to *each* \n");
	  printf(" subsequently determined secondary structure will be\n");
	  printf(" expected to have accuracies distributed between these\n");
	  printf(" two values: at worst %6.2f %% at best %6.2f %%\n",min_Q,max_Q);

	  printf("\n...ASSP done.\n");

	} else {
	  printf("%6.2f %6.2f\n",C,min_Q);
	}
	exit(0);


}

/* Prints the command line summary on standard output and ends the program
 * with exit status 0; control never returns to the caller. */
int exit_error()
{
	static const char *usage[] = {
	   "format: accuracy -f <block file> \n",
	   "  ( -m <property matrix file> \n",
	   "    -s <align size>  -l <maximum number of alignments> \n",
	   "    -p <min properties> -g <min gaps allowed>\n",
	   "    -w <window length> -i <fraction ignored> \n",
	   "    -r <initial random seed> \n",
	   "    -o <sequence to miss 1> <sequence to miss 2>... \n",
	   "    -q -P )\n",
	   "    -P => show parameters\n",
	   "    -q => just give Q3min\n"
	};
	int line;
	int n_lines=(int)(sizeof(usage)/sizeof(usage[0]));

	for(line=0; line<n_lines; ++line)
	   fputs(usage[line],stdout);
	exit(0);
}
   
/* Routine to read in bloc file format */

int Agetbloc(bfile,bloc,nbloc)
FILE *bfile;
struct seqdat *bloc;
int *nbloc;

{
    int i,llen;
    char *buff;

    char *idstart, *idend, *bstart, sident = 0;
    int idlen,totseq = 0,k,j;
    char *GJstrblank();
	
    buff = malloc(sizeof(char) * MAXtlen);

l1: 
    buff = fgets(buff,MAXtlen,bfile);
    if(buff == NULL){
	printf("Premature end of BLOCK FILE\n");
	return -1;
    }
    if((idstart = strchr(buff,'>')) != NULL){
	if(++totseq == MAXnbloc){
	    printf("Max Number of block file sequences exceeded: %d\n", totseq);
	    printf("Use MAX_NSEQ command to increase value");
	    return -1;
	}
	sident = 1;
	idend = strchr(idstart,' ');
	if(idend == NULL){
	  idend = strchr(idstart,'\0');
	}
	if(idend == NULL){
	  printf("Error reading identifier:%s\n",idstart);
	  printf("Exiting\n");
	}
	idlen = (idend - idstart) + 1;
	bloc[totseq].id = malloc(sizeof(char) * idlen);
	bloc[totseq].id = GJstrblank(bloc[totseq].id,idlen);
	strncpy(bloc[totseq].id,idstart+1,idlen-1);   /* don't copy the ">" symbol */
	bloc[totseq].ilen = idlen-1;
	bloc[totseq].id[idlen-1] = '\0';

	bloc[totseq].tlen = strlen(idend)+1;
	bloc[totseq].title = malloc(sizeof(char) * bloc[totseq].tlen);
	bloc[totseq].title = GJstrblank(bloc[totseq].title,bloc[totseq].tlen);
	strcpy(bloc[totseq].title,idend);

	bloc[totseq].seq = (char *) malloc(sizeof(char) * MAXslen);
        bloc[totseq].seq[0] = ' ';
	goto l1;
    } else if(sident){
	if((idstart = strchr(buff,'*')) != NULL){
	    i = 0;
	    while((buff = fgets(buff,MAXtlen,bfile)) != NULL){
		if(*idstart == '*'){
/*		    printf("Blocfile read: Length: %d\n",i); */
		    ++i;
		    for(k=1;k<totseq+1;++k){
			bloc[k].slen = i;
			bloc[k].seq[i] = '\0';
			bloc[k].seq = realloc(bloc[k].seq,sizeof(char)*(i+1));
		    }
		    *nbloc = totseq;
		    free(buff);
		    return 0;
		}
		bstart = idstart;
		++i;
		if(i==MAXslen) printf("Max Sequence length exceeded - use MAX_SEQ_LEN command to increase");
		for(j=1;j<totseq+1;++j){
		    /*cope with short lines */
		    bloc[j].seq[i] = *bstart++;
		}
	    }
	    printf("No terminating * in blocfile\n");
	    return -1;
	} else goto l1;
    } else {
	goto l1;
    }
}

/* Fills the first len-1 characters of `string` with blanks, stores a
 * terminating nul in string[len-1] and returns `string`.  The caller must
 * supply storage for at least `len` characters. */
char *GJstrblank(string,len)
char *string;
int len;
{
  int last = len - 1;
  int pos;

  string[last] = '\0';
  for(pos = 0; pos < last; ++pos){
    string[pos] = ' ';
  }
  return string;
}
 


/* given a property type table, a bloc structure, a maximum number of
 *  properties to be tolerated, and a minimum window length, this routine 
 *  returns a string of 1 and 0's corresponding to which positions satisfy
 *  the criteria
 *
 * ptable            property table as returned by readprop()
 * bloc              sequences 1..nbloc; residues start at seq[1]
 * nbloc             number of sequences in bloc
 * min_n_prop        a position is conserved when at least this many
 *                   properties (or their absence) are shared by all sequences
 * window            smoothing window, see the last loop
 * n_gaps_tolerated  positions with more blanks than this are not examined
 * fraction_ignored  accepted but not used by this routine
 * t_cons            receives the sum of the shared property counts
 * n_cons            receives the number of conserved positions, counted
 *                   before smoothing
 * n_pos             receives the number of positions examined
 * align_len         receives the number of positions that are not all blank
 *
 * The returned array is allocated here and must be freed by the caller; it
 * holds one entry per position of sequence 1 plus 100 spare zero entries.
 *
 * A residue that has no column in the property table (a symbol missing
 * from ptable.aas, or a character outside the AAS alphabet) counts neither
 * as having nor as lacking a property, so such a position cannot score a
 * shared property.  Previously these residues indexed the property matrix
 * through unset or out-of-range table entries. */

/* Maps the sequence character `residue` to its column in the property
 * matrix using the lookup table `index` (n_index entries, -1 = no column).
 * Returns -1 when there is no column for the character. */
static int cons_column(const int *index, int n_index, int residue)
{
	int slot;

	slot=residue-' ';
	if(slot<0 || slot>=n_index) return -1;
	return index[slot];
}

int *cons_test(ptable,bloc,nbloc,min_n_prop,window,n_gaps_tolerated,fraction_ignored,t_cons,n_cons,n_pos,align_len)
struct slist ptable;
struct seqdat *bloc;
int nbloc,min_n_prop,n_gaps_tolerated,window;
float fraction_ignored;
int *t_cons; /* total conservation value */
int *n_cons; /* number of conserved positions */
int *n_pos;
int *align_len;
{
	int i,j,k;
	int gaps,npt;
	int all_have,all_lack,col;
	int neighbors;
	int nuse;
	int len,n_index;
	int *index,*cons;
	const char *alphabet=AAS;
	const char *hit;

	/* the length of the first sequence fixes the number of positions */
	len=(int)strlen(&bloc[1].seq[1]);
	n_index=(int)strlen(alphabet);
	nuse=nbloc;

	cons=(int*)calloc((size_t)len+100,sizeof(int));
	index=(int*)malloc((size_t)n_index*sizeof(int));
	if(cons==NULL || index==NULL) {
	   printf("error: out of memory\n");
	   exit(-1);
	}

	(*t_cons)=(*n_pos)=(*n_cons)=0;

	/* provide an alphabeticised index for speed: index[c - ' '] is the
	 *  column of character c in the property matrix, or -1 */
	for(i=0; i<n_index; ++i) {
	   index[i]=-1;
	   if(alphabet[i]!='_') {
	      hit=strchr(ptable.aas,alphabet[i]);
	      if(hit!=NULL) index[i]=(int)(hit-ptable.aas);
	   }
	}

	/* now proceed through the block file and determine which positions to keep based on the
	 *  criteria given */
	(*align_len)=0;
	for(i=0; i<len; ++i) {
	    gaps=0;
	    for(j=0; j<nbloc; ++j)  {
	       if(bloc[j+1].seq[i+1]==' ') gaps++;
	    }
	    if(gaps<nuse) (*align_len)++;
	    if(gaps<=n_gaps_tolerated) {
	       (*n_pos)++;
	       /* go through to property table to see which properties we have at this position */
	       npt=0;
	       for(j=0; j<ptable.pno; ++j) {
		  all_have=0;	/* sequences showing property j */
		  all_lack=0;	/* sequences lacking property j */
		  for(k=0; k<nbloc; ++k) {
		     col=cons_column(index,n_index,bloc[k+1].seq[i+1]);
		     if(col<0) continue;
		     if(ptable.apm[j].ptype[col]) all_have++;
		     else all_lack++;
		  }
		  /* shared presence and shared absence both count */
		  if(all_have==nuse) npt++;
		  if(all_lack==nuse) npt++;
	       }
	       if(npt>=min_n_prop) {
		 (*n_cons)++;
		 cons[i]=1;
	       }
	       (*t_cons)+=npt;
	    }
	}

	/* now smooth the array out according to the window: a conserved
	 *  position is kept only if enough of its neighbours are conserved.
	 *  The array is updated in place, so positions to the left have
	 *  already been smoothed when they are looked at. */
	for(i=0; i<len; ++i) {
	   if(cons[i]==1) {
              neighbors=0;
	      /* to the right: stop at the end of the alignment or at the
	       *  first position that is not conserved (the bound is tested
	       *  first so the array is never read beyond the alignment) */
              for(j=1; j<=window; ++j) {
                 if(i+j>len-1 || cons[i+j]==0) break;
                 neighbors++;
	      }
	      /* to the left: stop at the first position that is not
	       *  conserved.  NOTE(review): positions before the start of
	       *  the alignment are counted as conserved neighbours here,
	       *  unlike positions beyond its end - kept as it was. */
	      for(j=1; j<=window; ++j) {
                 if((i-j)>=0 && cons[i-j]==0) break;
                 neighbors++;
              }
              cons[i]=(neighbors>=(window-1)); 
	   }
        }

	free(index);
	return cons;
}

/* Returns the number of ways of choosing R items from N (N! / ((N-R)! R!)).
 *
 * The value is built up one factor at a time in integer arithmetic, so it
 * is exact, instead of dividing factorials held in doubles (these stop
 * being exact beyond 22! and overflow for N > 170).  Every intermediate
 * value is itself a binomial coefficient, which keeps each division exact.
 *
 * Returns 0 when R is negative or larger than N, and LONG_MAX when the
 * result is too large for a long int. */
long int nCr(N,R)
int N,R;
{
	long int value;
	long int factor;
	int i;

	if(R<0 || N<R) return 0;
	if(R>N-R) R=N-R;	/* nCr == nC(n-r): use the shorter product */

	value=1;
	for(i=1; i<=R; ++i) {
	   factor=(long int)(N-R+i);
	   if(value>LONG_MAX/factor) return LONG_MAX;	/* would overflow */
	   value=value*factor/i;
	}
	return value;
}

/* Returns N! as a double, multiplying the terms 1, 2, ... N in ascending
 * order.  For N of zero or less the product is empty and 1.0 is returned. */
double factorial(N)
int N;
{
	double product=1.0;
	int term=1;

	while(term<=N) {
	   product*=(double)term;
	   term++;
	}
	return product;
}

/* Ptyperead.c
   ***********

This program reads in standard property type matrices, allowing the use of 
multiple property types in the MAAS program, ie.   each amino acid may be a 
member of more than one property type group.   The standard input file is of
the format:

!    *ILVCAGMFYWHKREQDNSTP BZX**
!
 1    111111111111000000101001 Hydrophobic
 2    000000000011100000001001 Positive
 3    000000000000010100001001 Negative
 4    000000001111111111101111 Polar
 5    000000000011110100001001 Charged
 6    001111000000000111111001 Small
 7    000011000000000001001001 Tiny
 8    111000000000000000001001 Aliphatic
 9    000000011110000000001001 Aromatic
10    000000000000000000011001 Proline
~
~
 n    001001001000100010001101 n 
!

The first column (0) contains the property type record.   If a property 
type is not present on the line, this position is occupied by an 
exclamation mark (!), which is, therefore, a reserved character in 
column 0.   The property type record is a decimal number which is later 
associated with the property name record.

The first row contains an indicator of the number of property types to 
be read.   In this row, the asterisk (*) is a reserved character.   The 
first asterisk is always in column 5.   The second asterisk is a marker
for the number of symbols represented in the property type matrix, 
occurring in the column following the last column occupied by the 
property type matrix.   The space between the first two markers contains 
a list of the amino acid symbols for which property types are listed.   
The third asterisk indicates the column in which the
property type names start.

Any space to the right of the property type names, in any column, may 
be used for comments.   

The property type matrix is expressed in binary form.   As many symbols/
property types as required may be defined, with the proviso that the 
total number of characters per line does not exceed 80.

*/  
 
/* readprop: reads a property type matrix in the format described above.
 *
 * fp : open property file.  It is closed before returning, so the caller
 *      must not close it again.
 *
 * Returns a struct slist in which
 *   syno       is the number of symbols listed in the header line,
 *   aas        holds those symbols as a nul terminated string,
 *   pno        is the number of property lines read,
 *   apm[n]     holds the code (pcode) and the 0/1 row (ptype) of line n,
 *   pnames[n]  holds the name of property n,
 *   maxnoprop  is the largest number of '1' entries found on one line
 *              (-1 when the file is empty).
 *
 * Lines are taken one fgets() at a time and the loop ends when fgets()
 * fails; testing feof() before reading made the last line of the file count
 * twice.  A property line is only accepted after a complete header line has
 * been seen and when it is long enough to hold one entry per symbol; other
 * lines are skipped rather than read past their end.  The program stops
 * with a message when memory runs out. */
struct slist readprop(fp)
FILE *fp;

{
  char pts[80];
  int mu,si,i,total,tally;
  int len,sym_end;
  int sm=0;		/* column in which the property names start */
  int have_header=0;	/* set once a complete header line has been read */
  struct slist sy;
  struct pmtrx *more_apm;
  char **more_names;

  sy.pno=0;
  sy.syno=0;    
  sy.apm = (struct pmtrx*) malloc(sizeof(struct pmtrx)*2);
  sy.pnames = (char**) malloc(sizeof(char*)*2);
  sy.aas=(char*) malloc(sizeof(char)*100);
  if(sy.apm==NULL || sy.pnames==NULL || sy.aas==NULL){
    printf("error: out of memory reading property file\n");
    exit(-1);
  }
  sy.aas[0]='\0';

  total=-1;

  /* Read in one line (of max 80 characters) at a time */
  while(fgets(pts,sizeof(pts),fp)!=NULL){
    len=(int)strlen(pts);

    /* Header line: '*' in column 5, the symbols, then two more '*' */
    if(len>5 && pts[5]=='*'){
      sym_end=6;
      while(sym_end<len && pts[sym_end]!='*') sym_end++;
      si=sym_end+1;
      while(si<len && pts[si]!='*') si++;
      if(si<len){		/* both closing asterisks were found */
	sy.syno=sym_end-6;
	sm=si;
	for(mu=0;mu<sy.syno;++mu){
	  sy.aas[mu]=pts[mu+6];
	}
	sy.aas[sy.syno]='\0';
	have_header=1;
      }
    }

/* If the line starts with a non-control code character other than ! read in
   the property code, its name and its row of the property type matrix */

    tally=0;
    if(pts[0]!='!' && pts[0]>31 && have_header && len>=6+sy.syno){
      i=0;
      sscanf(pts,"%d",&i);

      /* the name runs from column sm to the next blank or the line end */
      si=sm;
      while(si<len && pts[si]!=' ' && pts[si]!='\n' && pts[si]!='\r'){
        si++;  
      }
      sy.pnames[sy.pno] = (char*) malloc (sizeof(char)*((si-sm)+2));
      sy.apm[sy.pno].ptype = (int*) malloc(sizeof(int)*(sy.syno+2));
      if(sy.pnames[sy.pno]==NULL || sy.apm[sy.pno].ptype==NULL){
	printf("error: out of memory reading property file\n");
	exit(-1);
      }
      for(mu=sm;mu<si;++mu) sy.pnames[sy.pno][mu-sm]=pts[mu];
      sy.pnames[sy.pno][mu-sm]='\0';
      sy.apm[sy.pno].pcode=i;
      for(mu=6;mu<(6+sy.syno);++mu){
	sy.apm[sy.pno].ptype[mu-6]=(int)pts[mu]-'0';
        if(pts[mu]=='1') tally++;
      }
      sy.pno++;

      /* keep room for the next property */
      more_apm = (struct pmtrx*) 
        realloc(sy.apm,sizeof(struct pmtrx)*(sy.pno+2));
      more_names = (char**) realloc (sy.pnames,sizeof(char*)*(sy.pno+2));
      if(more_apm!=NULL) sy.apm=more_apm;
      if(more_names!=NULL) sy.pnames=more_names;
      if(more_apm==NULL || more_names==NULL){
	printf("error: out of memory reading property file\n");
	exit(-1);
      }
    }
    if(total<tally) total=tally;
  }
  sy.maxnoprop = total;
  fclose(fp);
  return sy;
}

/* ran3: pseudo-random number generator that works by subtracting entries
 * of a 55 entry table kept between calls.
 *
 * The table is filled from *seed on the first call, and again on any call
 * made with a negative *seed; *seed is set to 1 whenever that happens.
 * Every call replaces one table entry by the difference of two entries
 * (MBIG is added when the difference is below MZ) and returns that value
 * multiplied by FAC. */
float ran3(seed)
int *seed;
{
	static long table[56];		/* entries 1..55 are used */
	static int started=0;
	static int lead,trail;		/* the two table positions in use */
	long cur,nxt;
	int step,slot,pass;

	if (started == 0 || *seed < 0) {
		started=1;

		/* fill the table, visiting the slots in a scattered order */
		cur=MSEED-(*seed < 0 ? -*seed : *seed);
		cur %= MBIG;
		table[55]=cur;
		nxt=1;
		step=1;
		while (step <= 54) {
			slot=(21*step) % 55;
			table[slot]=nxt;
			nxt=cur-nxt;
			if (nxt < MZ) nxt += MBIG;
			cur=table[slot];
			step++;
		}

		/* four mixing passes over the whole table */
		for (pass=1;pass<=4;pass++) {
			for (slot=1;slot<=55;slot++) {
				table[slot] -= table[1+(slot+30) % 55];
				if (table[slot] < MZ) table[slot] += MBIG;
			}
		}

		lead=0;
		trail=31;
		*seed=1;
	}

	/* step both positions on by one, going from 55 back to 1 */
	lead=(lead == 55) ? 1 : lead+1;
	trail=(trail == 55) ? 1 : trail+1;

	cur=table[lead]-table[trail];
	if (cur < MZ) cur += MBIG;
	table[lead]=cur;
	return cur*FAC;
}

/*  comb: routine to generate all combinations of numbers from a set - 
    call with r = number from set required
	      n = size of set
	      p = 1
	      v = pointer to an array of int's r+1 long, set to zero
	      a = pointer obtained from malloc (it is grown with realloc)
	      t = pointer to a count, set to zero
    e.g. to find all unique pairs from 10 call with
    r = 2, n = 10
    to find all unique fives from 10 call with r=5, n=10.

    The combinations are produced in ascending order, each one holding r
    increasing numbers in the range 1..n.  Combination k is stored in
    a[k][0..r-1] and *t is left holding the number of combinations.  The
    return value is the (possibly moved) array a and must be used in place
    of the pointer passed in.

    The search is carried out in a loop.  Earlier versions called
    themselves once for every step taken, so the depth of recursion grew
    with the number of combinations and large sets ran out of stack space.

    The program is stopped with a message if memory runs out.

    G. J. Barton March 1991

    13 January 1994:  Now store the combinations in a 2d array of 
    length t;

    The a array is dynamically increased in length as new combinations
    are found.  See the above code for an example on how to call this 
    routine.
*/

int **comb(r,p,n,v,a,t)
int *r;  	/* number of numbers to select from n */
int p;   	/* position in list of numbers (working variable)*/
int *n;  	/* number from which combinations are selected */
int *v;  	/* current combination result */
int **a; 	/* array to store all combinations */
long int *t;  	/* length of a */
{
    int i;
    int **grown;
    int *row;

    /* p is the digit being worked on; the search is over when it steps
       back past the first digit */
    while(p>0 && p<=*r){
	if(v[p]>=(*n-*r+p)){	/* exceeded max value for this digit */
	    --p;		/* look at previous digit */
	    continue;
	}
	++v[p];
	if(p<*r){		/* not the last digit */
	    v[p+1] = v[p];
	    ++p;		/* look at next digit */
	    continue;
	}

	/* last digit: update the result array, then look at this digit again */
	grown = (int **) realloc(a,sizeof(int *) * (*t+1));
	row = (int *) malloc(sizeof(int) * (*r));
	if(grown==NULL || row==NULL){
	    printf("error: out of memory\n");
	    exit(-1);
	}
	a = grown;
	for(i=0;i<*r;++i)row[i] = v[i+1];
	a[*t] = row;
	++*t;
    }
    return a;
}
/* This routine removes spaces from a string.  The string is changed in
 * place: every character other than a blank is moved down over the blanks
 * before it, so no temporary copy is needed.
 *
 * c : nul terminated string to squeeze
 *
 * Returns the length of the string after the blanks have been removed
 * (the routine used to end with a bare "return;" although it is declared
 * as returning int). */

int rmsp(c)
char *c;
{
	int from,to;

	to=0;
	for(from=0; c[from]!='\0'; ++from) {
	   if(c[from]!=' ') c[to++]=c[from];
	}
	c[to]='\0';
	return to;
} /* End of routine. */

