/* ---------------------------------------------------------- 
%(C)1994,1995 Institute for New Generation Computer Technology 
%       (Read COPYRIGHT for detailed information.) 
----------------------------------------------------------- */
/* ------------------------------------------------------------------ */
/*              readfile.c                                            */
/*         read data file                                             */
/* ------------------------------------------------------------------ */

/*   data file format    */
/* seqname1:ABCDEFGHIJ   */
/* seqname2:ACCCBDEHIJ   */

#include   <stdio.h>
#include   <stdlib.h>
#include   <string.h>
#include   <ctype.h>
#include "stem.h"

/*
#define DEBUG TRUE 
*/


/* TRUE is not defined in this file; it is expected to come from stem.h
   (assumption -- confirm).  The guard only supplies a fallback when no
   definition is in scope, so an existing definition is never overridden. */
#ifndef TRUE
#define TRUE 1
#endif

/*
 * read_sequence -- copy the sequence characters of one input line.
 *
 *   ptr  : text of the line, starting where the sequence data begins
 *   dest : where the characters are written; always '\0'-terminated on
 *          return.  The caller must provide room for every letter and
 *          '-' on the line plus the terminator (no size is passed in).
 *
 * Only letters and '-' are copied.  Every other character (blanks, tabs,
 * digits, '\r', ...) is skipped; previously such a character made the
 * loop spin forever because the read pointer was never advanced.
 *
 * Returns 0 when the sequence ends on this line (newline or end of
 * string reached) and TRUE when a backslash announces that the sequence
 * continues on the next line.
 */
int read_sequence(ptr,dest)
     char *ptr,  /* line from file */
          *dest; /* seqbuff */
{
  unsigned char c;  /* ctype functions need an unsigned char value */

  while(*ptr == ' ' || *ptr == '\t')
    ptr++;          /* skip leading space and tab */

  for(;; ptr++)
    {
      c = (unsigned char)*ptr;

      if(c == '\n' || c == '\0')
	break;      /* sequence ends on this line */

      if(c == '\\')
	{ /* continues to next line */
	  *dest = '\0';
	  return TRUE;
	}

      if(isalpha(c) || c == '-')
	*dest++ = (char)c;
      /* anything else is not sequence data: ignore it */
    }

  *dest = '\0';
  return 0;
}


/*
 * copy_seq -- return a freshly allocated, upper-cased copy of a sequence.
 *
 *   seqbuff : '\0'-terminated sequence text
 *
 * Letters are copied in upper case and '-' is copied unchanged.  Any
 * other character is dropped; previously such a character made the loop
 * spin forever because the read pointer was never advanced.
 *
 * The result is obtained from calloc() and belongs to the caller.  If
 * the allocation fails a message is written to stderr and the program
 * exits with status 1.
 */
char *copy_seq(seqbuff)
     char *seqbuff;
{
  char *head, *out;
  unsigned char c;  /* ctype functions need an unsigned char value */

  /* the copy can never be longer than the source */
  head = (char *)calloc(strlen(seqbuff) + 1,sizeof(char));
  if(head == NULL)
    {
      fprintf(stderr,"copy_seq :fail to alloc memory for seq %s \n",seqbuff);
      exit(1);
    }

  out = head;
  for(; *seqbuff != '\0'; seqbuff++)
    {
      c = (unsigned char)*seqbuff;
      if(isalpha(c))
	*out++ = (char)toupper(c);  /* upper-case letters pass through as is */
      else if(c == '-')
	*out++ = '-';
      /* anything else is not sequence data: skip it */
    }
  /* calloc() zero-filled the buffer, so the copy is already terminated */
#ifdef DEBUG
  fprintf(stderr,"head = %s\n",head);
#endif
  return head;
}

/*
 * adjust_length -- give every sequence a buffer of the common length.
 *
 *   maxSeqLength : target length
 *
 * For each of the NoOfSeqs entries whose seqlength does not exceed
 * maxSeqLength, a new buffer of maxSeqLength + 1 bytes is allocated,
 * sequenceTemp is copied into it, '-' characters are appended to make
 * up the difference, and the result is stored in .sequence.
 * sequenceTemp itself is left untouched.  Entries longer than
 * maxSeqLength are skipped, as before.
 *
 * The padding is written with one memset() instead of one strcat() per
 * missing character, which rescanned the whole string each time.
 *
 * On allocation failure a message is written to stderr and the program
 * exits with status 1.
 */
void adjust_length(maxSeqLength)
     int   maxSeqLength;
{
  int  i, len, pad;
  char *padded, *tail;
#ifdef DEBUG
  fprintf(stderr,"adjust_length:maxSeqLength = %d\n",maxSeqLength);
#endif
  for(i=0;i!=NoOfSeqs;i++)
    {
      len = sequences[i].seqlength;
      if(len > maxSeqLength)
	continue;   /* longer than the target: not handled here */

      padded = (char *)calloc((maxSeqLength + 1),sizeof(char));
      if(padded == NULL)
	{
	  /* the two wordings are those of the former separate branches */
	  if(len == maxSeqLength)
	    fprintf(stderr,"fail to alloc memory for seq %s \n",
		    sequences[i].sequenceTemp);
	  else
	    fprintf(stderr,"fail to alloc memory for sequence %d:%s \n",
		    i,sequences[i].sequenceTemp);
	  exit(1);
	}

      strcpy(padded,sequences[i].sequenceTemp);

      pad = maxSeqLength - len;
      if(pad > 0)
	{ /* add '-' to tail */
	  tail = padded + strlen(padded);
	  memset(tail,'-',pad);
	  tail[pad] = '\0';
	}

      sequences[i].sequence = padded;
#ifdef DEBUG
      fprintf(stderr,"sequences[%d].sequence = %s\n",i,sequences[i].sequence);
#endif
    }
}

void free_sequences()
{
  int i;
  /* already sequences are read */
  /* free previous memory       */

#ifdef SUN
  for(i=0;i!= NoOfSeqs;i++)
    {
      if((free(sequences[i].seqName)) == NULL )
	{
	  fprintf(stderr,"free_sequences: fail to free memory! (seqName)\n");
	  exit(1);
	}

      fprintf(stderr,"sequences[%d].seqName freed.\n",i);

      if((free(sequences[i].sequenceTemp)) == NULL )
	{
	  fprintf(stderr,"free_sequences: fail to free memory! (sequenceTemp)\n");
	  exit(1);
	}
/*
      fprintf(stderr,"sequences[%d].sequenceTemp freed.\n",i);
*/
      if((free(sequences[i].sequence)) == NULL )
	{
	  fprintf(stderr,"free_sequences: fail to free memory! (sequence)\n");
	  exit(1);
	}
/*
      fprintf(stderr,"sequences[%d].sequence freed.\n",i);
*/
    }
#endif  /* SUN */

#ifdef HP_IRIS
/* don't check free() return value */
  for(i=0;i!= NoOfSeqs;i++)
    {
      free(sequences[i].seqName);
/*
      fprintf(stderr,"sequences[%d].seqName freed.\n",i);
*/
      free(sequences[i].sequenceTemp);
/*
      fprintf(stderr,"sequences[%d].sequenceTemp freed.\n",i);
*/
      free(sequences[i].sequence);
/*
      fprintf(stderr,"sequences[%d].sequence freed.\n",i);
*/
    }
#endif  /* HP,IRIS */

  seqRead = FALSE;
}


/*
 * rd_count_residues -- number of characters of the line at p that count
 * as sequence data (letters and '-'), scanning up to the first newline,
 * backslash or end of string.  Used to check that a continuation line
 * still fits into the sequence buffer before it is appended.
 */
static int rd_count_residues(p)
     char *p;
{
  int n;

  for(n = 0; *p != '\0' && *p != '\n' && *p != '\\'; p++)
    {
      if(isalpha((unsigned char)*p) || *p == '-')
	n++;
    }
  return n;
}

/*
 * rd_store_sequence -- record the completed text in seqbuff as the
 * sequence of entry NoOfSeqs, update the running maximum length, and
 * advance NoOfSeqs.
 */
static void rd_store_sequence(seqbuff,maxSeqLength)
     char *seqbuff;
     int  *maxSeqLength;
{
  int len;

  len = strlen(seqbuff);
  sequences[NoOfSeqs].seqlength = len;

  if(NoOfSeqs == 0 || len > *maxSeqLength)
    *maxSeqLength = len;

  sequences[NoOfSeqs].sequenceTemp = copy_seq(seqbuff);
  NoOfSeqs++;
}

/*
 * read_datafile -- read all sequences from an open data file.
 *
 *   fpr : stream to read from
 *
 * Each entry has the form "name:SEQUENCE".  A backslash in the sequence
 * part means the sequence continues on the next line.  Lines whose
 * first non-blank character is ';', '%' or '#' are comments; empty
 * lines are ignored.  An entry with an empty name is called "sequence".
 *
 * Sequences held from an earlier call are released first.  On success
 * the entries are stored in sequences[], NoOfSeqs / NoOfColumn /
 * NameLength are updated, every sequence is padded to the common length
 * by adjust_length(), singleMode and Percent are adjusted, seqRead is
 * set, and TRUE is returned.
 *
 * FALSE is returned when a line has no ':', when a continued sequence
 * would exceed MAXCOLUMN characters, or when the file holds no sequence.
 *
 * Fixes over the previous version:
 *  - the name is copied from its first non-blank character; it used to
 *    be copied from the start of the line into a buffer sized without
 *    the leading blanks, overflowing it;
 *  - the search for ':' stops at the end of the string as well as at a
 *    newline, so a last line without newline is no longer scanned past
 *    its end;
 *  - continuation lines are checked against the size of seqbuff before
 *    being appended.
 *
 * NOTE(review): entries already stored stay allocated when FALSE is
 * returned part-way through the file, and a name whose continuation
 * never arrives before end of file is dropped without being freed.
 * NOTE(review): NoOfSeqs is not checked against the capacity of
 * sequences[], which is declared outside this file.
 */
int read_datafile(fpr)
     FILE *fpr;
{
  char buffer[MAXCOLUMN+1],seqbuff[MAXCOLUMN+1];
  char *p1;
  char *name;      /* first character of the sequence name */
  int  line;       /* line counter of datafile  */
  int  i, dest_leng,
       maxNameLength,
       maxSeqLength,
       long_flag;  /* non-zero when next line continues the sequence */

  if(seqRead)
    free_sequences();  /* free allocated memory */

  /* initialize */
  NoOfSeqs = 0;
  NoOfColumn = 0;
  line = 0;
  maxNameLength = 0;
  maxSeqLength = 0;
  long_flag = 0;
  seqbuff[0] = '\0';

  while(fgets(buffer,MAXCOLUMN,fpr) != NULL)
    {
      line++;

      for(p1=buffer;*p1 == ' ' || *p1 == '\t';p1++)
	;       /* skip space and tab */

      /* comment or empty line, so get next line */
      if(*p1 == ';' || *p1 == '%' || *p1 == '#' ||
	 *p1 == '\n' || *p1 == '\0')
	continue;

      if(!long_flag)
	{ /* a new "name:sequence" entry starts here */
	  name = p1;
	  for(i=0;*p1 != ':';p1++,i++)
	    {
	      if(*p1 == '\n' || *p1 == '\0') /* this line has no ':' */
		{
		  fprintf(stderr,"illegal format in line %d\n",line);
		  fprintf(stderr,"     %s\n",buffer);
		  return FALSE;
		}
	    }

	  if(i == 0)
	    { /* this sequence has no name: use a dummy one */
	      name = "sequence";
	      i = 8; /* for "sequence" */
	    }

	  if(NoOfSeqs == 0 || i > maxNameLength)
	    maxNameLength = i;

	  sequences[NoOfSeqs].nameLength = i;

	  if((sequences[NoOfSeqs].seqName =
	      (char *)calloc((i + 1),sizeof(char))) == NULL)
	    {
	      fprintf(stderr,"fail to alloc memory for seqName %d:%s \n",line,buffer);
	      exit(1);
	    }
	  memcpy(sequences[NoOfSeqs].seqName,name,i);
	  sequences[NoOfSeqs].seqName[i] = '\0';

	  p1++;  /* just after ':' */

	  /* a single line always fits: buffer and seqbuff have equal size */
	  long_flag = read_sequence(p1,seqbuff); /* read sequence */

#ifdef DEBUG
	  fprintf(stderr,"long_flag=%d\n",long_flag);
	  fprintf(stderr,"seqbuff=%s\n",seqbuff);
#endif

	  if(!long_flag) /* the sequence ends this line */
	    rd_store_sequence(seqbuff,&maxSeqLength);
	}
      else
	{ /* this line continues the sequence of the previous line */
#ifdef DEBUG
	  fprintf(stderr,"seqbuff at here is %s\n",seqbuff);
#endif
	  dest_leng = strlen(seqbuff);

	  if(rd_count_residues(p1) > MAXCOLUMN - dest_leng)
	    { /* appending would run past the end of seqbuff */
	      fprintf(stderr,"sequence too long in line %d (more than %d columns)\n",
		      line,MAXCOLUMN);
	      return FALSE;
	    }

	  /* append at the end of what has been collected so far */
	  long_flag = read_sequence(p1,seqbuff + dest_leng);

	  if(!long_flag) /* the sequence ends this line */
	    rd_store_sequence(seqbuff,&maxSeqLength);
	}
    }

  NoOfColumn = maxSeqLength;
  NameLength = maxNameLength;

#ifdef DEBUG
  printf("maxNameLength = %d ,\n", maxNameLength);
  printf("Number of data = %d ,\n", NoOfSeqs);
  printf("Number of column = %d ,\n", NoOfColumn);
  for(i=0;i!= NoOfSeqs;i++)
    fprintf(stderr,"sequences[%3d] = %s:%s\n",i,
	    sequences[i].seqName,sequences[i].sequenceTemp);
#endif
  /* adjust seq length */
  adjust_length(maxSeqLength);
#ifdef DEBUG
  printf("after adjust_length\n");
  for(i=0;i!= NoOfSeqs;i++)
    {
      fprintf(stderr,"sequences[%3d] = %s\n",i,
	    sequences[i].seqName);
      fprintf(stderr,"%s\n",sequences[i].sequence);
    }
#endif

  if(NoOfSeqs == 0)
    return FALSE;  /* illegal data file: no sequence in it */

  if(NoOfSeqs == 1)
    {
      singleMode = TRUE;
      Percent = 100;
    }
  else
    {
      if(singleMode)
	{ /* treating one sequence before reading this file */
	  if(pre_percent == 100)
	    Percent = PERCENT_D;  /* reset to default percent */
	  else
	    Percent = pre_percent;
	}
      singleMode = FALSE;
    }
#ifdef DEBUG
  printf("single flag = %d\n",singleMode);
#endif
  seqRead = TRUE;
  return TRUE;
}


/* ----------------- end of "readfile.c" --------------------- */



