/*************************************************************************/
//  CS 100b
//  Fall 1999
//  Homework 4 -- DNA Sequence Distance Measures
//
//  This is a program to demonstrate the use of a DNA sequence class for
//  doing DNA sequence comparisons.  The distance between two DNA sequences 
//  are computed 3 different ways.  
//
//  The example was obtained from Debra Goldberg so all specific questions 
//  should be directed towards her!  :)
/*************************************************************************/
public class hw4
{
   public static void main (String[] args)
   {
      // variable declarations
      TokenReader in;		// for reading input
      DNA_sequence DNA1;	// first DNA sequence
      DNA_sequence DNA2;	// second DNA sequence
      int HammingDist;		// Hamming distance
      int WeightedDist;		// weighted distance
      int MinimumDist;		// minimum distance

      // initialization code
      // instantiate objects
      in = new TokenReader(System.in); 
      DNA1 = new DNA_sequence();
      DNA2 = new DNA_sequence();
      
      // get the DNA sequences from user, check if valid input
      DNA1.SetDNAstr();
      DNA2.SetDNAstr();

      // calculate Hamming distance
      HammingDist = DNA1.GetHammingDist(DNA2);
      if (HammingDist < 0)
         System.out.println("Error in Hamming Distance computation");
      else
         System.out.println("The Hamming Distance is:  " + HammingDist);

      // set substitution matrix for weighted and minimum distances
      DNA_sequence.SetSubMatrix();

      // determine and print weighted distance
      WeightedDist = DNA1.GetWeightedDist(DNA2);
      if (WeightedDist < 0)
         System.out.println("Error in Hamming Distance computation");
      else
         System.out.println("The Weighted Distance is:  " + WeightedDist);

      // determine and print minimum distance
      MinimumDist = DNA1.GetMinDist(DNA2);
      System.out.println("The Minimum Distance is:  " + MinimumDist);

      // get new DNA sequences from user, check if valid input
      // another set of sequences is entered so we can test inputs
      //  which are illegal for the other distance methods
      DNA1.SetDNAstr();
      DNA2.SetDNAstr();

      // determine and print minimum distance
      MinimumDist = DNA1.GetMinDist(DNA2);
      System.out.println("The Minimum Distance is:  " + MinimumDist);
   }
}
----------
X-Sun-Data-Type: default
X-Sun-Data-Description: default
X-Sun-Data-Name: DNA_sequence.java
X-Sun-Charset: us-ascii
X-Sun-Content-Lines: 332

/*************************************************************************/
//  CS 100b
//  Fall 1999
//  Homework 4 -- DNA Sequence Distance Measures
//
//  This file contains a DNA sequence class for doing DNA sequence 
//  comparisons.  The DNA sequence class allows the distance between 
//  objects to be measured 3 different ways.  The Hamming distance, 
//  weighted Hamming distance, and edit distance are supported.  
//  The DNA sequence class also provides methods to read a new 
//  DNA sequence from the user, get the length of the sequence, get a 
//  single DNA base character, get a substitution matrix from the user, 
//  get the weighted distance between 2 individual DNA bases (characters), 
//  and display a DNA sequence.
//
//  The example was obtained from Debra Goldberg so all specific questions 
//  should be directed towards her!  :)
/*************************************************************************/
public class DNA_sequence
{
   private final static int GAP = 5;	// gap penalty
   private int length = 0;		// length of DNA sequence, used to
					// determine if DNAstr was intialized
   private final static String validChars = "ACGT";  // a list of valid 
						     // characters for a DNA
						     // sequence
   private char[] DNAstr;		// the actual DNA sequence
   private static boolean SubInit = false;	// SubMtx has been initialized
   private static int[][] SubMtx = new int[4][4]; // the substitution matrix

   // constructor
   //  values will be set via other methods
   public DNA_sequence(){}

   //--------------------------------------------------------------------
   // SetDNAstr()
   //
   // Purpose  : Get a DNA string from user
   // Input    : None
   // Condition: None
   // PostCond : DNAstr set to the user's input, length set
   // Return   : true if operation is successful.  false otherwise.
   //--------------------------------------------------------------------
   public boolean SetDNAstr(){
      boolean valid;			// input has been validated
      String dtmp;			// temporary string for input
      TokenReader in = new TokenReader(System.in);  // for reading input
      char cc;				// current character

      do
      {
         // prompt user for input
         System.out.println("Input a DNA string: ");
         System.out.println("Input must be A, G, C, or T:");
      
         // get input and save to dtmp
         dtmp = in.readString();
         length = dtmp.length();
	 DNAstr = new char[length];
      
         // go through string and strip one character at a time 
         // and save to the DNAstr array
	 valid = true;
	 // note that declaring ii in the for loop makes it local
	 //  to the loop; ii cannot be used after loop terminates
         for(int ii=0; ii<length; ii++) {
            cc = dtmp.charAt(ii);

            // check for correct input
	    if (validChars.indexOf(cc) < 0){
               System.out.println("Error:  Invalid character.");
               valid = false;
            }
            DNAstr[ii]=cc;
         }
      } while ( ! valid);
      return true;
   } // end SetDNAstr

   //--------------------------------------------------------------------
   // GetIthChar()
   //
   // Purpose  : Get a character from a DNA string
   // Input    : Index of character to be retrieved from DNA string
   // Condition: None
   // PostCond : None
   // Return   : Character at ith position of the DNA
   //--------------------------------------------------------------------
   public char GetIthChar (int ind) {
      if (ind > length)
      {
	 System.err.println("Invalid request in GetIthChar");
	 System.exit(0);
      }
      return DNAstr[ind-1];
   } // end GetIthChar 

   //--------------------------------------------------------------------
   // GetStrLen()
   //
   // Purpose  : Get the length of a DNA string
   // Input    : None
   // Condition: None
   // PostCond : None
   // Return   : length of the DNA string
   //--------------------------------------------------------------------
   public int GetStrLen() {
      return length;
   }

   //--------------------------------------------------------------------
   // GetHammingDist()
   //
   // Purpose  : Get the Hamming distance of a DNA string
   // Input    : Another DNA sequence
   // Condition: A must have same len as DNAstr
   // PostCond : None
   // Return   : number of different symbols between two DNA strings
   //--------------------------------------------------------------------
   public int GetHammingDist (DNA_sequence A) {

      int HammD = 0;      // variable to keep track of the hamming dist

      // only compare if length are equal
      if (A.GetStrLen()==length){
         
         // go through DNA and compare char one by one
         for (int ind=0; ind<length; ind++) {

            // if character at same position different, increment HammD
            if(A.GetIthChar(ind+1)!=DNAstr[ind]){
               HammD++;
            }
         }
         
         return HammD;
      }
      // only get here if lengths are not equal
      System.err.print ("Error: GetHammingDist called with differing ");
      System.err.println ("length DNA sequences.");
      return -1;
   } // end GetHammingDist


   //--------------------------------------------------------------------
   // SetSubMatrix()
   //
   // Purpose  : Initialize the Substitution Matrix with user input
   // Input    : None
   // Condition: None
   // PostCond : SubMtx and SubInit are initialized
   // Return   : None
   //--------------------------------------------------------------------
   public static void SetSubMatrix() {
      TokenReader in = new TokenReader(System.in);  // for reading input
      
      // get all possible weights from user:
      // prompt user to input indvidual weights 
      // (only 6 possibilities due to symmetry)
      System.out.println("Input the substitution weights:");

      for (int ii = 0; ii < validChars.length(); ii++) {
         SubMtx[ii][ii] = 0;
         for (int jj = ii+1; jj < validChars.length(); jj++) {
            System.out.print(validChars.charAt(ii)+"->"+
		 validChars.charAt(jj)+": ");
            SubMtx[ii][jj] = in.readInt();
            SubMtx[jj][ii] = SubMtx[ii][jj];
         }
      }

      SubInit = true;
   } // end SetSubMatrix
   
   //--------------------------------------------------------------------
   // GetWeight()
   //
   // Purpose  : Get the cost of swapping from one char to another
   // Input    : Two characters in a DNA string
   // Condition: A, C, G, or T
   // PostCond : None
   // Return   : the cost of swpping
   //--------------------------------------------------------------------
   public int GetWeight (char x, char y) {

      int ndx1 = validChars.indexOf(x);	// index of x in SubMtx
      int ndx2 = validChars.indexOf(y);	// index of y in SubMtx

      // Check that SubMtx has been initialized
      if (SubInit) {
         // return the right weight based on input
	 if (ndx1 < 0 || ndx2 < 0) {
	    System.err.println ("Error: invalid base in GetWeight");
            return -1;
	 }
	 else
            return SubMtx[ndx1][ndx2];
      }
      else {
	 System.err.print ("Error:  substitution matrix not initialized ");
	 System.err.println ("before calling GetWeight");
         return -1;
      }
   } // end GetWeight 

   //--------------------------------------------------------------------
   // GetWeightedDist()
   //
   // Purpose  : Get the weighted distance of two DNA strings
   // Input    : DNA_sequence A to be compared with
   // Condition: A and DNAstr same length
   // PostCond : None
   // Return   : the weighted distance, -1 if string length not equal
   //--------------------------------------------------------------------
   public int GetWeightedDist (DNA_sequence A) {

      int WeightedD = 0;      // used to keep the distance, 
			      // initial at 0 then increment
      
      // string len not equal, return -1
      if (A.GetStrLen()!=this.GetStrLen()){
         System.err.print ("Error: GetWeightedDist called with differing ");
         System.err.println ("length DNA sequences.");
         return -1;
      }
      else{
         // go through the two DNA sequence and compare each and 
	 // undate the weighted dist
         for (int ii=1; ii<=length; ii++) {
            WeightedD += this.GetWeight(this.GetIthChar(ii),A.GetIthChar(ii));
         }
      }
      return WeightedD;
   } // end GetWeightedDist
   
   //--------------------------------------------------------------------
   // GetMinDist()
   //
   // Purpose  : Get the minimum distance of two DNA strings
   // Input    : DNA_sequence A to be compared with
   // Condition: None
   // PostCond : None
   // Return   : the minimum distance
   //--------------------------------------------------------------------
   public int GetMinDist (DNA_sequence A) {
      
      int colsize = A.GetStrLen() + 1;  // each char in A is a column header,
                                 	// so length of column is len of A
      int rowsize = length + 1 ;        // DNAstr is the row header, 
					// row length is len of A
      int minVal = -1;               	// keeps track of min value
      int align_i_with_j;               // cost of straight up matching
      int align_this_i_with_gap;        // cost of matching ith char in 
					// DNAstr with gap
      int align_A_j_with_gap;           // cost of matching jth char in A 
					// with gap
   
      // opt_score table.  at each cell the value is the min cost for 
      // matching the prefix (the chars with smaller indices.  
      // if at [i][j], then it contain min cost of aligning [0...i] chars 
      // of one DNA sequence and [0...j] chars of the other)
      int[][] Opt_Score = new int[rowsize][colsize];

      // initialize score table (compute 0th row and column)
      for(int ii=0; ii < rowsize; ii++) {
         Opt_Score[ii][0] = ii*GAP;
      }
      for (int jj=0; jj < colsize; jj++) {
         Opt_Score[0][jj] = jj*GAP;

      } 
      // computation for each element in array 
      // here we're doing one row at a time, could do columns instead
      for (int ii=1; ii < rowsize; ii++) {

	 // for each cell in this row,
         // calculate the min cost for matching the first iith char in 
	 // DNAstr to the first jj char in A by selecting the min of 
	 // three possible alignment choices
         for (int jj=1; jj < colsize; jj++) {

            // get the cost of the 3 choices

            // cost of lining character in first to character in second
            align_i_with_j = Opt_Score[ii-1][jj-1] +
                  this.GetWeight(this.GetIthChar(ii), A.GetIthChar(jj));
            
            // cost of lining character in first to gap
            align_this_i_with_gap = Opt_Score[ii-1][jj] + GAP;
            
            // cost of lining character in second to gap
            align_A_j_with_gap = Opt_Score[ii][jj-1] + GAP;

            // find min of the three choices
            Opt_Score[ii][jj] = Math.min (Math.min (
				align_i_with_j,
            			align_this_i_with_gap), 
            			align_A_j_with_gap);
         }
      }
      
      // print out the tables (not required but nice)
      // Opt_Score
      System.out.println ("\n" + "Opt_Score Table:");
      for (int ii=0; ii < rowsize; ii++) {
         for (int jj=0; jj < colsize; jj++) {
            Format.print (System.out,"%4i", Opt_Score[ii][jj]);
         }
         System.out.println();
      }
      return Opt_Score[rowsize-1][colsize-1];

   } // end GetMinDist
      

   //--------------------------------------------------------------------
   // Display()
   //
   // Purpose  : Display the DNA sequence
   // Input    : None
   // Output   : The DNA sequence
   // Condition: None
   // PostCond : None
   // Return   : None
   //--------------------------------------------------------------------
   public void Display() {
      for (int ii=0; ii < length; ii++) {
         System.out.print (DNAstr[ii]);
      }
      System.out.println();
   }
} // end Display

