#! /usr/bin/python


# Copyright (C) 2005 Peter Clote
#  All Rights Reserved.
#  
#  Permission to use, copy, modify, and distribute this
#  software for NON-COMMERCIAL purposes
#  and without fee is hereby granted provided that this
#  copyright notice appears in all copies. If you use, modify
#  or extend this code, please cite the reference
#    "Symmetric time warping, Boltzmann pair probabilities and
#    functional genomics", by P. Clote and J. Straubhaar,
#    submitted to Journal of Mathematical Biology.
#                                                                                  
#  THE AUTHOR MAKES NO REPRESENTATIONS OR
#  WARRANTIES ABOUT THE SUITABILITY OF THE SOFTWARE, EITHER
#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
#  IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
#  PARTICULAR PURPOSE, OR NON-INFRINGEMENT. THE AUTHOR
#  SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED
#  BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING
#  THIS SOFTWARE OR ITS DERIVATIVES.
#                                                                                  
#  
#  
import sys,getPAMmatrix

SEQUENCE = 1
  #1 means nucleotide sequence, 0 means gene expression data
#T     = .5
T     = .5
k     = 1
ALPHA = -14
BETA  = -2
GAP   = -2
INF   = sys.maxint-1
LINELEN       = 60
EPSILON       = 0.01
DEBUG   = 0
VERBOSE = 0


def printList(L):
  for x in L:
    print x

def distance(x,y):
  if SEQUENCE:
    d = getPAMmatrix.getPAMmatrix('distance.txt')
    return d[(x,y)]
  else:
    return abs(float(x)-float(y))

def isNumber(s):
  try:
    x  = float(s)
    ok = 1
  except:
    x  = 0
    ok = 0
  return ok


def getSequence(filename):
  file = open(filename,"r")
  line = file.readline()
  text = file.read()
  text = text.replace("\n","")
  if line[0] == '>':
    return text
  else:
    text = line[:-1]+text
  return text

def getNumericalSequence(filename):
  file = open(filename,"r")
  line = file.readline()
#  L    = line.split("\t")
  L    = line.split()
  L    = L[1:]         #remove gene name
  L    = map(float,L)  #convert from string to float
  return L

def printMatrix(M):
  keys = M.keys()
  keys.sort()
  (x,y) = keys[0]
  for key in keys:
    (u,v) = key
    if u==x:
#      sys.stdout.write("%.4f\t" % M[key])
      sys.stdout.write("%f\t" % M[key])
    else:
      x=u
#      sys.stdout.write("\n%.4f\t" % M[key])
      sys.stdout.write("\n%f\t" % M[key])
  print "\n"

def monus(a,b):
  if a>b:
    return a-b
  else:
    return 0

def lengthOfSequencesInAlignment(A):
  #A is alignment of form:
  #       I-LR-
  #       LY-RR
  #return lengths of words aligned, (3,4) in this example
  sumA = 0; sumB = 0
  for x,y in A:
    if x!='-': sumA += 1
    if y!='-': sumB += 1
  return (sumA,sumB)


def convertDmatrixToEmatrix(D):
    #since values of D[key] are pairs of form (score,backarrowList)
    #we create matrix E containing only scores
    E = {}
    for key in D.keys():
      first,second = D[key]
      E[key] = first
    return E


def getSubsequencesFromAlignment(A):
  #A is alignment of form:
  #       I-LR-
  #       LY-RR
  #return words aligned, ('ILR','LYRR') in this example
  a = ""; b = ""
  for x,y in A:
    if x!='-': a += x
    if y!='-': b += y
  return (a,b)

def getStarredSubsequencesFromAlignment(A):
  #A is alignment of form:
  #       I-LR-
  #       LY-RR
  #return words aligned, ('I-LR-','LY-RR') in this example
  aStar = ""; bStar = ""
  for x,y in A:
    aStar += x; bStar += y
  return (aStar,bStar)


def printAlignmentWithQuality(A,quality,start=(0,0)):
  #A is alignment of form:
  #       I-LR-
  #       LY-RR
  #quality is string of qualities of Boltzmann probabilities
  #return lengths of words aligned, (3,4) in this example
  assert len(A)==len(quality)
  startA,startB = start #offset positions
  aStar,bStar = getStarredSubsequencesFromAlignment(A)
  length      = len(A) 
  startPos    = 1; 
  endPos      = min(LINELEN,length)
  stop        = 0 #boolean flag to stop loop
  while not stop:
    print "%d\t%s\t%d" %(startPos+startA,aStar[startPos-1:endPos],endPos+startA)
    print "\t%s\t" %quality[startPos-1:endPos]
    print "%d\t%s\t%d" %(startPos+startB,bStar[startPos-1:endPos],endPos+startB)
    print 
    numCharPrinted = endPos-startPos+1
    startPos += numCharPrinted
    endPos      = min(endPos+LINELEN,length)
    if startPos > length: stop = 1
 

def printSequenceAndAlignment(A,a,b,Probs,LINELEN,start=(0,0)):
  startA,startB = start; 
  maxStart = max(startA,startB)
  c,d= getSubsequencesFromAlignment(A)
  if VERBOSE:
    print "\nFirst sequence"
    print a
    print "\nOptimal subword of first sequence"
    for i in range(startA): sys.stdout.write(' ')
    print c
    print "\nSecond sequence"
    print b
    print "\nOptimal subword of second sequence"
    for i in range(startB): sys.stdout.write(' ')
    print d

  quality = ""
  for i in range(len(Probs)):
    if Probs[i]>0.75:
      quality += "+"
    elif Probs[i]>0.5:
      quality += "*"
    elif Probs[i]>0.25:
      quality += "-"
    else:
      quality += " "
  if VERBOSE:
    text = """
              Explanation: (+: 75\%-100\%),
                           (*: 50\%-75\%),
                           (-: 25\%-50\%),
                           ( : 0\% -25\%)"""
    print text
  print "\n-------------------------------------"
  print "Alignment"
  printAlignmentWithQuality(A,quality,start)

  if VERBOSE:
    aStar,bStar = getStarredSubsequencesFromAlignment(A)
    print "\n-------------------------------------"
    print "Table of probabilities in alignment"
    for i in range(len(A)):
      print "%s\t%s\t%s" % (aStar[i],bStar[i],Probs[i])