#! /usr/bin/python # Copyright (C) 2005 Peter Clote # All Rights Reserved. # # Permission to use, copy, modify, and distribute this # software for NON-COMMERCIAL purposes # and without fee is hereby granted provided that this # copyright notice appears in all copies. If you use, modify # or extend this code, please cite the reference # "Symmetric time warping, Boltzmann pair probabilities and # functional genomics", by P. Clote and J. Straubhaar, # submitted to Journal of Mathematical Biology. # # THE AUTHOR MAKES NO REPRESENTATIONS OR # WARRANTIES ABOUT THE SUITABILITY OF THE SOFTWARE, EITHER # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE # IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A # PARTICULAR PURPOSE, OR NON-INFRINGEMENT. THE AUTHOR # SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED # BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING # THIS SOFTWARE OR ITS DERIVATIVES. # # # # timeWarp.py # P.Clote # Implementation of new symmetric form of time warping. # For debugging purposes, the code prints out the path matrix # arrays for forward and reverse time warping (D and E). import string,sys,random,math,getPAMmatrix import tempfile,os from parameters import GAP,LINELEN,INF,EPSILON,k,T,DEBUG,VERBOSE from parameters import getSequence,printMatrix from parameters import getSubsequencesFromAlignment, printSequenceAndAlignment TAU = 10 #even sample intervals for both time series sequences TAUover2 = float(TAU)/2.0 DEBUG = 1 def printList(L): for x in L: print x def printPathGraph(L): infilename = tempfile.mktemp() outfilename = tempfile.mktemp() infile = open(infilename,"w") outfile = open(outfilename,"w") #WARNING: infile is write-only and used to write contents of list L #Then infile is passed to gnuplot to produce an eps output graph in outfile for (x,y) in L: infile.write(str(x)+"\t"+str(y)+"\n") infile.flush(); infile.close() #I hope that it stays present long enough to call gnuplot outfile.write("set terminal postscript eps\n") graphFileName = "pathGraph.eps" cmd = 'set output "' + graphFileName +'"\n' cmd += 'set pointsize 3 ' + '\n' cmd += 'set noautoscale ' + '\n' #unscaled axis distances outfile.write(cmd) outfile.write('plot "'+infilename+ '" with linespoints\n') outfile.flush() cmd = "gnuplot " + outfilename os.system(cmd) cmd = "ps2gif " + "pathGraph.eps" os.system(cmd) cmd = "rm -f p2f*.pbm" os.system(cmd) def pathMatrix(a,b,distance): #WARNING: In contrast to sequence alignment, in time warping #we have D[(i,j)] = time warp distance between a[0],...,a[i] #and b[0],...,b[j] for all 0<=ii>n and 0>j>m a = a+a[len(a)-1] b = b+b[len(b)-1] #pad a,b with their last letter for base case treatment n = len(a); m = len(b) E = {}; forwardArrowLists = {} # E[(n-1,m-1)] = TAU*distance[(a[n-1],b[m-1])] E[(n-1,m-1)] = 0 for j in range(m-2,-1,-1): E[(n-1,j)] = E[(n-1,j+1)]+TAUover2*distance[(a[n-1],b[j])] forwardArrowLists[(n-1,j)] = [(n-1,j+1)] for i in range(n-2,-1,-1): E[(i,m-1)] = E[(i+1,m-1)]+TAUover2*distance[(a[i],b[m-1])] forwardArrowLists[(i,m-1)] = [(i+1,m-1)] for i in range(n-2,-1,-1): for j in range(m-2,-1,-1): #determine minimum min = INF x = E[(i+1,j+1)] + TAU*distance[(a[i],b[j])] if x < min: min = x x = E[(i+1,j)] + TAUover2*distance[(a[i],b[j])] if x < min: min = x x = E[(i,j+1)] + TAUover2*distance[(a[i],b[j])] if x < min: min = x E[(i,j)] = min #now determine back arrows forwardArrowLists[(i,j)] = [] x = E[(i+1,j+1)] + TAU*distance[(a[i],b[j])] if x == min: forwardArrowLists[(i,j)].append( (i+1,j+1) ) x = E[(i+1,j)] + TAUover2*distance[(a[i],b[j])] if x == min: forwardArrowLists[(i,j)].append( (i+1,j) ) x = E[(i,j+1)] + TAUover2*distance[(a[i],b[j])] if x == min: forwardArrowLists[(i,j)].append( (i,j+1) ) return (E,forwardArrowLists) def align(a,b,D,backArrowLists): n = len(a); m = len(b); x = n-1; y = m-1; A = []; B = []; C = [] #A is usual (sequence) alignment presentation of time warping align. #B is time warping presentation #C list of pairs (x,y) of coordinates for path graph while (x>=0 or y>=0) and not (x==0 and y==0): #print backArrowLists[(x,y)] #x0,y0 = random.choice( backArrowLists[(x,y)] ) print "x",x,"y",y x0,y0 = backArrowLists[(x,y)][0] #pick first direction, which preferentially is diagonal if #that appears in the list if x0+1==x and y0+1==y: A.append( (a[x],b[y]) ) B.append( (a[x],b[y]) ) C.append( (1,1) ) x = x0; y = y0 elif x0==x and y0+1==y: A.append( ('-',b[y]) ) # B.append( (a[x-1],b[y]) ) B.append( (a[x],b[y]) ) C.append( (0,1) ) y = y0 elif x0+1==x and y0==y: A.append( (a[x],'-') ) # B.append( (a[x],b[y-1]) ) B.append( (a[x],b[y]) ) C.append( (1,0) ) x = x0 A.append( (a[0],b[0]) ) B.append( (a[0],b[0]) ) C.append( (1,1) ) A.reverse() B.reverse() C.reverse() path = [] xCoord = 0; yCoord = 0 #coordinates of points in path graph path.append( (xCoord,yCoord) ) #start at origin for (deltaX,deltaY) in C: xCoord += deltaX; yCoord += deltaY path.append( (xCoord,yCoord) ) return (A,B,path) def printAlignment( A ): print "\n-------------------------------------\n" aStar = ""; bStar = "" for (x,y) in A: aStar += x; bStar += y print aStar print bStar def printDirectionsInPathMatrix( X ): print "\n-------------------------------------\n" for (ch,n) in X: for i in range(n): sys.stdout.write(' ') print ch print if __name__ == '__main__': if len(sys.argv) < 4: print "Usage: %s -s/-f a b [distanceFile] " % sys.argv[0] print """ 1) a is amino acid sequence (-s option) or FASTA file of amino acid (-f option) 2) b is amino acid sequence (-s option) or FASTA file of amino acid (-f option) 3) distance is nucleotide distance matrix, whose first line contains A,C,G,T separated by white space, then the usual lower triangular matrix is given, where the first symbol in each successive line is a nucleotide """ sys.exit(1) if len(sys.argv)==4: distance = getPAMmatrix.getPAMmatrix('distance.txt') else: distance = getPAMmatrix.getPAMmatrix(sys.argv[4]) flag = sys.argv[1] if flag == '-s': a = sys.argv[2] b = sys.argv[3] else: a = getSequence(sys.argv[2]) b = getSequence(sys.argv[3]) D,arrows = pathMatrix(a,b,distance) A,B,path = align(a,b,D,arrows) E,arrows = backwardsPathMatrix(a,b,distance) printAlignment( A ) printAlignment( B ) printPathGraph(path) print D[(len(a),len(b))] print E[(0,0)] print a,b #assert D[(len(a),len(b))] == E[(0,0)] if DEBUG: print "\n--------------------------------------" print "Matrix D\n" printMatrix(D) print "\n--------------------------------------" print "Matrix E\n" printMatrix(E)