#! /usr/bin/python # Copyright (C) 2005 Peter Clote # All Rights Reserved. # # Permission to use, copy, modify, and distribute this # software for NON-COMMERCIAL purposes # and without fee is hereby granted provided that this # copyright notice appears in all copies. If you use, modify # or extend this code, please cite the reference # "Symmetric time warping, Boltzmann pair probabilities and # functional genomics", by P. Clote and J. Straubhaar, # submitted to Journal of Mathematical Biology. # # THE AUTHOR MAKES NO REPRESENTATIONS OR # WARRANTIES ABOUT THE SUITABILITY OF THE SOFTWARE, EITHER # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE # IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A # PARTICULAR PURPOSE, OR NON-INFRINGEMENT. THE AUTHOR # SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED # BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING # THIS SOFTWARE OR ITS DERIVATIVES. # # # # timeWarpEasyFirstLastMatch.py # P.Clote # We require first and last characters to be matched together # (respectively) between both sequences import string,sys,random,math,getPAMmatrix import tempfile,os from parameters import GAP,LINELEN,INF,EPSILON,k,T,DEBUG,VERBOSE from parameters import getSequence,printMatrix from parameters import getSubsequencesFromAlignment, printSequenceAndAlignment inf = sys.maxint TAU = 10 #even sample intervals for both time series sequences TAUover2 = float(TAU)/2.0 DEBUG = 0 def printList(L): for x in L: print x def printPathGraph(L): infilename = tempfile.mktemp() outfilename = tempfile.mktemp() infile = open(infilename,"w") outfile = open(outfilename,"w") #WARNING: infile is write-only and used to write contents of list L #Then infile is passed to gnuplot to produce an eps output graph in outfile for (x,y) in L: infile.write(str(x)+"\t"+str(y)+"\n") infile.flush(); infile.close() #I hope that it stays present long enough to call gnuplot outfile.write("set terminal postscript eps\n") graphFileName = "pathGraph.eps" cmd = 'set output "' + graphFileName +'"\n' cmd += 'set pointsize 3 ' + '\n' cmd += 'set noautoscale ' + '\n' #unscaled axis distances outfile.write(cmd) outfile.write('plot "'+infilename+ '" with linespoints\n') outfile.flush() cmd = "gnuplot " + outfilename os.system(cmd) cmd = "ps2gif " + "pathGraph.eps" os.system(cmd) cmd = "rm -f p2f*.pbm" os.system(cmd) def pathMatrix(a,b,distance): #WARNING: In contrast to sequence alignment, in time warping #we have D[(i,j)] = time warp distance between a[0],...,a[i] #and b[0],...,b[j] for all 0<=i0 and y>0: #print backArrowLists[(x,y)] #x0,y0 = random.choice( backArrowLists[(x,y)] ) x0,y0 = backArrowLists[(x,y)][0] #pick first direction, which preferentially is diagonal if #that appears in the list if x0+1==x and y0+1==y: A.append( (a[x],b[y]) ) B.append( (a[x],b[y]) ) C.append( (1,1) ) x = x0; y = y0 elif x0==x and y0+1==y: A.append( ('-',b[y]) ) B.append( (a[x],b[y]) ) C.append( (0,1) ) y = y0 elif x0+1==x and y0==y: A.append( (a[x],'-') ) B.append( (a[x],b[y]) ) C.append( (1,0) ) x = x0 while x>0 and y==0: A.append( (a[x],'-') ) B.append( (a[x],b[y]) ) x -= 1 while x==0 and y>0: A.append( ('-',b[y]) ) B.append( (a[x],b[y]) ) y -= 1 A.append( (a[0],b[0]) ) B.append( (a[0],b[0]) ) C.append( (1,1) ) A.reverse() B.reverse() C.reverse() path = [] xCoord = 0; yCoord = 0 #coordinates of points in path graph path.append( (xCoord,yCoord) ) #start at origin for (deltaX,deltaY) in C: xCoord += deltaX; yCoord += deltaY path.append( (xCoord,yCoord) ) return (A,B,path) def printAlignment( A ): print "\n-------------------------------------\n" aStar = ""; bStar = "" for (x,y) in A: aStar += x; bStar += y print aStar print bStar def printDirectionsInPathMatrix( X ): print "\n-------------------------------------\n" for (ch,n) in X: for i in range(n): sys.stdout.write(' ') print ch print if __name__ == '__main__': if len(sys.argv) < 4: print "Usage: %s -s/-f a b [distanceFile] " % sys.argv[0] print """ 1) a is DNA nucleotide sequence (-s option) or FASTA file of DNA nucleotide (-f option) 2) b is DNA nucleotide sequence (-s option) or FASTA file of DNA nucleotide (-f option) 3) distance is nucleotide distance matrix, whose first line contains A,C,G,T separated by white space, then the usual lower triangular matrix is given, where the first symbol in each successive line is a nucleotide """ sys.exit(1) if len(sys.argv)==4: distance = getPAMmatrix.getPAMmatrix('distance.txt') else: distance = getPAMmatrix.getPAMmatrix(sys.argv[4]) flag = sys.argv[1] if flag == '-s': a = sys.argv[2] b = sys.argv[3] else: a = getSequence(sys.argv[2]) b = getSequence(sys.argv[3]) print "Classical forward alignment" D,arrows = pathMatrix(a,b,distance) A,B,path = align(a,b,D,arrows) printAlignment( A ) printAlignment( B ) printPathGraph(path) print D[(len(a)-1,len(b)-1)] print "Original sequences: %s\t%s\n" % (a,b) print "Classical backwards alignment computed by reversing" print "the original sequences and applying (forward) time warping" aa = ""; for i in range(len(a)-1,-1,-1): aa += a[i] bb = ""; for i in range(len(b)-1,-1,-1): bb += b[i] D,arrows = pathMatrix(aa,bb,distance) A,B,path = align(aa,bb,D,arrows) printAlignment( A ) printAlignment( B ) print "Forwards time warp distance: ", D[(len(a)-1,len(b)-1)] if DEBUG: print "\n--------------------------------------" print "Matrix D\n" printMatrix(D) print "\n--------------------------------------"