#! /usr/bin/python # Copyright (C) 2005 Peter Clote # All Rights Reserved. # # Permission to use, copy, modify, and distribute this # software for NON-COMMERCIAL purposes # and without fee is hereby granted provided that this # copyright notice appears in all copies. If you use, modify # or extend this code, please cite the reference # "Symmetric time warping, Boltzmann pair probabilities and # functional genomics", by P. Clote and J. Straubhaar, # submitted to Journal of Mathematical Biology. # # THE AUTHOR MAKES NO REPRESENTATIONS OR # WARRANTIES ABOUT THE SUITABILITY OF THE SOFTWARE, EITHER # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE # IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A # PARTICULAR PURPOSE, OR NON-INFRINGEMENT. THE AUTHOR # SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED # BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING # THIS SOFTWARE OR ITS DERIVATIVES. # # # # classicTimeWarp.py # P.Clote # Implementation of classical time warping, which is not symmetric. # For better understanding of the algorithm, which can be considered # as a kind of sequence alignment for sequences of real numbers # (with main application here to gene expression time series data), # I have implemented the algorithm to handle both sequences of real # numbers AND sequences of nucleotides/amino acids. For the latter, # a distance matrix (analogue of PAM matrix, except using distances # instead of similarity) is required, as given in file "distance.txt" # NOTE: The graphical display of the path graph is created using # gnuplot which must be installed. Otherwise, comment out this call. import string,sys,random,math import tempfile,os import getPAMmatrix from parameters import GAP,LINELEN,INF,EPSILON,k,T,DEBUG,VERBOSE from parameters import getNumericalSequence,printMatrix,getSequence from parameters import getSubsequencesFromAlignment, printSequenceAndAlignment from parameters import SEQUENCE #if SEQUENCE=1 then use nucleotide/aa sequences TAU = 10.0 TAUover2 = float(TAU)/2.0 MU = 10.0 MUover2 = float(MU)/2.0 TAUMUover2 = float(TAU+MU)/2.0 DEBUG = 0 def printList(L): for x in L: print x def distance(x,y): if SEQUENCE: d = getPAMmatrix.getPAMmatrix('distance.txt') return d[(x,y)] else: return abs(float(x)-float(y)) def isNumber(s): try: x = float(s) ok = 1 except: x = 0 ok = 0 return ok def printPathGraph(L): infilename = tempfile.mktemp() outfilename = tempfile.mktemp() infile = open(infilename,"w") outfile = open(outfilename,"w") #WARNING: infile is write-only and used to write contents of list L #Then infile is passed to gnuplot to produce an eps output graph in outfile for (x,y) in L: infile.write(str(x)+"\t"+str(y)+"\n") infile.flush(); infile.close() #I hope that it stays present long enough to call gnuplot outfile.write("set terminal postscript eps\n") graphFileName = "pathGraph.eps" cmd = 'set output "' + graphFileName +'"\n' cmd += 'set pointsize 3 ' + '\n' cmd += 'set noautoscale ' + '\n' #unscaled axis distances outfile.write(cmd) outfile.write('plot "'+infilename+ '" with linespoints\n') outfile.flush() cmd = "gnuplot " + outfilename os.system(cmd) cmd = "ps2gif " + "pathGraph.eps" os.system(cmd) cmd = "rm -f p2f*.pbm" os.system(cmd) def pathMatrix(a,b): #WARNING: In contrast to sequence alignment, in time warping #we have D[(i,j)] = time warp distance between a[0],...,a[i] #and b[0],...,b[j] for all 0<=i1 or j>1: print i,j,backArrowLists[(i,j)] return (D,backArrowLists) def align(a,b,D,backArrowLists): a = [a[0]] + a #pad a with repeat of the first number b = [b[0]] + b #pad b with repeat of the first number n = len(a); m = len(b); x = n-1; y = m-1; A = []; B = []; C = [] #A is usual (sequence) alignment presentation of time warping align. #B is time warping presentation #C list of pairs (x,y) of coordinates for path graph while x>1 or y>1: #print backArrowLists[(x,y)] #x0,y0 = random.choice( backArrowLists[(x,y)] ) x0,y0 = backArrowLists[(x,y)][0] #pick first direction, which preferentially is diagonal if #that appears in the list if x0+1==x and y0+1==y: A.append( (a[x],b[y]) ) B.append( (a[x],b[y]) ) C.append( (1,1) ) x = x0; y = y0 elif x0==x and y0+1==y: A.append( ('-',b[y]) ) B.append( (a[x],b[y]) ) C.append( (0,1) ) y = y0 elif x0+1==x and y0==y: A.append( (a[x],'-') ) B.append( (a[x],b[y]) ) C.append( (1,0) ) x = x0 A.append( (a[1],b[1]) ) B.append( (a[1],b[1]) ) C.append( (1,1) ) A.reverse() B.reverse() C.reverse() path = [] xCoord = 0; yCoord = 0 #coordinates of points in path graph path.append( (xCoord,yCoord) ) #start at origin for (deltaX,deltaY) in C: xCoord += deltaX; yCoord += deltaY path.append( (xCoord,yCoord) ) return (A,B,path) def printAlignment( A ): print "\n-------------------------------------\n" aStar = ""; bStar = "" for (x,y) in A: if SEQUENCE: aStar = ""; bStar = "" for (x,y) in A: aStar += x; bStar += y else: if isNumber(x): chX = str(x)+"\t" else: chX = "-"+"\t" if isNumber(y): chY = str(y)+"\t" else: chY = "-"+"\t" aStar += chX; bStar += chY print aStar print bStar def printDirectionsInPathMatrix( X ): print "\n-------------------------------------\n" for (ch,n) in X: for i in range(n): sys.stdout.write(' ') print ch print if __name__ == '__main__': if len(sys.argv) < 4: print "Usage: %s -s/-f a b " % sys.argv[0] print """ 1) a is expression array entry sequence (-s option) or FASTA file of expression array entry (-f option) 2) b is expression array entry sequence (-s option) or FASTA file of expression array entry (-f option) """ sys.exit(1) flag = sys.argv[1] if flag == '-s': if SEQUENCE: a = list(sys.argv[2]) b = list(sys.argv[3]) else: print "Cannot interactively process expression data" sys.exit(1) else: if SEQUENCE: a = list(getSequence(sys.argv[2])) b = list(getSequence(sys.argv[3])) else: a = getNumericalSequence(sys.argv[2]) b = getNumericalSequence(sys.argv[3]) print "First sequence as list : ",a print "Second sequence as list: ",b D,arrows = pathMatrix(a,b) A,B,path = align(a,b,D,arrows) print "Optimal time warping\n" printAlignment( A ) printAlignment( B ) printPathGraph(path) print "Time warping distance from left to right: ",D[(len(a),len(b))] print "\nReversal\n" a.reverse(); a0 = a b.reverse(); b0 = b print "Reversal of first sequence: ",a0 print "Reversal of second sequence: ",b0 D,arrows = pathMatrix(a,b) A,B,path = align(a,b,D,arrows) printAlignment( A ) printAlignment( B ) print "Time warping distance from left to right of reversals: ",D[(len(a),len(b))] if DEBUG: print "\n--------------------------------------" print "Matrix D\n" printMatrix(D)