#! /usr/bin/python # Copyright (C) 2005 Peter Clote # All Rights Reserved. # # Permission to use, copy, modify, and distribute this # software for NON-COMMERCIAL purposes # and without fee is hereby granted provided that this # copyright notice appears in all copies. If you use, modify # or extend this code, please cite the reference # "Symmetric time warping, Boltzmann pair probabilities and # functional genomics", by P. Clote and J. Straubhaar, # submitted to Journal of Mathematical Biology. # # THE AUTHOR MAKES NO REPRESENTATIONS OR # WARRANTIES ABOUT THE SUITABILITY OF THE SOFTWARE, EITHER # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE # IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A # PARTICULAR PURPOSE, OR NON-INFRINGEMENT. THE AUTHOR # SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED # BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING # THIS SOFTWARE OR ITS DERIVATIVES. # # # # timeWarpEasy.py # P.Clote # This is NOT symmetric time warping. import string,sys,random,math,getPAMmatrix import tempfile,os from parameters import GAP,LINELEN,INF,EPSILON,k,T,DEBUG,VERBOSE from parameters import getSequence,printMatrix from parameters import getSubsequencesFromAlignment, printSequenceAndAlignment TAU = 10 #even sample intervals for both time series sequences TAUover2 = float(TAU)/2.0 DEBUG = 0 def printList(L): for x in L: print x def printPathGraph(L): infilename = tempfile.mktemp() outfilename = tempfile.mktemp() infile = open(infilename,"w") outfile = open(outfilename,"w") #WARNING: infile is write-only and used to write contents of list L #Then infile is passed to gnuplot to produce an eps output graph in outfile for (x,y) in L: infile.write(str(x)+"\t"+str(y)+"\n") infile.flush(); infile.close() #I hope that it stays present long enough to call gnuplot outfile.write("set terminal postscript eps\n") graphFileName = "pathGraph.eps" cmd = 'set output "' + graphFileName +'"\n' cmd += 'set pointsize 3 ' + '\n' cmd += 'set noautoscale ' + '\n' #unscaled axis distances outfile.write(cmd) outfile.write('plot "'+infilename+ '" with linespoints\n') outfile.flush() cmd = "gnuplot " + outfilename os.system(cmd) cmd = "ps2gif " + "pathGraph.eps" os.system(cmd) cmd = "rm -f p2f*.pbm" os.system(cmd) def pathMatrix(a,b,distance): #WARNING: In contrast to sequence alignment, in time warping #we have D[(i,j)] = time warp distance between a[0],...,a[i] #and b[0],...,b[j] for all 0<=ii>n and 0>j>m a = a+a[len(a)-1] b = b+b[len(b)-1] #pad a,b with their last letter for base case treatment n = len(a); m = len(b) E = {}; forwardArrowLists = {} E[(n-1,m-1)] = 0 for j in range(m-2,-1,-1): E[(n-1,j)] = E[(n-1,j+1)]+TAUover2*distance[(a[n-1],b[j])] forwardArrowLists[(n-1,j)] = [(n-1,j+1)] for i in range(n-2,-1,-1): E[(i,m-1)] = E[(i+1,m-1)]+TAUover2*distance[(a[i],b[m-1])] forwardArrowLists[(i,m-1)] = [(i+1,m-1)] for i in range(n-2,-1,-1): for j in range(m-2,-1,-1): #determine minimum min = INF x = E[(i+1,j+1)] + TAU*distance[(a[i],b[j])] if x < min: min = x x = E[(i+1,j)] + TAUover2*distance[(a[i],b[j])] if x < min: min = x x = E[(i,j+1)] + TAUover2*distance[(a[i],b[j])] if x < min: min = x E[(i,j)] = min #now determine back arrows forwardArrowLists[(i,j)] = [] x = E[(i+1,j+1)] + TAU*distance[(a[i],b[j])] if x == min: forwardArrowLists[(i,j)].append( (i+1,j+1) ) x = E[(i+1,j)] + TAUover2*distance[(a[i],b[j])] if x == min: forwardArrowLists[(i,j)].append( (i+1,j) ) x = E[(i,j+1)] + TAUover2*distance[(a[i],b[j])] if x == min: forwardArrowLists[(i,j)].append( (i,j+1) ) return (E,forwardArrowLists) def align(a,b,D,backArrowLists): a = a[0] + a #pad a with repeat of the first letter b = b[0] + b #pad b with repeat of the first letter n = len(a); m = len(b); x = n-1; y = m-1; A = []; B = []; C = [] #A is usual (sequence) alignment presentation of time warping align. #B is time warping presentation #C list of pairs (x,y) of coordinates for path graph while x>0 and y>0: #print backArrowLists[(x,y)] #x0,y0 = random.choice( backArrowLists[(x,y)] ) x0,y0 = backArrowLists[(x,y)][0] #pick first direction, which preferentially is diagonal if #that appears in the list if x0+1==x and y0+1==y: A.append( (a[x],b[y]) ) B.append( (a[x],b[y]) ) C.append( (1,1) ) x = x0; y = y0 elif x0==x and y0+1==y: A.append( ('-',b[y]) ) B.append( (a[x],b[y]) ) C.append( (0,1) ) y = y0 elif x0+1==x and y0==y: A.append( (a[x],'-') ) B.append( (a[x],b[y]) ) C.append( (1,0) ) x = x0 while x>0 and y==0: A.append( (a[x],'-') ) B.append( (a[x],b[y]) ) x -= 1 while x==0 and y>0: A.append( ('-',b[y]) ) B.append( (a[x],b[y]) ) y -= 1 # A.append( (a[0],b[0]) ) # B.append( (a[0],b[0]) ) # C.append( (1,1) ) A.reverse() B.reverse() C.reverse() path = [] xCoord = 0; yCoord = 0 #coordinates of points in path graph path.append( (xCoord,yCoord) ) #start at origin for (deltaX,deltaY) in C: xCoord += deltaX; yCoord += deltaY path.append( (xCoord,yCoord) ) return (A,B,path) def reverseAlign(a,b,E,forwardArrowLists): a = a + a[-1] #pad a with repeat of the last letter b = b + b[-1] #pad b with repeat of the last letter n = len(a); m = len(b); x = 0; y = 0; A = []; B = []; C = [] #A is usual (sequence) alignment presentation of time warping align. #B is time warping presentation #C list of pairs (x,y) of coordinates for path graph while x