#! /usr/bin/env python # dijkstraMorganHiggsViaSamplingUsingRNAsubopt.py # P.Clote import sys,os,misc from greedyPathMorganHiggs import greedy from samplingMethods import RNAsubopt DEBUG = 0 def printList(L): for x in L: print x def setMinus(A,B): #return A-B L = [] for x in A: if x not in B: L.append(x) return L def aux0(filename): #read FASTA file with FASTA comment, RNA and 2 sec str A,B #FASTA comment mandatory file = open(filename) line = file.readline().strip() assert( line[0]=='>' ) FASTA = line[1:] rna = file.readline().strip() A = file.readline().strip() B = file.readline().strip() file.close() return FASTA,rna,A,B def aux(filename): #read FASTA file with RNA and many sampled sec str #FASTA comment mandatory SecStrList = [] file = open(filename) line = file.readline().strip() assert( line[0]=='>' ) FASTA = line[1:] rna = file.readline().strip() line = file.readline() while line: secStr = line.strip() SecStrList.append( secStr ) line = file.readline() file.close() return FASTA,rna,SecStrList def dijkstra(rna,A,B,SecStrList): #following ensures A,B are in first,second pos of list if B not in SecStrList: SecStrList = [B] + SecStrList else: SecStrList.remove(B) SecStrList = [B] + SecStrList if A not in SecStrList: SecStrList = [A] + SecStrList else: SecStrList.remove(A) SecStrList = [A] + SecStrList #Ensure that A,B are first two str in SecStrList #(1) Compute edge weights n = len(SecStrList) nodes = range(n) #List [0,1,...,n-1] of indices of sec str Edges = {} #Edge weight matrix, Edges[(i,j)] = wt Dist = {} #Dist[X] is shortest path wt from A to X Q = nodes #List of indices of nodes reachable from A Prev = {} #Dictionary for predecessor mfe = misc.RNAfold(rna)[1] #minimum free energy for i in range(n): Dist[i] = sys.maxint for j in range(i+1,n): X = SecStrList[i] Y = SecStrList[j] count,maxFreeEnergy,barrierStr,Path,FreeEnergy = greedy(rna,X,Y) Edges[(i,j)] = maxFreeEnergy - mfe #barrier height for greedy path Prev[j] = i for i in range(n): for j in range(i): Edges[(i,j)] = Edges[(j,i)] #symmetrize Prev[j] = i Prev[0] = 0 #predecessor of node A is itself Dist[0] = 0 #distance from A to itself is 0 #(2) Now repeat frontier extension while Q != []: if DEBUG: print "Q",Q minIndex = n #bogus node minDist = sys.maxint for x in Q: #now determine node in Q with smallest distance to source A if Dist[x] maxEnergy: maxEnergy = energy print "%s\t%.2f" % (secStr,energy) print "Max energy %.2f" % maxEnergy def main(filename,numSamples): FASTA,rna,A,B = aux0(filename) SecStrList = RNAsubopt(rna,numSamples) rna,A,B,path,SecStrList = dijkstra(rna,A,B,SecStrList) CompletePath = completePath(rna,A,B,path,SecStrList) print "> %s" % FASTA printCompletePathWithEnergies(rna,CompletePath) if __name__ == '__main__': if len(sys.argv) < 2: text = """Usage: %s file [numSamples] 1) file: FASTA, rna, A, B each on 1 line A is source, B is target sec str 2) numSamples: Morgan Higgs method depends on number of sampled str. (default number sampled str is 100 -- Warning: may be fewer since only distinct structures considered.) Program implements the 'indirect path' search algorithm of Morgan and Higgs, 'Barrier heights between ground states in a model of RNA secondary structure' in J. Phys. A: Math. Gen. 31 (1998) 3153-3170. This program calls program greedyPathMorganHiggs.py, which computes the greedy direct path between source A and target B, for any given A,B, where a direct path (trajectory) A = S0,S1,...,Sn = B has property that base pair distance dist(Si,S_{i+1})=1 and each Si contains base pairs from either A or B; i,e,. Si subseteq A union B. The indirect path from A to B is obtained by computing all greedy direct paths between structures structures sampled on the fly, and using Dijkstra to piece together optimal path consisting of greedy hops between sampled structures. """ print text % sys.argv[0] sys.exit(1) filename = sys.argv[1].strip() if len(sys.argv)>2: numSamples = int(sys.argv[2]) else: numSamples = 10 main(filename,numSamples)