#! /usr/bin/env python # greedyPathMorganHiggs.py # P.Clote # compute the greedy path using Morgan-Higgs algorithm. import sys,os,misc,random from misc import energyOfStr DEBUG = 0 def aux(filename): #aux reads a FASTA format file containing RNA sequence and sec str A,B #goal is to find path from A to B file = open(filename) FASTA = file.readline().strip() assert( FASTA[0] == '>' ) rna = file.readline().strip() secStr1 = file.readline().strip() secStr2 = file.readline().strip() file.close() return FASTA,rna,secStr1,secStr2 def ConflictingBP((i,j),L): #return list of base pairs in L that conflict with (i,j) ConflictList = [] for (x,y) in L: if i<=x<=j<=y or x<=i<=y<=j: ConflictList.append( (x,y) ) return ConflictList def clash( (x,y),A): #A is sec str represented as list of base pairs #determine whether there exists (i,j) in A such that im or (n==m and i>a): return +1 return 0 def computeAndSortConflictLists(A,B): #return maximum size conflicting base pair lists #will choose randomly one from this list L = [] for (i,j) in B: X = ConflictingBP( (i,j), A ) if X != []: L.append( ((i,j),X,len(X)) ) if L == []: return L L.sort(cmp) MinConflict = [] num = L[0][-1] #max number of conflicting bp in A for a bp in B for x in L: if x[-1]==num: MinConflict.append(x) return MinConflict def basePairList2dbn(L,n): #return dot bracket notation for base pair list X = ['.']*n for (i,j) in L: X[i] = '('; X[j] = ')' return "".join(X) def setMinus(A,B): #return A-B L = [] for x in A: if x not in B: L.append(x) return L def insert(x,L): #assuming that L is sorted, insert new element x into L #this assumes that x not in L for i in range(len(L)): if xmaxFreeEnergy: maxFreeEnergy = freeEnergy barrierStr = secstr if DEBUG: print "removing (%d,%d)" % (x,y) print "A: ",secstr for (x,y) in B0: if not clash( (x,y),A): assert( (x,y) not in A) count += 1 A = insert((x,y),A) #insert new base pair into SORTED base pair list secstr = basePairList2dbn(A,n) Path.append(secstr) freeEnergy = energyOfStr(rna,secstr) FreeEnergy.append(freeEnergy) if freeEnergy>maxFreeEnergy: maxFreeEnergy = freeEnergy barrierStr = secstr if DEBUG: print "inserting (%d,%d)" % (x,y) print "A: ",secstr else: #L is [] so can add remaining base pairs from B into A #as well, must remove those remaining non-conflicting basepairs #in A-B for (x,y) in B0: count += 1 A = insert( (x,y), A ) #insert and keep A sorted secstr = basePairList2dbn(A,n) Path.append(secstr) freeEnergy = energyOfStr(rna,secstr) FreeEnergy.append(freeEnergy) if freeEnergy>maxFreeEnergy: maxFreeEnergy = freeEnergy barrierStr = secstr if DEBUG: print "inserting (%d,%d)" % (x,y) print "A: ",secstr for (x,y) in A0: count += 1 A.remove( (x,y) ) secstr = basePairList2dbn(A,n) Path.append(secstr) freeEnergy = energyOfStr(rna,secstr) FreeEnergy.append(freeEnergy) if freeEnergy>maxFreeEnergy: maxFreeEnergy = freeEnergy barrierStr = secstr if DEBUG: print "removing (%d,%d)" % (x,y) print "A: ",secstr return count,maxFreeEnergy,barrierStr,Path,FreeEnergy def main(filename): FASTA,rna,ss1,ss2 = aux(filename) text = "Base pair distance: %d" print text % misc.basePairDistBetweenDotBracketNotations(ss1,ss2) count,maxFreeEnergy,barrierStr,DirectPath,FreeEnergy = greedy(rna,ss1,ss2) print "Path length from A to B: %d" % count print "Barrier energy: %f" % maxFreeEnergy print "Barrier structure: %s\n" % barrierStr for i in range(count+1): #print ">%d" % i print "%s\t%.2f" % (DirectPath[i], FreeEnergy[i]) print "max free energy of intermediate in path %f" % maxFreeEnergy if __name__ == '__main__': if len(sys.argv) < 2: print "Usage: %s filename" % sys.argv[0] print " File contains FASTA comment, RNA sequence and 2 sec str" print " each on separate line." sys.exit(1) filename = sys.argv[1] main(filename)