#! /usr/bin/env python # generalQuasiRandomSaturatedStructure.py # P.Clote """ Program generates quasi-random saturated structure as follows. Generate an initial list $L$ of all allowable base pairs $(i,j)$ with $1 \leq i < j \leq n$ and $j \geq i+\theta+1$. Create a saturated structure by repeately picking a base pair from $L$, adding it to an initially empty structure $S$, then removing from $L$ all base pairs that form a crossing (pseudoknot) with the base pair just selected. This ensures that the next time a base pair from $L$, it can be added to $S$ without violating the definition of secondary structure. Iterate this procedure until $L$ is empty to form the stochastic saturated structure $S$. Taking an average over 100 repetitions, we have computed the average number of base pairs and the standard deviation for $n =10, 100, 1000$. Results are $\mu = 0.323$, $\sigma= 0.0604$ for $n=10$, $\mu = 0.3526$, $\sigma= 0.0386$ for $n=100$ and $\mu = 0.35618$, $\sigma= 0.0361$ for $n=1000$. """ import sys,os,random,stats THETA = 1 def basePairList2Vienna(n,S): L = list('$' + ('.'*n)) for (i,j) in S: L[i] = '(' L[j] = ')' return "".join(L[1:]) def initializePossibleBasePairs(n): L = [] for i in range(1,n-THETA): #i in [1,n-THETA-1] for j in range(i+THETA+1,n+1): #j in [i+THETA+1,n] L.append( (i,j) ) return L def removeClashes(L,(x,y)): L0 = [] for (i,j) in L: if not( i<=x<=j<=y or x<=i<=y<=j ): #no clash L0.append( (i,j) ) return L0 def randomGreedySatStr(n): L = initializePossibleBasePairs(n) N = len(L) S = [] #S is sec str while N != 0: k = random.randint(0,N-1) (x,y) = L[k] S.append( (x,y) ) L = L[:k]+L[k+1:] L = removeClashes(L,(x,y)) N = len(L) S = basePairList2Vienna(n,S) return S def numBasePairsInRandomGreedySatStr(n): L = initializePossibleBasePairs(n) N = len(L) numBasePairs = 0 while N != 0: k = random.randint(0,N-1) (x,y) = L[k] numBasePairs += 1 L = L[:k]+L[k+1:] L = removeClashes(L,(x,y)) N = len(L) return numBasePairs def expectedNumBasePairs(n,N): L = [] for i in range(N): L.append( numBasePairsInRandomGreedySatStr(n)/float(n) ) mean,stdev,min,max = stats.getSampleStats(L) return mean,stdev def main(n,N): mean,stdev = expectedNumBasePairs(n,N) print mean,stdev if __name__ == '__main__': if len(sys.argv) < 2: print "Usage: %s lenOfRNA [numIterations]" % sys.argv[0] sys.exit(1) n = int(sys.argv[1]) if len(sys.argv)>2: N = int(sys.argv[2]) else: N = 100 main(n,N)