#! /usr/bin/env python # P.Clote #computeExpNumNearestNhborsFromFASTAfile.py import sys,math R = 0.0019872370936902486 T = 310 THETA = 3 DEBUG = False def canBasePair(X,Y): if (X,Y) in [('A','U'),('U','A'),('C','G'),('G','C'),('G','U'),('U','G')]: return True else: return False def printValues(D,rna): n = len(rna) for i in range(1,n+1): for j in range(1,n+1): if j 3, compute S[(i,j)] all 1<=i<=j<=n for d in range(THETA+1,n+1): for i in range(1,n+1): #i in [1,...,n-THETA-1] j = i+d if (j>n): break s = S[(i,j-1)] #case 1: j unpaired in [i,j] if canBasePair(rna[i],rna[j]): s += S[(i+1,j-1)] #case 2: (i,j) is base paired for k in range(i+1,j-THETA): if canBasePair(rna[k],rna[j]): s += S[(i,k-1)]*S[(k+1,j-1)] #case 3: (k,j) is base paired S[(i,j)] = s #inductive case j-i > 3, compute Q[(i,j)] all 1<=i<=j<=n for d in range(THETA+1,n+1): for i in range(1,n+1): j = i+d if (j>n): break q = Q[(i,j-1)] if canBasePair(rna[i],rna[j]): q += 2*S[(i+1,j-1)]+Q[(i+1,j-1)] for k in range(i+1,j-THETA): if canBasePair(rna[k],rna[j]): q += 2*S[(i,k-1)]*S[(k+1,j-1)]+S[(i,k-1)]*Q[(k+1,j-1)]+Q[(i,k-1)]*S[(k+1,j-1)] Q[(i,j)] = q return Q,S def aux(rna): Q,S = compute(rna) n = len(rna) q = Q[(1,n)] s = S[(1,n)] #print q,s,(q/s) if DEBUG: print "\nQ" printValues(Q,rna) print "\nS" printValues(S,rna) expectedNumNhbors = q/s return expectedNumNhbors def main(filename): file = open(filename) line = file.readline() while line: line = line.strip() assert(line[0]=='>') FASTA = line[1:].strip() rna = file.readline().strip().upper() expectedNumNhbors = aux(rna) print expectedNumNhbors line = file.readline() file.close() if __name__ == '__main__': if len(sys.argv) < 2: print "Usage: %s filenameOfRNAsInFASTAformat" % sys.argv[0] sys.exit(1) filename = sys.argv[1] main(filename)