#! /usr/bin/env python # expNumHairpinsInListOfStr.py # P.Clote # Program computes the expected number of hairpins in a list # of secondary structures, where all structures following FASTA # comment of form ">n" have length n. Used in conjunction with # program that (1) outputs ALL secondary structures of lengths # at most n (homopolymer model, theta=1), and another program # that (2) filters out all but the saturated structures from # previous list. import sys,os def numHairpins(secStr): num = 0; prev = 0 for i in range(len(secStr)): if secStr[i]=='(': prev = i+1 elif secStr[i]==')': if prev!=0: num += 1 prev = 0 return num def main(filename): file = open(filename) line = file.readline() D = {}; D[0]=0; length = 0 print ">0" while line: if line[0]=='>': keys = D.keys() keys.sort() #print avg from previous collection of structures avg = 0.0; n = 0.0 for k in keys: avg += D[k]*k n += D[k] if n==0: avg = 0 else: avg = avg/n if length == 0: avg = 0 else: avg = avg/length print avg #initialize for new data length = float(line.split()[-1]) D = {}; D[0]=0 print line.strip() else: secStr = line.strip() num = numHairpins(secStr) if num not in D.keys(): D[num] = 1 else: D[num] += 1 #print "%s\t%d" % (secStr,numHairpins(secStr)) line = file.readline() file.close() keys = D.keys() keys.sort() #print avg from previous collection of structures avg = 0.0; n = 0.0 for k in keys: avg += D[k]*k n += D[k] if n==0: avg = 0 else: avg = avg/n if length == 0: avg = 0 else: avg = avg/length print avg if __name__ == '__main__': if len(sys.argv) < 2: print "Usage: %s filename" % sys.argv[0] sys.exit(1) filename = sys.argv[1] main(filename)