# W A Lorenz 10-2009
# Quick class that is useful for parsing various
# faa-like (FASTA-style) formats from a file.

from subprocess import Popen, PIPE


class parseFaa:
    """Sequential reader for FASTA-like RNA record files.

    Lines are classified by their first character only:

    * ``>`` followed by at least two whitespace-separated tokens is a header
      line; the record name is the SECOND token (e.g. ``> name``).  A header
      consisting of a single token (e.g. ``>name``) is ignored.
    * ``.``, ``(`` or ``)`` starts a structure line (dot-bracket notation).
    * ``A``, ``C``, ``G`` or ``U`` (either case) starts a sequence line.
    * Anything else (blank lines, comments, ...) is skipped.

    For sequence and structure lines only the first whitespace-separated
    token is kept.

    Every ``next*`` method returns a tuple of empty strings once the parser
    is exhausted or closed.  The file is closed automatically when end of
    file is reached; ``close()`` or a ``with`` block closes it earlier.
    """

    _SEQ_START = ('A', 'C', 'G', 'U')
    _STRUC_START = ('.', '(', ')')

    def __init__(self, fileName):
        """Open *fileName* for reading; raises IOError/OSError on failure."""
        self.fileName = fileName
        self.file = open(fileName, 'r')
        self.done = False

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    @staticmethod
    def _classify(line):
        """Return 'name', 'struc', 'seq' or None for a non-empty raw line."""
        first = line[0]
        if first == '>':
            # A header only counts when it carries a second token (the name).
            if len(line.split()) > 1:
                return 'name'
            return None
        if first in parseFaa._STRUC_START:
            return 'struc'
        if first.upper() in parseFaa._SEQ_START:
            return 'seq'
        return None

    def _readLine(self):
        """Read one raw line; at EOF close the file, set ``done``, return ''."""
        line = self.file.readline()
        if not line:
            self.file.close()
            self.done = True
        return line

    def next(self):
        """Return ``(seq, name)`` for the next sequence line.

        ``name`` is taken from the most recent header line seen during this
        call ('' if none).  At end of file ``seq`` is ''; once the parser is
        done the result is ``('', '')``.
        """
        if self.done:
            return ('', '')
        seq = ''
        name = ''
        while True:
            line = self._readLine()
            if not line:
                break
            kind = self._classify(line)
            if kind == 'name':
                name = line.split()[1]
            elif kind == 'seq':
                seq = line.split()[0]
                break
        return (seq, name)

    def nextSeqStrMFE(self):
        """Return ``(seq, name, struc, strucMFE)`` for the next record.

        Reads until both a sequence line and a structure line have been seen
        (in either order), then pipes the sequence to the external
        ``RNAfold`` program and takes the first token of the second line of
        its output as ``strucMFE``.

        Returns ``('', '', '', '')`` if the parser is done or the file ends
        before a complete record is read.  Raises OSError if ``RNAfold``
        cannot be started and IndexError if its output has an unexpected
        shape.
        """
        if self.done:
            return ('', '', '', '')
        seq = ''
        name = ''
        struc = ''
        seqDone = False
        strucDone = False
        while not (seqDone and strucDone):
            line = self._readLine()
            if not line:
                # Incomplete trailing record: report exhaustion.
                return ('', '', '', '')
            kind = self._classify(line)
            if kind == 'name':
                name = line.split()[1]
            elif kind == 'struc':
                struc = line.split()[0]
                strucDone = True
            elif kind == 'seq':
                seq = line.split()[0]
                seqDone = True
        # Text-mode pipes so that str input/output works on Python 2 and 3
        # (binary pipes reject str on Python 3).
        p = Popen("RNAfold", stdin=PIPE, stdout=PIPE, universal_newlines=True)
        (pout, perr) = p.communicate(seq + '\n')
        strucMFE = pout.split('\n')[1].split()[0]
        return (seq, name, struc, strucMFE)

    def nextSeq2Str(self):
        """Return ``(seq, name, struc, struc2)`` for the next record.

        The first structure line read becomes ``struc``; every later
        structure line overwrites ``struc2``.  Reading stops once a sequence
        line and at least two structure lines have been seen.

        Returns ``('', '', '', '')`` if the parser is done or the file ends
        before a complete record is read.
        """
        if self.done:
            return ('', '', '', '')
        seq = ''
        name = ''
        struc = ''
        struc2 = ''
        seqDone = False
        strucDone = False
        struc2Done = False
        while not (seqDone and struc2Done):
            line = self._readLine()
            if not line:
                return ('', '', '', '')
            kind = self._classify(line)
            if kind == 'name':
                name = line.split()[1]
            elif kind == 'struc':
                if not strucDone:
                    struc = line.split()[0]
                    strucDone = True
                else:
                    struc2 = line.split()[0]
                    struc2Done = True
            elif kind == 'seq':
                seq = line.split()[0]
                seqDone = True
        return (seq, name, struc, struc2)

    def nextall(self):
        """Return ``(seq, name, seqline, nameline)`` for the next sequence.

        Like ``next()`` but additionally returns the raw (unstripped) sequence
        and header lines.  Returns ``('', '', '', '')`` once the parser is
        done.
        """
        if self.done:
            return ('', '', '', '')
        seq = ''
        name = ''
        seqline = ''
        nameline = ''
        while True:
            line = self._readLine()
            if not line:
                break
            kind = self._classify(line)
            if kind == 'name':
                name = line.split()[1]
                nameline = line
            elif kind == 'seq':
                seq = line.split()[0]
                seqline = line
                break
        return (seq, name, seqline, nameline)

    def close(self):
        """Close the file and mark the parser as exhausted.

        Setting ``done`` makes later ``next*`` calls return their empty
        sentinel instead of failing on the closed file.
        """
        self.file.close()
        self.done = True