#! /usr/bin/python # Copyright (C) 2005 Peter Clote # All Rights Reserved. # # Permission to use, copy, modify, and distribute this # software for NON-COMMERCIAL purposes # and without fee is hereby granted provided that this # copyright notice appears in all copies. If you use, modify # or extend this code, please cite the reference # "Symmetric time warping, Boltzmann pair probabilities and # functional genomics", by P. Clote and J. Straubhaar, # submitted to Journal of Mathematical Biology. # # THE AUTHOR MAKES NO REPRESENTATIONS OR # WARRANTIES ABOUT THE SUITABILITY OF THE SOFTWARE, EITHER # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE # IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A # PARTICULAR PURPOSE, OR NON-INFRINGEMENT. THE AUTHOR # SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED # BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING # THIS SOFTWARE OR ITS DERIVATIVES. # # # # getPAMmatrix.py # P.Clote import string,sys def readStrings(fileName): L = [] file = open(fileName) line = file.readline() while line: L.append(line[:-1]) line = file.readline() return L def getPAMmatrix(filename): #---------------------------------------------------------------------- #input PAM matrix into dictionary "pam" whose keys are #pairs (i,j) of single-letter amino acid codes pam={} file = open(filename,"r") line=file.readline() AAs=string.split(line) #list of amino acids for key1 in AAs: line=file.readline() pamScores=string.split(line)[1:] for i in range(len(pamScores)): key2=AAs[i] pam[(key1,key2)]=int(pamScores[i]) #now symmetrify PAM matrix for i in range(len(AAs)): for j in range(i+1,len(AAs)): pam[(AAs[i],AAs[j])]=pam[(AAs[j],AAs[i])] file.close() #print #print AAs #for x in AAs: # sys.stdout.write("%s " % x) # for y in AAs: sys.stdout.write("%3.0f" % pam[(x,y)]) # print return(pam) #end of getPAMmatrix() #---------------------------------------------------------------------- if __name__ == '__main__': if len(sys.argv) != 4: print "Usage: %s file integer aminoAcid" % sys.argv[0] print """ 1) file contains equal length amino acid sequences 2) integer 0 <= i < n, where n is sequence length 3) amino acid is single letter code """ sys.exit(1) fileName = sys.argv[1] alphabet = aminoAcidAndGeneticCodes.singleLetterAAcodes n = int(sys.argv[2]) aa = sys.argv[3] L = readStrings(fileName) pam = getPAMmatrix('pam250.txt') if n>= len(L[0]): print "Try smaller n" sys.exit(1) for s in L: print pam[(s[n],aa)]