#! /usr/bin/env python

# numPrecisionTimeWarp.py
# P.Clote

# This program approximates the decimal place accuracy
# for the Boltzmann partition function computation.
# Assumptions: 
#  1) Average time warping distance between yeast and human genes is 40.
#  2) RT = 1
#  3) Number of alignments between two sequences of length n is numAlign()
"""
Suppose that (log) gene expression values are correct to within 
EPSILON accuracy. In the worst case scenario, the error in the
time warping distance between two length n time series of 
(log) expression values is bounded by n*EPSILON.
The number of alignments is exactly numAlign(n,n), hence the error
in the partition function computation is approximately bounded
by numAlign(n,n)*[ n*EPSILON ] / (RT exp(TWDISTANCE/RT))
where TWDISTANCE is the time warping distance between two series.

If the error in the time warping computation between 2 time series
is bounded by EPSILON, rather than n*EPSILON, then we have a less
draconian error bound. This program computes both values. As well,
we compute at the expected values when time warping Cho's yeast and
human data.
"""




import sys
from math import log,exp,sqrt

def asymptNumAlign(n):
  #Formula from Waterman for asymptotic number of alignments
  #of 2 sequences of length n
  return (1+sqrt(2))**(2*n+1)/sqrt(n)

def numAlign(n,m):
  #computes number of alignments of 2 sequences of lengths n,m
  D = {}
  D[(1,1)]=3
  for i in range(0,n+1): D[(i,0)]=1
  for j in range(0,m+1): D[(0,j)]=1
  for i in range(1,n+1):
    for j in range(1,m+1):
      D[(i,j)] =D[(i-1,j-1)]+D[(i,j-1)]+D[(i-1,j)]
#  for m in range(2,n+1):
#    print m,D[(m,m)],f(m)
  return D[(n,m)]


AVGDIST = 26          #average time warping distance
EPSILON = 0.001         #accuracy of gene expression measurements
LOWER = 1
UPPER = 20
n     = UPPER
for n in range(LOWER,UPPER+1):
  print n,numAlign(n,n),numAlign(n,n)*n*EPSILON/exp(AVGDIST)
print numAlign(17,13),numAlign(17,13)*20*EPSILON/exp(AVGDIST)
print
for n in range(LOWER,UPPER+1):
  print n,numAlign(n,n),numAlign(n,n)*EPSILON/exp(AVGDIST)
print numAlign(17,13),numAlign(17,13)*EPSILON/exp(AVGDIST)