/*------------------------------------------------------------- Copyright (C) 2005 Fabrizio Ferre'. All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for NON-COMMERCIAL purposes and without fee is hereby granted provided that this copyright notice appears in all copies. THE AUTHOR AND PUBLISHER MAKE NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. THE AUTHORS AND PUBLISHER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES. -------------------------------------------------------------*/ /************************************************* Program: timeWarpingBoltzmann_2.cpp Fabrizio Ferre, 17 Jan 2005 Program for symmetric time warping of gene expression time series. Algorithm described in a paper by Peter Clote and Jurg Straubhaar in Journal of Mathematical Biology (2005). A traceback is used to determine the alignment, and is determined by choosing that direction for which F(i,j) is minimum, i.e.: __ | | F(i-1,j-1) + (tau+mu)/2)*rho(a_i,b_j) (diagonal) F(i,j) = MIN of | F(i-1,j) + (tau/2)*(rho(a_i,b_j)+rho(a_i,b_j+1))/2 (vertical) | F(i,j-1) + (mu/2)*(rho(a_i,b_j)+rho(a_i+1,b_j))/2 (horizontal) |__ where tau,mu>0 are time intervals for time series a,b respectively and rho is a distance metric (in this case is equal to abs(x-y)). Tau and mu values are computed as time ponts intervals - i.e. if time points are t(0), t(1), .., t(N) => tau(i) = t(i+1)-t(i). Backward matrix B is computed to show that the time warping is symmetric. Symmetry is necessary for the computation of Boltzmann pair probabilities Additionally, we compute the forward partition FZ and backward partition BZ functions, as well as the Boltzmann pair probabilities Pr_A[a_i,b_j] of time warping a_i with b_j in optimal time warping A. *************************************************/ #include #include #include // def of RAND_MAX #include #include "timeWarpingBoltzmann_2.h" int main(int argc, char *argv[]) { /** variable declarations **/ int counterA=1,counterB=1; /** time series size counters **/ double forwardScore, backwardScore; /** time warping distance score **/ int pathSize; /** size of optimal path **/ double Zf, Zb; /** partition function **/ float sc; /** scaling factor **/ double mean; /** mean of gene expression time series **/ double stdev; /** stdev of gene expression time series **/ /** constants **/ const float T = 0.5; /** Temperature (used for computing Z) **/ const float k = 1.0; /** Boltzmann constant **/ const float scaleTiA = 1.0; /** scale the time intervals such that tiA[1] = scaleTiA(tpA[2]-tpA[1]) **/ const float scaleTiB = 1.0; /** scale the time intervals such that tiB[1] = scaleTiB(tpB[2]-tpB[1]) **/ /** arrays and matrices **/ float faA[N],faB[N]; /** arrays for the expression values points **/ double naA[N],naB[N]; /** arrays for the expression values Zscores **/ float tpA[N],tpB[N]; /** arrays for the time points (minutes) **/ float tiA[N],tiB[N]; /** arrays for time intervals **/ double f[N][N]; /** forward matrix **/ double b[N][N]; /** backward matrix **/ double fz[N][N]; /** forward partition function **/ double bz[N][N]; /** backward partition function **/ char arrows[N][N]; /** matrix for storing the traceback arrows (in the form of characters d=diag, u=up, l=left) **/ int aout[N],bout[N]; /** arrays used for storing the time points alignment **/ double Prob[N]; /** Boltzmann's probabilities of the optimal alignment **/ faA[0] = 0; faB[0] = 0; /** for array padding **/ /**** Error handling for input file *****/ if (argc != 3) { fprintf(stderr,"Usage: %s timeSerie1 timeSerie2\n",argv[0]); fprintf(stderr,"\ntimeSeries format:\n"); fprintf(stderr,"#expName time0 time1 ... timeN\n"); fprintf(stderr,"#geneName Zscore0 Zscore1 .. ZscoreN\n"); exit(1); } /*** reading input files ***/ counterA = parseInputFile(argv[1], tpA, faA); counterB = parseInputFile(argv[2], tpB, faB); /*** padding and time intervals arrays computation ***/ meanStdDev(faA, counterA, &mean, &stdev); normalize(faA, counterA, naA, mean, stdev); naA[0] = naA[1]; naA[counterA] = naA[counterA-1]; /** array padding **/ computeTimeIntervals(tpA, tiA, counterA, scaleTiA); tiA[0] = tiA[1]; tiA[counterA-1] = tiA[counterA-2]; tiA[counterA] = tiA[counterA-1]; // int i; // for(i=0;i<=counterA;i++) { // tiA[i] = (10.0); // } meanStdDev(faB, counterB, &mean, &stdev); normalize(faB, counterB, naB, mean, stdev); naB[0] = naB[1]; naB[counterB] = naB[counterB-1]; /** array padding **/ computeTimeIntervals(tpB, tiB, counterB, scaleTiB); tiB[0] = tiB[1]; tiB[counterB-1] = tiB[counterB-2]; tiB[counterB] = tiB[counterB-1]; // for(i=0;i<=counterB;i++) { // tiB[i] = (40.0/3.0); // } counterA ++; counterB ++; /** scale the Zscores **/ sc = scale(naA, naB, counterA, counterB); /** initialize and fill the forward path matrix **/ forwardScore = forwardPathMatrix(f, arrows, naA, naB, tiA, tiB, counterA, counterB, sc); /** initialize and fill the backward path matrix **/ backwardScore = backwardPathMatrix(b, naA, naB, tiA, tiB, counterA, counterB, sc); /** traceback **/ pathSize = traceback(arrows, aout, bout, counterA, counterB); /** print alignment **/ printAlignment(aout, bout, pathSize); /** forward and backward partition functions **/ Zf = forwardPartitionFunction(fz, naA, naB, T, k, tiA, tiB, counterA, counterB, sc); Zb = backwardPartitionFuntion(bz, naA, naB, T, k, tiA, tiB, counterA, counterB, sc); /** pair probabilities of the optimal path **/ reverseIntArray(aout, pathSize); reverseIntArray(bout, pathSize); computePairAlignmentProb(fz, bz, aout, bout, pathSize, Prob, naA, naB, T, k, tiA, tiB, sc); /** print the outputs **/ printf("Time Warping distance -- forward: %f\n",forwardScore); printf("Time Warping distance -- backward: %f\n",backwardScore); printf("\n"); printf("Forward Partition Function: %e\n",Zf); printf("Backward Partition Function: %e\n",Zb); printf("\n"); printPairProbabilities(aout, bout, Prob, pathSize); return(0); } /**** Functions ****/ int parseInputFile(char fileName[], float tp[], float fa[]) { FILE *in; int cA,cB; char inString[lenLine]; char *ptok; float ff; int counter; /***** Initialization of input file and error checking *****/ if(!(in=fopen(fileName, "r"))) { fprintf(stderr,"ERROR: Unable to open %s for input.\n", fileName); exit(1); } cA = 0; while(fgets(inString,lenLine,in)) { cA++; if (cA==1) { cB = 1; ptok = strtok(inString," \t"); ptok = strtok(NULL," \t"); while (ptok!=NULL) { ff = atof(ptok); tp[cB] = ff; cB++; ptok = strtok(NULL," \t"); } counter = cB; } else { cB = 1; ptok = strtok(inString," \t"); ptok = strtok(NULL," \t"); while (ptok!=NULL) { ff = atof(ptok); fa[cB] = ff; cB++; ptok = strtok(NULL," \t"); } } } return(counter); } void meanStdDev(float A[], int cA, double *mean, double *stdev) { float sum = 0.0; float sumSqr = 0.0; float variance; int i; for (i=1;i max) max = A[i]; if (A[i] < min) min = A[i]; } for (i=1;i max) max = B[i]; if (B[i] < min) min = B[i]; } sc = 1/(sqrt(n*pow(max-min,2))); return (sc); } float distance(double a, double b, float sc) { return( sc * fabs(a-b) ); } double forwardPathMatrix(double m[][N],char t[][N],double A[],double B[],float T1[],float T2[], int cA,int cB, float sc) { int i,j; float diag,down,right,min; /** initialize the forward path matrix **/ m[0][0] = 0; t[0][0] = 'd'; for (i=1;i0;i--) { m[i][cB-1] = m[i+1][cB-1] + ((T1[i-1]/2) * distance(A[i],B[cB-1],sc)); } for (j=cB-2;j>0;j--) { m[cA-1][j] = m[cA-1][j+1] + ((T2[j-1]/2) * distance(A[cA-1],B[j],sc)); } /** fill the backward path matrix **/ for (i=cA-2;i>0;i--) { for (j=cB-2;j>0;j--) { diag = m[i+1][j+1] + ((T1[i-1]+T2[j-1])/2) * (distance(A[i],B[j],sc)); down = m[i+1][j] + (T1[i-1]/2) * (distance(A[i],B[j],sc) + distance(A[i],B[j-1],sc))/2; right = m[i][j+1] + (T2[j-1]/2) * (distance(A[i],B[j],sc) + distance(A[i-1],B[j],sc))/2; min = MIN3(diag,down,right); if (min == diag) { m[i][j] = diag; } else if (min == down) { m[i][j] = down; } else { m[i][j] = right; } } // end j for loop } // end i for loop /** return the time warping distance **/ return m[1][1]; } double backwardPartitionFuntion(double m[][N],double A[],double B[],float T,float k,float T1[],float T2[],int cA,int cB, float sc) { int i,j; double diag,down,right; /** initialize the backward path matrix **/ m[cA-1][cB-1] = 1; for (i=cA-2;i>0;i--) m[i][cB-1] = m[i+1][cB-1] * (pow(M_E,-((T1[i-1]/2) * distance(A[i],B[cB-1],sc))/(k*T))); for (j=cB-2;j>0;j--) m[cA-1][j] = m[cA-1][j+1] * (pow(M_E,-((T2[j-1]/2) * distance(A[cA-1],B[j],sc))/(k*T))); /** fill the backward path matrix **/ for (i=cA-2;i>0;i--) { for (j=cB-2;j>0;j--) { diag = m[i+1][j+1] * pow(M_E,-(((T1[i-1]+T2[j-1])/2) * distance(A[i],B[j],sc)/(k*T))); down = m[i+1][j] * pow(M_E,-((T1[i-1]/2) * ((distance(A[i],B[j],sc)+distance(A[i],B[j-1],sc))/2)/(k*T))); right = m[i][j+1] * pow(M_E,-((T2[j-1]/2) * ((distance(A[i],B[j],sc)+distance(A[i-1],B[j],sc))/2)/(k*T))); m[i][j] = diag+down+right; } // end j for loop } // end i for loop /** return the time warping distance **/ return m[1][1]; } void computePairAlignmentProb(double fz[][N], double bz[][N], int ao[], int bo[], int sz, double Prob[], double A[], double B[], float T, float k, float T1[], float T2[], float sc) { int i,j,c,o; double score; const double Z=bz[1][1]; for (c=0;c-1;i--) { for(j=cB;j>-1;j--) { printf("%d,%d,%f\n",i,j,m[i][j]); } } } int traceback(char t[][N], int ao[], int bo[], int cA, int cB) { int i = cA-2; int j = cB-2; int sz = 1; ao[0] = i; bo[0] = j; while (i>1 && j>1) { if(t[i][j] == 'd') { i--; j--; } else if(t[i][j] == 'u') { i--; } else if(t[i][j] == 'l') { j--; } ao[sz] = i; bo[sz] = j; sz++; } while (i>1) { i--; ao[sz] = i; bo[sz] = j; sz++; } while (j>1) { j--; ao[sz] = i; bo[sz] = j; sz++; } return(sz); } void printAlignment(int ao[], int bo[], int sz) { int i,j; printf("\nTime points alignment:\n"); for (i=sz-1;i>=0;i--) { if(ao[i] == 0) printf("1\t"); else printf("%d\t",ao[i]); } printf("\n"); for (j=sz-1;j>=0;j--) { if(bo[j] == 0) printf("1\t"); else printf("%d\t",bo[j]); } printf("\n\n"); }