/* This file takes a nucleotide data set and a tree (either from the data file or from a separate file) and computes maximum likelihood estimates for every possible 4x4 reversible model on that data and tree. We use the string (v1,v2,v3,v4,v5,v6), where and v1..6 = 0..5 to encode a 4x4 symmetric transition matrix with entries [* v1 v2 v3] [- * v4 v5] [- - * v6] [- - - * ] For instance: (010010) encodes HKY85. For each model the following information is reported: - Model string. (e.g. (012345) for the GRM) - Number of model parameters - Max ln-likelihood for the model - Likelihood ratio statistic (as a sub-model of the GRM) - AIC - P-Value for the Likelihood Ratio Test. Sergei L. Kosakovsky Pond, Summer 2002. */ function ReceiveJobs (sendOrNot) { if (MPI_NODE_COUNT>1) { MPIReceive (-1, fromNode, result_String); jobModelNum = MPINodeState[fromNode-1][1]; vv1 = MPINodeState[fromNode-1][2]; vv2 = MPINodeState[fromNode-1][3]; vv3 = MPINodeState[fromNode-1][4]; vv4 = MPINodeState[fromNode-1][5]; vv5 = MPINodeState[fromNode-1][6]; vv6 = MPINodeState[fromNode-1][7]; if (sendOrNot) { MPISend (fromNode,lf); MPINodeState[fromNode-1][1] = modelNum; MPINodeState[fromNode-1][2] = v1; MPINodeState[fromNode-1][3] = v2; MPINodeState[fromNode-1][4] = v3; MPINodeState[fromNode-1][5] = v4; MPINodeState[fromNode-1][6] = v5; MPINodeState[fromNode-1][7] = v6; } else { MPINodeState[fromNode-1][0] = 0; MPINodeState[fromNode-1][1] = -1; } ExecuteCommands (result_String); } else { jobModelNum = modelNum; } if (jobModelNum == 0) { stdl = lf_MLES[1][0]; fullnp = lf_MLES[1][1]+addOn; fprintf(stdout,"\n(012345) Full Model ln-lik = ",stdl,". Parameter Count=",Format(fullnp,0,0)," AIC = ", 2*(fullnp-stdl),"\n\n"); resultCache [0][0] = 1; resultCache [0][1] = 2; resultCache [0][2] = 3; resultCache [0][3] = 4; resultCache [0][4] = 5; resultCache [0][5] = lf_MLES[1][0]; resultCache [0][6] = lf_MLES[1][1]+addOn; resultCache [0][7] = 0; resultCache [0][8] = 0; fprintf (stdout,"\n# | Model | # prm | lnL | LRT | AIC | P-Value |"); fprintf (stdout,"\n----|----------|-------|-----------|----------------|------------|------------------|"); if (MPI_NODE_COUNT>1) { for (h=1; h<203; h=h+1) { lnL = resultCache[h][5]; if (lnL<0) { np = resultCache[h][6]; LRT = -2*(lnL-stdl); if (LRT<0) { LRT = 0; } AIC = -2*lnL+2*np; PRINT_DIGITS = 3; fprintf (stdout,"\n",h); PRINT_DIGITS = 1; fprintf (stdout," | (",0, resultCache[h][0], resultCache[h][1], resultCache[h][2], resultCache[h][3], resultCache[h][4],") | "); fprintf (stdout,Format (np,5,0)); PRINT_DIGITS = 8; fprintf (stdout, " | ",lnL," | ",Format(LRT,14,3), " | ", AIC, " | ", ); PRINT_DIGITS = 15; if (LRT==0) { pValue = 1; } else { pValue = 1-CChi2(LRT,fullnp-np); } fprintf (stdout,pValue," |"); resultCache [jobModelNum][7] = pValue; if (pValue1)&&(resultCache[0][5]>=0)) { resultCache [jobModelNum][0] = vv2; resultCache [jobModelNum][1] = vv3; resultCache [jobModelNum][2] = vv4; resultCache [jobModelNum][3] = vv5; resultCache [jobModelNum][4] = vv6; resultCache [jobModelNum][5] = lf_MLES[1][0]; resultCache [jobModelNum][6] = lf_MLES[1][1]+addOn; return fromNode - 1; } } np = lf_MLES[1][1]+addOn; lnL = lf_MLES[1][0]; LRT = -2*(lnL-stdl); if (LRT<0) { LRT = 0; } AIC = -2*lnL+2*np; PRINT_DIGITS = 3; fprintf (stdout,"\n",jobModelNum); PRINT_DIGITS = 1; fprintf (stdout," | (",vv1,vv2,vv3,vv4,vv5,vv6,") | "); fprintf (stdout,Format (np,5,0)); PRINT_DIGITS = 8; fprintf (stdout, " | ",lnL," | ",Format(LRT,14,3), " | ", AIC, " | ", ); PRINT_DIGITS = 15; if (LRT==0) { pValue = 1; } else { pValue = 1-CChi2(LRT,fullnp-np); } fprintf (stdout,pValue," |"); resultCache [jobModelNum][0] = vv2; resultCache [jobModelNum][1] = vv3; resultCache [jobModelNum][2] = vv4; resultCache [jobModelNum][3] = vv5; resultCache [jobModelNum][4] = vv6; resultCache [jobModelNum][5] = lf_MLES[1][0]; resultCache [jobModelNum][6] = lf_MLES[1][1]+addOn; resultCache [jobModelNum][7] = pValue; if (pValue 0) { #include "TemplateModels/defineGamma.mdl"; } ChoiceList (branchLengths,"Estimate Branch Lengths",1,SKIP_NONE, "Every Time","Branch lengths are reestimated for every model.", "Once","Branch lenghts obtained from the nucleotide GTR model and reused for subsequent models." ); if (branchLengths<0) { return; } rejectAt = 0; while ((rejectAt<=0)||(rejectAt>=1)) { fprintf (stdout, "\nModel rejection level (e.g. 0.05):"); fscanf (stdin,"Number", rejectAt); } SetDialogPrompt ("Save results to:"); fprintf (PROMPT_FOR_FILE, CLEAR_FILE); BASE_PATH = LAST_FILE_PATH; KEEP_OPTIMAL_ORDER = 1; MESSAGE_LOGGING = 0; global AC=1; global AT=1; global CG=1; global CT=1; global GT=1; global R=1; r = setElement (0,2,1); r = setElement (0,3,2); r = setElement (1,2,3); r = setElement (1,3,4); r = setElement (2,3,5); MG94custom = 0; MULTIPLY_BY_FREQS = PopulateModelMatrix ("MG94custom", observedFreq); vectorOfFrequencies = BuildCodonFrequencies (observedFreq); Model MG94customModel = (MG94custom,vectorOfFrequencies,0); USE_POSITION_SPECIFIC_FREQS = 1; Tree tr = treeString; addOn = 0; if (branchLengths) { global TreeScaler = 1; GTRMatrix = {{*,AC*nt,nt,AT*nt}{AC*nt,*,CG*nt,CT*nt}{nt,CG*nt,*,GT*nt}{AT*nt,CT*nt,GT*nt,*}}; DataSetFilter nucFilter = CreateFilter (filteredData,1); HarvestFrequencies (nucFreq,nucFilter,1,1,1); Model GTRModel = (GTRMatrix,nucFreq,1); givenTreeString = Format (tr,0,0); Tree nucTree = givenTreeString; LikelihoodFunction lfn = (nucFilter, nucTree); Optimize (nres,lfn); ReplicateConstraint ("this1.?.synRate:=this2.?.nt__/TreeScaler",tr,nucTree); addOn = nres[1][1]-nres[1][2]-1; } LikelihoodFunction lf = (filteredData, tr); resultCache = {203,9}; modelNum = 0; rejectCount = 0; if (MPI_NODE_COUNT>1) { MPINodeState = {MPI_NODE_COUNT-1,8}; OPTIMIZE_SUMMATION_ORDER = 0; MPISend (1,lf); MPINodeState[0][0] = 1; MPINodeState[0][1] = modelNum; } else { Optimize (lf_MLES,lf); vv1 = 0; vv2 = 0; vv3 = 0; vv4 = 0; vv5 = 0; vv6 = 0; dummy = ReceiveJobs (0); } rateBiasTerms = {{"AC","1","AT","CG","CT","GT"}}; for (v2=0; v2<=1; v2=v2+1) { for (v3=0; v3<=v2+1; v3=v3+1) { if (v3>v2) { ub4 = v3; } else { ub4 = v2; } for (v4=0; v4<=ub4+1; v4=v4+1) { if (v4>=ub4) { ub5 = v4; } else { ub5 = ub4; } for (v5=0; v5<=ub5+1; v5=v5+1) { if (v5>ub5) { ub6 = v5; } else { ub6 = ub5; } for (v6=0; v6<=ub6+1; v6=v6+1) { if (v6==5) { break; } R = 1; paramCount = 0; modelDesc = "0"+Format(v2,1,0); modelDesc = modelDesc+Format(v3,1,0); modelDesc = modelDesc+Format(v4,1,0); modelDesc = modelDesc+Format(v5,1,0); modelDesc = modelDesc+Format(v6,1,0); modelConstraintString = ""; AC = 1; AT = 1; CG = 1; CT = 1; GT = 1; for (customLoopCounter2=1; customLoopCounter2<6; customLoopCounter2=customLoopCounter2+1) { for (customLoopCounter=0; customLoopCounter1) { for (mpiNode = 0; mpiNode < MPI_NODE_COUNT-1; mpiNode = mpiNode+1) { if (MPINodeState[mpiNode][0]==0) { break; } } if (mpiNode==MPI_NODE_COUNT-1) /* all nodes busy */ { mpiNode = ReceiveJobs (1); } else { MPISend (mpiNode+1,lf); MPINodeState[mpiNode][0] = 1; MPINodeState[mpiNode][1] = modelNum; MPINodeState[mpiNode][2] = v1; MPINodeState[mpiNode][3] = v2; MPINodeState[mpiNode][4] = v3; MPINodeState[mpiNode][5] = v4; MPINodeState[mpiNode][6] = v5; MPINodeState[mpiNode][7] = v6; } } else { Optimize (lf_MLES,lf); vv1 = v1; vv2 = v2; vv3 = v3; vv4 = v4; vv5 = v5; vv6 = v6; dummy = ReceiveJobs (0); } } } } } } if (MPI_NODE_COUNT>1) { while (1) { for (nodeCounter = 0; nodeCounter < MPI_NODE_COUNT-1; nodeCounter = nodeCounter+1) { if (MPINodeState[nodeCounter][0]==1) { fromNode = ReceiveJobs (0); break; } } if (nodeCounter == MPI_NODE_COUNT-1) { break; } } OPTIMIZE_SUMMATION_ORDER = 1; } function checkEmbedding (_m1, _m2) { for (r=0; r<6; r=r+1) { if (_m2[r]<_m1[r]) { /*fprintf (stdout,_m1," ", _m2, " Reject 1 at position ",r,"\n");*/ return 0; } if (_m2[r]>_m1[r]) { for (r2 = 0; r2 < 6; r2 = r2+1) { if ((_m2[r2]==_m2[r])&&(_m1[r2]!=_m1[r])) { /*fprintf (stdout,_m1," ", _m2, " Reject 2 at positions ",r,r2,"\n");*/ return 0; } } } } return 1; } PRINT_DIGITS = 0; fprintf (stdout, "\n\n--------------------------\n (*) => p-Value < ", rejectAt, "\nRejected ", rejectCount, " models.\n"); if (rejectCount<202) { fprintf (stdout, "\nPerforming nested tests on the remaining models...\n"); done = 0; while (!done) { done = 1; for (v2=1; v2<203; v2=v2+1) { if (resultCache[v2][8]) { modelString = "0"; for (v3 = 0; v3<5; v3=v3+1) { modelString = modelString + resultCache [v2][v3]; } for (v3 = v2+1; v3<203; v3 = v3+1) { if (resultCache[v3][8]) { modelString2 = "0"; for (v4 = 0; v4<5; v4=v4+1) { modelString2 = modelString2 + resultCache [v3][v4]; } if (checkEmbedding (modelString, modelString2)) { fprintf (stdout,"H: (", modelString,") A: (", modelString2, "). "); done = 0; LRT = 2*(resultCache[v3][5]-resultCache[v2][5]); npd = resultCache[v3][6]-resultCache[v2][6]; if (LRT<0) { pValue = 1; } else { pValue = 1-CChi2(LRT,npd); } fprintf (stdout," P-Value=", Format (pValue,10,3)); if (pValue