mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			1026 lines
		
	
	
		
			29 KiB
		
	
	
	
		
			Beef
		
	
	
	
	
	
			
		
		
	
	
			1026 lines
		
	
	
		
			29 KiB
		
	
	
	
		
			Beef
		
	
	
	
	
	
/* Written by SL Kosakovsky Pond (sergeilkp@mac.com) October 2006 */
 | 
						|
/* This analysis uses a 4 bin general bivariate discrete distributions, 
 | 
						|
   with 2 negatively selected classes, a positively selected class and a neutral class,
 | 
						|
   to compare the distributions of dS and dN in two data sets.
 | 
						|
   
 | 
						|
   The tests done:
 | 
						|
   
 | 
						|
   - Are the distributions the same?
 | 
						|
   - Are there the same proportions of sites with dN>dS in the data sets?
 | 
						|
   - Do the sites under positive selection share the same dN/dS?
 | 
						|
   - Two previous tests combined
 | 
						|
*/
 | 
						|
 | 
						|
RequireVersion ("0.9920061001");
 | 
						|
 | 
						|
/*---------------------------------------------------------------------------------------------------------------------------------------------------*/
 | 
						|
 | 
						|
function BuildCodonFrequencies (obsF)
 | 
						|
{
 | 
						|
	PIStop = 1.0;
 | 
						|
	result = {ModelMatrixDimension,1};
 | 
						|
	hshift = 0;
 | 
						|
 | 
						|
	for (h=0; h<64; h=h+1)
 | 
						|
	{
 | 
						|
		first = h$16;
 | 
						|
		second = h%16$4;
 | 
						|
		third = h%4;
 | 
						|
		if (_Genetic_Code[h]==10) 
 | 
						|
		{
 | 
						|
			hshift = hshift+1;
 | 
						|
			PIStop = PIStop-obsF[first][0]*obsF[second][1]*obsF[third][2];
 | 
						|
			continue; 
 | 
						|
		}
 | 
						|
		result[h-hshift][0]=obsF[first][0]*obsF[second][1]*obsF[third][2];
 | 
						|
	}
 | 
						|
	return result*(1.0/PIStop);
 | 
						|
}
 | 
						|
 | 
						|
/*---------------------------------------------------------------------------------------------------------------------------------------------------*/
 | 
						|
 | 
						|
function ReportDistributionString (rc,freqStrMx,infix, skip0)
 | 
						|
{
 | 
						|
	distroString = "";
 | 
						|
	distroString * 1024;
 | 
						|
	
 | 
						|
	reportMx = {rc,4};
 | 
						|
	
 | 
						|
	distroString * ("   dN/dS      dS      dN      Prob\n");
 | 
						|
	for (mi=0; mi<rc; mi=mi+1)
 | 
						|
	{
 | 
						|
		ExecuteCommands ("reportMx[mi][0]=S_"+infix+mi+"/c_"+infix+"scale;");
 | 
						|
		ExecuteCommands ("reportMx[mi][1]=NS_"+infix+mi+"/c_"+infix+"scale;");
 | 
						|
		reportMx[mi][2] = reportMx[mi][1]/reportMx[mi][0];
 | 
						|
		ExecuteCommands ("reportMx[mi][3]="+freqStrMx[mi]+";");
 | 
						|
		if (skip0 == 0 || reportMx[mi][3] > 0)
 | 
						|
		{
 | 
						|
			distroString * (Format(reportMx[mi][2],8,3)+Format(reportMx[mi][0],8,3)+Format(reportMx[mi][1],8,3)+Format(reportMx[mi][3],10,3)+"\n");
 | 
						|
		}
 | 
						|
	}								   
 | 
						|
	
 | 
						|
	distroString * 0;
 | 
						|
	return distroString;
 | 
						|
}
 | 
						|
 | 
						|
/*---------------------------------------------------------------------------------------------------------------------------------------------------*/
 | 
						|
 | 
						|
function DefineNucleotideBiases (dummy)
 | 
						|
{
 | 
						|
	ModelTitle 				= "MG94x"+modelDesc[0];
 | 
						|
	modelConstraintString = "";
 | 
						|
 | 
						|
	for (customLoopCounter2=0; customLoopCounter2<6; customLoopCounter2=customLoopCounter2+1)
 | 
						|
	{
 | 
						|
		if (rateBiasTerms[customLoopCounter2] != "1")
 | 
						|
		{
 | 
						|
			ExecuteCommands ("global " + rateBiasTerms[customLoopCounter2] + "=1;");
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	for (customLoopCounter2=1; customLoopCounter2<6; customLoopCounter2=customLoopCounter2+1)
 | 
						|
	{
 | 
						|
		for (customLoopCounter=0; customLoopCounter<customLoopCounter2; customLoopCounter=customLoopCounter+1)
 | 
						|
		{
 | 
						|
			if (modelDesc[customLoopCounter2]==modelDesc[customLoopCounter])
 | 
						|
			{
 | 
						|
				ModelTitle  = ModelTitle+modelDesc[customLoopCounter2];	
 | 
						|
				if (rateBiasTerms[customLoopCounter2] == "1")
 | 
						|
				{
 | 
						|
					modelConstraintString = modelConstraintString + rateBiasTerms[customLoopCounter]+":="+rateBiasTerms[customLoopCounter2]+";";
 | 
						|
				}
 | 
						|
				else
 | 
						|
				{
 | 
						|
					modelConstraintString = modelConstraintString + rateBiasTerms[customLoopCounter2]+":="+rateBiasTerms[customLoopCounter]+";";			
 | 
						|
				}
 | 
						|
				break;
 | 
						|
			}
 | 
						|
		}
 | 
						|
		if (customLoopCounter==customLoopCounter2)
 | 
						|
		{
 | 
						|
			ModelTitle = ModelTitle+modelDesc[customLoopCounter2];	
 | 
						|
		}
 | 
						|
	}	
 | 
						|
 | 
						|
 | 
						|
	if (Abs(modelConstraintString))
 | 
						|
	{
 | 
						|
		ExecuteCommands (modelConstraintString);
 | 
						|
	}
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
/*---------------------------------------------------------------------------------------------------------------------------------------------------*/
 | 
						|
 | 
						|
function 		defineRateDistribution (infix)
 | 
						|
{
 | 
						|
	ExecuteCommands ("global S_"+infix+"0  = 0.5;S_"+infix+"0:>0.0000001;global NS_"+infix+"0 = 0.1;");
 | 
						|
 | 
						|
	for (mi=1; mi<resp; mi=mi+1)
 | 
						|
	{
 | 
						|
		categDef1 * ("global S_"+infix+mi+"=0.5;S_"+infix+mi+":>0.0000001;\nglobal NS_"+infix+mi+";\n");
 | 
						|
		if (randomizeInitValues)
 | 
						|
		{
 | 
						|
			categDef1*("global P_"+infix+mi+" = Random(0.05,0.95);\nP_"+infix+mi+":<1;\n");	
 | 
						|
			categDef1*("global S_"+infix+mi+" = Random(0.05,1);");	
 | 
						|
		}
 | 
						|
		else
 | 
						|
		{
 | 
						|
			categDef1*("global P_"+infix+mi+" = 1/"+(resp+1-mi)+";\nP_"+infix+mi+":<1;\n");
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	freqStrMx    = {resp,1};
 | 
						|
	if (resp>1)
 | 
						|
	{
 | 
						|
		freqStrMx[0] = "P_"+infix+"1";
 | 
						|
 | 
						|
		for (mi=1; mi<resp-1; mi=mi+1)
 | 
						|
		{
 | 
						|
			freqStrMx[mi] = "";
 | 
						|
			for (mi2=1;mi2<=mi;mi2=mi2+1)
 | 
						|
			{
 | 
						|
				freqStrMx[mi] = freqStrMx[mi]+"(1-P_"+infix+mi2+")";		
 | 
						|
			}
 | 
						|
			freqStrMx[mi] = freqStrMx[mi]+"P_"+infix+(mi+1);	
 | 
						|
		}	
 | 
						|
		freqStrMx[mi] = "";
 | 
						|
 | 
						|
		for (mi2=1;mi2<mi;mi2=mi2+1)
 | 
						|
		{
 | 
						|
			freqStrMx[mi] = freqStrMx[mi]+"(1-P_"+infix+mi2+")";		
 | 
						|
		}
 | 
						|
		freqStrMx[mi] = freqStrMx[mi]+"(1-P_"+infix+mi+")";	
 | 
						|
	}
 | 
						|
	else
 | 
						|
	{
 | 
						|
		freqStrMx[0] = "1";
 | 
						|
	}
 | 
						|
 | 
						|
	categDef1*( "\n\nglobal c_"+infix+"scale:=S_"+infix+"0*" + freqStrMx[0]);
 | 
						|
 | 
						|
	for (mi=1; mi<resp; mi=mi+1)
 | 
						|
	{
 | 
						|
		categDef1*( "+S_"+infix+mi+"*" + freqStrMx[mi]);
 | 
						|
	}
 | 
						|
 | 
						|
	categDef1 * ";";
 | 
						|
 | 
						|
	for (mi=0; mi<resp; mi=mi+1)
 | 
						|
	{
 | 
						|
		categDef1 * ("\nglobal R_"+infix+mi+"=1;NS_"+infix+mi+":=R_"+infix+mi+"*S_"+infix+mi+";\n");
 | 
						|
	}
 | 
						|
 | 
						|
	if (stratDNDS)
 | 
						|
	{
 | 
						|
		for (mi=respP; mi<respN+respP; mi=mi+1)
 | 
						|
		{
 | 
						|
			categDef1 * ("\nR_"+infix+mi+":=1;");
 | 
						|
		}
 | 
						|
 | 
						|
		if (randomizeInitValues)
 | 
						|
		{
 | 
						|
			for (mi=respP+respN; mi<resp; mi=mi+1)
 | 
						|
			{
 | 
						|
				categDef1 * ("\nR_"+infix+mi+":<1;R_"+infix+mi+"="+Random(0.05,0.95)+";");
 | 
						|
			}
 | 
						|
 | 
						|
			for (mi=0; mi<respP; mi=mi+1)
 | 
						|
			{
 | 
						|
				categDef1 * ("\nR_"+infix+mi+":>1;R_"+infix+mi+"="+Random(1.05,10)+";");
 | 
						|
			}
 | 
						|
		}
 | 
						|
		else
 | 
						|
		{
 | 
						|
			for (mi=respN+respP; mi<resp; mi=mi+1)
 | 
						|
			{
 | 
						|
				categDef1 * ("\nR_"+infix+mi+":<1;R_"+infix+mi+"=1/"+(2+mi-respN-respP)+";");
 | 
						|
			}
 | 
						|
 | 
						|
			for (mi=0; mi<respP; mi=mi+1)
 | 
						|
			{
 | 
						|
				categDef1 * ("\nR_"+infix+mi+":>1;R_"+infix+mi+"=2+"+mi+";");
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
	else
 | 
						|
	{
 | 
						|
		if (randomizeInitValues)
 | 
						|
		{
 | 
						|
			for (mi=0; mi<resp; mi=mi+1)
 | 
						|
			{
 | 
						|
				categDef1 * ("\nR_"+infix+mi+"="+Random(0.05,1.75)+";");
 | 
						|
			}
 | 
						|
		}
 | 
						|
		else
 | 
						|
		{
 | 
						|
			for (mi=0; mi<resp; mi=mi+1)
 | 
						|
			{
 | 
						|
				categDef1 * ("\nR_"+infix+mi+"="+(0.1+0.3*mi)+";");
 | 
						|
			}	
 | 
						|
		}
 | 
						|
 | 
						|
	}
 | 
						|
 | 
						|
	categDef1 * 0;
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
/*---------------------------------------------------------------------------------------------------------------------------------------------------*/
 | 
						|
 | 
						|
ChoiceList (branchLengths,"Branch Lengths",1,SKIP_NONE,
 | 
						|
			"Codon Model","Jointly optimize rate parameters and branch lengths (slow and thorough)",
 | 
						|
			"Nucleotide Model","Estimate branch lengths once, using an appropriate nucleotide model (quick and dirty)."
 | 
						|
		    );
 | 
						|
 | 
						|
if (branchLengths<0)
 | 
						|
{
 | 
						|
	return;
 | 
						|
}
 | 
						|
 | 
						|
fileCount	= 2;
 | 
						|
#include "TemplateModels/chooseGeneticCode.def";
 | 
						|
 | 
						|
ExecuteAFile 		    ("2RatesAnalyses/MG94xREVxBivariate.mdl");
 | 
						|
fprintf 				(stdout, "\nReading input file 1/2\n");
 | 
						|
SetDialogPrompt 		("Please specify codon data #1:");
 | 
						|
DataSet					ds_1 			= ReadDataFile (PROMPT_FOR_FILE);
 | 
						|
fprintf					(stdout, "\nData file 1: ", LAST_FILE_PATH, "\n\t", ds_1.species, " sequences and ", ds_1.sites, " nucleotides\n");
 | 
						|
DataSetFilter 			filteredData_1 	= CreateFilter (ds_1,3,"","",GeneticCodeExclusions);
 | 
						|
HarvestFrequencies		(observedFreq_1,filteredData_1,3,1,1);
 | 
						|
HarvestFrequencies		(observedFreqSingle_1,filteredData_1,1,1,1);
 | 
						|
done = 0;
 | 
						|
while (!done)
 | 
						|
{
 | 
						|
	fprintf (stdout,"\nPlease enter a 6 character model designation (e.g:010010 defines HKY85):");
 | 
						|
	fscanf  (stdin,"String", modelDesc);
 | 
						|
	if (Abs(modelDesc)==6)
 | 
						|
	{	
 | 
						|
		done = 1;
 | 
						|
	}
 | 
						|
}		
 | 
						|
ModelTitle 				= "MG94x"+modelDesc[0];
 | 
						|
rateBiasTerms	 		= {{"AC_1","1","AT_1","CG_1","CT_1","GT_1"}};
 | 
						|
DefineNucleotideBiases (0);
 | 
						|
modelConstraintString_1 = modelConstraintString;
 | 
						|
ModelTitle_1			= ModelTitle;	
 | 
						|
fprintf					(stdout, "Using model ", ModelTitle_1, " on alignment 1\n\n");
 | 
						|
#include				"queryTree.bf";
 | 
						|
treeString_1			= treeString;			
 | 
						|
 | 
						|
 | 
						|
fprintf 				(stdout, "\nReading input file 2/2\n");
 | 
						|
SetDialogPrompt 		("Please specify codon data #2:");
 | 
						|
DataSet					ds_2 			= ReadDataFile (PROMPT_FOR_FILE);
 | 
						|
fprintf					(stdout, "\nData file 2: ", LAST_FILE_PATH, "\n\t", ds_2.species, " sequences and ", ds_2.sites, " nucleotides\n");
 | 
						|
DataSetFilter 			filteredData_2 	= CreateFilter (ds_2,3,"","",GeneticCodeExclusions);
 | 
						|
HarvestFrequencies		(observedFreq_2,filteredData_2,3,1,1);
 | 
						|
HarvestFrequencies		(observedFreqSingle_2,filteredData_2,1,1,1);
 | 
						|
done = 0;
 | 
						|
while (!done)
 | 
						|
{
 | 
						|
	fprintf (stdout,"\nPlease enter a 6 character model designation (e.g:010010 defines HKY85):");
 | 
						|
	fscanf  (stdin,"String", modelDesc);
 | 
						|
	if (Abs(modelDesc)==6)
 | 
						|
	{	
 | 
						|
		done = 1;
 | 
						|
	}
 | 
						|
}		
 | 
						|
ModelTitle 				= "MG94x"+modelDesc[0];
 | 
						|
rateBiasTerms	 		= {{"AC_2","1","AT_2","CG_2","CT_2","GT_2"}};
 | 
						|
DefineNucleotideBiases (0);
 | 
						|
modelConstraintString_2 = modelConstraintString;
 | 
						|
ModelTitle_2			= ModelTitle;	
 | 
						|
fprintf					(stdout, "Using model ", ModelTitle_2, " on alignment 2\n\n");
 | 
						|
#include				"queryTree.bf";
 | 
						|
treeString_2			= treeString;			
 | 
						|
						  
 | 
						|
totalCodonCount			= filteredData_1.sites + filteredData_2.sites;
 | 
						|
 | 
						|
ChoiceList (randomizeInitValues, "Initial Value Options",1,SKIP_NONE,
 | 
						|
			"Default",	 "Use default inital values for rate distribution parameters.",
 | 
						|
			"Randomized",	 "Select initial values for rate distribution parameters at random.");
 | 
						|
 | 
						|
 | 
						|
if (randomizeInitValues < 0)
 | 
						|
{
 | 
						|
	return;
 | 
						|
}
 | 
						|
 | 
						|
stratDNDS = 1;
 | 
						|
resp  	  = 4;
 | 
						|
respM 	  = 2;
 | 
						|
respN 	  = 1;
 | 
						|
respP 	  = 1;
 | 
						|
 | 
						|
fprintf (stdout, "\nUsing\n\t", respM, " negatively selected classes\n\t", respN, " neutrally evolving classes\n\t", 
 | 
						|
					respP, " positively selected classes\n\n");
 | 
						|
 | 
						|
categDef1 = "";
 | 
						|
categDef1 * 1024;
 | 
						|
 | 
						|
lfDef	  = {};
 | 
						|
 | 
						|
for (fileID = 0; fileID < fileCount; fileID = fileID + 1)
 | 
						|
{
 | 
						|
	lfDef[fileID] = "";
 | 
						|
	lfDef[fileID] * 1024;
 | 
						|
	lfDef[fileID] * "Log(";
 | 
						|
}
 | 
						|
 | 
						|
defineRateDistribution ("1_");
 | 
						|
categDef_1  = categDef1;
 | 
						|
freqStrMx_1 = freqStrMx;
 | 
						|
 | 
						|
for (mi=0; mi<resp; mi=mi+1)
 | 
						|
{
 | 
						|
	fileID = 0;
 | 
						|
	if (mi)
 | 
						|
	{
 | 
						|
		lfDef[fileID] * "+";
 | 
						|
	}
 | 
						|
	lfDef[fileID]*(freqStrMx_1[mi]+"*SITE_LIKELIHOOD["+(fileID*resp+mi)+"]");		
 | 
						|
}
 | 
						|
 | 
						|
defineRateDistribution ("2_");
 | 
						|
categDef_2  = categDef1;
 | 
						|
freqStrMx_2 = freqStrMx;
 | 
						|
 | 
						|
for (mi=0; mi<resp; mi=mi+1)
 | 
						|
{
 | 
						|
	fileID = 1;
 | 
						|
	if (mi)
 | 
						|
	{
 | 
						|
		lfDef[fileID] * "+";
 | 
						|
	}
 | 
						|
	lfDef[fileID]*(freqStrMx_2[mi]+"*SITE_LIKELIHOOD["+(fileID*resp+mi)+"]");		
 | 
						|
}
 | 
						|
 | 
						|
lfDef1 = "";
 | 
						|
lfDef1 * 128;
 | 
						|
lfDef1 * "\"";
 | 
						|
 | 
						|
for (fileID = 0; fileID < fileCount; fileID = fileID + 1)
 | 
						|
{
 | 
						|
	lfDef[fileID] * ")";
 | 
						|
	lfDef[fileID] * 0;
 | 
						|
	lfDef1 * lfDef[fileID];
 | 
						|
	if (fileID < fileCount - 1)
 | 
						|
	{
 | 
						|
		lfDef1 * "+";
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
lfDef1 	  * "\"";
 | 
						|
lfDef1	  * 0;
 | 
						|
 | 
						|
ExecuteCommands (categDef_1);
 | 
						|
ExecuteCommands (categDef_2);
 | 
						|
 | 
						|
ModelMatrixDimension = 64;
 | 
						|
for (h = 0 ;h<64; h=h+1)
 | 
						|
{
 | 
						|
	if (_Genetic_Code[h]==10)
 | 
						|
	{
 | 
						|
		ModelMatrixDimension = ModelMatrixDimension-1;
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
SetDialogPrompt ("Save resulting fits to:");
 | 
						|
fprintf 		(PROMPT_FOR_FILE, CLEAR_FILE);
 | 
						|
resToPath 		= LAST_FILE_PATH;
 | 
						|
 | 
						|
vectorOfFrequencies_1 = BuildCodonFrequencies (observedFreq_1);
 | 
						|
vectorOfFrequencies_2 = BuildCodonFrequencies (observedFreq_2);
 | 
						|
 | 
						|
fprintf			(stdout, "Fitting a nucleotide model to approximate branch lengths and get starting nucleotide bias estimates...\n");
 | 
						|
 | 
						|
nucModelMatrix_1 			= {{*,AC_1*t,t,AT_1*t}{AC_1*t,*,CG_1*t,CT_1*t}{t,CG_1*t,*,GT_1*t}{AT_1*t,CT_1*t,GT_1*t,*}};
 | 
						|
Model nucModel_1 			= (nucModelMatrix_1,observedFreqSingle_1);
 | 
						|
DataSetFilter 	nucFilter_1 = CreateFilter (filteredData_1,1);
 | 
						|
Tree  			nucTree_1 	= treeString_1;
 | 
						|
 | 
						|
nucModelMatrix_2 = {{*,AC_2*t,t,AT_2*t}{AC_2*t,*,CG_2*t,CT_2*t}{t,CG_2*t,*,GT_2*t}{AT_2*t,CT_2*t,GT_2*t,*}};
 | 
						|
Model nucModel_2 = (nucModelMatrix_2,observedFreqSingle_2);
 | 
						|
DataSetFilter 	nucFilter_2 = CreateFilter (filteredData_2,1);
 | 
						|
Tree  			nucTree_2 	= treeString_2;
 | 
						|
 | 
						|
LikelihoodFunction nuc_lf = (nucFilter_1, nucTree_1, nucFilter_2, nucTree_2);
 | 
						|
Optimize (nuc_res, nuc_lf);
 | 
						|
 | 
						|
global 	 codonFactor_1 = 0.33;
 | 
						|
global 	 codonFactor_2 = 0.33;
 | 
						|
 | 
						|
lfParts	= "";
 | 
						|
lfParts * 128;
 | 
						|
lfParts * "LikelihoodFunction lf = (filteredData_1,tree_1_0";
 | 
						|
 | 
						|
for (fileID = 1; fileID <= fileCount; fileID = fileID + 1)
 | 
						|
{
 | 
						|
	rateMultipliers 	= {{""   ,"AC_"+fileID+"*",""   ,"AT_"+fileID+"*"}
 | 
						|
						   {"AC_"+fileID+"*",""   ,"CG_"+fileID+"*","CT_"+fileID+"*"}
 | 
						|
						   {""   ,"CG_"+fileID+"*",""   ,"GT_"+fileID+"*"}
 | 
						|
						   {"AT_"+fileID+"*","CT_"+fileID+"*","GT_"+fileID+"*","" }
 | 
						|
						  };
 | 
						|
 | 
						|
	for (part = 0; part < resp; part = part + 1)
 | 
						|
	{
 | 
						|
		ExecuteCommands ("PopulateModelMatrix(\"rate_matrix_"+fileID+"_"+part+"\",observedFreq_"+fileID+",\"S_"+fileID+"_"+part+"/c_"+fileID+"_scale\",\"NS_"+fileID+"_"+part+"/c_"+fileID+"_scale\");");
 | 
						|
		ExecuteCommands ("Model MG94model_"+fileID+"_"+part+"= (rate_matrix_"+fileID+"_"+part+",vectorOfFrequencies_"+fileID+",0);");
 | 
						|
		
 | 
						|
		treeID = "tree_"+fileID+"_"+part;
 | 
						|
		ExecuteCommands ("Tree "+treeID+"=treeString_" + fileID +";");
 | 
						|
		if (branchLengths)
 | 
						|
		{
 | 
						|
			ExecuteCommands ("ReplicateConstraint (\"this1.?.synRate:=this2.?.t__/codonFactor_"+fileID+"\","+treeID+",nucTree_"+fileID+");");
 | 
						|
		}
 | 
						|
		else
 | 
						|
		{
 | 
						|
			if (part == 0)
 | 
						|
			{	
 | 
						|
				ExecuteCommands ("bnames = BranchName(nucTree_" + fileID + ",-1);");
 | 
						|
				nlfDef = "";
 | 
						|
				nlfDef * 128;
 | 
						|
				for (lc = 0; lc < Columns (bnames); lc=lc+1)
 | 
						|
				{
 | 
						|
					nlfDef * (treeID + "." + bnames[lc] + ".synRate = nucTree_" + fileID + "." + bnames[lc] + ".t/codonFactor_"+fileID+";");
 | 
						|
				}
 | 
						|
				nlfDef * 0;
 | 
						|
				ExecuteCommands (nlfDef);
 | 
						|
			}
 | 
						|
			else
 | 
						|
			{
 | 
						|
				ExecuteCommands ("ReplicateConstraint (\"this1.?.synRate:=this2.?.synRate\","+treeID+",tree_"+fileID+"_0);");
 | 
						|
			}
 | 
						|
		}
 | 
						|
		if (part || fileID > 1)
 | 
						|
		{
 | 
						|
			lfParts = lfParts + ",filteredData_" + fileID + "," + treeID;
 | 
						|
		}
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
lfParts * 0;
 | 
						|
ExecuteCommands (lfParts + "," + lfDef1 + ");");
 | 
						|
 | 
						|
sumPath = resToPath + ".summary";
 | 
						|
 | 
						|
treeBranchParameters = 0;
 | 
						|
for (fileID = 1; fileID <= fileCount; fileID = fileID + 1)
 | 
						|
{
 | 
						|
	ExecuteCommands ("treeBranchParameters = treeBranchParameters + BranchCount(tree_" + fileID + "_0) + TipCount(tree_" + fileID + "_0);");
 | 
						|
}	
 | 
						|
 | 
						|
/*-------------- INDEPENDENT DISTRIBUTIONS  ------------------*/
 | 
						|
 | 
						|
sop = Max(OPTIMIZATION_PRECISION,0.001);
 | 
						|
 | 
						|
fprintf (stdout, "Running simpler distribution approximations to ensure good convergence...\n");
 | 
						|
 | 
						|
OPTIMIZATION_PRECISION = 0.1;
 | 
						|
 | 
						|
P_1_1 := 0;P_1_2 := 0;P_1_3 := 0;
 | 
						|
P_2_1 := 0;P_2_2 := 0;P_2_3 := 0;
 | 
						|
 | 
						|
S_1_0 := 1;S_1_1 := 1;S_1_2 := 1;S_1_3 := 1;
 | 
						|
S_2_0 := 1;S_2_1 := 1;S_2_2 := 1;S_2_3 := 1;
 | 
						|
R_1_0 := 1;R_1_1 := 1;R_1_2 := 1;
 | 
						|
R_2_0 := 1;R_2_1 := 1;R_2_2 := 1;
 | 
						|
 | 
						|
Optimize (res,lf);
 | 
						|
USE_LAST_RESULTS 		= 1;
 | 
						|
SKIP_CONJUGATE_GRADIENT = 1;
 | 
						|
 | 
						|
d1 = ReportDistributionString(4,freqStrMx_1,"1_",1);
 | 
						|
d2 = ReportDistributionString(4,freqStrMx_2,"2_",1);
 | 
						|
 | 
						|
fprintf (stdout,  "\n*** Done with pass 1. Log(L) = ", Format(res[1][0],10,3), " *** \n");
 | 
						|
fprintf (stdout,  "Approximate rates for data set 1:\n", d1);
 | 
						|
fprintf (stdout,  "Approximate rates for data set 2:\n", d2);
 | 
						|
 | 
						|
codonFactor_1 := codonFactor_1__;
 | 
						|
codonFactor_2 := codonFactor_2__;
 | 
						|
 | 
						|
AC_1  := AC_1__; AT_1  := AT_1__; CG_1  := CG_1__; CT_1  := CT_1__; GT_1  := GT_1__;
 | 
						|
AC_2  := AC_2__; AT_2  := AT_2__; CG_2  := CG_2__; CT_2  := CT_2__; GT_2  := GT_2__;
 | 
						|
 | 
						|
 | 
						|
 | 
						|
fprintf (stdout, "\nGateaux sampling positively selected directions\n");
 | 
						|
 | 
						|
P_1_1 = 2/filteredData_1.sites;
 | 
						|
S_1_3 = 1;
 | 
						|
 | 
						|
baseLineLL  = res[1][0];
 | 
						|
LFCompute 	(lf,LF_START_COMPUTE);
 | 
						|
bestDiff 	= -1e100;
 | 
						|
bestAlpha 	= 1;
 | 
						|
bestBeta  	= 1;
 | 
						|
bestLL		= -1e100;
 | 
						|
 | 
						|
step  		= 0.1;
 | 
						|
step2 		= 0.25;
 | 
						|
v1 			= 0.05;
 | 
						|
 | 
						|
for (v1c = 0; v1c < 10; v1c = v1c + 1)
 | 
						|
{
 | 
						|
	v2 = v1+step; 
 | 
						|
	for (v2c = 0; v2c < 20; v2c = v2c+1)
 | 
						|
	{
 | 
						|
		checkASample ("1_0");	
 | 
						|
		v2 = v2 + step2;
 | 
						|
	}
 | 
						|
	v1 = v1+step;
 | 
						|
} 
 | 
						|
 | 
						|
S_1_0 = bestAlpha;
 | 
						|
R_1_0 = bestBeta;
 | 
						|
bestAlpha 	= 1;
 | 
						|
bestBeta  	= 1;
 | 
						|
 | 
						|
if (bestDiff <= 0)
 | 
						|
{
 | 
						|
	P_1_1 = 0;
 | 
						|
}
 | 
						|
saveBD = bestDiff;
 | 
						|
 | 
						|
P_2_1 = 1/filteredData_2.sites;
 | 
						|
S_2_3 = 1;
 | 
						|
 | 
						|
v1 			= 0.05;
 | 
						|
for (v1c = 0; v1c < 10; v1c = v1c + 1)
 | 
						|
{
 | 
						|
	v2 = v1+step; 
 | 
						|
	for (v2c = 0; v2c < 20; v2c = v2c+1)
 | 
						|
	{
 | 
						|
		checkASample ("2_0");	
 | 
						|
		v2 = v2 + step2;
 | 
						|
	}
 | 
						|
	v1 = v1+step;
 | 
						|
} 
 | 
						|
 | 
						|
LFCompute (lf,LF_DONE_COMPUTE);
 | 
						|
 | 
						|
S_2_0 = bestAlpha;
 | 
						|
R_2_0 = bestBeta;
 | 
						|
 | 
						|
if (bestDiff <= saveBD)
 | 
						|
{
 | 
						|
	P_2_1 = 0;
 | 
						|
}
 | 
						|
 | 
						|
if (bestDiff > 0)
 | 
						|
{
 | 
						|
	fprintf (stdout, "\nFound a likelihood improvement in the direction (", S_1_0, ",", S_1_0*R_1_0, "), (", S_2_0, ",", S_2_0*R_2_0, "), ",bestDiff," likelihood points\n");
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
Optimize (res,lf);
 | 
						|
 | 
						|
d1 = ReportDistributionString(4,freqStrMx_1,"1_",1);
 | 
						|
d2 = ReportDistributionString(4,freqStrMx_2,"2_",1);
 | 
						|
 | 
						|
fprintf (stdout,  "\n*** Done with pass 2. Log(L) = ", Format(res[1][0],10,3), " *** \n");
 | 
						|
fprintf (stdout,  "Approximate rates for data set 1:\n", d1);
 | 
						|
fprintf (stdout,  "Approximate rates for data set 2:\n", d2);
 | 
						|
 | 
						|
 | 
						|
fprintf (stdout, "\nGateaux sampling neutral directions\n");
 | 
						|
 | 
						|
baseLineLL  = res[1][0];
 | 
						|
LFCompute 	(lf,LF_START_COMPUTE);
 | 
						|
 | 
						|
P_1_2 = 1/(filteredData_1.sites*(1-P_1_1));
 | 
						|
bestDiff 	= -1e100;
 | 
						|
bestAlpha 	= 1;
 | 
						|
step = 0.02;
 | 
						|
v1 = 0;
 | 
						|
for (v1c = 0; v1c < 50; v1c = v1c + 1)
 | 
						|
{
 | 
						|
	v1 = v1 + step;
 | 
						|
	v2 = v1;
 | 
						|
	
 | 
						|
	checkASample ("1_1");
 | 
						|
}	
 | 
						|
 | 
						|
 | 
						|
S_1_1 = bestAlpha;
 | 
						|
bestAlpha 	= 1;
 | 
						|
 | 
						|
if (bestDiff <= 0)
 | 
						|
{
 | 
						|
	P_1_2 = 0;
 | 
						|
}
 | 
						|
saveBD = bestDiff;
 | 
						|
R_1_1 := 1;
 | 
						|
 | 
						|
P_2_2 = 1/(filteredData_2.sites*(1-P_2_1));
 | 
						|
 | 
						|
v1 = 0;
 | 
						|
for (v1c = 0; v1c < 50; v1c = v1c + 1)
 | 
						|
{
 | 
						|
	v1 = v1 + step;
 | 
						|
	v2 = v1;
 | 
						|
	
 | 
						|
	checkASample ("2_1");
 | 
						|
}	
 | 
						|
 | 
						|
 | 
						|
LFCompute (lf,LF_DONE_COMPUTE);
 | 
						|
 | 
						|
S_2_1 = bestAlpha;
 | 
						|
R_2_1 := 1;
 | 
						|
 | 
						|
if (bestDiff <= saveBD)
 | 
						|
{
 | 
						|
	P_2_2 = 0;
 | 
						|
}
 | 
						|
 | 
						|
if (bestDiff > 0)
 | 
						|
{
 | 
						|
	fprintf (stdout, "\nFound a likelihood improvement in the direction (", S_1_1, ",", S_1_1, "), (", S_2_1, ",", S_2_1, "), ",bestDiff," likelihood points\n");
 | 
						|
}
 | 
						|
 | 
						|
Optimize (res,lf);
 | 
						|
d1 = ReportDistributionString(4,freqStrMx_1,"1_",1);
 | 
						|
d2 = ReportDistributionString(4,freqStrMx_2,"2_",1);
 | 
						|
 | 
						|
fprintf (stdout,  "\n*** Done with pass 3. Log(L) = ", Format(res[1][0],10,3), " *** \n");
 | 
						|
fprintf (stdout,  "Approximate rates for data set 1:\n", d1);
 | 
						|
fprintf (stdout,  "Approximate rates for data set 2:\n", d2);
 | 
						|
 | 
						|
fprintf (stdout, "\nGateaux sampling negative selected directions\n");
 | 
						|
 | 
						|
 | 
						|
baseLineLL  = res[1][0];
 | 
						|
LFCompute 	(lf,LF_START_COMPUTE);
 | 
						|
bestDiff 	= -1e100;
 | 
						|
bestAlpha 	= 1;
 | 
						|
bestBeta  	= 1;
 | 
						|
 | 
						|
 | 
						|
step = 1/16;
 | 
						|
v1 = 0;
 | 
						|
P_1_3 = 2/(filteredData_1.sites*(1-P_1_1)*(1-P_1_2));
 | 
						|
S_1_2 = 1;
 | 
						|
for (v1c = 0; v1c < 15; v1c = v1c + 1)
 | 
						|
{
 | 
						|
	v1 = v1+step;
 | 
						|
	v2 = step/2; 
 | 
						|
	for (v2c = 0; v2c < v1c; v2c = v2c+1)
 | 
						|
	{
 | 
						|
		checkASample ("1_2");	
 | 
						|
		v2 = v2 + step;
 | 
						|
	}
 | 
						|
} 
 | 
						|
 | 
						|
 | 
						|
S_1_2 = bestAlpha;
 | 
						|
R_1_2 = bestBeta;
 | 
						|
bestAlpha 	= 1;
 | 
						|
bestBeta  	= 1;
 | 
						|
 | 
						|
if (bestDiff <= 0)
 | 
						|
{
 | 
						|
	P_1_3 = 0;
 | 
						|
}
 | 
						|
saveBD = bestDiff;
 | 
						|
 | 
						|
 | 
						|
v1 = 0;
 | 
						|
P_2_3 = 2/(filteredData_2.sites*(1-P_2_1)*(1-P_2_2));
 | 
						|
S_2_2 = 1;
 | 
						|
for (v1c = 0; v1c < 15; v1c = v1c + 1)
 | 
						|
{
 | 
						|
	v1 = v1+step;
 | 
						|
	v2 = step/2; 
 | 
						|
	for (v2c = 0; v2c < v1c; v2c = v2c+1)
 | 
						|
	{
 | 
						|
		checkASample ("2_2");	
 | 
						|
		v2 = v2 + step;
 | 
						|
	}
 | 
						|
} 
 | 
						|
 | 
						|
LFCompute (lf,LF_DONE_COMPUTE);
 | 
						|
 | 
						|
S_2_2 = bestAlpha;
 | 
						|
R_2_2 = bestBeta;
 | 
						|
 | 
						|
if (bestDiff <= saveBD)
 | 
						|
{
 | 
						|
	P_2_2 = 0;
 | 
						|
}
 | 
						|
 | 
						|
if (bestDiff > 0)
 | 
						|
{
 | 
						|
	fprintf (stdout, "\nFound a likelihood improvement in the direction (", S_1_2, ",", S_1_2*R_1_2, "), (", S_2_2, ",", S_2_2*R_2_2, "), ",bestDiff," likelihood points\n");
 | 
						|
}
 | 
						|
 | 
						|
AC_1  = AC_1; AT_1  = AT_1; CG_1  = CG_1; CT_1  = CT_1; GT_1  = GT_1;
 | 
						|
AC_2  = AC_2; AT_2  = AT_2; CG_2  = CG_2; CT_2  = CT_2; GT_2  = GT_2;
 | 
						|
 | 
						|
if (Abs(modelConstraintString_1))
 | 
						|
{
 | 
						|
	ExecuteCommands (modelConstraintString_1);
 | 
						|
}
 | 
						|
if (Abs(modelConstraintString_2))
 | 
						|
{
 | 
						|
	ExecuteCommands (modelConstraintString_2);
 | 
						|
}
 | 
						|
 | 
						|
codonFactor_1 = codonFactor_1;
 | 
						|
codonFactor_2 = codonFactor_2;
 | 
						|
 | 
						|
 | 
						|
GetString (paramList, lf, -1);
 | 
						|
degF = Columns(paramList["Global Independent"]) + 14 - 2*branchLengths + treeBranchParameters; 
 | 
						|
 | 
						|
OPTIMIZATION_PRECISION = sop;
 | 
						|
fprintf (stdout, "Running an independent distributions model fit (", degF, " parameters)...\n");
 | 
						|
Optimize (res,lf);
 | 
						|
LogL = res[1][0];
 | 
						|
 | 
						|
AIC	 = 2*(degF-LogL);
 | 
						|
AICc = 2*(degF*totalCodonCount/(totalCodonCount-degF-1) - LogL);
 | 
						|
 | 
						|
fprintf (stdout, "\n\nIndependent distributions model fit summary\n", 
 | 
						|
				 "\nLog likelihood:", Format (LogL, 15, 5),
 | 
						|
				 "\nParameters    :", Format (degF, 15, 0),
 | 
						|
				 "\nAIC           :", Format (AIC,  15, 5),
 | 
						|
				 "\nc-AIC         :", Format (AICc,  15, 5),"\n"
 | 
						|
);
 | 
						|
 | 
						|
fprintf (sumPath,CLEAR_FILE,"Independent distributions model fit summary\n", 
 | 
						|
				 "\nLog likelihood:", Format (LogL, 15, 5),
 | 
						|
				 "\nParameters    :", Format (degF, 15, 0),
 | 
						|
				 "\nAIC           :", Format (AIC,  15, 5),
 | 
						|
				 "\nc-AIC         :", Format (AICc,  15, 5),"\n");
 | 
						|
				 
 | 
						|
d1 = ReportDistributionString(4,freqStrMx_1,"1_",0);
 | 
						|
d2 = ReportDistributionString(4,freqStrMx_2,"2_",0);
 | 
						|
 | 
						|
fprintf (stdout,  "Inferred rates for data set 1:\n", d1);
 | 
						|
fprintf (sumPath, "Inferred rates for data set 1:\n", d1);
 | 
						|
fprintf (stdout,  "Inferred rates for data set 2:\n", d2);
 | 
						|
fprintf (sumPath, "Inferred rates for data set 2:\n", d2);
 | 
						|
 | 
						|
LIKELIHOOD_FUNCTION_OUTPUT = 6;
 | 
						|
fprintf (resToPath,CLEAR_FILE,lf);
 | 
						|
 | 
						|
 | 
						|
/*-------------- SHARED POSITIVE SELECTION STRENGTHS  ------------------*/
 | 
						|
 | 
						|
for (k=0; k<respP; k=k+1)
 | 
						|
{
 | 
						|
	ExecuteCommands ("R_1_"+k+"=0.5(R_1_"+k+"+R_2_"+k+");");
 | 
						|
	ExecuteCommands ("R_2_"+k+":=R_1_"+k+";");
 | 
						|
}
 | 
						|
 | 
						|
GetString (paramList, lf, -1);
 | 
						|
degFPSS = Columns(paramList["Global Independent"]) + 14 - 2*branchLengths + treeBranchParameters; 
 | 
						|
	
 | 
						|
fprintf (stdout, "\nRunning a shared positive selection strengths model (",degFPSS," parameters)...\n");
 | 
						|
Optimize (res_PSS,lf);
 | 
						|
LogLPSS = res_PSS[1][0];
 | 
						|
 | 
						|
AICPSS	 = 2*(degFPSS-LogLPSS);
 | 
						|
AICcPSS    = 2*(degFPSS*totalCodonCount/(totalCodonCount-degFPSS-1) - LogLPSS);
 | 
						|
 | 
						|
fprintf (stdout, "\n\nShared positive selection strengths model fit summary\n", 
 | 
						|
				 "\nLog likelihood:", Format (LogLPSS, 15, 5),
 | 
						|
				 "\nParameters    :", Format (degFPSS, 15, 0),
 | 
						|
				 "\nAIC           :", Format (AICPSS,  15, 5),
 | 
						|
				 "\nc-AIC         :", Format (AICcPSS,  15, 5),"\n"
 | 
						|
);
 | 
						|
 | 
						|
fprintf (sumPath, "\n\nShared positive strengths regime model fit summary\n", 
 | 
						|
				 "\nLog likelihood:", Format (LogLPSS, 15, 5),
 | 
						|
				 "\nParameters    :", Format (degFPSS, 15, 0),
 | 
						|
				 "\nAIC           :", Format (AICPSS,  15, 5),
 | 
						|
				 "\nc-AIC         :", Format (AICcPSS,  15, 5),"\n"
 | 
						|
);
 | 
						|
 | 
						|
d1 = ReportDistributionString(4,freqStrMx_1,"1_",0);
 | 
						|
d2 = ReportDistributionString(4,freqStrMx_2,"2_",0);
 | 
						|
 | 
						|
fprintf (stdout,  "Inferred rates for data set 1:\n", d1);
 | 
						|
fprintf (sumPath, "Inferred rates for data set 1:\n", d1);
 | 
						|
fprintf (stdout,  "Inferred rates for data set 2:\n", d2);
 | 
						|
fprintf (sumPath, "Inferred rates for data set 2:\n", d2);
 | 
						|
fpath = resToPath + ".SharedStrength";
 | 
						|
LIKELIHOOD_FUNCTION_OUTPUT = 6;
 | 
						|
fprintf (fpath,CLEAR_FILE,lf);
 | 
						|
 | 
						|
/*-------------- SHARED POSITIVE SELECTION PROPORTIONS  ------------------*/
 | 
						|
 | 
						|
/* RELOAD INDEPENDENT FIT */
 | 
						|
DeleteObject (lf);
 | 
						|
ExecuteAFile (resToPath);
 | 
						|
 | 
						|
for (k=1; k<=respP; k=k+1)
 | 
						|
{
 | 
						|
	ExecuteCommands ("P_1_"+k+"=0.5(P_1_"+k+"+P_2_"+k+");");
 | 
						|
	ExecuteCommands ("P_2_"+k+":=P_1_"+k+";");
 | 
						|
}
 | 
						|
 | 
						|
GetString (paramList, lf, -1);
 | 
						|
degFPSP = Columns(paramList["Global Independent"]) + 14 - 2*branchLengths + treeBranchParameters; 
 | 
						|
fprintf (stdout, "\nRunning a shared positive selection proportions model (",degFPSP," parameters) ...\n");
 | 
						|
Optimize (res_PSP,lf);
 | 
						|
LogLPSP = res_PSP[1][0];
 | 
						|
 | 
						|
 | 
						|
AICPSP	 = 2*(degFPSP-LogLPSP);
 | 
						|
AICcPSP    = 2*(degFPSP*totalCodonCount/(totalCodonCount-degFPSP-1) - LogLPSP);
 | 
						|
 | 
						|
fprintf (stdout, "\n\nShared positive selection proportions model fit summary\n", 
 | 
						|
				 "\nLog likelihood:", Format (LogLPSP, 15, 5),
 | 
						|
				 "\nParameters    :", Format (degFPSP, 15, 0),
 | 
						|
				 "\nAIC           :", Format (AICPSP,  15, 5),
 | 
						|
				 "\nc-AIC         :", Format (AICcPSP,  15, 5),"\n"
 | 
						|
);
 | 
						|
 | 
						|
fprintf (sumPath, "\n\nShared positive proportions regime model fit summary\n", 
 | 
						|
				 "\nLog likelihood:", Format (LogLPSP, 15, 5),
 | 
						|
				 "\nParameters    :", Format (degFPSP, 15, 0),
 | 
						|
				 "\nAIC           :", Format (AICPSP,  15, 5),
 | 
						|
				 "\nc-AIC         :", Format (AICcPSP,  15, 5),"\n"
 | 
						|
);
 | 
						|
 | 
						|
d1 = ReportDistributionString(4,freqStrMx_1,"1_",0);
 | 
						|
d2 = ReportDistributionString(4,freqStrMx_2,"2_",0);
 | 
						|
 | 
						|
fprintf (stdout,  "Inferred rates for data set 1:\n", d1);
 | 
						|
fprintf (sumPath,  "Inferred rates for data set 1:\n", d1);
 | 
						|
fprintf (stdout,  "Inferred rates for data set 2:\n", d2);
 | 
						|
fprintf (sumPath,  "Inferred rates for data set 2:\n", d2);
 | 
						|
fpath = resToPath + ".SharedProportion";
 | 
						|
LIKELIHOOD_FUNCTION_OUTPUT = 6;
 | 
						|
fprintf (fpath,CLEAR_FILE,lf);
 | 
						|
 | 
						|
 | 
						|
/*-------------- SHARED POSITIVE SELECTION REGIME  ------------------*/
 | 
						|
 | 
						|
/* RELOAD INDEPENDENT FIT */
 | 
						|
DeleteObject (lf);
 | 
						|
ExecuteAFile (resToPath);
 | 
						|
 | 
						|
for (k=0; k<respP; k=k+1)
 | 
						|
{
 | 
						|
	ExecuteCommands ("R_1_"+k+"=0.5(R_1_"+k+"+R_2_"+k+");");
 | 
						|
	ExecuteCommands ("R_2_"+k+":=R_1_"+k+";");
 | 
						|
}
 | 
						|
for (k=1; k<=respP; k=k+1)
 | 
						|
{
 | 
						|
	ExecuteCommands ("P_1_"+k+"=0.5(P_1_"+k+"+P_2_"+k+");");
 | 
						|
	ExecuteCommands ("P_2_"+k+":=P_1_"+k+";");
 | 
						|
}
 | 
						|
 | 
						|
GetString (paramList, lf, -1);
 | 
						|
degFPSH = Columns(paramList["Global Independent"]) + 14 - 2*branchLengths + treeBranchParameters; 
 | 
						|
 | 
						|
fprintf (stdout, "\nRunning a shared positive selection regime model (", degFPSH, " parameters)...\n");
 | 
						|
Optimize (res_PSH,lf);
 | 
						|
LogLPSH = res_PSH[1][0];
 | 
						|
 | 
						|
AICPSH	 = 2*(degFPSH-LogLPSH);
 | 
						|
AICcPSH    = 2*(degFPSH*totalCodonCount/(totalCodonCount-degFPSH-1) - LogLPSH);
 | 
						|
 | 
						|
fprintf (stdout, "\n\nShared positive selection regime model fit summary\n", 
 | 
						|
				 "\nLog likelihood:", Format (LogLPSH, 15, 5),
 | 
						|
				 "\nParameters    :", Format (degFPSH, 15, 0),
 | 
						|
				 "\nAIC           :", Format (AICPSH,  15, 5),
 | 
						|
				 "\nc-AIC         :", Format (AICcPSH,  15, 5),"\n"
 | 
						|
);
 | 
						|
 | 
						|
fprintf (sumPath, "\n\nShared positive selection regime model fit summary\n", 
 | 
						|
				 "\nLog likelihood:", Format (LogLPSH, 15, 5),
 | 
						|
				 "\nParameters    :", Format (degFPSH, 15, 0),
 | 
						|
				 "\nAIC           :", Format (AICPSH,  15, 5),
 | 
						|
				 "\nc-AIC         :", Format (AICcPSH,  15, 5),"\n"
 | 
						|
);
 | 
						|
 | 
						|
d1 = ReportDistributionString(4,freqStrMx_1,"1_",0);
 | 
						|
d2 = ReportDistributionString(4,freqStrMx_2,"2_",0);
 | 
						|
 | 
						|
fprintf (stdout,  "Inferred rates for data set 1:\n", d1);
 | 
						|
fprintf (sumPath,  "Inferred rates for data set 1:\n", d1);
 | 
						|
fprintf (stdout,  "Inferred rates for data set 2:\n", d2);
 | 
						|
fprintf (sumPath,  "Inferred rates for data set 2:\n", d2);
 | 
						|
fpath = resToPath + ".SharedPositiveSelection";
 | 
						|
LIKELIHOOD_FUNCTION_OUTPUT = 6;
 | 
						|
fprintf (fpath,CLEAR_FILE,lf);
 | 
						|
 | 
						|
/*-------------- SHARED MODEL ------------------*/
 | 
						|
 | 
						|
DeleteObject (lf);
 | 
						|
ExecuteAFile (resToPath);
 | 
						|
 | 
						|
for (k=0; k<resp; k=k+1)
 | 
						|
{
 | 
						|
	ExecuteCommands ("S_1_"+k+"=0.5(S_1_"+k+"+S_2_"+k+");");
 | 
						|
	if (k<respP || k>=respP+respN)
 | 
						|
	{
 | 
						|
		ExecuteCommands ("R_1_"+k+"=0.5(R_1_"+k+"+R_2_"+k+");");
 | 
						|
	}
 | 
						|
	ExecuteCommands ("S_2_"+k+":=S_1_"+k+";R_2_"+k+":=R_1_"+k+";");
 | 
						|
}
 | 
						|
for (k=1; k<=resp; k=k+1)
 | 
						|
{
 | 
						|
	ExecuteCommands ("P_1_"+k+"=0.5(P_1_"+k+"+P_2_"+k+");");
 | 
						|
	ExecuteCommands ("P_2_"+k+":=P_1_"+k+";");
 | 
						|
}
 | 
						|
 | 
						|
GetString (paramList, lf, -1);
 | 
						|
degFJ = Columns(paramList["Global Independent"]) + 14 - 2*branchLengths + treeBranchParameters; 
 | 
						|
fprintf (stdout, "\nRunning a shared distributions model fit (", degFJ, " parameters)...\n");
 | 
						|
 | 
						|
Optimize (res_J,lf);
 | 
						|
LogLJ = res_J[1][0];
 | 
						|
 | 
						|
AICJ	 = 2*(degFJ-LogLJ);
 | 
						|
AICcJ    = 2*(degFJ*totalCodonCount/(totalCodonCount-degFJ-1) - LogLJ);
 | 
						|
 | 
						|
fprintf (stdout, "\n\nShared distributions model fit summary\n", 
 | 
						|
				 "\nLog likelihood:", Format (LogLJ, 15, 5),
 | 
						|
				 "\nParameters    :", Format (degFJ, 15, 0),
 | 
						|
				 "\nAIC           :", Format (AICJ,  15, 5),
 | 
						|
				 "\nc-AIC         :", Format (AICcJ,  15, 5),"\n"
 | 
						|
);
 | 
						|
 | 
						|
fprintf (sumPath, "\n\nShared distributions model fit summary\n", 
 | 
						|
				 "\nLog likelihood:", Format (LogLJ, 15, 5),
 | 
						|
				 "\nParameters    :", Format (degFJ, 15, 0),
 | 
						|
				 "\nAIC           :", Format (AICJ,  15, 5),
 | 
						|
				 "\nc-AIC         :", Format (AICcJ,  15, 5),"\n"
 | 
						|
);
 | 
						|
 | 
						|
d1 = ReportDistributionString(4,freqStrMx_1,"1_",0);
 | 
						|
 | 
						|
fprintf (stdout,  "Inferred joint rates:\n", d1);
 | 
						|
fprintf (sumPath, "Inferred joint rates:\n", d1);
 | 
						|
 | 
						|
fpath = resToPath + ".JointAll";
 | 
						|
LIKELIHOOD_FUNCTION_OUTPUT = 6;
 | 
						|
fprintf (fpath,CLEAR_FILE,lf);
 | 
						|
 | 
						|
USE_LAST_RESULTS 			= 0;
 | 
						|
SKIP_CONJUGATE_GRADIENT 	= 0;
 | 
						|
LIKELIHOOD_FUNCTION_OUTPUT	= 2;
 | 
						|
 | 
						|
fprintf (stdout, "\nDistribution comparison tests\n",
 | 
						|
				 "\n\tAre the distributions different?",
 | 
						|
				 "\n\t\tLR = ", Format (2*(LogL-LogLJ),10,3), 
 | 
						|
				 	  " DF = ", degF-degFJ, 
 | 
						|
				 	  " p = ", Format(1-CChi2(2*(LogL-LogLJ), degF-degFJ),8,3), "\n");
 | 
						|
 | 
						|
fprintf (stdout, "\n\tAre selective regimes (dN/dS and proportions) different?",
 | 
						|
				 "\n\t\tLR = ", Format (2*(LogL-LogLPSH),10,3), 
 | 
						|
				 	  " DF = ", degF-degFPSH, 
 | 
						|
				 	  " p = ", Format(1-CChi2(2*(LogL-LogLPSH), degF-degFPSH),8,3), "\n");
 | 
						|
 | 
						|
fprintf (stdout, "\n\tAre selection strengths (dN/dS) different?",
 | 
						|
				 "\n\t\tLR = ", Format (2*(LogL-LogLPSS),10,3), 
 | 
						|
				 	  " DF = ", degF-degFPSS, 
 | 
						|
				 	  " p = ", Format(1-CChi2(2*(LogL-LogLPSS), degF-degFPSH),8,3), "\n");
 | 
						|
 | 
						|
fprintf (stdout, "\n\tAre the proportions of codons under selection different?",
 | 
						|
				 "\n\t\tLR = ", Format (2*(LogL-LogLPSP),10,3), 
 | 
						|
				 	  " DF = ", degF-degFPSP, 
 | 
						|
				 	  " p = ", Format(1-CChi2(2*(LogL-LogLPSP), degF-degFPSP),8,3), "\n");
 | 
						|
 | 
						|
fprintf (sumPath, "\n\nDistribution comparison tests\n",
 | 
						|
				 "\n\tAre the distributions different?",
 | 
						|
				 "\n\t\tLR = ", Format (2*(LogL-LogLJ),10,3), 
 | 
						|
				 	  " DF = ", degF-degFJ, 
 | 
						|
				 	  " p = ", Format(1-CChi2(2*(LogL-LogLJ), degF-degFJ),8,3), "\n");
 | 
						|
 | 
						|
fprintf (sumPath, "\n\tAre selective regimes (dN/dS and proportions) different?",
 | 
						|
				 "\n\t\tLR = ", Format (2*(LogL-LogLPSH),10,3), 
 | 
						|
				 	  " DF = ", degF-degFPSH, 
 | 
						|
				 	  " p = ", Format(1-CChi2(2*(LogL-LogLPSH), degF-degFPSH),8,3), "\n");
 | 
						|
 | 
						|
fprintf (sumPath, "\n\tAre selection strengths (dN/dS) different?",
 | 
						|
				 "\n\t\tLR = ", Format (2*(LogL-LogLPSS),10,3), 
 | 
						|
				 	  " DF = ", degF-degFPSS, 
 | 
						|
				 	  " p = ", Format(1-CChi2(2*(LogL-LogLPSS), degF-degFPSH),8,3), "\n");
 | 
						|
 | 
						|
fprintf (sumPath, "\n\tAre the proportions of codons under selection different?",
 | 
						|
				 "\n\t\tLR = ", Format (2*(LogL-LogLPSP),10,3), 
 | 
						|
				 	  " DF = ", degF-degFPSP, 
 | 
						|
				 	  " p = ", Format(1-CChi2(2*(LogL-LogLPSP), degF-degFPSP),8,3), "\n");
 | 
						|
/*------------------------------------------------------------------------------------------------------------*/
 | 
						|
 | 
						|
function checkASample (whichRate)
 | 
						|
{
 | 
						|
	ExecuteCommands ("S_"+whichRate+"=v1;R_"+whichRate+"=v2/v1;");
 | 
						|
	LFCompute (lf,res_n);
 | 
						|
	localDiff = res_n-baseLineLL;
 | 
						|
	if (localDiff > bestDiff)
 | 
						|
	{
 | 
						|
		bestDiff 	= localDiff;
 | 
						|
		bestAlpha 	= v1;
 | 
						|
		bestBeta  	= v2/v1;
 | 
						|
	}
 | 
						|
	return 0;
 | 
						|
}
 |