From 5e68714ae5a1d6993f63c2c9e0a1bf36e471ff7c Mon Sep 17 00:00:00 2001 From: Mike Doud Date: Thu, 30 Apr 2015 15:37:59 -0700 Subject: [PATCH] add HyPhy examples --- samples/HyPhy/454.bf | 2988 +++++++++++++++++++++++++++ samples/HyPhy/AAModelComparison.bf | 1827 ++++++++++++++++ samples/HyPhy/AnalyzeCodonData.bf | 1031 +++++++++ samples/HyPhy/AnalyzeDiNucData.bf | 931 +++++++++ samples/HyPhy/AnalyzeNucProtData.bf | 931 +++++++++ samples/HyPhy/BS2007.bf | 1999 ++++++++++++++++++ samples/HyPhy/MatrixIndexing.bf | 1 + samples/HyPhy/profile_test.bf | 1 + 8 files changed, 9709 insertions(+) create mode 100644 samples/HyPhy/454.bf create mode 100644 samples/HyPhy/AAModelComparison.bf create mode 100644 samples/HyPhy/AnalyzeCodonData.bf create mode 100644 samples/HyPhy/AnalyzeDiNucData.bf create mode 100644 samples/HyPhy/AnalyzeNucProtData.bf create mode 100644 samples/HyPhy/BS2007.bf create mode 100755 samples/HyPhy/MatrixIndexing.bf create mode 100755 samples/HyPhy/profile_test.bf diff --git a/samples/HyPhy/454.bf b/samples/HyPhy/454.bf new file mode 100644 index 00000000..40c867bd --- /dev/null +++ b/samples/HyPhy/454.bf @@ -0,0 +1,2988 @@ + + + + + + + + + + + + hyphy/454.bf at master · veg/hyphy + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Skip to content +
+ + + + + + + + + + + + +
+
+
+ +
+
+
+ +
    + +
  • +
    + +
    + + + + Watch + + + + +
    + +
    +
    +
    +
  • + +
  • + +
    + +
    + + +
    +
    + + +
    + +
  • + +
  • + + + Fork + + + + +
  • + +
+ +

+ + /hyphy + + + + + +

+
+
+ +
+
+ +
+ + + +
+ +
+

HTTPS clone URL

+
+ + + + +
+
+ + +
+

SSH clone URL

+
+ + + + +
+
+ + +
+

Subversion checkout URL

+
+ + + + +
+
+ + + +

You can clone with + HTTPS, SSH, or Subversion. + + + +

+ + + + Clone in Desktop + + + + + + + Download ZIP + +
+
+ +
+ + + + + + +
+ +
+ + + branch: + master + + + +
+ +
+ + + + +
+ + +
+ + +
+ + + + +
+ +
+
+
+ +
+ Raw + Blame + History +
+ + + + + +
+ +
+
+ +
+ +
+ 538 lines (451 sloc) + + 14.93 kb +
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ExecuteAFile ("Utility/GrabBag.bf");
ExecuteAFile ("Utility/DBTools.ibf");
alignOptions = {};
SetDialogPrompt ("Sequence File:");
DataSet unal = ReadDataFile (PROMPT_FOR_FILE);
BASE_FILE_PATH = LAST_FILE_PATH;
DB_FILE_PATH = LAST_FILE_PATH + ".db";
ANALYSIS_DB_ID = _openCacheDB (DB_FILE_PATH);
haveTable = _TableExists (ANALYSIS_DB_ID, "SETTINGS");
if (haveTable)
{
existingSettings = _ExecuteSQL (ANALYSIS_DB_ID, "SELECT * FROM SETTINGS");
}
if (Abs(existingSettings))
{
existingSettings = existingSettings [0];
ExecuteCommands ("_Genetic_Code = " + existingSettings["GENETIC_CODE"]);
ExecuteCommands (existingSettings["OPTIONS"] ^ {{"_hyphyAssociativeArray","alignOptions"}});
masterReferenceSequence = existingSettings["REFERENCE"];
dbSequences = _ExecuteSQL (ANALYSIS_DB_ID, "SELECT SEQUENCE_ID FROM SEQUENCES WHERE STAGE = 0");
unalSequenceCount = Abs(dbSequences);
toDoSequences = {unalSequenceCount,1};
for (k = 0; k < unalSequenceCount; k=k+1)
{
toDoSequences[k] = (dbSequences[k])["SEQUENCE_ID"];
}
dbSequences = 0;
fprintf (stdout, "[PHASE 1] Reloaded ", unalSequenceCount, " unprocessed sequences\n");
}
else
{
tableDefines = {};
tableDefines ["SETTINGS"] = {};
(tableDefines ["SETTINGS"])["RUN_DATE"] = "DATE";
(tableDefines ["SETTINGS"])["OPTIONS"] = "TEXT";
(tableDefines ["SETTINGS"])["REFERENCE"] = "TEXT";
(tableDefines ["SETTINGS"])["GENETIC_CODE"] = "TEXT";
(tableDefines ["SETTINGS"])["THRESHOLD"] = "REAL";
tableDefines ["SEQUENCES"] = {};
(tableDefines ["SEQUENCES"])["SEQUENCE_ID"] = "TEXT UNIQUE";
(tableDefines ["SEQUENCES"])["LENGTH"] = "INTEGER";
(tableDefines ["SEQUENCES"])["STAGE"] = "INTEGER";
/*
0 - initial import
1 - in frame without a fix
2 - one frame shift / fixed
3 - out-of-frame; not fixed / not aligned
*/
(tableDefines ["SEQUENCES"])["RAW"] = "TEXT";
(tableDefines ["SEQUENCES"])["ALIGNED_AA"] = "TEXT"; /* aligned aa. sequence */
(tableDefines ["SEQUENCES"])["ALIGNED"] = "TEXT"; /* aligned nucleotide sequence */
(tableDefines ["SEQUENCES"])["OFFSET"] = "INTEGER"; /* start offset w.r.t the reference sequence */
(tableDefines ["SEQUENCES"])["END_OFFSET"] = "INTEGER"; /* end offset w.r.t the reference sequence */
(tableDefines ["SEQUENCES"])["SCORE"] = "REAL";
(tableDefines ["SEQUENCES"])["FRAME"] = "INTEGER";
_CreateTableIfNeeded (ANALYSIS_DB_ID, "SETTINGS", tableDefines["SETTINGS"], 0);
_CreateTableIfNeeded (ANALYSIS_DB_ID, "SEQUENCES", tableDefines["SEQUENCES"], 1);
/* START ALIGNMENT SETTINGS */
LoadFunctionLibrary ("SeqAlignShared.ibf");
DataSetFilter filteredData = CreateFilter (unal,1);
GetInformation (UnalignedSeqs,filteredData);
/* preprocess sequences */
unalSequenceCount = Rows(UnalignedSeqs)*Columns(UnalignedSeqs);
GetString (sequenceNames, unal, -1);
longestSequence = 0;
longestSequenceIDX = 0;
seqRecord = {};
fprintf (stdout, "[PHASE 1] Initial Processing of ", unalSequenceCount, " sequences\n");
for (seqCounter = 0; seqCounter < unalSequenceCount; seqCounter = seqCounter+1)
{
UnalignedSeqs[seqCounter] = UnalignedSeqs[seqCounter]^{{"[^a-zA-Z]",""}};
UnalignedSeqs[seqCounter] = UnalignedSeqs[seqCounter]^{{"^N+",""}};
UnalignedSeqs[seqCounter] = UnalignedSeqs[seqCounter]^{{"N+$",""}};
seqRecord ["SEQUENCE_ID"] = sequenceNames[seqCounter];
seqRecord ["LENGTH"] = Abs (UnalignedSeqs[seqCounter]);
seqRecord ["STAGE"] = 0;
seqRecord ["RAW"] = UnalignedSeqs[seqCounter];
if (doLongestSequence)
{
if (doLongestSequence == 1 || seqCounter != unalSequenceCount-1)
{
if (Abs (UnalignedSeqs[seqCounter]) > longestSequence)
{
longestSequence = Abs (UnalignedSeqs[seqCounter]);
longestSequenceIDX = seqCounter;
}
}
}
_InsertRecord (ANALYSIS_DB_ID, "SEQUENCES", seqRecord);
SetParameter (STATUS_BAR_STATUS_STRING, "Initial processing ("+seqCounter+"/"+unalSequenceCount+" done)",0);
}
if (refSeq == 0)
{
masterReferenceSequence = UnalignedSeqs[0];
}
if (doLongestSequence)
{
fprintf (stdout, "\nSelected sequence ", sequenceNames[longestSequenceIDX], " as reference.");
masterReferenceSequence = UnalignedSeqs[longestSequenceIDX];
}
incFileName = HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"chooseGeneticCode.def";
ExecuteCommands ("#include \""+incFileName+"\";");
doLongestSequence = (refSeq==1);
aRecord = {};
aRecord["RUN_DATE"] = _ExecuteSQL(ANALYSIS_DB_ID,"SELECT DATE('NOW') AS CURRENT_DATE");
aRecord["RUN_DATE"] = ((aRecord["RUN_DATE"])[0])["CURRENT_DATE"];
aRecord["OPTIONS"] = "" + alignOptions;
aRecord["REFERENCE"] = masterReferenceSequence;
aRecord["GENETIC_CODE"] = "" + _Genetic_Code;
_InsertRecord (ANALYSIS_DB_ID, "SETTINGS", aRecord);
toDoSequences = sequenceNames;
UnalignedSequences = 0;
}
skipOutliers = 1;
doRC = 1;
/* build codon translation table */
codonToAAMap = {};
codeToAA = "FLIMVSPTAYXHQNKDECWRG";
nucChars = "ACGT";
for (p1=0; p1<64; p1=p1+1)
{
codon = nucChars[p1$16]+nucChars[p1%16$4]+nucChars[p1%4];
ccode = _Genetic_Code[p1];
codonToAAMap[codon] = codeToAA[ccode];
}
/* determine reading frames */
ProteinSequences = {};
AllTranslations = {};
ReadingFrames = {};
StopCodons = {};
StopPositions = {};
RC = {};
fprintf (stdout, "\n[PHASE 2] Detecting reading frames for each unprocessed sequence...\n");
frameCounter = {3,2};
stillHasStops = {};
aRecord = {};
for (seqCounter = 0; seqCounter < unalSequenceCount; seqCounter = seqCounter+1)
{
rawSeq = (_ExecuteSQL(ANALYSIS_DB_ID,"SELECT RAW FROM SEQUENCES WHERE SEQUENCE_ID = '" + toDoSequences[seqCounter] + "'"));
aSeq = (rawSeq[0])["RAW"];
seqLen = Abs(aSeq)-2;
minStops = 1e20;
tString = "";
rFrame = 0;
rrc = 0;
allTran = {3,2};
stopPosn = {6,2};
for (rc = 0; rc<=doRC; rc = rc+1)
{
if (rc)
{
aSeq = nucleotideReverseComplement (aSeq)
}
for (offset = 0; offset < 3; offset = offset+1)
{
translString = "";
translString * (seqLen/3+1);
for (seqPos = offset; seqPos < seqLen; seqPos = seqPos+3)
{
codon = aSeq[seqPos][seqPos+2];
prot = codonToAAMap[codon];
if (Abs(prot))
{
translString * prot;
}
else
{
translString * "?";
}
}
translString * 0;
translString = translString^{{"X$","?"}};
stopPos = translString||"X";
if (stopPos[0]>=0)
{
stopCount = Rows(stopPos)$2;
stopPosn[3*rc+offset][0] = stopPos[0];
stopPosn[3*rc+offset][1] = stopPos[stopCount*2-1];
}
else
{
stopCount = 0;
}
if (stopCount<minStops)
{
minStops = stopCount;
rFrame = offset;
rrc = rc;
tString = translString;
}
allTran[offset][rc] = translString;
}
}
ReadingFrames [seqCounter] = rFrame;
ProteinSequences [seqCounter] = tString;
frameCounter [rFrame][rrc] = frameCounter[rFrame][rrc]+1;
StopPositions [seqCounter] = stopPosn;
AllTranslations [seqCounter] = allTran;
RC [seqCounter] = rrc;
SetParameter (STATUS_BAR_STATUS_STRING, "Reading frame analysis ("+seqCounter+"/"+unalSequenceCount+" done)",0);
}
_closeCacheDB (ANALYSIS_DB_ID);
return 0;
s1 = ProteinSequences[0];
fprintf (stdout, "\nFound:\n\t", frameCounter[0], " sequences in reading frame 1\n\t",frameCounter[1], " sequences in reading frame 2\n\t",frameCounter[2], " sequences in reading frame 3\n\nThere were ", Abs(stillHasStops), " sequences with apparent frameshift/sequencing errors\n");
skipSeqs = {};
for (k=0; k<Abs(stillHasStops); k=k+1)
{
seqCounter = stillHasStops[k];
seqName = sequenceNames[seqCounter];
fprintf (stdout,"Sequence ", seqCounter+1, " (", seqName, ") seems to have");
stopPosn = StopPositions[seqCounter];
fStart = -1;
fEnd = -1;
fMin = 1e10;
frame1 = 0;
frame2 = 0;
checkFramePosition (stopPosn[0][1],stopPosn[1][0],0,1);
checkFramePosition (stopPosn[1][1],stopPosn[0][0],1,0);
checkFramePosition (stopPosn[0][1],stopPosn[2][0],0,2);
checkFramePosition (stopPosn[2][1],stopPosn[0][0],2,0);
checkFramePosition (stopPosn[2][1],stopPosn[1][0],2,1);
checkFramePosition (stopPosn[1][1],stopPosn[2][0],1,2);
if (fStart>=0)
{
allTran = AllTranslations[seqCounter];
useq = UnalignedSeqs[seqCounter];
fprintf (stdout, " a shift from frame ", frame2+1, " to frame ", frame1+1, " between a.a. positions ", fStart, " and ", fEnd, ".");
fStart2 = Max(fStart-1,0);
fEnd2 = Min(fEnd+1,Min(Abs(allTran[frame1]),Abs(allTran[frame2]))-1);
tempString = allTran[frame2];
fprintf (stdout, "\n\tRegion ", fStart2, "-", fEnd2, " in frame ", frame2+1, ":\n\t", tempString[fStart2][fEnd2]);
fprintf (stdout, "\n\t", useq[3*fStart2+frame2][3*fEnd2+frame2-1]);
tempString = allTran[frame1];
fprintf (stdout, "\n\tRegion ", fStart2, "-", fEnd2, " in frame ", frame1+1, ":\n\t", tempString[fStart2][fEnd2]);
fprintf (stdout, "\n\t", useq[3*fStart2+frame1][3*fEnd2+frame1-1]);
fprintf (stdout, "\n\t\tAttempting to resolve by alignment to reference. ");
f1s = allTran[frame1];
f2s = allTran[frame2];
f1l = Abs(f1s);
bestScore = -1e10;
bestSplice = -1;
for (k2=fStart; k2<fEnd; k2=k2+1)
{
s2 = f2s[0][k2]+f1s[k2+1][Abs(f1s)];
inStr = {{s1,s2}};
AlignSequences(aligned, inStr, alignOptions);
aligned = aligned[0];
aligned = aligned[0];
if (aligned > bestScore)
{
bestScore = aligned;
bestSplice = k2;
bestString = s2;
}
}
fprintf (stdout, "Best splice site appears to be at a.a. position ", bestSplice, "\n");
/* update best spliced string */
ProteinSequences[seqCounter] = bestString;
ReadingFrames[seqCounter] = 0;
UnalignedSeqs[seqCounter] = useq[frame2][frame2+3*bestSplice+2] + useq[frame1+3*bestSplice+3][Abs(useq)-1] + "---";
}
else
{
fprintf (stdout, " multiple frameshifts\n");
skipSeqs[seqCounter] = 1;
}
}
SeqAlignments = {};
startingPosition = {unalSequenceCount,2};
refLength = Abs(ProteinSequences[0]);
refInsertions = {refLength,1};
fprintf (stdout,"\nPerforming pairwise alignment with reference sequences\n");
alignmentScores = {};
for (seqCounter = 1; seqCounter < unalSequenceCount; seqCounter = seqCounter+1)
{
if (skipSeqs[seqCounter] == 0)
{
s2 = ProteinSequences[seqCounter];
inStr = {{s1,s2}};
AlignSequences(aligned, inStr, alignOptions);
aligned = aligned[0];
SeqAlignments[seqCounter] = aligned;
alignmentScores[Abs(alignmentScores)] = aligned[0]/Abs(aligned[1]);
aligned = aligned[1];
myStartingPosition = aligned$"[^-]";
myEndingPosition = Abs (aligned)-1;
while (aligned[myEndingPosition]=="-")
{
myEndingPosition = myEndingPosition - 1;
}
myStartingPosition = myStartingPosition[0];
startingPosition[seqCounter][0] = myStartingPosition;
startingPosition[seqCounter][1] = myEndingPosition;
aligned = aligned[myStartingPosition][myEndingPosition];
refInsert = aligned||"-+";
if (refInsert[0]>0)
{
insCount = Rows (refInsert)/2;
offset = 0;
for (insN = 0; insN < insCount; insN = insN+1)
{
insPos = refInsert[insN*2];
insLength = refInsert[insN*2+1]-insPos+1;
insPos = insPos-offset;
if (refInsertions[insPos]<insLength)
{
refInsertions[insPos]=insLength;
}
offset = offset + insLength;
}
}
}
}
alignmentScoresM = avlToMatrix ("alignmentScores");
ExecuteAFile ("Utility/DescriptiveStatistics.bf");
distInfo = GatherDescriptiveStats (alignmentScoresM);
/*lowerCuttoff = 0.25;*/
distInfo["Mean"] - 2*distInfo["Std.Dev"];
/* produce a fully gapped reference sequence */
fprintf (stdout,"\nMerging pairwise alignments into a MSA\n");
fullRefSeq = "";
fullRefSeq * refLength;
fullRefSeq * (s1[0]);
s1N = UnalignedSeqs[0];
fullRefSeqN = "";
fullRefSeqN * (3*refLength);
fullRefSeqN * (s1N[0][2]);
frameShift = ReadingFrames[0];
for (seqCounter=1;seqCounter<refLength;seqCounter=seqCounter+1)
{
gapCount = refInsertions[seqCounter];
for (k=0; k<gapCount;k=k+1)
{
fullRefSeq*("-");
fullRefSeqN*("---");
}
fullRefSeq * (s1[seqCounter]);
fullRefSeqN * (s1N[frameShift+seqCounter*3][frameShift+seqCounter*3+2]);
}
fullRefSeq * 0;
fullRefSeqN * 0;
return 0;
refLength = Abs(fullRefSeq);
SetDialogPrompt ("Save alignment to:");
seqName=sequenceNames[0];
fprintf (PROMPT_FOR_FILE,CLEAR_FILE,">",seqName,"\n",fullRefSeq);
fName = LAST_FILE_PATH;
fNameC = fName+".nuc";
fprintf (fNameC,CLEAR_FILE,">",seqName,"\n",fullRefSeqN);
alCounter = 0;
for (seqCounter = 1; seqCounter < unalSequenceCount; seqCounter = seqCounter+1)
{
if (skipSeqs[seqCounter] == 0)
{
if (skipOutliers == 0 && alignmentScoresM[alCounter] < lowerCuttoff)
{
seqName=sequenceNames[seqCounter];
fprintf (stdout, "Sequence ", seqName ," was skipped because of a poor alignment score.\n");
skipSeqs[seqCounter] = 1;
alCounter = alCounter + 1;
continue;
}
alCounter = alCounter + 1;
seqName=sequenceNames[seqCounter];
aligned = SeqAlignments[seqCounter];
aligned1 = aligned[1];
aligned2 = aligned[2];
s2 = startingPosition[seqCounter][0];
e2 = startingPosition[seqCounter][1];
gappedSeq = "";
gappedSeq * Abs(aligned2);
k=0;
while (k<refLength)
{
while (fullRefSeq[k]!=aligned1[s2])
{
gappedSeq*("-");
k=k+1;
}
gappedSeq*(aligned2[s2]);
s2=s2+1;
k=k+1;
}
gappedSeq * 0;
gappedSeqN = "";
gappedSeqN * (3*Abs(aligned2));
frameShift = ReadingFrames[seqCounter];
s1N = UnalignedSeqs[seqCounter];
s2N = ProteinSequences[seqCounter];
s2 = startingPosition[seqCounter][0];
k = 0;
e2 = Abs(gappedSeq);
k = 0;
while (k<e2)
{
while ((s2N[s2]!=gappedSeq[k])&&(k<e2))
{
gappedSeqN * ("---");
k=k+1;
}
if (k<e2)
{
gappedSeqN * s1N[frameShift+s2*3][frameShift+s2*3+2];
s2 = s2+1;
k=k+1;
}
}
gappedSeqN * 0;
if (refSeq2 && seqCounter == unalSequenceCount-1)
{
fscanf (fName, "Raw", soFar);
fprintf (fName, CLEAR_FILE,">",seqName,"\n",gappedSeq,"\n",soFar);
fscanf (fNameC, "Raw", soFar);
fprintf (fNameC,CLEAR_FILE,">",seqName,"\n",gappedSeqN,"\n",soFar);
}
else
{
fprintf (fName,"\n>",seqName,"\n",gappedSeq);
fprintf (fNameC,"\n>",seqName,"\n",gappedSeqN);
}
}
}
if (Abs(skipSeqs))
{
fName = fName+".bad";
for (seqCounter = 1; seqCounter < unalSequenceCount; seqCounter = seqCounter+1)
{
if (skipSeqs[seqCounter])
{
seqName=sequenceNames[seqCounter];
fprintf (fName,">",seqName,"\n",UnalignedSeqs[seqCounter],"\n");
}
}
}
function checkFramePosition (pos1, pos2, fr1, fr2)
{
fSpan = pos2-pos1;
if (fSpan>1) /* first followed by second*/
{
if (fSpan < fMin)
{
fMin = fSpan;
frame1 = fr1;
frame2 = fr2;
fStart = pos1+1;
fEnd = pos2;
}
}
return 0;
}
+ +
+ +
+ +Jump to Line + + +
+ +
+ +
+
+ + +
+ +
+ +
+ + +
+
+
+ +
+
+
+
+
+ +
+ + + + + + +
+ + + Something went wrong with that request. Please try again. +
+ + + + + + + +
+ + + + diff --git a/samples/HyPhy/AAModelComparison.bf b/samples/HyPhy/AAModelComparison.bf new file mode 100644 index 00000000..72b30a53 --- /dev/null +++ b/samples/HyPhy/AAModelComparison.bf @@ -0,0 +1,1827 @@ + + + + + + + + + + + + hyphy/AAModelComparison.bf at master · veg/hyphy + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Skip to content +
+ + + + + + + + + + + + +
+
+
+ +
+
+
+ +
    + +
  • +
    + +
    + + + + Watch + + + + +
    + +
    +
    +
    +
  • + +
  • + +
    + +
    + + +
    +
    + + +
    + +
  • + +
  • + + + Fork + + + + +
  • + +
+ +

+ + /hyphy + + + + + +

+
+
+ +
+
+ +
+ + + +
+ +
+

HTTPS clone URL

+
+ + + + +
+
+ + +
+

SSH clone URL

+
+ + + + +
+
+ + +
+

Subversion checkout URL

+
+ + + + +
+
+ + + +

You can clone with + HTTPS, SSH, or Subversion. + + + +

+ + + + Clone in Desktop + + + + + + + Download ZIP + +
+
+ +
+ + + + + + +
+ +
+ + + branch: + master + + + +
+ +
+ + + + +
+ + +
+ + +
+ Fetching contributors… +
+ +
+

+

Cannot retrieve contributors at this time

+
+
+
+
+
+ +
+ Raw + Blame + History +
+ + + + + +
+ +
+
+ +
+ +
+ 253 lines (199 sloc) + + 5.743 kb +
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
if (Rows (modelMatrixList) == 0)
{
modelMatrixList =
{
{"Equal Input", "EIAA.mdl", "19"}
{"Dayhoff","Dayhoff.mdl","0"}
{"Dayhoff+F","Dayhoff_F.mdl","19"}
{"JTT", "Jones.mdl", "0"}
{"JTT+F", "Jones_F.mdl", "19"}
{"WAG", "WAG.mdl", "0"}
{"WAG+F", "WAG_F.mdl", "19"}
{"rtREV", "rtREV.mdl", "0"}
{"rtREV+F", "rtREV_F.mdl", "19"}
{"mtMAM", "mtMAM.mdl", "0"}
{"mtMAM+F", "mtMAM_F.mdl", "19"}
{"mtREV 24", "mtREV_24.mdl", "0"}
{"mtREV 24+F", "mtREV_24_F.mdl", "19"}
{"HIV within", "HIVwithin.mdl", "0"}
{"HIV within+F", "HIVwithin+F.mdl", "19"}
{"HIV between", "HIVbetween.mdl", "0"}
{"HIV between+F", "HIVbetween+F.mdl", "19"}
{"REV-1 step", "reducedREV.mdl", "19"}
{"REV", "mtREV.mdl", "19"}
};
}
/*___________________________________________________________________________________________________________*/
function runAModel (modelID, fileName, xtraP, midx)
{
ExecuteCommands ("#include \"TemplateModels/"+fileName+"\";");
Tree givenTree = treeString;
LikelihoodFunction lf = (filteredData,givenTree);
GetString (lf_info, lf, -1);
locals = lf_info["Local Independent"];
if (Columns (branchLengthStash))
{
USE_LAST_RESULTS = 1;
for (_iv = 0; _iv < Columns (locals); _iv = _iv+1)
{
ExecuteCommands (locals[_iv] + "=1;\n");
}
currentBL = BranchLength (givenTree,0);
currentBN = BranchName (givenTree,-1);
for (_iv = 0; _iv < Columns (currentBN); _iv = _iv+1)
{
ExecuteCommands ("givenTree."+currentBN[_iv]+".t="+branchLengthStash[_iv]/currentBL+";");
}
}
else
{
for (_iv = 0; _iv < Columns (locals); _iv = _iv+1)
{
ExecuteCommands (locals[_iv] + "=0.1;\n");
}
USE_LAST_RESULTS = 1;
}
Optimize (res,lf);
fprintf (stdout, "| ", modelID);
for (k=0; k<maxModelWidth-Abs(modelID)-1; k=k+1)
{
fprintf (stdout, " ");
}
params = res[1][1]+xtraP;
AIC = 2(-res[1][0]+params);
if (filteredData.sites-params>1)
{
cAIC = 2(-res[1][0]+params*(filteredData.sites/(filteredData.sites-params-1)));
}
else
{
cAIC = 0;
}
branchLengths = BranchLength (givenTree,-1);
TL = 0;
for (k=Rows(branchLengths)*Columns(branchLengths)-1; k>=0; k=k-1)
{
TL = TL + branchLengths[k];
}
fprintf (stdout, "| ", Format (res[1][0],14,3), " | ", Format (params,5,0), " | ",
Format (AIC, 9,3), " | ",);
if (cAIC > 0)
{
fprintf (stdout, Format (cAIC,11,3), " | ");
}
else
{
fprintf (stdout, " N/A | ");
}
fprintf (stdout, Format (TL,11,3), " |\n", sepString);
resultMatrix[midx][0] = res[1][0];
resultMatrix[midx][1] = params;
resultMatrix[midx][2] = AIC;
resultMatrix[midx][3] = cAIC;
resultMatrix[midx][4] = TL;
if (AIC < bestAIC)
{
bestAIC = AIC;
bestAICidx = midx;
branchLengthStash = BranchLength (givenTree,-1);
}
if (cAIC > 0)
{
if (bestCAIC > cAIC)
{
bestCAIC = cAIC;
bestCAICidx = midx;
}
}
return 0;
}
/*___________________________________________________________________________________________________________*/
maxModelWidth = 7;
skipCodeSelectionStep = 0;
ChoiceList (doREV, "Include REV?", 1, SKIP_NONE, "Yes", "Include REV and reduced REV models. CAUTION: these models take a long time to fit.",
"No", "Only use empirical models");
if (doREV < 0)
{
return 0;
}
if (doREV == 0)
{
#include "TemplateModels/chooseGeneticCode.def";
skipCodeSelectionStep = 1;
}
modelCount = Rows (modelMatrixList) - 2*doREV;
for (k=0; k<modelCount; k=k+1)
{
maxModelWidth = Max(maxModelWidth,Abs (modelMatrixList[k][0])+2);
}
sepString = "";
capString = "";
sepString * 256;
sepString * "+";
capString * 256;
capString * "| Model";
for (k=0; k<maxModelWidth; k=k+1)
{
sepString * "-";
}
for (k=0; k<maxModelWidth-6; k=k+1)
{
capString * " ";
}
capString * "| Log Likelihood | #prms | AIC Score | c-AIC Score | Tree Length |\n";
sepString * "+----------------+-------+-----------+-------------+-------------+\n";
sepString * 0;
capString * 0;
branchLengthStash = 0;
SKIP_MODEL_PARAMETER_LIST = 0;
#include "TemplateModels/modelParameters2.mdl";
if (modelType == 1)
{
#include "TemplateModels/defineGamma.mdl";
}
if (modelType == 2)
{
#include "TemplateModels/defineHM.mdl";
}
SKIP_MODEL_PARAMETER_LIST = 1;
SetDialogPrompt ("Please load an amino-acid data file:");
DataSet ds = ReadDataFile (PROMPT_FOR_FILE);
DataSetFilter filteredData = CreateFilter (ds,1);
fprintf (stdout,"\nRunning aminoacid model comparisons on ", LAST_FILE_PATH, "\n\nThe alignment has ",ds.species, " sequences and ", ds.sites, " sites\n");
_DO_TREE_REBALANCE_ = 1;
#include "queryTree.bf";
resultMatrix = {modelCount, 5};
fprintf (stdout, "\n",sepString,capString,sepString);
bestAIC = 1e100;
bestCAIC = 1e100;
bestAICidx = 0;
bestCAICidx = -1;
for (mid=0; mid<modelCount; mid=mid+1)
{
runAModel (modelMatrixList[mid][0], modelMatrixList[mid][1], 0+modelMatrixList[mid][2], mid);
}
fprintf (stdout, "\n\nBest AIC model:\n\t", modelMatrixList[bestAICidx][0], " with the score of ", bestAIC);
if (bestCAICidx>=0)
{
fprintf (stdout, "\n\nBest c-AIC model:\n\t", modelMatrixList[bestCAICidx][0], " with the score of ", bestCAIC);
}
labelMatrix = {{"Log-likelihood","Parameters","AIC","c-AIC","Total tree length",""}};
aaString = "Model";
for (fC = 0; fC < modelCount; fC = fC+1)
{
aaString = aaString + ";" + modelMatrixList[fC][0];
}
USE_LAST_RESULTS = 0;
labelMatrix[5] = aaString;
skipCodeSelectionStep = 0;
OpenWindow (CHARTWINDOW,{{"Model Fits"}
{"labelMatrix"},
{"resultMatrix"},
{"Bar Chart"},
{"Index"},
{"c-AIC"},
{"Model Index"},
{""},
{"AIC"}
},
"SCREEN_WIDTH-60;SCREEN_HEIGHT-60;30;30");
+ +
+ +
+ +Jump to Line + + +
+ +
+ +
+
+ + +
+ +
+ +
+ + +
+
+
+ +
+
+
+
+
+ +
+ + + + + + +
+ + + Something went wrong with that request. Please try again. +
+ + + + + + + +
+ + + + diff --git a/samples/HyPhy/AnalyzeCodonData.bf b/samples/HyPhy/AnalyzeCodonData.bf new file mode 100644 index 00000000..27280bbc --- /dev/null +++ b/samples/HyPhy/AnalyzeCodonData.bf @@ -0,0 +1,1031 @@ + + + + + + + + + + + + hyphy/AnalyzeCodonData.bf at master · veg/hyphy + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Skip to content +
+ + + + + + + + + + + + +
+
+
+ +
+
+
+ +
    + +
  • +
    + +
    + + + + Watch + + + + +
    + +
    +
    +
    +
  • + +
  • + +
    + +
    + + +
    +
    + + +
    + +
  • + +
  • + + + Fork + + + + +
  • + +
+ +

+ + /hyphy + + + + + +

+
+
+ +
+
+ +
+ + + +
+ +
+

HTTPS clone URL

+
+ + + + +
+
+ + +
+

SSH clone URL

+
+ + + + +
+
+ + +
+

Subversion checkout URL

+
+ + + + +
+
+ + + +

You can clone with + HTTPS, SSH, or Subversion. + + + +

+ + + + Clone in Desktop + + + + + + + Download ZIP + +
+
+ +
+ + + + + + +
+ +
+ + + branch: + master + + + +
+ +
+ + + + +
+ + +
+ + +
+ Fetching contributors… +
+ +
+

+

Cannot retrieve contributors at this time

+
+
+
+
+
+ +
+ Raw + Blame + History +
+ + + + + +
+ +
+
+ +
+ +
+ 54 lines (36 sloc) + + 1.285 kb +
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NICETY_LEVEL = 3;
#include "TemplateModels/chooseGeneticCode.def";
#include "simpleBootstrap.bf";
SetDialogPrompt ("Please specify a codon data file:");
COUNT_GAPS_IN_FREQUENCIES = 0;
VERBOSITY_LEVEL = 1;
DataSet ds = ReadDataFile (PROMPT_FOR_FILE);
DataSetFilter filteredData = CreateFilter (ds,3,"","",GeneticCodeExclusions);
fprintf (stdout,"\n______________READ THE FOLLOWING DATA______________\n",ds);
SelectTemplateModel(filteredData);
_DO_TREE_REBALANCE_ = 1;
#include "queryTree.bf";
if (modelType)
{
ChoiceList (branchLengths, "Branch Lengths", 1, SKIP_NONE,
"Estimate", "Estimate branch lengths by ML",
"Proportional to input tree", "Branch lengths are proportional to those in input tree");
if (branchLengths < 0)
{
return;
}
if (branchLengths == 1)
{
global treeScaler = 1;
ReplicateConstraint ("this1.?.?:=treeScaler*this2.?.?__", givenTree, givenTree);
}
}
LikelihoodFunction lf = (filteredData,givenTree);
Optimize (res,lf);
fprintf (stdout, "\n______________RESULTS______________\n",lf);
/* compute syn and non-syn stencils for current genetic code */
#include "categoryEcho.bf";
GetString (sendMeBack,lf,-1);
sendMeBack["LogL"] = res[1][0];
sendMeBack["NP"] = res[1][1];
return sendMeBack;
+ +
+ +
+ +Jump to Line + + +
+ +
+ +
+
+ + +
+ +
+ +
+ + +
+
+
+ +
+
+
+
+
+ +
+ + + + + + +
+ + + Something went wrong with that request. Please try again. +
+ + + + + + + +
+ + + + diff --git a/samples/HyPhy/AnalyzeDiNucData.bf b/samples/HyPhy/AnalyzeDiNucData.bf new file mode 100644 index 00000000..833c79d7 --- /dev/null +++ b/samples/HyPhy/AnalyzeDiNucData.bf @@ -0,0 +1,931 @@ + + + + + + + + + + + + hyphy/AnalyzeDiNucData.bf at master · veg/hyphy + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Skip to content +
+ + + + + + + + + + + + +
+
+
+ +
+
+
+ +
    + +
  • +
    + +
    + + + + Watch + + + + +
    + +
    +
    +
    +
  • + +
  • + +
    + +
    + + +
    +
    + + +
    + +
  • + +
  • + + + Fork + + + + +
  • + +
+ +

+ + /hyphy + + + + + +

+
+
+ +
+
+ +
+ + + +
+ +
+

HTTPS clone URL

+
+ + + + +
+
+ + +
+

SSH clone URL

+
+ + + + +
+
+ + +
+

Subversion checkout URL

+
+ + + + +
+
+ + + +

You can clone with + HTTPS, SSH, or Subversion. + + + +

+ + + + Clone in Desktop + + + + + + + Download ZIP + +
+
+ +
+ + + + + + +
+ +
+ + + branch: + master + + + +
+ +
+ + + + +
+ + +
+ + +
+ Fetching contributors… +
+ +
+

+

Cannot retrieve contributors at this time

+
+
+
+
+
+ +
+ Raw + Blame + History +
+ + + + + +
+ +
+
+ +
+ +
+ 29 lines (15 sloc) + + 0.661 kb +
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
SetDialogPrompt ("Please specify a di-nucleotide (e.g. stem RNA) file:");
DataSet ds = ReadDataFile (PROMPT_FOR_FILE);
DataSetFilter filteredData = CreateFilter (ds,2);
fprintf (stdout,"\n______________READ THE FOLLOWING DATA______________\n",ds);
SelectTemplateModel(filteredData);
_DO_TREE_REBALANCE_ = 1;
#include "queryTree.bf";
LikelihoodFunction lf = (filteredData,givenTree);
timer = Time(0);
USE_ADAPTIVE_VARIABLE_STEP = 0;
Optimize (res,lf);
timer = Time(0)-timer;
fprintf (stdout, "\n______________RESULTS______________\nTime taken = ", timer, " seconds\nAIC Score = ",
2(res[1][1]-res[1][0]),"\n",lf);
#include "categoryEcho.bf";
+ +
+ +
+ +Jump to Line + + +
+ +
+ +
+
+ + +
+ +
+ +
+ + +
+
+
+ +
+
+
+
+
+ +
+ + + + + + +
+ + + Something went wrong with that request. Please try again. +
+ + + + + + + +
+ + + + diff --git a/samples/HyPhy/AnalyzeNucProtData.bf b/samples/HyPhy/AnalyzeNucProtData.bf new file mode 100644 index 00000000..710cb4b6 --- /dev/null +++ b/samples/HyPhy/AnalyzeNucProtData.bf @@ -0,0 +1,931 @@ + + + + + + + + + + + + hyphy/AnalyzeNucProtData.bf at master · veg/hyphy + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Skip to content +
+ + + + + + + + + + + + +
+
+
+ +
+
+
+ +
    + +
  • +
    + +
    + + + + Watch + + + + +
    + +
    +
    +
    +
  • + +
  • + +
    + +
    + + +
    +
    + + +
    + +
  • + +
  • + + + Fork + + + + +
  • + +
+ +

+ + /hyphy + + + + + +

+
+
+ +
+
+ +
+ + + +
+ +
+

HTTPS clone URL

+
+ + + + +
+
+ + +
+

SSH clone URL

+
+ + + + +
+
+ + +
+

Subversion checkout URL

+
+ + + + +
+
+ + + +

You can clone with + HTTPS, SSH, or Subversion. + + + +

+ + + + Clone in Desktop + + + + + + + Download ZIP + +
+
+ +
+ + + + + + +
+ +
+ + + branch: + master + + + +
+ +
+ + + + +
+ + +
+ + +
+ Fetching contributors… +
+ +
+

+

Cannot retrieve contributors at this time

+
+
+
+
+
+ +
+ Raw + Blame + History +
+ + + + + +
+ +
+
+ +
+ +
+ 29 lines (18 sloc) + + 0.868 kb +
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
_Genetic_Code = 0;
ExecuteAFile("simpleBootstrap.bf");
SetDialogPrompt ("Please specify a nucleotide or amino-acid data file:");
DataSet ds = ReadDataFile (PROMPT_FOR_FILE);
DataSetFilter filteredData = CreateFilter (ds,1);
fprintf (stdout,"\n______________READ THE FOLLOWING DATA______________\n",ds);
SelectTemplateModel(filteredData);
_DO_TREE_REBALANCE_ = 1;
ExecuteAFile ("queryTree.bf");
LikelihoodFunction lf = (filteredData,givenTree);
timer = Time(0);
Optimize (res,lf);
timer = Time(0)-timer;
fprintf (stdout, "\n______________RESULTS______________\nTime taken = ", timer, " seconds\nAIC Score = ",
2(res[1][1]-res[1][0]),"\n",lf);
ExecuteAFile ("categoryEcho.bf");
GetString (lfInfo, lf, -1);
return {"Log(L)": res[1][0], "DF": res[1][1], "Tree": Format (givenTree,1,1)}
+ +
+ +
+ +Jump to Line + + +
+ +
+ +
+
+ + +
+ +
+ +
+ + +
+
+
+ +
+
+
+
+
+ +
+ + + + + + +
+ + + Something went wrong with that request. Please try again. +
+ + + + + + + +
+ + + + diff --git a/samples/HyPhy/BS2007.bf b/samples/HyPhy/BS2007.bf new file mode 100644 index 00000000..d49327e5 --- /dev/null +++ b/samples/HyPhy/BS2007.bf @@ -0,0 +1,1999 @@ + + + + + + + + + + + + hyphy/BS2007.bf at master · veg/hyphy + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Skip to content +
+ + + + + + + + + + + + +
+
+
+ +
+
+
+ +
    + +
  • +
    + +
    + + + + Watch + + + + +
    + +
    +
    +
    +
  • + +
  • + +
    + +
    + + +
    +
    + + +
    + +
  • + +
  • + + + Fork + + + + +
  • + +
+ +

+ + /hyphy + + + + + +

+
+
+ +
+
+ +
+ + + +
+ +
+

HTTPS clone URL

+
+ + + + +
+
+ + +
+

SSH clone URL

+
+ + + + +
+
+ + +
+

Subversion checkout URL

+
+ + + + +
+
+ + + +

You can clone with + HTTPS, SSH, or Subversion. + + + +

+ + + + Clone in Desktop + + + + + + + Download ZIP + +
+
+ +
+ + + + + + +
+ +
+ + + branch: + master + + + +
+ +
+ + + + +
+ + +
+ + +
+ Fetching contributors… +
+ +
+

+

Cannot retrieve contributors at this time

+
+
+
+
+
+ +
+ Raw + Blame + History +
+ + + + + +
+ +
+
+ +
+ +
+ 296 lines (239 sloc) + + 9.281 kb +
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
/* 1. include a file to define the genetic code
Note the use of base directory and path forming variables to make this analysis
independent of directory placement
*/
incFileName = HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"chooseGeneticCode.def";
ExecuteCommands ("#include \""+incFileName+"\";");
/* 2. load a codon partition */
SetDialogPrompt ("Please locate a coding alignment:");
DataSet ds = ReadDataFile (PROMPT_FOR_FILE);
DataSetFilter filteredData = CreateFilter (ds,3,"","",GeneticCodeExclusions);
coding_path = LAST_FILE_PATH;
fprintf (stdout, "\nLoaded a ", filteredData.species, " sequence alignment with ", filteredData.sites, " codons from\n",coding_path,"\n");
/* 3. include a file to prompt for a tree */
incFileName = HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"queryTree.bf";
ExecuteCommands ("#include \""+incFileName+"\";");
/* 4. Compute nucleotide counts by position for the F3x4 estimator */
COUNT_GAPS_IN_FREQUENCIES = 0;
HarvestFrequencies (baseFreqs,filteredData,3,1,1);
COUNT_GAPS_IN_FREQUENCIES = 1;
fprintf (stdout, "\nBase composition:\n\tA: ", Format (baseFreqs[0][0],10,5),",",Format (baseFreqs[0][1],10,5),",",Format (baseFreqs[0][2],10,5),
"\n\tC: ", Format (baseFreqs[1][0],10,5),",",Format (baseFreqs[1][1],10,5),",",Format (baseFreqs[1][2],10,5),
"\n\tG: ", Format (baseFreqs[2][0],10,5),",",Format (baseFreqs[2][1],10,5),",",Format (baseFreqs[2][2],10,5),
"\n\tT: ", Format (baseFreqs[3][0],10,5),",",Format (baseFreqs[3][1],10,5),",",Format (baseFreqs[3][2],10,5), "\n");
/* 6. define the GY94 rate matrix; for now each branch will have it's own
dS and dN, we will constrain them later */
global kappa_inv = 1;
ModelMatrixDimension = 64;
for (h = 0; h<64; h=h+1)
{
if (_Genetic_Code[h]==10) /* stop codon */
{
ModelMatrixDimension = ModelMatrixDimension-1;
}
}
GY_Matrix = {ModelMatrixDimension,ModelMatrixDimension};
hshift = 0;
for (h=0; h<64; h=h+1)
{
if (_Genetic_Code[h]==10)
{
hshift = hshift+1;
}
else
{
vshift = hshift;
for (v = h+1; v<64; v=v+1)
{
diff = v-h;
if (_Genetic_Code[v]==10)
{
vshift = vshift+1;
}
else
{
if ((h$4==v$4)||((diff%4==0)&&(h$16==v$16))||(diff%16==0)) /* one step */
{
if (h$4==v$4)
{
transition = v%4;
transition2= h%4;
}
else
{
if(diff%16==0)
{
transition = v$16;
transition2= h$16;
}
else
{
transition = v%16$4;
transition2= h%16$4;
}
}
if (_Genetic_Code[0][h]==_Genetic_Code[0][v]) /* synonymous */
{
if (Abs(transition-transition2)%2) /* transversion */
{
GY_Matrix[h-hshift][v-vshift] := kappa_inv*synRate;
GY_Matrix[v-vshift][h-hshift] := kappa_inv*synRate;
}
else
{
GY_Matrix[h-hshift][v-vshift] := synRate;
GY_Matrix[v-vshift][h-hshift] := synRate;
}
}
else
{
if (Abs(transition-transition2)%2) /* transversion */
{
GY_Matrix[h-hshift][v-vshift] := kappa_inv*nonSynRate;
GY_Matrix[v-vshift][h-hshift] := kappa_inv*nonSynRate;
}
else
{
GY_Matrix[h-hshift][v-vshift] := nonSynRate;
GY_Matrix[v-vshift][h-hshift] := nonSynRate;
}
}
}
}
}
}
}
/*8. build codon frequencies (use the F3x4 estimator) */
PIStop = 1.0;
codonFreqs = {ModelMatrixDimension,1};
hshift = 0;
for (h=0; h<64; h=h+1)
{
first = h$16;
second = h%16$4;
third = h%4;
if (_Genetic_Code[h]==10)
{
hshift = hshift+1;
PIStop = PIStop-baseFreqs[first][0]*baseFreqs[second][1]*baseFreqs[third][2];
continue;
}
codonFreqs[h-hshift]=baseFreqs[first][0]*baseFreqs[second][1]*baseFreqs[third][2];
}
codonFreqs = codonFreqs*(1.0/PIStop);
/*9. define the codon model */
Model GY_Model = (GY_Matrix,codonFreqs,1);
/*10. Define the tree and pick the foreground branch, displaying a tree window to facilitate selection;
the latter step is executed for 2 of 3 model choices */
Tree givenTree1 = treeString;
Tree givenTree2 = treeString;
Tree givenTree3 = treeString;
Tree givenTree4 = treeString;
USE_LAST_RESULTS = 0;
OPTIMIZATION_METHOD = 4;
/* Approximate kappa and branch lengths using an HKY85 fit */
HKY85_Matrix = {{*,t*kappa_inv,t,t*kappa_inv}
{t*kappa_inv,*,kappa_inv*t,t}
{t,t*kappa_inv,*,kappa_inv*t}
{t*kappa_inv,t,kappa_inv*t,*}};
HarvestFrequencies (nucFreqs,ds,1,1,1);
Model HKY85_Model = (HKY85_Matrix,nucFreqs,1);
Tree nucTree = treeString;
DataSetFilter nucData = CreateFilter (ds,1);
fprintf (stdout, "Obtaining nucleotide branch lengths and kappa to be used as starting values...\n");
LikelihoodFunction nuc_lf = (nucData,nucTree);
Optimize (nuc_mle,nuc_lf);
fprintf (stdout, "\n", Format (nucTree,1,1), "\nkappa=", Format (1/kappa_inv,8,3), "\n");
USE_LAST_RESULTS = 1;
mxTreeSpec = {5,1};
mxTreeSpec [0] = "nucTree";
mxTreeSpec [1] = "8240";
mxTreeSpec [2] = "10,40,-10,-175,1";
mxTreeSpec [3] = "";
mxTreeSpec [4] = "";
OpenWindow (TREEWINDOW, mxTreeSpec,"(SCREEN_WIDTH-50)/2;(SCREEN_HEIGHT-50)/2;30+(SCREEN_WIDTH-30)/2;45");
leafNodes = TipCount (givenTree);
internalNodes = BranchCount(givenTree);
choiceMatrix = {internalNodes+leafNodes,2};
for (bc=0; bc<internalNodes; bc=bc+1)
{
choiceMatrix[bc][0] = BranchName(givenTree,bc);
choiceMatrix[bc][1] = "Internal Branch Rooting " + givenTree[bc];
}
for (bc=0; bc<leafNodes; bc=bc+1)
{
choiceMatrix[bc+internalNodes][0] = TipName(givenTree,bc);
choiceMatrix[bc+internalNodes][1] = "Leaf node " + choiceMatrix[bc+internalNodes][0];
}
ChoiceList (stOption,"Choose the foreground branch",0,NO_SKIP,choiceMatrix);
if (stOption[0] < 0)
{
return -1;
}
fprintf (stdout, "\n\n", Columns (stOption)," foreground branch(es) set to: ", "\n");
for (bc = 0; bc < Columns (stOption); bc = bc + 1)
{
fprintf (stdout, choiceMatrix[stOption[bc]][0], "\n");
}
OpenWindow (CLOSEWINDOW, "Tree nucTree");
/* 15. Constrain dS and dN in the tree to based upon different models */
global omega_1 = 0.25;
global omega_2 = 0.5;
global omega_3 = 1.5;
omega_1 :< 1;
omega_2 :< 1;
omega_3 :> 1;
ClearConstraints (givenTree1);
ClearConstraints (givenTree2);
ClearConstraints (givenTree3);
ClearConstraints (givenTree4);
/* 16. define and optimize the likelihood function */
bNames = BranchName (givenTree,-1);
nucBL = BranchLength (nucTree,-1);
for (bc=0; bc<Columns(bNames)-1; bc=bc+1)
{
ExecuteCommands ("givenTree1."+bNames[bc]+".synRate =nucTree."+bNames[bc]+".t;");
ExecuteCommands ("givenTree1."+bNames[bc]+".nonSynRate =nucTree."+bNames[bc]+".t;");
ExecuteCommands ("givenTree2."+bNames[bc]+".synRate =nucTree."+bNames[bc]+".t;");
ExecuteCommands ("givenTree2."+bNames[bc]+".nonSynRate =nucTree."+bNames[bc]+".t;");
ExecuteCommands ("givenTree3."+bNames[bc]+".synRate =nucTree."+bNames[bc]+".t;");
ExecuteCommands ("givenTree3."+bNames[bc]+".nonSynRate =nucTree."+bNames[bc]+".t;");
ExecuteCommands ("givenTree4."+bNames[bc]+".synRate =nucTree."+bNames[bc]+".t;");
ExecuteCommands ("givenTree4."+bNames[bc]+".nonSynRate =nucTree."+bNames[bc]+".t;");
}
codBL = BranchLength (givenTree1,-1);
for (bc=0; bc<Columns(bNames)-1; bc=bc+1)
{
if (nucBL[bc]>0)
{
scalingFactor = nucBL[bc]/codBL[bc];
ExecuteCommands ("givenTree1."+bNames[bc]+".synRate=nucTree."+bNames[bc]+".t*"+scalingFactor+";");
ExecuteCommands ("givenTree1."+bNames[bc]+".nonSynRate=nucTree."+bNames[bc]+".t*"+scalingFactor+";");
ExecuteCommands ("givenTree2."+bNames[bc]+".synRate=nucTree."+bNames[bc]+".t*"+scalingFactor+";");
ExecuteCommands ("givenTree2."+bNames[bc]+".nonSynRate=nucTree."+bNames[bc]+".t*"+scalingFactor+";");
ExecuteCommands ("givenTree3."+bNames[bc]+".synRate=nucTree."+bNames[bc]+".t*"+scalingFactor+";");
ExecuteCommands ("givenTree3."+bNames[bc]+".nonSynRate=nucTree."+bNames[bc]+".t*"+scalingFactor+";");
ExecuteCommands ("givenTree4."+bNames[bc]+".synRate =nucTree."+bNames[bc]+".t;");
ExecuteCommands ("givenTree4."+bNames[bc]+".nonSynRate =nucTree."+bNames[bc]+".t;");
}
}
for (bc = 0; bc < Columns (stOption); bc = bc + 1)
{
bName = choiceMatrix[stOption[bc]][0];
ExecuteCommands ("givenTree1."+bName+".nonSynRate:=omega_1*givenTree1."+bName+".synRate");
ExecuteCommands ("givenTree2."+bName+".nonSynRate:=omega_2*givenTree2."+bName+".synRate");
ExecuteCommands ("givenTree3."+bName+".nonSynRate:=givenTree3."+bName+".synRate");
ExecuteCommands ("givenTree4."+bName+".nonSynRate:=omega_3*givenTree4."+bName+".synRate");
}
OPTIMIZATION_PRECISION = 0.001;
global P_1 = 1/4;
global P_2 = 1/3;
global P_3 = 1/2;
P_1 :< 1;
P_2 :< 1;
P_3 :< 1;
fprintf (stdout, "\nFitting the model with selection...\n");
LikelihoodFunction lf = (filteredData, givenTree1, filteredData, givenTree2,filteredData, givenTree3,filteredData, givenTree4,
"Log(P_1*SITE_LIKELIHOOD[0]+(1-P_1)*P_2*SITE_LIKELIHOOD[1]+(1-P_1)(1-P_2)*P_3*SITE_LIKELIHOOD[2]+(1-P_1)(1-P_2)(1-P_3)*SITE_LIKELIHOOD[3])");
Optimize (mles,lf);
fprintf (stdout, lf);
+ +
+ +
+ +Jump to Line + + +
+ +
+ +
+
+ + +
+ +
+ +
+ + +
+
+
+ +
+
+
+
+
+ +
+ + + + + + +
+ + + Something went wrong with that request. Please try again. +
+ + + + + + + +
+ + + + diff --git a/samples/HyPhy/MatrixIndexing.bf b/samples/HyPhy/MatrixIndexing.bf new file mode 100755 index 00000000..aa1344d4 --- /dev/null +++ b/samples/HyPhy/MatrixIndexing.bf @@ -0,0 +1 @@ +fprintf (stdout, "\n1). Spawning a zero-populated 5x6 matrix and setting it's values to random numbers in [0,1].\n"); aMatrix = {5,6}; aMatrix = aMatrix ["Random(0,1)"]; fprintf (stdout, aMatrix, "\n"); fprintf (stdout, "\n2). Accessing a second-row third-column element and a random element.\n\n"); r = Random (0, Rows(aMatrix))$1; c = Random (0, Columns(aMatrix))$1; fprintf (stdout, "matrix[1][2]=", aMatrix[1][2], "\nmatrix[", r , "][" , c, "]=", aMatrix[r][c], "\n"); fprintf (stdout, "\n3). Accessing the fourth row.\n\n"); fprintf (stdout, "matrix[3][-1]=\n", aMatrix[3][-1],"\n"); fprintf (stdout, "\n4). Accessing the first column.\n\n"); fprintf (stdout, "matrix[-1][0]=\n", aMatrix[-1][0],"\n"); fprintf (stdout, "\n5). Populating a matrix template (below the diagonal).\n\n"); template={5,6}; template=template["_MATRIX_ELEMENT_ROW_>_MATRIX_ELEMENT_COLUMN_"]; fprintf (stdout, template ,"\n"); fprintf (stdout, "\n6). Extracting (by row) matrix elements using the template.\n\n"); fprintf (stdout, aMatrix[template] ,"\n"); fprintf (stdout, "\n7). Extracting a submatrix: top left corner at (1,1) - bottom right corner at (3,2).\n\n"); fprintf (stdout, "matrix[{{1,1}}][{{3,2}}]=\n", aMatrix[{{1,1}}][{{3,2}}],"\n"); fprintf (stdout, "\n8). Returning a matrix in which all elements are squared and above diagonal elements are further increased by 1.\n\n"); fprintf (stdout, "\n", aMatrix["_MATRIX_ELEMENT_VALUE_^2+(_MATRIX_ELEMENT_ROW_<_MATRIX_ELEMENT_COLUMN_)"],"\n"); \ No newline at end of file diff --git a/samples/HyPhy/profile_test.bf b/samples/HyPhy/profile_test.bf new file mode 100755 index 00000000..64cee8b2 --- /dev/null +++ b/samples/HyPhy/profile_test.bf @@ -0,0 +1 @@ +#profile START; s = 0; m = {5,1}; for (k=0; k<250000; k=k+1) { s = s + k; t = Random (0,5); m [t] = m [t] + 1; } #profile PAUSE; s2 = 0; for (k=1; k<10000; k=k+1) { s2 = s2+1/k; } #profile _hyphy_profile_dump; stats = _hyphy_profile_dump["STATS"]; _profile_summer = {1,Rows(stats)}; _profile_summer = _profile_summer["1"] * stats; _instructions = _hyphy_profile_dump["INSTRUCTION"]; _indices = _hyphy_profile_dump["INSTRUCTION INDEX"]; fprintf (stdout, "\nTotal run time (seconds) : ", Format(_profile_summer[1]/1000000,15,6), "\nTotal number of steps : ", Format(_profile_summer[0],15,0), "\n\n"); for (k=0; k