From 828fc7a19ba7dbf36da48aa3b02486c6c9a6a4cd Mon Sep 17 00:00:00 2001 From: "Daniel G. Taylor" Date: Tue, 28 Apr 2015 22:21:30 -0700 Subject: [PATCH 1/8] Add support for API Blueprint This adds a grammar and samples for [API Blueprint][] based on the [Sublime Text plugin][] from Apiary. The [Atom language][] is also based on the Sublime plugin. API Blueprint is now used in over [3,600 repositories][] on GitHub and has [several][Aglio] [open source][Dredd] [tools][Drakov] available. Here's an [example using Lightshow][Lightshow] to highlight a small API. [3,600 repositories]: https://github.com/search?utf8=%E2%9C%93&q=FORMAT+1A+extension%3Aapib+extension%3Amd&type=Code&ref=advsearch&l= [Aglio]: https://github.com/danielgtaylor/aglio [API Blueprint]: https://apiblueprint.org/ [Drakov]: https://www.npmjs.com/package/drakov [Atom language]: https://github.com/danielgtaylor/atom-language-api-blueprint [Dredd]: https://github.com/apiaryio/dredd [Lightshow]: https://github-lightshow.herokuapp.com/?utf8=%E2%9C%93&scope=from-url&grammar_url=https%3A%2F%2Fgithub.com%2Fapiaryio%2Fapi-blueprint-sublime-plugin%2Fblob%2Fmaster%2FAPIBlueprint.tmLanguage&grammar_text=&code_source=from-url&code_url=https%3A%2F%2Fraw.githubusercontent.com%2Fapiaryio%2Fapi-blueprint%2Fmaster%2Fexamples%2F12.%2520Advanced%2520Action.md&code= [Sublime Text plugin]: https://github.com/apiaryio/api-blueprint-sublime-plugin --- .gitmodules | 3 ++ grammars.yml | 3 ++ lib/linguist/languages.yml | 8 +++ samples/API Blueprint/actions.apib | 55 ++++++++++++++++++++ samples/API Blueprint/attributes.apib | 39 ++++++++++++++ samples/API Blueprint/simple.apib | 18 +++++++ vendor/grammars/api-blueprint-sublime-plugin | 1 + 7 files changed, 127 insertions(+) create mode 100644 samples/API Blueprint/actions.apib create mode 100644 samples/API Blueprint/attributes.apib create mode 100644 samples/API Blueprint/simple.apib create mode 160000 vendor/grammars/api-blueprint-sublime-plugin diff --git a/.gitmodules b/.gitmodules index f1ce5891..be0e86cb 100644 --- a/.gitmodules +++ b/.gitmodules @@ -657,3 +657,6 @@ [submodule "vendor/grammars/jflex.tmbundle"] path = vendor/grammars/jflex.tmbundle url = https://github.com/jflex-de/jflex.tmbundle.git +[submodule "vendor/grammars/api-blueprint-sublime-plugin"] + path = vendor/grammars/api-blueprint-sublime-plugin + url = https://github.com/apiaryio/api-blueprint-sublime-plugin diff --git a/grammars.yml b/grammars.yml index 46f2fd5d..d34253ac 100644 --- a/grammars.yml +++ b/grammars.yml @@ -156,6 +156,9 @@ vendor/grammars/antlr.tmbundle: vendor/grammars/apache.tmbundle: - source.apache-config - source.apache-config.mod_perl +vendor/grammars/api-blueprint-sublime-plugin/: +- text.html.markdown.source.gfm.apib +- text.html.markdown.source.gfm.mson vendor/grammars/applescript.tmbundle: - source.applescript vendor/grammars/asciidoc.tmbundle/: diff --git a/lib/linguist/languages.yml b/lib/linguist/languages.yml index 662c488f..da046cae 100644 --- a/lib/linguist/languages.yml +++ b/lib/linguist/languages.yml @@ -59,6 +59,14 @@ ANTLR: - .g4 ace_mode: text +API Blueprint: + type: markup + color: "#2ACCA8" + ace_mode: markdown + extensions: + - .apib + tm_scope: text.html.markdown.source.gfm.apib + APL: type: programming color: "#5A8164" diff --git a/samples/API Blueprint/actions.apib b/samples/API Blueprint/actions.apib new file mode 100644 index 00000000..f94dc67a --- /dev/null +++ b/samples/API Blueprint/actions.apib @@ -0,0 +1,55 @@ +FORMAT: 1A + +# Advanced Action API +A resource action is – in fact – a state transition. This API example demonstrates an action - state transition - to another resource. + +## API Blueprint ++ [Previous: Resource Model](11.%20Resource%20Model.md) ++ [This: Raw API Blueprint](https://raw.github.com/apiaryio/api-blueprint/master/examples/11.%20Advanced%20Action.md) + +# Tasks [/tasks/tasks{?status,priority}] + ++ Parameters + + status (string) + + priority (number) + +## List All Tasks [GET] + ++ Response 200 (application/json) + + [ + { + "id": 123, + "name": "Exercise in gym", + "done": false, + "type": "task" + }, + { + "id": 124, + "name": "Shop for groceries", + "done": true, + "type": "task" + } + ] + +## Retrieve Task [GET /task/{id}] +This is a state transition to another resource + ++ Parameters + + id (string) + ++ Response 200 (application/json) + + { + "id": 123, + "name": "Go to gym", + "done": false, + "type": "task" + } + +## Delete Task [DELETE /task/{id}] + ++ Parameters + + id (string) + ++ Response 204 diff --git a/samples/API Blueprint/attributes.apib b/samples/API Blueprint/attributes.apib new file mode 100644 index 00000000..ce143f7b --- /dev/null +++ b/samples/API Blueprint/attributes.apib @@ -0,0 +1,39 @@ +FORMAT: 1A + +# Attributes API +This API example demonstrates how to describe body attributes of a request or response message. + +In this case, the description is complementary (and duplicate!) to the provided JSON example in the body section. The [Advanced Attributes](09.%20Advanced%20Attributes.md) API example will demonstrate how to avoid duplicates and how to reuse attributes descriptions. + +## API Blueprint ++ [Previous: Parameters](07.%20Parameters.md) ++ [This: Raw API Blueprint](https://raw.github.com/apiaryio/api-blueprint/master/examples/08.%20Attributes.md) ++ [Next: Advanced Attributes](09.%20Advanced%20Attributes.md) + +# Group Coupons + +## Coupon [/coupons/{id}] +A coupon contains information about a percent-off or amount-off discount you might want to apply to a customer. + +### Retrieve a Coupon [GET] +Retrieves the coupon with the given ID. + ++ Response 200 (application/json) + + + Attributes (object) + + id: 250FF (string) + + created: 1415203908 (number) - Time stamp + + percent_off: 25 (number) + + A positive integer between 1 and 100 that represents the discount the coupon will apply. + + + redeem_by (number) - Date after which the coupon can no longer be redeemed + + + Body + + { + "id": "250FF", + "created": 1415203908, + "percent_off": 25, + "redeem_by:" null + } diff --git a/samples/API Blueprint/simple.apib b/samples/API Blueprint/simple.apib new file mode 100644 index 00000000..ecee8481 --- /dev/null +++ b/samples/API Blueprint/simple.apib @@ -0,0 +1,18 @@ +FORMAT: 1A + +# The Simplest API +This is one of the simplest APIs written in the **API Blueprint**. +One plain resource combined with a method and that's it! We will explain what is going on in the next installment - [Resource and Actions](02.%20Resource%20and%20Actions.md). + +**Note:** As we progress through the examples, do not also forget to view the [Raw](https://raw.github.com/apiaryio/api-blueprint/master/examples/01.%20Simplest%20API.md) code to see what is really going on in the API Blueprint, as opposed to just seeing the output of the Github Markdown parser. + +Also please keep in mind that every single example in this course is a **real API Blueprint** and as such you can **parse** it with the [API Blueprint parser](https://github.com/apiaryio/drafter) or one of its [bindings](https://github.com/apiaryio/drafter#bindings). + +## API Blueprint ++ [This: Raw API Blueprint](https://raw.github.com/apiaryio/api-blueprint/master/examples/01.%20Simplest%20API.md) ++ [Next: Resource and Actions](02.%20Resource%20and%20Actions.md) + +# GET /message ++ Response 200 (text/plain) + + Hello World! diff --git a/vendor/grammars/api-blueprint-sublime-plugin b/vendor/grammars/api-blueprint-sublime-plugin new file mode 160000 index 00000000..4713b0e8 --- /dev/null +++ b/vendor/grammars/api-blueprint-sublime-plugin @@ -0,0 +1 @@ +Subproject commit 4713b0e8248b63d993ec5bcaf3d70051e56acec2 From f393ea307d9bfc751604749e9417edff470c91ce Mon Sep 17 00:00:00 2001 From: Mike Doud Date: Thu, 30 Apr 2015 15:14:53 -0700 Subject: [PATCH 2/8] Add 'HyPhy Batch Language' --- lib/linguist/languages.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/linguist/languages.yml b/lib/linguist/languages.yml index 662c488f..868f29c1 100644 --- a/lib/linguist/languages.yml +++ b/lib/linguist/languages.yml @@ -1349,6 +1349,12 @@ Hy: - hylang tm_scope: source.hy +HyPhy Batch Language: + type: programming + ace_mode: text + extensions: + - .bf + IDL: type: programming color: "#a3522f" From 4ae6455e0ef9ca855ce8314013053f9b008ad4de Mon Sep 17 00:00:00 2001 From: Mike Doud Date: Thu, 30 Apr 2015 15:25:04 -0700 Subject: [PATCH 3/8] Update languages.yml --- lib/linguist/languages.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/linguist/languages.yml b/lib/linguist/languages.yml index 868f29c1..827559c8 100644 --- a/lib/linguist/languages.yml +++ b/lib/linguist/languages.yml @@ -1349,7 +1349,7 @@ Hy: - hylang tm_scope: source.hy -HyPhy Batch Language: +HyPhy: type: programming ace_mode: text extensions: From 5e68714ae5a1d6993f63c2c9e0a1bf36e471ff7c Mon Sep 17 00:00:00 2001 From: Mike Doud Date: Thu, 30 Apr 2015 15:37:59 -0700 Subject: [PATCH 4/8] add HyPhy examples --- samples/HyPhy/454.bf | 2988 +++++++++++++++++++++++++++ samples/HyPhy/AAModelComparison.bf | 1827 ++++++++++++++++ samples/HyPhy/AnalyzeCodonData.bf | 1031 +++++++++ samples/HyPhy/AnalyzeDiNucData.bf | 931 +++++++++ samples/HyPhy/AnalyzeNucProtData.bf | 931 +++++++++ samples/HyPhy/BS2007.bf | 1999 ++++++++++++++++++ samples/HyPhy/MatrixIndexing.bf | 1 + samples/HyPhy/profile_test.bf | 1 + 8 files changed, 9709 insertions(+) create mode 100644 samples/HyPhy/454.bf create mode 100644 samples/HyPhy/AAModelComparison.bf create mode 100644 samples/HyPhy/AnalyzeCodonData.bf create mode 100644 samples/HyPhy/AnalyzeDiNucData.bf create mode 100644 samples/HyPhy/AnalyzeNucProtData.bf create mode 100644 samples/HyPhy/BS2007.bf create mode 100755 samples/HyPhy/MatrixIndexing.bf create mode 100755 samples/HyPhy/profile_test.bf diff --git a/samples/HyPhy/454.bf b/samples/HyPhy/454.bf new file mode 100644 index 00000000..40c867bd --- /dev/null +++ b/samples/HyPhy/454.bf @@ -0,0 +1,2988 @@ + + + + + + + + + + + + hyphy/454.bf at master · veg/hyphy + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Skip to content +
+ + + + + + + + + + + + +
+
+
+ +
+
+
+ +
    + +
  • +
    + +
    + + + + Watch + + + + +
    + +
    +
    +
    +
  • + +
  • + +
    + +
    + + +
    +
    + + +
    + +
  • + +
  • + + + Fork + + + + +
  • + +
+ +

+ + /hyphy + + + + + +

+
+
+ +
+
+ +
+ + + +
+ +
+

HTTPS clone URL

+
+ + + + +
+
+ + +
+

SSH clone URL

+
+ + + + +
+
+ + +
+

Subversion checkout URL

+
+ + + + +
+
+ + + +

You can clone with + HTTPS, SSH, or Subversion. + + + +

+ + + + Clone in Desktop + + + + + + + Download ZIP + +
+
+ +
+ + + + + + +
+ +
+ + + branch: + master + + + +
+ +
+ + + + +
+ + +
+ + +
+ + + + +
+ +
+
+
+ +
+ Raw + Blame + History +
+ + + + + +
+ +
+
+ +
+ +
+ 538 lines (451 sloc) + + 14.93 kb +
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ExecuteAFile ("Utility/GrabBag.bf");
ExecuteAFile ("Utility/DBTools.ibf");
alignOptions = {};
SetDialogPrompt ("Sequence File:");
DataSet unal = ReadDataFile (PROMPT_FOR_FILE);
BASE_FILE_PATH = LAST_FILE_PATH;
DB_FILE_PATH = LAST_FILE_PATH + ".db";
ANALYSIS_DB_ID = _openCacheDB (DB_FILE_PATH);
haveTable = _TableExists (ANALYSIS_DB_ID, "SETTINGS");
if (haveTable)
{
existingSettings = _ExecuteSQL (ANALYSIS_DB_ID, "SELECT * FROM SETTINGS");
}
if (Abs(existingSettings))
{
existingSettings = existingSettings [0];
ExecuteCommands ("_Genetic_Code = " + existingSettings["GENETIC_CODE"]);
ExecuteCommands (existingSettings["OPTIONS"] ^ {{"_hyphyAssociativeArray","alignOptions"}});
masterReferenceSequence = existingSettings["REFERENCE"];
dbSequences = _ExecuteSQL (ANALYSIS_DB_ID, "SELECT SEQUENCE_ID FROM SEQUENCES WHERE STAGE = 0");
unalSequenceCount = Abs(dbSequences);
toDoSequences = {unalSequenceCount,1};
for (k = 0; k < unalSequenceCount; k=k+1)
{
toDoSequences[k] = (dbSequences[k])["SEQUENCE_ID"];
}
dbSequences = 0;
fprintf (stdout, "[PHASE 1] Reloaded ", unalSequenceCount, " unprocessed sequences\n");
}
else
{
tableDefines = {};
tableDefines ["SETTINGS"] = {};
(tableDefines ["SETTINGS"])["RUN_DATE"] = "DATE";
(tableDefines ["SETTINGS"])["OPTIONS"] = "TEXT";
(tableDefines ["SETTINGS"])["REFERENCE"] = "TEXT";
(tableDefines ["SETTINGS"])["GENETIC_CODE"] = "TEXT";
(tableDefines ["SETTINGS"])["THRESHOLD"] = "REAL";
tableDefines ["SEQUENCES"] = {};
(tableDefines ["SEQUENCES"])["SEQUENCE_ID"] = "TEXT UNIQUE";
(tableDefines ["SEQUENCES"])["LENGTH"] = "INTEGER";
(tableDefines ["SEQUENCES"])["STAGE"] = "INTEGER";
/*
0 - initial import
1 - in frame without a fix
2 - one frame shift / fixed
3 - out-of-frame; not fixed / not aligned
*/
(tableDefines ["SEQUENCES"])["RAW"] = "TEXT";
(tableDefines ["SEQUENCES"])["ALIGNED_AA"] = "TEXT"; /* aligned aa. sequence */
(tableDefines ["SEQUENCES"])["ALIGNED"] = "TEXT"; /* aligned nucleotide sequence */
(tableDefines ["SEQUENCES"])["OFFSET"] = "INTEGER"; /* start offset w.r.t the reference sequence */
(tableDefines ["SEQUENCES"])["END_OFFSET"] = "INTEGER"; /* end offset w.r.t the reference sequence */
(tableDefines ["SEQUENCES"])["SCORE"] = "REAL";
(tableDefines ["SEQUENCES"])["FRAME"] = "INTEGER";
_CreateTableIfNeeded (ANALYSIS_DB_ID, "SETTINGS", tableDefines["SETTINGS"], 0);
_CreateTableIfNeeded (ANALYSIS_DB_ID, "SEQUENCES", tableDefines["SEQUENCES"], 1);
/* START ALIGNMENT SETTINGS */
LoadFunctionLibrary ("SeqAlignShared.ibf");
DataSetFilter filteredData = CreateFilter (unal,1);
GetInformation (UnalignedSeqs,filteredData);
/* preprocess sequences */
unalSequenceCount = Rows(UnalignedSeqs)*Columns(UnalignedSeqs);
GetString (sequenceNames, unal, -1);
longestSequence = 0;
longestSequenceIDX = 0;
seqRecord = {};
fprintf (stdout, "[PHASE 1] Initial Processing of ", unalSequenceCount, " sequences\n");
for (seqCounter = 0; seqCounter < unalSequenceCount; seqCounter = seqCounter+1)
{
UnalignedSeqs[seqCounter] = UnalignedSeqs[seqCounter]^{{"[^a-zA-Z]",""}};
UnalignedSeqs[seqCounter] = UnalignedSeqs[seqCounter]^{{"^N+",""}};
UnalignedSeqs[seqCounter] = UnalignedSeqs[seqCounter]^{{"N+$",""}};
seqRecord ["SEQUENCE_ID"] = sequenceNames[seqCounter];
seqRecord ["LENGTH"] = Abs (UnalignedSeqs[seqCounter]);
seqRecord ["STAGE"] = 0;
seqRecord ["RAW"] = UnalignedSeqs[seqCounter];
if (doLongestSequence)
{
if (doLongestSequence == 1 || seqCounter != unalSequenceCount-1)
{
if (Abs (UnalignedSeqs[seqCounter]) > longestSequence)
{
longestSequence = Abs (UnalignedSeqs[seqCounter]);
longestSequenceIDX = seqCounter;
}
}
}
_InsertRecord (ANALYSIS_DB_ID, "SEQUENCES", seqRecord);
SetParameter (STATUS_BAR_STATUS_STRING, "Initial processing ("+seqCounter+"/"+unalSequenceCount+" done)",0);
}
if (refSeq == 0)
{
masterReferenceSequence = UnalignedSeqs[0];
}
if (doLongestSequence)
{
fprintf (stdout, "\nSelected sequence ", sequenceNames[longestSequenceIDX], " as reference.");
masterReferenceSequence = UnalignedSeqs[longestSequenceIDX];
}
incFileName = HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"chooseGeneticCode.def";
ExecuteCommands ("#include \""+incFileName+"\";");
doLongestSequence = (refSeq==1);
aRecord = {};
aRecord["RUN_DATE"] = _ExecuteSQL(ANALYSIS_DB_ID,"SELECT DATE('NOW') AS CURRENT_DATE");
aRecord["RUN_DATE"] = ((aRecord["RUN_DATE"])[0])["CURRENT_DATE"];
aRecord["OPTIONS"] = "" + alignOptions;
aRecord["REFERENCE"] = masterReferenceSequence;
aRecord["GENETIC_CODE"] = "" + _Genetic_Code;
_InsertRecord (ANALYSIS_DB_ID, "SETTINGS", aRecord);
toDoSequences = sequenceNames;
UnalignedSequences = 0;
}
skipOutliers = 1;
doRC = 1;
/* build codon translation table */
codonToAAMap = {};
codeToAA = "FLIMVSPTAYXHQNKDECWRG";
nucChars = "ACGT";
for (p1=0; p1<64; p1=p1+1)
{
codon = nucChars[p1$16]+nucChars[p1%16$4]+nucChars[p1%4];
ccode = _Genetic_Code[p1];
codonToAAMap[codon] = codeToAA[ccode];
}
/* determine reading frames */
ProteinSequences = {};
AllTranslations = {};
ReadingFrames = {};
StopCodons = {};
StopPositions = {};
RC = {};
fprintf (stdout, "\n[PHASE 2] Detecting reading frames for each unprocessed sequence...\n");
frameCounter = {3,2};
stillHasStops = {};
aRecord = {};
for (seqCounter = 0; seqCounter < unalSequenceCount; seqCounter = seqCounter+1)
{
rawSeq = (_ExecuteSQL(ANALYSIS_DB_ID,"SELECT RAW FROM SEQUENCES WHERE SEQUENCE_ID = '" + toDoSequences[seqCounter] + "'"));
aSeq = (rawSeq[0])["RAW"];
seqLen = Abs(aSeq)-2;
minStops = 1e20;
tString = "";
rFrame = 0;
rrc = 0;
allTran = {3,2};
stopPosn = {6,2};
for (rc = 0; rc<=doRC; rc = rc+1)
{
if (rc)
{
aSeq = nucleotideReverseComplement (aSeq)
}
for (offset = 0; offset < 3; offset = offset+1)
{
translString = "";
translString * (seqLen/3+1);
for (seqPos = offset; seqPos < seqLen; seqPos = seqPos+3)
{
codon = aSeq[seqPos][seqPos+2];
prot = codonToAAMap[codon];
if (Abs(prot))
{
translString * prot;
}
else
{
translString * "?";
}
}
translString * 0;
translString = translString^{{"X$","?"}};
stopPos = translString||"X";
if (stopPos[0]>=0)
{
stopCount = Rows(stopPos)$2;
stopPosn[3*rc+offset][0] = stopPos[0];
stopPosn[3*rc+offset][1] = stopPos[stopCount*2-1];
}
else
{
stopCount = 0;
}
if (stopCount<minStops)
{
minStops = stopCount;
rFrame = offset;
rrc = rc;
tString = translString;
}
allTran[offset][rc] = translString;
}
}
ReadingFrames [seqCounter] = rFrame;
ProteinSequences [seqCounter] = tString;
frameCounter [rFrame][rrc] = frameCounter[rFrame][rrc]+1;
StopPositions [seqCounter] = stopPosn;
AllTranslations [seqCounter] = allTran;
RC [seqCounter] = rrc;
SetParameter (STATUS_BAR_STATUS_STRING, "Reading frame analysis ("+seqCounter+"/"+unalSequenceCount+" done)",0);
}
_closeCacheDB (ANALYSIS_DB_ID);
return 0;
s1 = ProteinSequences[0];
fprintf (stdout, "\nFound:\n\t", frameCounter[0], " sequences in reading frame 1\n\t",frameCounter[1], " sequences in reading frame 2\n\t",frameCounter[2], " sequences in reading frame 3\n\nThere were ", Abs(stillHasStops), " sequences with apparent frameshift/sequencing errors\n");
skipSeqs = {};
for (k=0; k<Abs(stillHasStops); k=k+1)
{
seqCounter = stillHasStops[k];
seqName = sequenceNames[seqCounter];
fprintf (stdout,"Sequence ", seqCounter+1, " (", seqName, ") seems to have");
stopPosn = StopPositions[seqCounter];
fStart = -1;
fEnd = -1;
fMin = 1e10;
frame1 = 0;
frame2 = 0;
checkFramePosition (stopPosn[0][1],stopPosn[1][0],0,1);
checkFramePosition (stopPosn[1][1],stopPosn[0][0],1,0);
checkFramePosition (stopPosn[0][1],stopPosn[2][0],0,2);
checkFramePosition (stopPosn[2][1],stopPosn[0][0],2,0);
checkFramePosition (stopPosn[2][1],stopPosn[1][0],2,1);
checkFramePosition (stopPosn[1][1],stopPosn[2][0],1,2);
if (fStart>=0)
{
allTran = AllTranslations[seqCounter];
useq = UnalignedSeqs[seqCounter];
fprintf (stdout, " a shift from frame ", frame2+1, " to frame ", frame1+1, " between a.a. positions ", fStart, " and ", fEnd, ".");
fStart2 = Max(fStart-1,0);
fEnd2 = Min(fEnd+1,Min(Abs(allTran[frame1]),Abs(allTran[frame2]))-1);
tempString = allTran[frame2];
fprintf (stdout, "\n\tRegion ", fStart2, "-", fEnd2, " in frame ", frame2+1, ":\n\t", tempString[fStart2][fEnd2]);
fprintf (stdout, "\n\t", useq[3*fStart2+frame2][3*fEnd2+frame2-1]);
tempString = allTran[frame1];
fprintf (stdout, "\n\tRegion ", fStart2, "-", fEnd2, " in frame ", frame1+1, ":\n\t", tempString[fStart2][fEnd2]);
fprintf (stdout, "\n\t", useq[3*fStart2+frame1][3*fEnd2+frame1-1]);
fprintf (stdout, "\n\t\tAttempting to resolve by alignment to reference. ");
f1s = allTran[frame1];
f2s = allTran[frame2];
f1l = Abs(f1s);
bestScore = -1e10;
bestSplice = -1;
for (k2=fStart; k2<fEnd; k2=k2+1)
{
s2 = f2s[0][k2]+f1s[k2+1][Abs(f1s)];
inStr = {{s1,s2}};
AlignSequences(aligned, inStr, alignOptions);
aligned = aligned[0];
aligned = aligned[0];
if (aligned > bestScore)
{
bestScore = aligned;
bestSplice = k2;
bestString = s2;
}
}
fprintf (stdout, "Best splice site appears to be at a.a. position ", bestSplice, "\n");
/* update best spliced string */
ProteinSequences[seqCounter] = bestString;
ReadingFrames[seqCounter] = 0;
UnalignedSeqs[seqCounter] = useq[frame2][frame2+3*bestSplice+2] + useq[frame1+3*bestSplice+3][Abs(useq)-1] + "---";
}
else
{
fprintf (stdout, " multiple frameshifts\n");
skipSeqs[seqCounter] = 1;
}
}
SeqAlignments = {};
startingPosition = {unalSequenceCount,2};
refLength = Abs(ProteinSequences[0]);
refInsertions = {refLength,1};
fprintf (stdout,"\nPerforming pairwise alignment with reference sequences\n");
alignmentScores = {};
for (seqCounter = 1; seqCounter < unalSequenceCount; seqCounter = seqCounter+1)
{
if (skipSeqs[seqCounter] == 0)
{
s2 = ProteinSequences[seqCounter];
inStr = {{s1,s2}};
AlignSequences(aligned, inStr, alignOptions);
aligned = aligned[0];
SeqAlignments[seqCounter] = aligned;
alignmentScores[Abs(alignmentScores)] = aligned[0]/Abs(aligned[1]);
aligned = aligned[1];
myStartingPosition = aligned$"[^-]";
myEndingPosition = Abs (aligned)-1;
while (aligned[myEndingPosition]=="-")
{
myEndingPosition = myEndingPosition - 1;
}
myStartingPosition = myStartingPosition[0];
startingPosition[seqCounter][0] = myStartingPosition;
startingPosition[seqCounter][1] = myEndingPosition;
aligned = aligned[myStartingPosition][myEndingPosition];
refInsert = aligned||"-+";
if (refInsert[0]>0)
{
insCount = Rows (refInsert)/2;
offset = 0;
for (insN = 0; insN < insCount; insN = insN+1)
{
insPos = refInsert[insN*2];
insLength = refInsert[insN*2+1]-insPos+1;
insPos = insPos-offset;
if (refInsertions[insPos]<insLength)
{
refInsertions[insPos]=insLength;
}
offset = offset + insLength;
}
}
}
}
alignmentScoresM = avlToMatrix ("alignmentScores");
ExecuteAFile ("Utility/DescriptiveStatistics.bf");
distInfo = GatherDescriptiveStats (alignmentScoresM);
/*lowerCuttoff = 0.25;*/
distInfo["Mean"] - 2*distInfo["Std.Dev"];
/* produce a fully gapped reference sequence */
fprintf (stdout,"\nMerging pairwise alignments into a MSA\n");
fullRefSeq = "";
fullRefSeq * refLength;
fullRefSeq * (s1[0]);
s1N = UnalignedSeqs[0];
fullRefSeqN = "";
fullRefSeqN * (3*refLength);
fullRefSeqN * (s1N[0][2]);
frameShift = ReadingFrames[0];
for (seqCounter=1;seqCounter<refLength;seqCounter=seqCounter+1)
{
gapCount = refInsertions[seqCounter];
for (k=0; k<gapCount;k=k+1)
{
fullRefSeq*("-");
fullRefSeqN*("---");
}
fullRefSeq * (s1[seqCounter]);
fullRefSeqN * (s1N[frameShift+seqCounter*3][frameShift+seqCounter*3+2]);
}
fullRefSeq * 0;
fullRefSeqN * 0;
return 0;
refLength = Abs(fullRefSeq);
SetDialogPrompt ("Save alignment to:");
seqName=sequenceNames[0];
fprintf (PROMPT_FOR_FILE,CLEAR_FILE,">",seqName,"\n",fullRefSeq);
fName = LAST_FILE_PATH;
fNameC = fName+".nuc";
fprintf (fNameC,CLEAR_FILE,">",seqName,"\n",fullRefSeqN);
alCounter = 0;
for (seqCounter = 1; seqCounter < unalSequenceCount; seqCounter = seqCounter+1)
{
if (skipSeqs[seqCounter] == 0)
{
if (skipOutliers == 0 && alignmentScoresM[alCounter] < lowerCuttoff)
{
seqName=sequenceNames[seqCounter];
fprintf (stdout, "Sequence ", seqName ," was skipped because of a poor alignment score.\n");
skipSeqs[seqCounter] = 1;
alCounter = alCounter + 1;
continue;
}
alCounter = alCounter + 1;
seqName=sequenceNames[seqCounter];
aligned = SeqAlignments[seqCounter];
aligned1 = aligned[1];
aligned2 = aligned[2];
s2 = startingPosition[seqCounter][0];
e2 = startingPosition[seqCounter][1];
gappedSeq = "";
gappedSeq * Abs(aligned2);
k=0;
while (k<refLength)
{
while (fullRefSeq[k]!=aligned1[s2])
{
gappedSeq*("-");
k=k+1;
}
gappedSeq*(aligned2[s2]);
s2=s2+1;
k=k+1;
}
gappedSeq * 0;
gappedSeqN = "";
gappedSeqN * (3*Abs(aligned2));
frameShift = ReadingFrames[seqCounter];
s1N = UnalignedSeqs[seqCounter];
s2N = ProteinSequences[seqCounter];
s2 = startingPosition[seqCounter][0];
k = 0;
e2 = Abs(gappedSeq);
k = 0;
while (k<e2)
{
while ((s2N[s2]!=gappedSeq[k])&&(k<e2))
{
gappedSeqN * ("---");
k=k+1;
}
if (k<e2)
{
gappedSeqN * s1N[frameShift+s2*3][frameShift+s2*3+2];
s2 = s2+1;
k=k+1;
}
}
gappedSeqN * 0;
if (refSeq2 && seqCounter == unalSequenceCount-1)
{
fscanf (fName, "Raw", soFar);
fprintf (fName, CLEAR_FILE,">",seqName,"\n",gappedSeq,"\n",soFar);
fscanf (fNameC, "Raw", soFar);
fprintf (fNameC,CLEAR_FILE,">",seqName,"\n",gappedSeqN,"\n",soFar);
}
else
{
fprintf (fName,"\n>",seqName,"\n",gappedSeq);
fprintf (fNameC,"\n>",seqName,"\n",gappedSeqN);
}
}
}
if (Abs(skipSeqs))
{
fName = fName+".bad";
for (seqCounter = 1; seqCounter < unalSequenceCount; seqCounter = seqCounter+1)
{
if (skipSeqs[seqCounter])
{
seqName=sequenceNames[seqCounter];
fprintf (fName,">",seqName,"\n",UnalignedSeqs[seqCounter],"\n");
}
}
}
function checkFramePosition (pos1, pos2, fr1, fr2)
{
fSpan = pos2-pos1;
if (fSpan>1) /* first followed by second*/
{
if (fSpan < fMin)
{
fMin = fSpan;
frame1 = fr1;
frame2 = fr2;
fStart = pos1+1;
fEnd = pos2;
}
}
return 0;
}
+ +
+ +
+ +Jump to Line + + +
+ +
+ +
+
+ + +
+ +
+ +
+ + +
+
+
+ +
+
+
+
+
+ +
+ + + + + + +
+ + + Something went wrong with that request. Please try again. +
+ + + + + + + +
+ + + + diff --git a/samples/HyPhy/AAModelComparison.bf b/samples/HyPhy/AAModelComparison.bf new file mode 100644 index 00000000..72b30a53 --- /dev/null +++ b/samples/HyPhy/AAModelComparison.bf @@ -0,0 +1,1827 @@ + + + + + + + + + + + + hyphy/AAModelComparison.bf at master · veg/hyphy + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Skip to content +
+ + + + + + + + + + + + +
+
+
+ +
+
+
+ +
    + +
  • +
    + +
    + + + + Watch + + + + +
    + +
    +
    +
    +
  • + +
  • + +
    + +
    + + +
    +
    + + +
    + +
  • + +
  • + + + Fork + + + + +
  • + +
+ +

+ + /hyphy + + + + + +

+
+
+ +
+
+ +
+ + + +
+ +
+

HTTPS clone URL

+
+ + + + +
+
+ + +
+

SSH clone URL

+
+ + + + +
+
+ + +
+

Subversion checkout URL

+
+ + + + +
+
+ + + +

You can clone with + HTTPS, SSH, or Subversion. + + + +

+ + + + Clone in Desktop + + + + + + + Download ZIP + +
+
+ +
+ + + + + + +
+ +
+ + + branch: + master + + + +
+ +
+ + + + +
+ + +
+ + +
+ Fetching contributors… +
+ +
+

+

Cannot retrieve contributors at this time

+
+
+
+
+
+ +
+ Raw + Blame + History +
+ + + + + +
+ +
+
+ +
+ +
+ 253 lines (199 sloc) + + 5.743 kb +
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
if (Rows (modelMatrixList) == 0)
{
modelMatrixList =
{
{"Equal Input", "EIAA.mdl", "19"}
{"Dayhoff","Dayhoff.mdl","0"}
{"Dayhoff+F","Dayhoff_F.mdl","19"}
{"JTT", "Jones.mdl", "0"}
{"JTT+F", "Jones_F.mdl", "19"}
{"WAG", "WAG.mdl", "0"}
{"WAG+F", "WAG_F.mdl", "19"}
{"rtREV", "rtREV.mdl", "0"}
{"rtREV+F", "rtREV_F.mdl", "19"}
{"mtMAM", "mtMAM.mdl", "0"}
{"mtMAM+F", "mtMAM_F.mdl", "19"}
{"mtREV 24", "mtREV_24.mdl", "0"}
{"mtREV 24+F", "mtREV_24_F.mdl", "19"}
{"HIV within", "HIVwithin.mdl", "0"}
{"HIV within+F", "HIVwithin+F.mdl", "19"}
{"HIV between", "HIVbetween.mdl", "0"}
{"HIV between+F", "HIVbetween+F.mdl", "19"}
{"REV-1 step", "reducedREV.mdl", "19"}
{"REV", "mtREV.mdl", "19"}
};
}
/*___________________________________________________________________________________________________________*/
function runAModel (modelID, fileName, xtraP, midx)
{
ExecuteCommands ("#include \"TemplateModels/"+fileName+"\";");
Tree givenTree = treeString;
LikelihoodFunction lf = (filteredData,givenTree);
GetString (lf_info, lf, -1);
locals = lf_info["Local Independent"];
if (Columns (branchLengthStash))
{
USE_LAST_RESULTS = 1;
for (_iv = 0; _iv < Columns (locals); _iv = _iv+1)
{
ExecuteCommands (locals[_iv] + "=1;\n");
}
currentBL = BranchLength (givenTree,0);
currentBN = BranchName (givenTree,-1);
for (_iv = 0; _iv < Columns (currentBN); _iv = _iv+1)
{
ExecuteCommands ("givenTree."+currentBN[_iv]+".t="+branchLengthStash[_iv]/currentBL+";");
}
}
else
{
for (_iv = 0; _iv < Columns (locals); _iv = _iv+1)
{
ExecuteCommands (locals[_iv] + "=0.1;\n");
}
USE_LAST_RESULTS = 1;
}
Optimize (res,lf);
fprintf (stdout, "| ", modelID);
for (k=0; k<maxModelWidth-Abs(modelID)-1; k=k+1)
{
fprintf (stdout, " ");
}
params = res[1][1]+xtraP;
AIC = 2(-res[1][0]+params);
if (filteredData.sites-params>1)
{
cAIC = 2(-res[1][0]+params*(filteredData.sites/(filteredData.sites-params-1)));
}
else
{
cAIC = 0;
}
branchLengths = BranchLength (givenTree,-1);
TL = 0;
for (k=Rows(branchLengths)*Columns(branchLengths)-1; k>=0; k=k-1)
{
TL = TL + branchLengths[k];
}
fprintf (stdout, "| ", Format (res[1][0],14,3), " | ", Format (params,5,0), " | ",
Format (AIC, 9,3), " | ",);
if (cAIC > 0)
{
fprintf (stdout, Format (cAIC,11,3), " | ");
}
else
{
fprintf (stdout, " N/A | ");
}
fprintf (stdout, Format (TL,11,3), " |\n", sepString);
resultMatrix[midx][0] = res[1][0];
resultMatrix[midx][1] = params;
resultMatrix[midx][2] = AIC;
resultMatrix[midx][3] = cAIC;
resultMatrix[midx][4] = TL;
if (AIC < bestAIC)
{
bestAIC = AIC;
bestAICidx = midx;
branchLengthStash = BranchLength (givenTree,-1);
}
if (cAIC > 0)
{
if (bestCAIC > cAIC)
{
bestCAIC = cAIC;
bestCAICidx = midx;
}
}
return 0;
}
/*___________________________________________________________________________________________________________*/
maxModelWidth = 7;
skipCodeSelectionStep = 0;
ChoiceList (doREV, "Include REV?", 1, SKIP_NONE, "Yes", "Include REV and reduced REV models. CAUTION: these models take a long time to fit.",
"No", "Only use empirical models");
if (doREV < 0)
{
return 0;
}
if (doREV == 0)
{
#include "TemplateModels/chooseGeneticCode.def";
skipCodeSelectionStep = 1;
}
modelCount = Rows (modelMatrixList) - 2*doREV;
for (k=0; k<modelCount; k=k+1)
{
maxModelWidth = Max(maxModelWidth,Abs (modelMatrixList[k][0])+2);
}
sepString = "";
capString = "";
sepString * 256;
sepString * "+";
capString * 256;
capString * "| Model";
for (k=0; k<maxModelWidth; k=k+1)
{
sepString * "-";
}
for (k=0; k<maxModelWidth-6; k=k+1)
{
capString * " ";
}
capString * "| Log Likelihood | #prms | AIC Score | c-AIC Score | Tree Length |\n";
sepString * "+----------------+-------+-----------+-------------+-------------+\n";
sepString * 0;
capString * 0;
branchLengthStash = 0;
SKIP_MODEL_PARAMETER_LIST = 0;
#include "TemplateModels/modelParameters2.mdl";
if (modelType == 1)
{
#include "TemplateModels/defineGamma.mdl";
}
if (modelType == 2)
{
#include "TemplateModels/defineHM.mdl";
}
SKIP_MODEL_PARAMETER_LIST = 1;
SetDialogPrompt ("Please load an amino-acid data file:");
DataSet ds = ReadDataFile (PROMPT_FOR_FILE);
DataSetFilter filteredData = CreateFilter (ds,1);
fprintf (stdout,"\nRunning aminoacid model comparisons on ", LAST_FILE_PATH, "\n\nThe alignment has ",ds.species, " sequences and ", ds.sites, " sites\n");
_DO_TREE_REBALANCE_ = 1;
#include "queryTree.bf";
resultMatrix = {modelCount, 5};
fprintf (stdout, "\n",sepString,capString,sepString);
bestAIC = 1e100;
bestCAIC = 1e100;
bestAICidx = 0;
bestCAICidx = -1;
for (mid=0; mid<modelCount; mid=mid+1)
{
runAModel (modelMatrixList[mid][0], modelMatrixList[mid][1], 0+modelMatrixList[mid][2], mid);
}
fprintf (stdout, "\n\nBest AIC model:\n\t", modelMatrixList[bestAICidx][0], " with the score of ", bestAIC);
if (bestCAICidx>=0)
{
fprintf (stdout, "\n\nBest c-AIC model:\n\t", modelMatrixList[bestCAICidx][0], " with the score of ", bestCAIC);
}
labelMatrix = {{"Log-likelihood","Parameters","AIC","c-AIC","Total tree length",""}};
aaString = "Model";
for (fC = 0; fC < modelCount; fC = fC+1)
{
aaString = aaString + ";" + modelMatrixList[fC][0];
}
USE_LAST_RESULTS = 0;
labelMatrix[5] = aaString;
skipCodeSelectionStep = 0;
OpenWindow (CHARTWINDOW,{{"Model Fits"}
{"labelMatrix"},
{"resultMatrix"},
{"Bar Chart"},
{"Index"},
{"c-AIC"},
{"Model Index"},
{""},
{"AIC"}
},
"SCREEN_WIDTH-60;SCREEN_HEIGHT-60;30;30");
+ +
+ +
+ +Jump to Line + + +
+ +
+ +
+
+ + +
+ +
+ +
+ + +
+
+
+ +
+
+
+
+
+ +
+ + + + + + +
+ + + Something went wrong with that request. Please try again. +
+ + + + + + + +
+ + + + diff --git a/samples/HyPhy/AnalyzeCodonData.bf b/samples/HyPhy/AnalyzeCodonData.bf new file mode 100644 index 00000000..27280bbc --- /dev/null +++ b/samples/HyPhy/AnalyzeCodonData.bf @@ -0,0 +1,1031 @@ + + + + + + + + + + + + hyphy/AnalyzeCodonData.bf at master · veg/hyphy + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Skip to content +
+ + + + + + + + + + + + +
+
+
+ +
+
+
+ +
    + +
  • +
    + +
    + + + + Watch + + + + +
    + +
    +
    +
    +
  • + +
  • + +
    + +
    + + +
    +
    + + +
    + +
  • + +
  • + + + Fork + + + + +
  • + +
+ +

+ + /hyphy + + + + + +

+
+
+ +
+
+ +
+ + + +
+ +
+

HTTPS clone URL

+
+ + + + +
+
+ + +
+

SSH clone URL

+
+ + + + +
+
+ + +
+

Subversion checkout URL

+
+ + + + +
+
+ + + +

You can clone with + HTTPS, SSH, or Subversion. + + + +

+ + + + Clone in Desktop + + + + + + + Download ZIP + +
+
+ +
+ + + + + + +
+ +
+ + + branch: + master + + + +
+ +
+ + + + +
+ + +
+ + +
+ Fetching contributors… +
+ +
+

+

Cannot retrieve contributors at this time

+
+
+
+
+
+ +
+ Raw + Blame + History +
+ + + + + +
+ +
+
+ +
+ +
+ 54 lines (36 sloc) + + 1.285 kb +
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NICETY_LEVEL = 3;
#include "TemplateModels/chooseGeneticCode.def";
#include "simpleBootstrap.bf";
SetDialogPrompt ("Please specify a codon data file:");
COUNT_GAPS_IN_FREQUENCIES = 0;
VERBOSITY_LEVEL = 1;
DataSet ds = ReadDataFile (PROMPT_FOR_FILE);
DataSetFilter filteredData = CreateFilter (ds,3,"","",GeneticCodeExclusions);
fprintf (stdout,"\n______________READ THE FOLLOWING DATA______________\n",ds);
SelectTemplateModel(filteredData);
_DO_TREE_REBALANCE_ = 1;
#include "queryTree.bf";
if (modelType)
{
ChoiceList (branchLengths, "Branch Lengths", 1, SKIP_NONE,
"Estimate", "Estimate branch lengths by ML",
"Proportional to input tree", "Branch lengths are proportional to those in input tree");
if (branchLengths < 0)
{
return;
}
if (branchLengths == 1)
{
global treeScaler = 1;
ReplicateConstraint ("this1.?.?:=treeScaler*this2.?.?__", givenTree, givenTree);
}
}
LikelihoodFunction lf = (filteredData,givenTree);
Optimize (res,lf);
fprintf (stdout, "\n______________RESULTS______________\n",lf);
/* compute syn and non-syn stencils for current genetic code */
#include "categoryEcho.bf";
GetString (sendMeBack,lf,-1);
sendMeBack["LogL"] = res[1][0];
sendMeBack["NP"] = res[1][1];
return sendMeBack;
+ +
+ +
+ +Jump to Line + + +
+ +
+ +
+
+ + +
+ +
+ +
+ + +
+
+
+ +
+
+
+
+
+ +
+ + + + + + +
+ + + Something went wrong with that request. Please try again. +
+ + + + + + + +
+ + + + diff --git a/samples/HyPhy/AnalyzeDiNucData.bf b/samples/HyPhy/AnalyzeDiNucData.bf new file mode 100644 index 00000000..833c79d7 --- /dev/null +++ b/samples/HyPhy/AnalyzeDiNucData.bf @@ -0,0 +1,931 @@ + + + + + + + + + + + + hyphy/AnalyzeDiNucData.bf at master · veg/hyphy + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Skip to content +
+ + + + + + + + + + + + +
+
+
+ +
+
+
+ +
    + +
  • +
    + +
    + + + + Watch + + + + +
    + +
    +
    +
    +
  • + +
  • + +
    + +
    + + +
    +
    + + +
    + +
  • + +
  • + + + Fork + + + + +
  • + +
+ +

+ + /hyphy + + + + + +

+
+
+ +
+
+ +
+ + + +
+ +
+

HTTPS clone URL

+
+ + + + +
+
+ + +
+

SSH clone URL

+
+ + + + +
+
+ + +
+

Subversion checkout URL

+
+ + + + +
+
+ + + +

You can clone with + HTTPS, SSH, or Subversion. + + + +

+ + + + Clone in Desktop + + + + + + + Download ZIP + +
+
+ +
+ + + + + + +
+ +
+ + + branch: + master + + + +
+ +
+ + + + +
+ + +
+ + +
+ Fetching contributors… +
+ +
+

+

Cannot retrieve contributors at this time

+
+
+
+
+
+ +
+ Raw + Blame + History +
+ + + + + +
+ +
+
+ +
+ +
+ 29 lines (15 sloc) + + 0.661 kb +
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
SetDialogPrompt ("Please specify a di-nucleotide (e.g. stem RNA) file:");
DataSet ds = ReadDataFile (PROMPT_FOR_FILE);
DataSetFilter filteredData = CreateFilter (ds,2);
fprintf (stdout,"\n______________READ THE FOLLOWING DATA______________\n",ds);
SelectTemplateModel(filteredData);
_DO_TREE_REBALANCE_ = 1;
#include "queryTree.bf";
LikelihoodFunction lf = (filteredData,givenTree);
timer = Time(0);
USE_ADAPTIVE_VARIABLE_STEP = 0;
Optimize (res,lf);
timer = Time(0)-timer;
fprintf (stdout, "\n______________RESULTS______________\nTime taken = ", timer, " seconds\nAIC Score = ",
2(res[1][1]-res[1][0]),"\n",lf);
#include "categoryEcho.bf";
+ +
+ +
+ +Jump to Line + + +
+ +
+ +
+
+ + +
+ +
+ +
+ + +
+
+
+ +
+
+
+
+
+ +
+ + + + + + +
+ + + Something went wrong with that request. Please try again. +
+ + + + + + + +
+ + + + diff --git a/samples/HyPhy/AnalyzeNucProtData.bf b/samples/HyPhy/AnalyzeNucProtData.bf new file mode 100644 index 00000000..710cb4b6 --- /dev/null +++ b/samples/HyPhy/AnalyzeNucProtData.bf @@ -0,0 +1,931 @@ + + + + + + + + + + + + hyphy/AnalyzeNucProtData.bf at master · veg/hyphy + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Skip to content +
+ + + + + + + + + + + + +
+
+
+ +
+
+
+ +
    + +
  • +
    + +
    + + + + Watch + + + + +
    + +
    +
    +
    +
  • + +
  • + +
    + +
    + + +
    +
    + + +
    + +
  • + +
  • + + + Fork + + + + +
  • + +
+ +

+ + /hyphy + + + + + +

+
+
+ +
+
+ +
+ + + +
+ +
+

HTTPS clone URL

+
+ + + + +
+
+ + +
+

SSH clone URL

+
+ + + + +
+
+ + +
+

Subversion checkout URL

+
+ + + + +
+
+ + + +

You can clone with + HTTPS, SSH, or Subversion. + + + +

+ + + + Clone in Desktop + + + + + + + Download ZIP + +
+
+ +
+ + + + + + +
+ +
+ + + branch: + master + + + +
+ +
+ + + + +
+ + +
+ + +
+ Fetching contributors… +
+ +
+

+

Cannot retrieve contributors at this time

+
+
+
+
+
+ +
+ Raw + Blame + History +
+ + + + + +
+ +
+
+ +
+ +
+ 29 lines (18 sloc) + + 0.868 kb +
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
_Genetic_Code = 0;
ExecuteAFile("simpleBootstrap.bf");
SetDialogPrompt ("Please specify a nucleotide or amino-acid data file:");
DataSet ds = ReadDataFile (PROMPT_FOR_FILE);
DataSetFilter filteredData = CreateFilter (ds,1);
fprintf (stdout,"\n______________READ THE FOLLOWING DATA______________\n",ds);
SelectTemplateModel(filteredData);
_DO_TREE_REBALANCE_ = 1;
ExecuteAFile ("queryTree.bf");
LikelihoodFunction lf = (filteredData,givenTree);
timer = Time(0);
Optimize (res,lf);
timer = Time(0)-timer;
fprintf (stdout, "\n______________RESULTS______________\nTime taken = ", timer, " seconds\nAIC Score = ",
2(res[1][1]-res[1][0]),"\n",lf);
ExecuteAFile ("categoryEcho.bf");
GetString (lfInfo, lf, -1);
return {"Log(L)": res[1][0], "DF": res[1][1], "Tree": Format (givenTree,1,1)}
+ +
+ +
+ +Jump to Line + + +
+ +
+ +
+
+ + +
+ +
+ +
+ + +
+
+
+ +
+
+
+
+
+ +
+ + + + + + +
+ + + Something went wrong with that request. Please try again. +
+ + + + + + + +
+ + + + diff --git a/samples/HyPhy/BS2007.bf b/samples/HyPhy/BS2007.bf new file mode 100644 index 00000000..d49327e5 --- /dev/null +++ b/samples/HyPhy/BS2007.bf @@ -0,0 +1,1999 @@ + + + + + + + + + + + + hyphy/BS2007.bf at master · veg/hyphy + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Skip to content +
+ + + + + + + + + + + + +
+
+
+ +
+
+
+ +
    + +
  • +
    + +
    + + + + Watch + + + + +
    + +
    +
    +
    +
  • + +
  • + +
    + +
    + + +
    +
    + + +
    + +
  • + +
  • + + + Fork + + + + +
  • + +
+ +

+ + /hyphy + + + + + +

+
+
+ +
+
+ +
+ + + +
+ +
+

HTTPS clone URL

+
+ + + + +
+
+ + +
+

SSH clone URL

+
+ + + + +
+
+ + +
+

Subversion checkout URL

+
+ + + + +
+
+ + + +

You can clone with + HTTPS, SSH, or Subversion. + + + +

+ + + + Clone in Desktop + + + + + + + Download ZIP + +
+
+ +
+ + + + + + +
+ +
+ + + branch: + master + + + +
+ +
+ + + + +
+ + +
+ + +
+ Fetching contributors… +
+ +
+

+

Cannot retrieve contributors at this time

+
+
+
+
+
+ +
+ Raw + Blame + History +
+ + + + + +
+ +
+
+ +
+ +
+ 296 lines (239 sloc) + + 9.281 kb +
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
/* 1. include a file to define the genetic code
Note the use of base directory and path forming variables to make this analysis
independent of directory placement
*/
incFileName = HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"chooseGeneticCode.def";
ExecuteCommands ("#include \""+incFileName+"\";");
/* 2. load a codon partition */
SetDialogPrompt ("Please locate a coding alignment:");
DataSet ds = ReadDataFile (PROMPT_FOR_FILE);
DataSetFilter filteredData = CreateFilter (ds,3,"","",GeneticCodeExclusions);
coding_path = LAST_FILE_PATH;
fprintf (stdout, "\nLoaded a ", filteredData.species, " sequence alignment with ", filteredData.sites, " codons from\n",coding_path,"\n");
/* 3. include a file to prompt for a tree */
incFileName = HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"queryTree.bf";
ExecuteCommands ("#include \""+incFileName+"\";");
/* 4. Compute nucleotide counts by position for the F3x4 estimator */
COUNT_GAPS_IN_FREQUENCIES = 0;
HarvestFrequencies (baseFreqs,filteredData,3,1,1);
COUNT_GAPS_IN_FREQUENCIES = 1;
fprintf (stdout, "\nBase composition:\n\tA: ", Format (baseFreqs[0][0],10,5),",",Format (baseFreqs[0][1],10,5),",",Format (baseFreqs[0][2],10,5),
"\n\tC: ", Format (baseFreqs[1][0],10,5),",",Format (baseFreqs[1][1],10,5),",",Format (baseFreqs[1][2],10,5),
"\n\tG: ", Format (baseFreqs[2][0],10,5),",",Format (baseFreqs[2][1],10,5),",",Format (baseFreqs[2][2],10,5),
"\n\tT: ", Format (baseFreqs[3][0],10,5),",",Format (baseFreqs[3][1],10,5),",",Format (baseFreqs[3][2],10,5), "\n");
/* 6. define the GY94 rate matrix; for now each branch will have it's own
dS and dN, we will constrain them later */
global kappa_inv = 1;
ModelMatrixDimension = 64;
for (h = 0; h<64; h=h+1)
{
if (_Genetic_Code[h]==10) /* stop codon */
{
ModelMatrixDimension = ModelMatrixDimension-1;
}
}
GY_Matrix = {ModelMatrixDimension,ModelMatrixDimension};
hshift = 0;
for (h=0; h<64; h=h+1)
{
if (_Genetic_Code[h]==10)
{
hshift = hshift+1;
}
else
{
vshift = hshift;
for (v = h+1; v<64; v=v+1)
{
diff = v-h;
if (_Genetic_Code[v]==10)
{
vshift = vshift+1;
}
else
{
if ((h$4==v$4)||((diff%4==0)&&(h$16==v$16))||(diff%16==0)) /* one step */
{
if (h$4==v$4)
{
transition = v%4;
transition2= h%4;
}
else
{
if(diff%16==0)
{
transition = v$16;
transition2= h$16;
}
else
{
transition = v%16$4;
transition2= h%16$4;
}
}
if (_Genetic_Code[0][h]==_Genetic_Code[0][v]) /* synonymous */
{
if (Abs(transition-transition2)%2) /* transversion */
{
GY_Matrix[h-hshift][v-vshift] := kappa_inv*synRate;
GY_Matrix[v-vshift][h-hshift] := kappa_inv*synRate;
}
else
{
GY_Matrix[h-hshift][v-vshift] := synRate;
GY_Matrix[v-vshift][h-hshift] := synRate;
}
}
else
{
if (Abs(transition-transition2)%2) /* transversion */
{
GY_Matrix[h-hshift][v-vshift] := kappa_inv*nonSynRate;
GY_Matrix[v-vshift][h-hshift] := kappa_inv*nonSynRate;
}
else
{
GY_Matrix[h-hshift][v-vshift] := nonSynRate;
GY_Matrix[v-vshift][h-hshift] := nonSynRate;
}
}
}
}
}
}
}
/*8. build codon frequencies (use the F3x4 estimator) */
PIStop = 1.0;
codonFreqs = {ModelMatrixDimension,1};
hshift = 0;
for (h=0; h<64; h=h+1)
{
first = h$16;
second = h%16$4;
third = h%4;
if (_Genetic_Code[h]==10)
{
hshift = hshift+1;
PIStop = PIStop-baseFreqs[first][0]*baseFreqs[second][1]*baseFreqs[third][2];
continue;
}
codonFreqs[h-hshift]=baseFreqs[first][0]*baseFreqs[second][1]*baseFreqs[third][2];
}
codonFreqs = codonFreqs*(1.0/PIStop);
/*9. define the codon model */
Model GY_Model = (GY_Matrix,codonFreqs,1);
/*10. Define the tree and pick the foreground branch, displaying a tree window to facilitate selection;
the latter step is executed for 2 of 3 model choices */
Tree givenTree1 = treeString;
Tree givenTree2 = treeString;
Tree givenTree3 = treeString;
Tree givenTree4 = treeString;
USE_LAST_RESULTS = 0;
OPTIMIZATION_METHOD = 4;
/* Approximate kappa and branch lengths using an HKY85 fit */
HKY85_Matrix = {{*,t*kappa_inv,t,t*kappa_inv}
{t*kappa_inv,*,kappa_inv*t,t}
{t,t*kappa_inv,*,kappa_inv*t}
{t*kappa_inv,t,kappa_inv*t,*}};
HarvestFrequencies (nucFreqs,ds,1,1,1);
Model HKY85_Model = (HKY85_Matrix,nucFreqs,1);
Tree nucTree = treeString;
DataSetFilter nucData = CreateFilter (ds,1);
fprintf (stdout, "Obtaining nucleotide branch lengths and kappa to be used as starting values...\n");
LikelihoodFunction nuc_lf = (nucData,nucTree);
Optimize (nuc_mle,nuc_lf);
fprintf (stdout, "\n", Format (nucTree,1,1), "\nkappa=", Format (1/kappa_inv,8,3), "\n");
USE_LAST_RESULTS = 1;
mxTreeSpec = {5,1};
mxTreeSpec [0] = "nucTree";
mxTreeSpec [1] = "8240";
mxTreeSpec [2] = "10,40,-10,-175,1";
mxTreeSpec [3] = "";
mxTreeSpec [4] = "";
OpenWindow (TREEWINDOW, mxTreeSpec,"(SCREEN_WIDTH-50)/2;(SCREEN_HEIGHT-50)/2;30+(SCREEN_WIDTH-30)/2;45");
leafNodes = TipCount (givenTree);
internalNodes = BranchCount(givenTree);
choiceMatrix = {internalNodes+leafNodes,2};
for (bc=0; bc<internalNodes; bc=bc+1)
{
choiceMatrix[bc][0] = BranchName(givenTree,bc);
choiceMatrix[bc][1] = "Internal Branch Rooting " + givenTree[bc];
}
for (bc=0; bc<leafNodes; bc=bc+1)
{
choiceMatrix[bc+internalNodes][0] = TipName(givenTree,bc);
choiceMatrix[bc+internalNodes][1] = "Leaf node " + choiceMatrix[bc+internalNodes][0];
}
ChoiceList (stOption,"Choose the foreground branch",0,NO_SKIP,choiceMatrix);
if (stOption[0] < 0)
{
return -1;
}
fprintf (stdout, "\n\n", Columns (stOption)," foreground branch(es) set to: ", "\n");
for (bc = 0; bc < Columns (stOption); bc = bc + 1)
{
fprintf (stdout, choiceMatrix[stOption[bc]][0], "\n");
}
OpenWindow (CLOSEWINDOW, "Tree nucTree");
/* 15. Constrain dS and dN in the tree to based upon different models */
global omega_1 = 0.25;
global omega_2 = 0.5;
global omega_3 = 1.5;
omega_1 :< 1;
omega_2 :< 1;
omega_3 :> 1;
ClearConstraints (givenTree1);
ClearConstraints (givenTree2);
ClearConstraints (givenTree3);
ClearConstraints (givenTree4);
/* 16. define and optimize the likelihood function */
bNames = BranchName (givenTree,-1);
nucBL = BranchLength (nucTree,-1);
for (bc=0; bc<Columns(bNames)-1; bc=bc+1)
{
ExecuteCommands ("givenTree1."+bNames[bc]+".synRate =nucTree."+bNames[bc]+".t;");
ExecuteCommands ("givenTree1."+bNames[bc]+".nonSynRate =nucTree."+bNames[bc]+".t;");
ExecuteCommands ("givenTree2."+bNames[bc]+".synRate =nucTree."+bNames[bc]+".t;");
ExecuteCommands ("givenTree2."+bNames[bc]+".nonSynRate =nucTree."+bNames[bc]+".t;");
ExecuteCommands ("givenTree3."+bNames[bc]+".synRate =nucTree."+bNames[bc]+".t;");
ExecuteCommands ("givenTree3."+bNames[bc]+".nonSynRate =nucTree."+bNames[bc]+".t;");
ExecuteCommands ("givenTree4."+bNames[bc]+".synRate =nucTree."+bNames[bc]+".t;");
ExecuteCommands ("givenTree4."+bNames[bc]+".nonSynRate =nucTree."+bNames[bc]+".t;");
}
codBL = BranchLength (givenTree1,-1);
for (bc=0; bc<Columns(bNames)-1; bc=bc+1)
{
if (nucBL[bc]>0)
{
scalingFactor = nucBL[bc]/codBL[bc];
ExecuteCommands ("givenTree1."+bNames[bc]+".synRate=nucTree."+bNames[bc]+".t*"+scalingFactor+";");
ExecuteCommands ("givenTree1."+bNames[bc]+".nonSynRate=nucTree."+bNames[bc]+".t*"+scalingFactor+";");
ExecuteCommands ("givenTree2."+bNames[bc]+".synRate=nucTree."+bNames[bc]+".t*"+scalingFactor+";");
ExecuteCommands ("givenTree2."+bNames[bc]+".nonSynRate=nucTree."+bNames[bc]+".t*"+scalingFactor+";");
ExecuteCommands ("givenTree3."+bNames[bc]+".synRate=nucTree."+bNames[bc]+".t*"+scalingFactor+";");
ExecuteCommands ("givenTree3."+bNames[bc]+".nonSynRate=nucTree."+bNames[bc]+".t*"+scalingFactor+";");
ExecuteCommands ("givenTree4."+bNames[bc]+".synRate =nucTree."+bNames[bc]+".t;");
ExecuteCommands ("givenTree4."+bNames[bc]+".nonSynRate =nucTree."+bNames[bc]+".t;");
}
}
for (bc = 0; bc < Columns (stOption); bc = bc + 1)
{
bName = choiceMatrix[stOption[bc]][0];
ExecuteCommands ("givenTree1."+bName+".nonSynRate:=omega_1*givenTree1."+bName+".synRate");
ExecuteCommands ("givenTree2."+bName+".nonSynRate:=omega_2*givenTree2."+bName+".synRate");
ExecuteCommands ("givenTree3."+bName+".nonSynRate:=givenTree3."+bName+".synRate");
ExecuteCommands ("givenTree4."+bName+".nonSynRate:=omega_3*givenTree4."+bName+".synRate");
}
OPTIMIZATION_PRECISION = 0.001;
global P_1 = 1/4;
global P_2 = 1/3;
global P_3 = 1/2;
P_1 :< 1;
P_2 :< 1;
P_3 :< 1;
fprintf (stdout, "\nFitting the model with selection...\n");
LikelihoodFunction lf = (filteredData, givenTree1, filteredData, givenTree2,filteredData, givenTree3,filteredData, givenTree4,
"Log(P_1*SITE_LIKELIHOOD[0]+(1-P_1)*P_2*SITE_LIKELIHOOD[1]+(1-P_1)(1-P_2)*P_3*SITE_LIKELIHOOD[2]+(1-P_1)(1-P_2)(1-P_3)*SITE_LIKELIHOOD[3])");
Optimize (mles,lf);
fprintf (stdout, lf);
+ +
+ +
+ +Jump to Line + + +
+ +
+ +
+
+ + +
+ +
+ +
+ + +
+
+
+ +
+
+
+
+
+ +
+ + + + + + +
+ + + Something went wrong with that request. Please try again. +
+ + + + + + + +
+ + + + diff --git a/samples/HyPhy/MatrixIndexing.bf b/samples/HyPhy/MatrixIndexing.bf new file mode 100755 index 00000000..aa1344d4 --- /dev/null +++ b/samples/HyPhy/MatrixIndexing.bf @@ -0,0 +1 @@ +fprintf (stdout, "\n1). Spawning a zero-populated 5x6 matrix and setting it's values to random numbers in [0,1].\n"); aMatrix = {5,6}; aMatrix = aMatrix ["Random(0,1)"]; fprintf (stdout, aMatrix, "\n"); fprintf (stdout, "\n2). Accessing a second-row third-column element and a random element.\n\n"); r = Random (0, Rows(aMatrix))$1; c = Random (0, Columns(aMatrix))$1; fprintf (stdout, "matrix[1][2]=", aMatrix[1][2], "\nmatrix[", r , "][" , c, "]=", aMatrix[r][c], "\n"); fprintf (stdout, "\n3). Accessing the fourth row.\n\n"); fprintf (stdout, "matrix[3][-1]=\n", aMatrix[3][-1],"\n"); fprintf (stdout, "\n4). Accessing the first column.\n\n"); fprintf (stdout, "matrix[-1][0]=\n", aMatrix[-1][0],"\n"); fprintf (stdout, "\n5). Populating a matrix template (below the diagonal).\n\n"); template={5,6}; template=template["_MATRIX_ELEMENT_ROW_>_MATRIX_ELEMENT_COLUMN_"]; fprintf (stdout, template ,"\n"); fprintf (stdout, "\n6). Extracting (by row) matrix elements using the template.\n\n"); fprintf (stdout, aMatrix[template] ,"\n"); fprintf (stdout, "\n7). Extracting a submatrix: top left corner at (1,1) - bottom right corner at (3,2).\n\n"); fprintf (stdout, "matrix[{{1,1}}][{{3,2}}]=\n", aMatrix[{{1,1}}][{{3,2}}],"\n"); fprintf (stdout, "\n8). Returning a matrix in which all elements are squared and above diagonal elements are further increased by 1.\n\n"); fprintf (stdout, "\n", aMatrix["_MATRIX_ELEMENT_VALUE_^2+(_MATRIX_ELEMENT_ROW_<_MATRIX_ELEMENT_COLUMN_)"],"\n"); \ No newline at end of file diff --git a/samples/HyPhy/profile_test.bf b/samples/HyPhy/profile_test.bf new file mode 100755 index 00000000..64cee8b2 --- /dev/null +++ b/samples/HyPhy/profile_test.bf @@ -0,0 +1 @@ +#profile START; s = 0; m = {5,1}; for (k=0; k<250000; k=k+1) { s = s + k; t = Random (0,5); m [t] = m [t] + 1; } #profile PAUSE; s2 = 0; for (k=1; k<10000; k=k+1) { s2 = s2+1/k; } #profile _hyphy_profile_dump; stats = _hyphy_profile_dump["STATS"]; _profile_summer = {1,Rows(stats)}; _profile_summer = _profile_summer["1"] * stats; _instructions = _hyphy_profile_dump["INSTRUCTION"]; _indices = _hyphy_profile_dump["INSTRUCTION INDEX"]; fprintf (stdout, "\nTotal run time (seconds) : ", Format(_profile_summer[1]/1000000,15,6), "\nTotal number of steps : ", Format(_profile_summer[0],15,0), "\n\n"); for (k=0; k Date: Thu, 30 Apr 2015 16:07:19 -0700 Subject: [PATCH 5/8] removing incorrect samples --- samples/HyPhy/454.bf | 2988 --------------------------- samples/HyPhy/AAModelComparison.bf | 1827 ---------------- samples/HyPhy/AnalyzeCodonData.bf | 1031 --------- samples/HyPhy/AnalyzeDiNucData.bf | 931 --------- samples/HyPhy/AnalyzeNucProtData.bf | 931 --------- samples/HyPhy/BS2007.bf | 1999 ------------------ 6 files changed, 9707 deletions(-) delete mode 100644 samples/HyPhy/454.bf delete mode 100644 samples/HyPhy/AAModelComparison.bf delete mode 100644 samples/HyPhy/AnalyzeCodonData.bf delete mode 100644 samples/HyPhy/AnalyzeDiNucData.bf delete mode 100644 samples/HyPhy/AnalyzeNucProtData.bf delete mode 100644 samples/HyPhy/BS2007.bf diff --git a/samples/HyPhy/454.bf b/samples/HyPhy/454.bf deleted file mode 100644 index 40c867bd..00000000 --- a/samples/HyPhy/454.bf +++ /dev/null @@ -1,2988 +0,0 @@ - - - - - - - - - - - - hyphy/454.bf at master · veg/hyphy - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Skip to content -
- - - - - - - - - - - - -
-
-
- -
-
-
- -
    - -
  • -
    - -
    - - - - Watch - - - - -
    - -
    -
    -
    -
  • - -
  • - -
    - -
    - - -
    -
    - - -
    - -
  • - -
  • - - - Fork - - - - -
  • - -
- -

- - /hyphy - - - - - -

-
-
- -
-
- -
- - - -
- -
-

HTTPS clone URL

-
- - - - -
-
- - -
-

SSH clone URL

-
- - - - -
-
- - -
-

Subversion checkout URL

-
- - - - -
-
- - - -

You can clone with - HTTPS, SSH, or Subversion. - - - -

- - - - Clone in Desktop - - - - - - - Download ZIP - -
-
- -
- - - - - - -
- -
- - - branch: - master - - - -
- -
- - - - -
- - -
- - -
- - - - -
- -
-
-
- -
- Raw - Blame - History -
- - - - - -
- -
-
- -
- -
- 538 lines (451 sloc) - - 14.93 kb -
-
- -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ExecuteAFile ("Utility/GrabBag.bf");
ExecuteAFile ("Utility/DBTools.ibf");
alignOptions = {};
SetDialogPrompt ("Sequence File:");
DataSet unal = ReadDataFile (PROMPT_FOR_FILE);
BASE_FILE_PATH = LAST_FILE_PATH;
DB_FILE_PATH = LAST_FILE_PATH + ".db";
ANALYSIS_DB_ID = _openCacheDB (DB_FILE_PATH);
haveTable = _TableExists (ANALYSIS_DB_ID, "SETTINGS");
if (haveTable)
{
existingSettings = _ExecuteSQL (ANALYSIS_DB_ID, "SELECT * FROM SETTINGS");
}
if (Abs(existingSettings))
{
existingSettings = existingSettings [0];
ExecuteCommands ("_Genetic_Code = " + existingSettings["GENETIC_CODE"]);
ExecuteCommands (existingSettings["OPTIONS"] ^ {{"_hyphyAssociativeArray","alignOptions"}});
masterReferenceSequence = existingSettings["REFERENCE"];
dbSequences = _ExecuteSQL (ANALYSIS_DB_ID, "SELECT SEQUENCE_ID FROM SEQUENCES WHERE STAGE = 0");
unalSequenceCount = Abs(dbSequences);
toDoSequences = {unalSequenceCount,1};
for (k = 0; k < unalSequenceCount; k=k+1)
{
toDoSequences[k] = (dbSequences[k])["SEQUENCE_ID"];
}
dbSequences = 0;
fprintf (stdout, "[PHASE 1] Reloaded ", unalSequenceCount, " unprocessed sequences\n");
}
else
{
tableDefines = {};
tableDefines ["SETTINGS"] = {};
(tableDefines ["SETTINGS"])["RUN_DATE"] = "DATE";
(tableDefines ["SETTINGS"])["OPTIONS"] = "TEXT";
(tableDefines ["SETTINGS"])["REFERENCE"] = "TEXT";
(tableDefines ["SETTINGS"])["GENETIC_CODE"] = "TEXT";
(tableDefines ["SETTINGS"])["THRESHOLD"] = "REAL";
tableDefines ["SEQUENCES"] = {};
(tableDefines ["SEQUENCES"])["SEQUENCE_ID"] = "TEXT UNIQUE";
(tableDefines ["SEQUENCES"])["LENGTH"] = "INTEGER";
(tableDefines ["SEQUENCES"])["STAGE"] = "INTEGER";
/*
0 - initial import
1 - in frame without a fix
2 - one frame shift / fixed
3 - out-of-frame; not fixed / not aligned
*/
(tableDefines ["SEQUENCES"])["RAW"] = "TEXT";
(tableDefines ["SEQUENCES"])["ALIGNED_AA"] = "TEXT"; /* aligned aa. sequence */
(tableDefines ["SEQUENCES"])["ALIGNED"] = "TEXT"; /* aligned nucleotide sequence */
(tableDefines ["SEQUENCES"])["OFFSET"] = "INTEGER"; /* start offset w.r.t the reference sequence */
(tableDefines ["SEQUENCES"])["END_OFFSET"] = "INTEGER"; /* end offset w.r.t the reference sequence */
(tableDefines ["SEQUENCES"])["SCORE"] = "REAL";
(tableDefines ["SEQUENCES"])["FRAME"] = "INTEGER";
_CreateTableIfNeeded (ANALYSIS_DB_ID, "SETTINGS", tableDefines["SETTINGS"], 0);
_CreateTableIfNeeded (ANALYSIS_DB_ID, "SEQUENCES", tableDefines["SEQUENCES"], 1);
/* START ALIGNMENT SETTINGS */
LoadFunctionLibrary ("SeqAlignShared.ibf");
DataSetFilter filteredData = CreateFilter (unal,1);
GetInformation (UnalignedSeqs,filteredData);
/* preprocess sequences */
unalSequenceCount = Rows(UnalignedSeqs)*Columns(UnalignedSeqs);
GetString (sequenceNames, unal, -1);
longestSequence = 0;
longestSequenceIDX = 0;
seqRecord = {};
fprintf (stdout, "[PHASE 1] Initial Processing of ", unalSequenceCount, " sequences\n");
for (seqCounter = 0; seqCounter < unalSequenceCount; seqCounter = seqCounter+1)
{
UnalignedSeqs[seqCounter] = UnalignedSeqs[seqCounter]^{{"[^a-zA-Z]",""}};
UnalignedSeqs[seqCounter] = UnalignedSeqs[seqCounter]^{{"^N+",""}};
UnalignedSeqs[seqCounter] = UnalignedSeqs[seqCounter]^{{"N+$",""}};
seqRecord ["SEQUENCE_ID"] = sequenceNames[seqCounter];
seqRecord ["LENGTH"] = Abs (UnalignedSeqs[seqCounter]);
seqRecord ["STAGE"] = 0;
seqRecord ["RAW"] = UnalignedSeqs[seqCounter];
if (doLongestSequence)
{
if (doLongestSequence == 1 || seqCounter != unalSequenceCount-1)
{
if (Abs (UnalignedSeqs[seqCounter]) > longestSequence)
{
longestSequence = Abs (UnalignedSeqs[seqCounter]);
longestSequenceIDX = seqCounter;
}
}
}
_InsertRecord (ANALYSIS_DB_ID, "SEQUENCES", seqRecord);
SetParameter (STATUS_BAR_STATUS_STRING, "Initial processing ("+seqCounter+"/"+unalSequenceCount+" done)",0);
}
if (refSeq == 0)
{
masterReferenceSequence = UnalignedSeqs[0];
}
if (doLongestSequence)
{
fprintf (stdout, "\nSelected sequence ", sequenceNames[longestSequenceIDX], " as reference.");
masterReferenceSequence = UnalignedSeqs[longestSequenceIDX];
}
incFileName = HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"chooseGeneticCode.def";
ExecuteCommands ("#include \""+incFileName+"\";");
doLongestSequence = (refSeq==1);
aRecord = {};
aRecord["RUN_DATE"] = _ExecuteSQL(ANALYSIS_DB_ID,"SELECT DATE('NOW') AS CURRENT_DATE");
aRecord["RUN_DATE"] = ((aRecord["RUN_DATE"])[0])["CURRENT_DATE"];
aRecord["OPTIONS"] = "" + alignOptions;
aRecord["REFERENCE"] = masterReferenceSequence;
aRecord["GENETIC_CODE"] = "" + _Genetic_Code;
_InsertRecord (ANALYSIS_DB_ID, "SETTINGS", aRecord);
toDoSequences = sequenceNames;
UnalignedSequences = 0;
}
skipOutliers = 1;
doRC = 1;
/* build codon translation table */
codonToAAMap = {};
codeToAA = "FLIMVSPTAYXHQNKDECWRG";
nucChars = "ACGT";
for (p1=0; p1<64; p1=p1+1)
{
codon = nucChars[p1$16]+nucChars[p1%16$4]+nucChars[p1%4];
ccode = _Genetic_Code[p1];
codonToAAMap[codon] = codeToAA[ccode];
}
/* determine reading frames */
ProteinSequences = {};
AllTranslations = {};
ReadingFrames = {};
StopCodons = {};
StopPositions = {};
RC = {};
fprintf (stdout, "\n[PHASE 2] Detecting reading frames for each unprocessed sequence...\n");
frameCounter = {3,2};
stillHasStops = {};
aRecord = {};
for (seqCounter = 0; seqCounter < unalSequenceCount; seqCounter = seqCounter+1)
{
rawSeq = (_ExecuteSQL(ANALYSIS_DB_ID,"SELECT RAW FROM SEQUENCES WHERE SEQUENCE_ID = '" + toDoSequences[seqCounter] + "'"));
aSeq = (rawSeq[0])["RAW"];
seqLen = Abs(aSeq)-2;
minStops = 1e20;
tString = "";
rFrame = 0;
rrc = 0;
allTran = {3,2};
stopPosn = {6,2};
for (rc = 0; rc<=doRC; rc = rc+1)
{
if (rc)
{
aSeq = nucleotideReverseComplement (aSeq)
}
for (offset = 0; offset < 3; offset = offset+1)
{
translString = "";
translString * (seqLen/3+1);
for (seqPos = offset; seqPos < seqLen; seqPos = seqPos+3)
{
codon = aSeq[seqPos][seqPos+2];
prot = codonToAAMap[codon];
if (Abs(prot))
{
translString * prot;
}
else
{
translString * "?";
}
}
translString * 0;
translString = translString^{{"X$","?"}};
stopPos = translString||"X";
if (stopPos[0]>=0)
{
stopCount = Rows(stopPos)$2;
stopPosn[3*rc+offset][0] = stopPos[0];
stopPosn[3*rc+offset][1] = stopPos[stopCount*2-1];
}
else
{
stopCount = 0;
}
if (stopCount<minStops)
{
minStops = stopCount;
rFrame = offset;
rrc = rc;
tString = translString;
}
allTran[offset][rc] = translString;
}
}
ReadingFrames [seqCounter] = rFrame;
ProteinSequences [seqCounter] = tString;
frameCounter [rFrame][rrc] = frameCounter[rFrame][rrc]+1;
StopPositions [seqCounter] = stopPosn;
AllTranslations [seqCounter] = allTran;
RC [seqCounter] = rrc;
SetParameter (STATUS_BAR_STATUS_STRING, "Reading frame analysis ("+seqCounter+"/"+unalSequenceCount+" done)",0);
}
_closeCacheDB (ANALYSIS_DB_ID);
return 0;
s1 = ProteinSequences[0];
fprintf (stdout, "\nFound:\n\t", frameCounter[0], " sequences in reading frame 1\n\t",frameCounter[1], " sequences in reading frame 2\n\t",frameCounter[2], " sequences in reading frame 3\n\nThere were ", Abs(stillHasStops), " sequences with apparent frameshift/sequencing errors\n");
skipSeqs = {};
for (k=0; k<Abs(stillHasStops); k=k+1)
{
seqCounter = stillHasStops[k];
seqName = sequenceNames[seqCounter];
fprintf (stdout,"Sequence ", seqCounter+1, " (", seqName, ") seems to have");
stopPosn = StopPositions[seqCounter];
fStart = -1;
fEnd = -1;
fMin = 1e10;
frame1 = 0;
frame2 = 0;
checkFramePosition (stopPosn[0][1],stopPosn[1][0],0,1);
checkFramePosition (stopPosn[1][1],stopPosn[0][0],1,0);
checkFramePosition (stopPosn[0][1],stopPosn[2][0],0,2);
checkFramePosition (stopPosn[2][1],stopPosn[0][0],2,0);
checkFramePosition (stopPosn[2][1],stopPosn[1][0],2,1);
checkFramePosition (stopPosn[1][1],stopPosn[2][0],1,2);
if (fStart>=0)
{
allTran = AllTranslations[seqCounter];
useq = UnalignedSeqs[seqCounter];
fprintf (stdout, " a shift from frame ", frame2+1, " to frame ", frame1+1, " between a.a. positions ", fStart, " and ", fEnd, ".");
fStart2 = Max(fStart-1,0);
fEnd2 = Min(fEnd+1,Min(Abs(allTran[frame1]),Abs(allTran[frame2]))-1);
tempString = allTran[frame2];
fprintf (stdout, "\n\tRegion ", fStart2, "-", fEnd2, " in frame ", frame2+1, ":\n\t", tempString[fStart2][fEnd2]);
fprintf (stdout, "\n\t", useq[3*fStart2+frame2][3*fEnd2+frame2-1]);
tempString = allTran[frame1];
fprintf (stdout, "\n\tRegion ", fStart2, "-", fEnd2, " in frame ", frame1+1, ":\n\t", tempString[fStart2][fEnd2]);
fprintf (stdout, "\n\t", useq[3*fStart2+frame1][3*fEnd2+frame1-1]);
fprintf (stdout, "\n\t\tAttempting to resolve by alignment to reference. ");
f1s = allTran[frame1];
f2s = allTran[frame2];
f1l = Abs(f1s);
bestScore = -1e10;
bestSplice = -1;
for (k2=fStart; k2<fEnd; k2=k2+1)
{
s2 = f2s[0][k2]+f1s[k2+1][Abs(f1s)];
inStr = {{s1,s2}};
AlignSequences(aligned, inStr, alignOptions);
aligned = aligned[0];
aligned = aligned[0];
if (aligned > bestScore)
{
bestScore = aligned;
bestSplice = k2;
bestString = s2;
}
}
fprintf (stdout, "Best splice site appears to be at a.a. position ", bestSplice, "\n");
/* update best spliced string */
ProteinSequences[seqCounter] = bestString;
ReadingFrames[seqCounter] = 0;
UnalignedSeqs[seqCounter] = useq[frame2][frame2+3*bestSplice+2] + useq[frame1+3*bestSplice+3][Abs(useq)-1] + "---";
}
else
{
fprintf (stdout, " multiple frameshifts\n");
skipSeqs[seqCounter] = 1;
}
}
SeqAlignments = {};
startingPosition = {unalSequenceCount,2};
refLength = Abs(ProteinSequences[0]);
refInsertions = {refLength,1};
fprintf (stdout,"\nPerforming pairwise alignment with reference sequences\n");
alignmentScores = {};
for (seqCounter = 1; seqCounter < unalSequenceCount; seqCounter = seqCounter+1)
{
if (skipSeqs[seqCounter] == 0)
{
s2 = ProteinSequences[seqCounter];
inStr = {{s1,s2}};
AlignSequences(aligned, inStr, alignOptions);
aligned = aligned[0];
SeqAlignments[seqCounter] = aligned;
alignmentScores[Abs(alignmentScores)] = aligned[0]/Abs(aligned[1]);
aligned = aligned[1];
myStartingPosition = aligned$"[^-]";
myEndingPosition = Abs (aligned)-1;
while (aligned[myEndingPosition]=="-")
{
myEndingPosition = myEndingPosition - 1;
}
myStartingPosition = myStartingPosition[0];
startingPosition[seqCounter][0] = myStartingPosition;
startingPosition[seqCounter][1] = myEndingPosition;
aligned = aligned[myStartingPosition][myEndingPosition];
refInsert = aligned||"-+";
if (refInsert[0]>0)
{
insCount = Rows (refInsert)/2;
offset = 0;
for (insN = 0; insN < insCount; insN = insN+1)
{
insPos = refInsert[insN*2];
insLength = refInsert[insN*2+1]-insPos+1;
insPos = insPos-offset;
if (refInsertions[insPos]<insLength)
{
refInsertions[insPos]=insLength;
}
offset = offset + insLength;
}
}
}
}
alignmentScoresM = avlToMatrix ("alignmentScores");
ExecuteAFile ("Utility/DescriptiveStatistics.bf");
distInfo = GatherDescriptiveStats (alignmentScoresM);
/*lowerCuttoff = 0.25;*/
distInfo["Mean"] - 2*distInfo["Std.Dev"];
/* produce a fully gapped reference sequence */
fprintf (stdout,"\nMerging pairwise alignments into a MSA\n");
fullRefSeq = "";
fullRefSeq * refLength;
fullRefSeq * (s1[0]);
s1N = UnalignedSeqs[0];
fullRefSeqN = "";
fullRefSeqN * (3*refLength);
fullRefSeqN * (s1N[0][2]);
frameShift = ReadingFrames[0];
for (seqCounter=1;seqCounter<refLength;seqCounter=seqCounter+1)
{
gapCount = refInsertions[seqCounter];
for (k=0; k<gapCount;k=k+1)
{
fullRefSeq*("-");
fullRefSeqN*("---");
}
fullRefSeq * (s1[seqCounter]);
fullRefSeqN * (s1N[frameShift+seqCounter*3][frameShift+seqCounter*3+2]);
}
fullRefSeq * 0;
fullRefSeqN * 0;
return 0;
refLength = Abs(fullRefSeq);
SetDialogPrompt ("Save alignment to:");
seqName=sequenceNames[0];
fprintf (PROMPT_FOR_FILE,CLEAR_FILE,">",seqName,"\n",fullRefSeq);
fName = LAST_FILE_PATH;
fNameC = fName+".nuc";
fprintf (fNameC,CLEAR_FILE,">",seqName,"\n",fullRefSeqN);
alCounter = 0;
for (seqCounter = 1; seqCounter < unalSequenceCount; seqCounter = seqCounter+1)
{
if (skipSeqs[seqCounter] == 0)
{
if (skipOutliers == 0 && alignmentScoresM[alCounter] < lowerCuttoff)
{
seqName=sequenceNames[seqCounter];
fprintf (stdout, "Sequence ", seqName ," was skipped because of a poor alignment score.\n");
skipSeqs[seqCounter] = 1;
alCounter = alCounter + 1;
continue;
}
alCounter = alCounter + 1;
seqName=sequenceNames[seqCounter];
aligned = SeqAlignments[seqCounter];
aligned1 = aligned[1];
aligned2 = aligned[2];
s2 = startingPosition[seqCounter][0];
e2 = startingPosition[seqCounter][1];
gappedSeq = "";
gappedSeq * Abs(aligned2);
k=0;
while (k<refLength)
{
while (fullRefSeq[k]!=aligned1[s2])
{
gappedSeq*("-");
k=k+1;
}
gappedSeq*(aligned2[s2]);
s2=s2+1;
k=k+1;
}
gappedSeq * 0;
gappedSeqN = "";
gappedSeqN * (3*Abs(aligned2));
frameShift = ReadingFrames[seqCounter];
s1N = UnalignedSeqs[seqCounter];
s2N = ProteinSequences[seqCounter];
s2 = startingPosition[seqCounter][0];
k = 0;
e2 = Abs(gappedSeq);
k = 0;
while (k<e2)
{
while ((s2N[s2]!=gappedSeq[k])&&(k<e2))
{
gappedSeqN * ("---");
k=k+1;
}
if (k<e2)
{
gappedSeqN * s1N[frameShift+s2*3][frameShift+s2*3+2];
s2 = s2+1;
k=k+1;
}
}
gappedSeqN * 0;
if (refSeq2 && seqCounter == unalSequenceCount-1)
{
fscanf (fName, "Raw", soFar);
fprintf (fName, CLEAR_FILE,">",seqName,"\n",gappedSeq,"\n",soFar);
fscanf (fNameC, "Raw", soFar);
fprintf (fNameC,CLEAR_FILE,">",seqName,"\n",gappedSeqN,"\n",soFar);
}
else
{
fprintf (fName,"\n>",seqName,"\n",gappedSeq);
fprintf (fNameC,"\n>",seqName,"\n",gappedSeqN);
}
}
}
if (Abs(skipSeqs))
{
fName = fName+".bad";
for (seqCounter = 1; seqCounter < unalSequenceCount; seqCounter = seqCounter+1)
{
if (skipSeqs[seqCounter])
{
seqName=sequenceNames[seqCounter];
fprintf (fName,">",seqName,"\n",UnalignedSeqs[seqCounter],"\n");
}
}
}
function checkFramePosition (pos1, pos2, fr1, fr2)
{
fSpan = pos2-pos1;
if (fSpan>1) /* first followed by second*/
{
if (fSpan < fMin)
{
fMin = fSpan;
frame1 = fr1;
frame2 = fr2;
fStart = pos1+1;
fEnd = pos2;
}
}
return 0;
}
- -
- -
- -Jump to Line - - -
- -
- -
-
- - -
- -
- -
- - -
-
-
- -
-
-
-
-
- -
- - - - - - -
- - - Something went wrong with that request. Please try again. -
- - - - - - - -
- - - - diff --git a/samples/HyPhy/AAModelComparison.bf b/samples/HyPhy/AAModelComparison.bf deleted file mode 100644 index 72b30a53..00000000 --- a/samples/HyPhy/AAModelComparison.bf +++ /dev/null @@ -1,1827 +0,0 @@ - - - - - - - - - - - - hyphy/AAModelComparison.bf at master · veg/hyphy - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Skip to content -
- - - - - - - - - - - - -
-
-
- -
-
-
- -
    - -
  • -
    - -
    - - - - Watch - - - - -
    - -
    -
    -
    -
  • - -
  • - -
    - -
    - - -
    -
    - - -
    - -
  • - -
  • - - - Fork - - - - -
  • - -
- -

- - /hyphy - - - - - -

-
-
- -
-
- -
- - - -
- -
-

HTTPS clone URL

-
- - - - -
-
- - -
-

SSH clone URL

-
- - - - -
-
- - -
-

Subversion checkout URL

-
- - - - -
-
- - - -

You can clone with - HTTPS, SSH, or Subversion. - - - -

- - - - Clone in Desktop - - - - - - - Download ZIP - -
-
- -
- - - - - - -
- -
- - - branch: - master - - - -
- -
- - - - -
- - -
- - -
- Fetching contributors… -
- -
-

-

Cannot retrieve contributors at this time

-
-
-
-
-
- -
- Raw - Blame - History -
- - - - - -
- -
-
- -
- -
- 253 lines (199 sloc) - - 5.743 kb -
-
- -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (Rows (modelMatrixList) == 0)
{
modelMatrixList =
{
{"Equal Input", "EIAA.mdl", "19"}
{"Dayhoff","Dayhoff.mdl","0"}
{"Dayhoff+F","Dayhoff_F.mdl","19"}
{"JTT", "Jones.mdl", "0"}
{"JTT+F", "Jones_F.mdl", "19"}
{"WAG", "WAG.mdl", "0"}
{"WAG+F", "WAG_F.mdl", "19"}
{"rtREV", "rtREV.mdl", "0"}
{"rtREV+F", "rtREV_F.mdl", "19"}
{"mtMAM", "mtMAM.mdl", "0"}
{"mtMAM+F", "mtMAM_F.mdl", "19"}
{"mtREV 24", "mtREV_24.mdl", "0"}
{"mtREV 24+F", "mtREV_24_F.mdl", "19"}
{"HIV within", "HIVwithin.mdl", "0"}
{"HIV within+F", "HIVwithin+F.mdl", "19"}
{"HIV between", "HIVbetween.mdl", "0"}
{"HIV between+F", "HIVbetween+F.mdl", "19"}
{"REV-1 step", "reducedREV.mdl", "19"}
{"REV", "mtREV.mdl", "19"}
};
}
/*___________________________________________________________________________________________________________*/
function runAModel (modelID, fileName, xtraP, midx)
{
ExecuteCommands ("#include \"TemplateModels/"+fileName+"\";");
Tree givenTree = treeString;
LikelihoodFunction lf = (filteredData,givenTree);
GetString (lf_info, lf, -1);
locals = lf_info["Local Independent"];
if (Columns (branchLengthStash))
{
USE_LAST_RESULTS = 1;
for (_iv = 0; _iv < Columns (locals); _iv = _iv+1)
{
ExecuteCommands (locals[_iv] + "=1;\n");
}
currentBL = BranchLength (givenTree,0);
currentBN = BranchName (givenTree,-1);
for (_iv = 0; _iv < Columns (currentBN); _iv = _iv+1)
{
ExecuteCommands ("givenTree."+currentBN[_iv]+".t="+branchLengthStash[_iv]/currentBL+";");
}
}
else
{
for (_iv = 0; _iv < Columns (locals); _iv = _iv+1)
{
ExecuteCommands (locals[_iv] + "=0.1;\n");
}
USE_LAST_RESULTS = 1;
}
Optimize (res,lf);
fprintf (stdout, "| ", modelID);
for (k=0; k<maxModelWidth-Abs(modelID)-1; k=k+1)
{
fprintf (stdout, " ");
}
params = res[1][1]+xtraP;
AIC = 2(-res[1][0]+params);
if (filteredData.sites-params>1)
{
cAIC = 2(-res[1][0]+params*(filteredData.sites/(filteredData.sites-params-1)));
}
else
{
cAIC = 0;
}
branchLengths = BranchLength (givenTree,-1);
TL = 0;
for (k=Rows(branchLengths)*Columns(branchLengths)-1; k>=0; k=k-1)
{
TL = TL + branchLengths[k];
}
fprintf (stdout, "| ", Format (res[1][0],14,3), " | ", Format (params,5,0), " | ",
Format (AIC, 9,3), " | ",);
if (cAIC > 0)
{
fprintf (stdout, Format (cAIC,11,3), " | ");
}
else
{
fprintf (stdout, " N/A | ");
}
fprintf (stdout, Format (TL,11,3), " |\n", sepString);
resultMatrix[midx][0] = res[1][0];
resultMatrix[midx][1] = params;
resultMatrix[midx][2] = AIC;
resultMatrix[midx][3] = cAIC;
resultMatrix[midx][4] = TL;
if (AIC < bestAIC)
{
bestAIC = AIC;
bestAICidx = midx;
branchLengthStash = BranchLength (givenTree,-1);
}
if (cAIC > 0)
{
if (bestCAIC > cAIC)
{
bestCAIC = cAIC;
bestCAICidx = midx;
}
}
return 0;
}
/*___________________________________________________________________________________________________________*/
maxModelWidth = 7;
skipCodeSelectionStep = 0;
ChoiceList (doREV, "Include REV?", 1, SKIP_NONE, "Yes", "Include REV and reduced REV models. CAUTION: these models take a long time to fit.",
"No", "Only use empirical models");
if (doREV < 0)
{
return 0;
}
if (doREV == 0)
{
#include "TemplateModels/chooseGeneticCode.def";
skipCodeSelectionStep = 1;
}
modelCount = Rows (modelMatrixList) - 2*doREV;
for (k=0; k<modelCount; k=k+1)
{
maxModelWidth = Max(maxModelWidth,Abs (modelMatrixList[k][0])+2);
}
sepString = "";
capString = "";
sepString * 256;
sepString * "+";
capString * 256;
capString * "| Model";
for (k=0; k<maxModelWidth; k=k+1)
{
sepString * "-";
}
for (k=0; k<maxModelWidth-6; k=k+1)
{
capString * " ";
}
capString * "| Log Likelihood | #prms | AIC Score | c-AIC Score | Tree Length |\n";
sepString * "+----------------+-------+-----------+-------------+-------------+\n";
sepString * 0;
capString * 0;
branchLengthStash = 0;
SKIP_MODEL_PARAMETER_LIST = 0;
#include "TemplateModels/modelParameters2.mdl";
if (modelType == 1)
{
#include "TemplateModels/defineGamma.mdl";
}
if (modelType == 2)
{
#include "TemplateModels/defineHM.mdl";
}
SKIP_MODEL_PARAMETER_LIST = 1;
SetDialogPrompt ("Please load an amino-acid data file:");
DataSet ds = ReadDataFile (PROMPT_FOR_FILE);
DataSetFilter filteredData = CreateFilter (ds,1);
fprintf (stdout,"\nRunning aminoacid model comparisons on ", LAST_FILE_PATH, "\n\nThe alignment has ",ds.species, " sequences and ", ds.sites, " sites\n");
_DO_TREE_REBALANCE_ = 1;
#include "queryTree.bf";
resultMatrix = {modelCount, 5};
fprintf (stdout, "\n",sepString,capString,sepString);
bestAIC = 1e100;
bestCAIC = 1e100;
bestAICidx = 0;
bestCAICidx = -1;
for (mid=0; mid<modelCount; mid=mid+1)
{
runAModel (modelMatrixList[mid][0], modelMatrixList[mid][1], 0+modelMatrixList[mid][2], mid);
}
fprintf (stdout, "\n\nBest AIC model:\n\t", modelMatrixList[bestAICidx][0], " with the score of ", bestAIC);
if (bestCAICidx>=0)
{
fprintf (stdout, "\n\nBest c-AIC model:\n\t", modelMatrixList[bestCAICidx][0], " with the score of ", bestCAIC);
}
labelMatrix = {{"Log-likelihood","Parameters","AIC","c-AIC","Total tree length",""}};
aaString = "Model";
for (fC = 0; fC < modelCount; fC = fC+1)
{
aaString = aaString + ";" + modelMatrixList[fC][0];
}
USE_LAST_RESULTS = 0;
labelMatrix[5] = aaString;
skipCodeSelectionStep = 0;
OpenWindow (CHARTWINDOW,{{"Model Fits"}
{"labelMatrix"},
{"resultMatrix"},
{"Bar Chart"},
{"Index"},
{"c-AIC"},
{"Model Index"},
{""},
{"AIC"}
},
"SCREEN_WIDTH-60;SCREEN_HEIGHT-60;30;30");
- -
- -
- -Jump to Line - - -
- -
- -
-
- - -
- -
- -
- - -
-
-
- -
-
-
-
-
- -
- - - - - - -
- - - Something went wrong with that request. Please try again. -
- - - - - - - -
- - - - diff --git a/samples/HyPhy/AnalyzeCodonData.bf b/samples/HyPhy/AnalyzeCodonData.bf deleted file mode 100644 index 27280bbc..00000000 --- a/samples/HyPhy/AnalyzeCodonData.bf +++ /dev/null @@ -1,1031 +0,0 @@ - - - - - - - - - - - - hyphy/AnalyzeCodonData.bf at master · veg/hyphy - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Skip to content -
- - - - - - - - - - - - -
-
-
- -
-
-
- -
    - -
  • -
    - -
    - - - - Watch - - - - -
    - -
    -
    -
    -
  • - -
  • - -
    - -
    - - -
    -
    - - -
    - -
  • - -
  • - - - Fork - - - - -
  • - -
- -

- - /hyphy - - - - - -

-
-
- -
-
- -
- - - -
- -
-

HTTPS clone URL

-
- - - - -
-
- - -
-

SSH clone URL

-
- - - - -
-
- - -
-

Subversion checkout URL

-
- - - - -
-
- - - -

You can clone with - HTTPS, SSH, or Subversion. - - - -

- - - - Clone in Desktop - - - - - - - Download ZIP - -
-
- -
- - - - - - -
- -
- - - branch: - master - - - -
- -
- - - - -
- - -
- - -
- Fetching contributors… -
- -
-

-

Cannot retrieve contributors at this time

-
-
-
-
-
- -
- Raw - Blame - History -
- - - - - -
- -
-
- -
- -
- 54 lines (36 sloc) - - 1.285 kb -
-
- -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NICETY_LEVEL = 3;
#include "TemplateModels/chooseGeneticCode.def";
#include "simpleBootstrap.bf";
SetDialogPrompt ("Please specify a codon data file:");
COUNT_GAPS_IN_FREQUENCIES = 0;
VERBOSITY_LEVEL = 1;
DataSet ds = ReadDataFile (PROMPT_FOR_FILE);
DataSetFilter filteredData = CreateFilter (ds,3,"","",GeneticCodeExclusions);
fprintf (stdout,"\n______________READ THE FOLLOWING DATA______________\n",ds);
SelectTemplateModel(filteredData);
_DO_TREE_REBALANCE_ = 1;
#include "queryTree.bf";
if (modelType)
{
ChoiceList (branchLengths, "Branch Lengths", 1, SKIP_NONE,
"Estimate", "Estimate branch lengths by ML",
"Proportional to input tree", "Branch lengths are proportional to those in input tree");
if (branchLengths < 0)
{
return;
}
if (branchLengths == 1)
{
global treeScaler = 1;
ReplicateConstraint ("this1.?.?:=treeScaler*this2.?.?__", givenTree, givenTree);
}
}
LikelihoodFunction lf = (filteredData,givenTree);
Optimize (res,lf);
fprintf (stdout, "\n______________RESULTS______________\n",lf);
/* compute syn and non-syn stencils for current genetic code */
#include "categoryEcho.bf";
GetString (sendMeBack,lf,-1);
sendMeBack["LogL"] = res[1][0];
sendMeBack["NP"] = res[1][1];
return sendMeBack;
- -
- -
- -Jump to Line - - -
- -
- -
-
- - -
- -
- -
- - -
-
-
- -
-
-
-
-
- -
- - - - - - -
- - - Something went wrong with that request. Please try again. -
- - - - - - - -
- - - - diff --git a/samples/HyPhy/AnalyzeDiNucData.bf b/samples/HyPhy/AnalyzeDiNucData.bf deleted file mode 100644 index 833c79d7..00000000 --- a/samples/HyPhy/AnalyzeDiNucData.bf +++ /dev/null @@ -1,931 +0,0 @@ - - - - - - - - - - - - hyphy/AnalyzeDiNucData.bf at master · veg/hyphy - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Skip to content -
- - - - - - - - - - - - -
-
-
- -
-
-
- -
    - -
  • -
    - -
    - - - - Watch - - - - -
    - -
    -
    -
    -
  • - -
  • - -
    - -
    - - -
    -
    - - -
    - -
  • - -
  • - - - Fork - - - - -
  • - -
- -

- - /hyphy - - - - - -

-
-
- -
-
- -
- - - -
- -
-

HTTPS clone URL

-
- - - - -
-
- - -
-

SSH clone URL

-
- - - - -
-
- - -
-

Subversion checkout URL

-
- - - - -
-
- - - -

You can clone with - HTTPS, SSH, or Subversion. - - - -

- - - - Clone in Desktop - - - - - - - Download ZIP - -
-
- -
- - - - - - -
- -
- - - branch: - master - - - -
- -
- - - - -
- - -
- - -
- Fetching contributors… -
- -
-

-

Cannot retrieve contributors at this time

-
-
-
-
-
- -
- Raw - Blame - History -
- - - - - -
- -
-
- -
- -
- 29 lines (15 sloc) - - 0.661 kb -
-
- -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
SetDialogPrompt ("Please specify a di-nucleotide (e.g. stem RNA) file:");
DataSet ds = ReadDataFile (PROMPT_FOR_FILE);
DataSetFilter filteredData = CreateFilter (ds,2);
fprintf (stdout,"\n______________READ THE FOLLOWING DATA______________\n",ds);
SelectTemplateModel(filteredData);
_DO_TREE_REBALANCE_ = 1;
#include "queryTree.bf";
LikelihoodFunction lf = (filteredData,givenTree);
timer = Time(0);
USE_ADAPTIVE_VARIABLE_STEP = 0;
Optimize (res,lf);
timer = Time(0)-timer;
fprintf (stdout, "\n______________RESULTS______________\nTime taken = ", timer, " seconds\nAIC Score = ",
2(res[1][1]-res[1][0]),"\n",lf);
#include "categoryEcho.bf";
- -
- -
- -Jump to Line - - -
- -
- -
-
- - -
- -
- -
- - -
-
-
- -
-
-
-
-
- -
- - - - - - -
- - - Something went wrong with that request. Please try again. -
- - - - - - - -
- - - - diff --git a/samples/HyPhy/AnalyzeNucProtData.bf b/samples/HyPhy/AnalyzeNucProtData.bf deleted file mode 100644 index 710cb4b6..00000000 --- a/samples/HyPhy/AnalyzeNucProtData.bf +++ /dev/null @@ -1,931 +0,0 @@ - - - - - - - - - - - - hyphy/AnalyzeNucProtData.bf at master · veg/hyphy - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Skip to content -
- - - - - - - - - - - - -
-
-
- -
-
-
- -
    - -
  • -
    - -
    - - - - Watch - - - - -
    - -
    -
    -
    -
  • - -
  • - -
    - -
    - - -
    -
    - - -
    - -
  • - -
  • - - - Fork - - - - -
  • - -
- -

- - /hyphy - - - - - -

-
-
- -
-
- -
- - - -
- -
-

HTTPS clone URL

-
- - - - -
-
- - -
-

SSH clone URL

-
- - - - -
-
- - -
-

Subversion checkout URL

-
- - - - -
-
- - - -

You can clone with - HTTPS, SSH, or Subversion. - - - -

- - - - Clone in Desktop - - - - - - - Download ZIP - -
-
- -
- - - - - - -
- -
- - - branch: - master - - - -
- -
- - - - -
- - -
- - -
- Fetching contributors… -
- -
-

-

Cannot retrieve contributors at this time

-
-
-
-
-
- -
- Raw - Blame - History -
- - - - - -
- -
-
- -
- -
- 29 lines (18 sloc) - - 0.868 kb -
-
- -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
_Genetic_Code = 0;
ExecuteAFile("simpleBootstrap.bf");
SetDialogPrompt ("Please specify a nucleotide or amino-acid data file:");
DataSet ds = ReadDataFile (PROMPT_FOR_FILE);
DataSetFilter filteredData = CreateFilter (ds,1);
fprintf (stdout,"\n______________READ THE FOLLOWING DATA______________\n",ds);
SelectTemplateModel(filteredData);
_DO_TREE_REBALANCE_ = 1;
ExecuteAFile ("queryTree.bf");
LikelihoodFunction lf = (filteredData,givenTree);
timer = Time(0);
Optimize (res,lf);
timer = Time(0)-timer;
fprintf (stdout, "\n______________RESULTS______________\nTime taken = ", timer, " seconds\nAIC Score = ",
2(res[1][1]-res[1][0]),"\n",lf);
ExecuteAFile ("categoryEcho.bf");
GetString (lfInfo, lf, -1);
return {"Log(L)": res[1][0], "DF": res[1][1], "Tree": Format (givenTree,1,1)}
- -
- -
- -Jump to Line - - -
- -
- -
-
- - -
- -
- -
- - -
-
-
- -
-
-
-
-
- -
- - - - - - -
- - - Something went wrong with that request. Please try again. -
- - - - - - - -
- - - - diff --git a/samples/HyPhy/BS2007.bf b/samples/HyPhy/BS2007.bf deleted file mode 100644 index d49327e5..00000000 --- a/samples/HyPhy/BS2007.bf +++ /dev/null @@ -1,1999 +0,0 @@ - - - - - - - - - - - - hyphy/BS2007.bf at master · veg/hyphy - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Skip to content -
- - - - - - - - - - - - -
-
-
- -
-
-
- -
    - -
  • -
    - -
    - - - - Watch - - - - -
    - -
    -
    -
    -
  • - -
  • - -
    - -
    - - -
    -
    - - -
    - -
  • - -
  • - - - Fork - - - - -
  • - -
- -

- - /hyphy - - - - - -

-
-
- -
-
- -
- - - -
- -
-

HTTPS clone URL

-
- - - - -
-
- - -
-

SSH clone URL

-
- - - - -
-
- - -
-

Subversion checkout URL

-
- - - - -
-
- - - -

You can clone with - HTTPS, SSH, or Subversion. - - - -

- - - - Clone in Desktop - - - - - - - Download ZIP - -
-
- -
- - - - - - -
- -
- - - branch: - master - - - -
- -
- - - - -
- - -
- - -
- Fetching contributors… -
- -
-

-

Cannot retrieve contributors at this time

-
-
-
-
-
- -
- Raw - Blame - History -
- - - - - -
- -
-
- -
- -
- 296 lines (239 sloc) - - 9.281 kb -
-
- -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
/* 1. include a file to define the genetic code
Note the use of base directory and path forming variables to make this analysis
independent of directory placement
*/
incFileName = HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"chooseGeneticCode.def";
ExecuteCommands ("#include \""+incFileName+"\";");
/* 2. load a codon partition */
SetDialogPrompt ("Please locate a coding alignment:");
DataSet ds = ReadDataFile (PROMPT_FOR_FILE);
DataSetFilter filteredData = CreateFilter (ds,3,"","",GeneticCodeExclusions);
coding_path = LAST_FILE_PATH;
fprintf (stdout, "\nLoaded a ", filteredData.species, " sequence alignment with ", filteredData.sites, " codons from\n",coding_path,"\n");
/* 3. include a file to prompt for a tree */
incFileName = HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"queryTree.bf";
ExecuteCommands ("#include \""+incFileName+"\";");
/* 4. Compute nucleotide counts by position for the F3x4 estimator */
COUNT_GAPS_IN_FREQUENCIES = 0;
HarvestFrequencies (baseFreqs,filteredData,3,1,1);
COUNT_GAPS_IN_FREQUENCIES = 1;
fprintf (stdout, "\nBase composition:\n\tA: ", Format (baseFreqs[0][0],10,5),",",Format (baseFreqs[0][1],10,5),",",Format (baseFreqs[0][2],10,5),
"\n\tC: ", Format (baseFreqs[1][0],10,5),",",Format (baseFreqs[1][1],10,5),",",Format (baseFreqs[1][2],10,5),
"\n\tG: ", Format (baseFreqs[2][0],10,5),",",Format (baseFreqs[2][1],10,5),",",Format (baseFreqs[2][2],10,5),
"\n\tT: ", Format (baseFreqs[3][0],10,5),",",Format (baseFreqs[3][1],10,5),",",Format (baseFreqs[3][2],10,5), "\n");
/* 6. define the GY94 rate matrix; for now each branch will have it's own
dS and dN, we will constrain them later */
global kappa_inv = 1;
ModelMatrixDimension = 64;
for (h = 0; h<64; h=h+1)
{
if (_Genetic_Code[h]==10) /* stop codon */
{
ModelMatrixDimension = ModelMatrixDimension-1;
}
}
GY_Matrix = {ModelMatrixDimension,ModelMatrixDimension};
hshift = 0;
for (h=0; h<64; h=h+1)
{
if (_Genetic_Code[h]==10)
{
hshift = hshift+1;
}
else
{
vshift = hshift;
for (v = h+1; v<64; v=v+1)
{
diff = v-h;
if (_Genetic_Code[v]==10)
{
vshift = vshift+1;
}
else
{
if ((h$4==v$4)||((diff%4==0)&&(h$16==v$16))||(diff%16==0)) /* one step */
{
if (h$4==v$4)
{
transition = v%4;
transition2= h%4;
}
else
{
if(diff%16==0)
{
transition = v$16;
transition2= h$16;
}
else
{
transition = v%16$4;
transition2= h%16$4;
}
}
if (_Genetic_Code[0][h]==_Genetic_Code[0][v]) /* synonymous */
{
if (Abs(transition-transition2)%2) /* transversion */
{
GY_Matrix[h-hshift][v-vshift] := kappa_inv*synRate;
GY_Matrix[v-vshift][h-hshift] := kappa_inv*synRate;
}
else
{
GY_Matrix[h-hshift][v-vshift] := synRate;
GY_Matrix[v-vshift][h-hshift] := synRate;
}
}
else
{
if (Abs(transition-transition2)%2) /* transversion */
{
GY_Matrix[h-hshift][v-vshift] := kappa_inv*nonSynRate;
GY_Matrix[v-vshift][h-hshift] := kappa_inv*nonSynRate;
}
else
{
GY_Matrix[h-hshift][v-vshift] := nonSynRate;
GY_Matrix[v-vshift][h-hshift] := nonSynRate;
}
}
}
}
}
}
}
/*8. build codon frequencies (use the F3x4 estimator) */
PIStop = 1.0;
codonFreqs = {ModelMatrixDimension,1};
hshift = 0;
for (h=0; h<64; h=h+1)
{
first = h$16;
second = h%16$4;
third = h%4;
if (_Genetic_Code[h]==10)
{
hshift = hshift+1;
PIStop = PIStop-baseFreqs[first][0]*baseFreqs[second][1]*baseFreqs[third][2];
continue;
}
codonFreqs[h-hshift]=baseFreqs[first][0]*baseFreqs[second][1]*baseFreqs[third][2];
}
codonFreqs = codonFreqs*(1.0/PIStop);
/*9. define the codon model */
Model GY_Model = (GY_Matrix,codonFreqs,1);
/*10. Define the tree and pick the foreground branch, displaying a tree window to facilitate selection;
the latter step is executed for 2 of 3 model choices */
Tree givenTree1 = treeString;
Tree givenTree2 = treeString;
Tree givenTree3 = treeString;
Tree givenTree4 = treeString;
USE_LAST_RESULTS = 0;
OPTIMIZATION_METHOD = 4;
/* Approximate kappa and branch lengths using an HKY85 fit */
HKY85_Matrix = {{*,t*kappa_inv,t,t*kappa_inv}
{t*kappa_inv,*,kappa_inv*t,t}
{t,t*kappa_inv,*,kappa_inv*t}
{t*kappa_inv,t,kappa_inv*t,*}};
HarvestFrequencies (nucFreqs,ds,1,1,1);
Model HKY85_Model = (HKY85_Matrix,nucFreqs,1);
Tree nucTree = treeString;
DataSetFilter nucData = CreateFilter (ds,1);
fprintf (stdout, "Obtaining nucleotide branch lengths and kappa to be used as starting values...\n");
LikelihoodFunction nuc_lf = (nucData,nucTree);
Optimize (nuc_mle,nuc_lf);
fprintf (stdout, "\n", Format (nucTree,1,1), "\nkappa=", Format (1/kappa_inv,8,3), "\n");
USE_LAST_RESULTS = 1;
mxTreeSpec = {5,1};
mxTreeSpec [0] = "nucTree";
mxTreeSpec [1] = "8240";
mxTreeSpec [2] = "10,40,-10,-175,1";
mxTreeSpec [3] = "";
mxTreeSpec [4] = "";
OpenWindow (TREEWINDOW, mxTreeSpec,"(SCREEN_WIDTH-50)/2;(SCREEN_HEIGHT-50)/2;30+(SCREEN_WIDTH-30)/2;45");
leafNodes = TipCount (givenTree);
internalNodes = BranchCount(givenTree);
choiceMatrix = {internalNodes+leafNodes,2};
for (bc=0; bc<internalNodes; bc=bc+1)
{
choiceMatrix[bc][0] = BranchName(givenTree,bc);
choiceMatrix[bc][1] = "Internal Branch Rooting " + givenTree[bc];
}
for (bc=0; bc<leafNodes; bc=bc+1)
{
choiceMatrix[bc+internalNodes][0] = TipName(givenTree,bc);
choiceMatrix[bc+internalNodes][1] = "Leaf node " + choiceMatrix[bc+internalNodes][0];
}
ChoiceList (stOption,"Choose the foreground branch",0,NO_SKIP,choiceMatrix);
if (stOption[0] < 0)
{
return -1;
}
fprintf (stdout, "\n\n", Columns (stOption)," foreground branch(es) set to: ", "\n");
for (bc = 0; bc < Columns (stOption); bc = bc + 1)
{
fprintf (stdout, choiceMatrix[stOption[bc]][0], "\n");
}
OpenWindow (CLOSEWINDOW, "Tree nucTree");
/* 15. Constrain dS and dN in the tree to based upon different models */
global omega_1 = 0.25;
global omega_2 = 0.5;
global omega_3 = 1.5;
omega_1 :< 1;
omega_2 :< 1;
omega_3 :> 1;
ClearConstraints (givenTree1);
ClearConstraints (givenTree2);
ClearConstraints (givenTree3);
ClearConstraints (givenTree4);
/* 16. define and optimize the likelihood function */
bNames = BranchName (givenTree,-1);
nucBL = BranchLength (nucTree,-1);
for (bc=0; bc<Columns(bNames)-1; bc=bc+1)
{
ExecuteCommands ("givenTree1."+bNames[bc]+".synRate =nucTree."+bNames[bc]+".t;");
ExecuteCommands ("givenTree1."+bNames[bc]+".nonSynRate =nucTree."+bNames[bc]+".t;");
ExecuteCommands ("givenTree2."+bNames[bc]+".synRate =nucTree."+bNames[bc]+".t;");
ExecuteCommands ("givenTree2."+bNames[bc]+".nonSynRate =nucTree."+bNames[bc]+".t;");
ExecuteCommands ("givenTree3."+bNames[bc]+".synRate =nucTree."+bNames[bc]+".t;");
ExecuteCommands ("givenTree3."+bNames[bc]+".nonSynRate =nucTree."+bNames[bc]+".t;");
ExecuteCommands ("givenTree4."+bNames[bc]+".synRate =nucTree."+bNames[bc]+".t;");
ExecuteCommands ("givenTree4."+bNames[bc]+".nonSynRate =nucTree."+bNames[bc]+".t;");
}
codBL = BranchLength (givenTree1,-1);
for (bc=0; bc<Columns(bNames)-1; bc=bc+1)
{
if (nucBL[bc]>0)
{
scalingFactor = nucBL[bc]/codBL[bc];
ExecuteCommands ("givenTree1."+bNames[bc]+".synRate=nucTree."+bNames[bc]+".t*"+scalingFactor+";");
ExecuteCommands ("givenTree1."+bNames[bc]+".nonSynRate=nucTree."+bNames[bc]+".t*"+scalingFactor+";");
ExecuteCommands ("givenTree2."+bNames[bc]+".synRate=nucTree."+bNames[bc]+".t*"+scalingFactor+";");
ExecuteCommands ("givenTree2."+bNames[bc]+".nonSynRate=nucTree."+bNames[bc]+".t*"+scalingFactor+";");
ExecuteCommands ("givenTree3."+bNames[bc]+".synRate=nucTree."+bNames[bc]+".t*"+scalingFactor+";");
ExecuteCommands ("givenTree3."+bNames[bc]+".nonSynRate=nucTree."+bNames[bc]+".t*"+scalingFactor+";");
ExecuteCommands ("givenTree4."+bNames[bc]+".synRate =nucTree."+bNames[bc]+".t;");
ExecuteCommands ("givenTree4."+bNames[bc]+".nonSynRate =nucTree."+bNames[bc]+".t;");
}
}
for (bc = 0; bc < Columns (stOption); bc = bc + 1)
{
bName = choiceMatrix[stOption[bc]][0];
ExecuteCommands ("givenTree1."+bName+".nonSynRate:=omega_1*givenTree1."+bName+".synRate");
ExecuteCommands ("givenTree2."+bName+".nonSynRate:=omega_2*givenTree2."+bName+".synRate");
ExecuteCommands ("givenTree3."+bName+".nonSynRate:=givenTree3."+bName+".synRate");
ExecuteCommands ("givenTree4."+bName+".nonSynRate:=omega_3*givenTree4."+bName+".synRate");
}
OPTIMIZATION_PRECISION = 0.001;
global P_1 = 1/4;
global P_2 = 1/3;
global P_3 = 1/2;
P_1 :< 1;
P_2 :< 1;
P_3 :< 1;
fprintf (stdout, "\nFitting the model with selection...\n");
LikelihoodFunction lf = (filteredData, givenTree1, filteredData, givenTree2,filteredData, givenTree3,filteredData, givenTree4,
"Log(P_1*SITE_LIKELIHOOD[0]+(1-P_1)*P_2*SITE_LIKELIHOOD[1]+(1-P_1)(1-P_2)*P_3*SITE_LIKELIHOOD[2]+(1-P_1)(1-P_2)(1-P_3)*SITE_LIKELIHOOD[3])");
Optimize (mles,lf);
fprintf (stdout, lf);
- -
- -
- -Jump to Line - - -
- -
- -
-
- - -
- -
- -
- - -
-
-
- -
-
-
-
-
- -
- - - - - - -
- - - Something went wrong with that request. Please try again. -
- - - - - - - -
- - - - From 1fdcafb1ae29f97579a5e68ad9057658da83f478 Mon Sep 17 00:00:00 2001 From: Mike Doud Date: Fri, 1 May 2015 11:28:11 -0700 Subject: [PATCH 6/8] Update languages.yml --- lib/linguist/languages.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/linguist/languages.yml b/lib/linguist/languages.yml index 827559c8..7845ed3f 100644 --- a/lib/linguist/languages.yml +++ b/lib/linguist/languages.yml @@ -1354,6 +1354,7 @@ HyPhy: ace_mode: text extensions: - .bf + tm_scope: none IDL: type: programming From ca12a99970c32858bd4f7c9a5167eb77ce08bfb3 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Fri, 1 May 2015 21:03:02 +0200 Subject: [PATCH 7/8] Handle SSH links to submodules --- script/travis/before_install | 3 +++ 1 file changed, 3 insertions(+) diff --git a/script/travis/before_install b/script/travis/before_install index 93ef383c..516ec956 100755 --- a/script/travis/before_install +++ b/script/travis/before_install @@ -17,6 +17,9 @@ else bundle config build.charlock_holmes --with-icu-dir=$(pwd)/vendor/debs fi +# Replace SSH links to submodules by HTTPS links. +sed -i 's/git@github.com:/https:\/\/github.com\//' .gitmodules + git submodule init git submodule sync --quiet script/fast-submodule-update From 585d74ecc995d2fda7107d72f212c6c34863647f Mon Sep 17 00:00:00 2001 From: Mike Doud Date: Sat, 2 May 2015 13:24:43 -0700 Subject: [PATCH 8/8] add better HyPhy samples --- samples/HyPhy/AAModelComparison.bf | 252 + samples/HyPhy/CodonModelCompare.bf | 1046 ++ samples/HyPhy/MFPositiveSelection.bf | 1113 ++ samples/HyPhy/MolecularClock.bf | 147 + samples/HyPhy/dNdSDistributionComparison.bf | 1025 ++ samples/HyPhy/hyphy_cmds.bf | 11003 ++++++++++++++++++ 6 files changed, 14586 insertions(+) create mode 100644 samples/HyPhy/AAModelComparison.bf create mode 100644 samples/HyPhy/CodonModelCompare.bf create mode 100644 samples/HyPhy/MFPositiveSelection.bf create mode 100644 samples/HyPhy/MolecularClock.bf create mode 100644 samples/HyPhy/dNdSDistributionComparison.bf create mode 100644 samples/HyPhy/hyphy_cmds.bf diff --git a/samples/HyPhy/AAModelComparison.bf b/samples/HyPhy/AAModelComparison.bf new file mode 100644 index 00000000..602a9e56 --- /dev/null +++ b/samples/HyPhy/AAModelComparison.bf @@ -0,0 +1,252 @@ +if (Rows (modelMatrixList) == 0) +{ + modelMatrixList = + { + {"Equal Input", "EIAA.mdl", "19"} + {"Dayhoff","Dayhoff.mdl","0"} + {"Dayhoff+F","Dayhoff_F.mdl","19"} + {"JTT", "Jones.mdl", "0"} + {"JTT+F", "Jones_F.mdl", "19"} + {"WAG", "WAG.mdl", "0"} + {"WAG+F", "WAG_F.mdl", "19"} + {"rtREV", "rtREV.mdl", "0"} + {"rtREV+F", "rtREV_F.mdl", "19"} + {"mtMAM", "mtMAM.mdl", "0"} + {"mtMAM+F", "mtMAM_F.mdl", "19"} + {"mtREV 24", "mtREV_24.mdl", "0"} + {"mtREV 24+F", "mtREV_24_F.mdl", "19"} + {"HIV within", "HIVwithin.mdl", "0"} + {"HIV within+F", "HIVwithin+F.mdl", "19"} + {"HIV between", "HIVbetween.mdl", "0"} + {"HIV between+F", "HIVbetween+F.mdl", "19"} + {"REV-1 step", "reducedREV.mdl", "19"} + {"REV", "mtREV.mdl", "19"} + }; +} + +/*___________________________________________________________________________________________________________*/ + +function runAModel (modelID, fileName, xtraP, midx) +{ + ExecuteCommands ("#include \"TemplateModels/"+fileName+"\";"); + Tree givenTree = treeString; + LikelihoodFunction lf = (filteredData,givenTree); + + GetString (lf_info, lf, -1); + locals = lf_info["Local Independent"]; + + if (Columns (branchLengthStash)) + { + USE_LAST_RESULTS = 1; + for (_iv = 0; _iv < Columns (locals); _iv = _iv+1) + { + ExecuteCommands (locals[_iv] + "=1;\n"); + } + currentBL = BranchLength (givenTree,0); + currentBN = BranchName (givenTree,-1); + for (_iv = 0; _iv < Columns (currentBN); _iv = _iv+1) + { + ExecuteCommands ("givenTree."+currentBN[_iv]+".t="+branchLengthStash[_iv]/currentBL+";"); + + } + } + else + { + for (_iv = 0; _iv < Columns (locals); _iv = _iv+1) + { + ExecuteCommands (locals[_iv] + "=0.1;\n"); + } + USE_LAST_RESULTS = 1; + } + + Optimize (res,lf); + + fprintf (stdout, "| ", modelID); + for (k=0; k1) + { + cAIC = 2(-res[1][0]+params*(filteredData.sites/(filteredData.sites-params-1))); + } + else + { + cAIC = 0; + } + + branchLengths = BranchLength (givenTree,-1); + TL = 0; + for (k=Rows(branchLengths)*Columns(branchLengths)-1; k>=0; k=k-1) + { + TL = TL + branchLengths[k]; + } + + fprintf (stdout, "| ", Format (res[1][0],14,3), " | ", Format (params,5,0), " | ", + Format (AIC, 9,3), " | ",); + + if (cAIC > 0) + { + fprintf (stdout, Format (cAIC,11,3), " | "); + } + else + { + fprintf (stdout, " N/A | "); + } + + fprintf (stdout, Format (TL,11,3), " |\n", sepString); + + resultMatrix[midx][0] = res[1][0]; + resultMatrix[midx][1] = params; + resultMatrix[midx][2] = AIC; + resultMatrix[midx][3] = cAIC; + resultMatrix[midx][4] = TL; + + if (AIC < bestAIC) + { + bestAIC = AIC; + bestAICidx = midx; + branchLengthStash = BranchLength (givenTree,-1); + } + + if (cAIC > 0) + { + if (bestCAIC > cAIC) + { + bestCAIC = cAIC; + bestCAICidx = midx; + + } + } + + return 0; +} + + +/*___________________________________________________________________________________________________________*/ + + + +maxModelWidth = 7; +skipCodeSelectionStep = 0; + +ChoiceList (doREV, "Include REV?", 1, SKIP_NONE, "Yes", "Include REV and reduced REV models. CAUTION: these models take a long time to fit.", + "No", "Only use empirical models"); + +if (doREV < 0) +{ + return 0; +} + +if (doREV == 0) +{ + #include "TemplateModels/chooseGeneticCode.def"; + skipCodeSelectionStep = 1; +} + +modelCount = Rows (modelMatrixList) - 2*doREV; + +for (k=0; k=0) +{ + fprintf (stdout, "\n\nBest c-AIC model:\n\t", modelMatrixList[bestCAICidx][0], " with the score of ", bestCAIC); +} + +labelMatrix = {{"Log-likelihood","Parameters","AIC","c-AIC","Total tree length",""}}; + +aaString = "Model"; + +for (fC = 0; fC < modelCount; fC = fC+1) +{ + aaString = aaString + ";" + modelMatrixList[fC][0]; +} + +USE_LAST_RESULTS = 0; + +labelMatrix[5] = aaString; +skipCodeSelectionStep = 0; +OpenWindow (CHARTWINDOW,{{"Model Fits"} + {"labelMatrix"}, + {"resultMatrix"}, + {"Bar Chart"}, + {"Index"}, + {"c-AIC"}, + {"Model Index"}, + {""}, + {"AIC"} + }, + "SCREEN_WIDTH-60;SCREEN_HEIGHT-60;30;30"); diff --git a/samples/HyPhy/CodonModelCompare.bf b/samples/HyPhy/CodonModelCompare.bf new file mode 100644 index 00000000..e24b730d --- /dev/null +++ b/samples/HyPhy/CodonModelCompare.bf @@ -0,0 +1,1046 @@ +/* + This file takes a nucleotide data set and a tree (either from the data file or from a separate file) and computes maximum likelihood estimates for every possible 4x4 reversible model on that data and tree. + + We use the string (v1,v2,v3,v4,v5,v6), where and v1..6 = 0..5 + to encode a 4x4 symmetric transition matrix with entries + [* v1 v2 v3] + [- * v4 v5] + [- - * v6] + [- - - * ] + + For instance: (010010) encodes HKY85. + + For each model the following information is reported: + - Model string. (e.g. (012345) for the GRM) + - Number of model parameters + - Max ln-likelihood for the model + - Likelihood ratio statistic (as a sub-model of the GRM) + - AIC + - P-Value for the Likelihood Ratio Test. + + + Sergei L. Kosakovsky Pond, Summer 2002. + +*/ + +function ReceiveJobs (sendOrNot) +{ + if (MPI_NODE_COUNT>1) + { + MPIReceive (-1, fromNode, result_String); + jobModelNum = MPINodeState[fromNode-1][1]; + vv1 = MPINodeState[fromNode-1][2]; + vv2 = MPINodeState[fromNode-1][3]; + vv3 = MPINodeState[fromNode-1][4]; + vv4 = MPINodeState[fromNode-1][5]; + vv5 = MPINodeState[fromNode-1][6]; + vv6 = MPINodeState[fromNode-1][7]; + if (sendOrNot) + { + MPISend (fromNode,lf); + MPINodeState[fromNode-1][1] = modelNum; + MPINodeState[fromNode-1][2] = v1; + MPINodeState[fromNode-1][3] = v2; + MPINodeState[fromNode-1][4] = v3; + MPINodeState[fromNode-1][5] = v4; + MPINodeState[fromNode-1][6] = v5; + MPINodeState[fromNode-1][7] = v6; + } + else + { + MPINodeState[fromNode-1][0] = 0; + MPINodeState[fromNode-1][1] = -1; + } + + ExecuteCommands (result_String); + } + else + { + jobModelNum = modelNum; + } + + if (jobModelNum == 0) + { + stdl = lf_MLES[1][0]; + fullnp = lf_MLES[1][1]+addOn; + fprintf(stdout,"\n(012345) Full Model ln-lik = ",stdl,". Parameter Count=",Format(fullnp,0,0)," AIC = ", 2*(fullnp-stdl),"\n\n"); + + + resultCache [0][0] = 1; + resultCache [0][1] = 2; + resultCache [0][2] = 3; + resultCache [0][3] = 4; + resultCache [0][4] = 5; + resultCache [0][5] = lf_MLES[1][0]; + resultCache [0][6] = lf_MLES[1][1]+addOn; + resultCache [0][7] = 0; + resultCache [0][8] = 0; + + fprintf (stdout,"\n# | Model | # prm | lnL | LRT | AIC | P-Value |"); + fprintf (stdout,"\n----|----------|-------|-----------|----------------|------------|------------------|"); + + if (MPI_NODE_COUNT>1) + { + for (h=1; h<203; h=h+1) + { + lnL = resultCache[h][5]; + + if (lnL<0) + { + np = resultCache[h][6]; + LRT = -2*(lnL-stdl); + if (LRT<0) + { + LRT = 0; + } + AIC = -2*lnL+2*np; + PRINT_DIGITS = 3; + fprintf (stdout,"\n",h); + PRINT_DIGITS = 1; + fprintf (stdout," | (",0, resultCache[h][0], resultCache[h][1], resultCache[h][2], resultCache[h][3], resultCache[h][4],") | "); + fprintf (stdout,Format (np,5,0)); + PRINT_DIGITS = 8; + fprintf (stdout, " | ",lnL," | ",Format(LRT,14,3), " | ", AIC, " | ", ); + + PRINT_DIGITS = 15; + if (LRT==0) + { + pValue = 1; + } + else + { + pValue = 1-CChi2(LRT,fullnp-np); + } + fprintf (stdout,pValue," |"); + resultCache [jobModelNum][7] = pValue; + if (pValue1)&&(resultCache[0][5]>=0)) + { + resultCache [jobModelNum][0] = vv2; + resultCache [jobModelNum][1] = vv3; + resultCache [jobModelNum][2] = vv4; + resultCache [jobModelNum][3] = vv5; + resultCache [jobModelNum][4] = vv6; + resultCache [jobModelNum][5] = lf_MLES[1][0]; + resultCache [jobModelNum][6] = lf_MLES[1][1]+addOn; + return fromNode - 1; + } + } + + np = lf_MLES[1][1]+addOn; + lnL = lf_MLES[1][0]; + LRT = -2*(lnL-stdl); + if (LRT<0) + { + LRT = 0; + } + AIC = -2*lnL+2*np; + PRINT_DIGITS = 3; + fprintf (stdout,"\n",jobModelNum); + PRINT_DIGITS = 1; + fprintf (stdout," | (",vv1,vv2,vv3,vv4,vv5,vv6,") | "); + fprintf (stdout,Format (np,5,0)); + PRINT_DIGITS = 8; + fprintf (stdout, " | ",lnL," | ",Format(LRT,14,3), " | ", AIC, " | ", ); + + + PRINT_DIGITS = 15; + if (LRT==0) + { + pValue = 1; + } + else + { + pValue = 1-CChi2(LRT,fullnp-np); + } + fprintf (stdout,pValue," |"); + + resultCache [jobModelNum][0] = vv2; + resultCache [jobModelNum][1] = vv3; + resultCache [jobModelNum][2] = vv4; + resultCache [jobModelNum][3] = vv5; + resultCache [jobModelNum][4] = vv6; + resultCache [jobModelNum][5] = lf_MLES[1][0]; + resultCache [jobModelNum][6] = lf_MLES[1][1]+addOn; + resultCache [jobModelNum][7] = pValue; + if (pValue 0) +{ + #include "TemplateModels/defineGamma.mdl"; +} + +ChoiceList (branchLengths,"Estimate Branch Lengths",1,SKIP_NONE, + "Every Time","Branch lengths are reestimated for every model.", + "Once","Branch lenghts obtained from the nucleotide GTR model and reused for subsequent models." + ); + +if (branchLengths<0) +{ + return; +} + +rejectAt = 0; + +while ((rejectAt<=0)||(rejectAt>=1)) +{ + fprintf (stdout, "\nModel rejection level (e.g. 0.05):"); + fscanf (stdin,"Number", rejectAt); +} + +SetDialogPrompt ("Save results to:"); + +fprintf (PROMPT_FOR_FILE, CLEAR_FILE); +BASE_PATH = LAST_FILE_PATH; + +KEEP_OPTIMAL_ORDER = 1; +MESSAGE_LOGGING = 0; + +global AC=1; +global AT=1; +global CG=1; +global CT=1; +global GT=1; +global R=1; + + +r = setElement (0,2,1); +r = setElement (0,3,2); +r = setElement (1,2,3); +r = setElement (1,3,4); +r = setElement (2,3,5); + +MG94custom = 0; +MULTIPLY_BY_FREQS = PopulateModelMatrix ("MG94custom", observedFreq); +vectorOfFrequencies = BuildCodonFrequencies (observedFreq); +Model MG94customModel = (MG94custom,vectorOfFrequencies,0); + +USE_POSITION_SPECIFIC_FREQS = 1; +Tree tr = treeString; + +addOn = 0; + +if (branchLengths) +{ + global TreeScaler = 1; + GTRMatrix = {{*,AC*nt,nt,AT*nt}{AC*nt,*,CG*nt,CT*nt}{nt,CG*nt,*,GT*nt}{AT*nt,CT*nt,GT*nt,*}}; + DataSetFilter nucFilter = CreateFilter (filteredData,1); + HarvestFrequencies (nucFreq,nucFilter,1,1,1); + Model GTRModel = (GTRMatrix,nucFreq,1); + givenTreeString = Format (tr,0,0); + Tree nucTree = givenTreeString; + LikelihoodFunction lfn = (nucFilter, nucTree); + Optimize (nres,lfn); + ReplicateConstraint ("this1.?.synRate:=this2.?.nt__/TreeScaler",tr,nucTree); + addOn = nres[1][1]-nres[1][2]-1; +} + +LikelihoodFunction lf = (filteredData, tr); + +resultCache = {203,9}; + +modelNum = 0; +rejectCount = 0; + +if (MPI_NODE_COUNT>1) +{ + MPINodeState = {MPI_NODE_COUNT-1,8}; + OPTIMIZE_SUMMATION_ORDER = 0; + MPISend (1,lf); + MPINodeState[0][0] = 1; + MPINodeState[0][1] = modelNum; +} +else +{ + Optimize (lf_MLES,lf); + vv1 = 0; + vv2 = 0; + vv3 = 0; + vv4 = 0; + vv5 = 0; + vv6 = 0; + dummy = ReceiveJobs (0); +} + +rateBiasTerms = {{"AC","1","AT","CG","CT","GT"}}; + +for (v2=0; v2<=1; v2=v2+1) +{ + for (v3=0; v3<=v2+1; v3=v3+1) + { + if (v3>v2) + { + ub4 = v3; + } + else + { + ub4 = v2; + } + for (v4=0; v4<=ub4+1; v4=v4+1) + { + if (v4>=ub4) + { + ub5 = v4; + } + else + { + ub5 = ub4; + } + for (v5=0; v5<=ub5+1; v5=v5+1) + { + if (v5>ub5) + { + ub6 = v5; + } + else + { + ub6 = ub5; + } + for (v6=0; v6<=ub6+1; v6=v6+1) + { + if (v6==5) + { + break; + } + + R = 1; + + paramCount = 0; + + modelDesc = "0"+Format(v2,1,0); + modelDesc = modelDesc+Format(v3,1,0); + modelDesc = modelDesc+Format(v4,1,0); + modelDesc = modelDesc+Format(v5,1,0); + modelDesc = modelDesc+Format(v6,1,0); + + modelConstraintString = ""; + + AC = 1; + AT = 1; + CG = 1; + CT = 1; + GT = 1; + + for (customLoopCounter2=1; customLoopCounter2<6; customLoopCounter2=customLoopCounter2+1) + { + for (customLoopCounter=0; customLoopCounter1) + { + for (mpiNode = 0; mpiNode < MPI_NODE_COUNT-1; mpiNode = mpiNode+1) + { + if (MPINodeState[mpiNode][0]==0) + { + break; + } + } + + if (mpiNode==MPI_NODE_COUNT-1) + /* all nodes busy */ + { + mpiNode = ReceiveJobs (1); + } + else + { + MPISend (mpiNode+1,lf); + MPINodeState[mpiNode][0] = 1; + MPINodeState[mpiNode][1] = modelNum; + MPINodeState[mpiNode][2] = v1; + MPINodeState[mpiNode][3] = v2; + MPINodeState[mpiNode][4] = v3; + MPINodeState[mpiNode][5] = v4; + MPINodeState[mpiNode][6] = v5; + MPINodeState[mpiNode][7] = v6; + } + } + else + { + Optimize (lf_MLES,lf); + vv1 = v1; + vv2 = v2; + vv3 = v3; + vv4 = v4; + vv5 = v5; + vv6 = v6; + dummy = ReceiveJobs (0); + } + } + } + } + } + +} + +if (MPI_NODE_COUNT>1) +{ + while (1) + { + for (nodeCounter = 0; nodeCounter < MPI_NODE_COUNT-1; nodeCounter = nodeCounter+1) + { + if (MPINodeState[nodeCounter][0]==1) + { + fromNode = ReceiveJobs (0); + break; + } + } + if (nodeCounter == MPI_NODE_COUNT-1) + { + break; + } + } + OPTIMIZE_SUMMATION_ORDER = 1; +} + +function checkEmbedding (_m1, _m2) +{ + for (r=0; r<6; r=r+1) + { + if (_m2[r]<_m1[r]) + { + /*fprintf (stdout,_m1," ", _m2, " Reject 1 at position ",r,"\n");*/ + return 0; + } + if (_m2[r]>_m1[r]) + { + for (r2 = 0; r2 < 6; r2 = r2+1) + { + if ((_m2[r2]==_m2[r])&&(_m1[r2]!=_m1[r])) + { + /*fprintf (stdout,_m1," ", _m2, " Reject 2 at positions ",r,r2,"\n");*/ + return 0; + } + } + } + } + return 1; +} + +PRINT_DIGITS = 0; + +fprintf (stdout, "\n\n--------------------------\n (*) => p-Value < ", rejectAt, "\nRejected ", rejectCount, " models.\n"); + + +if (rejectCount<202) +{ + + fprintf (stdout, "\nPerforming nested tests on the remaining models...\n"); + + done = 0; + while (!done) + { + done = 1; + for (v2=1; v2<203; v2=v2+1) + { + if (resultCache[v2][8]) + { + modelString = "0"; + for (v3 = 0; v3<5; v3=v3+1) + { + modelString = modelString + resultCache [v2][v3]; + } + for (v3 = v2+1; v3<203; v3 = v3+1) + { + if (resultCache[v3][8]) + { + modelString2 = "0"; + for (v4 = 0; v4<5; v4=v4+1) + { + modelString2 = modelString2 + resultCache [v3][v4]; + } + if (checkEmbedding (modelString, modelString2)) + { + fprintf (stdout,"H: (", modelString,") A: (", modelString2, "). "); + done = 0; + LRT = 2*(resultCache[v3][5]-resultCache[v2][5]); + npd = resultCache[v3][6]-resultCache[v2][6]; + if (LRT<0) + { + pValue = 1; + } + else + { + pValue = 1-CChi2(LRT,npd); + } + fprintf (stdout," P-Value=", Format (pValue,10,3)); + if (pValue1)", + "0 & 2 (Normal>1)", + "3 Normal", + "RE: Lognormal", + "RE: Gamma", + "RE: Discrete"}}; + +ParameterCount = {{0, + 1, + 3, + 4, + 2, + 4, + 2, + 4, + 5, + 5, + 5, + 5, + 6, + 1, + 2, + 4 + }}; + +MAXIMUM_ITERATIONS_PER_VARIABLE = 2000; +OPTIMIZATION_PRECISION = 0.001; + +function SetWDistribution (resp) +{ + if (rateType == 0) + { + global P = .5; + P:<1; + categFreqMatrix = {{P,1-P}}; + categRateMatrix = {{0,1}}; + category c = (2, categFreqMatrix , MEAN, ,categRateMatrix, 0, 1e25); + } + else + { + if (rateType == 1) + { + global P1 = 1/3; + global P2 = 0; + + P1:<1; + P2:<1; + + global W = 1; + categFreqMatrix = {{P1,(1-P1)*P2, (1-P1)*(1-P2)}} ; + categRateMatrix = {{0,1,W}}; + category c = (3, categFreqMatrix , MEAN, ,categRateMatrix, 0, 1e25); + } + else + { + if (rateType == 2) + { + global P1 = 1/3; + global P2 = .5; + P1:<1; + P2:<1; + global W1 = .25; + global R1 = 4; + global R2 = 3; + R1:>1; + R2:>1; + categFreqMatrix = {{P1,(1-P1)*P2, (1-P1)*(1-P2)}} ; + categRateMatrix = {{W1,W1*R1,W1*R1*R2}}; + category c = (3, categFreqMatrix , MEAN, ,categRateMatrix, 0, 1e25); + } + else + { + if (rateType == 3) + { + global P1 = 1/5; + global P2 = 1/4; + global P3 = 1/3; + global P4 = 1/2; + + P1:<1; + P2:<1; + P3:<1; + P4:<1; + + categFreqMatrix = {{P1, + (1-P1)P2, + (1-P1)(1-P2)*P3, + (1-P1)(1-P2)(1-P3)P4, + (1-P1)(1-P2)(1-P3)(1-P4)}} ; + categRateMatrix = {{0,1/3,2/3,1,3}}; + category c = (5, categFreqMatrix , MEAN, ,categRateMatrix, 0, 1e25); + } + else + { + if (rateType == 4) + { + global alpha = .5; + global beta = 1; + alpha:>0.01;alpha:<100; + beta:>0.01; + beta:<200; + category c = (resp, EQUAL, MEAN, GammaDist(_x_,alpha,beta), CGammaDist(_x_,alpha,beta), 0 , + 1e25,CGammaDist(_x_,alpha+1,beta)*alpha/beta); + } + else + { + if (rateType == 5) + { + global alpha = .5; + global beta = 1; + global alpha2= .75; + global P = .5; + alpha:>0.01;alpha:<100; + beta:>0.01; + beta:<200; + P:<1; + alpha2:>0.01;alpha2:<100; + category c = (resp, EQUAL, MEAN, P*GammaDist(_x_,alpha,beta) + (1-P)*GammaDist(_x_,alpha2,alpha2) + , P*CGammaDist(_x_,alpha,beta) + (1-P)*CGammaDist(_x_,alpha2,alpha2), + 0 , 1e25, + P*CGammaDist(_x_,alpha+1,beta)*alpha/beta + (1-P)*CGammaDist(_x_,alpha2+1,alpha2)); + } + else + { + if (rateType == 6) + { + global betaP = 1; + global betaQ = 1; + betaP:>0.05;betaP:<85; + betaQ:>0.05;betaQ:<85; + category c = (resp, EQUAL, MEAN, _x_^(betaP-1)*(1-_x_)^(betaQ-1)/Beta(betaP,betaQ), IBeta(_x_,betaP,betaQ), 0 , + 1,IBeta(_x_,betaP+1,betaQ)*betaP/(betaP+betaQ)); + } + else + { + if (rateType == 7) + { + global W = 2; + /*W:>1;*/ + global P = 1-1/(resp+1); + global betaP = 1; + global betaQ = 2; + betaP:>0.05; + betaQ:>0.05; + betaP:<85; + betaQ:<85; + P:>0.0000001; + P:<0.9999999; + categFreqMatrix = {resp+1,1}; + for (k=0; k=W), + 0,1e25, + P*IBeta(Min(_x_,1),betaP+1,betaQ)*betaP/(betaP+betaQ)+(1-P)*W*(_x_>=W)); + } + else + { + if (rateType == 8) + { + global P = .5; + global betaP = 1; + global betaQ = 2; + betaP:>0.05;betaP:<85; + betaQ:>0.05;betaQ:<85; + global alpha = .5; + global beta = 1; + alpha:>0.01;alpha:<100; + beta:>0.01; + beta:<200; + P:<1; + category c = (resp, EQUAL, MEAN, + P*_x_^(betaP-1)*(1-Min(_x_,1))^(betaQ-1)/Beta(betaP,betaQ)+(1-P)*GammaDist(_x_,alpha,beta), + P*IBeta(Min(_x_,1),betaP,betaQ)+(1-P)*CGammaDist(_x_,alpha,beta), + 0,1e25, + P*betaP/(betaP+betaQ)*IBeta(Min(_x_,1),betaP+1,betaQ)+(1-P)*alpha/beta*CGammaDist(_x_,alpha+1,beta)); + } + else + { + if (rateType == 9) + { + global P = .5; + P:<1; + global betaP = 1; + betaP:>0.05;betaP:<85; + global betaQ = 2; + betaQ:>0.05;betaQ:<85; + global alpha = .5; + alpha:>0.01;alpha:<100; + global beta = 1; + beta:>0.01;beta:<500; + category c = (resp, EQUAL, MEAN, + P*_x_^(betaP-1)*(1-Min(_x_,1))^(betaQ-1)/Beta(betaP,betaQ)+(1-P)*(_x_>1)*GammaDist(Max(1e-20,_x_-1),alpha,beta), + P*IBeta(Min(_x_,1),betaP,betaQ)+(1-P)*CGammaDist(Max(_x_-1,0),alpha,beta), + 0,1e25, + P*betaP/(betaP+betaQ)*IBeta(Min(_x_,1),betaP+1,betaQ)+ + (1-P)*(alpha/beta*CGammaDist(Max(0,_x_-1),alpha+1,beta)+CGammaDist(Max(0,_x_-1),alpha,beta))); + } + else + { + if (rateType == 10) + { + global P = .5; + global betaP = 1; + global betaQ = 2; + betaP:>0.05; + betaQ:>0.05; + betaP:<85; + betaQ:<85; + global mu = 3; + global sigma = .01; + sigma:>0.0001; + sqrt2pi = Sqrt(8*Arctan(1)); + P:<1; + + category c = (resp, EQUAL, MEAN, + P*_x_^(betaP-1)*(1-Min(_x_,1))^(betaQ-1)/Beta(betaP,betaQ)+ + (1-P)*(_x_>=1)*Exp(-(_x_-mu)(_x_-mu)/(2*sigma*sigma))/(sqrt2pi__*sigma)/ZCDF((mu-1)/sigma), + P*IBeta(Min(_x_,1),betaP,betaQ)+(1-P)*(_x_>=1)*(1-ZCDF((mu-_x_)/sigma)/ZCDF((mu-1)/sigma)), + 0,1e25, + P*betaP/(betaP+betaQ)*IBeta(Min(_x_,1),betaP+1,betaQ)+ + (1-P)*(_x_>=1)*(mu*(1-ZCDF((1-mu)/sigma)-ZCDF((mu-_x_)/sigma))+ + sigma*(Exp((mu-1)(1-mu)/(2*sigma*sigma))-Exp((_x_-mu)(mu-_x_)/(2*sigma*sigma)))/sqrt2pi__)/ZCDF((mu-1)/sigma)); + } + else + { + if (rateType == 11) + { + global P = 1/3; + global P1 = .5; + + global mu = 3; + global sigma = .5; + sigma:>0.0001; + global sigma1 = 1; + sigma1:>0.0001; + + sqrt2pi = Sqrt(8*Arctan(1)); + P:<1; + P1:<1; + + categFreqMatrix = {resp+1,1}; + for (k=1; k<=resp; k=k+1) + { + categFreqMatrix[k]:=(1-P)/resp__; + } + categFreqMatrix[0]:=P; + + category c = (resp+1, categFreqMatrix, MEAN, + (1-P)((1-P1)*Exp(-(_x_-mu)(_x_-mu)/(2*sigma1*sigma1))/(sqrt2pi__*sigma1)/ZCDF(mu/sigma1)+ + P1*Exp(-(_x_-1)(_x_-1)/(2*sigma*sigma))/(sqrt2pi__*sigma)/ZCDF(1/sigma)), + P+(1-P)(_x_>1e-20)((1-P1)(1-ZCDF((mu-_x_)/sigma1)/ZCDF(mu/sigma1))+ + P1*(1-ZCDF((1-_x_)/sigma)/ZCDF(1/sigma))), + 0,1e25, + (1-P)((1-P1)(mu*(1-ZCDF(-mu/sigma1)-ZCDF((mu-_x_)/sigma1))+ + sigma1*(Exp(-mu*mu/(2*sigma1*sigma1))-Exp((_x_-mu)(mu-_x_)/(2*sigma1*sigma1)))/sqrt2pi__)/ZCDF(mu/sigma1)+ + P(1-ZCDF(-1/sigma)-ZCDF((1-_x_)/sigma)+ + sigma*(Exp(-1/(2*sigma*sigma))-Exp((_x_-1)(1-_x_)/(2*sigma*sigma)))/sqrt2pi__)/ZCDF(1/sigma)) + ); + } + else + { + if (rateType == 12) + { + global P = 1/3; + global P1 = .5; + + global mu = 3; + global sigma = .25; + global sigma1 = .5; + global sigma2 = 1; + sigma:>0.0001; + sigma1:>0.0001; + sigma2:>0.0001; + + sqrt2pi = Sqrt(8*Arctan(1)); + P:<1; + P1:<1; + + category c = (resp, EQUAL , MEAN, + 2*P*Exp(-_x_^2/(2*sigma*sigma))+ + (1-P)((1-P1)*Exp((_x_-mu)(mu-_x_)/(2*sigma2*sigma2))/(sqrt2pi__*sigma2)/ZCDF(mu/sigma2)+ + P1*Exp((1-_x_)(_x_-1)/(2*sigma1*sigma1))/(sqrt2pi__*sigma1)/ZCDF(1/sigma1)), + P*(1-2*ZCDF(-_x_/sigma))+ + (1-P)((1-P1)(1-ZCDF((mu-_x_)/sigma2)/ZCDF(mu/sigma2))+ + P1*(1-ZCDF((1-_x_)/sigma1)/ZCDF(1/sigma1))), + 0,1e25, + 2*P*sigma*(1-Exp(-_x_*_x_/(2*sigma*sigma)))/sqrt2pi__+ + (1-P)((1-P1)(mu*(1-ZCDF(-mu/sigma2)-ZCDF((mu-_x_)/sigma2))+ + sigma2*(Exp(-mu*mu/(2*sigma2*sigma2))-Exp((_x_-mu)(mu-_x_)/(2*sigma2*sigma2)))/sqrt2pi__)/ZCDF(mu/sigma2)+ + P1(1-ZCDF(-1/sigma1)-ZCDF((1-_x_)/sigma1)+ + sigma1*(Exp(-1/(2*sigma1*sigma1))-Exp((_x_-1)(1-_x_)/(2*sigma1*sigma1)))/sqrt2pi__)/ZCDF(mu/sigma1)) + ); + } + else + { + if (rateType == 13) + { + global sigma = .1; + sigma:>0.0001;sigma:<10; + sqrt2pi = Sqrt(8*Arctan(1)); + global _x_:<1e200; + category c = (resp, EQUAL, MEAN, + Exp (-Log(_x_)*Log(_x_) / (2*sigma*sigma)) / (_x_*sigma*sqrt2pi__), /*density*/ + ZCDF (Log(_x_)/sigma), /*CDF*/ + 1e-200, /*left bound*/ + 1e200, /*right bound*/ + Exp (.5*sigma^2)*ZCDF (Log(_x_)/sigma-sigma), + CONSTANT_ON_PARTITION + ); + } + else + { + if (rateType == 14) + { + global alpha = .5; + global beta = 1; + alpha:>0.01;alpha:<100; + beta:>0.01; + beta:<200; + category c = (resp, EQUAL, MEAN, GammaDist(_x_,alpha,beta), CGammaDist(_x_,alpha,beta), 0 , + 1e25,CGammaDist(_x_,alpha+1,beta)*alpha/beta,CONSTANT_ON_PARTITION); + } + else + { + global P1 = 1/3; + global P2 = .5; + P1:<1; + P2:<1; + global W1 = .25; + global R1 = 4; + global R2 = 3; + R1:>1; + R2:>1; + categFreqMatrix = {{P1,(1-P1)*P2, (1-P1)*(1-P2)}} ; + categRateMatrix = {{W1,W1*R1,W1*R1*R2}}; + category c = (3, categFreqMatrix , MEAN, ,categRateMatrix, 0, 1e25, ,CONSTANT_ON_PARTITION); + } + } + } + } + } + } + } + } + } + } + } + } + } + } + } + return 0; +} + +/* ____________________________________________________________________________________________________________________*/ + +function FrameText (frameChar,vertChar,parOff,theText) +{ + h = Abs (theText)+4; + fprintf (stdout,"\n"); + for (k=0; k1) break; + } + if (k 1 */ + { + ConstructCategoryMatrix(marginals,lf,COMPLETE); + + CC = Columns (marginals); + if (rateType>=13) + /* subset rate variation */ + { + CC = CC/numberOfSubsets; + + subsetMarginals = {D,numberOfSubsets}; + for (v=0; v1 (Posterior cutoff = ",sigLevel,")\n\n"); + for (v=0; v=sigLevel) + { + fprintf (LAST_FILE_PATH,Format (v+1,0,0)," (",positiveProb,")\n"); + } + } + fprintf (LAST_FILE_PATH,"\n\n------------------------------------------------\n\n Subsets with dN/dS<=1 (Posterior cutoff = ",sigLevel,")\n\n"); + for (v=0; v1 (Posterior cutoff = ",sigLevel,")\n\n"); + for (v=0; v=sigLevel) + { + fprintf (LAST_FILE_PATH,Format (v+1,0,0)," (",positiveProb,")\n"); + } + } + fprintf (LAST_FILE_PATH,"\n\n------------------------------------------------\n\n Sites with dN/dS<=1 (Posterior cutoff = ",sigLevel,")\n\n"); + for (v=0; v1."); + } + fprintf (LAST_FILE_PATH,"\n\n------------------------------------------------\n\n"); + return E; +} + +/* ____________________________________________________________________________________________________________________*/ + +function PopulateModelMatrix (ModelMatrixName&, EFV) +{ + ModelMatrixName = {ModelMatrixDimension,ModelMatrixDimension}; + + hshift = 0; + + if (modelType==0) + { + for (h=0; h<64; h=h+1) + { + if (_Genetic_Code[h]==10) + { + hshift = hshift+1; + continue; + } + vshift = hshift; + for (v = h+1; v<64; v=v+1) + { + diff = v-h; + if (_Genetic_Code[v]==10) + { + vshift = vshift+1; + continue; + } + if ((h$4==v$4)||((diff%4==0)&&(h$16==v$16))||(diff%16==0)) + { + if (h$4==v$4) + { + transition = v%4; + transition2= h%4; + } + else + { + if(diff%16==0) + { + transition = v$16; + transition2= h$16; + } + else + { + transition = v%16$4; + transition2= h%16$4; + } + } + if (_Genetic_Code[0][h]==_Genetic_Code[0][v]) + { + ModelMatrixName[h-hshift][v-vshift] := t*EFV__[transition__]; + ModelMatrixName[v-vshift][h-hshift] := t*EFV__[transition2__]; + } + else + { + ModelMatrixName[h-hshift][v-vshift] := c*t*EFV__[transition__]; + ModelMatrixName[v-vshift][h-hshift] := c*t*EFV__[transition2__]; + } + } + } + } + } + else + { + if (modelType==1) + { + for (h=0; h<64; h=h+1) + { + if (_Genetic_Code[h]==10) + { + hshift = hshift+1; + continue; + } + vshift = hshift; + for (v = h+1; v<64; v=v+1) + { + diff = v-h; + if (_Genetic_Code[v]==10) + { + vshift = vshift+1; + continue; + } + nucPosInCodon = 2; + if ((h$4==v$4)||((diff%4==0)&&(h$16==v$16))||(diff%16==0)) + { + if (h$4==v$4) + { + transition = v%4; + transition2= h%4; + } + else + { + if(diff%16==0) + { + transition = v$16; + transition2= h$16; + nucPosInCodon = 0; + } + else + { + transition = v%16$4; + transition2= h%16$4; + nucPosInCodon = 1; + } + } + if (_Genetic_Code[0][h]==_Genetic_Code[0][v]) + { + ModelMatrixName[h-hshift][v-vshift] := t*EFV__[transition__][nucPosInCodon__]; + ModelMatrixName[v-vshift][h-hshift] := t*EFV__[transition2__][nucPosInCodon__]; + } + else + { + ModelMatrixName[h-hshift][v-vshift] := c*t*EFV__[transition__][nucPosInCodon__]; + ModelMatrixName[v-vshift][h-hshift] := c*t*EFV__[transition2__][nucPosInCodon__]; + } + } + } + } + } + else + { + for (h=0; h<64; h=h+1) + { + if (_Genetic_Code[h]==10) + { + hshift = hshift+1; + continue; + } + vshift = hshift; + for (v = h+1; v<64; v=v+1) + { + diff = v-h; + if (_Genetic_Code[v]==10) + { + vshift = vshift+1; + continue; + } + if ((h$4==v$4)||((diff%4==0)&&(h$16==v$16))||(diff%16==0)) + { + if (h$4==v$4) + { + transition = v%4; + transition2= h%4; + } + else + { + if(diff%16==0) + { + transition = v$16; + transition2= h$16; + } + else + { + transition = v%16$4; + transition2= h%16$4; + } + } + if (_Genetic_Code[0][h]==_Genetic_Code[0][v]) + { + if (Abs(transition-transition2)%2) + { + ModelMatrixName[h-hshift][v-vshift] := kappa*t; + ModelMatrixName[v-vshift][h-hshift] := kappa*t; + } + else + { + ModelMatrixName[h-hshift][v-vshift] := t; + ModelMatrixName[v-vshift][h-hshift] := t; + } + + } + else + { + if (Abs(transition-transition2)%2) + { + ModelMatrixName[h-hshift][v-vshift] := kappa*c*t; + ModelMatrixName[v-vshift][h-hshift] := kappa*c*t; + } + else + { + ModelMatrixName[h-hshift][v-vshift] := c*t; + ModelMatrixName[v-vshift][h-hshift] := c*t; + } + } + } + } + } + } + } + return (modelType>1); +} + +/* ____________________________________________________________________________________________________________________*/ + +function PopulateModelMatrix2 (ModelMatrixName&, EFV) +{ + ModelMatrixName = {ModelMatrixDimension,ModelMatrixDimension}; + + hshift = 0; + + for (h=0; h<64; h=h+1) + { + if (_Genetic_Code[h]==10) + { + hshift = hshift+1; + continue; + } + vshift = hshift; + for (v = h+1; v<64; v=v+1) + { + diff = v-h; + if (_Genetic_Code[v]==10) + { + vshift = vshift+1; + continue; + } + if ((h$4==v$4)||((diff%4==0)&&(h$16==v$16))||(diff%16==0)) + { + if (h$4==v$4) + { + transition = v%4; + transition2= h%4; + } + else + { + if(diff%16==0) + { + transition = v$16; + transition2= h$16; + } + else + { + transition = v%16$4; + transition2= h%16$4; + } + } + if (_Genetic_Code[0][h]==_Genetic_Code[0][v]) + { + if (Abs(transition-transition2)%2) + { + ExecuteCommands ("ModelMatrixName[h-hshift][v-vshift] := kappa"+l+"*t;ModelMatrixName[v-vshift][h-hshift] := kappa"+l+"*t;"); + } + else + { + ModelMatrixName[h-hshift][v-vshift] := t; + ModelMatrixName[v-vshift][h-hshift] := t; + } + + } + else + { + if (Abs(transition-transition2)%2) + { + ExecuteCommands ("ModelMatrixName[h-hshift][v-vshift] := c*kappa"+l+"*t;ModelMatrixName[v-vshift][h-hshift] := c*kappa"+l+"*t;"); + } + else + { + ModelMatrixName[h-hshift][v-vshift] := c*t; + ModelMatrixName[v-vshift][h-hshift] := c*t; + } + } + } + } + } + return 1; +} +/* ____________________________________________________________________________________________________________________*/ + +function spawnLikelihood (kappaSharedOrNot) +{ + if (kappaSharedOrNot) + { + for (l=0; l=lowerSeqBound)&&(speciesIndex1)","Beta & (Normal>1)", + "0 & 2 (Normal>1)","0 & 2 (Normal>1)", + "3 Normal","3 Normal", + "RE:Log normal","Random Effects: Log normal", + "RE:Gamma","Random Effects: Gamma", + "RE:Discrete","Random Effects: 3 bin Discrete"); + + if (modelTypes[0]<0) + { + return; + } + for (rateType = 0; rateType < Rows(modelTypes)*Columns(modelTypes); rateType = rateType + 1) + { + modelType = modelTypes[rateType]; + chosenModelList[modelType] = 1; + } +} + +ChoiceList (shareType,"Choose parameter sharing mode",1,SKIP_NONE, + "All","Share dN/dS, transversion/transition ratio (if applicable) and base frequencies for all subsets.", + "dN/dS Only","Share only dN/dS. Transversion/transition ratio (if applicable) and base frequencies are separate for each subset." +); + +if (shareType<0) +{ + return; +} + +ChoiceList (modelType,"Choose a model",1,SKIP_NONE, + "MG94 1x4","Muse-Gaut 94 model with 4(-1) nucleotide frequency parameters (intra-codon position independent).", + "MG94 3x4","Muse-Gaut 94 model with 12(-3) nucleotide frequency parameters (intra-codon position specific).", + "GY94 1x4","Goldman-Yang 94 model with 4(-1) nucleotide frequency parameters (intra-codon position independent).", + "GY94 3x4","Goldman-Yang 94 model with 12(-3) nucleotide frequency parameters (intra-codon position specific)." +); + +if (modelType<0) +{ + return; +} + +if ((modelType==0)||(modelType==2)) +{ + if (shareType==0) + { + HarvestFrequencies (observedFreq,filteredData,1,1,0); + vectorOfFrequencies = BuildCodonFrequencies4 (observedFreq); + } + else + { + for (v=0; v1) +{ + global kappa = 2.; +} + +fprintf (stdout, "\n\n\nChoose the cutoff (0 to 1) for posterior of dN/dS>1 for a site to be considered under selective pressure:"); +fscanf (stdin, "Number",psigLevel); +if ((psigLevel <= 0)||(psigLevel>1)) +{ + psigLevel = .95; +} +fprintf (stdout, "\n>Using ", psigLevel , " cutoff\n"); + +fprintf (stdout, "\nChoose the number of categories in discretized distributions:"); +fscanf (stdin, "Number",categCount); +categCount = categCount$1; +if (categCount<=0) +{ + categCount = 8; +} + +fprintf (stdout, "\n>Using ", Format (categCount,0,0), " categories.\n"); + +SetDialogPrompt ("Write detailed results to:"); + +fprintf (PROMPT_FOR_FILE,CLEAR_FILE); + +global c = 1.; + +dummyVar = FrameText ("-","|",2,"SUMMARY TABLE"); +tableSeparator = "+-------------------------+----------------+---------------+-----+\n"; +fprintf (stdout, "\n\"p\" is the number of parameters in addition to the branch lengths.\nDetailed results including sites with dN/dS>1 will be written to\n",LAST_FILE_PATH,"\n\n"); +fprintf (stdout, tableSeparator, + "| MODEL (Number & Desc) | Log likelihood | dN/dS | p |\n", + tableSeparator); + +cachedBranchLengths = {{-1,-1}}; + +if (chosenModelList[0]>0) +{ + timer = Time(1); + fprintf (LAST_FILE_PATH,"\n*** RUNNING SINGLE RATE MODEL ***\n#################################\n"); + dummy = spawnLikelihood (shareType); + Optimize (res,lf); + fprintf (LAST_FILE_PATH,"\n>Done in ", Time(1)-timer, " seconds \n\n"); + fprintf (LAST_FILE_PATH,lf,"\n\n-----------------------------------\n\ndN/dS = ",c,"\n\n"); + + fprintf (stdout, "| 0. Single Rate Model | ",Format (res[1][0],14,6)," | ",Format (c,13,8)," | 0 |\n", + tableSeparator); + + timer = res[1][1]-res[1][2]; + cachedBranchLengths = {timer,1}; + + for (rateType = timer; rateType < Columns(cachedBranchLengths); rateType = rateType+1) + { + cachedBranchLengths[rateType-timer][0] = res [0][rateType]; + } +} + +for (rateType = 0; rateType < 16; rateType = rateType + 1) +{ + if (chosenModelList[rateType+1]==0) + { + continue; + } + timer = Time(1); + dummy = SetWDistribution (categCount); + dummy = spawnLikelihood (shareType); + + fprintf (LAST_FILE_PATH,"\n*** RUNNING MODEL ", Format(rateType+1,0,0), " (",ModelNames[rateType],") ***\n######################################\n"); + /*if (cachedBranchLengths[0][0]>=0.0) + { + v = ParameterCount[rateType]; + if (modelType>1) + { + v=v+1; + } + for (h=0; hDone in ",Time(1)-timer, " seconds \n\n", lf); + fprintf (stdout, "| "); + if (rateType<9) + { + fprintf (stdout," "); + } + fprintf (stdout, Format (rateType+1,0,0), ". ", ModelNames[rateType]); + for (dummy = Abs(ModelNames[rateType])+5; dummy<25; dummy = dummy+1) + { + fprintf (stdout," "); + } + dummy = GetDistributionParameters(psigLevel); + fprintf (stdout,"| ",Format (res[1][0],14,6)," | ",Format (dummy,13,8)," | ", + Format(ParameterCount[rateType],0,0)," |\n",tableSeparator); + + if (modelType>1) + { + kappa = 2.; + } +} diff --git a/samples/HyPhy/MolecularClock.bf b/samples/HyPhy/MolecularClock.bf new file mode 100644 index 00000000..92606d2d --- /dev/null +++ b/samples/HyPhy/MolecularClock.bf @@ -0,0 +1,147 @@ +#include "molclockBootstrap.bf"; + +RESTORE_GLOBALS = 1; +_DO_TREE_REBALANCE_ = 0; +VERBOSITY_LEVEL = -1; + +function RestoreGlobalValues (lfIndex) +{ + if (lfIndex==0) + { + for (i=0;i1) +{ + ChoiceList (parameter2Constrain, "Parameter(s) to constrain:",1,SKIP_NONE,LAST_MODEL_PARAMETER_LIST); + + if (parameter2Constrain<0) + { + return; + } + if (parameter2Constrain==0) + { + parameter2ConstrainString = ""; + for (parameter2Constrain=Rows("LAST_MODEL_PARAMETER_LIST")-1; parameter2Constrain; parameter2Constrain = parameter2Constrain-1) + { + GetString (funnyString,LAST_MODEL_PARAMETER_LIST,parameter2Constrain); + parameter2ConstrainString = parameter2ConstrainString + funnyString + ","; + } + GetString (funnyString,LAST_MODEL_PARAMETER_LIST,0); + parameter2ConstrainString = parameter2ConstrainString + funnyString; + } + else + { + GetString (parameter2ConstrainString,LAST_MODEL_PARAMETER_LIST,parameter2Constrain-1); + } +} +else +{ + GetString (parameter2ConstrainString,LAST_MODEL_PARAMETER_LIST,0); +} + +timer = Time(0); + +LikelihoodFunction lf = (filteredData,givenTree); + +Optimize (res,lf); + +separator = "*-----------------------------------------------------------*"; + +fprintf (stdout, "\n", separator, "\nRESULTS WITHOUT THE CLOCK:\n",lf); + +fullModelLik = res[1][0]; + +fullVars = res[1][1]; + +/* now specify the constraint */ + +Tree clockTree = treeString; + +ExecuteCommands ("MolecularClock (clockTree,"+parameter2ConstrainString+");"); + +LikelihoodFunction lfConstrained = (filteredData, clockTree); + +USE_LAST_RESULTS = 1; +Optimize (res1,lfConstrained); +USE_LAST_RESULTS = 0; + +SAVE_GLOBALS = res1[1][2]; + +if (SAVE_GLOBALS) +{ + globalSpoolMatrix = {1,SAVE_GLOBALS}; + + for (i=0;idS in the data sets? + - Do the sites under positive selection share the same dN/dS? + - Two previous tests combined +*/ + +RequireVersion ("0.9920061001"); + +/*---------------------------------------------------------------------------------------------------------------------------------------------------*/ + +function BuildCodonFrequencies (obsF) +{ + PIStop = 1.0; + result = {ModelMatrixDimension,1}; + hshift = 0; + + for (h=0; h<64; h=h+1) + { + first = h$16; + second = h%16$4; + third = h%4; + if (_Genetic_Code[h]==10) + { + hshift = hshift+1; + PIStop = PIStop-obsF[first][0]*obsF[second][1]*obsF[third][2]; + continue; + } + result[h-hshift][0]=obsF[first][0]*obsF[second][1]*obsF[third][2]; + } + return result*(1.0/PIStop); +} + +/*---------------------------------------------------------------------------------------------------------------------------------------------------*/ + +function ReportDistributionString (rc,freqStrMx,infix, skip0) +{ + distroString = ""; + distroString * 1024; + + reportMx = {rc,4}; + + distroString * (" dN/dS dS dN Prob\n"); + for (mi=0; mi 0) + { + distroString * (Format(reportMx[mi][2],8,3)+Format(reportMx[mi][0],8,3)+Format(reportMx[mi][1],8,3)+Format(reportMx[mi][3],10,3)+"\n"); + } + } + + distroString * 0; + return distroString; +} + +/*---------------------------------------------------------------------------------------------------------------------------------------------------*/ + +function DefineNucleotideBiases (dummy) +{ + ModelTitle = "MG94x"+modelDesc[0]; + modelConstraintString = ""; + + for (customLoopCounter2=0; customLoopCounter2<6; customLoopCounter2=customLoopCounter2+1) + { + if (rateBiasTerms[customLoopCounter2] != "1") + { + ExecuteCommands ("global " + rateBiasTerms[customLoopCounter2] + "=1;"); + } + } + + for (customLoopCounter2=1; customLoopCounter2<6; customLoopCounter2=customLoopCounter2+1) + { + for (customLoopCounter=0; customLoopCounter0.0000001;global NS_"+infix+"0 = 0.1;"); + + for (mi=1; mi0.0000001;\nglobal NS_"+infix+mi+";\n"); + if (randomizeInitValues) + { + categDef1*("global P_"+infix+mi+" = Random(0.05,0.95);\nP_"+infix+mi+":<1;\n"); + categDef1*("global S_"+infix+mi+" = Random(0.05,1);"); + } + else + { + categDef1*("global P_"+infix+mi+" = 1/"+(resp+1-mi)+";\nP_"+infix+mi+":<1;\n"); + } + } + + freqStrMx = {resp,1}; + if (resp>1) + { + freqStrMx[0] = "P_"+infix+"1"; + + for (mi=1; mi1;R_"+infix+mi+"="+Random(1.05,10)+";"); + } + } + else + { + for (mi=respN+respP; mi1;R_"+infix+mi+"=2+"+mi+";"); + } + } + } + else + { + if (randomizeInitValues) + { + for (mi=0; mi 1) + { + lfParts = lfParts + ",filteredData_" + fileID + "," + treeID; + } + } +} + +lfParts * 0; +ExecuteCommands (lfParts + "," + lfDef1 + ");"); + +sumPath = resToPath + ".summary"; + +treeBranchParameters = 0; +for (fileID = 1; fileID <= fileCount; fileID = fileID + 1) +{ + ExecuteCommands ("treeBranchParameters = treeBranchParameters + BranchCount(tree_" + fileID + "_0) + TipCount(tree_" + fileID + "_0);"); +} + +/*-------------- INDEPENDENT DISTRIBUTIONS ------------------*/ + +sop = Max(OPTIMIZATION_PRECISION,0.001); + +fprintf (stdout, "Running simpler distribution approximations to ensure good convergence...\n"); + +OPTIMIZATION_PRECISION = 0.1; + +P_1_1 := 0;P_1_2 := 0;P_1_3 := 0; +P_2_1 := 0;P_2_2 := 0;P_2_3 := 0; + +S_1_0 := 1;S_1_1 := 1;S_1_2 := 1;S_1_3 := 1; +S_2_0 := 1;S_2_1 := 1;S_2_2 := 1;S_2_3 := 1; +R_1_0 := 1;R_1_1 := 1;R_1_2 := 1; +R_2_0 := 1;R_2_1 := 1;R_2_2 := 1; + +Optimize (res,lf); +USE_LAST_RESULTS = 1; +SKIP_CONJUGATE_GRADIENT = 1; + +d1 = ReportDistributionString(4,freqStrMx_1,"1_",1); +d2 = ReportDistributionString(4,freqStrMx_2,"2_",1); + +fprintf (stdout, "\n*** Done with pass 1. Log(L) = ", Format(res[1][0],10,3), " *** \n"); +fprintf (stdout, "Approximate rates for data set 1:\n", d1); +fprintf (stdout, "Approximate rates for data set 2:\n", d2); + +codonFactor_1 := codonFactor_1__; +codonFactor_2 := codonFactor_2__; + +AC_1 := AC_1__; AT_1 := AT_1__; CG_1 := CG_1__; CT_1 := CT_1__; GT_1 := GT_1__; +AC_2 := AC_2__; AT_2 := AT_2__; CG_2 := CG_2__; CT_2 := CT_2__; GT_2 := GT_2__; + + + +fprintf (stdout, "\nGateaux sampling positively selected directions\n"); + +P_1_1 = 2/filteredData_1.sites; +S_1_3 = 1; + +baseLineLL = res[1][0]; +LFCompute (lf,LF_START_COMPUTE); +bestDiff = -1e100; +bestAlpha = 1; +bestBeta = 1; +bestLL = -1e100; + +step = 0.1; +step2 = 0.25; +v1 = 0.05; + +for (v1c = 0; v1c < 10; v1c = v1c + 1) +{ + v2 = v1+step; + for (v2c = 0; v2c < 20; v2c = v2c+1) + { + checkASample ("1_0"); + v2 = v2 + step2; + } + v1 = v1+step; +} + +S_1_0 = bestAlpha; +R_1_0 = bestBeta; +bestAlpha = 1; +bestBeta = 1; + +if (bestDiff <= 0) +{ + P_1_1 = 0; +} +saveBD = bestDiff; + +P_2_1 = 1/filteredData_2.sites; +S_2_3 = 1; + +v1 = 0.05; +for (v1c = 0; v1c < 10; v1c = v1c + 1) +{ + v2 = v1+step; + for (v2c = 0; v2c < 20; v2c = v2c+1) + { + checkASample ("2_0"); + v2 = v2 + step2; + } + v1 = v1+step; +} + +LFCompute (lf,LF_DONE_COMPUTE); + +S_2_0 = bestAlpha; +R_2_0 = bestBeta; + +if (bestDiff <= saveBD) +{ + P_2_1 = 0; +} + +if (bestDiff > 0) +{ + fprintf (stdout, "\nFound a likelihood improvement in the direction (", S_1_0, ",", S_1_0*R_1_0, "), (", S_2_0, ",", S_2_0*R_2_0, "), ",bestDiff," likelihood points\n"); +} + + +Optimize (res,lf); + +d1 = ReportDistributionString(4,freqStrMx_1,"1_",1); +d2 = ReportDistributionString(4,freqStrMx_2,"2_",1); + +fprintf (stdout, "\n*** Done with pass 2. Log(L) = ", Format(res[1][0],10,3), " *** \n"); +fprintf (stdout, "Approximate rates for data set 1:\n", d1); +fprintf (stdout, "Approximate rates for data set 2:\n", d2); + + +fprintf (stdout, "\nGateaux sampling neutral directions\n"); + +baseLineLL = res[1][0]; +LFCompute (lf,LF_START_COMPUTE); + +P_1_2 = 1/(filteredData_1.sites*(1-P_1_1)); +bestDiff = -1e100; +bestAlpha = 1; +step = 0.02; +v1 = 0; +for (v1c = 0; v1c < 50; v1c = v1c + 1) +{ + v1 = v1 + step; + v2 = v1; + + checkASample ("1_1"); +} + + +S_1_1 = bestAlpha; +bestAlpha = 1; + +if (bestDiff <= 0) +{ + P_1_2 = 0; +} +saveBD = bestDiff; +R_1_1 := 1; + +P_2_2 = 1/(filteredData_2.sites*(1-P_2_1)); + +v1 = 0; +for (v1c = 0; v1c < 50; v1c = v1c + 1) +{ + v1 = v1 + step; + v2 = v1; + + checkASample ("2_1"); +} + + +LFCompute (lf,LF_DONE_COMPUTE); + +S_2_1 = bestAlpha; +R_2_1 := 1; + +if (bestDiff <= saveBD) +{ + P_2_2 = 0; +} + +if (bestDiff > 0) +{ + fprintf (stdout, "\nFound a likelihood improvement in the direction (", S_1_1, ",", S_1_1, "), (", S_2_1, ",", S_2_1, "), ",bestDiff," likelihood points\n"); +} + +Optimize (res,lf); +d1 = ReportDistributionString(4,freqStrMx_1,"1_",1); +d2 = ReportDistributionString(4,freqStrMx_2,"2_",1); + +fprintf (stdout, "\n*** Done with pass 3. Log(L) = ", Format(res[1][0],10,3), " *** \n"); +fprintf (stdout, "Approximate rates for data set 1:\n", d1); +fprintf (stdout, "Approximate rates for data set 2:\n", d2); + +fprintf (stdout, "\nGateaux sampling negative selected directions\n"); + + +baseLineLL = res[1][0]; +LFCompute (lf,LF_START_COMPUTE); +bestDiff = -1e100; +bestAlpha = 1; +bestBeta = 1; + + +step = 1/16; +v1 = 0; +P_1_3 = 2/(filteredData_1.sites*(1-P_1_1)*(1-P_1_2)); +S_1_2 = 1; +for (v1c = 0; v1c < 15; v1c = v1c + 1) +{ + v1 = v1+step; + v2 = step/2; + for (v2c = 0; v2c < v1c; v2c = v2c+1) + { + checkASample ("1_2"); + v2 = v2 + step; + } +} + + +S_1_2 = bestAlpha; +R_1_2 = bestBeta; +bestAlpha = 1; +bestBeta = 1; + +if (bestDiff <= 0) +{ + P_1_3 = 0; +} +saveBD = bestDiff; + + +v1 = 0; +P_2_3 = 2/(filteredData_2.sites*(1-P_2_1)*(1-P_2_2)); +S_2_2 = 1; +for (v1c = 0; v1c < 15; v1c = v1c + 1) +{ + v1 = v1+step; + v2 = step/2; + for (v2c = 0; v2c < v1c; v2c = v2c+1) + { + checkASample ("2_2"); + v2 = v2 + step; + } +} + +LFCompute (lf,LF_DONE_COMPUTE); + +S_2_2 = bestAlpha; +R_2_2 = bestBeta; + +if (bestDiff <= saveBD) +{ + P_2_2 = 0; +} + +if (bestDiff > 0) +{ + fprintf (stdout, "\nFound a likelihood improvement in the direction (", S_1_2, ",", S_1_2*R_1_2, "), (", S_2_2, ",", S_2_2*R_2_2, "), ",bestDiff," likelihood points\n"); +} + +AC_1 = AC_1; AT_1 = AT_1; CG_1 = CG_1; CT_1 = CT_1; GT_1 = GT_1; +AC_2 = AC_2; AT_2 = AT_2; CG_2 = CG_2; CT_2 = CT_2; GT_2 = GT_2; + +if (Abs(modelConstraintString_1)) +{ + ExecuteCommands (modelConstraintString_1); +} +if (Abs(modelConstraintString_2)) +{ + ExecuteCommands (modelConstraintString_2); +} + +codonFactor_1 = codonFactor_1; +codonFactor_2 = codonFactor_2; + + +GetString (paramList, lf, -1); +degF = Columns(paramList["Global Independent"]) + 14 - 2*branchLengths + treeBranchParameters; + +OPTIMIZATION_PRECISION = sop; +fprintf (stdout, "Running an independent distributions model fit (", degF, " parameters)...\n"); +Optimize (res,lf); +LogL = res[1][0]; + +AIC = 2*(degF-LogL); +AICc = 2*(degF*totalCodonCount/(totalCodonCount-degF-1) - LogL); + +fprintf (stdout, "\n\nIndependent distributions model fit summary\n", + "\nLog likelihood:", Format (LogL, 15, 5), + "\nParameters :", Format (degF, 15, 0), + "\nAIC :", Format (AIC, 15, 5), + "\nc-AIC :", Format (AICc, 15, 5),"\n" +); + +fprintf (sumPath,CLEAR_FILE,"Independent distributions model fit summary\n", + "\nLog likelihood:", Format (LogL, 15, 5), + "\nParameters :", Format (degF, 15, 0), + "\nAIC :", Format (AIC, 15, 5), + "\nc-AIC :", Format (AICc, 15, 5),"\n"); + +d1 = ReportDistributionString(4,freqStrMx_1,"1_",0); +d2 = ReportDistributionString(4,freqStrMx_2,"2_",0); + +fprintf (stdout, "Inferred rates for data set 1:\n", d1); +fprintf (sumPath, "Inferred rates for data set 1:\n", d1); +fprintf (stdout, "Inferred rates for data set 2:\n", d2); +fprintf (sumPath, "Inferred rates for data set 2:\n", d2); + +LIKELIHOOD_FUNCTION_OUTPUT = 6; +fprintf (resToPath,CLEAR_FILE,lf); + + +/*-------------- SHARED POSITIVE SELECTION STRENGTHS ------------------*/ + +for (k=0; k=respP+respN) + { + ExecuteCommands ("R_1_"+k+"=0.5(R_1_"+k+"+R_2_"+k+");"); + } + ExecuteCommands ("S_2_"+k+":=S_1_"+k+";R_2_"+k+":=R_1_"+k+";"); +} +for (k=1; k<=resp; k=k+1) +{ + ExecuteCommands ("P_1_"+k+"=0.5(P_1_"+k+"+P_2_"+k+");"); + ExecuteCommands ("P_2_"+k+":=P_1_"+k+";"); +} + +GetString (paramList, lf, -1); +degFJ = Columns(paramList["Global Independent"]) + 14 - 2*branchLengths + treeBranchParameters; +fprintf (stdout, "\nRunning a shared distributions model fit (", degFJ, " parameters)...\n"); + +Optimize (res_J,lf); +LogLJ = res_J[1][0]; + +AICJ = 2*(degFJ-LogLJ); +AICcJ = 2*(degFJ*totalCodonCount/(totalCodonCount-degFJ-1) - LogLJ); + +fprintf (stdout, "\n\nShared distributions model fit summary\n", + "\nLog likelihood:", Format (LogLJ, 15, 5), + "\nParameters :", Format (degFJ, 15, 0), + "\nAIC :", Format (AICJ, 15, 5), + "\nc-AIC :", Format (AICcJ, 15, 5),"\n" +); + +fprintf (sumPath, "\n\nShared distributions model fit summary\n", + "\nLog likelihood:", Format (LogLJ, 15, 5), + "\nParameters :", Format (degFJ, 15, 0), + "\nAIC :", Format (AICJ, 15, 5), + "\nc-AIC :", Format (AICcJ, 15, 5),"\n" +); + +d1 = ReportDistributionString(4,freqStrMx_1,"1_",0); + +fprintf (stdout, "Inferred joint rates:\n", d1); +fprintf (sumPath, "Inferred joint rates:\n", d1); + +fpath = resToPath + ".JointAll"; +LIKELIHOOD_FUNCTION_OUTPUT = 6; +fprintf (fpath,CLEAR_FILE,lf); + +USE_LAST_RESULTS = 0; +SKIP_CONJUGATE_GRADIENT = 0; +LIKELIHOOD_FUNCTION_OUTPUT = 2; + +fprintf (stdout, "\nDistribution comparison tests\n", + "\n\tAre the distributions different?", + "\n\t\tLR = ", Format (2*(LogL-LogLJ),10,3), + " DF = ", degF-degFJ, + " p = ", Format(1-CChi2(2*(LogL-LogLJ), degF-degFJ),8,3), "\n"); + +fprintf (stdout, "\n\tAre selective regimes (dN/dS and proportions) different?", + "\n\t\tLR = ", Format (2*(LogL-LogLPSH),10,3), + " DF = ", degF-degFPSH, + " p = ", Format(1-CChi2(2*(LogL-LogLPSH), degF-degFPSH),8,3), "\n"); + +fprintf (stdout, "\n\tAre selection strengths (dN/dS) different?", + "\n\t\tLR = ", Format (2*(LogL-LogLPSS),10,3), + " DF = ", degF-degFPSS, + " p = ", Format(1-CChi2(2*(LogL-LogLPSS), degF-degFPSH),8,3), "\n"); + +fprintf (stdout, "\n\tAre the proportions of codons under selection different?", + "\n\t\tLR = ", Format (2*(LogL-LogLPSP),10,3), + " DF = ", degF-degFPSP, + " p = ", Format(1-CChi2(2*(LogL-LogLPSP), degF-degFPSP),8,3), "\n"); + +fprintf (sumPath, "\n\nDistribution comparison tests\n", + "\n\tAre the distributions different?", + "\n\t\tLR = ", Format (2*(LogL-LogLJ),10,3), + " DF = ", degF-degFJ, + " p = ", Format(1-CChi2(2*(LogL-LogLJ), degF-degFJ),8,3), "\n"); + +fprintf (sumPath, "\n\tAre selective regimes (dN/dS and proportions) different?", + "\n\t\tLR = ", Format (2*(LogL-LogLPSH),10,3), + " DF = ", degF-degFPSH, + " p = ", Format(1-CChi2(2*(LogL-LogLPSH), degF-degFPSH),8,3), "\n"); + +fprintf (sumPath, "\n\tAre selection strengths (dN/dS) different?", + "\n\t\tLR = ", Format (2*(LogL-LogLPSS),10,3), + " DF = ", degF-degFPSS, + " p = ", Format(1-CChi2(2*(LogL-LogLPSS), degF-degFPSH),8,3), "\n"); + +fprintf (sumPath, "\n\tAre the proportions of codons under selection different?", + "\n\t\tLR = ", Format (2*(LogL-LogLPSP),10,3), + " DF = ", degF-degFPSP, + " p = ", Format(1-CChi2(2*(LogL-LogLPSP), degF-degFPSP),8,3), "\n"); +/*------------------------------------------------------------------------------------------------------------*/ + +function checkASample (whichRate) +{ + ExecuteCommands ("S_"+whichRate+"=v1;R_"+whichRate+"=v2/v1;"); + LFCompute (lf,res_n); + localDiff = res_n-baseLineLL; + if (localDiff > bestDiff) + { + bestDiff = localDiff; + bestAlpha = v1; + bestBeta = v2/v1; + } + return 0; +} diff --git a/samples/HyPhy/hyphy_cmds.bf b/samples/HyPhy/hyphy_cmds.bf new file mode 100644 index 00000000..7a2189f9 --- /dev/null +++ b/samples/HyPhy/hyphy_cmds.bf @@ -0,0 +1,11003 @@ +INTEGRATION_PRECISION_FACTOR = 5.0e-6; +END_OF_FILE = 0; +LIKELIHOOD_FUNCTION_OUTPUT = 5; +ACCEPT_BRANCH_LENGTHS = 1; +#include "/home/oashenbe/.local/lib/python2.7/site-packages/phyloExpCM/data//NTsCodonsAAs.ibf"; +fprintf(stdout, "Running HYPHY script hyphy_cmds.bf...\n"); +DataSet data = ReadDataFile("_codenames_Aligned_NPs_Swine.fasta"); +assert(data.sites % 3 == 0, "Sequence lengths not multiples of 3"); +totalcodons = data.sites $ 3; +fprintf(stdout, "Read from _codenames_Aligned_NPs_Swine.fasta a set of ", data.species, " sequences consisting of ", data.sites, " nucleotides corresponding to ", totalcodons, " codons each.\n"); +fprintf(stdout, "The analysis will include the following 498 codon positions (sequential numbering starting with 1):\n1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498\n"); +assert(totalcodons >= 498, "Largest included site exceeds sequence length"); +DataSetFilter codonfilter = CreateFilter(data, 3, "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,344,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,400,401,402,403,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,428,429,430,431,432,433,434,435,436,437,438,439,440,441,442,443,444,445,446,447,448,449,450,451,452,453,454,455,456,457,458,459,460,461,462,463,464,465,466,467,468,469,470,471,472,473,474,475,476,477,478,479,480,481,482,483,484,485,486,487,488,489,490,491,492,493,494,495,496,497,498,499,500,501,502,503,504,505,506,507,508,509,510,511,512,513,514,515,516,517,518,519,520,521,522,523,524,525,526,527,528,529,530,531,532,533,534,535,536,537,538,539,540,541,542,543,544,545,546,547,548,549,550,551,552,553,554,555,556,557,558,559,560,561,562,563,564,565,566,567,568,569,570,571,572,573,574,575,576,577,578,579,580,581,582,583,584,585,586,587,588,589,590,591,592,593,594,595,596,597,598,599,600,601,602,603,604,605,606,607,608,609,610,611,612,613,614,615,616,617,618,619,620,621,622,623,624,625,626,627,628,629,630,631,632,633,634,635,636,637,638,639,640,641,642,643,644,645,646,647,648,649,650,651,652,653,654,655,656,657,658,659,660,661,662,663,664,665,666,667,668,669,670,671,672,673,674,675,676,677,678,679,680,681,682,683,684,685,686,687,688,689,690,691,692,693,694,695,696,697,698,699,700,701,702,703,704,705,706,707,708,709,710,711,712,713,714,715,716,717,718,719,720,721,722,723,724,725,726,727,728,729,730,731,732,733,734,735,736,737,738,739,740,741,742,743,744,745,746,747,748,749,750,751,752,753,754,755,756,757,758,759,760,761,762,763,764,765,766,767,768,769,770,771,772,773,774,775,776,777,778,779,780,781,782,783,784,785,786,787,788,789,790,791,792,793,794,795,796,797,798,799,800,801,802,803,804,805,806,807,808,809,810,811,812,813,814,815,816,817,818,819,820,821,822,823,824,825,826,827,828,829,830,831,832,833,834,835,836,837,838,839,840,841,842,843,844,845,846,847,848,849,850,851,852,853,854,855,856,857,858,859,860,861,862,863,864,865,866,867,868,869,870,871,872,873,874,875,876,877,878,879,880,881,882,883,884,885,886,887,888,889,890,891,892,893,894,895,896,897,898,899,900,901,902,903,904,905,906,907,908,909,910,911,912,913,914,915,916,917,918,919,920,921,922,923,924,925,926,927,928,929,930,931,932,933,934,935,936,937,938,939,940,941,942,943,944,945,946,947,948,949,950,951,952,953,954,955,956,957,958,959,960,961,962,963,964,965,966,967,968,969,970,971,972,973,974,975,976,977,978,979,980,981,982,983,984,985,986,987,988,989,990,991,992,993,994,995,996,997,998,999,1000,1001,1002,1003,1004,1005,1006,1007,1008,1009,1010,1011,1012,1013,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023,1024,1025,1026,1027,1028,1029,1030,1031,1032,1033,1034,1035,1036,1037,1038,1039,1040,1041,1042,1043,1044,1045,1046,1047,1048,1049,1050,1051,1052,1053,1054,1055,1056,1057,1058,1059,1060,1061,1062,1063,1064,1065,1066,1067,1068,1069,1070,1071,1072,1073,1074,1075,1076,1077,1078,1079,1080,1081,1082,1083,1084,1085,1086,1087,1088,1089,1090,1091,1092,1093,1094,1095,1096,1097,1098,1099,1100,1101,1102,1103,1104,1105,1106,1107,1108,1109,1110,1111,1112,1113,1114,1115,1116,1117,1118,1119,1120,1121,1122,1123,1124,1125,1126,1127,1128,1129,1130,1131,1132,1133,1134,1135,1136,1137,1138,1139,1140,1141,1142,1143,1144,1145,1146,1147,1148,1149,1150,1151,1152,1153,1154,1155,1156,1157,1158,1159,1160,1161,1162,1163,1164,1165,1166,1167,1168,1169,1170,1171,1172,1173,1174,1175,1176,1177,1178,1179,1180,1181,1182,1183,1184,1185,1186,1187,1188,1189,1190,1191,1192,1193,1194,1195,1196,1197,1198,1199,1200,1201,1202,1203,1204,1205,1206,1207,1208,1209,1210,1211,1212,1213,1214,1215,1216,1217,1218,1219,1220,1221,1222,1223,1224,1225,1226,1227,1228,1229,1230,1231,1232,1233,1234,1235,1236,1237,1238,1239,1240,1241,1242,1243,1244,1245,1246,1247,1248,1249,1250,1251,1252,1253,1254,1255,1256,1257,1258,1259,1260,1261,1262,1263,1264,1265,1266,1267,1268,1269,1270,1271,1272,1273,1274,1275,1276,1277,1278,1279,1280,1281,1282,1283,1284,1285,1286,1287,1288,1289,1290,1291,1292,1293,1294,1295,1296,1297,1298,1299,1300,1301,1302,1303,1304,1305,1306,1307,1308,1309,1310,1311,1312,1313,1314,1315,1316,1317,1318,1319,1320,1321,1322,1323,1324,1325,1326,1327,1328,1329,1330,1331,1332,1333,1334,1335,1336,1337,1338,1339,1340,1341,1342,1343,1344,1345,1346,1347,1348,1349,1350,1351,1352,1353,1354,1355,1356,1357,1358,1359,1360,1361,1362,1363,1364,1365,1366,1367,1368,1369,1370,1371,1372,1373,1374,1375,1376,1377,1378,1379,1380,1381,1382,1383,1384,1385,1386,1387,1388,1389,1390,1391,1392,1393,1394,1395,1396,1397,1398,1399,1400,1401,1402,1403,1404,1405,1406,1407,1408,1409,1410,1411,1412,1413,1414,1415,1416,1417,1418,1419,1420,1421,1422,1423,1424,1425,1426,1427,1428,1429,1430,1431,1432,1433,1434,1435,1436,1437,1438,1439,1440,1441,1442,1443,1444,1445,1446,1447,1448,1449,1450,1451,1452,1453,1454,1455,1456,1457,1458,1459,1460,1461,1462,1463,1464,1465,1466,1467,1468,1469,1470,1471,1472,1473,1474,1475,1476,1477,1478,1479,1480,1481,1482,1483,1484,1485,1486,1487,1488,1489,1490,1491,1492,1493", "", "TAA,TAG,TGA"); +assert(data.species == codonfilter.species, "species number mismatch"); +assert(codonfilter.sites == 498, "Codon filtered data does not contain the right number of sites"); +fprintf(stdout, "Created a codon filter of ", codonfilter.sites, " sites.\n"); +assert(totalcodons - (totalcodons - 498) - 0 == codonfilter.sites, "Codon filtered data is not the expected length. Do sequences contain stop codons?"); +CheckCodonFilter("codonfilter"); +fprintf(stdout, "Reading tree string from _codenames_codonphyml_Swine_tree.newick.\n"); +fscanf("_codenames_codonphyml_Swine_tree.newick", String, treestring); +fprintf(stdout, "Using the Goldman Yang 1994 (GY94) codon model...\n"); +#include "/home/oashenbe/.local/lib/python2.7/site-packages/phyloExpCM/data//CF3x4.ibf"; +#include "/home/oashenbe/.local/lib/python2.7/site-packages/phyloExpCM/data//GY94.ibf"; +CreateGY94Model("CF3x4", "global", "global", 4, 4, 1); +UseModel(model); +ExecuteCommands("Tree tree = treestring;") +assert(codonfilter.species == TipCount(tree), "Number of species and number of tips differ"); +LikelihoodFunction likelihood = (codonfilter, tree); +fprintf(stdout, "\nNow optimizing the likelihood function...\n"); +Optimize(mlestimates, likelihood) +fprintf(stdout, "Completed likelihood optimization. Optimized ", mlestimates[1][1], " indpendent parameters and ", mlestimates[1][2], " shared parameters to obtain a log likelihood of ", mlestimates[1][0], ".\n"); +fprintf(stdout, "Writing the results to hyphy_output.txt.\n"); +fprintf("hyphy_output.txt", "Log likelihood: ", mlestimates[1][0], "\nindependent parameters (includes branch lengths): ", mlestimates[1][1], "\nshared parameters: ", mlestimates[1][2], "\nnumber of branch lengths: ", TipCount(tree) + BranchCount(tree), "\nnumber of tip nodes: ", TipCount(tree), "\nnumber of internal branches: ", BranchCount(tree), "\n",likelihood); +fprintf(stdout, "\nNow computing per-site likelihoods.\n"); +fprintf(stdout, "\nFirst fixing all global variables to the maximum-likelihood values estimated on the entire tree.\n"); +GetString(associativearray, likelihood, -1); +globalindependentvariables = associativearray["Global Independent"]; +for (ivariable=0; ivariable