diff --git a/samples/HyPhy/454.bf b/samples/HyPhy/454.bf deleted file mode 100644 index 40c867bd..00000000 --- a/samples/HyPhy/454.bf +++ /dev/null @@ -1,2988 +0,0 @@ - - - - - -
- - - - - -
- You can clone with - HTTPS, SSH, or Subversion. - - - -
- - - - Clone in Desktop - - - - - - - Download ZIP - -| - | ExecuteAFile ("Utility/GrabBag.bf"); | -
| - | ExecuteAFile ("Utility/DBTools.ibf"); | -
| - | - |
| - | - |
| - | alignOptions = {}; | -
| - | - |
| - | SetDialogPrompt ("Sequence File:"); | -
| - | DataSet unal = ReadDataFile (PROMPT_FOR_FILE); | -
| - | BASE_FILE_PATH = LAST_FILE_PATH; | -
| - | DB_FILE_PATH = LAST_FILE_PATH + ".db"; | -
| - | - |
| - | ANALYSIS_DB_ID = _openCacheDB (DB_FILE_PATH); | -
| - | - |
| - | haveTable = _TableExists (ANALYSIS_DB_ID, "SETTINGS"); | -
| - | if (haveTable) | -
| - | { | -
| - | existingSettings = _ExecuteSQL (ANALYSIS_DB_ID, "SELECT * FROM SETTINGS"); | -
| - | } | -
| - | - |
| - | if (Abs(existingSettings)) | -
| - | { | -
| - | existingSettings = existingSettings [0]; | -
| - | ExecuteCommands ("_Genetic_Code = " + existingSettings["GENETIC_CODE"]); | -
| - | ExecuteCommands (existingSettings["OPTIONS"] ^ {{"_hyphyAssociativeArray","alignOptions"}}); | -
| - | masterReferenceSequence = existingSettings["REFERENCE"]; | -
| - | dbSequences = _ExecuteSQL (ANALYSIS_DB_ID, "SELECT SEQUENCE_ID FROM SEQUENCES WHERE STAGE = 0"); | -
| - | unalSequenceCount = Abs(dbSequences); | -
| - | toDoSequences = {unalSequenceCount,1}; | -
| - | for (k = 0; k < unalSequenceCount; k=k+1) | -
| - | { | -
| - | toDoSequences[k] = (dbSequences[k])["SEQUENCE_ID"]; | -
| - | } | -
| - | dbSequences = 0; | -
| - | fprintf (stdout, "[PHASE 1] Reloaded ", unalSequenceCount, " unprocessed sequences\n"); | -
| - | } | -
| - | else | -
| - | { | -
| - | tableDefines = {}; | -
| - | tableDefines ["SETTINGS"] = {}; | -
| - | (tableDefines ["SETTINGS"])["RUN_DATE"] = "DATE"; | -
| - | (tableDefines ["SETTINGS"])["OPTIONS"] = "TEXT"; | -
| - | (tableDefines ["SETTINGS"])["REFERENCE"] = "TEXT"; | -
| - | (tableDefines ["SETTINGS"])["GENETIC_CODE"] = "TEXT"; | -
| - | (tableDefines ["SETTINGS"])["THRESHOLD"] = "REAL"; | -
| - | - |
| - | tableDefines ["SEQUENCES"] = {}; | -
| - | (tableDefines ["SEQUENCES"])["SEQUENCE_ID"] = "TEXT UNIQUE"; | -
| - | (tableDefines ["SEQUENCES"])["LENGTH"] = "INTEGER"; | -
| - | (tableDefines ["SEQUENCES"])["STAGE"] = "INTEGER"; | -
| - | /* | -
| - | 0 - initial import | -
| - | 1 - in frame without a fix | -
| - | 2 - one frame shift / fixed | -
| - | 3 - out-of-frame; not fixed / not aligned | -
| - | */ | -
| - | (tableDefines ["SEQUENCES"])["RAW"] = "TEXT"; | -
| - | (tableDefines ["SEQUENCES"])["ALIGNED_AA"] = "TEXT"; /* aligned aa. sequence */ | -
| - | (tableDefines ["SEQUENCES"])["ALIGNED"] = "TEXT"; /* aligned nucleotide sequence */ | -
| - | (tableDefines ["SEQUENCES"])["OFFSET"] = "INTEGER"; /* start offset w.r.t the reference sequence */ | -
| - | (tableDefines ["SEQUENCES"])["END_OFFSET"] = "INTEGER"; /* end offset w.r.t the reference sequence */ | -
| - | (tableDefines ["SEQUENCES"])["SCORE"] = "REAL"; | -
| - | (tableDefines ["SEQUENCES"])["FRAME"] = "INTEGER"; | -
| - | - |
| - | _CreateTableIfNeeded (ANALYSIS_DB_ID, "SETTINGS", tableDefines["SETTINGS"], 0); | -
| - | _CreateTableIfNeeded (ANALYSIS_DB_ID, "SEQUENCES", tableDefines["SEQUENCES"], 1); | -
| - | - |
| - | /* START ALIGNMENT SETTINGS */ | -
| - | LoadFunctionLibrary ("SeqAlignShared.ibf"); | -
| - | - |
| - | DataSetFilter filteredData = CreateFilter (unal,1); | -
| - | - |
| - | GetInformation (UnalignedSeqs,filteredData); | -
| - | /* preprocess sequences */ | -
| - | - |
| - | unalSequenceCount = Rows(UnalignedSeqs)*Columns(UnalignedSeqs); | -
| - | GetString (sequenceNames, unal, -1); | -
| - | - |
| - | longestSequence = 0; | -
| - | longestSequenceIDX = 0; | -
| - | - |
| - | seqRecord = {}; | -
| - | - |
| - | fprintf (stdout, "[PHASE 1] Initial Processing of ", unalSequenceCount, " sequences\n"); | -
| - | - |
| - | for (seqCounter = 0; seqCounter < unalSequenceCount; seqCounter = seqCounter+1) | -
| - | { | -
| - | UnalignedSeqs[seqCounter] = UnalignedSeqs[seqCounter]^{{"[^a-zA-Z]",""}}; | -
| - | UnalignedSeqs[seqCounter] = UnalignedSeqs[seqCounter]^{{"^N+",""}}; | -
| - | UnalignedSeqs[seqCounter] = UnalignedSeqs[seqCounter]^{{"N+$",""}}; | -
| - | - |
| - | seqRecord ["SEQUENCE_ID"] = sequenceNames[seqCounter]; | -
| - | seqRecord ["LENGTH"] = Abs (UnalignedSeqs[seqCounter]); | -
| - | seqRecord ["STAGE"] = 0; | -
| - | seqRecord ["RAW"] = UnalignedSeqs[seqCounter]; | -
| - | - |
| - | if (doLongestSequence) | -
| - | { | -
| - | if (doLongestSequence == 1 || seqCounter != unalSequenceCount-1) | -
| - | { | -
| - | if (Abs (UnalignedSeqs[seqCounter]) > longestSequence) | -
| - | { | -
| - | longestSequence = Abs (UnalignedSeqs[seqCounter]); | -
| - | longestSequenceIDX = seqCounter; | -
| - | } | -
| - | } | -
| - | } | -
| - | - |
| - | _InsertRecord (ANALYSIS_DB_ID, "SEQUENCES", seqRecord); | -
| - | SetParameter (STATUS_BAR_STATUS_STRING, "Initial processing ("+seqCounter+"/"+unalSequenceCount+" done)",0); | -
| - | } | -
| - | - |
| - | if (refSeq == 0) | -
| - | { | -
| - | masterReferenceSequence = UnalignedSeqs[0]; | -
| - | } | -
| - | - |
| - | - |
| - | if (doLongestSequence) | -
| - | { | -
| - | fprintf (stdout, "\nSelected sequence ", sequenceNames[longestSequenceIDX], " as reference."); | -
| - | masterReferenceSequence = UnalignedSeqs[longestSequenceIDX]; | -
| - | } | -
| - | - |
| - | incFileName = HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"chooseGeneticCode.def"; | -
| - | ExecuteCommands ("#include \""+incFileName+"\";"); | -
| - | - |
| - | doLongestSequence = (refSeq==1); | -
| - | aRecord = {}; | -
| - | aRecord["RUN_DATE"] = _ExecuteSQL(ANALYSIS_DB_ID,"SELECT DATE('NOW') AS CURRENT_DATE"); | -
| - | aRecord["RUN_DATE"] = ((aRecord["RUN_DATE"])[0])["CURRENT_DATE"]; | -
| - | aRecord["OPTIONS"] = "" + alignOptions; | -
| - | aRecord["REFERENCE"] = masterReferenceSequence; | -
| - | aRecord["GENETIC_CODE"] = "" + _Genetic_Code; | -
| - | _InsertRecord (ANALYSIS_DB_ID, "SETTINGS", aRecord); | -
| - | toDoSequences = sequenceNames; | -
| - | UnalignedSequences = 0; | -
| - | - |
| - | } | -
| - | - |
| - | skipOutliers = 1; | -
| - | doRC = 1; | -
| - | - |
| - | /* build codon translation table */ | -
| - | - |
| - | codonToAAMap = {}; | -
| - | codeToAA = "FLIMVSPTAYXHQNKDECWRG"; | -
| - | - |
| - | nucChars = "ACGT"; | -
| - | - |
| - | for (p1=0; p1<64; p1=p1+1) | -
| - | { | -
| - | codon = nucChars[p1$16]+nucChars[p1%16$4]+nucChars[p1%4]; | -
| - | ccode = _Genetic_Code[p1]; | -
| - | codonToAAMap[codon] = codeToAA[ccode]; | -
| - | } | -
| - | - |
| - | /* determine reading frames */ | -
| - | ProteinSequences = {}; | -
| - | AllTranslations = {}; | -
| - | ReadingFrames = {}; | -
| - | StopCodons = {}; | -
| - | StopPositions = {}; | -
| - | RC = {}; | -
| - | - |
| - | fprintf (stdout, "\n[PHASE 2] Detecting reading frames for each unprocessed sequence...\n"); | -
| - | frameCounter = {3,2}; | -
| - | stillHasStops = {}; | -
| - | aRecord = {}; | -
| - | - |
| - | for (seqCounter = 0; seqCounter < unalSequenceCount; seqCounter = seqCounter+1) | -
| - | { | -
| - | rawSeq = (_ExecuteSQL(ANALYSIS_DB_ID,"SELECT RAW FROM SEQUENCES WHERE SEQUENCE_ID = '" + toDoSequences[seqCounter] + "'")); | -
| - | aSeq = (rawSeq[0])["RAW"]; | -
| - | seqLen = Abs(aSeq)-2; | -
| - | - |
| - | minStops = 1e20; | -
| - | tString = ""; | -
| - | rFrame = 0; | -
| - | rrc = 0; | -
| - | allTran = {3,2}; | -
| - | stopPosn = {6,2}; | -
| - | - |
| - | for (rc = 0; rc<=doRC; rc = rc+1) | -
| - | { | -
| - | if (rc) | -
| - | { | -
| - | aSeq = nucleotideReverseComplement (aSeq) | -
| - | } | -
| - | for (offset = 0; offset < 3; offset = offset+1) | -
| - | { | -
| - | translString = ""; | -
| - | translString * (seqLen/3+1); | -
| - | for (seqPos = offset; seqPos < seqLen; seqPos = seqPos+3) | -
| - | { | -
| - | codon = aSeq[seqPos][seqPos+2]; | -
| - | prot = codonToAAMap[codon]; | -
| - | if (Abs(prot)) | -
| - | { | -
| - | translString * prot; | -
| - | } | -
| - | else | -
| - | { | -
| - | translString * "?"; | -
| - | } | -
| - | } | -
| - | translString * 0; | -
| - | translString = translString^{{"X$","?"}}; | -
| - | stopPos = translString||"X"; | -
| - | if (stopPos[0]>=0) | -
| - | { | -
| - | stopCount = Rows(stopPos)$2; | -
| - | stopPosn[3*rc+offset][0] = stopPos[0]; | -
| - | stopPosn[3*rc+offset][1] = stopPos[stopCount*2-1]; | -
| - | } | -
| - | else | -
| - | { | -
| - | stopCount = 0; | -
| - | } | -
| - | if (stopCount<minStops) | -
| - | { | -
| - | minStops = stopCount; | -
| - | rFrame = offset; | -
| - | rrc = rc; | -
| - | tString = translString; | -
| - | } | -
| - | allTran[offset][rc] = translString; | -
| - | } | -
| - | } | -
| - | - |
| - | ReadingFrames [seqCounter] = rFrame; | -
| - | ProteinSequences [seqCounter] = tString; | -
| - | frameCounter [rFrame][rrc] = frameCounter[rFrame][rrc]+1; | -
| - | StopPositions [seqCounter] = stopPosn; | -
| - | AllTranslations [seqCounter] = allTran; | -
| - | RC [seqCounter] = rrc; | -
| - | - |
| - | SetParameter (STATUS_BAR_STATUS_STRING, "Reading frame analysis ("+seqCounter+"/"+unalSequenceCount+" done)",0); | -
| - | } | -
| - | - |
| - | _closeCacheDB (ANALYSIS_DB_ID); | -
| - | return 0; | -
| - | - |
| - | s1 = ProteinSequences[0]; | -
| - | fprintf (stdout, "\nFound:\n\t", frameCounter[0], " sequences in reading frame 1\n\t",frameCounter[1], " sequences in reading frame 2\n\t",frameCounter[2], " sequences in reading frame 3\n\nThere were ", Abs(stillHasStops), " sequences with apparent frameshift/sequencing errors\n"); | -
| - | - |
| - | - |
| - | skipSeqs = {}; | -
| - | - |
| - | for (k=0; k<Abs(stillHasStops); k=k+1) | -
| - | { | -
| - | seqCounter = stillHasStops[k]; | -
| - | seqName = sequenceNames[seqCounter]; | -
| - | fprintf (stdout,"Sequence ", seqCounter+1, " (", seqName, ") seems to have"); | -
| - | stopPosn = StopPositions[seqCounter]; | -
| - | - |
| - | fStart = -1; | -
| - | fEnd = -1; | -
| - | fMin = 1e10; | -
| - | frame1 = 0; | -
| - | frame2 = 0; | -
| - | - |
| - | checkFramePosition (stopPosn[0][1],stopPosn[1][0],0,1); | -
| - | checkFramePosition (stopPosn[1][1],stopPosn[0][0],1,0); | -
| - | checkFramePosition (stopPosn[0][1],stopPosn[2][0],0,2); | -
| - | checkFramePosition (stopPosn[2][1],stopPosn[0][0],2,0); | -
| - | checkFramePosition (stopPosn[2][1],stopPosn[1][0],2,1); | -
| - | checkFramePosition (stopPosn[1][1],stopPosn[2][0],1,2); | -
| - | - |
| - | if (fStart>=0) | -
| - | { | -
| - | allTran = AllTranslations[seqCounter]; | -
| - | useq = UnalignedSeqs[seqCounter]; | -
| - | fprintf (stdout, " a shift from frame ", frame2+1, " to frame ", frame1+1, " between a.a. positions ", fStart, " and ", fEnd, "."); | -
| - | fStart2 = Max(fStart-1,0); | -
| - | fEnd2 = Min(fEnd+1,Min(Abs(allTran[frame1]),Abs(allTran[frame2]))-1); | -
| - | tempString = allTran[frame2]; | -
| - | fprintf (stdout, "\n\tRegion ", fStart2, "-", fEnd2, " in frame ", frame2+1, ":\n\t", tempString[fStart2][fEnd2]); | -
| - | fprintf (stdout, "\n\t", useq[3*fStart2+frame2][3*fEnd2+frame2-1]); | -
| - | tempString = allTran[frame1]; | -
| - | fprintf (stdout, "\n\tRegion ", fStart2, "-", fEnd2, " in frame ", frame1+1, ":\n\t", tempString[fStart2][fEnd2]); | -
| - | fprintf (stdout, "\n\t", useq[3*fStart2+frame1][3*fEnd2+frame1-1]); | -
| - | fprintf (stdout, "\n\t\tAttempting to resolve by alignment to reference. "); | -
| - | - |
| - | f1s = allTran[frame1]; | -
| - | f2s = allTran[frame2]; | -
| - | f1l = Abs(f1s); | -
| - | - |
| - | bestScore = -1e10; | -
| - | bestSplice = -1; | -
| - | - |
| - | for (k2=fStart; k2<fEnd; k2=k2+1) | -
| - | { | -
| - | s2 = f2s[0][k2]+f1s[k2+1][Abs(f1s)]; | -
| - | inStr = {{s1,s2}}; | -
| - | AlignSequences(aligned, inStr, alignOptions); | -
| - | aligned = aligned[0]; | -
| - | aligned = aligned[0]; | -
| - | if (aligned > bestScore) | -
| - | { | -
| - | bestScore = aligned; | -
| - | bestSplice = k2; | -
| - | bestString = s2; | -
| - | } | -
| - | } | -
| - | fprintf (stdout, "Best splice site appears to be at a.a. position ", bestSplice, "\n"); | -
| - | /* update best spliced string */ | -
| - | - |
| - | ProteinSequences[seqCounter] = bestString; | -
| - | ReadingFrames[seqCounter] = 0; | -
| - | - |
| - | UnalignedSeqs[seqCounter] = useq[frame2][frame2+3*bestSplice+2] + useq[frame1+3*bestSplice+3][Abs(useq)-1] + "---"; | -
| - | } | -
| - | else | -
| - | { | -
| - | - |
| - | fprintf (stdout, " multiple frameshifts\n"); | -
| - | skipSeqs[seqCounter] = 1; | -
| - | } | -
| - | } | -
| - | - |
| - | SeqAlignments = {}; | -
| - | startingPosition = {unalSequenceCount,2}; | -
| - | refLength = Abs(ProteinSequences[0]); | -
| - | refInsertions = {refLength,1}; | -
| - | - |
| - | fprintf (stdout,"\nPerforming pairwise alignment with reference sequences\n"); | -
| - | - |
| - | alignmentScores = {}; | -
| - | - |
| - | for (seqCounter = 1; seqCounter < unalSequenceCount; seqCounter = seqCounter+1) | -
| - | { | -
| - | if (skipSeqs[seqCounter] == 0) | -
| - | { | -
| - | s2 = ProteinSequences[seqCounter]; | -
| - | inStr = {{s1,s2}}; | -
| - | AlignSequences(aligned, inStr, alignOptions); | -
| - | aligned = aligned[0]; | -
| - | SeqAlignments[seqCounter] = aligned; | -
| - | alignmentScores[Abs(alignmentScores)] = aligned[0]/Abs(aligned[1]); | -
| - | aligned = aligned[1]; | -
| - | myStartingPosition = aligned$"[^-]"; | -
| - | myEndingPosition = Abs (aligned)-1; | -
| - | while (aligned[myEndingPosition]=="-") | -
| - | { | -
| - | myEndingPosition = myEndingPosition - 1; | -
| - | } | -
| - | myStartingPosition = myStartingPosition[0]; | -
| - | startingPosition[seqCounter][0] = myStartingPosition; | -
| - | startingPosition[seqCounter][1] = myEndingPosition; | -
| - | aligned = aligned[myStartingPosition][myEndingPosition]; | -
| - | refInsert = aligned||"-+"; | -
| - | if (refInsert[0]>0) | -
| - | { | -
| - | insCount = Rows (refInsert)/2; | -
| - | offset = 0; | -
| - | for (insN = 0; insN < insCount; insN = insN+1) | -
| - | { | -
| - | insPos = refInsert[insN*2]; | -
| - | insLength = refInsert[insN*2+1]-insPos+1; | -
| - | insPos = insPos-offset; | -
| - | if (refInsertions[insPos]<insLength) | -
| - | { | -
| - | refInsertions[insPos]=insLength; | -
| - | } | -
| - | offset = offset + insLength; | -
| - | } | -
| - | } | -
| - | } | -
| - | } | -
| - | - |
| - | alignmentScoresM = avlToMatrix ("alignmentScores"); | -
| - | ExecuteAFile ("Utility/DescriptiveStatistics.bf"); | -
| - | distInfo = GatherDescriptiveStats (alignmentScoresM); | -
| - | /*lowerCuttoff = 0.25;*/ | -
| - | distInfo["Mean"] - 2*distInfo["Std.Dev"]; | -
| - | /* produce a fully gapped reference sequence */ | -
| - | - |
| - | fprintf (stdout,"\nMerging pairwise alignments into a MSA\n"); | -
| - | - |
| - | fullRefSeq = ""; | -
| - | fullRefSeq * refLength; | -
| - | fullRefSeq * (s1[0]); | -
| - | - |
| - | - |
| - | s1N = UnalignedSeqs[0]; | -
| - | - |
| - | fullRefSeqN = ""; | -
| - | fullRefSeqN * (3*refLength); | -
| - | fullRefSeqN * (s1N[0][2]); | -
| - | - |
| - | frameShift = ReadingFrames[0]; | -
| - | - |
| - | for (seqCounter=1;seqCounter<refLength;seqCounter=seqCounter+1) | -
| - | { | -
| - | gapCount = refInsertions[seqCounter]; | -
| - | for (k=0; k<gapCount;k=k+1) | -
| - | { | -
| - | fullRefSeq*("-"); | -
| - | fullRefSeqN*("---"); | -
| - | } | -
| - | fullRefSeq * (s1[seqCounter]); | -
| - | fullRefSeqN * (s1N[frameShift+seqCounter*3][frameShift+seqCounter*3+2]); | -
| - | } | -
| - | - |
| - | fullRefSeq * 0; | -
| - | fullRefSeqN * 0; | -
| - | - |
| - | return 0; | -
| - | - |
| - | refLength = Abs(fullRefSeq); | -
| - | - |
| - | SetDialogPrompt ("Save alignment to:"); | -
| - | - |
| - | seqName=sequenceNames[0]; | -
| - | fprintf (PROMPT_FOR_FILE,CLEAR_FILE,">",seqName,"\n",fullRefSeq); | -
| - | fName = LAST_FILE_PATH; | -
| - | fNameC = fName+".nuc"; | -
| - | fprintf (fNameC,CLEAR_FILE,">",seqName,"\n",fullRefSeqN); | -
| - | - |
| - | alCounter = 0; | -
| - | - |
| - | for (seqCounter = 1; seqCounter < unalSequenceCount; seqCounter = seqCounter+1) | -
| - | { | -
| - | if (skipSeqs[seqCounter] == 0) | -
| - | { | -
| - | if (skipOutliers == 0 && alignmentScoresM[alCounter] < lowerCuttoff) | -
| - | { | -
| - | seqName=sequenceNames[seqCounter]; | -
| - | fprintf (stdout, "Sequence ", seqName ," was skipped because of a poor alignment score.\n"); | -
| - | skipSeqs[seqCounter] = 1; | -
| - | alCounter = alCounter + 1; | -
| - | continue; | -
| - | } | -
| - | alCounter = alCounter + 1; | -
| - | seqName=sequenceNames[seqCounter]; | -
| - | aligned = SeqAlignments[seqCounter]; | -
| - | - |
| - | aligned1 = aligned[1]; | -
| - | aligned2 = aligned[2]; | -
| - | - |
| - | s2 = startingPosition[seqCounter][0]; | -
| - | e2 = startingPosition[seqCounter][1]; | -
| - | - |
| - | gappedSeq = ""; | -
| - | gappedSeq * Abs(aligned2); | -
| - | - |
| - | - |
| - | k=0; | -
| - | - |
| - | while (k<refLength) | -
| - | { | -
| - | while (fullRefSeq[k]!=aligned1[s2]) | -
| - | { | -
| - | gappedSeq*("-"); | -
| - | k=k+1; | -
| - | } | -
| - | gappedSeq*(aligned2[s2]); | -
| - | s2=s2+1; | -
| - | k=k+1; | -
| - | } | -
| - | - |
| - | gappedSeq * 0; | -
| - | - |
| - | gappedSeqN = ""; | -
| - | gappedSeqN * (3*Abs(aligned2)); | -
| - | - |
| - | frameShift = ReadingFrames[seqCounter]; | -
| - | - |
| - | s1N = UnalignedSeqs[seqCounter]; | -
| - | s2N = ProteinSequences[seqCounter]; | -
| - | s2 = startingPosition[seqCounter][0]; | -
| - | k = 0; | -
| - | e2 = Abs(gappedSeq); | -
| - | k = 0; | -
| - | while (k<e2) | -
| - | { | -
| - | while ((s2N[s2]!=gappedSeq[k])&&(k<e2)) | -
| - | { | -
| - | gappedSeqN * ("---"); | -
| - | k=k+1; | -
| - | } | -
| - | if (k<e2) | -
| - | { | -
| - | gappedSeqN * s1N[frameShift+s2*3][frameShift+s2*3+2]; | -
| - | s2 = s2+1; | -
| - | k=k+1; | -
| - | } | -
| - | } | -
| - | gappedSeqN * 0; | -
| - | - |
| - | if (refSeq2 && seqCounter == unalSequenceCount-1) | -
| - | { | -
| - | fscanf (fName, "Raw", soFar); | -
| - | fprintf (fName, CLEAR_FILE,">",seqName,"\n",gappedSeq,"\n",soFar); | -
| - | fscanf (fNameC, "Raw", soFar); | -
| - | fprintf (fNameC,CLEAR_FILE,">",seqName,"\n",gappedSeqN,"\n",soFar); | -
| - | - |
| - | } | -
| - | else | -
| - | { | -
| - | fprintf (fName,"\n>",seqName,"\n",gappedSeq); | -
| - | fprintf (fNameC,"\n>",seqName,"\n",gappedSeqN); | -
| - | } | -
| - | } | -
| - | } | -
| - | - |
| - | if (Abs(skipSeqs)) | -
| - | { | -
| - | fName = fName+".bad"; | -
| - | for (seqCounter = 1; seqCounter < unalSequenceCount; seqCounter = seqCounter+1) | -
| - | { | -
| - | if (skipSeqs[seqCounter]) | -
| - | { | -
| - | seqName=sequenceNames[seqCounter]; | -
| - | fprintf (fName,">",seqName,"\n",UnalignedSeqs[seqCounter],"\n"); | -
| - | } | -
| - | } | -
| - | } | -
| - | - |
| - | function checkFramePosition (pos1, pos2, fr1, fr2) | -
| - | { | -
| - | fSpan = pos2-pos1; | -
| - | - |
| - | if (fSpan>1) /* first followed by second*/ | -
| - | { | -
| - | if (fSpan < fMin) | -
| - | { | -
| - | fMin = fSpan; | -
| - | frame1 = fr1; | -
| - | frame2 = fr2; | -
| - | fStart = pos1+1; | -
| - | fEnd = pos2; | -
| - | } | -
| - | } | -
| - | return 0; | -
| - | } | -
| - | - |
- You can clone with - HTTPS, SSH, or Subversion. - - - -
- - - - Clone in Desktop - - - - - - - Download ZIP - -![]()
Cannot retrieve contributors at this time
-| - | if (Rows (modelMatrixList) == 0) | -
| - | { | -
| - | modelMatrixList = | -
| - | { | -
| - | {"Equal Input", "EIAA.mdl", "19"} | -
| - | {"Dayhoff","Dayhoff.mdl","0"} | -
| - | {"Dayhoff+F","Dayhoff_F.mdl","19"} | -
| - | {"JTT", "Jones.mdl", "0"} | -
| - | {"JTT+F", "Jones_F.mdl", "19"} | -
| - | {"WAG", "WAG.mdl", "0"} | -
| - | {"WAG+F", "WAG_F.mdl", "19"} | -
| - | {"rtREV", "rtREV.mdl", "0"} | -
| - | {"rtREV+F", "rtREV_F.mdl", "19"} | -
| - | {"mtMAM", "mtMAM.mdl", "0"} | -
| - | {"mtMAM+F", "mtMAM_F.mdl", "19"} | -
| - | {"mtREV 24", "mtREV_24.mdl", "0"} | -
| - | {"mtREV 24+F", "mtREV_24_F.mdl", "19"} | -
| - | {"HIV within", "HIVwithin.mdl", "0"} | -
| - | {"HIV within+F", "HIVwithin+F.mdl", "19"} | -
| - | {"HIV between", "HIVbetween.mdl", "0"} | -
| - | {"HIV between+F", "HIVbetween+F.mdl", "19"} | -
| - | {"REV-1 step", "reducedREV.mdl", "19"} | -
| - | {"REV", "mtREV.mdl", "19"} | -
| - | }; | -
| - | } | -
| - | - |
| - | /*___________________________________________________________________________________________________________*/ | -
| - | - |
| - | function runAModel (modelID, fileName, xtraP, midx) | -
| - | { | -
| - | ExecuteCommands ("#include \"TemplateModels/"+fileName+"\";"); | -
| - | Tree givenTree = treeString; | -
| - | LikelihoodFunction lf = (filteredData,givenTree); | -
| - | - |
| - | GetString (lf_info, lf, -1); | -
| - | locals = lf_info["Local Independent"]; | -
| - | - |
| - | if (Columns (branchLengthStash)) | -
| - | { | -
| - | USE_LAST_RESULTS = 1; | -
| - | for (_iv = 0; _iv < Columns (locals); _iv = _iv+1) | -
| - | { | -
| - | ExecuteCommands (locals[_iv] + "=1;\n"); | -
| - | } | -
| - | currentBL = BranchLength (givenTree,0); | -
| - | currentBN = BranchName (givenTree,-1); | -
| - | for (_iv = 0; _iv < Columns (currentBN); _iv = _iv+1) | -
| - | { | -
| - | ExecuteCommands ("givenTree."+currentBN[_iv]+".t="+branchLengthStash[_iv]/currentBL+";"); | -
| - | - |
| - | } | -
| - | } | -
| - | else | -
| - | { | -
| - | for (_iv = 0; _iv < Columns (locals); _iv = _iv+1) | -
| - | { | -
| - | ExecuteCommands (locals[_iv] + "=0.1;\n"); | -
| - | } | -
| - | USE_LAST_RESULTS = 1; | -
| - | } | -
| - | - |
| - | Optimize (res,lf); | -
| - | - |
| - | fprintf (stdout, "| ", modelID); | -
| - | for (k=0; k<maxModelWidth-Abs(modelID)-1; k=k+1) | -
| - | { | -
| - | fprintf (stdout, " "); | -
| - | } | -
| - | - |
| - | params = res[1][1]+xtraP; | -
| - | AIC = 2(-res[1][0]+params); | -
| - | - |
| - | if (filteredData.sites-params>1) | -
| - | { | -
| - | cAIC = 2(-res[1][0]+params*(filteredData.sites/(filteredData.sites-params-1))); | -
| - | } | -
| - | else | -
| - | { | -
| - | cAIC = 0; | -
| - | } | -
| - | - |
| - | branchLengths = BranchLength (givenTree,-1); | -
| - | TL = 0; | -
| - | for (k=Rows(branchLengths)*Columns(branchLengths)-1; k>=0; k=k-1) | -
| - | { | -
| - | TL = TL + branchLengths[k]; | -
| - | } | -
| - | - |
| - | fprintf (stdout, "| ", Format (res[1][0],14,3), " | ", Format (params,5,0), " | ", | -
| - | Format (AIC, 9,3), " | ",); | -
| - | - |
| - | if (cAIC > 0) | -
| - | { | -
| - | fprintf (stdout, Format (cAIC,11,3), " | "); | -
| - | } | -
| - | else | -
| - | { | -
| - | fprintf (stdout, " N/A | "); | -
| - | } | -
| - | - |
| - | fprintf (stdout, Format (TL,11,3), " |\n", sepString); | -
| - | - |
| - | resultMatrix[midx][0] = res[1][0]; | -
| - | resultMatrix[midx][1] = params; | -
| - | resultMatrix[midx][2] = AIC; | -
| - | resultMatrix[midx][3] = cAIC; | -
| - | resultMatrix[midx][4] = TL; | -
| - | - |
| - | if (AIC < bestAIC) | -
| - | { | -
| - | bestAIC = AIC; | -
| - | bestAICidx = midx; | -
| - | branchLengthStash = BranchLength (givenTree,-1); | -
| - | } | -
| - | - |
| - | if (cAIC > 0) | -
| - | { | -
| - | if (bestCAIC > cAIC) | -
| - | { | -
| - | bestCAIC = cAIC; | -
| - | bestCAICidx = midx; | -
| - | - |
| - | } | -
| - | } | -
| - | - |
| - | return 0; | -
| - | } | -
| - | - |
| - | - |
| - | /*___________________________________________________________________________________________________________*/ | -
| - | - |
| - | - |
| - | - |
| - | maxModelWidth = 7; | -
| - | skipCodeSelectionStep = 0; | -
| - | - |
| - | ChoiceList (doREV, "Include REV?", 1, SKIP_NONE, "Yes", "Include REV and reduced REV models. CAUTION: these models take a long time to fit.", | -
| - | "No", "Only use empirical models"); | -
| - | - |
| - | if (doREV < 0) | -
| - | { | -
| - | return 0; | -
| - | } | -
| - | - |
| - | if (doREV == 0) | -
| - | { | -
| - | #include "TemplateModels/chooseGeneticCode.def"; | -
| - | skipCodeSelectionStep = 1; | -
| - | } | -
| - | - |
| - | modelCount = Rows (modelMatrixList) - 2*doREV; | -
| - | - |
| - | for (k=0; k<modelCount; k=k+1) | -
| - | { | -
| - | maxModelWidth = Max(maxModelWidth,Abs (modelMatrixList[k][0])+2); | -
| - | } | -
| - | - |
| - | sepString = ""; | -
| - | capString = ""; | -
| - | sepString * 256; | -
| - | sepString * "+"; | -
| - | - |
| - | capString * 256; | -
| - | capString * "| Model"; | -
| - | - |
| - | for (k=0; k<maxModelWidth; k=k+1) | -
| - | { | -
| - | sepString * "-"; | -
| - | } | -
| - | - |
| - | for (k=0; k<maxModelWidth-6; k=k+1) | -
| - | { | -
| - | capString * " "; | -
| - | } | -
| - | - |
| - | capString * "| Log Likelihood | #prms | AIC Score | c-AIC Score | Tree Length |\n"; | -
| - | sepString * "+----------------+-------+-----------+-------------+-------------+\n"; | -
| - | sepString * 0; | -
| - | capString * 0; | -
| - | - |
| - | branchLengthStash = 0; | -
| - | - |
| - | SKIP_MODEL_PARAMETER_LIST = 0; | -
| - | - |
| - | #include "TemplateModels/modelParameters2.mdl"; | -
| - | if (modelType == 1) | -
| - | { | -
| - | #include "TemplateModels/defineGamma.mdl"; | -
| - | } | -
| - | - |
| - | if (modelType == 2) | -
| - | { | -
| - | #include "TemplateModels/defineHM.mdl"; | -
| - | } | -
| - | SKIP_MODEL_PARAMETER_LIST = 1; | -
| - | - |
| - | SetDialogPrompt ("Please load an amino-acid data file:"); | -
| - | - |
| - | DataSet ds = ReadDataFile (PROMPT_FOR_FILE); | -
| - | DataSetFilter filteredData = CreateFilter (ds,1); | -
| - | - |
| - | fprintf (stdout,"\nRunning aminoacid model comparisons on ", LAST_FILE_PATH, "\n\nThe alignment has ",ds.species, " sequences and ", ds.sites, " sites\n"); | -
| - | - |
| - | _DO_TREE_REBALANCE_ = 1; | -
| - | - |
| - | #include "queryTree.bf"; | -
| - | - |
| - | resultMatrix = {modelCount, 5}; | -
| - | - |
| - | fprintf (stdout, "\n",sepString,capString,sepString); | -
| - | - |
| - | bestAIC = 1e100; | -
| - | bestCAIC = 1e100; | -
| - | bestAICidx = 0; | -
| - | bestCAICidx = -1; | -
| - | - |
| - | for (mid=0; mid<modelCount; mid=mid+1) | -
| - | { | -
| - | runAModel (modelMatrixList[mid][0], modelMatrixList[mid][1], 0+modelMatrixList[mid][2], mid); | -
| - | } | -
| - | - |
| - | fprintf (stdout, "\n\nBest AIC model:\n\t", modelMatrixList[bestAICidx][0], " with the score of ", bestAIC); | -
| - | - |
| - | if (bestCAICidx>=0) | -
| - | { | -
| - | fprintf (stdout, "\n\nBest c-AIC model:\n\t", modelMatrixList[bestCAICidx][0], " with the score of ", bestCAIC); | -
| - | } | -
| - | - |
| - | labelMatrix = {{"Log-likelihood","Parameters","AIC","c-AIC","Total tree length",""}}; | -
| - | - |
| - | aaString = "Model"; | -
| - | - |
| - | for (fC = 0; fC < modelCount; fC = fC+1) | -
| - | { | -
| - | aaString = aaString + ";" + modelMatrixList[fC][0]; | -
| - | } | -
| - | - |
| - | USE_LAST_RESULTS = 0; | -
| - | - |
| - | labelMatrix[5] = aaString; | -
| - | skipCodeSelectionStep = 0; | -
| - | OpenWindow (CHARTWINDOW,{{"Model Fits"} | -
| - | {"labelMatrix"}, | -
| - | {"resultMatrix"}, | -
| - | {"Bar Chart"}, | -
| - | {"Index"}, | -
| - | {"c-AIC"}, | -
| - | {"Model Index"}, | -
| - | {""}, | -
| - | {"AIC"} | -
| - | }, | -
| - | "SCREEN_WIDTH-60;SCREEN_HEIGHT-60;30;30"); | -
- You can clone with - HTTPS, SSH, or Subversion. - - - -
- - - - Clone in Desktop - - - - - - - Download ZIP - -![]()
Cannot retrieve contributors at this time
-| - | NICETY_LEVEL = 3; | -
| - | - |
| - | #include "TemplateModels/chooseGeneticCode.def"; | -
| - | #include "simpleBootstrap.bf"; | -
| - | - |
| - | SetDialogPrompt ("Please specify a codon data file:"); | -
| - | - |
| - | COUNT_GAPS_IN_FREQUENCIES = 0; | -
| - | VERBOSITY_LEVEL = 1; | -
| - | - |
| - | DataSet ds = ReadDataFile (PROMPT_FOR_FILE); | -
| - | DataSetFilter filteredData = CreateFilter (ds,3,"","",GeneticCodeExclusions); | -
| - | - |
| - | fprintf (stdout,"\n______________READ THE FOLLOWING DATA______________\n",ds); | -
| - | - |
| - | SelectTemplateModel(filteredData); | -
| - | - |
| - | _DO_TREE_REBALANCE_ = 1; | -
| - | #include "queryTree.bf"; | -
| - | - |
| - | if (modelType) | -
| - | { | -
| - | ChoiceList (branchLengths, "Branch Lengths", 1, SKIP_NONE, | -
| - | "Estimate", "Estimate branch lengths by ML", | -
| - | "Proportional to input tree", "Branch lengths are proportional to those in input tree"); | -
| - | - |
| - | if (branchLengths < 0) | -
| - | { | -
| - | return; | -
| - | } | -
| - | - |
| - | if (branchLengths == 1) | -
| - | { | -
| - | global treeScaler = 1; | -
| - | ReplicateConstraint ("this1.?.?:=treeScaler*this2.?.?__", givenTree, givenTree); | -
| - | } | -
| - | } | -
| - | - |
| - | LikelihoodFunction lf = (filteredData,givenTree); | -
| - | - |
| - | Optimize (res,lf); | -
| - | - |
| - | fprintf (stdout, "\n______________RESULTS______________\n",lf); | -
| - | - |
| - | /* compute syn and non-syn stencils for current genetic code */ | -
| - | - |
| - | #include "categoryEcho.bf"; | -
| - | - |
| - | GetString (sendMeBack,lf,-1); | -
| - | sendMeBack["LogL"] = res[1][0]; | -
| - | sendMeBack["NP"] = res[1][1]; | -
| - | - |
| - | return sendMeBack; | -
- You can clone with - HTTPS, SSH, or Subversion. - - - -
- - - - Clone in Desktop - - - - - - - Download ZIP - -![]()
Cannot retrieve contributors at this time
-| - | SetDialogPrompt ("Please specify a di-nucleotide (e.g. stem RNA) file:"); | -
| - | - |
| - | DataSet ds = ReadDataFile (PROMPT_FOR_FILE); | -
| - | DataSetFilter filteredData = CreateFilter (ds,2); | -
| - | - |
| - | fprintf (stdout,"\n______________READ THE FOLLOWING DATA______________\n",ds); | -
| - | - |
| - | SelectTemplateModel(filteredData); | -
| - | - |
| - | _DO_TREE_REBALANCE_ = 1; | -
| - | - |
| - | #include "queryTree.bf"; | -
| - | - |
| - | LikelihoodFunction lf = (filteredData,givenTree); | -
| - | - |
| - | timer = Time(0); | -
| - | - |
| - | USE_ADAPTIVE_VARIABLE_STEP = 0; | -
| - | - |
| - | Optimize (res,lf); | -
| - | - |
| - | timer = Time(0)-timer; | -
| - | - |
| - | - |
| - | fprintf (stdout, "\n______________RESULTS______________\nTime taken = ", timer, " seconds\nAIC Score = ", | -
| - | 2(res[1][1]-res[1][0]),"\n",lf); | -
| - | - |
| - | #include "categoryEcho.bf"; | -
- You can clone with - HTTPS, SSH, or Subversion. - - - -
- - - - Clone in Desktop - - - - - - - Download ZIP - -![]()
Cannot retrieve contributors at this time
-| - | _Genetic_Code = 0; | -
| - | - |
| - | ExecuteAFile("simpleBootstrap.bf"); | -
| - | - |
| - | SetDialogPrompt ("Please specify a nucleotide or amino-acid data file:"); | -
| - | - |
| - | DataSet ds = ReadDataFile (PROMPT_FOR_FILE); | -
| - | DataSetFilter filteredData = CreateFilter (ds,1); | -
| - | - |
| - | fprintf (stdout,"\n______________READ THE FOLLOWING DATA______________\n",ds); | -
| - | - |
| - | SelectTemplateModel(filteredData); | -
| - | _DO_TREE_REBALANCE_ = 1; | -
| - | - |
| - | ExecuteAFile ("queryTree.bf"); | -
| - | - |
| - | LikelihoodFunction lf = (filteredData,givenTree); | -
| - | timer = Time(0); | -
| - | Optimize (res,lf); | -
| - | timer = Time(0)-timer; | -
| - | - |
| - | fprintf (stdout, "\n______________RESULTS______________\nTime taken = ", timer, " seconds\nAIC Score = ", | -
| - | 2(res[1][1]-res[1][0]),"\n",lf); | -
| - | - |
| - | ExecuteAFile ("categoryEcho.bf"); | -
| - | GetString (lfInfo, lf, -1); | -
| - | - |
| - | return {"Log(L)": res[1][0], "DF": res[1][1], "Tree": Format (givenTree,1,1)} | -
- You can clone with - HTTPS, SSH, or Subversion. - - - -
- - - - Clone in Desktop - - - - - - - Download ZIP - -![]()
Cannot retrieve contributors at this time
-| - | /* 1. include a file to define the genetic code | -
| - | Note the use of base directory and path forming variables to make this analysis | -
| - | independent of directory placement | -
| - | */ | -
| - | - |
| - | incFileName = HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"chooseGeneticCode.def"; | -
| - | ExecuteCommands ("#include \""+incFileName+"\";"); | -
| - | - |
| - | /* 2. load a codon partition */ | -
| - | - |
| - | SetDialogPrompt ("Please locate a coding alignment:"); | -
| - | DataSet ds = ReadDataFile (PROMPT_FOR_FILE); | -
| - | DataSetFilter filteredData = CreateFilter (ds,3,"","",GeneticCodeExclusions); | -
| - | coding_path = LAST_FILE_PATH; | -
| - | - |
| - | fprintf (stdout, "\nLoaded a ", filteredData.species, " sequence alignment with ", filteredData.sites, " codons from\n",coding_path,"\n"); | -
| - | - |
| - | /* 3. include a file to prompt for a tree */ | -
| - | - |
| - | incFileName = HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"queryTree.bf"; | -
| - | ExecuteCommands ("#include \""+incFileName+"\";"); | -
| - | - |
| - | /* 4. Compute nucleotide counts by position for the F3x4 estimator */ | -
| - | - |
| - | COUNT_GAPS_IN_FREQUENCIES = 0; | -
| - | HarvestFrequencies (baseFreqs,filteredData,3,1,1); | -
| - | COUNT_GAPS_IN_FREQUENCIES = 1; | -
| - | - |
| - | fprintf (stdout, "\nBase composition:\n\tA: ", Format (baseFreqs[0][0],10,5),",",Format (baseFreqs[0][1],10,5),",",Format (baseFreqs[0][2],10,5), | -
| - | "\n\tC: ", Format (baseFreqs[1][0],10,5),",",Format (baseFreqs[1][1],10,5),",",Format (baseFreqs[1][2],10,5), | -
| - | "\n\tG: ", Format (baseFreqs[2][0],10,5),",",Format (baseFreqs[2][1],10,5),",",Format (baseFreqs[2][2],10,5), | -
| - | "\n\tT: ", Format (baseFreqs[3][0],10,5),",",Format (baseFreqs[3][1],10,5),",",Format (baseFreqs[3][2],10,5), "\n"); | -
| - | - |
| - | - |
| - | - |
| - | /* 6. define the GY94 rate matrix; for now each branch will have it's own | -
| - | dS and dN, we will constrain them later */ | -
| - | - |
| - | global kappa_inv = 1; | -
| - | - |
| - | ModelMatrixDimension = 64; | -
| - | for (h = 0; h<64; h=h+1) | -
| - | { | -
| - | if (_Genetic_Code[h]==10) /* stop codon */ | -
| - | { | -
| - | ModelMatrixDimension = ModelMatrixDimension-1; | -
| - | } | -
| - | } | -
| - | - |
| - | GY_Matrix = {ModelMatrixDimension,ModelMatrixDimension}; | -
| - | hshift = 0; | -
| - | for (h=0; h<64; h=h+1) | -
| - | { | -
| - | if (_Genetic_Code[h]==10) | -
| - | { | -
| - | hshift = hshift+1; | -
| - | } | -
| - | else | -
| - | { | -
| - | vshift = hshift; | -
| - | for (v = h+1; v<64; v=v+1) | -
| - | { | -
| - | diff = v-h; | -
| - | if (_Genetic_Code[v]==10) | -
| - | { | -
| - | vshift = vshift+1; | -
| - | } | -
| - | else | -
| - | { | -
| - | if ((h$4==v$4)||((diff%4==0)&&(h$16==v$16))||(diff%16==0)) /* one step */ | -
| - | { | -
| - | if (h$4==v$4) | -
| - | { | -
| - | transition = v%4; | -
| - | transition2= h%4; | -
| - | } | -
| - | else | -
| - | { | -
| - | if(diff%16==0) | -
| - | { | -
| - | transition = v$16; | -
| - | transition2= h$16; | -
| - | } | -
| - | else | -
| - | { | -
| - | transition = v%16$4; | -
| - | transition2= h%16$4; | -
| - | } | -
| - | } | -
| - | if (_Genetic_Code[0][h]==_Genetic_Code[0][v]) /* synonymous */ | -
| - | { | -
| - | if (Abs(transition-transition2)%2) /* transversion */ | -
| - | { | -
| - | GY_Matrix[h-hshift][v-vshift] := kappa_inv*synRate; | -
| - | GY_Matrix[v-vshift][h-hshift] := kappa_inv*synRate; | -
| - | } | -
| - | else | -
| - | { | -
| - | GY_Matrix[h-hshift][v-vshift] := synRate; | -
| - | GY_Matrix[v-vshift][h-hshift] := synRate; | -
| - | } | -
| - | } | -
| - | else | -
| - | { | -
| - | if (Abs(transition-transition2)%2) /* transversion */ | -
| - | { | -
| - | GY_Matrix[h-hshift][v-vshift] := kappa_inv*nonSynRate; | -
| - | GY_Matrix[v-vshift][h-hshift] := kappa_inv*nonSynRate; | -
| - | } | -
| - | else | -
| - | { | -
| - | GY_Matrix[h-hshift][v-vshift] := nonSynRate; | -
| - | GY_Matrix[v-vshift][h-hshift] := nonSynRate; | -
| - | } | -
| - | } | -
| - | } | -
| - | } | -
| - | } | -
| - | } | -
| - | } | -
| - | - |
| - | /*8. build codon frequencies (use the F3x4 estimator) */ | -
| - | - |
| - | PIStop = 1.0; | -
| - | codonFreqs = {ModelMatrixDimension,1}; | -
| - | hshift = 0; | -
| - | - |
| - | for (h=0; h<64; h=h+1) | -
| - | { | -
| - | first = h$16; | -
| - | second = h%16$4; | -
| - | third = h%4; | -
| - | if (_Genetic_Code[h]==10) | -
| - | { | -
| - | hshift = hshift+1; | -
| - | PIStop = PIStop-baseFreqs[first][0]*baseFreqs[second][1]*baseFreqs[third][2]; | -
| - | continue; | -
| - | } | -
| - | codonFreqs[h-hshift]=baseFreqs[first][0]*baseFreqs[second][1]*baseFreqs[third][2]; | -
| - | } | -
| - | - |
| - | codonFreqs = codonFreqs*(1.0/PIStop); | -
| - | - |
| - | /*9. define the codon model */ | -
| - | - |
| - | Model GY_Model = (GY_Matrix,codonFreqs,1); | -
| - | - |
| - | /*10. Define the tree and pick the foreground branch, displaying a tree window to facilitate selection; | -
| - | the latter step is executed for 2 of 3 model choices */ | -
| - | - |
| - | Tree givenTree1 = treeString; | -
| - | Tree givenTree2 = treeString; | -
| - | Tree givenTree3 = treeString; | -
| - | Tree givenTree4 = treeString; | -
| - | - |
| - | USE_LAST_RESULTS = 0; | -
| - | OPTIMIZATION_METHOD = 4; | -
| - | - |
| - | /* Approximate kappa and branch lengths using an HKY85 fit */ | -
| - | - |
| - | HKY85_Matrix = {{*,t*kappa_inv,t,t*kappa_inv} | -
| - | {t*kappa_inv,*,kappa_inv*t,t} | -
| - | {t,t*kappa_inv,*,kappa_inv*t} | -
| - | {t*kappa_inv,t,kappa_inv*t,*}}; | -
| - | - |
| - | HarvestFrequencies (nucFreqs,ds,1,1,1); | -
| - | Model HKY85_Model = (HKY85_Matrix,nucFreqs,1); | -
| - | - |
| - | Tree nucTree = treeString; | -
| - | DataSetFilter nucData = CreateFilter (ds,1); | -
| - | - |
| - | fprintf (stdout, "Obtaining nucleotide branch lengths and kappa to be used as starting values...\n"); | -
| - | LikelihoodFunction nuc_lf = (nucData,nucTree); | -
| - | Optimize (nuc_mle,nuc_lf); | -
| - | fprintf (stdout, "\n", Format (nucTree,1,1), "\nkappa=", Format (1/kappa_inv,8,3), "\n"); | -
| - | - |
| - | USE_LAST_RESULTS = 1; | -
| - | - |
| - | mxTreeSpec = {5,1}; | -
| - | mxTreeSpec [0] = "nucTree"; | -
| - | mxTreeSpec [1] = "8240"; | -
| - | mxTreeSpec [2] = "10,40,-10,-175,1"; | -
| - | mxTreeSpec [3] = ""; | -
| - | mxTreeSpec [4] = ""; | -
| - | OpenWindow (TREEWINDOW, mxTreeSpec,"(SCREEN_WIDTH-50)/2;(SCREEN_HEIGHT-50)/2;30+(SCREEN_WIDTH-30)/2;45"); | -
| - | - |
| - | leafNodes = TipCount (givenTree); | -
| - | internalNodes = BranchCount(givenTree); | -
| - | - |
| - | choiceMatrix = {internalNodes+leafNodes,2}; | -
| - | for (bc=0; bc<internalNodes; bc=bc+1) | -
| - | { | -
| - | choiceMatrix[bc][0] = BranchName(givenTree,bc); | -
| - | choiceMatrix[bc][1] = "Internal Branch Rooting " + givenTree[bc]; | -
| - | } | -
| - | for (bc=0; bc<leafNodes; bc=bc+1) | -
| - | { | -
| - | choiceMatrix[bc+internalNodes][0] = TipName(givenTree,bc); | -
| - | choiceMatrix[bc+internalNodes][1] = "Leaf node " + choiceMatrix[bc+internalNodes][0]; | -
| - | } | -
| - | - |
| - | ChoiceList (stOption,"Choose the foreground branch",0,NO_SKIP,choiceMatrix); | -
| - | - |
| - | if (stOption[0] < 0) | -
| - | { | -
| - | return -1; | -
| - | } | -
| - | - |
| - | fprintf (stdout, "\n\n", Columns (stOption)," foreground branch(es) set to: ", "\n"); | -
| - | for (bc = 0; bc < Columns (stOption); bc = bc + 1) | -
| - | { | -
| - | fprintf (stdout, choiceMatrix[stOption[bc]][0], "\n"); | -
| - | } | -
| - | OpenWindow (CLOSEWINDOW, "Tree nucTree"); | -
| - | - |
| - | /* 15. Constrain dS and dN in the tree to based upon different models */ | -
| - | - |
| - | global omega_1 = 0.25; | -
| - | global omega_2 = 0.5; | -
| - | global omega_3 = 1.5; | -
| - | - |
| - | omega_1 :< 1; | -
| - | omega_2 :< 1; | -
| - | omega_3 :> 1; | -
| - | - |
| - | ClearConstraints (givenTree1); | -
| - | ClearConstraints (givenTree2); | -
| - | ClearConstraints (givenTree3); | -
| - | ClearConstraints (givenTree4); | -
| - | - |
| - | /* 16. define and optimize the likelihood function */ | -
| - | - |
| - | bNames = BranchName (givenTree,-1); | -
| - | nucBL = BranchLength (nucTree,-1); | -
| - | - |
| - | for (bc=0; bc<Columns(bNames)-1; bc=bc+1) | -
| - | { | -
| - | ExecuteCommands ("givenTree1."+bNames[bc]+".synRate =nucTree."+bNames[bc]+".t;"); | -
| - | ExecuteCommands ("givenTree1."+bNames[bc]+".nonSynRate =nucTree."+bNames[bc]+".t;"); | -
| - | ExecuteCommands ("givenTree2."+bNames[bc]+".synRate =nucTree."+bNames[bc]+".t;"); | -
| - | ExecuteCommands ("givenTree2."+bNames[bc]+".nonSynRate =nucTree."+bNames[bc]+".t;"); | -
| - | ExecuteCommands ("givenTree3."+bNames[bc]+".synRate =nucTree."+bNames[bc]+".t;"); | -
| - | ExecuteCommands ("givenTree3."+bNames[bc]+".nonSynRate =nucTree."+bNames[bc]+".t;"); | -
| - | ExecuteCommands ("givenTree4."+bNames[bc]+".synRate =nucTree."+bNames[bc]+".t;"); | -
| - | ExecuteCommands ("givenTree4."+bNames[bc]+".nonSynRate =nucTree."+bNames[bc]+".t;"); | -
| - | } | -
| - | - |
| - | codBL = BranchLength (givenTree1,-1); | -
| - | - |
| - | for (bc=0; bc<Columns(bNames)-1; bc=bc+1) | -
| - | { | -
| - | if (nucBL[bc]>0) | -
| - | { | -
| - | scalingFactor = nucBL[bc]/codBL[bc]; | -
| - | ExecuteCommands ("givenTree1."+bNames[bc]+".synRate=nucTree."+bNames[bc]+".t*"+scalingFactor+";"); | -
| - | ExecuteCommands ("givenTree1."+bNames[bc]+".nonSynRate=nucTree."+bNames[bc]+".t*"+scalingFactor+";"); | -
| - | ExecuteCommands ("givenTree2."+bNames[bc]+".synRate=nucTree."+bNames[bc]+".t*"+scalingFactor+";"); | -
| - | ExecuteCommands ("givenTree2."+bNames[bc]+".nonSynRate=nucTree."+bNames[bc]+".t*"+scalingFactor+";"); | -
| - | ExecuteCommands ("givenTree3."+bNames[bc]+".synRate=nucTree."+bNames[bc]+".t*"+scalingFactor+";"); | -
| - | ExecuteCommands ("givenTree3."+bNames[bc]+".nonSynRate=nucTree."+bNames[bc]+".t*"+scalingFactor+";"); | -
| - | ExecuteCommands ("givenTree4."+bNames[bc]+".synRate =nucTree."+bNames[bc]+".t;"); | -
| - | ExecuteCommands ("givenTree4."+bNames[bc]+".nonSynRate =nucTree."+bNames[bc]+".t;"); | -
| - | } | -
| - | } | -
| - | - |
| - | for (bc = 0; bc < Columns (stOption); bc = bc + 1) | -
| - | { | -
| - | bName = choiceMatrix[stOption[bc]][0]; | -
| - | ExecuteCommands ("givenTree1."+bName+".nonSynRate:=omega_1*givenTree1."+bName+".synRate"); | -
| - | ExecuteCommands ("givenTree2."+bName+".nonSynRate:=omega_2*givenTree2."+bName+".synRate"); | -
| - | ExecuteCommands ("givenTree3."+bName+".nonSynRate:=givenTree3."+bName+".synRate"); | -
| - | ExecuteCommands ("givenTree4."+bName+".nonSynRate:=omega_3*givenTree4."+bName+".synRate"); | -
| - | } | -
| - | - |
| - | - |
| - | OPTIMIZATION_PRECISION = 0.001; | -
| - | - |
| - | global P_1 = 1/4; | -
| - | global P_2 = 1/3; | -
| - | global P_3 = 1/2; | -
| - | P_1 :< 1; | -
| - | P_2 :< 1; | -
| - | P_3 :< 1; | -
| - | - |
| - | - |
| - | fprintf (stdout, "\nFitting the model with selection...\n"); | -
| - | - |
| - | LikelihoodFunction lf = (filteredData, givenTree1, filteredData, givenTree2,filteredData, givenTree3,filteredData, givenTree4, | -
| - | "Log(P_1*SITE_LIKELIHOOD[0]+(1-P_1)*P_2*SITE_LIKELIHOOD[1]+(1-P_1)(1-P_2)*P_3*SITE_LIKELIHOOD[2]+(1-P_1)(1-P_2)(1-P_3)*SITE_LIKELIHOOD[3])"); | -
| - | - |
| - | - |
| - | Optimize (mles,lf); | -
| - | fprintf (stdout, lf); | -
| - | - |
| - | - |