Add Nextflow language support (#3870)
* Added nextflow language
* Added main.nf to list of filenames
* Fixed duplicate groovy scope
* Removed hello-world example
* Update grammar submodule
* Removed main.nf from filenames
* Added nextflow.config example
Committed by Paul Chaignon
Parent: 5fbe9c0902
Commit: bee7e55618
.gitmodules (vendored): 3 additions

@@ -895,3 +895,6 @@
 [submodule "vendor/grammars/Sublime-HTTP"]
 	path = vendor/grammars/Sublime-HTTP
 	url = https://github.com/samsalisbury/Sublime-HTTP
+[submodule "vendor/grammars/atom-language-nextflow"]
+	path = vendor/grammars/atom-language-nextflow
+	url = https://github.com/nextflow-io/atom-language-nextflow
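For context, a submodule entry like the one above is normally created with `git submodule add https://github.com/nextflow-io/atom-language-nextflow vendor/grammars/atom-language-nextflow`; the exact command used for this change is not recorded in the diff.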
grammars.yml: 3 additions

@@ -195,6 +195,9 @@ vendor/grammars/atom-language-clean:
 vendor/grammars/atom-language-julia:
 - source.julia
 - source.julia.console
+vendor/grammars/atom-language-nextflow:
+- source.nextflow
+- source.nextflow-groovy
 vendor/grammars/atom-language-p4:
 - source.p4
 vendor/grammars/atom-language-perl6:
lib/linguist/languages.yml: 12 additions

@@ -2905,6 +2905,18 @@ NewLisp:
   codemirror_mode: commonlisp
   codemirror_mime_type: text/x-common-lisp
   language_id: 247
+Nextflow:
+  type: programming
+  ace_mode: groovy
+  tm_scope: source.nextflow
+  color: "#3ac486"
+  extensions:
+  - ".nf"
+  filenames:
+  - "nextflow.config"
+  interpreters:
+  - nextflow
+  language_id: 506780613
 Nginx:
   type: data
   extensions:
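As an aside (an illustration, not part of the diff): the `interpreters` entry lets Linguist classify a Nextflow script by its shebang even when the `.nf` extension is missing, for example a hypothetical extension-less file along these lines:

    #!/usr/bin/env nextflow
    // toy script used only to illustrate shebang-based detection
    println 'hello'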
samples/Nextflow/blast.nf (new file): 67 lines

#!/usr/bin/env nextflow
/*
 * This is free and unencumbered software released into the public domain.
 *
 * Anyone is free to copy, modify, publish, use, compile, sell, or
 * distribute this software, either in source code form or as a compiled
 * binary, for any purpose, commercial or non-commercial, and by any
 * means.
 *
 * In jurisdictions that recognize copyright laws, the author or authors
 * of this software dedicate any and all copyright interest in the
 * software to the public domain. We make this dedication for the benefit
 * of the public at large and to the detriment of our heirs and
 * successors. We intend this dedication to be an overt act of
 * relinquishment in perpetuity of all present and future rights to this
 * software under copyright law.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * For more information, please refer to <http://unlicense.org/>
 */

/*
 * Author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */

params.query = "$HOME/sample.fa"
params.db = "$HOME/tools/blast-db/pdb/pdb"

process blast {
    output:
    file top_hits

    """
    blastp -query ${params.query} -db ${params.db} -outfmt 6 \
    | head -n 10 \
    | cut -f 2 > top_hits
    """
}

process extract {
    input:
    file top_hits
    output:
    file sequences

    """
    blastdbcmd -db ${params.db} -entry_batch $top_hits > sequences
    """
}

process align {
    input:
    file sequences
    echo true

    """
    t_coffee $sequences 2>&- | tee align_result
    """
}
samples/Nextflow/callings.nf (new executable file): 496 lines

#!/usr/bin/env nextflow
/*
 * This is free and unencumbered software released into the public domain.
 *
 * Anyone is free to copy, modify, publish, use, compile, sell, or
 * distribute this software, either in source code form or as a compiled
 * binary, for any purpose, commercial or non-commercial, and by any
 * means.
 *
 * In jurisdictions that recognize copyright laws, the author or authors
 * of this software dedicate any and all copyright interest in the
 * software to the public domain. We make this dedication for the benefit
 * of the public at large and to the detriment of our heirs and
 * successors. We intend this dedication to be an overt act of
 * relinquishment in perpetuity of all present and future rights to this
 * software under copyright law.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * For more information, please refer to <http://unlicense.org/>
 */

/*
 * 'CalliNGS-NF' - A Nextflow pipeline for variant calling with NGS data
 *
 * This pipeline reproduces steps from the GATK best practices procedure for
 * SNP calling with RNAseq data:
 * https://software.broadinstitute.org/gatk/guide/article?id=3891
 *
 * Anna Vlasova
 * Emilio Palumbo
 * Paolo Di Tommaso
 * Evan Floden
 */

/*
 * Define the default parameters
 */

params.genome = "$baseDir/data/genome.fa"
params.variants = "$baseDir/data/known_variants.vcf.gz"
params.blacklist = "$baseDir/data/blacklist.bed"
params.reads = "$baseDir/data/reads/rep1_{1,2}.fq.gz"
params.results = "results"
params.gatk = '/usr/local/bin/GenomeAnalysisTK.jar'
params.gatk_launch = "java -jar $params.gatk"

log.info "C A L L I N G S - N F v 1.0"
log.info "================================"
log.info "genome   : $params.genome"
log.info "reads    : $params.reads"
log.info "variants : $params.variants"
log.info "blacklist: $params.blacklist"
log.info "results  : $params.results"
log.info "gatk     : $params.gatk"
log.info ""

/*
 * Parse the input parameters
 */

GATK = params.gatk_launch
genome_file = file(params.genome)
variants_file = file(params.variants)
blacklist_file = file(params.blacklist)
reads_ch = Channel.fromFilePairs(params.reads)

/**********
 * PART 1: Data preparation
 *
 * Process 1A: Create a FASTA genome index (.fai) with samtools for GATK
 */

process '1A_prepare_genome_samtools' {
    tag "$genome.baseName"

    input:
    file genome from genome_file

    output:
    file "${genome}.fai" into genome_index_ch

    script:
    """
    samtools faidx ${genome}
    """
}

/*
 * Process 1B: Create a FASTA genome sequence dictionary with Picard for GATK
 */

process '1B_prepare_genome_picard' {
    tag "$genome.baseName"

    input:
    file genome from genome_file
    output:
    file "${genome.baseName}.dict" into genome_dict_ch

    script:
    """
    PICARD=`which picard.jar`
    java -jar \$PICARD CreateSequenceDictionary R= $genome O= ${genome.baseName}.dict
    """
}

/*
 * Process 1C: Create STAR genome index file.
 */

process '1C_prepare_star_genome_index' {
    tag "$genome.baseName"

    input:
    file genome from genome_file
    output:
    file "genome_dir" into genome_dir_ch

    script:
    """
    mkdir genome_dir

    STAR --runMode genomeGenerate \
         --genomeDir genome_dir \
         --genomeFastaFiles ${genome} \
         --runThreadN ${task.cpus}
    """
}

/*
 * Process 1D: Create a file containing the filtered and recoded set of variants
 */

process '1D_prepare_vcf_file' {
    tag "$variantsFile.baseName"

    input:
    file variantsFile from variants_file
    file blacklisted from blacklist_file

    output:
    set file("${variantsFile.baseName}.filtered.recode.vcf.gz"), file("${variantsFile.baseName}.filtered.recode.vcf.gz.tbi") into prepared_vcf_ch

    script:
    """
    vcftools --gzvcf $variantsFile -c \
             --exclude-bed ${blacklisted} \
             --recode | bgzip -c \
             > ${variantsFile.baseName}.filtered.recode.vcf.gz

    tabix ${variantsFile.baseName}.filtered.recode.vcf.gz
    """
}

/*
 * END OF PART 1
 *********/


/**********
 * PART 2: STAR RNA-Seq Mapping
 *
 * Process 2: Align RNA-Seq reads to the genome with STAR
 */

process '2_rnaseq_mapping_star' {
    tag "$replicateId"

    input:
    file genome from genome_file
    file genomeDir from genome_dir_ch
    set replicateId, file(reads) from reads_ch

    output:
    set replicateId, file('Aligned.sortedByCoord.out.bam'), file('Aligned.sortedByCoord.out.bam.bai') into aligned_bam_ch

    script:
    """
    # ngs-nf-dev Align reads to genome
    STAR --genomeDir $genomeDir \
         --readFilesIn $reads \
         --runThreadN ${task.cpus} \
         --readFilesCommand zcat \
         --outFilterType BySJout \
         --alignSJoverhangMin 8 \
         --alignSJDBoverhangMin 1 \
         --outFilterMismatchNmax 999

    # 2nd pass (improve alignments using table of splice junctions and create a new index)
    mkdir genomeDir
    STAR --runMode genomeGenerate \
         --genomeDir genomeDir \
         --genomeFastaFiles $genome \
         --sjdbFileChrStartEnd SJ.out.tab \
         --sjdbOverhang 75 \
         --runThreadN ${task.cpus}

    # Final read alignments
    STAR --genomeDir genomeDir \
         --readFilesIn $reads \
         --runThreadN ${task.cpus} \
         --readFilesCommand zcat \
         --outFilterType BySJout \
         --alignSJoverhangMin 8 \
         --alignSJDBoverhangMin 1 \
         --outFilterMismatchNmax 999 \
         --outSAMtype BAM SortedByCoordinate \
         --outSAMattrRGline ID:$replicateId LB:library PL:illumina PU:machine SM:GM12878

    # Index the BAM file
    samtools index Aligned.sortedByCoord.out.bam
    """
}

/*
 * END OF PART 2
 ******/


/**********
 * PART 3: GATK Prepare Mapped Reads
 *
 * Process 3: Split reads that contain Ns in their CIGAR string.
 * Creates k+1 new reads (where k is the number of N cigar elements)
 * that correspond to the segments of the original read beside/between
 * the splicing events represented by the Ns in the original CIGAR.
 */

process '3_rnaseq_gatk_splitNcigar' {
    tag "$replicateId"

    input:
    file genome from genome_file
    file index from genome_index_ch
    file genome_dict from genome_dict_ch
    set replicateId, file(bam), file(index) from aligned_bam_ch

    output:
    set replicateId, file('split.bam'), file('split.bai') into splitted_bam_ch

    script:
    """
    # SplitNCigarReads and reassign mapping qualities
    $GATK -T SplitNCigarReads \
          -R $genome -I $bam \
          -o split.bam \
          -rf ReassignOneMappingQuality \
          -RMQF 255 -RMQT 60 \
          -U ALLOW_N_CIGAR_READS \
          --fix_misencoded_quality_scores
    """
}

/*
 * END OF PART 3
 ******/


/***********
 * PART 4: GATK Base Quality Score Recalibration Workflow
 *
 * Process 4: Base recalibrate to detect systematic errors in base quality scores,
 * select unique alignments and index
 *
 */

process '4_rnaseq_gatk_recalibrate' {
    tag "$replicateId"

    input:
    file genome from genome_file
    file index from genome_index_ch
    file dict from genome_dict_ch
    set replicateId, file(bam), file(index) from splitted_bam_ch
    set file(variants_file), file(variants_file_index) from prepared_vcf_ch

    output:
    set sampleId, file("${replicateId}.final.uniq.bam"), file("${replicateId}.final.uniq.bam.bai") into (final_output_ch, bam_for_ASE_ch)

    script:
    sampleId = replicateId.replaceAll(/[12]$/,'')
    """
    # Indel Realignment and Base Recalibration
    $GATK -T BaseRecalibrator \
          --default_platform illumina \
          -cov ReadGroupCovariate \
          -cov QualityScoreCovariate \
          -cov CycleCovariate \
          -knownSites ${variants_file} \
          -cov ContextCovariate \
          -R ${genome} -I ${bam} \
          --downsampling_type NONE \
          -nct ${task.cpus} \
          -o final.rnaseq.grp

    $GATK -T PrintReads \
          -R ${genome} -I ${bam} \
          -BQSR final.rnaseq.grp \
          -nct ${task.cpus} \
          -o final.bam

    # Select only unique alignments, no multimaps
    (samtools view -H final.bam; samtools view final.bam| grep -w 'NH:i:1') \
    |samtools view -Sb - > ${replicateId}.final.uniq.bam

    # Index BAM files
    samtools index ${replicateId}.final.uniq.bam
    """
}

/*
 * END OF PART 4
 ******/


/***********
 * PART 5: GATK Variant Calling
 *
 * Process 5: Call variants with GATK HaplotypeCaller.
 * Calls SNPs and indels simultaneously via local de-novo assembly of
 * haplotypes in an active region.
 * Filter called variants with GATK VariantFiltration.
 */

process '5_rnaseq_call_variants' {
    tag "$sampleId"

    input:
    file genome from genome_file
    file index from genome_index_ch
    file dict from genome_dict_ch
    set sampleId, file(bam), file(bai) from final_output_ch.groupTuple()

    output:
    set sampleId, file('final.vcf') into vcf_files

    script:
    """
    # fix absolute path in dict file
    sed -i 's@UR:file:.*${genome}@UR:file:${genome}@g' $dict
    echo "${bam.join('\n')}" > bam.list

    # Variant calling
    $GATK -T HaplotypeCaller \
          -R $genome -I bam.list \
          -dontUseSoftClippedBases \
          -stand_call_conf 20.0 \
          -o output.gatk.vcf.gz

    # Variant filtering
    $GATK -T VariantFiltration \
          -R $genome -V output.gatk.vcf.gz \
          -window 35 -cluster 3 \
          -filterName FS -filter "FS > 30.0" \
          -filterName QD -filter "QD < 2.0" \
          -o final.vcf
    """
}

/*
 * END OF PART 5
 ******/


/***********
 * PART 6: Post-process variants file and prepare for Allele-Specific Expression and RNA Editing Analysis
 *
 * Process 6A: Post-process the VCF result
 */

process '6A_post_process_vcf' {
    tag "$sampleId"
    publishDir "$params.results/$sampleId"

    input:
    set sampleId, file('final.vcf') from vcf_files
    set file('filtered.recode.vcf.gz'), file('filtered.recode.vcf.gz.tbi') from prepared_vcf_ch
    output:
    set sampleId, file('final.vcf'), file('commonSNPs.diff.sites_in_files') into vcf_and_snps_ch

    script:
    '''
    grep -v '#' final.vcf | awk '$7~/PASS/' |perl -ne 'chomp($_); ($dp)=$_=~/DP\\=(\\d+)\\;/; if($dp>=8){print $_."\\n"};' > result.DP8.vcf

    vcftools --vcf result.DP8.vcf --gzdiff filtered.recode.vcf.gz --diff-site --out commonSNPs
    '''
}

/*
 * Process 6B: Prepare variants file for allele specific expression (ASE) analysis
 */

process '6B_prepare_vcf_for_ase' {
    tag "$sampleId"
    publishDir "$params.results/$sampleId"

    input:
    set sampleId, file('final.vcf'), file('commonSNPs.diff.sites_in_files') from vcf_and_snps_ch
    output:
    set sampleId, file('known_snps.vcf') into vcf_for_ASE
    file('AF.histogram.pdf') into gghist_pdfs

    script:
    '''
    awk 'BEGIN{OFS="\t"} $4~/B/{print $1,$2,$3}' commonSNPs.diff.sites_in_files > test.bed

    vcftools --vcf final.vcf --bed test.bed --recode --keep-INFO-all --stdout > known_snps.vcf

    grep -v '#' known_snps.vcf | awk -F '\\t' '{print $10}' \
        |awk -F ':' '{print $2}'|perl -ne 'chomp($_); \
        @v=split(/\\,/,$_); if($v[0]!=0 ||$v[1] !=0)\
        {print $v[1]/($v[1]+$v[0])."\\n"; }' |awk '$1!=1' \
        >AF.4R

    gghist.R -i AF.4R -o AF.histogram.pdf
    '''
}

/*
 * Group data for allele-specific expression.
 *
 * The `bam_for_ASE_ch` emits tuples having the following structure, holding the final BAM/BAI files:
 *
 *   ( sample_id, file_bam, file_bai )
 *
 * The `vcf_for_ASE` channel emits tuples having the following structure, holding the VCF file:
 *
 *   ( sample_id, output.vcf )
 *
 * The BAMs are grouped together and merged with VCFs having the same sample id. Finally
 * it creates a channel named `grouped_vcf_bam_bai_ch` emitting the following tuples:
 *
 *   ( sample_id, file_vcf, List[file_bam], List[file_bai] )
 */

bam_for_ASE_ch
    .groupTuple()
    .phase(vcf_for_ASE)
    .map{ left, right ->
        def sampleId = left[0]
        def bam = left[1]
        def bai = left[2]
        def vcf = right[1]
        tuple(sampleId, vcf, bam, bai)
    }
    .set { grouped_vcf_bam_bai_ch }

/*
 * Process 6C: Allele-Specific Expression analysis with GATK ASEReadCounter.
 * Calculates allele counts at a set of positions after applying
 * filters that are tuned for enabling allele-specific expression
 * (ASE) analysis
 */

process '6C_ASE_knownSNPs' {
    tag "$sampleId"
    publishDir "$params.results/$sampleId"

    input:
    file genome from genome_file
    file index from genome_index_ch
    file dict from genome_dict_ch
    set sampleId, file(vcf), file(bam), file(bai) from grouped_vcf_bam_bai_ch

    output:
    file "ASE.tsv"

    script:
    """
    echo "${bam.join('\n')}" > bam.list

    $GATK -R ${genome} \
          -T ASEReadCounter \
          -o ASE.tsv \
          -I bam.list \
          -sites ${vcf}
    """
}
samples/Nextflow/filenames/nextflow.config (new file): 50 lines

aws {
    region = 'eu-west-1'
}

cloud {
    autoscale {
        enabled = true
        minInstances = 3
        starvingTimeout = '2 min'
        terminateWhenIdle = true
    }
    imageId = 'ami-78ds78d'
    instanceProfile = 'MyRole'
    instanceType = 'r4.large'
    sharedStorageId = 'fs-76ds76s'
    spotPrice = 0.06
    subnetId = 'subnet-8d98d7s'
}

env {
    BAR = 'world'
    FOO = 'hola'
}

mail {
    from = 'paolo.ditommaso@gmail.com'
    smtp {
        auth = true
        host = 'email-smtp.us-east-1.amazonaws.com'
        password = 'my-secret'
        port = 587
        starttls {
            enable = true
            required = true
        }
        user = 'my-name'
    }
}

process {
    executor = 'slurm'
    queue = 'cn-el7'
    memory = '16GB'
    cpus = 8
    container = 'user/rnaseq-nf:latest'
}

trace {
    fields = 'task_id,name,status,attempt,exit,queue'
}
samples/Nextflow/rnaseq.nf (new file): 135 lines

#!/usr/bin/env nextflow
/*
 * This is free and unencumbered software released into the public domain.
 *
 * Anyone is free to copy, modify, publish, use, compile, sell, or
 * distribute this software, either in source code form or as a compiled
 * binary, for any purpose, commercial or non-commercial, and by any
 * means.
 *
 * In jurisdictions that recognize copyright laws, the author or authors
 * of this software dedicate any and all copyright interest in the
 * software to the public domain. We make this dedication for the benefit
 * of the public at large and to the detriment of our heirs and
 * successors. We intend this dedication to be an overt act of
 * relinquishment in perpetuity of all present and future rights to this
 * software under copyright law.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * For more information, please refer to <http://unlicense.org/>
 */

/*
 * Proof of concept of an RNAseq pipeline implemented with Nextflow
 *
 * Authors:
 * - Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 * - Emilio Palumbo <emiliopalumbo@gmail.com>
 * - Evan Floden <evanfloden@gmail.com>
 */

params.reads = "$baseDir/data/ggal/*_{1,2}.fq"
params.transcriptome = "$baseDir/data/ggal/ggal_1_48850000_49020000.Ggal71.500bpflank.fa"
params.outdir = "."
params.multiqc = "$baseDir/multiqc"

log.info """\
         R N A S E Q - N F   P I P E L I N E
         ===================================
         transcriptome: ${params.transcriptome}
         reads        : ${params.reads}
         outdir       : ${params.outdir}
         """
         .stripIndent()

transcriptome_file = file(params.transcriptome)
multiqc_file = file(params.multiqc)

Channel
    .fromFilePairs( params.reads )
    .ifEmpty { error "Cannot find any reads matching: ${params.reads}" }
    .into { read_pairs_ch; read_pairs2_ch }

process index {
    tag "$transcriptome_file.simpleName"

    input:
    file transcriptome from transcriptome_file

    output:
    file 'index' into index_ch

    script:
    """
    salmon index --threads $task.cpus -t $transcriptome -i index
    """
}

process quant {
    tag "$pair_id"

    input:
    file index from index_ch
    set pair_id, file(reads) from read_pairs_ch

    output:
    file(pair_id) into quant_ch

    script:
    """
    salmon quant --threads $task.cpus --libType=U -i index -1 ${reads[0]} -2 ${reads[1]} -o $pair_id
    """
}

process fastqc {
    tag "FASTQC on $sample_id"

    input:
    set sample_id, file(reads) from read_pairs2_ch

    output:
    file("fastqc_${sample_id}_logs") into fastqc_ch

    script:
    """
    mkdir fastqc_${sample_id}_logs
    fastqc -o fastqc_${sample_id}_logs -f fastq -q ${reads}
    """
}

process multiqc {
    publishDir params.outdir, mode:'copy'

    input:
    file('*') from quant_ch.mix(fastqc_ch).collect()
    file(config) from multiqc_file

    output:
    file('multiqc_report.html')

    script:
    """
    cp $config/* .
    echo "custom_logo: \$PWD/logo.png" >> multiqc_config.yaml
    multiqc .
    """
}

workflow.onComplete {
    println ( workflow.success ? "\nDone! Open the following report in your browser --> $params.outdir/multiqc_report.html\n" : "Oops .. something went wrong" )
}
vendor/README.md (vendored): 1 addition

@@ -239,6 +239,7 @@ This is a list of grammars that Linguist selects to provide syntax highlighting
 - **NetLinx+ERB:** [amclain/sublime-netlinx](https://github.com/amclain/sublime-netlinx)
 - **NetLogo:** [textmate/lisp.tmbundle](https://github.com/textmate/lisp.tmbundle)
 - **NewLisp:** [textmate/lisp.tmbundle](https://github.com/textmate/lisp.tmbundle)
+- **Nextflow:** [nextflow-io/atom-language-nextflow](https://github.com/nextflow-io/atom-language-nextflow)
 - **Nginx:** [brandonwamboldt/sublime-nginx](https://github.com/brandonwamboldt/sublime-nginx)
 - **Nim:** [Varriount/NimLime](https://github.com/Varriount/NimLime)
 - **Ninja:** [khyo/language-ninja](https://github.com/khyo/language-ninja)
vendor/grammars/atom-language-nextflow (vendored): submodule added at a8a91d7e10
vendor/licenses/grammar/atom-language-nextflow.txt (vendored, new file): 25 lines

---
type: grammar
name: atom-language-nextflow
license: mit
---
Copyright (c) 2018 Paolo Di Tommaso
Copyright (c) 2014-2017 Jakehp https://github.com/Jakehp/language-groovy

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.