mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-28 17:20:22 +00:00
Add Nextflow language support (#3870)
* Added nextflow language * Added main.nf to list of filenames * Fixed duplicate groovy scope * Removed hello-world example * Update grammar submodule * Removed main.nf from filenames * Added nextflow.config example
This commit is contained in:
committed by
Paul Chaignon
parent
5fbe9c0902
commit
bee7e55618
3
.gitmodules
vendored
3
.gitmodules
vendored
@@ -895,3 +895,6 @@
|
||||
[submodule "vendor/grammars/Sublime-HTTP"]
|
||||
path = vendor/grammars/Sublime-HTTP
|
||||
url = https://github.com/samsalisbury/Sublime-HTTP
|
||||
[submodule "vendor/grammars/atom-language-nextflow"]
|
||||
path = vendor/grammars/atom-language-nextflow
|
||||
url = https://github.com/nextflow-io/atom-language-nextflow
|
||||
|
||||
@@ -195,6 +195,9 @@ vendor/grammars/atom-language-clean:
|
||||
vendor/grammars/atom-language-julia:
|
||||
- source.julia
|
||||
- source.julia.console
|
||||
vendor/grammars/atom-language-nextflow:
|
||||
- source.nextflow
|
||||
- source.nextflow-groovy
|
||||
vendor/grammars/atom-language-p4:
|
||||
- source.p4
|
||||
vendor/grammars/atom-language-perl6:
|
||||
|
||||
@@ -2905,6 +2905,18 @@ NewLisp:
|
||||
codemirror_mode: commonlisp
|
||||
codemirror_mime_type: text/x-common-lisp
|
||||
language_id: 247
|
||||
Nextflow:
|
||||
type: programming
|
||||
ace_mode: groovy
|
||||
tm_scope: source.nextflow
|
||||
color: "#3ac486"
|
||||
extensions:
|
||||
- ".nf"
|
||||
filenames:
|
||||
- "nextflow.config"
|
||||
interpreters:
|
||||
- nextflow
|
||||
language_id: 506780613
|
||||
Nginx:
|
||||
type: data
|
||||
extensions:
|
||||
|
||||
67
samples/Nextflow/blast.nf
Normal file
67
samples/Nextflow/blast.nf
Normal file
@@ -0,0 +1,67 @@
|
||||
#!/usr/bin/env nextflow
|
||||
/*
|
||||
* This is free and unencumbered software released into the public domain.
|
||||
*
|
||||
* Anyone is free to copy, modify, publish, use, compile, sell, or
|
||||
* distribute this software, either in source code form or as a compiled
|
||||
* binary, for any purpose, commercial or non-commercial, and by any
|
||||
* means.
|
||||
*
|
||||
* In jurisdictions that recognize copyright laws, the author or authors
|
||||
* of this software dedicate any and all copyright interest in the
|
||||
* software to the public domain. We make this dedication for the benefit
|
||||
* of the public at large and to the detriment of our heirs and
|
||||
* successors. We intend this dedication to be an overt act of
|
||||
* relinquishment in perpetuity of all present and future rights to this
|
||||
* software under copyright law.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* For more information, please refer to <http://unlicense.org/>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
*/
|
||||
|
||||
|
||||
params.query = "$HOME/sample.fa"
|
||||
params.db = "$HOME/tools/blast-db/pdb/pdb"
|
||||
|
||||
process blast {
|
||||
output:
|
||||
file top_hits
|
||||
|
||||
"""
|
||||
blastp -query ${params.query} -db ${params.db} -outfmt 6 \
|
||||
| head -n 10 \
|
||||
| cut -f 2 > top_hits
|
||||
"""
|
||||
}
|
||||
|
||||
process extract {
|
||||
input:
|
||||
file top_hits
|
||||
output:
|
||||
file sequences
|
||||
|
||||
"""
|
||||
blastdbcmd -db ${params.db} -entry_batch $top_hits > sequences
|
||||
"""
|
||||
}
|
||||
|
||||
process align {
|
||||
input:
|
||||
file sequences
|
||||
echo true
|
||||
|
||||
"""
|
||||
t_coffee $sequences 2>&- | tee align_result
|
||||
"""
|
||||
}
|
||||
496
samples/Nextflow/callings.nf
Executable file
496
samples/Nextflow/callings.nf
Executable file
@@ -0,0 +1,496 @@
|
||||
#!/usr/bin/env nextflow
|
||||
/*
|
||||
* This is free and unencumbered software released into the public domain.
|
||||
*
|
||||
* Anyone is free to copy, modify, publish, use, compile, sell, or
|
||||
* distribute this software, either in source code form or as a compiled
|
||||
* binary, for any purpose, commercial or non-commercial, and by any
|
||||
* means.
|
||||
*
|
||||
* In jurisdictions that recognize copyright laws, the author or authors
|
||||
* of this software dedicate any and all copyright interest in the
|
||||
* software to the public domain. We make this dedication for the benefit
|
||||
* of the public at large and to the detriment of our heirs and
|
||||
* successors. We intend this dedication to be an overt act of
|
||||
* relinquishment in perpetuity of all present and future rights to this
|
||||
* software under copyright law.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* For more information, please refer to <http://unlicense.org/>
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* 'CalliNGS-NF' - A Nextflow pipeline for variant calling with NGS data
|
||||
*
|
||||
* This pipeline reproduces steps from the GATK best practices of SNP
|
||||
* calling with RNAseq data procedure:
|
||||
* https://software.broadinstitute.org/gatk/guide/article?id=3891
|
||||
*
|
||||
* Anna Vlasova
|
||||
* Emilio Palumbo
|
||||
* Paolo Di Tommaso
|
||||
* Evan Floden
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* Define the default parameters
|
||||
*/
|
||||
|
||||
params.genome = "$baseDir/data/genome.fa"
|
||||
params.variants = "$baseDir/data/known_variants.vcf.gz"
|
||||
params.blacklist = "$baseDir/data/blacklist.bed"
|
||||
params.reads = "$baseDir/data/reads/rep1_{1,2}.fq.gz"
|
||||
params.results = "results"
|
||||
params.gatk = '/usr/local/bin/GenomeAnalysisTK.jar'
|
||||
params.gatk_launch = "java -jar $params.gatk"
|
||||
|
||||
log.info "C A L L I N G S - N F v 1.0"
|
||||
log.info "================================"
|
||||
log.info "genome : $params.genome"
|
||||
log.info "reads : $params.reads"
|
||||
log.info "variants : $params.variants"
|
||||
log.info "blacklist: $params.blacklist"
|
||||
log.info "results : $params.results"
|
||||
log.info "gatk : $params.gatk"
|
||||
log.info ""
|
||||
|
||||
/*
|
||||
* Parse the input parameters
|
||||
*/
|
||||
|
||||
GATK = params.gatk_launch
|
||||
genome_file = file(params.genome)
|
||||
variants_file = file(params.variants)
|
||||
blacklist_file = file(params.blacklist)
|
||||
reads_ch = Channel.fromFilePairs(params.reads)
|
||||
|
||||
|
||||
/**********
|
||||
* PART 1: Data preparation
|
||||
*
|
||||
* Process 1A: Create a FASTA genome index (.fai) with samtools for GATK
|
||||
*/
|
||||
|
||||
process '1A_prepare_genome_samtools' {
|
||||
tag "$genome.baseName"
|
||||
|
||||
input:
|
||||
file genome from genome_file
|
||||
|
||||
output:
|
||||
file "${genome}.fai" into genome_index_ch
|
||||
|
||||
script:
|
||||
"""
|
||||
samtools faidx ${genome}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Process 1B: Create a FASTA genome sequence dictionary with Picard for GATK
|
||||
*/
|
||||
|
||||
process '1B_prepare_genome_picard' {
|
||||
tag "$genome.baseName"
|
||||
|
||||
input:
|
||||
file genome from genome_file
|
||||
output:
|
||||
file "${genome.baseName}.dict" into genome_dict_ch
|
||||
|
||||
script:
|
||||
"""
|
||||
PICARD=`which picard.jar`
|
||||
java -jar \$PICARD CreateSequenceDictionary R= $genome O= ${genome.baseName}.dict
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Process 1C: Create STAR genome index file.
|
||||
*/
|
||||
|
||||
process '1C_prepare_star_genome_index' {
|
||||
tag "$genome.baseName"
|
||||
|
||||
input:
|
||||
file genome from genome_file
|
||||
output:
|
||||
file "genome_dir" into genome_dir_ch
|
||||
|
||||
script:
|
||||
"""
|
||||
mkdir genome_dir
|
||||
|
||||
STAR --runMode genomeGenerate \
|
||||
--genomeDir genome_dir \
|
||||
--genomeFastaFiles ${genome} \
|
||||
--runThreadN ${task.cpus}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Process 1D: Create a file containing the filtered and recoded set of variants
|
||||
*/
|
||||
|
||||
process '1D_prepare_vcf_file' {
|
||||
tag "$variantsFile.baseName"
|
||||
|
||||
input:
|
||||
file variantsFile from variants_file
|
||||
file blacklisted from blacklist_file
|
||||
|
||||
output:
|
||||
set file("${variantsFile.baseName}.filtered.recode.vcf.gz"), file("${variantsFile.baseName}.filtered.recode.vcf.gz.tbi") into prepared_vcf_ch
|
||||
|
||||
script:
|
||||
"""
|
||||
vcftools --gzvcf $variantsFile -c \
|
||||
--exclude-bed ${blacklisted} \
|
||||
--recode | bgzip -c \
|
||||
> ${variantsFile.baseName}.filtered.recode.vcf.gz
|
||||
|
||||
tabix ${variantsFile.baseName}.filtered.recode.vcf.gz
|
||||
"""
|
||||
}
|
||||
|
||||
/*
|
||||
* END OF PART 1
|
||||
*********/
|
||||
|
||||
|
||||
|
||||
/**********
|
||||
* PART 2: STAR RNA-Seq Mapping
|
||||
*
|
||||
* Process 2: Align RNA-Seq reads to the genome with STAR
|
||||
*/
|
||||
|
||||
process '2_rnaseq_mapping_star' {
|
||||
tag "$replicateId"
|
||||
|
||||
input:
|
||||
file genome from genome_file
|
||||
file genomeDir from genome_dir_ch
|
||||
set replicateId, file(reads) from reads_ch
|
||||
|
||||
output:
|
||||
set replicateId, file('Aligned.sortedByCoord.out.bam'), file('Aligned.sortedByCoord.out.bam.bai') into aligned_bam_ch
|
||||
|
||||
script:
|
||||
"""
|
||||
# ngs-nf-dev Align reads to genome
|
||||
STAR --genomeDir $genomeDir \
|
||||
--readFilesIn $reads \
|
||||
--runThreadN ${task.cpus} \
|
||||
--readFilesCommand zcat \
|
||||
--outFilterType BySJout \
|
||||
--alignSJoverhangMin 8 \
|
||||
--alignSJDBoverhangMin 1 \
|
||||
--outFilterMismatchNmax 999
|
||||
|
||||
# 2nd pass (improve alignments using table of splice junctions and create a new index)
|
||||
mkdir genomeDir
|
||||
STAR --runMode genomeGenerate \
|
||||
--genomeDir genomeDir \
|
||||
--genomeFastaFiles $genome \
|
||||
--sjdbFileChrStartEnd SJ.out.tab \
|
||||
--sjdbOverhang 75 \
|
||||
--runThreadN ${task.cpus}
|
||||
|
||||
# Final read alignments
|
||||
STAR --genomeDir genomeDir \
|
||||
--readFilesIn $reads \
|
||||
--runThreadN ${task.cpus} \
|
||||
--readFilesCommand zcat \
|
||||
--outFilterType BySJout \
|
||||
--alignSJoverhangMin 8 \
|
||||
--alignSJDBoverhangMin 1 \
|
||||
--outFilterMismatchNmax 999 \
|
||||
--outSAMtype BAM SortedByCoordinate \
|
||||
--outSAMattrRGline ID:$replicateId LB:library PL:illumina PU:machine SM:GM12878
|
||||
|
||||
# Index the BAM file
|
||||
samtools index Aligned.sortedByCoord.out.bam
|
||||
"""
|
||||
}
|
||||
|
||||
/*
|
||||
* END OF PART 2
|
||||
******/
|
||||
|
||||
|
||||
/**********
|
||||
* PART 3: GATK Prepare Mapped Reads
|
||||
*
|
||||
* Process 3: Split reads that contain Ns in their CIGAR string.
|
||||
* Creates k+1 new reads (where k is the number of N cigar elements)
|
||||
* that correspond to the segments of the original read beside/between
|
||||
* the splicing events represented by the Ns in the original CIGAR.
|
||||
*/
|
||||
|
||||
process '3_rnaseq_gatk_splitNcigar' {
|
||||
tag "$replicateId"
|
||||
|
||||
input:
|
||||
file genome from genome_file
|
||||
file index from genome_index_ch
|
||||
file genome_dict from genome_dict_ch
|
||||
set replicateId, file(bam), file(index) from aligned_bam_ch
|
||||
|
||||
output:
|
||||
set replicateId, file('split.bam'), file('split.bai') into splitted_bam_ch
|
||||
|
||||
script:
|
||||
"""
|
||||
# SplitNCigarReads and reassign mapping qualities
|
||||
$GATK -T SplitNCigarReads \
|
||||
-R $genome -I $bam \
|
||||
-o split.bam \
|
||||
-rf ReassignOneMappingQuality \
|
||||
-RMQF 255 -RMQT 60 \
|
||||
-U ALLOW_N_CIGAR_READS \
|
||||
--fix_misencoded_quality_scores
|
||||
"""
|
||||
}
|
||||
|
||||
/*
|
||||
* END OF PART 3
|
||||
******/
|
||||
|
||||
|
||||
/***********
|
||||
* PART 4: GATK Base Quality Score Recalibration Workflow
|
||||
*
|
||||
* Process 4: Base recalibrate to detect systematic errors in base quality scores,
|
||||
* select unique alignments and index
|
||||
*
|
||||
*/
|
||||
|
||||
process '4_rnaseq_gatk_recalibrate' {
|
||||
tag "$replicateId"
|
||||
|
||||
input:
|
||||
file genome from genome_file
|
||||
file index from genome_index_ch
|
||||
file dict from genome_dict_ch
|
||||
set replicateId, file(bam), file(index) from splitted_bam_ch
|
||||
set file(variants_file), file(variants_file_index) from prepared_vcf_ch
|
||||
|
||||
output:
|
||||
set sampleId, file("${replicateId}.final.uniq.bam"), file("${replicateId}.final.uniq.bam.bai") into (final_output_ch, bam_for_ASE_ch)
|
||||
|
||||
script:
|
||||
sampleId = replicateId.replaceAll(/[12]$/,'')
|
||||
"""
|
||||
# Indel Realignment and Base Recalibration
|
||||
$GATK -T BaseRecalibrator \
|
||||
--default_platform illumina \
|
||||
-cov ReadGroupCovariate \
|
||||
-cov QualityScoreCovariate \
|
||||
-cov CycleCovariate \
|
||||
-knownSites ${variants_file} \
|
||||
-cov ContextCovariate \
|
||||
-R ${genome} -I ${bam} \
|
||||
--downsampling_type NONE \
|
||||
-nct ${task.cpus} \
|
||||
-o final.rnaseq.grp
|
||||
|
||||
$GATK -T PrintReads \
|
||||
-R ${genome} -I ${bam} \
|
||||
-BQSR final.rnaseq.grp \
|
||||
-nct ${task.cpus} \
|
||||
-o final.bam
|
||||
|
||||
# Select only unique alignments, no multimaps
|
||||
(samtools view -H final.bam; samtools view final.bam| grep -w 'NH:i:1') \
|
||||
|samtools view -Sb - > ${replicateId}.final.uniq.bam
|
||||
|
||||
# Index BAM files
|
||||
samtools index ${replicateId}.final.uniq.bam
|
||||
"""
|
||||
}
|
||||
|
||||
/*
|
||||
* END OF PART 4
|
||||
******/
|
||||
|
||||
|
||||
|
||||
/***********
|
||||
* PART 5: GATK Variant Calling
|
||||
*
|
||||
* Process 5: Call variants with GATK HaplotypeCaller.
|
||||
* Calls SNPs and indels simultaneously via local de-novo assembly of
|
||||
* haplotypes in an active region.
|
||||
* Filter called variants with GATK VariantFiltration.
|
||||
*/
|
||||
|
||||
|
||||
process '5_rnaseq_call_variants' {
|
||||
tag "$sampleId"
|
||||
|
||||
input:
|
||||
file genome from genome_file
|
||||
file index from genome_index_ch
|
||||
file dict from genome_dict_ch
|
||||
set sampleId, file(bam), file(bai) from final_output_ch.groupTuple()
|
||||
|
||||
output:
|
||||
set sampleId, file('final.vcf') into vcf_files
|
||||
|
||||
script:
|
||||
"""
|
||||
# fix absolute path in dict file
|
||||
sed -i 's@UR:file:.*${genome}@UR:file:${genome}@g' $dict
|
||||
echo "${bam.join('\n')}" > bam.list
|
||||
|
||||
# Variant calling
|
||||
$GATK -T HaplotypeCaller \
|
||||
-R $genome -I bam.list \
|
||||
-dontUseSoftClippedBases \
|
||||
-stand_call_conf 20.0 \
|
||||
-o output.gatk.vcf.gz
|
||||
|
||||
# Variant filtering
|
||||
$GATK -T VariantFiltration \
|
||||
-R $genome -V output.gatk.vcf.gz \
|
||||
-window 35 -cluster 3 \
|
||||
-filterName FS -filter "FS > 30.0" \
|
||||
-filterName QD -filter "QD < 2.0" \
|
||||
-o final.vcf
|
||||
"""
|
||||
}
|
||||
|
||||
/*
|
||||
* END OF PART 5
|
||||
******/
|
||||
|
||||
|
||||
/***********
|
||||
* PART 6: Post-process variants file and prepare for Allele-Specific Expression and RNA Editing Analysis
|
||||
*
|
||||
* Process 6A: Post-process the VCF result
|
||||
*/
|
||||
|
||||
process '6A_post_process_vcf' {
|
||||
tag "$sampleId"
|
||||
publishDir "$params.results/$sampleId"
|
||||
|
||||
input:
|
||||
set sampleId, file('final.vcf') from vcf_files
|
||||
set file('filtered.recode.vcf.gz'), file('filtered.recode.vcf.gz.tbi') from prepared_vcf_ch
|
||||
output:
|
||||
set sampleId, file('final.vcf'), file('commonSNPs.diff.sites_in_files') into vcf_and_snps_ch
|
||||
|
||||
script:
|
||||
'''
|
||||
grep -v '#' final.vcf | awk '$7~/PASS/' |perl -ne 'chomp($_); ($dp)=$_=~/DP\\=(\\d+)\\;/; if($dp>=8){print $_."\\n"};' > result.DP8.vcf
|
||||
|
||||
vcftools --vcf result.DP8.vcf --gzdiff filtered.recode.vcf.gz --diff-site --out commonSNPs
|
||||
'''
|
||||
}
|
||||
|
||||
/*
|
||||
* Process 6B: Prepare variants file for allele specific expression (ASE) analysis
|
||||
*/
|
||||
|
||||
process '6B_prepare_vcf_for_ase' {
|
||||
tag "$sampleId"
|
||||
publishDir "$params.results/$sampleId"
|
||||
|
||||
input:
|
||||
set sampleId, file('final.vcf'), file('commonSNPs.diff.sites_in_files') from vcf_and_snps_ch
|
||||
output:
|
||||
set sampleId, file('known_snps.vcf') into vcf_for_ASE
|
||||
file('AF.histogram.pdf') into gghist_pdfs
|
||||
|
||||
script:
|
||||
'''
|
||||
awk 'BEGIN{OFS="\t"} $4~/B/{print $1,$2,$3}' commonSNPs.diff.sites_in_files > test.bed
|
||||
|
||||
vcftools --vcf final.vcf --bed test.bed --recode --keep-INFO-all --stdout > known_snps.vcf
|
||||
|
||||
grep -v '#' known_snps.vcf | awk -F '\\t' '{print $10}' \
|
||||
|awk -F ':' '{print $2}'|perl -ne 'chomp($_); \
|
||||
@v=split(/\\,/,$_); if($v[0]!=0 ||$v[1] !=0)\
|
||||
{print $v[1]/($v[1]+$v[0])."\\n"; }' |awk '$1!=1' \
|
||||
>AF.4R
|
||||
|
||||
gghist.R -i AF.4R -o AF.histogram.pdf
|
||||
'''
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Group data for allele-specific expression.
|
||||
*
|
||||
* The `bam_for_ASE_ch` emits tuples having the following structure, holding the final BAM/BAI files:
|
||||
*
|
||||
* ( sample_id, file_bam, file_bai )
|
||||
*
|
||||
* The `vcf_for_ASE` channel emits tuples having the following structure, holding the VCF file:
|
||||
*
|
||||
* ( sample_id, output.vcf )
|
||||
*
|
||||
* The BAMs are grouped together and merged with VCFs having the same sample id. Finally
|
||||
* it creates a channel named `grouped_vcf_bam_bai_ch` emitting the following tuples:
|
||||
*
|
||||
* ( sample_id, file_vcf, List[file_bam], List[file_bai] )
|
||||
*/
|
||||
|
||||
bam_for_ASE_ch
|
||||
.groupTuple()
|
||||
.phase(vcf_for_ASE)
|
||||
.map{ left, right ->
|
||||
def sampleId = left[0]
|
||||
def bam = left[1]
|
||||
def bai = left[2]
|
||||
def vcf = right[1]
|
||||
tuple(sampleId, vcf, bam, bai)
|
||||
}
|
||||
.set { grouped_vcf_bam_bai_ch }
|
||||
|
||||
|
||||
/*
|
||||
* Process 6C: Allele-Specific Expression analysis with GATK ASEReadCounter.
|
||||
* Calculates allele counts at a set of positions after applying
|
||||
* filters that are tuned for enabling allele-specific expression
|
||||
* (ASE) analysis
|
||||
*/
|
||||
|
||||
process '6C_ASE_knownSNPs' {
|
||||
tag "$sampleId"
|
||||
publishDir "$params.results/$sampleId"
|
||||
|
||||
input:
|
||||
file genome from genome_file
|
||||
file index from genome_index_ch
|
||||
file dict from genome_dict_ch
|
||||
set sampleId, file(vcf), file(bam), file(bai) from grouped_vcf_bam_bai_ch
|
||||
|
||||
output:
|
||||
file "ASE.tsv"
|
||||
|
||||
script:
|
||||
"""
|
||||
echo "${bam.join('\n')}" > bam.list
|
||||
|
||||
$GATK -R ${genome} \
|
||||
-T ASEReadCounter \
|
||||
-o ASE.tsv \
|
||||
-I bam.list \
|
||||
-sites ${vcf}
|
||||
"""
|
||||
}
|
||||
50
samples/Nextflow/filenames/nextflow.config
Normal file
50
samples/Nextflow/filenames/nextflow.config
Normal file
@@ -0,0 +1,50 @@
|
||||
aws {
|
||||
region = 'eu-west-1'
|
||||
}
|
||||
|
||||
cloud {
|
||||
autoscale {
|
||||
enabled = true
|
||||
minInstances = 3
|
||||
starvingTimeout = '2 min'
|
||||
terminateWhenIdle = true
|
||||
}
|
||||
imageId = 'ami-78ds78d'
|
||||
instanceProfile = 'MyRole'
|
||||
instanceType = 'r4.large'
|
||||
sharedStorageId = 'fs-76ds76s'
|
||||
spotPrice = 0.06
|
||||
subnetId = 'subnet-8d98d7s'
|
||||
}
|
||||
|
||||
env {
|
||||
BAR = 'world'
|
||||
FOO = 'hola'
|
||||
}
|
||||
|
||||
mail {
|
||||
from = 'paolo.ditommaso@gmail.com'
|
||||
smtp {
|
||||
auth = true
|
||||
host = 'email-smtp.us-east-1.amazonaws.com'
|
||||
password = 'my-secret'
|
||||
port = 587
|
||||
starttls {
|
||||
enable = true
|
||||
required = true
|
||||
}
|
||||
user = 'my-name'
|
||||
}
|
||||
}
|
||||
|
||||
process {
|
||||
executor = 'slurm'
|
||||
queue = 'cn-el7'
|
||||
memory = '16GB'
|
||||
cpus = 8
|
||||
container = 'user/rnaseq-nf:latest'
|
||||
}
|
||||
|
||||
trace {
|
||||
fields = 'task_id,name,status,attempt,exit,queue'
|
||||
}
|
||||
135
samples/Nextflow/rnaseq.nf
Normal file
135
samples/Nextflow/rnaseq.nf
Normal file
@@ -0,0 +1,135 @@
|
||||
#!/usr/bin/env nextflow
|
||||
/*
|
||||
* This is free and unencumbered software released into the public domain.
|
||||
*
|
||||
* Anyone is free to copy, modify, publish, use, compile, sell, or
|
||||
* distribute this software, either in source code form or as a compiled
|
||||
* binary, for any purpose, commercial or non-commercial, and by any
|
||||
* means.
|
||||
*
|
||||
* In jurisdictions that recognize copyright laws, the author or authors
|
||||
* of this software dedicate any and all copyright interest in the
|
||||
* software to the public domain. We make this dedication for the benefit
|
||||
* of the public at large and to the detriment of our heirs and
|
||||
* successors. We intend this dedication to be an overt act of
|
||||
* relinquishment in perpetuity of all present and future rights to this
|
||||
* software under copyright law.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* For more information, please refer to <http://unlicense.org/>
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* Proof of concept of an RNAseq pipeline implemented with Nextflow
|
||||
*
|
||||
* Authors:
|
||||
* - Paolo Di Tommaso <paolo.ditommaso@gmail.com>
|
||||
* - Emilio Palumbo <emiliopalumbo@gmail.com>
|
||||
* - Evan Floden <evanfloden@gmail.com>
|
||||
*/
|
||||
|
||||
|
||||
params.reads = "$baseDir/data/ggal/*_{1,2}.fq"
|
||||
params.transcriptome = "$baseDir/data/ggal/ggal_1_48850000_49020000.Ggal71.500bpflank.fa"
|
||||
params.outdir = "."
|
||||
params.multiqc = "$baseDir/multiqc"
|
||||
|
||||
log.info """\
|
||||
R N A S E Q - N F P I P E L I N E
|
||||
===================================
|
||||
transcriptome: ${params.transcriptome}
|
||||
reads : ${params.reads}
|
||||
outdir : ${params.outdir}
|
||||
"""
|
||||
.stripIndent()
|
||||
|
||||
|
||||
transcriptome_file = file(params.transcriptome)
|
||||
multiqc_file = file(params.multiqc)
|
||||
|
||||
|
||||
Channel
|
||||
.fromFilePairs( params.reads )
|
||||
.ifEmpty { error "Cannot find any reads matching: ${params.reads}" }
|
||||
.into { read_pairs_ch; read_pairs2_ch }
|
||||
|
||||
|
||||
process index {
|
||||
tag "$transcriptome_file.simpleName"
|
||||
|
||||
input:
|
||||
file transcriptome from transcriptome_file
|
||||
|
||||
output:
|
||||
file 'index' into index_ch
|
||||
|
||||
script:
|
||||
"""
|
||||
salmon index --threads $task.cpus -t $transcriptome -i index
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
process quant {
|
||||
tag "$pair_id"
|
||||
|
||||
input:
|
||||
file index from index_ch
|
||||
set pair_id, file(reads) from read_pairs_ch
|
||||
|
||||
output:
|
||||
file(pair_id) into quant_ch
|
||||
|
||||
script:
|
||||
"""
|
||||
salmon quant --threads $task.cpus --libType=U -i index -1 ${reads[0]} -2 ${reads[1]} -o $pair_id
|
||||
"""
|
||||
}
|
||||
|
||||
process fastqc {
|
||||
tag "FASTQC on $sample_id"
|
||||
|
||||
input:
|
||||
set sample_id, file(reads) from read_pairs2_ch
|
||||
|
||||
output:
|
||||
file("fastqc_${sample_id}_logs") into fastqc_ch
|
||||
|
||||
|
||||
script:
|
||||
"""
|
||||
mkdir fastqc_${sample_id}_logs
|
||||
fastqc -o fastqc_${sample_id}_logs -f fastq -q ${reads}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
process multiqc {
|
||||
publishDir params.outdir, mode:'copy'
|
||||
|
||||
input:
|
||||
file('*') from quant_ch.mix(fastqc_ch).collect()
|
||||
file(config) from multiqc_file
|
||||
|
||||
output:
|
||||
file('multiqc_report.html')
|
||||
|
||||
script:
|
||||
"""
|
||||
cp $config/* .
|
||||
echo "custom_logo: \$PWD/logo.png" >> multiqc_config.yaml
|
||||
multiqc .
|
||||
"""
|
||||
}
|
||||
|
||||
workflow.onComplete {
|
||||
println ( workflow.success ? "\nDone! Open the following report in your browser --> $params.outdir/multiqc_report.html\n" : "Oops .. something went wrong" )
|
||||
}
|
||||
1
vendor/README.md
vendored
1
vendor/README.md
vendored
@@ -239,6 +239,7 @@ This is a list of grammars that Linguist selects to provide syntax highlighting
|
||||
- **NetLinx+ERB:** [amclain/sublime-netlinx](https://github.com/amclain/sublime-netlinx)
|
||||
- **NetLogo:** [textmate/lisp.tmbundle](https://github.com/textmate/lisp.tmbundle)
|
||||
- **NewLisp:** [textmate/lisp.tmbundle](https://github.com/textmate/lisp.tmbundle)
|
||||
- **Nextflow:** [nextflow-io/atom-language-nextflow](https://github.com/nextflow-io/atom-language-nextflow)
|
||||
- **Nginx:** [brandonwamboldt/sublime-nginx](https://github.com/brandonwamboldt/sublime-nginx)
|
||||
- **Nim:** [Varriount/NimLime](https://github.com/Varriount/NimLime)
|
||||
- **Ninja:** [khyo/language-ninja](https://github.com/khyo/language-ninja)
|
||||
|
||||
1
vendor/grammars/atom-language-nextflow
vendored
Submodule
1
vendor/grammars/atom-language-nextflow
vendored
Submodule
Submodule vendor/grammars/atom-language-nextflow added at a8a91d7e10
25
vendor/licenses/grammar/atom-language-nextflow.txt
vendored
Normal file
25
vendor/licenses/grammar/atom-language-nextflow.txt
vendored
Normal file
@@ -0,0 +1,25 @@
|
||||
---
|
||||
type: grammar
|
||||
name: atom-language-nextflow
|
||||
license: mit
|
||||
---
|
||||
Copyright (c) 2018 Paolo Di Tommaso
|
||||
Copyright (c) 2014-2017 Jakehp https://github.com/Jakehp/language-groovy
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
Reference in New Issue
Block a user