diff --git a/CHANGELOG.md b/CHANGELOG.md index 4ff93e31..78ed09c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [[#435]](https://github.com/nf-core/smrnaseq/pull/435) - Replace local instances of bowtie for nf-core [`bowtie2`](https://github.com/nf-core/smrnaseq/issues/434) and [`bowtie1`](https://github.com/nf-core/smrnaseq/issues/433) - Additionally adds a `bioawk` module that cleans fasta files. - [[#438]](https://github.com/nf-core/smrnaseq/pull/438) - Update [Mirtop to latest version](https://github.com/nf-core/smrnaseq/issues/437) - Process samples separately and join results with `CSVTK_JOIN`. - [[#439]](https://github.com/nf-core/smrnaseq/pull/439) - Fix [Fix paired end samples processing](https://github.com/nf-core/smrnaseq/issues/415) - Fix paired end sample handling and add test profile. +- [[#441]](https://github.com/nf-core/smrnaseq/pull/441) - Migrate [local contaminant bowtie to nf-core](https://github.com/nf-core/smrnaseq/issues/436) - Replace local processes with `BOWTIE2_ALIGN`. ## v2.3.1 - 2024-04-18 - Gray Zinc Dalmation Patch diff --git a/conf/modules.config b/conf/modules.config index de9f4ace..44553227 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -242,6 +242,7 @@ process { publishDir = [ path: { "${params.outdir}/contaminant_filter/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, mode: params.publish_dir_mode, + enabled: false, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } @@ -264,6 +265,62 @@ process { publishDir = [ enabled: false ] } + withName: 'NFCORE_SMRNASEQ:CONTAMINANT_FILTER:BOWTIE2_ALIGN.*' { + ext.args = '--very-sensitive-local -k 1' // same bowtie2 flags the removed BOWTIE_MAP_CONTAMINANTS script hard-coded + ext.prefix = {"${meta.contaminant}_${meta.id}"} // per-task prefix: <contaminant>_<sample id> + publishDir = [ enabled: false ] + } + + withName: 'NFCORE_SMRNASEQ:CONTAMINANT_FILTER:STATS_GAWK_RRNA' { + ext.prefix = {"${meta.contaminant}_${meta.id}"} + ext.suffix = "stats" + ext.args2 = '\'BEGIN {tot=0} {if(NR==4 || NR==5){tot+=\$1}} END {print "\\"' + "rRNA" + '\\": " tot}\'' // awk program: sum field 1 of input lines 4 and 5, then print "rRNA": <sum>; NOTE(review): presumably the two "aligned" rows of the bowtie2 summary log - confirm + publishDir = [ enabled: false ] + } + + withName: 'NFCORE_SMRNASEQ:CONTAMINANT_FILTER:STATS_GAWK_TRNA' { + ext.prefix = {"${meta.contaminant}_${meta.id}"} + ext.suffix = "stats" + ext.args2 = '\'BEGIN {tot=0} {if(NR==4 || NR==5){tot+=\$1}} END {print "\\"' + "tRNA" + '\\": " tot}\'' // same awk program as STATS_GAWK_RRNA, printed under the key "tRNA" + publishDir = [ enabled: false ] + } + + withName: 'NFCORE_SMRNASEQ:CONTAMINANT_FILTER:STATS_GAWK_CDNA' { + ext.prefix = {"${meta.contaminant}_${meta.id}"} + ext.suffix = "stats" + ext.args2 = '\'BEGIN {tot=0} {if(NR==4 || NR==5){tot+=\$1}} END {print "\\"' + "cDNA" + '\\": " tot}\'' // same awk program as STATS_GAWK_RRNA, printed under the key "cDNA" + publishDir = [ enabled: false ] + } + withName: 'NFCORE_SMRNASEQ:CONTAMINANT_FILTER:STATS_GAWK_NCRNA' { + ext.prefix = {"${meta.contaminant}_${meta.id}"} + ext.suffix = "stats" + ext.args2 = '\'BEGIN {tot=0} {if(NR==4 || NR==5){tot+=\$1}} END {print "\\"' + "ncRNA" + '\\": " tot}\'' // same awk program as STATS_GAWK_RRNA, printed under the key "ncRNA" + publishDir = [ enabled: false ] + } + + withName: 'NFCORE_SMRNASEQ:CONTAMINANT_FILTER:STATS_GAWK_PIRNA' { + ext.prefix = {"${meta.contaminant}_${meta.id}"} + ext.suffix = "stats" + ext.args2 = '\'BEGIN {tot=0} {if(NR==4 || NR==5){tot+=\$1}} END {print "\\"' + "piRNA" + '\\": " tot}\'' // same awk program as STATS_GAWK_RRNA, printed under the key "piRNA" + publishDir = [ enabled: false ] + } + + withName: 'NFCORE_SMRNASEQ:CONTAMINANT_FILTER:STATS_GAWK_OTHER' { + ext.prefix = {"${meta.contaminant}_${meta.id}"} + ext.suffix = "stats" + ext.args2 = '\'BEGIN {tot=0} {if(NR==4 || NR==5){tot+=\$1}} END {print "\\"' + "other" + '\\": " tot}\'' // same awk program as STATS_GAWK_RRNA, printed under the key "other" + publishDir = [ enabled: false 
] + } + + withName: 'NFCORE_SMRNASEQ:CONTAMINANT_FILTER:FILTER_STATS' { + publishDir = [ + path: { "${params.outdir}/contaminant_filter/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, + mode: params.publish_dir_mode, + enabled: params.save_intermediates, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + // // MIRNA_QUANT // diff --git a/modules.json b/modules.json index 84c3771f..940686a8 100644 --- a/modules.json +++ b/modules.json @@ -21,6 +21,11 @@ "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", "installed_by": ["modules"] }, + "bowtie2/align": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "bowtie2/build": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", diff --git a/modules/local/bowtie_map_contaminants.nf b/modules/local/bowtie_map_contaminants.nf deleted file mode 100644 index c9863ab3..00000000 --- a/modules/local/bowtie_map_contaminants.nf +++ /dev/null @@ -1,48 +0,0 @@ -process BOWTIE_MAP_CONTAMINANTS { - label 'process_medium' - - conda 'bowtie2=2.4.5' - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bowtie2:2.4.5--py39hd2f7db1_2' : - 'biocontainers/bowtie2:2.4.5--py39hd2f7db1_2' }" - - input: - tuple val(meta), path(reads) - path index - val contaminant_type - - output: - tuple val(meta), path("*sam") , emit: bam - tuple val(meta), path('*.filter.unmapped.contaminant.fastq'), emit: unmapped - path "versions.yml" , emit: versions - path "filtered.*.stats" , emit: stats - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: "" - - """ - INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/\\.rev.1.bt2\$//"` - bowtie2 \\ - -x \$INDEX \\ - -U ${reads} \\ - --threads ${task.cpus} \\ - --un ${meta.id}.${contaminant_type}.filter.unmapped.contaminant.fastq \\ - --very-sensitive-local \\ - -k 1 \\ - -S ${meta.id}.filter.contaminant.sam \\ - ${args} \\ - > ${meta.id}.contaminant_bowtie.log 2>&1 - - # extracting number of reads from bowtie logs - awk -v type=${contaminant_type} 'BEGIN{tot=0} {if(NR==4 || NR == 5){tot += \$1}} END {print "\\""type"\\": "tot }' ${meta.id}.contaminant_bowtie.log | tr -d , > filtered.${meta.id}_${contaminant_type}.stats - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//' | tr -d '\0') - END_VERSIONS - """ - -} diff --git a/modules/local/filter_stats.nf b/modules/local/filter_stats.nf index 3bc1000e..2c51c35e 100644 --- a/modules/local/filter_stats.nf +++ b/modules/local/filter_stats.nf @@ -1,5 +1,6 @@ process FILTER_STATS { label 'process_medium' + tag "$meta.id" conda 'bowtie2=2.4.5' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
@@ -7,12 +8,11 @@ process FILTER_STATS { 'biocontainers/bowtie2:2.4.5--py39hd2f7db1_2' }" input: - tuple val(meta), path(reads) - path stats_files + tuple val(meta), path(reads), path(stats_files) output: path "*_mqc.yaml" , emit: stats - tuple val(meta), path('*.filtered.fastq.gz'), emit: reads + tuple val(meta), path('*.filtered.fastq.gz'), emit: reads, optional: true path "versions.yml" , emit: versions when: @@ -20,11 +20,22 @@ process FILTER_STATS { script: """ - readnumber=\$(wc -l ${reads} | awk '{ print \$1/4 }') - cat ./filtered.${meta.id}_*.stats | \\ + + if [[ ${reads} == *.gz ]]; then # gzipped input: count lines of the decompressed stream + readnumber=\$(zcat ${reads} | wc -l | awk '{ print \$1/4 }') + else + readnumber=\$(wc -l ${reads} | awk '{ print \$1/4 }') + fi + + cat ./*${meta.id}*.stats | \\ tr '\\n' ', ' | \\ awk -v sample=${meta.id} -v readnumber=\$readnumber '{ print "id: \\"my_pca_section\\"\\nsection_name: \\"Contamination Filtering\\"\\ndescription: \\"This plot shows the amount of reads filtered by contaminant type.\\"\\nplot_type: \\"bargraph\\"\\npconfig:\\n id: \\"contamination_filter_plot\\"\\n title: \\"Contamination Plot\\"\\n ylab: \\"Number of reads\\"\\ndata:\\n "sample": {"\$0"\\"remaining reads\\": "readnumber"}" }' > ${meta.id}.contamination_mqc.yaml - gzip -c ${reads} > ${meta.id}.filtered.fastq.gz + + if [[ ${reads} == *.gz ]]; then # already compressed: copy as-is rather than gzip it a second time + cp ${reads} ${meta.id}.filtered.fastq.gz + else + gzip -c ${reads} > ${meta.id}.filtered.fastq.gz + fi cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/bowtie2/align/environment.yml b/modules/nf-core/bowtie2/align/environment.yml new file mode 100644 index 00000000..9090f218 --- /dev/null +++ b/modules/nf-core/bowtie2/align/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bowtie2=2.5.2 + - bioconda::samtools=1.18 + - conda-forge::pigz=2.6 diff --git a/modules/nf-core/bowtie2/align/main.nf b/modules/nf-core/bowtie2/align/main.nf new file mode 100644 index 
00000000..809525ad --- /dev/null +++ b/modules/nf-core/bowtie2/align/main.nf @@ -0,0 +1,117 @@ +process BOWTIE2_ALIGN { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:f70b31a2db15c023d641c32f433fb02cd04df5a6-0' : + 'biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:f70b31a2db15c023d641c32f433fb02cd04df5a6-0' }" + + input: + tuple val(meta) , path(reads) + tuple val(meta2), path(index) + tuple val(meta3), path(fasta) + val save_unaligned + val sort_bam + + output: + tuple val(meta), path("*.sam") , emit: sam , optional:true + tuple val(meta), path("*.bam") , emit: bam , optional:true + tuple val(meta), path("*.cram") , emit: cram , optional:true + tuple val(meta), path("*.csi") , emit: csi , optional:true + tuple val(meta), path("*.crai") , emit: crai , optional:true + tuple val(meta), path("*.log") , emit: log + tuple val(meta), path("*fastq.gz") , emit: fastq , optional:true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: "" + def args2 = task.ext.args2 ?: "" + def prefix = task.ext.prefix ?: "${meta.id}" + + def unaligned = "" + def reads_args = "" + if (meta.single_end) { + unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : "" + reads_args = "-U ${reads}" + } else { + unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : "" + reads_args = "-1 ${reads[0]} -2 ${reads[1]}" + } + + def samtools_command = sort_bam ? 'sort' : 'view' + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension_matcher = (args2 =~ extension_pattern) + def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam" // output format is read from --output-fmt/-O in args2; bam when neither is given + def reference = fasta && extension=="cram" ? 
"--reference ${fasta}" : "" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + + """ + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/\\.rev.1.bt2\$//"` + [ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed "s/\\.rev.1.bt2l\$//"` + [ -z "\$INDEX" ] && echo "Bowtie2 index files not found" 1>&2 && exit 1 + + bowtie2 \\ + -x \$INDEX \\ + $reads_args \\ + --threads $task.cpus \\ + $unaligned \\ + $args \\ + 2> >(tee ${prefix}.bowtie2.log >&2) \\ + | samtools $samtools_command $args2 --threads $task.cpus ${reference} -o ${prefix}.${extension} - + + if [ -f ${prefix}.unmapped.fastq.1.gz ]; then + mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz + fi + + if [ -f ${prefix}.unmapped.fastq.2.gz ]; then + mv ${prefix}.unmapped.fastq.2.gz ${prefix}.unmapped_2.fastq.gz + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + + stub: + def args2 = task.ext.args2 ?: "" + def prefix = task.ext.prefix ?: "${meta.id}" + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension = (args2 =~ extension_pattern).getCount() > 0 ? (args2 =~ extension_pattern)[0][2].toLowerCase() : "bam" // find-style match, as in the script block: ==~ only succeeded when args2 was nothing but the format option + def create_unmapped = "" + if (meta.single_end) { + create_unmapped = save_unaligned ? "touch ${prefix}.unmapped.fastq.gz" : "" + } else { + create_unmapped = save_unaligned ? "touch ${prefix}.unmapped_1.fastq.gz && touch ${prefix}.unmapped_2.fastq.gz" : "" + } + def reference = fasta && extension=="cram" ? 
"--reference ${fasta}" : "" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + + def create_index = "" + if (extension == "cram") { + create_index = "touch ${prefix}.crai" + } else if (extension == "bam") { + create_index = "touch ${prefix}.csi" + } + + """ + touch ${prefix}.${extension} + ${create_index} + touch ${prefix}.bowtie2.log + ${create_unmapped} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + +} diff --git a/modules/nf-core/bowtie2/align/meta.yml b/modules/nf-core/bowtie2/align/meta.yml new file mode 100644 index 00000000..f841f781 --- /dev/null +++ b/modules/nf-core/bowtie2/align/meta.yml @@ -0,0 +1,132 @@ +name: bowtie2_align +description: Align reads to a reference genome using bowtie2 +keywords: + - align + - map + - fasta + - fastq + - genome + - reference +tools: + - bowtie2: + description: | + Bowtie 2 is an ultrafast and memory-efficient tool for aligning + sequencing reads to long reference sequences. + homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml + documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml + doi: 10.1038/nmeth.1923 + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test', single_end:false ] + - index: + type: file + description: Bowtie2 genome index files + pattern: "*.{bt2,bt2l}" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Bowtie2 genome fasta file + pattern: "*.fasta" + - - save_unaligned: + type: boolean + description: | + Save reads that do not map to the reference (true) or discard them (false) + (default: false) + - - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" +output: + - sam: + - meta: + type: file + description: Output SAM file containing read alignments + pattern: "*.sam" + - "*.sam": + type: file + description: Output SAM file containing read alignments + pattern: "*.sam" + - bam: + - meta: + type: file + description: Output BAM file containing read alignments + pattern: "*.bam" + - "*.bam": + type: file + description: Output BAM file containing read alignments + pattern: "*.bam" + - cram: + - meta: + type: file + description: Output CRAM file containing read alignments + pattern: "*.cram" + - "*.cram": + type: file + description: Output CRAM file containing read alignments + pattern: "*.cram" + - csi: + - meta: + type: file + description: Output SAM/BAM index for large inputs + pattern: "*.csi" + - "*.csi": + type: file + description: Output SAM/BAM index for large inputs + pattern: "*.csi" + - crai: + - meta: + type: file + description: Output CRAM index + pattern: "*.crai" + - "*.crai": + type: file + description: Output CRAM index + pattern: "*.crai" + - log: + - meta: + type: file + description: Alignment log + pattern: "*.log" + - "*.log": + type: file + description: Alignment log + pattern: "*.log" + - fastq: + - meta: + type: file + description: Unaligned FastQ files + pattern: "*.fastq.gz" + - "*fastq.gz": + type: file + description: Unaligned FastQ files + pattern: "*.fastq.gz" + - versions: + - versions.yml: + 
type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bowtie2/align/tests/cram_crai.config b/modules/nf-core/bowtie2/align/tests/cram_crai.config new file mode 100644 index 00000000..03f1d5e5 --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/cram_crai.config @@ -0,0 +1,5 @@ +process { + withName: BOWTIE2_ALIGN { + ext.args2 = '--output-fmt cram --write-index' + } +} diff --git a/modules/nf-core/bowtie2/align/tests/large_index.config b/modules/nf-core/bowtie2/align/tests/large_index.config new file mode 100644 index 00000000..fdc1c59d --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/large_index.config @@ -0,0 +1,5 @@ +process { + withName: BOWTIE2_BUILD { + ext.args = '--large-index' + } +} \ No newline at end of file diff --git a/modules/nf-core/bowtie2/align/tests/main.nf.test b/modules/nf-core/bowtie2/align/tests/main.nf.test new file mode 100644 index 00000000..0de5950f --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/main.nf.test @@ -0,0 +1,623 @@ +nextflow_process { + + name "Test Process BOWTIE2_ALIGN" + script "../main.nf" + process "BOWTIE2_ALIGN" + tag "modules" + tag "modules_nfcore" + tag "bowtie2" + tag "bowtie2/build" + tag "bowtie2/align" + + test("sarscov2 - fastq, index, fasta, false, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, false, false - sam") { + + config "./sam.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.sam[0][1]).readLines()[0..4], + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, false, false - sam2") { + + config "./sam2.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.sam[0][1]).readLines()[0..4], + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, false, true - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = 
BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false, true - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, large_index, fasta, false, false - bam") { + + config "./large_index.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], large_index, fasta, false, false - bam") { + + config "./large_index.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, true, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: 
true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, true, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, true, true - cram") { + + config "./cram_crai.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ 
id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = true //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.cram[0][1]).name, + file(process.out.crai[0][1]).name + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false, false - stub") { + + options "-stub" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, + file(process.out.log[0][1]).name, 
+ process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, true, false - stub") { + + options "-stub" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, + file(process.out.log[0][1]).name, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bowtie2/align/tests/main.nf.test.snap b/modules/nf-core/bowtie2/align/tests/main.nf.test.snap new file mode 100644 index 00000000..028e7da6 --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/main.nf.test.snap @@ -0,0 +1,311 @@ +{ + "sarscov2 - [fastq1, fastq2], large_index, fasta, false, false - bam": { + "content": [ + "test.bam", + [ + [ + { + "id": "test", + "single_end": false + }, + "test.bowtie2.log:md5,bd89ce1b28c93bf822bae391ffcedd19" + ] + ], + [ + + ], + [ + "versions.yml:md5,01d18ab035146ea790e9a0f70adb758f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T13:19:25.337323" + }, + "sarscov2 - fastq, index, fasta, false, false - sam2": { + "content": [ + [ + 
"ERR5069949.2151832\t16\tMT192765.1\t17453\t42\t150M\t*\t0\t0\tACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGTCGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTGGTTTATGA\tAAAA return [[id:"rRNA"], it]} ) + INDEX_RRNA ( ch_rrna ) ch_versions = ch_versions.mix(INDEX_RRNA.out.versions) - MAP_RRNA ( ch_reads_for_mirna, INDEX_RRNA.out.index.map{meta, it -> return [it]}.first(), Channel.value('rRNA') ) - ch_versions = ch_versions.mix(MAP_RRNA.out.versions) - ch_filter_stats = ch_filter_stats.mix(MAP_RRNA.out.stats.ifEmpty(null)) - MAP_RRNA.out.unmapped.set { rrna_reads } + + // Add meta.contaminant to input reads channel + ch_reads_for_mirna = ch_reads_for_mirna.map{meta, fastq -> return [[id:meta.id, contaminant: "rRNA", single_end:meta.single_end], fastq]} + + // Map which reads are rRNAs + BOWTIE2_ALIGN_RRNA(ch_reads_for_mirna, INDEX_RRNA.out.index.first(), [[],[]], true, false) + ch_versions = ch_versions.mix(BOWTIE2_ALIGN_RRNA.out.versions) + + // Obtain how many hits were contaminants + ch_bowtie = BOWTIE2_ALIGN_RRNA.out.log + + STATS_GAWK_RRNA(ch_bowtie, []) + ch_versions = ch_versions.mix(STATS_GAWK_RRNA.out.versions) + + // Remove meta.contaminant and collect all contaminant stats in a single channel + ch_filter_stats = ch_filter_stats + .mix(STATS_GAWK_RRNA.out.output + .map{meta, stats -> return [[id:meta.id, single_end:meta.single_end], stats]} + .ifEmpty(null)) + + // Assign clean reads to new channel + rrna_reads = BOWTIE2_ALIGN_RRNA.out.fastq } rrna_reads.set { trna_reads } if (params.trna) { // Index DB and filter $rrna_reads emit: $trna_reads - INDEX_TRNA ( ch_trna.map{it -> return [[id:"tRNA"], it]} ) + INDEX_TRNA ( ch_trna ) ch_versions = ch_versions.mix(INDEX_TRNA.out.versions) - MAP_TRNA ( rrna_reads, INDEX_TRNA.out.index.map{meta, it -> return [it]}.first(), Channel.value("tRNA") ) - ch_versions = ch_versions.mix(MAP_TRNA.out.versions) - ch_filter_stats = 
ch_filter_stats.mix(MAP_TRNA.out.stats.ifEmpty(null)) - MAP_TRNA.out.unmapped.set { trna_reads } + + // Add meta.contaminant to input reads channel + rrna_reads = rrna_reads.map{meta, fastq -> return [[id:meta.id, contaminant: "tRNA", single_end:meta.single_end], fastq]} + + // Map which reads are tRNAs + BOWTIE2_ALIGN_TRNA(rrna_reads, INDEX_TRNA.out.index.first(), [[],[]], true, false) + ch_versions = ch_versions.mix(BOWTIE2_ALIGN_TRNA.out.versions) + + // Obtain how many hits were contaminants + ch_bowtie = BOWTIE2_ALIGN_TRNA.out.log + + STATS_GAWK_TRNA(ch_bowtie, []) + ch_versions = ch_versions.mix(STATS_GAWK_TRNA.out.versions) + + // Remove meta.contaminant and collect all contaminant stats in a single channel + ch_filter_stats = ch_filter_stats + .mix(STATS_GAWK_TRNA.out.output + .map{meta, stats -> return [[id:meta.id, single_end:meta.single_end], stats]} + .ifEmpty(null)) + + // Assign clean reads to new channel + trna_reads = BOWTIE2_ALIGN_TRNA.out.fastq } trna_reads.set { cdna_reads } @@ -99,16 +143,29 @@ workflow CONTAMINANT_FILTER { SEQKIT_GREP_CDNA(ch_cdna, ch_pattern) ch_versions = ch_versions.mix(SEQKIT_GREP_CDNA.out.versions) - // Remove metamap to make it compatible with previous code - ch_filtered_cdna = SEQKIT_GREP_CDNA.out.filter.map{meta, file -> [file]} - // Previous original code: INDEX_CDNA ( SEQKIT_GREP_CDNA.out.filter ) ch_versions = ch_versions.mix(INDEX_CDNA.out.versions) - MAP_CDNA ( trna_reads, INDEX_CDNA.out.index.map{meta, it -> return [it]}.first(), Channel.value('cDNA')) - ch_versions = ch_versions.mix(MAP_CDNA.out.versions) - ch_filter_stats = ch_filter_stats.mix(MAP_CDNA.out.stats.ifEmpty(null)) - MAP_CDNA.out.unmapped.set { cdna_reads } + + // Add meta.contaminant to input reads channel + trna_reads = trna_reads.map{meta, fastq -> return [[id:meta.id, contaminant: "cDNA", single_end:meta.single_end], fastq]} + + // Map which reads are cDNA + BOWTIE2_ALIGN_CDNA(trna_reads, INDEX_CDNA.out.index.first(), [[],[]], true, false) + 
ch_versions = ch_versions.mix(BOWTIE2_ALIGN_CDNA.out.versions) + + // Obtain how many hits were contaminants + STATS_GAWK_CDNA(BOWTIE2_ALIGN_CDNA.out.log, []) + ch_versions = ch_versions.mix(STATS_GAWK_CDNA.out.versions) + + // Remove meta.contaminant and collect all contaminant stats in a single channel + ch_filter_stats = ch_filter_stats + .mix(STATS_GAWK_CDNA.out.output + .map{meta, stats -> return [[id:meta.id, single_end:meta.single_end], stats]} + .ifEmpty(null)) + + // Assign clean reads to new channel + cdna_reads = BOWTIE2_ALIGN_CDNA.out.fastq } cdna_reads.set { ncrna_reads } @@ -133,16 +190,29 @@ workflow CONTAMINANT_FILTER { SEQKIT_GREP_NCRNA(ch_ncrna, ch_pattern) ch_versions = ch_versions.mix(SEQKIT_GREP_NCRNA.out.versions) - // Remove metamap to make it compatible with previous code - ch_filtered_ncrna = SEQKIT_GREP_NCRNA.out.filter.map{meta, file -> [file]} - // Previous original code: INDEX_NCRNA ( SEQKIT_GREP_NCRNA.out.filter ) ch_versions = ch_versions.mix(INDEX_NCRNA.out.versions) - MAP_NCRNA ( cdna_reads, INDEX_NCRNA.out.index.map{meta, it -> return [it]}.first(), Channel.value('ncRNA') ) - ch_versions = ch_versions.mix(MAP_NCRNA.out.versions) - ch_filter_stats = ch_filter_stats.mix(MAP_NCRNA.out.stats.ifEmpty(null)) - MAP_NCRNA.out.unmapped.set { ncrna_reads } + + // Add meta.contaminant to input reads channel + cdna_reads = cdna_reads.map{meta, fastq -> return [[id:meta.id, contaminant: "ncRNA", single_end:meta.single_end], fastq]} + + // Map which reads are ncRNA + BOWTIE2_ALIGN_NCRNA(cdna_reads, INDEX_NCRNA.out.index.first(), [[],[]], true, false) + ch_versions = ch_versions.mix(BOWTIE2_ALIGN_NCRNA.out.versions) + + // Obtain how many hits were contaminants + STATS_GAWK_NCRNA(BOWTIE2_ALIGN_NCRNA.out.log, []) + ch_versions = ch_versions.mix(STATS_GAWK_NCRNA.out.versions) + + // Remove meta.contaminant and collect all contaminant stats in a single channel + ch_filter_stats = ch_filter_stats + .mix(STATS_GAWK_NCRNA.out.output + .map{meta, stats 
-> return [[id:meta.id, single_end:meta.single_end], stats]} + .ifEmpty(null)) + + // Assign clean reads to new channel + ncrna_reads = BOWTIE2_ALIGN_NCRNA.out.fastq } ncrna_reads.set { pirna_reads } @@ -167,16 +237,29 @@ workflow CONTAMINANT_FILTER { SEQKIT_GREP_PIRNA(ch_pirna, ch_pattern) ch_versions = ch_versions.mix(SEQKIT_GREP_PIRNA.out.versions) - // Remove metamap to make it compatible with previous code - ch_filtered_pirna = SEQKIT_GREP_PIRNA.out.filter.map{meta, file -> [file]} - // Previous original code: INDEX_PIRNA ( SEQKIT_GREP_PIRNA.out.filter ) ch_versions = ch_versions.mix(INDEX_PIRNA.out.versions) - MAP_PIRNA ( ncrna_reads, INDEX_PIRNA.out.index.map{meta, it -> return [it]}.first(), Channel.value('piRNA')) - ch_versions = ch_versions.mix(MAP_PIRNA.out.versions) - ch_filter_stats = ch_filter_stats.mix(MAP_PIRNA.out.stats.ifEmpty(null)) - MAP_PIRNA.out.unmapped.set { pirna_reads } + + // Add meta.contaminant to input reads channel + ncrna_reads = ncrna_reads.map{meta, fastq -> return [[id:meta.id, contaminant: "piRNA", single_end:meta.single_end], fastq]} + + // Map which reads are piRNA + BOWTIE2_ALIGN_PIRNA(ncrna_reads, INDEX_PIRNA.out.index.first(), [[],[]], true, false) + ch_versions = ch_versions.mix(BOWTIE2_ALIGN_PIRNA.out.versions) + + // Obtain how many hits were contaminants + STATS_GAWK_PIRNA(BOWTIE2_ALIGN_PIRNA.out.log, []) + ch_versions = ch_versions.mix(STATS_GAWK_PIRNA.out.versions) + + // Remove meta.contaminant and collect all contaminant stats in a single channel + ch_filter_stats = ch_filter_stats + .mix(STATS_GAWK_PIRNA.out.output + .map{meta, stats -> return [[id:meta.id, single_end:meta.single_end], stats]} + .ifEmpty(null)) + + // Assign clean reads to new channel + pirna_reads = BOWTIE2_ALIGN_PIRNA.out.fastq } pirna_reads.set { other_cont_reads } @@ -201,22 +284,44 @@ workflow CONTAMINANT_FILTER { SEQKIT_GREP_OTHER(ch_other_contamination, ch_pattern) ch_versions = ch_versions.mix(SEQKIT_GREP_OTHER.out.versions) - // Remove
metamap to make it compatible with previous code - ch_filtered_other = SEQKIT_GREP_OTHER.out.filter.map{meta, file -> [file]} - // Previous original code: INDEX_OTHER ( SEQKIT_GREP_OTHER.out.filter ) ch_versions = ch_versions.mix(INDEX_OTHER.out.versions) - MAP_OTHER ( ncrna_reads, INDEX_OTHER.out.index.map{meta, it -> return [it]}.first(), Channel.value('other')) - ch_versions = ch_versions.mix(MAP_OTHER.out.versions) - ch_filter_stats = ch_filter_stats.mix(MAP_OTHER.out.stats.ifEmpty(null)) - MAP_OTHER.out.unmapped.set { other_cont_reads } + + // Add meta.contaminant to input reads channel + pirna_reads = pirna_reads.map{meta, fastq -> return [[id:meta.id, contaminant: "other", single_end:meta.single_end], fastq]} + + // Map which reads are other + BOWTIE2_ALIGN_OTHER(pirna_reads, INDEX_OTHER.out.index.first(), [[],[]], true, false) + ch_versions = ch_versions.mix(BOWTIE2_ALIGN_OTHER.out.versions) + + // Obtain how many hits were contaminants + STATS_GAWK_OTHER(BOWTIE2_ALIGN_OTHER.out.log, []) + ch_versions = ch_versions.mix(STATS_GAWK_OTHER.out.versions) + + // Remove meta.contaminant and collect all contaminant stats in a single channel + ch_filter_stats = ch_filter_stats + .mix(STATS_GAWK_OTHER.out.output + .map{meta, stats -> return [[id:meta.id, single_end:meta.single_end], stats]} + .ifEmpty(null)) + + // Assign clean reads to new channel + other_cont_reads = BOWTIE2_ALIGN_OTHER.out.fastq } - FILTER_STATS ( other_cont_reads, ch_filter_stats.collect() ) + // Remove meta.contaminant from final set of reads + other_cont_reads = other_cont_reads + .map{meta, reads -> return [[id:meta.id, single_end:meta.single_end], reads]} + + // Create channel with reads and contaminants + ch_reads_contaminants = other_cont_reads.join(ch_filter_stats.groupTuple()) + + // Filter all contaminant stats and create MultiQC file + FILTER_STATS ( ch_reads_contaminants ) + FILTER_STATS.out.stats.dump(tag:"FILTER_STATS.out.stats") emit: - filtered_reads = FILTER_STATS.out.reads - versions = ch_versions.mix(FILTER_STATS.out.versions) - filter_stats = FILTER_STATS.out.stats + filtered_reads = other_cont_reads // channel: [ val(meta), path(fastq) ] +
filter_stats = FILTER_STATS.out.stats // channel: [ path(stats) ] + versions = ch_versions.mix(FILTER_STATS.out.versions) } diff --git a/subworkflows/local/mirna_quant.nf b/subworkflows/local/mirna_quant.nf index c93a09ae..ca97d20f 100644 --- a/subworkflows/local/mirna_quant.nf +++ b/subworkflows/local/mirna_quant.nf @@ -58,8 +58,6 @@ workflow MIRNA_QUANT { BAM_STATS_MATURE ( BOWTIE_MAP_MATURE.out.bam, FORMAT_MATURE.out.formatted_fasta ) ch_versions = ch_versions.mix(BAM_STATS_MATURE.out.versions) - BAM_STATS_MATURE.out.stats.dump(tag:"BAM_STATS_MATURE") - PARSE_HAIRPIN ( ch_reference_hairpin, ch_parse_species_input ) ch_hairpin_parsed = PARSE_HAIRPIN.out.parsed_fasta diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf index 5a6053b7..9154ae90 100644 --- a/subworkflows/local/prepare_genome/main.nf +++ b/subworkflows/local/prepare_genome/main.nf @@ -37,8 +37,8 @@ workflow PREPARE_GENOME { ch_mirna_gtf = val_mirna_gtf ? Channel.fromPath(val_mirna_gtf, checkIfExists: true) : ( mirna_gtf_from_species ? Channel.fromPath(mirna_gtf_from_species, checkIfExists: true).collect() : Channel.empty() ) ch_mirna_adapters = params.with_umi ? [] : Channel.fromPath(val_fastp_known_mirna_adapters, checkIfExists: true).collect() - ch_rrna = val_rrna ? Channel.fromPath(val_rrna) : Channel.empty() - ch_trna = val_trna ? Channel.fromPath(val_trna) : Channel.empty() + ch_rrna = val_rrna ? Channel.fromPath(val_rrna).map{ it -> [ [id:'rRNA'], it ] } : Channel.empty() + ch_trna = val_trna ? Channel.fromPath(val_trna).map{ it -> [ [id:'tRNA'], it ] }.collect() : Channel.empty() ch_cdna = val_cdna ? Channel.fromPath(val_cdna).map{ it -> [ [id:'cDNA'], it ] }.collect() : Channel.empty() ch_ncrna = val_ncrna ? Channel.fromPath(val_ncrna).map{ it -> [ [id:'ncRNA'], it ] }.collect() : Channel.empty() ch_pirna = val_pirna ? 
Channel.fromPath(val_pirna).map{ it -> [ [id:'piRNA'], it ] }.collect() : Channel.empty() diff --git a/tests/test_contamination_tech_reps.nf.test b/tests/test_contamination_tech_reps.nf.test index 629c5d4a..02266078 100644 --- a/tests/test_contamination_tech_reps.nf.test +++ b/tests/test_contamination_tech_reps.nf.test @@ -20,7 +20,7 @@ nextflow_pipeline { assertAll( { assert workflow.success }, { assert snapshot(UTILS.removeNextflowVersion("$outputDir")).match("software_versions") }, - { assert workflow.trace.succeeded().size() == 91 }, + { assert workflow.trace.succeeded().size() == 100 }, { assert snapshot( path("$outputDir/contaminant_filter/filter/Clone1_N1_trimmed.contamination_mqc.yaml").exists(), //TODO see if we can make these deterministic or why they are non-deterministic diff --git a/tests/test_contamination_tech_reps.nf.test.snap b/tests/test_contamination_tech_reps.nf.test.snap index bcedfc93..05b423d6 100644 --- a/tests/test_contamination_tech_reps.nf.test.snap +++ b/tests/test_contamination_tech_reps.nf.test.snap @@ -34,13 +34,13 @@ }, "software_versions": { "content": [ - "{BLAT_CDNA={blat=36}, BLAT_NCRNA={blat=36}, BOWTIE_MAP_HAIRPIN={bowtie=1.3.1, samtools=1.14}, BOWTIE_MAP_MATURE={bowtie=1.3.1, samtools=1.14}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.1, samtools=1.14}, CAT_FASTQ={cat=8.3}, FASTP={fastp=0.23.4}, FILTER_STATS={BusyBox=1.32.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, GAWK_CDNA={gawk=5.3.0}, GAWK_NCRNA={gawk=5.3.0}, INDEX_CDNA={bowtie2=2.5.2}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, INDEX_NCRNA={bowtie2=2.5.2}, INDEX_TRNA={bowtie2=2.5.2}, MAP_CDNA={bowtie2=2.4.5}, MAP_NCRNA={bowtie2=2.4.5}, MAP_TRNA={bowtie2=2.4.5}, MIRTOP_COUNTS={mirtop=0.4.28}, MIRTOP_EXPORT={mirtop=0.4.28}, MIRTOP_GFF={mirtop=0.4.28}, MIRTOP_STATS={mirtop=0.4.28}, MIRTRACE_QC={mirtrace=1.0.1}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, SAMTOOLS_FLAGSTAT={samtools=1.21}, 
SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, SEQKIT_GREP_CDNA={seqkit=2.8.0}, SEQKIT_GREP_NCRNA={seqkit=2.8.0}, TABLE_MERGE={r-base=3.6.2}, Workflow={nf-core/smrnaseq=v2.3.2dev}}" + "{BLAT_CDNA={blat=36}, BLAT_NCRNA={blat=36}, BOWTIE2_ALIGN_CDNA={bowtie2=2.5.2, samtools=1.18, pigz=2.6}, BOWTIE2_ALIGN_NCRNA={bowtie2=2.5.2, samtools=1.18, pigz=2.6}, BOWTIE2_ALIGN_TRNA={bowtie2=2.5.2, samtools=1.18, pigz=2.6}, BOWTIE_MAP_HAIRPIN={bowtie=1.3.1, samtools=1.14}, BOWTIE_MAP_MATURE={bowtie=1.3.1, samtools=1.14}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.1, samtools=1.14}, CAT_FASTQ={cat=8.3}, FASTP={fastp=0.23.4}, FILTER_STATS={BusyBox=1.32.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, GAWK_CDNA={gawk=5.3.0}, GAWK_NCRNA={gawk=5.3.0}, INDEX_CDNA={bowtie2=2.5.2}, INDEX_HAIRPIN={bowtie=1.3.0}, INDEX_MATURE={bowtie=1.3.0}, INDEX_NCRNA={bowtie2=2.5.2}, INDEX_TRNA={bowtie2=2.5.2}, MIRTOP_COUNTS={mirtop=0.4.28}, MIRTOP_EXPORT={mirtop=0.4.28}, MIRTOP_GFF={mirtop=0.4.28}, MIRTOP_STATS={mirtop=0.4.28}, MIRTRACE_QC={mirtrace=1.0.1}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, SAMTOOLS_FLAGSTAT={samtools=1.21}, SAMTOOLS_IDXSTATS={samtools=1.21}, SAMTOOLS_INDEX={samtools=1.21}, SAMTOOLS_SORT={samtools=1.21}, SAMTOOLS_STATS={samtools=1.21}, SEQCLUSTER_COLLAPSE={seqcluster=1.2.9}, SEQKIT_GREP_CDNA={seqkit=2.8.0}, SEQKIT_GREP_NCRNA={seqkit=2.8.0}, STATS_GAWK_CDNA={gawk=5.3.0}, STATS_GAWK_NCRNA={gawk=5.3.0}, STATS_GAWK_TRNA={gawk=5.3.0}, TABLE_MERGE={r-base=3.6.2}, Workflow={nf-core/smrnaseq=v2.3.2dev}}" ], "meta": { - "nf-test": "0.9.0", + "nf-test": "0.8.4", "nextflow": "24.04.4" }, - "timestamp": "2024-09-20T16:43:36.482010104" + "timestamp": "2024-09-25T18:15:44.479114611" }, "mirna_quant_bam": { "content": [ diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 8855c34c..0492b2c3 100644 --- 
a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -148,8 +148,6 @@ workflow NFCORE_SMRNASEQ { three_prime_adapter = Channel.value(params.three_prime_adapter) phred_offset = Channel.value(params.phred_offset) - ch_reads_for_mirna.dump(tag:"ch_reads_for_mirna") - ch_mirtrace_config = ch_reads_for_mirna .transpose() .combine(three_prime_adapter)