diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b3d17948..c01549b7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -111,6 +111,7 @@ jobs: - "--skip_plasmidid false --skip_asciigenome" - "--additional_annotation ./GCA_009858895.3_ASM985889v3_genomic.gtf.gz" - "--bowtie2_index ./GCA_009858895.3_ASM985889v3_genomic.200409.bt2.index.tar.gz" + - "--skip_noninternal_primers --threeprime_adapters" isMaster: - ${{ github.base_ref == 'master' }} # Exclude conda and singularity on dev diff --git a/CHANGELOG.md b/CHANGELOG.md index cc3fda4d..1d2a5f6e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,20 +28,23 @@ Thank you to everyone else that has contributed by reporting bugs, enhancements - [[PR #434](https://github.com/nf-core/viralrecon/pull/434)] - Add blast result filtering through `min_contig_length` and `min_perc_contig_aligned`. - [[PR #438](https://github.com/nf-core/viralrecon/pull/438)] - Update fastp container to 0.23.4 - [[PR #439](https://github.com/nf-core/viralrecon/pull/439)] - Fix cardinality issue when using `--bowtie2_index` +- [[PR #435](https://github.com/nf-core/viralrecon/pull/435)] - Changed to a patched cutadapt from nf-core modules, added `skip_noninternal_primers` param to allow users to process primers inside the pipeline, and added `threeprime_adapters` to determine whether primers are 3' or 5' adapters. 
### Parameters -| Old parameter | New parameter | -| ------------- | --------------------------- | -| | `--skip_freyja` | -| | `--freyja_repeats` | -| | `--freyja_db_name` | -| | `--freyja_barcodes` | -| | `--freyja_lineages` | -| | `--skip_freyja_boot` | -| | `--additional_annotation` | -| | `--min_contig_length` | -| | `--min_perc_contig_aligned` | +| Old parameter | New parameter | +| ------------- | ---------------------------- | +| | `--skip_freyja` | +| | `--freyja_repeats` | +| | `--freyja_db_name` | +| | `--freyja_barcodes` | +| | `--freyja_lineages` | +| | `--skip_freyja_boot` | +| | `--additional_annotation` | +| | `--min_contig_length` | +| | `--min_perc_contig_aligned` | +| | `--skip_noninternal_primers` | +| | `--threeprime_adapters` | > **NB:** Parameter has been **updated** if both old and new parameter information is present. > **NB:** Parameter has been **added** if just the new parameter information is present. @@ -53,9 +56,10 @@ Note, since the pipeline is now using Nextflow DSL2, each process will be run wi | Dependency | Old version | New version | | ---------- | ----------- | ----------- | +| `cutadapt` | | 4.6 | +| `fastp` | 0.23.2 | 0.23.4 | | `freyja` | | 1.5.0 | | `multiqc` | 1.14 | 1.19 | -| `fastp` | 0.23.2 | 0.23.4 | > **NB:** Dependency has been **updated** if both old and new version information is present. 
> diff --git a/conf/modules_illumina.config b/conf/modules_illumina.config index 1756cb48..67f4a04b 100644 --- a/conf/modules_illumina.config +++ b/conf/modules_illumina.config @@ -757,7 +757,14 @@ if (!params.skip_assembly) { enabled: params.save_reference ] } - + withName: 'PREPARE_PRIMER_FASTA' { + ext.args = '^' + ext.prefix = { "${meta.id}"} + publishDir = [ + path: { "${params.outdir}/assembly/cutadapt/log" }, + enabled: false + ] + } withName: 'CUTADAPT' { ext.args = '--overlap 5 --minimum-length 30 --error-rate 0.1' ext.prefix = { "${meta.id}.primer_trim" } diff --git a/docs/usage.md b/docs/usage.md index a626bd95..35e321fa 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -321,6 +321,20 @@ To learn how to provide additional arguments to a particular tool of the pipelin [Freyja](https://github.com/andersen-lab/Freyja) relies on a dataset of barcodes that use lineage defining mutations (see [UShER](https://usher-wiki.readthedocs.io/en/latest/#)). By default the most recent barcodes will be downloaded and used. However, if analyses need to be compared across multiple datasets, it might be of interest to re-use the same barcodes, or to rerun all Freyja analyses with the most recent dataset. To do this, specify the barcodes and lineages using the `--freyja_barcodes`, `--freyja_lineages` parameters, respectivly. The boostrapping of Freyja can be skipped by specifying `--skip_freyja_boot`. +### Cutadapt + +According to [Cutadapt's documentation regarding adapter types](https://cutadapt.readthedocs.io/en/stable/guide.html#adapter-types), you can have: + +- Regular 3’ adapter: `-a ADAPTER` + - Set `--skip_noninternal_primers` to `true` + - Set `--threeprime_adapters` to `true` +- Regular 5’ adapter: `-g ADAPTER` + - Set `--skip_noninternal_primers` to `true` +- Non-internal 3’ adapter: `-a ADAPTERX`: + - Change `modules_illumina.config` > `PREPARE_PRIMER_FASTA` > `ext.args` to use `$` instead of `^` to add the X at the end of the sequence. 
+ - Set `--threeprime_adapters` to `true` +- **Non-internal 5’ adapter**: `-g XADAPTER`: **This is the option by default**. + ### nf-core/configs In most cases, you will only need to create a custom config as a one-off but if you and others within your organisation are likely to be running nf-core pipelines regularly and need to use the same settings regularly it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this please can you test that the config file works with your pipeline of choice using the `-c` parameter. You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile. diff --git a/modules.json b/modules.json index bffff062..fb68a8c8 100644 --- a/modules.json +++ b/modules.json @@ -105,6 +105,11 @@ "git_sha": "1b0ffa4e5aed5b7e3cd4311af31bd3b2c8345051", "installed_by": ["modules"] }, + "cutadapt": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "fastp": { "branch": "master", "git_sha": "1ceaa8ba4d0fd886dbca0e545815d905b7407de7", diff --git a/modules/local/cutadapt.nf b/modules/local/cutadapt.nf deleted file mode 100644 index 11c8f6a5..00000000 --- a/modules/local/cutadapt.nf +++ /dev/null @@ -1,43 +0,0 @@ -process CUTADAPT { - tag "$meta.id" - label 'process_medium' - - conda "bioconda::cutadapt=4.2" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/cutadapt:4.2--py39hbf8eff0_0' : - 'quay.io/biocontainers/cutadapt:4.2--py39hbf8eff0_0' }" - - input: - tuple val(meta), path(reads) - path adapters - - output: - tuple val(meta), path('*.fastq.gz'), emit: reads - tuple val(meta), path('*.log') , emit: log - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def paired = meta.single_end ? "-a file:adapters.sub.fa" : "-a file:adapters.sub.fa -A file:adapters.sub.fa" - def trimmed = meta.single_end ? "-o ${prefix}.fastq.gz" : "-o ${prefix}_1.fastq.gz -p ${prefix}_2.fastq.gz" - """ - sed -r '/^[ACTGactg]+\$/ s/\$/X/g' $adapters > adapters.sub.fa - - cutadapt \\ - --cores $task.cpus \\ - $args \\ - $paired \\ - $trimmed \\ - $reads \\ - > ${prefix}.cutadapt.log - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - cutadapt: \$(cutadapt --version) - END_VERSIONS - """ -} diff --git a/modules/local/prepare_primer_fasta.nf b/modules/local/prepare_primer_fasta.nf new file mode 100644 index 00000000..1fcbd59d --- /dev/null +++ b/modules/local/prepare_primer_fasta.nf @@ -0,0 +1,34 @@ +// Marks each primer in the FASTA as a non-internal cutadapt adapter by inserting an X at the position matched by task.ext.args +// ('^' puts the X before the sequence, '$' after it); only lines made up purely of A/C/G/T are edited, so header lines pass through unchanged. +process PREPARE_PRIMER_FASTA { + tag "$adapters" + label 'process_low' + + conda "conda-forge::sed=4.7" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + path adapters + + output: + path 'adapters.sub.fa', emit: adapters + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Fall back to '^' (the pipeline default): with an empty pattern sed would reuse the address regex and replace the whole primer sequence with X + def args = task.ext.args ?: '^' + + """ + sed -r '/^[ACTGactg]+\$/ s/$args/X/g' $adapters > adapters.sub.fa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/cutadapt/cutadapt.diff b/modules/nf-core/cutadapt/cutadapt.diff new file mode 100644 index 00000000..2a450c54 --- /dev/null +++ b/modules/nf-core/cutadapt/cutadapt.diff @@ -0,0 +1,46 @@ +Changes in module 'nf-core/cutadapt' +'modules/nf-core/cutadapt/meta.yml' is unchanged +Changes in 'cutadapt/main.nf': +--- modules/nf-core/cutadapt/main.nf ++++ modules/nf-core/cutadapt/main.nf +@@ -9,6 +9,7 @@ + + input: + tuple val(meta), path(reads) ++ path adapters + + output: + tuple val(meta), path('*.trim.fastq.gz'), emit: reads +@@ -22,11 +23,25 @@ + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def trimmed = meta.single_end ? 
"-o ${prefix}.trim.fastq.gz" : "-o ${prefix}_1.trim.fastq.gz -p ${prefix}_2.trim.fastq.gz" ++ ++ // Select the cutadapt adapter flags: -a/-A when params.threeprime_adapters is set, otherwise -g/-G; the uppercase flag is added only for paired-end samples ++ def primers ++ if (params.threeprime_adapters && meta.single_end) { ++ primers = "-a file:${adapters}" ++ } else if (params.threeprime_adapters && !meta.single_end) { ++ primers = "-a file:${adapters} -A file:${adapters}" ++ } else if (!params.threeprime_adapters && meta.single_end) { ++ primers = "-g file:${adapters}" ++ } else { ++ primers = "-g file:${adapters} -G file:${adapters}" ++ } ++ + """ + cutadapt \\ + -Z \\ + --cores $task.cpus \\ + $args \\ ++ $primers \\ + $trimmed \\ + $reads \\ + > ${prefix}.cutadapt.log + +'modules/nf-core/cutadapt/environment.yml' is unchanged +'modules/nf-core/cutadapt/tests/main.nf.test' is unchanged +'modules/nf-core/cutadapt/tests/main.nf.test.snap' is unchanged +'modules/nf-core/cutadapt/tests/nextflow.config' is unchanged +'modules/nf-core/cutadapt/tests/tags.yml' is unchanged +************************************************************ diff --git a/modules/nf-core/cutadapt/environment.yml b/modules/nf-core/cutadapt/environment.yml new file mode 100644 index 00000000..dfdbd1c2 --- /dev/null +++ b/modules/nf-core/cutadapt/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::cutadapt=4.6 diff --git a/modules/nf-core/cutadapt/main.nf b/modules/nf-core/cutadapt/main.nf new file mode 100644 index 00000000..01ff1173 --- /dev/null +++ b/modules/nf-core/cutadapt/main.nf @@ -0,0 +1,66 @@ +process CUTADAPT { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/cutadapt:4.6--py39hf95cd2a_1' : + 'biocontainers/cutadapt:4.6--py39hf95cd2a_1' }" + + input: + tuple val(meta), path(reads) + path adapters + + output: + tuple val(meta), path('*.trim.fastq.gz'), emit: reads + tuple val(meta), path('*.log') , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def trimmed = meta.single_end ? "-o ${prefix}.trim.fastq.gz" : "-o ${prefix}_1.trim.fastq.gz -p ${prefix}_2.trim.fastq.gz" + + // Select the cutadapt adapter flags: -a/-A when params.threeprime_adapters is set, otherwise -g/-G; the uppercase flag is added only for paired-end samples + def primers + if (params.threeprime_adapters && meta.single_end) { + primers = "-a file:${adapters}" + } else if (params.threeprime_adapters && !meta.single_end) { + primers = "-a file:${adapters} -A file:${adapters}" + } else if (!params.threeprime_adapters && meta.single_end) { + primers = "-g file:${adapters}" + } else { + primers = "-g file:${adapters} -G file:${adapters}" + } + + """ + cutadapt \\ + -Z \\ + --cores $task.cpus \\ + $args \\ + $primers \\ + $trimmed \\ + $reads \\ + > ${prefix}.cutadapt.log + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cutadapt: \$(cutadapt --version) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def trimmed = meta.single_end ? 
"${prefix}.trim.fastq.gz" : "${prefix}_1.trim.fastq.gz ${prefix}_2.trim.fastq.gz" + """ + touch ${prefix}.cutadapt.log + touch ${trimmed} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cutadapt: \$(cutadapt --version) + END_VERSIONS + """ +} diff --git a/modules/nf-core/cutadapt/meta.yml b/modules/nf-core/cutadapt/meta.yml new file mode 100644 index 00000000..8844d86c --- /dev/null +++ b/modules/nf-core/cutadapt/meta.yml @@ -0,0 +1,58 @@ +name: cutadapt +description: Perform adapter/quality trimming on sequencing reads +keywords: + - trimming + - adapter trimming + - adapters + - quality trimming +tools: + - cuatadapt: + description: | + Cutadapt finds and removes adapter sequences, primers, poly-A tails and other types of unwanted sequence from your high-throughput sequencing reads. + documentation: https://cutadapt.readthedocs.io/en/stable/index.html + doi: 10.14806/ej.17.1.200 + licence: ["MIT"] + identifier: biotools:cutadapt +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. +output: + - reads: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.trim.fastq.gz": + type: file + description: The trimmed/modified fastq reads + pattern: "*fastq.gz" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.log": + type: file + description: cuatadapt log file + pattern: "*cutadapt.log" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/cutadapt/tests/main.nf.test b/modules/nf-core/cutadapt/tests/main.nf.test new file mode 100644 index 00000000..36927bd7 --- /dev/null +++ b/modules/nf-core/cutadapt/tests/main.nf.test @@ -0,0 +1,69 @@ +nextflow_process { + + name "Test Process CUTADAPT" + script "../main.nf" + process "CUTADAPT" + tag "modules" + tag "modules_nfcore" + tag "cutadapt" + + test("sarscov2 Illumina single end [fastq]") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id: 'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.reads != null }, + { assert process.out.reads.get(0).get(1) ==~ ".*.trim.fastq.gz" }, + { assert snapshot(process.out.versions).match("versions_single_end") }, + { assert snapshot(path(process.out.reads.get(0).get(1)).linesGzip[0]).match() } + ) + } + } + + test("sarscov2 Illumina paired-end [fastq]") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id: 'test', single_end:false ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.reads != null }, + { assert process.out.reads.get(0).get(1).get(0) ==~ ".*.1.trim.fastq.gz" }, + { assert 
process.out.reads.get(0).get(1).get(1) ==~ ".*.2.trim.fastq.gz" }, + { assert snapshot(path(process.out.reads.get(0).get(1).get(1)).linesGzip[0]).match() }, + { assert snapshot(process.out.versions).match("versions_paired_end") } + ) + } + } +} diff --git a/modules/nf-core/cutadapt/tests/main.nf.test.snap b/modules/nf-core/cutadapt/tests/main.nf.test.snap new file mode 100644 index 00000000..3df7389e --- /dev/null +++ b/modules/nf-core/cutadapt/tests/main.nf.test.snap @@ -0,0 +1,46 @@ +{ + "sarscov2 Illumina single end [fastq]": { + "content": [ + "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-06T10:27:15.235936866" + }, + "sarscov2 Illumina paired-end [fastq]": { + "content": [ + "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/2" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-06T10:27:24.38468252" + }, + "versions_paired_end": { + "content": [ + [ + "versions.yml:md5,bc9892c68bfa7084ec5dbffbb9e8322f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-06T10:27:24.38799189" + }, + "versions_single_end": { + "content": [ + [ + "versions.yml:md5,bc9892c68bfa7084ec5dbffbb9e8322f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-06T10:27:15.219246449" + } +} \ No newline at end of file diff --git a/modules/nf-core/cutadapt/tests/nextflow.config b/modules/nf-core/cutadapt/tests/nextflow.config new file mode 100644 index 00000000..6c3b4253 --- /dev/null +++ b/modules/nf-core/cutadapt/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: CUTADAPT { + ext.args = '-q 25' + } + +} diff --git a/modules/nf-core/cutadapt/tests/tags.yml b/modules/nf-core/cutadapt/tests/tags.yml new file mode 100644 index 00000000..f64f9975 --- /dev/null +++ b/modules/nf-core/cutadapt/tests/tags.yml @@ -0,0 +1,2 @@ +cutadapt: + 
- modules/nf-core/cutadapt/** diff --git a/nextflow.config b/nextflow.config index 4e881662..e577ae62 100644 --- a/nextflow.config +++ b/nextflow.config @@ -72,6 +72,8 @@ params { skip_kraken2 = false skip_fastp = false skip_cutadapt = false + skip_noninternal_primers = false + threeprime_adapters = false // Illumina variant calling options variant_caller = null diff --git a/nextflow_schema.json b/nextflow_schema.json index cc792a69..b0ce2305 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -584,6 +584,18 @@ "default": 0.7, "fa_icon": "fas fa-sliders-h", "description": "Minimum percentage of contig aligned to filter from BLAST results." + }, + "skip_noninternal_primers": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Set this parameter to false to add an X at the beginning or end of each primer sequence in the FASTA file to tell cutadapt that they are non-internal 5' or 3' adapters, respectively.", + "help_text": "See viralrecon's usage and cutadapt documentation: https://cutadapt.readthedocs.io/en/stable/guide.html#adapter-types" + }, + "threeprime_adapters": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Set this parameter to true when the primers for cutadapt are 3' adapters. 
Default value is false, as default primers are 5' adapters.", + "help_text": "See viralrecon's usage and cutadapt documentation: https://cutadapt.readthedocs.io/en/stable/guide.html#adapter-types" } }, "fa_icon": "fas fa-random" diff --git a/workflows/illumina.nf b/workflows/illumina.nf index 21df7cc8..668e7c09 100644 --- a/workflows/illumina.nf +++ b/workflows/illumina.nf @@ -65,10 +65,10 @@ ch_ivar_variants_header_mqc = file("$projectDir/assets/headers/ivar_variant // // MODULE: Loaded from modules/local/ // -include { CUTADAPT } from '../modules/local/cutadapt' -include { MULTIQC } from '../modules/local/multiqc_illumina' +include { MULTIQC } from '../modules/local/multiqc_illumina' include { PLOT_MOSDEPTH_REGIONS as PLOT_MOSDEPTH_REGIONS_GENOME } from '../modules/local/plot_mosdepth_regions' include { PLOT_MOSDEPTH_REGIONS as PLOT_MOSDEPTH_REGIONS_AMPLICON } from '../modules/local/plot_mosdepth_regions' +include { PREPARE_PRIMER_FASTA } from '../modules/local/prepare_primer_fasta' // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules @@ -96,6 +96,7 @@ include { FASTQ_TRIM_FASTP_FASTQC } from '../subworkflows/local/fastq_trim_fastp // MODULE: Installed directly from nf-core/modules // include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' +include { CUTADAPT } from '../modules/nf-core/cutadapt/main' include { FASTQC } from '../modules/nf-core/fastqc/main' include { KRAKEN2_KRAKEN2 } from '../modules/nf-core/kraken2/kraken2/main' include { PICARD_COLLECTMULTIPLEMETRICS } from '../modules/nf-core/picard/collectmultiplemetrics/main' @@ -576,9 +577,17 @@ workflow ILLUMINA { // ch_cutadapt_multiqc = Channel.empty() if (params.protocol == 'amplicon' && !params.skip_assembly && !params.skip_cutadapt) { + ch_primers = PREPARE_GENOME.out.primer_fasta.collect { it[1] } + if (!params.skip_noninternal_primers){ + PREPARE_PRIMER_FASTA( + PREPARE_GENOME.out.primer_fasta.collect { it[1] } + ) + ch_primers = PREPARE_PRIMER_FASTA.out.adapters + } + CUTADAPT 
( ch_assembly_fastq, - PREPARE_GENOME.out.primer_fasta.collect { it[1] } + ch_primers ) ch_assembly_fastq = CUTADAPT.out.reads ch_cutadapt_multiqc = CUTADAPT.out.log