Merge pull request #1052 from nf-core/patch-2.5.1

Patch 2.5.1
nf-core · Feb 21, 2024 · 4eb0ffe · 4eb0ffe
2 parents a2c9f87 + 628014a
commit 4eb0ffe
Show file tree

Hide file tree

Showing 12 changed files with 76 additions and 32 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -37,13 +37,13 @@ jobs:
 
       - name: Build new docker image
         if: env.MATCHED_FILES
-        run: docker build --no-cache . -t nfcore/eager:2.5.0
+        run: docker build --no-cache . -t nfcore/eager:2.5.1
 
       - name: Pull docker image
         if: ${{ !env.MATCHED_FILES }}
         run: |
           docker pull nfcore/eager:dev
-          docker tag nfcore/eager:dev nfcore/eager:2.5.0
+          docker tag nfcore/eager:dev nfcore/eager:2.5.1
 
       - name: Install Nextflow
         env:

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,22 @@
 The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
 
+## [2.5.1] - 2024-02-21
+
+### `Added`
+
+- [#1037](https://github.com/nf-core/eager/issues/1037) Added an option (`--anno_file_is_unsorted`) to deactivate the `-sorted` option of bedtools coverage, for cases where the feature file is not sorted the same way as the fasta file. Be aware that running bedtools without `-sorted` will be very slow. (♥ Thanks to @IdoBar for reporting and contributing.)
+
+### `Fixed`
+
+- [#1048](https://github.com/nf-core/eager/issues/1048) The value of the `--vcf2genome_outfile` parameter is now prefixed with the sample name and suffixed with `.fasta` (i.e. the output file is named `<sample_name>_<vcf2genome_outfile>.fasta`). This avoids overwriting the output fasta of one sample with that of another when the option is provided. (♥ Thanks to @MeriamOs for reporting.)
+- [#1047](https://github.com/nf-core/eager/issues/1047) Changed the row in which some statistics are reported in the General Stats table. The fix for the file name collision in 2.5.0 (see #1017) caused these statistics to be reported in the wrong row due to an added suffix.
+- [#1051](https://github.com/nf-core/eager/issues/1051) An error is now thrown if input BAM file names end in `.unmapped.bam`, as such names break the bam filtering process and cause the bam files to be emptied. (♥ Thanks to @PCQuilis for reporting.)
+
+### `Dependencies`
+
+### `Deprecated`
+
 ## [2.5.0] - Bopfingen - 2023-11-03
 
 ### `Added`

diff --git a/Dockerfile b/Dockerfile
@@ -7,7 +7,7 @@ COPY environment.yml /
 RUN conda env create --quiet -f /environment.yml && conda clean -a
 
 # Add conda installation dir to PATH (instead of doing 'conda activate')
-ENV PATH /opt/conda/envs/nf-core-eager-2.5.0/bin:$PATH
+ENV PATH /opt/conda/envs/nf-core-eager-2.5.1/bin:$PATH
 
 # Dump the details of the installed packages to a file for posterity
-RUN conda env export --name nf-core-eager-2.5.0 > nf-core-eager-2.5.0.yml
+RUN conda env export --name nf-core-eager-2.5.1 > nf-core-eager-2.5.1.yml
diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml
@@ -59,6 +59,8 @@ extra_fn_clean_exts:
   - ".trimmed_stats"
   - "_libmerged"
   - "_bt2"
+  - type: "regex"
+    pattern: "_udg(half|none|full)"
 
 top_modules:
   - "fastqc":

diff --git a/docs/images/usage/eager2_metromap_complex.png b/docs/images/usage/eager2_metromap_complex.png
diff --git a/docs/images/usage/eager2_metromap_complex.svg b/docs/images/usage/eager2_metromap_complex.svg
diff --git a/docs/images/usage/eager2_workflow.png b/docs/images/usage/eager2_workflow.png
diff --git a/docs/images/usage/eager2_workflow.svg b/docs/images/usage/eager2_workflow.svg
diff --git a/environment.yml b/environment.yml
@@ -1,6 +1,6 @@
 # You can use this file to create a conda environment for this pipeline:
 #   conda env create -f environment.yml
-name: nf-core-eager-2.5.0
+name: nf-core-eager-2.5.1
 channels:
   - conda-forge
   - bioconda

diff --git a/main.nf b/main.nf
@@ -36,6 +36,11 @@ if ( params.bam && !params.single_end ) {
   exit 1, "[nf-core/eager] error: bams can only be specified with --single_end. Please check input command."
 }
 
+// Do not allow input bams to be suffixed with '.unmapped.bam': such names break the bam filtering process and empty the bam files
+if (params.bam && params.input.endsWith('.unmapped.bam')) {
+  exit 1, "[nf-core/eager] error: Input BAM file names ending in '.unmapped.bam' are not allowed. Please rename your input BAM(s)."
+}
+
 // Validate that skip_collapse is only set to True for paired_end reads!
 if (!has_extension(params.input, "tsv") && params.skip_collapse  && params.single_end){
     exit 1, "[nf-core/eager] error: --skip_collapse can only be set for paired_end samples."
@@ -379,6 +384,9 @@ ch_input_sample_check
         def r2 = file(it[9]).getName()
         def bam = file(it[10]).getName()
 
+        // Throw error and exit if the input bam has a name ending in '.unmapped.bam'
+        if (bam.endsWith('.unmapped.bam')) { exit 1, "[nf-core/eager] error: Input BAM file names ending in '.unmapped.bam' are not allowed. Please rename your input BAM(s)." }
+
       [r1, r2, bam]
 
     }
@@ -2063,13 +2071,14 @@ process bedtools {
   tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*")
 
   script:
+  sorting_of_anno = params.anno_file_is_unsorted ? "" : "-sorted"
   """
   ## Create genome file from bam header
   samtools view -H ${bam} | grep '@SQ' | sed 's#@SQ\tSN:\\|LN:##g' > genome.txt
   
   ##  Run bedtools
-  bedtools coverage -nonamecheck -g genome.txt -sorted -a ${anno_file} -b ${bam} | pigz -p ${task.cpus - 1} > "${bam.baseName}".breadth.gz
-  bedtools coverage -nonamecheck -g genome.txt -sorted -a ${anno_file} -b ${bam} -mean | pigz -p ${task.cpus - 1} > "${bam.baseName}".depth.gz
+  bedtools coverage -nonamecheck -g genome.txt ${sorting_of_anno} -a ${anno_file} -b ${bam} | pigz -p ${task.cpus - 1} > "${bam.baseName}".breadth.gz
+  bedtools coverage -nonamecheck -g genome.txt ${sorting_of_anno} -a ${anno_file} -b ${bam} -mean | pigz -p ${task.cpus - 1} > "${bam.baseName}".depth.gz
   """
 }
 
@@ -2741,7 +2750,7 @@ process vcf2genome {
   tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*.fasta.gz")
 
   script:
-  def out = !params.vcf2genome_outfile ? "${samplename}.fasta" : "${params.vcf2genome_outfile}"
+  def out = !params.vcf2genome_outfile ? "${samplename}.fasta" : "${samplename}_${params.vcf2genome_outfile}.fasta"
   def fasta_head = !params.vcf2genome_header ? "${samplename}" : "${params.vcf2genome_header}"
   """
   pigz -d -f -p ${task.cpus} ${vcf}

diff --git a/nextflow.config b/nextflow.config
@@ -143,8 +143,9 @@ params {
   rescale_seqlength = 12
 
   //Bedtools settings
-  run_bedtools_coverage = false 
+  run_bedtools_coverage = false
   anno_file = null
+  anno_file_is_unsorted = false
 
   //bamUtils trimbam settings
   run_trim_bam = false 
@@ -288,7 +289,7 @@ params {
 
 // Container slug. Stable releases should specify release tag!
 // Developmental code should specify :dev
-process.container = 'nfcore/eager:2.5.0'
+process.container = 'nfcore/eager:2.5.1'
 
 // Load base.config by default for all pipelines
 includeConfig 'conf/base.config'
@@ -418,7 +419,7 @@ manifest {
   description = 'A fully reproducible and state-of-the-art ancient DNA analysis pipeline'
   mainScript = 'main.nf'
   nextflowVersion = '>=20.07.1'
-  version = '2.5.0'
+  version = '2.5.1'
 }
 
 // Function to ensure that resource requirements don't go beyond

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -981,6 +981,12 @@
                     "description": "Path to GFF or BED file containing positions of features in reference file (--fasta). Path should be enclosed in quotes.",
                     "fa_icon": "fas fa-file-signature",
                     "help_text": "Specify the path to a GFF/BED containing the feature coordinates (or any acceptable input for [`bedtools coverage`](https://bedtools.readthedocs.io/en/latest/content/tools/coverage.html)). Must be in quotes.\n"
+                },
+                "anno_file_is_unsorted": {
+                    "type": "boolean",
+                    "fa_icon": "fas fa-random",
+                    "description": "Specify if the annotation file provided to --anno_file is not sorted in the same way as the reference fasta file.",
+                    "help_text": "In cases where the annotation file is NOT sorted the same way as the reference fasta, this option should be specified. This will significantly increase the memory usage of bedtools!\n\n> Modifies bedtools parameter: `-sorted`"
                 }
             },
             "fa_icon": "fas fa-scroll",
@@ -1330,9 +1336,9 @@
                 },
                 "vcf2genome_outfile": {
                     "type": "string",
-                    "description": "Specify name of the output FASTA file containing the consensus sequence. Do not include `.vcf` in the file name.",
+                    "description": "Specify the name of the output FASTA file containing the consensus sequence.",
                     "fa_icon": "fas fa-file-alt",
-                    "help_text": "The name of your requested output FASTA file. Do not include `.fasta` suffix.\n"
+                    "help_text": "The output FASTA file will be named `<sample_name>_<vcf2genome_outfile>.fasta`.\n"
                 },
                 "vcf2genome_header": {
                     "type": "string",
@@ -1718,7 +1724,7 @@
                 "maltextract_percentidentity": {
                     "type": "number",
                     "description": "Minimum percent identity alignments are required to have to be reported. Recommended to set same as MALT parameter.",
-                    "default": 85.0,
+                    "default": 85,
                     "fa_icon": "fas fa-id-card",
                     "help_text": "Minimum percent identity alignments are required to have to be reported. Higher values allows fewer mismatches between read and reference sequence, but therefore will provide greater confidence in the hit. Lower values allow more mismatches, which can account for damage and divergence of a related strain/species to the reference. Recommended to set same as MALT parameter or higher. Default: `85`.\n\nOnly when `--metagenomic_tool malt` is also supplied.\n\n> Modifies MaltExtract parameter: `--minPI`"
                 },