Skip to content

Commit

Permalink
Add nf-core module and subworkflow mirdeep2 (#6662)
Browse files Browse the repository at this point in the history
* first design mirdeep

* fix prettier

* fix linting

* reformat subworkflow structure

* fix linting

* allow paths to files or replace with 'none'

* fix linting

* capture unstable outputs in snapshot

* update meta

---------

Co-authored-by: Alexander Peltzer <[email protected]>
Co-authored-by: Sateesh_Peri <[email protected]>
  • Loading branch information
3 people authored Sep 23, 2024
1 parent 4eecd9a commit 757f60e
Show file tree
Hide file tree
Showing 17 changed files with 831 additions and 0 deletions.
7 changes: 7 additions & 0 deletions modules/nf-core/mirdeep2/mapper/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the mirdeep2/mapper module.
# Channels are listed with conda-forge ahead of bioconda.
channels:
- conda-forge
- bioconda
# Single exact pin; the version matches the 2.0.1.2 container tag used by the
# module's main.nf.
dependencies:
- "bioconda::mirdeep2=2.0.1.2"
53 changes: 53 additions & 0 deletions modules/nf-core/mirdeep2/mapper/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
 * MIRDEEP2_MAPPER
 *
 * Runs miRDeep2's `mapper.pl` on a reads file against a pre-built genome index.
 *
 * Inputs:
 *   meta, reads   - sample meta map and the reads file handed to mapper.pl
 *   meta2, index  - index meta map and the staged index directory; the index
 *                   prefix passed to `-p` is `${index}/${meta2.id}`, so the index
 *                   files are expected to be named after `meta2.id`
 *
 * Outputs:
 *   outputs  - tuple of meta, the `*.fa` file (target of `-s`) and the `*.arf`
 *              file (target of `-t`)
 *   versions - versions.yml with the hard-coded tool version
 *
 * Extra mapper.pl options are supplied through `task.ext.args`.
 */
process MIRDEEP2_MAPPER {
    tag "$meta.id"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/mirdeep2:2.0.1.2--0':
        'biocontainers/mirdeep2:2.0.1.2--0' }"

    input:
    tuple val(meta), path(reads)
    tuple val(meta2), path(index, stageAs: '*')

    output:
    tuple val(meta), path('*.fa'), path('*.arf'), emit: outputs
    path "versions.yml"                         , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The version is hard-coded rather than queried from the tool; update it
    // together with the container/conda pin.
    // NOTE(review): the container and conda pin are 2.0.1.2 while this reports
    // 2.0.1 - confirm which is intended. Changing it alters the versions.yml
    // checksum recorded in the test snapshot.
    def VERSION = '2.0.1'

    """
    mapper.pl \\
        ${reads} \\
        $args \\
        -p ${index}/${meta2.id} \\
        -s ${prefix}_collapsed.fa \\
        -t ${prefix}_reads_collapsed_vs_${meta2.id}_genome.arf
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        mirdeep2: \$(echo "$VERSION")
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    def VERSION = '2.0.1'
    // NOTE(review): the stub file names do not follow the script's naming scheme
    // (`<prefix>_collapsed.fa`, `<prefix>_reads_collapsed_vs_<id>_genome.arf`) and
    // the .arf name has no separator after the prefix. They are kept unchanged
    // because the stub test snapshot records exactly these names.
    """
    touch ${prefix}.fa
    touch ${prefix}reads_vs_refdb.arf
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        mirdeep2: \$(echo "$VERSION")
    END_VERSIONS
    """
}
50 changes: 50 additions & 0 deletions modules/nf-core/mirdeep2/mapper/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Module metadata for mirdeep2/mapper: describes the tool, the two input
# channels (reads, genome index) and the emitted channels (outputs, versions).
name: "mirdeep2_mapper"
description: |
  miRDeep2 Mapper is a tool that prepares deep sequencing reads for downstream miRNA detection by collapsing reads, mapping them to a genome, and outputting the required files for miRNA discovery.
keywords:
  - mirdeep2
  - mapper
  - RNA sequencing
tools:
  - "mirdeep2":
      description: |
        miRDeep2 Mapper (`mapper.pl`) is part of the miRDeep2 suite. It collapses identical reads, maps them to a reference genome, and outputs both collapsed FASTA and ARF files for downstream miRNA detection and analysis.
      homepage: "https://www.mdc-berlin.de/content/mirdeep2-documentation"
      documentation: "https://www.mdc-berlin.de/content/mirdeep2-documentation"
      tool_dev_url: "https://github.com/rajewsky-lab/mirdeep2"
      doi: "10.1093/nar/gkn491"
      licence: ["GPL V3"]
      identifier: biotools:mirdeep2

input:
  - - meta:
        type: map
        description: Groovy Map containing sample information, e.g. `[ id:'sample1',
          single_end:false ]`
    - reads:
        type: file
        description: File containing the raw sequencing reads that need to be collapsed
          and mapped to a reference genome.
        pattern: "*.fa"
  - - meta2:
        type: map
        description: Groovy Map containing information about the genome index. Its
          `id` is used as the index prefix inside the index directory.
    # The process passes `<index>/<meta2.id>` to `mapper.pl -p`, so this input is
    # a directory holding the index files, not a single file.
    - index:
        type: directory
        description: Directory containing the genome index files used for mapping
          the reads to the genome. The index files are expected to be named with
          the prefix `meta2.id`.
        pattern: "*"
output:
  - outputs:
      - meta:
          type: map
          description: Groovy Map containing sample information, e.g. `[ id:'sample1',
            single_end:false ]`
      - "*.fa":
          type: file
          description: FASTA file of processed reads written by `mapper.pl -s`.
          pattern: "*.fa"
      - "*.arf":
          type: file
          description: Read-to-genome mappings in ARF format written by `mapper.pl
            -t`.
          pattern: "*.arf"
  - versions:
      - versions.yml:
          type: file
          description: File containing software versions for tracking.
          pattern: "versions.yml"
authors:
  - "@atrigila"
maintainers:
  - "@atrigila"
141 changes: 141 additions & 0 deletions modules/nf-core/mirdeep2/mapper/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@

// nf-test specification for the MIRDEEP2_MAPPER process.
// A shared setup builds a bowtie index and prepares a FASTA from a FASTQ test
// file; three tests then exercise the process (two real runs and one stub run).
nextflow_process {

name "Test Process MIRDEEP2_MAPPER"
script "../main.nf"
process "MIRDEEP2_MAPPER"

// Tags cover this module plus every module invoked in the setup block.
tag "modules"
tag "modules_nfcore"
tag "mirdeep2"
tag "bowtie/build"
tag "mirdeep2/mapper"
tag "seqkit/fq2fa"
tag "seqkit/replace"


// Upstream processes whose outputs feed the tests below.
setup {
// Build the genome index from the miRDeep2 tutorial genome cluster FASTA.
run("BOWTIE_BUILD") {
script "../../../bowtie/build/main.nf"
process {
"""
input[0] = [
[ id:'genome_cel_cluster' ], // meta map
file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/cel_cluster.fa', checkIfExists: true)
]
"""
}
}

// Convert the smrnaseq trimmed FASTQ test file to FASTA.
run("SEQKIT_FQ2FA") {
script "../../../seqkit/fq2fa/main.nf"
process {
"""
input[0] = [
[ id:'small_Clone1_N1' ], // meta map
file('https://github.com/nf-core/test-datasets/raw/smrnaseq/testdata/trimmed/small_Clone1_N1.fastp.fastq.gz', checkIfExists: true)
]
"""
}
}

// Post-process the FASTA produced above using the SEQKIT_REPLACE settings
// from ./nextflow.config.
run("SEQKIT_REPLACE") {
script "../../../seqkit/replace/main.nf"
config "./nextflow.config"
process {
"""
input[0] = SEQKIT_FQ2FA.out.fasta
"""
}
}

}

// Real run on the miRDeep2 tutorial reads against the index built in setup.
test("mirdeep2 - mapper - fasta celegans") {
config "./nextflow.config"

when {
process {
"""
input[0] = [
[ id:'test_reads', single_end:false ], // meta map
file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/reads.fa', checkIfExists: true)
]
input[1] = BOWTIE_BUILD.out.index
"""
}
}

// Only versions are snapshotted; the data outputs are checked by content.
// In the `outputs` tuple, element [1] is the `*.fa` file and element [2] is
// the `*.arf` file.
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out.versions).match() },

// md5sum not stable - IDs change while sequences are the same

// Assert TCACCGGGGGTACATCAGCTAA occurs once
{ assert file(process.out.outputs[0][1]).readLines().findAll { it.contains("TCACCGGGGGTACATCAGCTAA") }.size() == 1 },

// Assert seq_347479_x287 occurs once
{ assert file(process.out.outputs[0][1]).readLines().findAll { it.contains("seq_347479_x287") }.size() == 1 },

// Assert that specific content occurs 4 times
{ assert file(process.out.outputs[0][2]).readLines().findAll { it.contains("21\t1\t21\ttcaccgggtgtaaatcagctt\tchrII:11534525-11540624\t21\t3535\t3555\ttcaccgggtgtaaatcagctt\t+\t0\tmmmmmmmmmmmmmmmmmmmmm") }.size() == 4 }
)
}

}

// Real run on the FASTA derived from the smrnaseq FASTQ test file.
// No snapshot is taken here; only the `*.fa` output is checked by content.
test("mirdeep2 - mapper - fasta smrnaseq") {
config "./nextflow.config"

when {
process {
"""
input[0] = SEQKIT_REPLACE.out.fastx
input[1] = BOWTIE_BUILD.out.index
"""
}
}

then {
assertAll(
{ assert process.success },

// Assert reads occurs once
{ assert file(process.out.outputs[0][1]).readLines().findAll { it.contains("TACCTGAGGTAGCAGGTTGTATAGTTGGGG") }.size() == 1 },

// Assert ID occurs once
{ assert file(process.out.outputs[0][1]).readLines().findAll { it.contains("seq_996152_x1") }.size() == 1 }

)
}

}

// Stub run: the whole process output is snapshotted, so the stub's file names
// and the versions.yml content are pinned by the snapshot file.
test("mirdeep2 - fasta - stub") {

options "-stub"

when {
process {
"""
input[0] = [
[ id:'test_reads', single_end:false ], // meta map
file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/reads.fa', checkIfExists: true)
]
input[1] = BOWTIE_BUILD.out.index
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

}
51 changes: 51 additions & 0 deletions modules/nf-core/mirdeep2/mapper/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
{
"mirdeep2 - fasta - stub": {
"content": [
{
"0": [
[
{
"id": "test_reads",
"single_end": false
},
"test_reads.fa:md5,d41d8cd98f00b204e9800998ecf8427e",
"test_readsreads_vs_refdb.arf:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"1": [
"versions.yml:md5,33c794292d6772d67fa8001439394614"
],
"outputs": [
[
{
"id": "test_reads",
"single_end": false
},
"test_reads.fa:md5,d41d8cd98f00b204e9800998ecf8427e",
"test_readsreads_vs_refdb.arf:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"versions": [
"versions.yml:md5,33c794292d6772d67fa8001439394614"
]
}
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-09-20T20:58:19.544297445"
},
"mirdeep2 - mapper - fasta celegans": {
"content": [
[
"versions.yml:md5,33c794292d6772d67fa8001439394614"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-09-17T17:41:05.101661825"
}
}
11 changes: 11 additions & 0 deletions modules/nf-core/mirdeep2/mapper/tests/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Test-only process configuration for the mirdeep2/mapper nf-test suite.
process {
    // Options forwarded verbatim to mapper.pl through task.ext.args.
    // NOTE(review): per the miRDeep2 mapper.pl usage these are presumably:
    // -c FASTA input, -j drop reads with non-canonical letters, -k clip the
    // given 3' adapter, -l 18 minimum read length, -m collapse reads,
    // -v progress output - confirm against the tool documentation.
    withName: 'MIRDEEP2_MAPPER' {
        ext.args = "-c -j -k TCGTATGCCGTCTTCTGCTTGT -l 18 -m -v"
    }

    // The backslash must be doubled: inside a Groovy double-quoted string a
    // bare `\s` is itself an escape sequence (or a parse error on older Groovy),
    // so the regex `\s.+` would never reach seqkit. With `\\s` the shell
    // command receives the literal pattern '\s.+'.
    // NOTE(review): with seqkit replace's default (empty) replacement this
    // presumably trims each header from the first whitespace onward - confirm.
    withName: 'SEQKIT_REPLACE' {
        ext.args = "-p '\\s.+'"
        ext.suffix = "fasta"
    }

}
7 changes: 7 additions & 0 deletions modules/nf-core/mirdeep2/mirdeep2/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the mirdeep2/mirdeep2 module.
# Channels are listed with conda-forge ahead of bioconda.
channels:
- conda-forge
- bioconda
# Single exact pin; the version matches the 2.0.1.2 container tag used by the
# module's main.nf.
dependencies:
- "bioconda::mirdeep2=2.0.1.2"
64 changes: 64 additions & 0 deletions modules/nf-core/mirdeep2/mirdeep2/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
 * MIRDEEP2_MIRDEEP2
 *
 * Runs `miRDeep2.pl` on reads and genome mappings (the two files emitted by
 * the mapper module) and renames the result files with the sample prefix.
 *
 * Inputs:
 *   meta, processed_reads, genome_mappings    - sample meta map, reads file and
 *                                               mappings file
 *   meta2, fasta                              - genome FASTA
 *   meta3, mature, hairpin, mature_other_species
 *                                             - optional reference files; any
 *                                               that evaluates to false (e.g. an
 *                                               empty list) is replaced by the
 *                                               literal `none` on the command line
 *
 * Outputs:
 *   outputs  - meta plus the files matching `result*.{bed,csv,html}`
 *   versions - versions.yml with the hard-coded tool version
 *
 * Extra miRDeep2.pl options are supplied through `task.ext.args`.
 */
process MIRDEEP2_MIRDEEP2 {
    tag "$meta.id"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/mirdeep2:2.0.1.2--0':
        'biocontainers/mirdeep2:2.0.1.2--0' }"

    input:
    tuple val(meta), path(processed_reads), path(genome_mappings)
    tuple val(meta2), path(fasta)
    tuple val(meta3), path(mature), path(hairpin), path(mature_other_species)

    output:
    tuple val(meta), path("result*.{bed,csv,html}")  , emit: outputs
    path "versions.yml"                              , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The version is hard-coded rather than queried from the tool; update it
    // together with the container/conda pin.
    // NOTE(review): the container and conda pin are 2.0.1.2 while this reports
    // 2.0.1 - confirm which is intended.
    def VERSION = '2.0.1'
    // Each optional reference falls back to the literal "none" when not supplied.
    def mature_species = mature ? "${mature}" : "none"
    def mature_other = mature_other_species ? "${mature_other_species}": "none"
    def precursors = hairpin ? "${hairpin}" : "none"

    // The six positional arguments are passed in a fixed order, followed by the
    // extra options. Each `mv` then renames the tool's `result_*` file to
    // `result_<prefix>.<ext>`, which assumes exactly one match per extension.
    """
    miRDeep2.pl \\
        $processed_reads \\
        $fasta \\
        $genome_mappings \\
        $mature_species \\
        $mature_other \\
        $precursors \\
        $args
    mv result_*.bed result_${prefix}.bed
    mv result_*.csv result_${prefix}.csv
    mv result_*.html result_${prefix}.html
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        mirdeep2: \$(echo "$VERSION")
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    def VERSION = '2.0.1'
    // Creates empty placeholders with the same names the script produces.
    """
    touch result_${prefix}.html
    touch result_${prefix}.bed
    touch result_${prefix}.csv
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        mirdeep2: \$(echo "$VERSION")
    END_VERSIONS
    """
}
Loading

0 comments on commit 757f60e

Please sign in to comment.