-
Notifications
You must be signed in to change notification settings - Fork 718
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add nf-core module and subworkflow
mirdeep2
(#6662)
* first design mirdeep * fix prettier * fix linting * reformat subworkflow structure * fix linting * allow paths to files or replace with 'none' * fix linting * capture unstable outputs in snapshot * update meta --------- Co-authored-by: Alexander Peltzer <[email protected]> Co-authored-by: Sateesh_Peri <[email protected]>
- Loading branch information
1 parent
4eecd9a
commit 757f60e
Showing
17 changed files
with
831 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
--- | ||
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json | ||
# Conda environment for the mapper module; the mirdeep2 pin below is the same 2.0.1.2 release named in the container image tags of the accompanying main.nf. | ||
channels: | ||
  - conda-forge | ||
  - bioconda | ||
dependencies: | ||
  - "bioconda::mirdeep2=2.0.1.2" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
// Runs miRDeep2's mapper.pl on one reads file against a pre-built index. | ||
// | ||
// Inputs: | ||
//   meta, reads  - sample metadata map and the reads file (first positional argument of mapper.pl) | ||
//   meta2, index - index metadata map and the staged index path; the -p prefix is built as index/meta2.id | ||
// Outputs: | ||
//   outputs  - tuple of meta, the files matching *.fa and the files matching *.arf | ||
//   versions - versions.yml holding the hard-coded mirdeep2 version string | ||
process MIRDEEP2_MAPPER { | ||
tag "$meta.id" | ||
label 'process_medium' | ||
|
||
conda "${moduleDir}/environment.yml" | ||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? | ||
'https://depot.galaxyproject.org/singularity/mirdeep2:2.0.1.2--0': | ||
'biocontainers/mirdeep2:2.0.1.2--0' }" | ||
|
||
input: | ||
tuple val(meta), path(reads) | ||
tuple val(meta2), path(index, stageAs: '*') | ||
|
||
output: | ||
tuple val(meta), path('*.fa'), path('*.arf'), emit: outputs | ||
path "versions.yml" , emit: versions | ||
|
||
when: | ||
task.ext.when == null || task.ext.when | ||
|
||
script: | ||
// args: extra mapper.pl flags from task.ext.args; prefix: base name for the -s and -t output files. | ||
def args = task.ext.args ?: '' | ||
def prefix = task.ext.prefix ?: "${meta.id}" | ||
// The version is hard-coded and echoed verbatim into versions.yml. | ||
// NOTE(review): the conda pin and container tags say 2.0.1.2 while this says 2.0.1 - confirm which string should be reported. | ||
def VERSION = '2.0.1' | ||
|
||
""" | ||
mapper.pl \\ | ||
${reads} \\ | ||
$args \\ | ||
-p ${index}/${meta2.id} \\ | ||
-s ${prefix}_collapsed.fa \\ | ||
-t ${prefix}_reads_collapsed_vs_${meta2.id}_genome.arf | ||
cat <<-END_VERSIONS > versions.yml | ||
"${task.process}": | ||
mirdeep2: \$(echo "$VERSION") | ||
END_VERSIONS | ||
""" | ||
|
||
// Stub: creates empty placeholder outputs plus versions.yml without running mapper.pl. | ||
// NOTE(review): the placeholder names (<prefix>.fa, <prefix>reads_vs_refdb.arf) differ from the names the real script passes to -s and -t; | ||
// the stub snapshot records the current names, so renaming them requires regenerating that snapshot. | ||
stub: | ||
// NOTE(review): args is defined here but not referenced by the stub script. | ||
def args = task.ext.args ?: '' | ||
def prefix = task.ext.prefix ?: "${meta.id}" | ||
def VERSION = '2.0.1' | ||
""" | ||
touch ${prefix}.fa | ||
touch ${prefix}reads_vs_refdb.arf | ||
cat <<-END_VERSIONS > versions.yml | ||
"${task.process}": | ||
mirdeep2: \$(echo "$VERSION") | ||
END_VERSIONS | ||
""" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
# Module metadata for MIRDEEP2_MAPPER: describes the tool, the two input tuples and the emitted channels. | ||
name: "mirdeep2_mapper" | ||
description: | | ||
  miRDeep2 Mapper is a tool that prepares deep sequencing reads for downstream miRNA detection by collapsing reads, mapping them to a genome, and outputting the required files for miRNA discovery. | ||
keywords: | ||
  - mirdeep2 | ||
  - mapper | ||
  - RNA sequencing | ||
tools: | ||
  - "mirdeep2": | ||
      description: | | ||
        miRDeep2 Mapper (`mapper.pl`) is part of the miRDeep2 suite. It collapses identical reads, maps them to a reference genome, and outputs both collapsed FASTA and ARF files for downstream miRNA detection and analysis. | ||
      homepage: "https://www.mdc-berlin.de/content/mirdeep2-documentation" | ||
      documentation: "https://www.mdc-berlin.de/content/mirdeep2-documentation" | ||
      tool_dev_url: "https://github.com/rajewsky-lab/mirdeep2" | ||
      doi: "10.1093/nar/gkn491" | ||
      licence: ["GPL V3"] | ||
      identifier: biotools:mirdeep2 | ||
|
||
input: | ||
  - - meta: | ||
        type: map | ||
        description: Groovy Map containing sample information, e.g. `[ id:'sample1', | ||
          single_end:false ]` | ||
    - reads: | ||
        type: file | ||
        description: File containing the raw sequencing reads that need to be collapsed | ||
          and mapped to a reference genome. | ||
        pattern: "*.fa" | ||
  - - meta2: | ||
        type: map | ||
        description: Groovy Map containing information about the genome index. The `id` | ||
          value is used as the index base name. | ||
    # The process passes `<index>/<meta2.id>` to `mapper.pl -p`, so `index` is a directory, not a single file. | ||
    - index: | ||
        type: directory | ||
        description: Directory containing the genome index files used for mapping the | ||
          reads to the genome. The index files inside it must be named after `meta2.id`. | ||
        pattern: "*" | ||
output: | ||
  - outputs: | ||
      - meta: | ||
          type: map | ||
          description: Groovy Map containing sample information, e.g. `[ id:'sample1', | ||
            single_end:false ]` | ||
      - "*.fa": | ||
          type: file | ||
          description: FASTA file of processed reads written through the `-s` option | ||
            of `mapper.pl`. | ||
          pattern: "*.fa" | ||
      - "*.arf": | ||
          type: file | ||
          description: ARF file of read mappings written through the `-t` option of | ||
            `mapper.pl`. | ||
          pattern: "*.arf" | ||
  - versions: | ||
      - versions.yml: | ||
          type: file | ||
          description: File containing software versions for tracking. | ||
          pattern: "versions.yml" | ||
authors: | ||
  - "@atrigila" | ||
maintainers: | ||
  - "@atrigila" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
|
||
// nf-test suite for the MIRDEEP2_MAPPER process defined in ../main.nf. | ||
// Contains a shared setup (index build and FASTQ-to-FASTA preparation), two real runs and one stub run. | ||
nextflow_process { | ||
|
||
name "Test Process MIRDEEP2_MAPPER" | ||
script "../main.nf" | ||
process "MIRDEEP2_MAPPER" | ||
|
||
tag "modules" | ||
tag "modules_nfcore" | ||
tag "mirdeep2" | ||
tag "bowtie/build" | ||
tag "mirdeep2/mapper" | ||
tag "seqkit/fq2fa" | ||
tag "seqkit/replace" | ||
|
||
|
||
// Fixtures shared by the tests below; their outputs are referenced as <NAME>.out.<channel>. | ||
setup { | ||
// Builds an index from the miRDeep2 tutorial genome (cel_cluster.fa); its index channel is passed as input[1] in every test. | ||
run("BOWTIE_BUILD") { | ||
script "../../../bowtie/build/main.nf" | ||
process { | ||
""" | ||
input[0] = [ | ||
[ id:'genome_cel_cluster' ], // meta map | ||
file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/cel_cluster.fa', checkIfExists: true) | ||
] | ||
""" | ||
} | ||
} | ||
|
||
// Runs seqkit fq2fa on the gzipped smrnaseq test FASTQ; its fasta channel feeds SEQKIT_REPLACE. | ||
run("SEQKIT_FQ2FA") { | ||
script "../../../seqkit/fq2fa/main.nf" | ||
process { | ||
""" | ||
input[0] = [ | ||
[ id:'small_Clone1_N1' ], // meta map | ||
file('https://github.com/nf-core/test-datasets/raw/smrnaseq/testdata/trimmed/small_Clone1_N1.fastp.fastq.gz', checkIfExists: true) | ||
] | ||
""" | ||
} | ||
} | ||
|
||
// Runs seqkit replace on that FASTA using the ext.args / ext.suffix set for SEQKIT_REPLACE in ./nextflow.config. | ||
run("SEQKIT_REPLACE") { | ||
script "../../../seqkit/replace/main.nf" | ||
config "./nextflow.config" | ||
process { | ||
""" | ||
input[0] = SEQKIT_FQ2FA.out.fasta | ||
""" | ||
} | ||
} | ||
|
||
} | ||
|
||
// Real run on the miRDeep2 tutorial reads. Only versions.yml is snapshotted; the FASTA and ARF outputs are checked by content. | ||
test("mirdeep2 - mapper - fasta celegans") { | ||
config "./nextflow.config" | ||
|
||
when { | ||
process { | ||
""" | ||
input[0] = [ | ||
[ id:'test_reads', single_end:false ], // meta map | ||
file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/reads.fa', checkIfExists: true) | ||
] | ||
input[1] = BOWTIE_BUILD.out.index | ||
""" | ||
} | ||
} | ||
|
||
then { | ||
assertAll( | ||
{ assert process.success }, | ||
{ assert snapshot(process.out.versions).match() }, | ||
|
||
// md5sum not stable - IDs change while sequences are the same | ||
// In the first emitted tuple, element [1] is the *.fa output and element [2] the *.arf output (order of the outputs tuple in ../main.nf). | ||
|
||
// Assert TCACCGGGGGTACATCAGCTAA occurs once | ||
{ assert file(process.out.outputs[0][1]).readLines().findAll { it.contains("TCACCGGGGGTACATCAGCTAA") }.size() == 1 }, | ||
|
||
// Assert seq_347479_x287 occurs once | ||
{ assert file(process.out.outputs[0][1]).readLines().findAll { it.contains("seq_347479_x287") }.size() == 1 }, | ||
|
||
// Assert that specific content occurs 4 times | ||
{ assert file(process.out.outputs[0][2]).readLines().findAll { it.contains("21\t1\t21\ttcaccgggtgtaaatcagctt\tchrII:11534525-11540624\t21\t3535\t3555\ttcaccgggtgtaaatcagctt\t+\t0\tmmmmmmmmmmmmmmmmmmmmm") }.size() == 4 } | ||
) | ||
} | ||
|
||
} | ||
|
||
// Real run on the smrnaseq reads prepared in setup; no snapshot is taken, only content checks on the *.fa output. | ||
test("mirdeep2 - mapper - fasta smrnaseq") { | ||
config "./nextflow.config" | ||
|
||
when { | ||
process { | ||
""" | ||
input[0] = SEQKIT_REPLACE.out.fastx | ||
input[1] = BOWTIE_BUILD.out.index | ||
""" | ||
} | ||
} | ||
|
||
then { | ||
assertAll( | ||
{ assert process.success }, | ||
|
||
// Assert reads occurs once | ||
{ assert file(process.out.outputs[0][1]).readLines().findAll { it.contains("TACCTGAGGTAGCAGGTTGTATAGTTGGGG") }.size() == 1 }, | ||
|
||
// Assert ID occurs once | ||
{ assert file(process.out.outputs[0][1]).readLines().findAll { it.contains("seq_996152_x1") }.size() == 1 } | ||
|
||
) | ||
} | ||
|
||
} | ||
|
||
// Stub run (-stub): checks that the process succeeds and that the complete output matches the stored snapshot. | ||
// The test name is the snapshot key, so renaming it requires regenerating the snapshot. | ||
test("mirdeep2 - fasta - stub") { | ||
|
||
options "-stub" | ||
|
||
when { | ||
process { | ||
""" | ||
input[0] = [ | ||
[ id:'test_reads', single_end:false ], // meta map | ||
file('https://github.com/rajewsky-lab/mirdeep2/raw/master/tutorial_dir/reads.fa', checkIfExists: true) | ||
] | ||
input[1] = BOWTIE_BUILD.out.index | ||
""" | ||
} | ||
} | ||
|
||
then { | ||
assertAll( | ||
{ assert process.success }, | ||
{ assert snapshot(process.out).match() } | ||
) | ||
} | ||
|
||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
{ | ||
"mirdeep2 - fasta - stub": { | ||
"content": [ | ||
{ | ||
"0": [ | ||
[ | ||
{ | ||
"id": "test_reads", | ||
"single_end": false | ||
}, | ||
"test_reads.fa:md5,d41d8cd98f00b204e9800998ecf8427e", | ||
"test_readsreads_vs_refdb.arf:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
] | ||
], | ||
"1": [ | ||
"versions.yml:md5,33c794292d6772d67fa8001439394614" | ||
], | ||
"outputs": [ | ||
[ | ||
{ | ||
"id": "test_reads", | ||
"single_end": false | ||
}, | ||
"test_reads.fa:md5,d41d8cd98f00b204e9800998ecf8427e", | ||
"test_readsreads_vs_refdb.arf:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
] | ||
], | ||
"versions": [ | ||
"versions.yml:md5,33c794292d6772d67fa8001439394614" | ||
] | ||
} | ||
], | ||
"meta": { | ||
"nf-test": "0.9.0", | ||
"nextflow": "24.04.4" | ||
}, | ||
"timestamp": "2024-09-20T20:58:19.544297445" | ||
}, | ||
"mirdeep2 - mapper - fasta celegans": { | ||
"content": [ | ||
[ | ||
"versions.yml:md5,33c794292d6772d67fa8001439394614" | ||
] | ||
], | ||
"meta": { | ||
"nf-test": "0.9.0", | ||
"nextflow": "24.04.4" | ||
}, | ||
"timestamp": "2024-09-17T17:41:05.101661825" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
// Test-only process settings used by the MIRDEEP2_MAPPER nf-test suite. | ||
process { | ||
    withName: 'MIRDEEP2_MAPPER' { | ||
        // Extra mapper.pl flags; the module inserts them into the mapper.pl command line via task.ext.args. | ||
        ext.args = "-c -j -k TCGTATGCCGTCTTCTGCTTGT -l 18 -m -v" | ||
    } | ||
|
||
    withName: 'SEQKIT_REPLACE' { | ||
        // The backslash is doubled so the regex whitespace class \s reaches seqkit intact. | ||
        // A bare "\s" inside a double-quoted Groovy string is treated as a string escape, not as a regex token. | ||
        ext.args = "-p '\\s.+'" | ||
        ext.suffix = "fasta" | ||
    } | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
--- | ||
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json | ||
# Conda environment for the miRDeep2 module; the mirdeep2 pin below is the same 2.0.1.2 release named in the container image tags of the accompanying main.nf. | ||
channels: | ||
  - conda-forge | ||
  - bioconda | ||
dependencies: | ||
  - "bioconda::mirdeep2=2.0.1.2" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
// Runs miRDeep2.pl on processed reads and their genome mappings, then renames the result files after the sample prefix. | ||
// | ||
// Inputs: | ||
//   meta, processed_reads, genome_mappings - sample metadata map plus the reads and mappings files passed to miRDeep2.pl | ||
//   meta2, fasta                           - metadata map and the FASTA passed as the second positional argument | ||
//   meta3, mature, hairpin, mature_other_species - optional reference files; each is replaced by the literal "none" when empty | ||
// Outputs: | ||
//   outputs  - tuple of meta and the files matching result*.{bed,csv,html} | ||
//   versions - versions.yml holding the hard-coded mirdeep2 version string | ||
process MIRDEEP2_MIRDEEP2 { | ||
tag "$meta.id" | ||
label 'process_medium' | ||
|
||
conda "${moduleDir}/environment.yml" | ||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? | ||
'https://depot.galaxyproject.org/singularity/mirdeep2:2.0.1.2--0': | ||
'biocontainers/mirdeep2:2.0.1.2--0' }" | ||
|
||
input: | ||
tuple val(meta), path(processed_reads), path(genome_mappings) | ||
tuple val(meta2), path(fasta) | ||
tuple val(meta3), path(mature), path(hairpin), path(mature_other_species) | ||
|
||
output: | ||
tuple val(meta), path("result*.{bed,csv,html}") , emit: outputs | ||
path "versions.yml" , emit: versions | ||
|
||
when: | ||
task.ext.when == null || task.ext.when | ||
|
||
script: | ||
// args: extra miRDeep2.pl flags from task.ext.args, appended after the positional arguments. | ||
def args = task.ext.args ?: '' | ||
// prefix: used only to rename the result_* files after the run. | ||
def prefix = task.ext.prefix ?: "${meta.id}" | ||
// The version is hard-coded and echoed verbatim into versions.yml. | ||
// NOTE(review): the conda pin and container tags say 2.0.1.2 while this says 2.0.1 - confirm which string should be reported. | ||
def VERSION = '2.0.1' | ||
// Each optional reference falls back to the literal string "none" when the corresponding input is empty. | ||
// They are passed positionally in the order mature_species, mature_other, precursors. | ||
def mature_species = mature ? "${mature}" : "none" | ||
def mature_other = mature_other_species ? "${mature_other_species}": "none" | ||
def precursors = hairpin ? "${hairpin}" : "none" | ||
|
||
""" | ||
miRDeep2.pl \\ | ||
$processed_reads \\ | ||
$fasta \\ | ||
$genome_mappings \\ | ||
$mature_species \\ | ||
$mature_other \\ | ||
$precursors \\ | ||
$args | ||
mv result_*.bed result_${prefix}.bed | ||
mv result_*.csv result_${prefix}.csv | ||
mv result_*.html result_${prefix}.html | ||
cat <<-END_VERSIONS > versions.yml | ||
"${task.process}": | ||
mirdeep2: \$(echo "$VERSION") | ||
END_VERSIONS | ||
""" | ||
|
||
// Stub: creates empty result_<prefix>.{html,bed,csv} placeholders plus versions.yml without running miRDeep2.pl. | ||
stub: | ||
// NOTE(review): args is defined here but not referenced by the stub script. | ||
def args = task.ext.args ?: '' | ||
def prefix = task.ext.prefix ?: "${meta.id}" | ||
def VERSION = '2.0.1' | ||
""" | ||
touch result_${prefix}.html | ||
touch result_${prefix}.bed | ||
touch result_${prefix}.csv | ||
cat <<-END_VERSIONS > versions.yml | ||
"${task.process}": | ||
mirdeep2: \$(echo "$VERSION") | ||
END_VERSIONS | ||
""" | ||
} |
Oops, something went wrong.