Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add mirTrace module #6507

Merged
merged 6 commits into from
Sep 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions modules/nf-core/mirtrace/qc/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment definition for the mirtrace/qc module.
name: mirtrace_qc
# Channels are listed in priority order.
channels:
  - conda-forge
  - bioconda
dependencies:
  # Pinned to the same miRTrace release (1.0.1) as the container images
  # referenced by the module's main.nf.
  - bioconda::mirtrace=1.0.1
64 changes: 64 additions & 0 deletions modules/nf-core/mirtrace/qc/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Runs `mirtrace qc` on the staged read files for one sample and collects the
// HTML/JSON/TSV reports plus the collapsed QC-passed FASTA files.
//
// Inputs:
//   meta             - sample map; `meta.id` is used as the task tag and stub prefix
//   reads            - one or more read files passed positionally to mirtrace
//   mirtrace_species - value passed to `--species`
//
// Extra command-line options can be supplied through `task.ext.args`.
process MIRTRACE_QC {
    tag "$meta.id"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/mirtrace:1.0.1--0':
        'biocontainers/mirtrace:1.0.1--0' }"

    input:
    tuple val(meta), path(reads)
    val(mirtrace_species)

    output:
    tuple val(meta), path ("*.html")                                                 , emit: html
    tuple val(meta), path ("*.json")                                                 , emit: json
    tuple val(meta), path ("*.tsv")                                                  , emit: tsv
    tuple val(meta), path ("qc_passed_reads.all.collapsed/*.{fa,fasta}")             , emit: all_fa
    tuple val(meta), path ("qc_passed_reads.rnatype_unknown.collapsed/*.{fa,fasta}") , emit: rnatype_unknown_fa
    path "versions.yml"                                                              , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // `reads` may be a single path rather than a list when only one file is
    // staged, so normalise it to a list of file names before joining.
    def file_list = reads instanceof List ? reads.collect { it.toString() } : [reads.toString()]

    """
    mirtrace qc \\
        --species ${mirtrace_species} \\
        --write-fasta \\
        --output-dir . \\
        --force \\
        ${args} \\
        ${file_list.join(' ')}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        mirtrace: \$(echo \$(mirtrace -v))
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Create one placeholder per declared output channel; the FASTA
    // placeholders live in the sub-directories the output globs look in.
    """
    touch ${prefix}.html
    touch ${prefix}.json
    touch ${prefix}.tsv

    mkdir -p qc_passed_reads.all.collapsed
    mkdir -p qc_passed_reads.rnatype_unknown.collapsed

    touch qc_passed_reads.all.collapsed/${prefix}.fa
    touch qc_passed_reads.rnatype_unknown.collapsed/${prefix}.fa

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        mirtrace: \$(echo \$(mirtrace -v))
    END_VERSIONS
    """
}
66 changes: 66 additions & 0 deletions modules/nf-core/mirtrace/qc/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module metadata for mirtrace/qc. Output patterns mirror the globs declared
# in the process `output:` block of main.nf.
name: "mirtrace_qc"
description: "A tool for quality control and tracing taxonomic origins of microRNA sequencing data"
keywords:
  - microRNA
  - smrnaseq
  - QC
tools:
  - "mirtrace":
      # Folded scalar: the lines below are joined with single spaces into one string.
      description: >-
        miRTrace is a new quality control and taxonomic tracing tool developed
        specifically for small RNA sequencing data (sRNA-Seq). Each sample is
        characterized by profiling sequencing quality, read length, sequencing
        depth and miRNA complexity and also the amounts of miRNAs versus
        undesirable sequences (derived from tRNAs, rRNAs and sequencing
        artifacts). In addition to these routine quality control (QC) analyses,
        miRTrace can accurately and sensitively resolve taxonomic origins of
        small RNA-Seq data based on the composition of clade-specific miRNAs.
        This feature can be used to detect cross-clade contaminations in
        typical lab settings. It can also be applied for more specific
        applications in forensics, food quality control and clinical diagnosis,
        for instance tracing the origins of meat products or detecting
        parasitic microRNAs in host serum.
      homepage: "https://github.com/friedlanderlab/mirtrace/tree/master"
      documentation: "https://github.com/friedlanderlab/mirtrace/blob/master/release-bundle-includes/doc/manual/mirtrace_manual.pdf"
      tool_dev_url: "https://github.com/friedlanderlab/mirtrace/tree/master"
      doi: "10.1186/s13059-018-1588-9"
      licence: ["GPL v2"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`
  - reads:
      type: file
      description: microRNA sequencing data
      pattern: "*.{fastq,fastq.gz}"
  - mirtrace_species:
      type: string
      description: Target species in microRNA sequencing data (miRbase encoding, e.g. “hsa” for Homo sapiens)

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - html:
      type: file
      description: HTML file
      pattern: "*.html"
  - json:
      type: file
      description: JSON file
      pattern: "*.json"
  - tsv:
      type: file
      description: TSV file
      pattern: "*.tsv"
  - all_fa:
      type: file
      description: QC-passed reads in FASTA file. Identical reads are collapsed. Entries are sorted by abundance.
      pattern: "qc_passed_reads.all.collapsed/*.{fa,fasta}"
  - rnatype_unknown_fa:
      type: file
      description: Unknown RNA type QC-passed reads in FASTA file. Identical reads are collapsed. Entries are sorted by abundance.
      pattern: "qc_passed_reads.rnatype_unknown.collapsed/*.{fa,fasta}"

authors:
  - "@atrigila"
maintainers:
  - "@atrigila"
81 changes: 81 additions & 0 deletions modules/nf-core/mirtrace/qc/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// nf-test suite for the MIRTRACE_QC process: one real run on two human
// RNA-seq FASTQ files and one stub run on a single FASTQ file.
nextflow_process {

    name "Test Process MIRTRACE_QC"
    script "../main.nf"
    process "MIRTRACE_QC"

    tag "modules"
    tag "modules_nfcore"
    tag "mirtrace"
    tag "mirtrace/qc"

    test("human - fastq") {

        when {
            process {
                """
                input[0] = [
                [ id:'test', single_end:false ], // meta map
                [
                file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true),
                file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true),
                ]
                ]
                input[1] = "hsa"
                """
            }
        }

        then {
            assertAll(
                { assert process.success },

                // HTML report: checked for a fixed marker string instead of a snapshot
                { assert path(process.out.html[0][1]).text.contains("This file is part of miRTrace.") },

                // JSON report: checked through one statistic of the first result entry
                { assert path(process.out.json[0][1]).json.results[0].stats.uniqueQCPassedSeqsCount == 912 },

                // TSV tables are snapshotted
                { assert snapshot(process.out.tsv).match("tsv") },

                // Collapsed FASTA outputs are snapshotted
                { assert snapshot(process.out.rnatype_unknown_fa).match("rnatype_unknown_fa") },
                { assert snapshot(process.out.all_fa).match("all_fa") },

                // Software versions file is snapshotted
                { assert snapshot(process.out.versions).match("versions") }
            )
        }

    }

    test("human - fastq - stub") {

        // Run the process `stub:` block instead of the real command.
        options "-stub"

        when {
            process {
                """
                input[0] = [
                [ id:'test', single_end:false ], // meta map
                [
                file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true),
                ]
                ]
                input[1] = "hsa"
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                // Every output channel of the stub run is captured in one snapshot
                { assert snapshot(process.out).match() }
            )
        }

    }

}
Loading
Loading