Skip to content

Commit

Permalink
Merge pull request #441 from atrigila/nf-core_bowtie_align
Browse files Browse the repository at this point in the history
Migrate to nf-core `bowtie align` in contaminant filter
  • Loading branch information
apeltzer authored Sep 26, 2024
2 parents d86d425 + 1438a91 commit 586ef69
Show file tree
Hide file tree
Showing 21 changed files with 1,448 additions and 112 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [[#435]](https://github.com/nf-core/smrnaseq/pull/435) - Replace local instances of bowtie with nf-core [`bowtie2`](https://github.com/nf-core/smrnaseq/issues/434) and [`bowtie1`](https://github.com/nf-core/smrnaseq/issues/433) - Additionally adds a `bioawk` module that cleans fasta files.
- [[#438]](https://github.com/nf-core/smrnaseq/pull/438) - Update [Mirtop to latest version](https://github.com/nf-core/smrnaseq/issues/437) - Process samples separately and join results with `CSVTK_JOIN`.
- [[#439]](https://github.com/nf-core/smrnaseq/pull/439) - Fix [paired end samples processing](https://github.com/nf-core/smrnaseq/issues/415) - Fix paired end sample handling and add test profile.
- [[#441]](https://github.com/nf-core/smrnaseq/pull/441) - Migrate [local contaminant bowtie to nf-core](https://github.com/nf-core/smrnaseq/issues/436) - Replace local processes with `BOWTIE2_ALIGN`.

## v2.3.1 - 2024-04-18 - Gray Zinc Dalmation Patch

Expand Down
57 changes: 57 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@ process {
publishDir = [
path: { "${params.outdir}/contaminant_filter/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
mode: params.publish_dir_mode,
enabled: false,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
Expand All @@ -264,6 +265,62 @@ process {
publishDir = [ enabled: false ]
}

// Settings for the bowtie2 alignment steps of the contaminant filter. The trailing `.*`
// makes the selector match every process in CONTAMINANT_FILTER whose name starts with
// BOWTIE2_ALIGN.
withName: 'NFCORE_SMRNASEQ:CONTAMINANT_FILTER:BOWTIE2_ALIGN.*' {
// bowtie2 options: very-sensitive preset in local alignment mode, report at most one alignment per read (-k 1).
ext.args = '--very-sensitive-local -k 1'
// Output prefix is "<contaminant>_<sample id>", both taken from the meta map.
ext.prefix = {"${meta.contaminant}_${meta.id}"}
// Outputs of this step are not published.
publishDir = [ enabled: false ]
}

// gawk step that produces the rRNA count for the contaminant filter.
withName: 'NFCORE_SMRNASEQ:CONTAMINANT_FILTER:STATS_GAWK_RRNA' {
// Output prefix is "<contaminant>_<sample id>", both taken from the meta map.
ext.prefix = {"${meta.contaminant}_${meta.id}"}
// NOTE(review): presumably the output file extension, giving "<prefix>.stats" — confirm against the gawk module.
ext.suffix = "stats"
// Shell-quoted awk program: sums the first field of input lines 4 and 5 and prints the total as `"rRNA": <total>`.
// NOTE(review): lines 4-5 presumably are the "aligned exactly 1 time" / "aligned >1 times" rows of a
// bowtie2 unpaired-read summary — confirm against the file wired into this process.
ext.args2 = '\'BEGIN {tot=0} {if(NR==4 || NR==5){tot+=\$1}} END {print "\\"' + "rRNA" + '\\": " tot}\''
// Outputs of this step are not published.
publishDir = [ enabled: false ]
}

// gawk step that produces the tRNA count for the contaminant filter.
withName: 'NFCORE_SMRNASEQ:CONTAMINANT_FILTER:STATS_GAWK_TRNA' {
// Output prefix is "<contaminant>_<sample id>", both taken from the meta map.
ext.prefix = {"${meta.contaminant}_${meta.id}"}
// NOTE(review): presumably the output file extension, giving "<prefix>.stats" — confirm against the gawk module.
ext.suffix = "stats"
// Shell-quoted awk program: sums the first field of input lines 4 and 5 and prints the total as `"tRNA": <total>`.
// NOTE(review): lines 4-5 presumably are the "aligned exactly 1 time" / "aligned >1 times" rows of a
// bowtie2 unpaired-read summary — confirm against the file wired into this process.
ext.args2 = '\'BEGIN {tot=0} {if(NR==4 || NR==5){tot+=\$1}} END {print "\\"' + "tRNA" + '\\": " tot}\''
// Outputs of this step are not published.
publishDir = [ enabled: false ]
}

// gawk step that produces the cDNA count for the contaminant filter.
withName: 'NFCORE_SMRNASEQ:CONTAMINANT_FILTER:STATS_GAWK_CDNA' {
// Output prefix is "<contaminant>_<sample id>", both taken from the meta map.
ext.prefix = {"${meta.contaminant}_${meta.id}"}
// NOTE(review): presumably the output file extension, giving "<prefix>.stats" — confirm against the gawk module.
ext.suffix = "stats"
// Shell-quoted awk program: sums the first field of input lines 4 and 5 and prints the total as `"cDNA": <total>`.
// NOTE(review): lines 4-5 presumably are the "aligned exactly 1 time" / "aligned >1 times" rows of a
// bowtie2 unpaired-read summary — confirm against the file wired into this process.
ext.args2 = '\'BEGIN {tot=0} {if(NR==4 || NR==5){tot+=\$1}} END {print "\\"' + "cDNA" + '\\": " tot}\''
// Outputs of this step are not published.
publishDir = [ enabled: false ]
}
// gawk step that produces the ncRNA count for the contaminant filter.
withName: 'NFCORE_SMRNASEQ:CONTAMINANT_FILTER:STATS_GAWK_NCRNA' {
// Output prefix is "<contaminant>_<sample id>", both taken from the meta map.
ext.prefix = {"${meta.contaminant}_${meta.id}"}
// NOTE(review): presumably the output file extension, giving "<prefix>.stats" — confirm against the gawk module.
ext.suffix = "stats"
// Shell-quoted awk program: sums the first field of input lines 4 and 5 and prints the total as `"ncRNA": <total>`.
// NOTE(review): lines 4-5 presumably are the "aligned exactly 1 time" / "aligned >1 times" rows of a
// bowtie2 unpaired-read summary — confirm against the file wired into this process.
ext.args2 = '\'BEGIN {tot=0} {if(NR==4 || NR==5){tot+=\$1}} END {print "\\"' + "ncRNA" + '\\": " tot}\''
// Outputs of this step are not published.
publishDir = [ enabled: false ]
}

// gawk step that produces the piRNA count for the contaminant filter.
withName: 'NFCORE_SMRNASEQ:CONTAMINANT_FILTER:STATS_GAWK_PIRNA' {
// Output prefix is "<contaminant>_<sample id>", both taken from the meta map.
ext.prefix = {"${meta.contaminant}_${meta.id}"}
// NOTE(review): presumably the output file extension, giving "<prefix>.stats" — confirm against the gawk module.
ext.suffix = "stats"
// Shell-quoted awk program: sums the first field of input lines 4 and 5 and prints the total as `"piRNA": <total>`.
// NOTE(review): lines 4-5 presumably are the "aligned exactly 1 time" / "aligned >1 times" rows of a
// bowtie2 unpaired-read summary — confirm against the file wired into this process.
ext.args2 = '\'BEGIN {tot=0} {if(NR==4 || NR==5){tot+=\$1}} END {print "\\"' + "piRNA" + '\\": " tot}\''
// Outputs of this step are not published.
publishDir = [ enabled: false ]
}

// gawk step that produces the "other" contaminant count for the contaminant filter.
withName: 'NFCORE_SMRNASEQ:CONTAMINANT_FILTER:STATS_GAWK_OTHER' {
// Output prefix is "<contaminant>_<sample id>", both taken from the meta map.
ext.prefix = {"${meta.contaminant}_${meta.id}"}
// NOTE(review): presumably the output file extension, giving "<prefix>.stats" — confirm against the gawk module.
ext.suffix = "stats"
// Shell-quoted awk program: sums the first field of input lines 4 and 5 and prints the total as `"other": <total>`.
// NOTE(review): lines 4-5 presumably are the "aligned exactly 1 time" / "aligned >1 times" rows of a
// bowtie2 unpaired-read summary — confirm against the file wired into this process.
ext.args2 = '\'BEGIN {tot=0} {if(NR==4 || NR==5){tot+=\$1}} END {print "\\"' + "other" + '\\": " tot}\''
// Outputs of this step are not published.
publishDir = [ enabled: false ]
}

// Publishing rules for the FILTER_STATS step of the contaminant filter.
withName: 'NFCORE_SMRNASEQ:CONTAMINANT_FILTER:FILTER_STATS' {
publishDir = [
// Target directory: <outdir>/contaminant_filter/<name>, where <name> is the lowercase first
// `_`-separated token of the last `:`-separated component of the process name
// ("filter" for FILTER_STATS).
path: { "${params.outdir}/contaminant_filter/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
mode: params.publish_dir_mode,
// Publish only when the pipeline is asked to keep intermediate files.
enabled: params.save_intermediates,
// Returning null from saveAs skips the file, so versions.yml is never published.
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

//
// MIRNA_QUANT
//
Expand Down
5 changes: 5 additions & 0 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@
"git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48",
"installed_by": ["modules"]
},
"bowtie2/align": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
},
"bowtie2/build": {
"branch": "master",
"git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48",
Expand Down
48 changes: 0 additions & 48 deletions modules/local/bowtie_map_contaminants.nf

This file was deleted.

23 changes: 17 additions & 6 deletions modules/local/filter_stats.nf
Original file line number Diff line number Diff line change
@@ -1,30 +1,41 @@
process FILTER_STATS {
label 'process_medium'
tag "$meta.id"

conda 'bowtie2=2.4.5'
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bowtie2:2.4.5--py39hd2f7db1_2' :
'biocontainers/bowtie2:2.4.5--py39hd2f7db1_2' }"

input:
tuple val(meta), path(reads)
path stats_files
tuple val(meta), path(reads), path (stats_files)

output:
path "*_mqc.yaml" , emit: stats
tuple val(meta), path('*.filtered.fastq.gz'), emit: reads
tuple val(meta), path('*.filtered.fastq.gz'), emit: reads, optional: true
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
"""
readnumber=\$(wc -l ${reads} | awk '{ print \$1/4 }')
cat ./filtered.${meta.id}_*.stats | \\
if [[ ${reads} == *.gz ]]; then
readnumber=\$(zcat ${reads} | wc -l | awk '{ print \$1/4 }')
else
readnumber=\$(wc -l ${reads} | awk '{ print \$1/4 }')
fi
cat ./*${meta.id}*.stats | \\
tr '\\n' ', ' | \\
awk -v sample=${meta.id} -v readnumber=\$readnumber '{ print "id: \\"my_pca_section\\"\\nsection_name: \\"Contamination Filtering\\"\\ndescription: \\"This plot shows the amount of reads filtered by contaminant type.\\"\\nplot_type: \\"bargraph\\"\\npconfig:\\n id: \\"contamination_filter_plot\\"\\n title: \\"Contamination Plot\\"\\n ylab: \\"Number of reads\\"\\ndata:\\n "sample": {"\$0"\\"remaining reads\\": "readnumber"}" }' > ${meta.id}.contamination_mqc.yaml
gzip -c ${reads} > ${meta.id}.filtered.fastq.gz
if [[ ${reads} == *.gz ]]; then
cp ${reads} ${meta.id}.filtered.fastq.gz
else
gzip -c ${reads} > ${meta.id}.filtered.fastq.gz
fi
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
7 changes: 7 additions & 0 deletions modules/nf-core/bowtie2/align/environment.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

117 changes: 117 additions & 0 deletions modules/nf-core/bowtie2/align/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 586ef69

Please sign in to comment.