
Commit

refactor: ica, output file handling CDCgov#149
slsevilla committed Apr 1, 2024
1 parent 66e8f46 commit f72f42a
Showing 2 changed files with 80 additions and 69 deletions.
109 changes: 60 additions & 49 deletions modules/local/fairy_scaffold_count_check.nf
@@ -17,25 +17,12 @@ process SCAFFOLD_COUNT_CHECK {
path(names_file)

output:
tuple val(meta), path('*_summary.txt'), emit: outcome
path('*_summaryline.tsv'), optional:true, emit: summary_line
tuple val(meta), path('*.synopsis'), optional:true, emit: synopsis
path("versions.yml"), emit: versions
tuple val(meta), path('*_summary_complete.txt'), emit: outcome
path('*_summaryline.tsv'), optional:true, emit: summary_line
tuple val(meta), path('*.synopsis'), optional:true, emit: synopsis
path("versions.yml"), emit: versions

script:
// terra=true sets paths for bc/wget for terra container paths
if (params.terra==false) { terra = ""}
else if (params.terra==true) { terra = "-2 terra" }
else { error "Please set params.terra to either \"true\" or \"false\"" }
// Add an if/else for ICA runs: ICA requires stating where each script lives, but hard-coding that path prevents CLI users from running the pipeline from any directory.
if (params.ica==false) {
ica_python = ""
ica_bash = ""
} else if (params.ica==true) {
ica_python = "python ${workflow.launchDir}/bin/"
ica_bash = "bash ${workflow.launchDir}/bin/"
}
else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." }
// define variables
def prefix = task.ext.prefix ?: "${meta.id}"
def fairy_read_count_outcome_file = fairy_read_count_outcome ? "$fairy_read_count_outcome" : ""
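The removed if/else above is replaced, in the next hunk, by Groovy ternaries that prefix each script call with params.ica_path when params.ica is true. A minimal sketch of that pattern, using a hypothetical helper script name:

// resolve the script location once in the script: block (my_script.sh is a hypothetical helper)
def my_script = params.ica ? "${params.ica_path}/my_script.sh" : "my_script.sh"
// then call it inside the triple-quoted shell block as:
//   ${my_script} -i input.txt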
@@ -49,58 +36,82 @@ process SCAFFOLD_COUNT_CHECK {
def extended_qc_arg = extended_qc ? "--extended_qc" : ""
def container_version = "base_v2.1.0"
def container = task.container.toString() - "quay.io/jvhagey/phoenix@"
def script_id = params.ica ? "${params.ica_path}/determine_taxID.sh" : "determine_taxID.sh"
def script_writer = params.ica ? "${params.ica_path}/pipeline_stats_writer.sh" : "pipeline_stats_writer.sh"
def script_summary = params.ica ? "python ${params.ica_path}/Phoenix_summary_line.py" : "Phoenix_summary_line.py"
def script_edit = params.ica ? "python ${params.ica_path}/edit_line_summary.py" : "edit_line_summary.py"
def terra = params.terra ? "-2 terra" : ""
"""
#checking that the output contains scaffolds still:
# set new final script name
complete_summary="${prefix}_summary_complete.txt"
# handle -entry SCAFFOLDS
scaffold_entry_file() {
cat <<EOT >> \${complete_summary}
PASSED: Using Scaffold entry no corruption check run on R1.
PASSED: Using Scaffold entry no corruption check run on R2.
PASSED: Using Scaffold entry no paired reads to check.
PASSED: Using Scaffold entry no trimd reads to check.
FAILED: No scaffolds in ${prefix} after filtering!
EOT
}
# checking that the output contains scaffolds still:
if grep "Output: 0 reads (0.00%) 0 bases (0.00%)" ${bbmap_log}; then
#Check if the file exists already (it won't with -entry SCAFFOLDS)
if [ -f ${prefix}_summary_old_3.txt ]; then
#replace end of line with actual error message
sed -i 's/End_of_File/FAILED: No scaffolds in ${prefix} after filtering!/' ${fairy_read_count_outcome_file}
if [ -f ${fairy_read_count_outcome} ]; then
# replace end of line with actual error message
cp ${fairy_read_count_outcome} \${complete_summary}
sed -i 's/End_of_File/FAILED: No scaffolds in ${prefix} after filtering!/' \${complete_summary}
else
echo "PASSED: Using Scaffold entry no corruption check run on R1." > ${prefix}_summary_old_3.txt
echo "PASSED: Using Scaffold entry no corruption check run on R2." >> ${prefix}_summary_old_3.txt
echo "PASSED: Using Scaffold entry no paired reads to check." >> ${prefix}_summary_old_3.txt
echo "PASSED: Using Scaffold entry no trimd reads to check." >> ${prefix}_summary_old_3.txt
echo "FAILED: No scaffolds in ${prefix} after filtering!" >> ${prefix}_summary_old_3.txt
scaffold_entry_file
echo "FAILED: No scaffolds in ${prefix} after filtering!" >> \${complete_summary}
fi
# if the sample has no scaffolds left make the summaryline and synopsis file for it.
# get taxa ID
${ica_bash}determine_taxID.sh -r $kraken2_trimd_summary -s ${prefix} -d $nodes_file -m $names_file
${script_id} -r $kraken2_trimd_summary -s ${prefix} -d $nodes_file -m $names_file
#write synopsis file
${ica_bash}pipeline_stats_writer.sh -d ${prefix} -q ${prefix}.tax -5 $coverage $raw_qc $fastp_total_qc_pipeline_stats \\
$kraken2_trimd_report $kraken2_trimd_summary_pipeline_stats $krona_trimd $terra
# write synopsis file
${script_writer} \\
-d ${prefix} \\
-q ${prefix}.tax \\
-5 $coverage \\
$raw_qc \\
$fastp_total_qc_pipeline_stats \\
$kraken2_trimd_report \\
$kraken2_trimd_summary_pipeline_stats \\
$krona_trimd $terra
# write summary_line file
${ica_python}Phoenix_summary_line.py -n ${prefix} -s ${prefix}.synopsis -x ${prefix}.tax -o ${prefix}_summaryline.tsv\\
$kraken2_trimd_summary_summaryline $fastp_total_qc_summaryline $extended_qc_arg
${script_summary} \\
-n ${prefix} \\
-s ${prefix}.synopsis \\
-x ${prefix}.tax \\
-o ${prefix}_summaryline.tsv \\
$kraken2_trimd_summary_summaryline \\
$fastp_total_qc_summaryline \\
$extended_qc_arg
# change pass to fail and add in error
${ica_python}edit_line_summary.py -i ${prefix}_summaryline.tsv
#change file name.
cp ${prefix}_summary_old_3.txt ${prefix}_summary.txt
${script_edit} -i ${prefix}_summaryline.tsv
# if there are scaffolds left after filtering do the following...
else
#Check if the file exists already (it won't with -entry SCAFFOLDS)
if [ -f ${prefix}_summary_old_3.txt ]; then
if [ -f ${fairy_read_count_outcome} ]; then
#replace end of line with actual error message
sed -i 's/End_of_File/PASSED: More than 0 scaffolds in ${prefix} after filtering./' ${fairy_read_count_outcome_file}
cp ${fairy_read_count_outcome} \${complete_summary}
sed -i 's/End_of_File/PASSED: More than 0 scaffolds in ${prefix} after filtering./' \${complete_summary}
else
echo "PASSED: Using Scaffold entry no corruption check run on R1." > ${prefix}_summary_old_3.txt
echo "PASSED: Using Scaffold entry no corruption check run on R2." >> ${prefix}_summary_old_3.txt
echo "PASSED: Using Scaffold entry no paired reads to check." >> ${prefix}_summary_old_3.txt
echo "PASSED: Using Scaffold entry no trimd reads to check." >> ${prefix}_summary_old_3.txt
echo "PASSED: More than 0 scaffolds in ${prefix} after filtering." >> ${prefix}_summary_old_3.txt
scaffold_entry_file
echo "PASSED: More than 0 scaffolds in ${prefix} after filtering." >> \${complete_summary}
fi
cp ${prefix}_summary_old_3.txt ${prefix}_summary.txt
fi
# getting script versions
dettaxid_version=\$(${ica_bash}determine_taxID.sh -V)
pipestats_version=\$(${ica_bash}pipeline_stats_writer.sh -V)
dettaxid_version=\$(${script_id} -V)
pipestats_version=\$(${script_writer} -V)
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand All @@ -109,8 +120,8 @@ process SCAFFOLD_COUNT_CHECK {
phoenix_base_container: ${container}
\${dettaxid_version}
\${pipestats_version}
Phoenix_summary_line.py: \$(${ica_python}Phoenix_summary_line.py --version )
edit_line_summary.py: \$(${ica_python}edit_line_summary.py --version )
Phoenix_summary_line.py: \$(${script_summary} --version )
edit_line_summary.py: \$(${script_edit} --version )
END_VERSIONS
"""
}
}
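The rewritten shell block builds ${prefix}_summary_complete.txt either by copying the upstream fairy outcome file and swapping its End_of_File placeholder with sed, or, for -entry SCAFFOLDS runs, by writing stub PASSED lines from a heredoc function. A standalone bash sketch of that flow, with illustrative sample and file names (not taken from the commit):

#!/usr/bin/env bash
prefix="SAMPLE1"                                  # illustrative sample name
complete_summary="${prefix}_summary_complete.txt"
upstream_summary="${prefix}_input_summary.txt"    # stand-in for the fairy outcome file

write_stub() {   # stand-in for scaffold_entry_file; stub lines shortened here
cat <<EOT >> "${complete_summary}"
PASSED: Using Scaffold entry no corruption check run on R1.
PASSED: Using Scaffold entry no corruption check run on R2.
EOT
}

if [ -f "${upstream_summary}" ]; then
    cp "${upstream_summary}" "${complete_summary}"
    sed -i "s/End_of_File/FAILED: No scaffolds in ${prefix} after filtering!/" "${complete_summary}"
else
    write_stub
    echo "FAILED: No scaffolds in ${prefix} after filtering!" >> "${complete_summary}"
fi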
40 changes: 20 additions & 20 deletions workflows/phoenix.nf
@@ -211,26 +211,26 @@ workflow PHOENIX_EXTERNAL {
)
ch_versions = ch_versions.mix(RENAME_FASTA_HEADERS.out.versions)

// // Removing scaffolds <500bp
// BBMAP_REFORMAT (
// RENAME_FASTA_HEADERS.out.renamed_scaffolds
// )
// ch_versions = ch_versions.mix(BBMAP_REFORMAT.out.versions)

// // Combine bbmap log with the fairy outcome file
// scaffold_check_ch = BBMAP_REFORMAT.out.log.map{meta, log -> [[id:meta.id], log]}\
// .join(GET_TRIMD_STATS.out.outcome_to_edit.map{ meta, outcome_to_edit -> [[id:meta.id], outcome_to_edit]}, by: [0])\
// .join(GET_RAW_STATS.out.combined_raw_stats.map{meta, combined_raw_stats -> [[id:meta.id], combined_raw_stats]}, by: [0])\
// .join(GET_TRIMD_STATS.out.fastp_total_qc.map{ meta, fastp_total_qc -> [[id:meta.id], fastp_total_qc]}, by: [0])\
// .join(KRAKEN2_TRIMD.out.report.map{ meta, report -> [[id:meta.id], report]}, by: [0])\
// .join(KRAKEN2_TRIMD.out.k2_bh_summary.map{ meta, k2_bh_summary -> [[id:meta.id], k2_bh_summary]}, by: [0])\
// .join(KRAKEN2_TRIMD.out.krona_html.map{ meta, krona_html -> [[id:meta.id], krona_html]}, by: [0])

// // Checking that there are still scaffolds left after filtering
// SCAFFOLD_COUNT_CHECK (
// scaffold_check_ch, false, params.coverage, params.nodes, params.names
// )
// ch_versions = ch_versions.mix(SCAFFOLD_COUNT_CHECK.out.versions)
// Removing scaffolds <500bp
BBMAP_REFORMAT (
RENAME_FASTA_HEADERS.out.renamed_scaffolds
)
ch_versions = ch_versions.mix(BBMAP_REFORMAT.out.versions)

// Combine bbmap log with the fairy outcome file
scaffold_check_ch = BBMAP_REFORMAT.out.log.map{ meta, log -> [[id:meta.id], log]}\
.join(GET_TRIMD_STATS.out.outcome.map{ meta, outcome -> [[id:meta.id], outcome]}, by: [0])\
.join(GET_RAW_STATS.out.combined_raw_stats.map{ meta, combined_raw_stats -> [[id:meta.id], combined_raw_stats]}, by: [0])\
.join(GET_TRIMD_STATS.out.fastp_total_qc.map{ meta, fastp_total_qc -> [[id:meta.id], fastp_total_qc]}, by: [0])\
.join(KRAKEN2_TRIMD.out.report.map{ meta, report -> [[id:meta.id], report]}, by: [0])\
.join(KRAKEN2_TRIMD.out.k2_bh_summary.map{ meta, k2_bh_summary -> [[id:meta.id], k2_bh_summary]}, by: [0])\
.join(KRAKEN2_TRIMD.out.krona_html.map{ meta, krona_html -> [[id:meta.id], krona_html]}, by: [0])

// Checking that there are still scaffolds left after filtering
SCAFFOLD_COUNT_CHECK (
scaffold_check_ch, params.extended_qc, params.coverage, params.nodes, params.names
)
ch_versions = ch_versions.mix(SCAFFOLD_COUNT_CHECK.out.versions)

// //combing scaffolds with scaffold check information to ensure processes that need scaffolds only run when there are scaffolds in the file
// filtered_scaffolds_ch = BBMAP_REFORMAT.out.filtered_scaffolds.map{ meta, filtered_scaffolds -> [[id:meta.id], filtered_scaffolds]}
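The previously commented-out blocks are re-enabled, now passing params.extended_qc to SCAFFOLD_COUNT_CHECK and joining on GET_TRIMD_STATS.out.outcome. Each channel is mapped down to [[id: meta.id], file] before joining so the join keys on the sample id alone; a minimal, runnable sketch of that map/join pattern (the channel contents are illustrative, not taken from the pipeline):

workflow {
    // hypothetical channels whose meta maps differ in size
    ch_log     = Channel.of( [ [id:'sample1', single_end:false], file('sample1.bbmap.log') ] )
    ch_outcome = Channel.of( [ [id:'sample1'], file('sample1_summary.txt') ] )

    // normalise meta to just the id, then join on element 0
    ch_log.map{ meta, log -> [ [id:meta.id], log ] }
          .join(ch_outcome, by: [0])
          .view()   // -> [[id:sample1], .../sample1.bbmap.log, .../sample1_summary.txt]
}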
