generated from CDCgov/template
-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #45 from CDCgov/dev
Update images, samplesheet updates, rehead BAM
- Loading branch information
Showing
19 changed files
with
765 additions
and
257 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
#!/usr/bin/env python | ||
|
||
import os | ||
import argparse | ||
|
||
def parse_args():
    """Parse the command-line options of the BAM samplesheet generator.

    Both ``--directory`` and ``--output`` are mandatory; argparse exits
    with a usage message when either one is missing.

    Returns:
        argparse.Namespace: carries the ``directory`` and ``output`` values.
    """
    cli = argparse.ArgumentParser(
        description="Generate a samplesheet from a directory of BAM files for Freyja subworkflow",
        epilog="Usage: python bam_to_samplesheet.py --directory <PATH_TO_BAM_FILES> --output <OUTPUT_FILE>",
    )
    required_options = (
        ("--directory", "Directory containing BAM files."),
        ("--output", "Output file for the samplesheet."),
    )
    for flag, help_text in required_options:
        cli.add_argument(flag, help=help_text, required=True)
    return cli.parse_args()
|
||
def extract_sample_name(bam_filename):
    """Return the sample name encoded in a BAM file name.

    The sample name is everything in front of the first "." of the given
    name; a name without any "." is returned unchanged.
    """
    sample_name, _dot, _remainder = bam_filename.partition(".")
    return sample_name
|
||
def generate_samplesheet(directory, output_file):
    """Write a two-column CSV samplesheet listing every BAM file in a directory.

    Args:
        directory: Directory whose direct entries ending in ".bam" are listed.
        output_file: Path of the CSV file to create (overwritten if present).

    The file starts with the header ``SNAME,BAMFILE`` followed by one row per
    BAM file holding the sample name (see ``extract_sample_name``) and the
    absolute path of the file.
    """
    # os.listdir() returns entries in arbitrary order; sort them so the
    # samplesheet is identical from run to run for the same directory.
    bam_files = sorted(f for f in os.listdir(directory) if f.endswith(".bam"))
    with open(output_file, "w") as fout:
        fout.write("SNAME,BAMFILE\n")
        for bam_file in bam_files:
            sample_name = extract_sample_name(bam_file)
            bam_path = os.path.abspath(os.path.join(directory, bam_file))
            fout.write(f"{sample_name},{bam_path}\n")
|
||
def main():
    """Script entry point: read the CLI options and write the samplesheet."""
    options = parse_args()
    generate_samplesheet(directory=options.directory, output_file=options.output)


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
#!/usr/bin/env python3.9 | ||
|
||
import os | ||
import sys | ||
import argparse | ||
import http.client | ||
import urllib.parse | ||
|
||
# Platform names accepted in the "platform" column (compared lower-cased).
VALID_PLATFORMS = {
    "illumina",
    "nanopore",
    "iontorrent",
}

# Column names the samplesheet header has to start with, in this order.
EXPECTED_HEADERS = [
    "sample",
    "platform",
    "fastq_1",
    "fastq_2",
    "lr",
    "bam_file",
    "bedfile",
]

# Data rows with fewer comma-separated fields than this are reported and skipped.
MIN_COLS_REQUIRED = 3
|
||
def parse_args(args=None):
    """Parse the two positional command-line arguments of the checker.

    Args:
        args: Optional list of argument strings. ``None`` lets argparse read
            the arguments from ``sys.argv``.

    Returns:
        argparse.Namespace: carries the ``FILE_IN`` and ``FILE_OUT`` values.
    """
    cli = argparse.ArgumentParser(
        description="Reformat nf-core/aquascope samplesheet file and check its contents.",
        epilog="Example usage: python check_samplesheet.py <FILE_IN> <FILE_OUT>",
    )
    positionals = (
        ("FILE_IN", "Input samplesheet file."),
        ("FILE_OUT", "Output file."),
    )
    for dest, help_text in positionals:
        cli.add_argument(dest, help=help_text)
    return cli.parse_args(args)
|
||
def validate_fastq(fastq_file, line):
    """Print a message for each naming problem found in a FastQ path.

    Args:
        fastq_file: FastQ path taken from the samplesheet row.
        line: Samplesheet row the path came from; echoed in the messages.

    Two independent checks are made: the path must not contain a space and
    it must end in ".fastq.gz" or ".fq.gz". Problems are only reported on
    stdout; nothing is raised or returned.
    """
    accepted_suffixes = (".fastq.gz", ".fq.gz")
    contains_space = " " in fastq_file
    if contains_space:
        print(f"FastQ file '{fastq_file}' contains spaces! Line: {line}")
    if fastq_file.endswith(accepted_suffixes):
        return
    print(f"FastQ file '{fastq_file}' does not have extension '.fastq.gz' or '.fq.gz'! Line: {line}")
|
||
def _report_short_bed_lines(bed_lines, bedfile, line):
    """Print a message for every BED record with fewer than 6 tab-separated columns."""
    for number, bed_line in enumerate(bed_lines, start=1):
        record = bed_line.strip()
        if not record:
            # A blank line (typically the trailing one) is not a BED record.
            continue
        if len(record.split("\t")) < 6:
            print(f"Bed file '{bedfile}' must have at least 6 columns! (Line {number}) Line: {line}")


def validate_bedfile(bedfile, line, platform):
    """Check the BED file referenced by a samplesheet row and print any problems.

    Args:
        bedfile: Local path or http(s) URL of the BED file; may be empty/None.
        line: Samplesheet row being validated; echoed in every message.
        platform: Lower-cased platform name of the row.

    Behaviour:
        * No bedfile: a message is printed unless the platform is "iontorrent".
        * http(s) URL: the file is fetched with a GET request and its records
          are checked; a non-200 status or a network error is reported.
        * Local path: the file must exist and its records are checked.

    All findings are printed to stdout; nothing is raised or returned.
    """
    if not bedfile:
        if platform != "iontorrent":
            print(f"Bedfile is required for platforms other than IonTorrent if not provided. Line: {line}")
        return

    if bedfile.startswith(("http://", "https://")):
        parsed_url = urllib.parse.urlparse(bedfile)
        if parsed_url.scheme == "http":
            connection_cls = http.client.HTTPConnection
        else:
            connection_cls = http.client.HTTPSConnection

        # Request the full target: an empty path is not a valid request
        # target, and the query string is part of the resource address.
        target = parsed_url.path or "/"
        if parsed_url.query:
            target = f"{target}?{parsed_url.query}"

        # The timeout keeps an unresponsive server from hanging the check.
        conn = connection_cls(parsed_url.netloc, timeout=30)
        try:
            conn.request("GET", target)
            response = conn.getresponse()
            status = response.status
            body = response.read() if status == 200 else b""
        except (OSError, http.client.HTTPException) as err:
            print(f"Failed to download bed file '{bedfile}': {err} Line: {line}")
            return
        finally:
            # Always release the socket, whatever the outcome.
            conn.close()

        if status != 200:
            print(f"Failed to download bed file '{bedfile}': {status} Line: {line}")
            return
        text = body.decode("utf-8", errors="replace")
        _report_short_bed_lines(text.splitlines(), bedfile, line)
        return

    if not os.path.isfile(bedfile):
        print(f"Bed file '{bedfile}' does not exist! Line: {line}")
        return
    with open(bedfile, "r") as f:
        _report_short_bed_lines(f, bedfile, line)
|
||
def check_samplesheet(args):
    """Validate a samplesheet and write a normalized copy of it.

    Args:
        args: Object with ``FILE_IN`` (samplesheet to read) and ``FILE_OUT``
            (file to write) attributes, e.g. the result of ``parse_args``.

    The header is compared with ``EXPECTED_HEADERS`` and copied to the output.
    Each data row is split on commas, its cells are stripped of surrounding
    whitespace and double quotes, and the platform-specific columns and the
    bedfile are checked. Problems are printed to stdout. Rows with too few
    columns or an unknown platform are reported and left out of the output;
    every other row is written back with exactly seven columns.
    """
    file_in = args.FILE_IN
    file_out = args.FILE_OUT

    with open(file_in, "r") as fin, open(file_out, "w") as fout:
        # Clean header cells the same way as data cells so that
        # "sample, platform" is not rejected because of the space.
        header = [x.strip().strip('"') for x in fin.readline().strip().split(",")]
        if header[: len(EXPECTED_HEADERS)] != EXPECTED_HEADERS:
            print(f"Invalid header! Expected {EXPECTED_HEADERS} but got {header}. Line: {','.join(header)}")

        fout.write(",".join(header) + "\n")

        for raw_line in fin:
            # Drop the newline so messages do not carry an embedded line break.
            line = raw_line.strip()
            if not line:
                # Blank lines (e.g. at the end of the file) are not rows.
                continue

            cols = [x.strip().strip('"') for x in line.split(",")]
            if len(cols) < MIN_COLS_REQUIRED:
                print(f"Invalid number of columns (minimum = {MIN_COLS_REQUIRED})! Line: {line}")
                continue

            sample, platform = cols[0], cols[1].lower()
            # Pad with None so that short rows still unpack into four values.
            fastq_1, fastq_2, lr, bam_file = (cols[2:6] + [None] * 4)[:4]
            bedfile = cols[6] if len(cols) > 6 else None  # Only if bedfile is specified

            if platform not in VALID_PLATFORMS:
                print(f"Invalid platform '{platform}'! Line: {line}")
                continue
            if platform == "illumina":
                if fastq_1:
                    validate_fastq(fastq_1, line)
                if fastq_2:
                    validate_fastq(fastq_2, line)
            elif platform == "nanopore" and lr and not lr.endswith((".fastq.gz", ".fq.gz")):
                print(f"Nanopore requires FastQ file in 'lr' column! Line: {line}")
            elif platform == "iontorrent" and bam_file and not bam_file.endswith(".bam"):
                print(f"IonTorrent requires BAM file! Line: {line}")

            validate_bedfile(bedfile, line, platform)

            # Write to the output file; missing cells become empty strings.
            output_line = ",".join(
                [sample, platform, fastq_1 or "", fastq_2 or "", lr or "", bam_file or "", bedfile or ""]
            )
            fout.write(output_line + "\n")
|
||
def main(args=None):
    """Script entry point: parse the arguments, then check the samplesheet.

    Args:
        args: Optional list of argument strings forwarded to ``parse_args``.
    """
    parsed = parse_args(args)
    check_samplesheet(parsed)


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
/* | ||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
Config file for defining DSL2 per module options and publishing paths | ||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
Available keys to override module options: | ||
ext.args = Additional arguments appended to command in module. | ||
ext.args2 = Second set of arguments appended to command in module (multi-tool modules). | ||
ext.args3 = Third set of arguments appended to command in module (multi-tool modules). | ||
ext.prefix = File name prefix for output files. | ||
---------------------------------------------------------------------------------------- | ||
*/ | ||
|
||
process {

    // Default publishing rule for every process: the sub-directory of
    // params.outdir is the last ':'-separated part of the process name,
    // cut at the first '_' and lower-cased. 'versions.yml' is never published.
    publishDir = [
        path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
        mode: params.publish_dir_mode,
        saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
    ]

    // FREYJA_VARIANTS: extra arguments are built from params.gff3 and
    // params.varthresh; only *.tsv / *.csv outputs are published.
    withName: 'FREYJA_VARIANTS' {
        ext.args = "--minq 20 --annot \"${params.gff3}\" --varthresh \"${params.varthresh}\" "
        publishDir = [
            path: { "${params.outdir}/FREYJA_STANDALONE/VarCalls" },
            mode: params.publish_dir_mode,
            pattern: "*.{tsv,csv}"
        ]
    }

    // FREYJA_DEMIX: fixed extra arguments; only *.tsv / *.csv outputs are published.
    withName: 'FREYJA_DEMIX' {
        ext.args = '--covcut 10 --confirmedonly'
        publishDir = [
            path: { "${params.outdir}/FREYJA_STANDALONE/Demix" },
            mode: params.publish_dir_mode,
            pattern: "*.{tsv,csv}"
        ]
    }

    // FREYJA_UPDATE: outputs are published under FREYJA_STANDALONE/FREYJA_DB/.
    withName: 'FREYJA_UPDATE' {
        publishDir = [
            path: { "${params.outdir}/FREYJA_STANDALONE/FREYJA_DB/" },
            mode: params.publish_dir_mode,
        ]
    }

    // MULTIQC: outputs are published under MULTIQC.
    withName: 'MULTIQC' {
        publishDir = [
            path: { "${params.outdir}/MULTIQC" },
            mode: params.publish_dir_mode
        ]
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.