Skip to content

Commit

Permalink
Merge pull request #203 from nextstrain/fauna-prioritized-seqs
Browse files Browse the repository at this point in the history
download_from_fauna: Add `--prioritized_seqs_file` option
  • Loading branch information
joverlee521 authored Jan 27, 2025
2 parents 89c6350 + d59e5bd commit 9c918a5
Show file tree
Hide file tree
Showing 26 changed files with 69 additions and 1 deletion.
1 change: 1 addition & 0 deletions config/h1n1pdm/ha/prioritized_seqs_file.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
strain accession
1 change: 1 addition & 0 deletions config/h1n1pdm/mp/prioritized_seqs_file.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
strain accession
1 change: 1 addition & 0 deletions config/h1n1pdm/na/prioritized_seqs_file.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
strain accession
1 change: 1 addition & 0 deletions config/h1n1pdm/np/prioritized_seqs_file.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
strain accession
1 change: 1 addition & 0 deletions config/h1n1pdm/ns/prioritized_seqs_file.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
strain accession
1 change: 1 addition & 0 deletions config/h1n1pdm/pa/prioritized_seqs_file.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
strain accession
1 change: 1 addition & 0 deletions config/h1n1pdm/pb1/prioritized_seqs_file.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
strain accession
1 change: 1 addition & 0 deletions config/h1n1pdm/pb2/prioritized_seqs_file.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
strain accession
5 changes: 5 additions & 0 deletions config/h3n2/ha/prioritized_seqs_file.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
strain accession
A/Croatia/10136RV/2023 EPI3250718
A/Croatia/10136RV/2023-egg EPI3356221
A/DistrictOfColumbia/27/2023 EPI2990337
A/DistrictOfColumbia/27/2023-egg EPI3391167
4 changes: 4 additions & 0 deletions config/h3n2/mp/prioritized_seqs_file.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
strain accession
A/Croatia/10136RV/2023 EPI3250713
A/Croatia/10136RV/2023-egg EPI3356216
A/DistrictOfColumbia/27/2023 EPI2990351
5 changes: 5 additions & 0 deletions config/h3n2/na/prioritized_seqs_file.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
strain accession
A/Croatia/10136RV/2023 EPI3250717
A/Croatia/10136RV/2023-egg EPI3356220
A/DistrictOfColumbia/27/2023 EPI2990330
A/DistrictOfColumbia/27/2023-egg EPI3391166
4 changes: 4 additions & 0 deletions config/h3n2/np/prioritized_seqs_file.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
strain accession
A/Croatia/10136RV/2023 EPI3250711
A/Croatia/10136RV/2023-egg EPI3356214
A/DistrictOfColumbia/27/2023 EPI2990339
4 changes: 4 additions & 0 deletions config/h3n2/ns/prioritized_seqs_file.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
strain accession
A/Croatia/10136RV/2023 EPI3250712
A/Croatia/10136RV/2023-egg EPI3356215
A/DistrictOfColumbia/27/2023 EPI2990345
4 changes: 4 additions & 0 deletions config/h3n2/pa/prioritized_seqs_file.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
strain accession
A/Croatia/10136RV/2023 EPI3250714
A/Croatia/10136RV/2023-egg EPI3356217
A/DistrictOfColumbia/27/2023 EPI2990331
4 changes: 4 additions & 0 deletions config/h3n2/pb1/prioritized_seqs_file.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
strain accession
A/Croatia/10136RV/2023 EPI3250716
A/Croatia/10136RV/2023-egg EPI3356219
A/DistrictOfColumbia/27/2023 EPI2990344
4 changes: 4 additions & 0 deletions config/h3n2/pb2/prioritized_seqs_file.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
strain accession
A/Croatia/10136RV/2023 EPI3250715
A/Croatia/10136RV/2023-egg EPI3356218
A/DistrictOfColumbia/27/2023 EPI2990348
1 change: 1 addition & 0 deletions config/vic/ha/prioritized_seqs_file.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
strain accession
1 change: 1 addition & 0 deletions config/vic/mp/prioritized_seqs_file.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
strain accession
1 change: 1 addition & 0 deletions config/vic/na/prioritized_seqs_file.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
strain accession
1 change: 1 addition & 0 deletions config/vic/np/prioritized_seqs_file.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
strain accession
1 change: 1 addition & 0 deletions config/vic/ns/prioritized_seqs_file.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
strain accession
1 change: 1 addition & 0 deletions config/vic/pa/prioritized_seqs_file.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
strain accession
1 change: 1 addition & 0 deletions config/vic/pb1/prioritized_seqs_file.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
strain accession
1 change: 1 addition & 0 deletions config/vic/pb2/prioritized_seqs_file.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
strain accession
3 changes: 3 additions & 0 deletions profiles/upload.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ recency:
builds:
h1n1pdm_2y:
lineage: h1n1pdm
prioritized_seqs_file: "config/h1n1pdm/{segment}/prioritized_seqs_file.tsv"
reference: "config/h1n1pdm/{segment}/reference.fasta"
annotation: "config/h1n1pdm/{segment}/genemap.gff"
tree_exclude_sites: "config/h1n1pdm/{segment}/exclude-sites.txt"
Expand Down Expand Up @@ -122,6 +123,7 @@ builds:
filters: --query "(passage_category != 'egg') & (is_reference == True)" --min-date {reference_min_date}
h3n2_2y:
lineage: "h3n2"
prioritized_seqs_file: "config/h3n2/{segment}/prioritized_seqs_file.tsv"
reference: "config/h3n2/{segment}/reference.fasta"
annotation: "config/h3n2/{segment}/genemap.gff"
tree_exclude_sites: "config/h3n2/{segment}/exclude-sites.txt"
Expand Down Expand Up @@ -180,6 +182,7 @@ builds:
subsamples: *representative-subsampling-scheme
vic_2y:
lineage: vic
prioritized_seqs_file: "config/vic/{segment}/prioritized_seqs_file.tsv"
reference: "config/vic/{segment}/reference.fasta"
annotation: "config/vic/{segment}/genemap.gff"
tree_exclude_sites: "config/vic/{segment}/exclude-sites.txt"
Expand Down
17 changes: 16 additions & 1 deletion workflow/snakemake_rules/download_from_fauna.smk
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,25 @@ def _get_virus_passage_category(wildcards):
else:
return ""

def _get_prioritized_seqs_file(wildcards):
    """Return the prioritized sequences file configured for a lineage.

    Scans ``config["builds"]`` for builds whose ``lineage`` equals
    ``wildcards.lineage`` and returns the first non-empty
    ``prioritized_seqs_file`` value found among them.

    Parameters
    ----------
    wildcards
        Object exposing a ``lineage`` attribute (the rule's wildcards).

    Returns
    -------
    The configured ``prioritized_seqs_file`` value, or an empty list when no
    build of the lineage defines one. The empty list is falsy, so
    ``download_sequences`` gets no extra input and its params lambda omits
    the ``--prioritized_seqs_file`` option.
    """
    for build_params in config["builds"].values():
        # Use .get() so a build entry without a "lineage" key is skipped
        # instead of raising KeyError.
        if build_params.get("lineage") != wildcards.lineage:
            continue

        prioritized_seqs_file = build_params.get("prioritized_seqs_file")
        # Keep scanning when this build does not define the file (or sets it
        # to an empty/null value): another build of the same lineage may
        # still provide it, so the first match must not end the search.
        if prioritized_seqs_file:
            return prioritized_seqs_file

    return []

rule download_sequences:
input:
prioritized_seqs_file = _get_prioritized_seqs_file,
output:
sequences = "data/{lineage}/raw_{segment}.fasta"
params:
fasta_fields = config["fauna_fasta_fields"],
prioritized_seqs_file = lambda wildcards, input:
f"--prioritized_seqs_file {input.prioritized_seqs_file!r}"
if input.prioritized_seqs_file
else ""
resources:
concurrent_fauna = 1
conda: "../envs/nextstrain.yaml"
Expand All @@ -60,13 +74,14 @@ rule download_sequences:
log:
"logs/download_sequences_{lineage}_{segment}.txt"
shell:
"""
r"""
python3 {path_to_fauna}/vdb/download.py \
--database vdb \
--virus flu \
--fasta_fields {params.fasta_fields} \
--resolve_method split_passage \
--select locus:{wildcards.segment} lineage:seasonal_{wildcards.lineage} \
{params.prioritized_seqs_file} \
--path data \
--fstem {wildcards.lineage}/raw_{wildcards.segment} 2>&1 | tee {log}
"""
Expand Down

0 comments on commit 9c918a5

Please sign in to comment.