Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ Instead, changes appear below grouped by the date they were added to the workflo

## 2025

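* TBD: [breaking] phylogenetic - switch subsampling to `augur subsample`. Requires Augur ≥ 31.5.0; the default `subsample` configs now define their samples as structured options under a `samples` key rather than as command-line argument strings.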
* 02 July 2025: phylogenetic - config schema updates for easier config overlays ([#321][])
* new required config params
* `exclude` - path to exclude.txt for `augur filter`
Expand Down
3 changes: 2 additions & 1 deletion phylogenetic/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ from packaging import version
from augur.__version__ import __version__ as augur_version
import sys

min_augur_version = "22.2.0"
min_augur_version = "31.5.0"
if version.parse(augur_version) < version.parse(min_augur_version):
print("This pipeline needs a newer version of augur than you currently have...")
print(
Expand Down Expand Up @@ -39,6 +39,7 @@ rule all:


include: "rules/config.smk"
include: "rules/write_config.smk"
include: "rules/prepare_sequences.smk"
include: "rules/construct_phylogeny.smk"
include: "rules/annotate_phylogeny.smk"
Expand Down
103 changes: 56 additions & 47 deletions phylogenetic/build-configs/ci/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ custom_rules:
reference: "defaults/reference.fasta"
genome_annotation: "defaults/genome_annotation.gff3"
genbank_reference: "defaults/reference.gb"
include: "defaults/hmpxv1/include.txt"
exclude: "defaults/exclude.txt"
clades: "defaults/clades.tsv"
lat_longs: "defaults/lat_longs.tsv"
Expand All @@ -30,52 +29,62 @@ filter:
### Set 1: Non-B.1 sequences: use all
### Set 2: B.1 sequences: small sample across year/country, maybe month
subsample:
non_b1: >-
--group-by lineage year country
--sequences-per-group 50
--exclude-where
outbreak!=hMPXV-1
clade!=IIb
lineage=B.1
lineage=B.1.1
lineage=B.1.2
lineage=B.1.3
lineage=C.1
lineage=C.1.1
lineage=E.1
lineage=E.2
lineage=E.3
lineage=B.1.4
lineage=B.1.5
lineage=B.1.6
lineage=D.1
lineage=B.1.7
lineage=B.1.8
lineage=B.1.9
lineage=B.1.10
lineage=B.1.11
lineage=B.1.12
lineage=B.1.13
lineage=B.1.14
lineage=B.1.15
lineage=B.1.16
lineage=B.1.17
lineage=B.1.18
lineage=B.1.19
lineage=B.1.20
lineage=F.1
lineage=F.2
lineage=F.3
lineage=F.4
lineage=F.5
lineage=F.6
lineage=B.1.21
lineage=B.1.22
lineage=B.1.23
b1: >-
--group-by country year
--subsample-max-sequences 300
--exclude-where outbreak!=hMPXV-1 clade!=IIb
samples:
non_b1:
group_by:
- lineage
- year
- country
sequences_per_group: 50
exclude_where:
- outbreak!=hMPXV-1
- clade!=IIb
- lineage=B.1
- lineage=B.1.1
- lineage=B.1.2
- lineage=B.1.3
- lineage=C.1
- lineage=C.1.1
- lineage=E.1
- lineage=E.2
- lineage=E.3
- lineage=B.1.4
- lineage=B.1.5
- lineage=B.1.6
- lineage=D.1
- lineage=B.1.7
- lineage=B.1.8
- lineage=B.1.9
- lineage=B.1.10
- lineage=B.1.11
- lineage=B.1.12
- lineage=B.1.13
- lineage=B.1.14
- lineage=B.1.15
- lineage=B.1.16
- lineage=B.1.17
- lineage=B.1.18
- lineage=B.1.19
- lineage=B.1.20
- lineage=F.1
- lineage=F.2
- lineage=F.3
- lineage=F.4
- lineage=F.5
- lineage=F.6
- lineage=B.1.21
- lineage=B.1.22
- lineage=B.1.23
include:
- defaults/hmpxv1/include.txt
b1:
group_by:
- country
- year
max_sequences: 300
exclude_where:
- outbreak!=hMPXV-1
- clade!=IIb

## align
max_indel: 10000
Expand Down
4 changes: 2 additions & 2 deletions phylogenetic/build-configs/inrb/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,5 @@ traits:
# Private INRB data doesn't have clade annotations so allow empty clade fields
# (i.e. we're assuming all INRB data is clade I)
subsample:
everything: >-
--query 'clade in ["I", "Ia", "Ib", ""]'
everything:
query: clade in ["I", "Ia", "Ib", ""]
8 changes: 5 additions & 3 deletions phylogenetic/defaults/clade-i/config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
reference: "defaults/clade-i/reference.fasta"
genome_annotation: "defaults/clade-i/genome_annotation.gff3"
genbank_reference: "defaults/clade-i/reference.gb"
include: "defaults/clade-i/include.txt"
exclude: "defaults/exclude.txt"
clades: "defaults/clades.tsv"
lat_longs: "defaults/lat_longs.tsv"
Expand All @@ -26,8 +25,11 @@ filter:

### Filter to only Clade I sequences
subsample:
everything: >-
--query 'clade in ["I", "Ia", "Ib"]'
samples:
everything:
query: clade in ["I", "Ia", "Ib"]
include:
- defaults/clade-i/include.txt

## align
max_indel: 10000
Expand Down
103 changes: 56 additions & 47 deletions phylogenetic/defaults/hmpxv1/config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
reference: "defaults/reference.fasta"
genome_annotation: "defaults/genome_annotation.gff3"
genbank_reference: "defaults/reference.gb"
include: "defaults/hmpxv1/include.txt"
exclude: "defaults/exclude.txt"
clades: "defaults/clades.tsv"
lat_longs: "defaults/lat_longs.tsv"
Expand All @@ -27,52 +26,62 @@ filter:
### Set 1: Non-B.1 sequences: use all
### Set 2: B.1 sequences: small sample across year/country, maybe month
subsample:
non_b1: >-
--group-by lineage year country
--sequences-per-group 50
--exclude-where
outbreak!=hMPXV-1
clade!=IIb
lineage=B.1
lineage=B.1.1
lineage=B.1.2
lineage=B.1.3
lineage=C.1
lineage=C.1.1
lineage=E.1
lineage=E.2
lineage=E.3
lineage=B.1.4
lineage=B.1.5
lineage=B.1.6
lineage=D.1
lineage=B.1.7
lineage=B.1.8
lineage=B.1.9
lineage=B.1.10
lineage=B.1.11
lineage=B.1.12
lineage=B.1.13
lineage=B.1.14
lineage=B.1.15
lineage=B.1.16
lineage=B.1.17
lineage=B.1.18
lineage=B.1.19
lineage=B.1.20
lineage=F.1
lineage=F.2
lineage=F.3
lineage=F.4
lineage=F.5
lineage=F.6
lineage=B.1.21
lineage=B.1.22
lineage=B.1.23
b1: >-
--group-by country year
--subsample-max-sequences 300
--exclude-where outbreak!=hMPXV-1 clade!=IIb
samples:
non_b1:
group_by:
- lineage
- year
- country
sequences_per_group: 50
exclude_where:
- outbreak!=hMPXV-1
- clade!=IIb
- lineage=B.1
- lineage=B.1.1
- lineage=B.1.2
- lineage=B.1.3
- lineage=C.1
- lineage=C.1.1
- lineage=E.1
- lineage=E.2
- lineage=E.3
- lineage=B.1.4
- lineage=B.1.5
- lineage=B.1.6
- lineage=D.1
- lineage=B.1.7
- lineage=B.1.8
- lineage=B.1.9
- lineage=B.1.10
- lineage=B.1.11
- lineage=B.1.12
- lineage=B.1.13
- lineage=B.1.14
- lineage=B.1.15
- lineage=B.1.16
- lineage=B.1.17
- lineage=B.1.18
- lineage=B.1.19
- lineage=B.1.20
- lineage=F.1
- lineage=F.2
- lineage=F.3
- lineage=F.4
- lineage=F.5
- lineage=F.6
- lineage=B.1.21
- lineage=B.1.22
- lineage=B.1.23
include:
- defaults/hmpxv1/include.txt
b1:
group_by:
- country
- year
max_sequences: 300
exclude_where:
- outbreak!=hMPXV-1
- clade!=IIb

## align
max_indel: 10000
Expand Down
35 changes: 20 additions & 15 deletions phylogenetic/defaults/hmpxv1_big/config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
reference: "defaults/reference.fasta"
genome_annotation: "defaults/genome_annotation.gff3"
genbank_reference: "defaults/reference.gb"
include: "defaults/hmpxv1_big/include.txt"
exclude: "defaults/exclude.txt"
clades: "defaults/clades.tsv"
lat_longs: "defaults/lat_longs.tsv"
Expand All @@ -24,20 +23,26 @@ filter:
query: "(QC_rare_mutations == 'good' | QC_rare_mutations == 'mediocre')"

subsample:
b1: >-
--group-by year month country
--subsample-max-sequences 5000
--exclude-where
outbreak!=hMPXV-1
clade!=IIb
lineage=A
lineage=A.1
lineage=A.1.1
lineage=A.2
lineage=A.2.1
lineage=A.2.2
lineage=A.2.3
lineage=A.3
samples:
b1:
group_by:
- year
- month
- country
max_sequences: 5000
exclude_where:
- outbreak!=hMPXV-1
- clade!=IIb
- lineage=A
- lineage=A.1
- lineage=A.1.1
- lineage=A.2
- lineage=A.2.1
- lineage=A.2.2
- lineage=A.2.3
- lineage=A.3
include:
- defaults/hmpxv1_big/include.txt

## align
max_indel: 10000
Expand Down
Loading
Loading