generated from nextstrain/ncov-tutorial
-
Notifications
You must be signed in to change notification settings - Fork 0
/
corvaseq.yaml
41 lines (41 loc) · 1.72 KB
/
corvaseq.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# Datasets fed into the workflow; each list entry is one named input.
inputs:
  # Reference set downloaded from data.nextstrain.org. Note that this entry
  # uses the `aligned` key rather than `sequences`.
  - name: reference_data
    metadata: https://data.nextstrain.org/files/ncov/open/reference/metadata.tsv.xz
    aligned: https://data.nextstrain.org/files/ncov/open/reference/aligned.fasta.xz
  # Metadata and sequences read from relative paths in the working directory.
  - name: corvaseq
    metadata: gisaid_auspice_omicron_corvaseq/omicron_corvaseq.metadata.tsv
    sequences: gisaid_auspice_omicron_corvaseq/omicron_corvaseq.sequences.fasta
  # Both keys point at the same tar.gz archive.
  - name: north-america
    metadata: data/ncov_north-america.tar.gz
    sequences: data/ncov_north-america.tar.gz
# Settings for the refine step: the strain name used as the tree root.
refine:
  root: "Wuhan-Hu-1/2019"
# Build definitions, keyed by build name.
builds:
  northcarolina:
    title: "North Carolina-specific genomic surveillance build"
    # Must match a scheme name defined under the top-level `subsampling` key.
    subsampling_scheme: northcarolina_scheme
    auspice_config: my-ncov-analyses/auspice-config-custom-data.json
# Subsampling schemes, keyed by scheme name. Each scheme holds named rules.
# The queries below filter on a `corvaseq` metadata column; presumably the
# workflow sets it to 'yes' for records from the `corvaseq` input (verify
# against the workflow documentation).
subsampling:
  northcarolina_scheme:
    # Every record flagged as coming from the corvaseq input.
    custom_sample:
      query: --query "(corvaseq == 'yes')"

    # Selects at most 1000 North Carolina sequences from the non-corvaseq
    # inputs (the reference data and the North America data).
    nc_context:
      query: --query "(corvaseq != 'yes') & (division == 'North Carolina')"
      max_sequences: 1000
      # Sequences will be subsampled evenly across all combinations of year
      # and month, with sequences genetically similar to custom_sample
      # prioritized.
      group_by: year month
      priorities:
        type: proximity
        focus: custom_sample

    # At most 100 non-corvaseq sequences from the USA.
    usa_context:
      query: --query "(corvaseq != 'yes') & (country == 'USA')"
      max_sequences: 100
      # Sequences will be subsampled evenly across all combinations of
      # division, year, and month.
      group_by: division year month
      priorities:
        type: proximity
        focus: custom_sample

    # At most 10 non-corvaseq sequences with no geographic filter and no
    # grouping.
    global_context:
      query: --query "(corvaseq != 'yes')"
      max_sequences: 10
      priorities:
        type: proximity
        focus: custom_sample