Skip to content

Commit

Permalink
Added irida-id to new_addresses output to be compatible with IRIDA-Next
Browse files Browse the repository at this point in the history
  • Loading branch information
sgsutcliffe committed Oct 10, 2024
1 parent 2cd6dae commit a246014
Show file tree
Hide file tree
Showing 7 changed files with 51 additions and 26 deletions.
10 changes: 7 additions & 3 deletions conf/iridanext.config
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,18 @@ iridanext {
path = "${params.outdir}/iridanext.output.json.gz"
overwrite = true
files {
idkey = "irida_id"
samples = ["**/input/*_error_report.csv"]
}
metadata {
idkey = "id_irida"
samples {
keep = [
"address"
]
csv {
path = "**/filter/new_addresses.csv"
idcol = "id"
path = "**/filter/new_addresses.tsv"
sep = "\t"
idcol = 'irida_id'
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion docs/output.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
<summary>Output files</summary>

- `filter/`
- `new_addresses.csv`
- `new_addresses.tsv`

</details>

Expand Down
9 changes: 6 additions & 3 deletions modules/local/filter_query/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ process FILTER_QUERY {
val out_format

output:
path("new_addresses.*"), emit: csv
path("new_addresses.*"), emit: tsv
path("versions.yml"), emit: versions

script:
Expand All @@ -24,13 +24,16 @@ process FILTER_QUERY {

"""
# Filter the query samples only; keep the 'irida_id', 'id', and 'address' columns
csvtk cut -t -f 2 ${query_ids} > query_list.txt # Need to use the second column to pull meta.id because there is no header
csvtk add-header ${query_ids} -t -n irida_id,id > id.txt
csvtk grep \\
${addresses} \\
-f 1 \\
-P ${query_ids} \\
-P query_list.txt \\
--delimiter "${delimiter}" \\
--out-delimiter "${out_delimiter}" | \\
csvtk cut -f id,address > ${outputFile}.${out_extension}
csvtk cut -t -f id,address > tmp.tsv
csvtk join -t -f id id.txt tmp.tsv > ${outputFile}.${out_extension}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
22 changes: 15 additions & 7 deletions tests/data/irida/sample_name_add_iridanext.output.json
Original file line number Diff line number Diff line change
@@ -1,30 +1,38 @@
{
"files": {
"global": [

],
"samples": {
"sample_1": [
"sampleQ": [
{
"path": "input/sample_1_error_report.csv"
}
],
"sample_2_sample2": [
"sample1": [
{
"path": "input/sample_2_error_report.csv"
}
],
"sample2": [
{
"path": "input/sample_2_sample2_error_report.csv"
}
],
"sample_2": [
"sampleR": [
{
"path": "input/sample_2_error_report.csv"
"path": "input/sample4_error_report.csv"
}
]
}
},
"metadata": {
"samples": {
"sample_1": {
"address": "1.1.3"
"sampleQ": {
"address": "2.2.3"
},
"sampleR": {
"address": "2.2.3"
}
}
}
Expand Down
1 change: 1 addition & 0 deletions tests/data/samplesheets/samplesheet-sample_name.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ sampleQ,sample 1,https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/
sample1,sample#2,https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/reports/sample1.mlst.json,1.1.1
sample2,sample#2,https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/reports/sample2.mlst.json,1.1.1
sample3,,https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/reports/sample3.mlst.json,1.1.2
sampleR,sample4,https://raw.githubusercontent.com/phac-nml/gasnomenclature/dev/tests/data/reports/sampleF.mlst.json,
19 changes: 10 additions & 9 deletions tests/pipelines/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -221,9 +221,9 @@ nextflow_pipeline {
assert lines.contains("sampleR,[\'sampleF\'],Query sampleR ID and JSON key in sampleF.mlst.json DO NOT MATCH. The 'sampleF' key in sampleF.mlst.json has been forcefully changed to 'sampleR': User should manually check input files to ensure correctness.")

// Check filter_query TSV file
lines = path("$launchDir/results/filter/new_addresses.csv").readLines()
assert lines.contains("sampleQ,2.2.3")
assert lines.contains("sampleR,2.2.3")
lines = path("$launchDir/results/filter/new_addresses.tsv").readLines()
assert lines.contains("sampleQ\tsampleQ\t2.2.3")
assert lines.contains("sampleR\tsampleR\t2.2.3")

// Check IRIDA Next JSON output
assert path("$launchDir/results/iridanext.output.json").json == path("$baseDir/tests/data/irida/mismatched_iridanext.output.json").json
Expand Down Expand Up @@ -271,8 +271,8 @@ nextflow_pipeline {
assert lines.contains('sample3,"[\'extra_key\', \'sample3\']","MLST JSON file (sample3_multiplekeys.mlst.json) contains multiple keys: [\'extra_key\', \'sample3\']. The MLST JSON file has been modified to retain only the \'sample3\' entry"')

// Check filtered query TSV results
lines = path("$launchDir/results/filter/new_addresses.csv").readLines()
assert lines.contains("sampleQ,1.1.3")
lines = path("$launchDir/results/filter/new_addresses.tsv").readLines()
assert lines.contains("sampleQ\tsampleQ\t1.1.3")

// Check IRIDA Next JSON output
assert path("$launchDir/results/iridanext.output.json").json == path("$baseDir/tests/data/irida/multiplekeys_iridanext.output.json").json
Expand Down Expand Up @@ -320,8 +320,8 @@ nextflow_pipeline {
assert lines.contains('sample3,"[\'extra_key\', \'sample4\']",No key in the MLST JSON file (sample3_multiplekeys_nomatch.mlst.json) matches the specified sample ID \'sample3\'. The first key \'extra_key\' has been forcefully changed to \'sample3\' and all other keys have been removed.')

// Check filtered query TSV results
lines = path("$launchDir/results/filter/new_addresses.csv").readLines()
assert lines.contains("sampleQ,1.1.3")
lines = path("$launchDir/results/filter/new_addresses.tsv").readLines()
assert lines.contains("sampleQ\tsampleQ\t1.1.3")

// Check IRIDA Next JSON output
assert path("$launchDir/results/iridanext.output.json").json == path("$baseDir/tests/data/irida/multiplekeys_iridanext.output.json").json
Expand Down Expand Up @@ -385,8 +385,9 @@ nextflow_pipeline {
assert lines.contains("sample_2_sample2,[\'sample2\'],Reference sample_2_sample2 ID and JSON key in sample2.mlst.json DO NOT MATCH. The 'sample2' key in sample2.mlst.json has been forcefully changed to 'sample_2_sample2': User should manually check input files to ensure correctness.")

// Check filter_query TSV file
lines = path("$launchDir/results/filter/new_addresses.csv").readLines()
assert lines.contains("sample_1,1.1.3")
lines = path("$launchDir/results/filter/new_addresses.tsv").readLines()
assert lines.contains("sampleQ\tsample_1\t2.2.3")
assert lines.contains("sampleR\tsample4\t2.2.3")

// Check IRIDA Next JSON output
assert path("$launchDir/results/iridanext.output.json").json == path("$baseDir/tests/data/irida/sample_name_add_iridanext.output.json").json
Expand Down
14 changes: 11 additions & 3 deletions workflows/gas_nomenclature.nf
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,11 @@ workflow GAS_NOMENCLATURE {
reference_values = input_assure.result.collect{ meta, mlst -> mlst}
query_values = profiles.query.collect{ meta, mlst -> mlst }

// Query map: used to carry meta.irida_id through to the output for mapping into the IRIDA Next JSON
query_map = profiles.query.map{ meta, mlst->
tuple(meta.id, meta.irida_id)
}.collect()

// LOCIDEX modules
ref_tag = Channel.value("ref")
query_tag = Channel.value("value")
Expand Down Expand Up @@ -166,16 +171,19 @@ workflow GAS_NOMENCLATURE {
called_data = GAS_CALL(expected_clusters.text, distances.results)
ch_versions = ch_versions.mix(called_data.versions)

// Filter the new queried samples and addresses into a CSV/JSON file for the IRIDANext plug in
query_ids = profiles.query.collectFile { it[0].id + '\n' }
// Filter the newly queried samples and addresses into a TSV file for the IRIDA Next plug-in, and
// add an IRIDA ID column so the IRIDA Next plug-in can attach the metadata to the correct samples
query_irida_ids = profiles.query.collectFile { it[0].irida_id + '\t' + it[0].id + '\n'}

new_addresses = FILTER_QUERY(query_ids, called_data.distances, "tsv", "csv")
new_addresses = FILTER_QUERY(query_irida_ids, called_data.distances, "tsv", "tsv")
ch_versions = ch_versions.mix(new_addresses.versions)

CUSTOM_DUMPSOFTWAREVERSIONS (
ch_versions.unique().collectFile(name: 'collated_versions.yml')
)



}

/*
Expand Down

0 comments on commit a246014

Please sign in to comment.