From c0f003779d1f54aefbfb7729841c12bcfc930964 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Poisot?= Date: Mon, 14 Dec 2020 13:39:03 -0500 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20vernacular=20names?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Project.toml | 2 +- docs/src/namefinding.md | 8 ++++++++ src/NCBITaxonomy.jl | 3 +++ src/vernacular.jl | 15 +++++++++++++++ test/taxid.jl | 8 ++++++++ 5 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 src/vernacular.jl diff --git a/Project.toml b/Project.toml index aeaaf8e..4b54251 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "NCBITaxonomy" uuid = "f88b31d2-eb98-4433-b52d-2dd32bc6efce" authors = ["Timothée Poisot "] -version = "0.0.5" +version = "0.0.6" [deps] Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45" diff --git a/docs/src/namefinding.md b/docs/src/namefinding.md index bc31e8b..165c553 100644 --- a/docs/src/namefinding.md +++ b/docs/src/namefinding.md @@ -4,6 +4,7 @@ ```@docs taxid +vernacular ``` The `taxid` function will return a `NCBITaxon` object, which has two fields: @@ -28,6 +29,13 @@ the *scientific name* will be returned, no matter what you search taxid("cow") ``` +This may be a good point to note that we can use the `vernacular` function to +get a list of NCBI-known vernacular names: + +```@example taxid +taxid("cow") |> vernacular +``` + You can pass an additional `fuzzy=true` keyword argument to the `taxid` function to perform fuzzy name matching using the Levenshtein distance: diff --git a/src/NCBITaxonomy.jl b/src/NCBITaxonomy.jl index 7e47672..6544374 100644 --- a/src/NCBITaxonomy.jl +++ b/src/NCBITaxonomy.jl @@ -43,4 +43,7 @@ export children, descendants include("lineage.jl") export lineage, parent, rank +include("vernacular.jl") +export vernacular + end diff --git a/src/vernacular.jl b/src/vernacular.jl new file mode 100644 index 0000000..94586a3 --- /dev/null +++ b/src/vernacular.jl @@ -0,0 +1,15 @@ +""" 
+    vernacular(t::NCBITaxon)
+
+Return the unique vernacular names recorded for taxon `t`, or `nothing` when it
+has none. Names come from the "common name" and "genbank common name" classes of
+the NCBI taxonomy names table; "common name" entries are listed first.
+"""
+function vernacular(t::NCBITaxon)
+    # Restrict the names table to the rows describing this taxon
+    records = filter(r -> r.tax_id == t.id, NCBITaxonomy.names_table)
+    common = filter(r -> r.class == NCBITaxonomy.class_common_name, records).name
+    genbank = filter(r -> r.class == NCBITaxonomy.class_genbank_common_name, records).name
+    found = vcat(common, genbank)
+    return isempty(found) ? nothing : unique(found)
+end
\ No newline at end of file
diff --git a/test/taxid.jl b/test/taxid.jl
index bca1749..efc14c3 100644
--- a/test/taxid.jl
+++ b/test/taxid.jl
@@ -38,4 +38,12 @@ module TestTaxid
     chicken = taxid("tchiken"; fuzzy=true, verbose=true)
     @test typeof(chicken) == NCBITaxon
 
+    # Vernacular name
+    chub = vernacular(ncbi"Leuciscus cephalus")
+    @test "European chub" in chub
+    @test "chub" in chub
+
+    # Vernacular missing
+    @test isnothing(vernacular(ncbi"Lamellodiscus elegans"))
+
 end