Skip to content

Commit

Permalink
Add genotype_values() method
Browse files Browse the repository at this point in the history
  • Loading branch information
hyanwong committed Oct 30, 2022
1 parent 3d6ea1b commit 2c21b63
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 0 deletions.
11 changes: 11 additions & 0 deletions python/CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
--------------------
[0.5.4] - 2022-XX-XX
--------------------

**Features**

- Variants have a ``genotype_values()`` method that returns the genotypes as an
(inefficient) array of strings or objects, rather than integer indexes, to
aid comparison of genetic variation. (:user:`hyanwong`, :pr:`2617`)


--------------------
[0.5.3] - 2022-10-03
--------------------
Expand Down
27 changes: 27 additions & 0 deletions python/tests/test_genotypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,33 @@ def test_snipped_tree_sequence_mutations_over_isolated(self):
assert non_missing_found
assert missing_found

def test_genotype_values(self):
    """
    Check the dtype and contents of ``Variant.genotype_values()`` on a
    two-sample, two-site tree sequence with no edges.

    Site 0 (ancestral state "C") has a mutation directly above each sample.
    Site 1 (ancestral state "") has a mutation only above node 0, so node 1
    reports the empty ancestral state when ``isolated_as_missing=False``
    and ``None`` (forcing an object array) when ``isolated_as_missing=True``.
    """

    def check(variant, expected_type, expected_values):
        # Shared assertion pair: numpy scalar type first, then contents.
        vals = variant.genotype_values()
        assert vals.dtype.type == expected_type
        assert np.array_equal(vals, np.array(expected_values))

    tables = tskit.TableCollection(1.0)
    for _ in range(2):
        tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)
    first_site = tables.sites.add_row(0, "C")
    tables.mutations.add_row(site=first_site, derived_state="G", node=0)
    tables.mutations.add_row(site=first_site, derived_state="T", node=1)
    second_site = tables.sites.add_row(0.5, "")
    tables.mutations.add_row(
        site=second_site, derived_state="A long string", node=0
    )
    ts = tables.tree_sequence()

    # Isolated samples take the ancestral state: every value is a string.
    variants = ts.variants(isolated_as_missing=False)
    check(next(variants), np.str_, ["G", "T"])
    check(next(variants), np.str_, ["A long string", ""])

    # Isolated samples are missing: the second site contains ``None``.
    variants = ts.variants(isolated_as_missing=True)
    check(next(variants), np.str_, ["G", "T"])
    check(next(variants), np.object_, ["A long string", None])


class TestLimitInterval:
def test_simple_case(self, ts_fixture):
Expand Down
13 changes: 13 additions & 0 deletions python/tskit/genotypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,19 @@ def copy(self) -> Variant:
variant_copy._ll_variant = self._ll_variant.restricted_copy()
return variant_copy

def genotype_values(self) -> np.ndarray:
    """
    Returns the genotypes at this site as a numpy array of strings (if
    there is no missing data) or objects (if the genotypes contain missing data,
    in which case some elements will be equal to ``None``),
    rather than an array of integer indexes. Note that this is inefficient
    compared to working with the underlying integer representation as
    returned by the :attr:`~Variant.genotypes` property.

    :return: An array of the same length as :attr:`~Variant.genotypes`
        (one element per genotyped sample) containing strings or objects.
    """
    # Build a lookup table of allele states and fancy-index it with the
    # integer genotypes, so element ``i`` of the result is the allele
    # carried by sample ``i``.
    # NOTE(review): relies on missing genotypes indexing a trailing ``None``
    # entry in ``alleles`` -- confirm against the Variant.alleles contract.
    allele_states = np.array(self.alleles)
    return allele_states[self.genotypes]

def counts(self) -> typing.Counter[str | None]:
"""
Returns a :class:`python:collections.Counter` object providing counts for each
Expand Down

0 comments on commit 2c21b63

Please sign in to comment.