Skip to content

Commit

Permalink
[SYSTEMDS-3742] Python API Builtin unique
Browse files Browse the repository at this point in the history
  • Loading branch information
e-strauss authored and Baunsgaard committed Sep 3, 2024
1 parent e40bcec commit 2d3c1e6
Show file tree
Hide file tree
Showing 2 changed files with 135 additions and 0 deletions.
17 changes: 17 additions & 0 deletions src/main/python/systemds/operator/nodes/matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,23 @@ def trace(self) -> 'Scalar':
"""
return Scalar(self.sds_context, 'trace', [self])

def unique(self, axis: int = None) -> 'Matrix':
    """Return the distinct values of the complete matrix, of each column or of each row.

    :param axis: ``None`` (default) for the complete matrix, ``0`` for
        column uniques (passes ``dir="c"``) or ``1`` for row uniques
        (passes ``dir="r"``)
    :return: `Matrix` representing operation
    :raises ValueError: if ``axis`` is not 0, 1 or None
    """
    # Without an axis the operation is issued with no direction argument.
    if axis is None:
        return Matrix(self.sds_context, 'unique', [self])

    # Translate the numeric axis into the quoted direction literal.
    if axis == 0:
        direction = '"c"'
    elif axis == 1:
        direction = '"r"'
    else:
        raise ValueError(
            f"Axis has to be either 0, 1 or None, for column, row or complete {self.operation}")

    return Matrix(self.sds_context, 'unique', [self], named_input_nodes={"dir": direction})

def abs(self) -> 'Matrix':
"""Calculate absolute.
Expand Down
118 changes: 118 additions & 0 deletions src/main/python/tests/matrix/test_unique.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
# -------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# -------------------------------------------------------------

import unittest
import numpy as np
from systemds.context import SystemDSContext


# Seed NumPy's global RNG so the randomly generated input matrices are reproducible.
np.random.seed(7)


# np.unique applied along an axis returns the unique vectors (whole rows/columns) along that axis,
# whereas SystemDS' unique returns the unique values within each vector along that axis.
def compute_expected(m, num_cols, axis):
    """Build the NumPy reference for a per-row / per-column unique.

    Each vector of ``m`` is reduced to its unique values, filled up with
    zeros to length ``num_cols`` and sorted, so that it is directly
    comparable to a result that was zero-padded and then passed through
    ``np.sort`` along the same axis.

    :param m: 2-D input array
    :param num_cols: length every unique vector is padded to
    :param axis: 1 to process the rows of ``m``; any other value processes
        the columns
    :return: array of sorted, zero-padded unique vectors; one per row for
        ``axis == 1``, otherwise one per column
    :raises ValueError: (from ``np.pad``) if a vector has more than
        ``num_cols`` unique values
    """
    def padded(vector):
        values = np.unique(vector)
        filled = np.pad(values, (num_cols - len(values), 0), "constant", constant_values=0)
        # Sort after padding: with negative values present the padding zeros
        # do not belong at the front of an ascending vector.
        return np.sort(filled)

    if axis == 1:
        return np.array([padded(row) for row in m])
    # Column case: work on the transpose, then restore the orientation.
    return np.array([padded(col) for col in m.T]).T


class TestUNIQUE(unittest.TestCase):
    """Compare the `unique` matrix operation against NumPy-derived expectations."""

    def setUp(self):
        # A fresh context per test; it is closed again in tearDown.
        self.sds = SystemDSContext()

    def tearDown(self):
        self.sds.close()

    def _assert_close(self, actual, expected):
        """Fail with a diagnostic message unless both arrays match.

        Uses ``np.testing.assert_allclose`` instead of a bare
        ``assert np.allclose(...)`` so the check survives ``python -O``,
        reports the mismatching values and rejects differing shapes.
        ``atol`` is set to 1e-8 to keep the tolerance the previous
        ``np.allclose(a, b, 1e-9)`` calls applied.
        """
        np.testing.assert_allclose(actual, expected, rtol=1e-9, atol=1e-8)

    def _check_complete(self, input_matrix):
        """Check `unique()` without axis against ``np.unique`` of the whole input."""
        sds_result = self.sds.from_numpy(input_matrix).unique().compute()
        # Flatten and sort so the comparison does not depend on result layout.
        sds_result = np.sort(np.reshape(sds_result, -1))
        self._assert_close(sds_result, np.unique(input_matrix))

    def _check_axis(self, input_matrix, axis):
        """Check `unique(axis)` against the padded per-vector reference."""
        sds_result = self.sds.from_numpy(input_matrix).unique(axis).compute()
        sds_result = np.sort(sds_result, axis)
        # The reference is padded to the length of the computed result.
        length = sds_result.shape[axis]
        self._assert_close(sds_result, compute_expected(input_matrix, length, axis))

    def test_unique_basic(self):
        input_matrix = np.array(
            [[1, -2, 3, 4], [0, -6, 7, 8], [0, -10, 11, -12], [0, -14, 15, -16]]
        )
        self._check_complete(input_matrix)

    def test_unique_basic2(self):
        input_matrix = np.array(
            [[1, 1, 1, 1], [2, 2, 2, 2], [0, 10, 11, 12], [0, 14, 15, 16]]
        )
        self._check_axis(input_matrix, 1)

    def test_unique_basic3(self):
        input_matrix = np.array(
            [[0, 1, 1, 1], [0, 1, 1, 1], [0, 10, 11, 12], [0, 14, 15, 16]]
        )
        self._check_axis(input_matrix, 0)

    def test_unique_random1(self):
        self._check_complete(np.random.random((10, 10)) * 200)

    def test_unique_random2(self):
        self._check_axis(np.random.random((10, 10)) * 200, 1)

    def test_unique_random3(self):
        self._check_axis(np.random.random((10, 10)) * 200, 0)

    def test_unique_error(self):
        # Any axis other than 0, 1 or None must be rejected.
        with self.assertRaises(ValueError):
            self.sds.from_numpy(np.array([[1, 2]])).unique(2)


# Run the test cases when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()

0 comments on commit 2d3c1e6

Please sign in to comment.