From 504e75145eaa55a03ef457f1016e308bc453fc44 Mon Sep 17 00:00:00 2001 From: e-strauss Date: Thu, 26 Sep 2024 14:07:07 +0200 Subject: [PATCH] [FORMATTING] apply formatting to the Python API files Closes #2115 --- .../systemds/context/systemds_context.py | 320 +++++++++++------- .../systemds/examples/tutorials/adult.py | 48 +-- .../systemds/examples/tutorials/mnist.py | 27 +- src/main/python/systemds/operator/__init__.py | 14 +- .../python/systemds/operator/nn/affine.py | 28 +- .../python/systemds/operator/nodes/combine.py | 18 +- .../python/systemds/operator/nodes/frame.py | 124 ++++--- .../python/systemds/operator/nodes/list.py | 37 +- .../systemds/operator/nodes/list_access.py | 22 +- .../python/systemds/operator/nodes/matrix.py | 34 +- .../systemds/operator/nodes/multi_return.py | 52 +-- .../python/systemds/operator/nodes/scalar.py | 242 ++++++------- .../python/systemds/operator/nodes/source.py | 92 ++--- .../systemds/operator/operation_node.py | 98 +++--- src/main/python/systemds/project_info.py | 10 +- .../python/systemds/script_building/dag.py | 20 +- .../python/systemds/script_building/script.py | 59 ++-- src/main/python/systemds/scuro/__init__.py | 54 +-- .../systemds/scuro/aligner/alignment.py | 12 +- .../scuro/aligner/alignment_strategy.py | 10 +- .../systemds/scuro/aligner/dr_search.py | 37 +- .../python/systemds/scuro/aligner/task.py | 9 +- src/main/python/systemds/scuro/main.py | 8 +- .../scuro/modality/aligned_modality.py | 6 +- .../systemds/scuro/modality/audio_modality.py | 22 +- .../systemds/scuro/modality/modality.py | 14 +- .../systemds/scuro/modality/text_modality.py | 22 +- .../systemds/scuro/modality/video_modality.py | 22 +- .../systemds/scuro/models/discrete_model.py | 8 +- .../python/systemds/scuro/models/model.py | 12 +- .../systemds/scuro/representations/average.py | 6 +- .../systemds/scuro/representations/bert.py | 41 +-- .../scuro/representations/concatenation.py | 14 +- .../systemds/scuro/representations/fusion.py | 6 +- .../systemds/scuro/representations/lstm.py | 2 +- .../systemds/scuro/representations/max.py | 33 +- .../scuro/representations/mel_spectrogram.py | 16 +- .../scuro/representations/multiplication.py | 17 +- .../scuro/representations/representation.py | 1 + .../systemds/scuro/representations/resnet.py | 103 +++--- .../systemds/scuro/representations/rowmax.py | 14 +- .../systemds/scuro/representations/sum.py | 14 +- .../scuro/representations/unimodal.py | 6 +- .../systemds/scuro/representations/utils.py | 34 +- src/main/python/systemds/utils/__init__.py | 4 +- src/main/python/systemds/utils/consts.py | 8 +- src/main/python/systemds/utils/helpers.py | 28 +- src/main/python/tests/algorithms/test_gmm.py | 8 +- .../algorithms/test_gmm_train_predict.py | 14 +- .../python/tests/algorithms/test_kmeans.py | 9 +- .../python/tests/algorithms/test_l2svm.py | 30 +- src/main/python/tests/algorithms/test_lm.py | 4 +- .../tests/algorithms/test_multiLogReg.py | 32 +- src/main/python/tests/algorithms/test_pca.py | 15 +- .../python/tests/algorithms/test_signal.py | 12 +- src/main/python/tests/basics/test___str__.py | 21 +- .../python/tests/basics/test_context_stats.py | 9 +- .../docs_test/test_end_to_end_tutorial.py | 2 - .../tests/examples/tutorials/test_adult.py | 58 +++- .../examples/tutorials/test_adult_neural.py | 15 +- .../tests/examples/tutorials/test_mnist.py | 32 +- .../federated/test_federated_adult_neural.py | 84 +++-- .../federated/test_federated_aggregations.py | 57 ++-- .../test_federated_aggregations_noHeader.py | 61 ++-- 
.../tests/federated/test_federated_basic.py | 38 ++- .../federated/test_federated_matrix_mult.py | 122 +++---- .../tests/federated/test_federated_mnist.py | 59 +++- .../tests/federated/test_federated_read.py | 47 ++- src/main/python/tests/frame/test_hyperband.py | 5 +- src/main/python/tests/frame/test_rIndexing.py | 2 +- src/main/python/tests/frame/test_r_c_bind.py | 30 +- src/main/python/tests/frame/test_replace.py | 10 +- .../tests/frame/test_transform_encode.py | 4 +- .../python/tests/frame/test_write_read.py | 3 +- src/main/python/tests/iotests/test_io_csv.py | 9 +- .../python/tests/lineage/test_lineagetrace.py | 26 +- src/main/python/tests/list/test_list.py | 9 +- .../python/tests/list/test_list_readwrite.py | 8 +- .../tests/manual_tests/multi_log_reg_mnist.py | 4 +- .../manual_tests/save_log_reg_mnist_sysds.py | 4 +- .../python/tests/matrix/test_binary_op.py | 115 ++++--- .../tests/matrix/test_block_converter.py | 6 +- src/main/python/tests/matrix/test_diag.py | 13 +- src/main/python/tests/matrix/test_eigen.py | 12 +- src/main/python/tests/matrix/test_fft.py | 163 +++++---- src/main/python/tests/matrix/test_order.py | 25 +- src/main/python/tests/matrix/test_print.py | 5 +- .../python/tests/matrix/test_rIndexing.py | 20 +- src/main/python/tests/matrix/test_rand.py | 49 ++- src/main/python/tests/matrix/test_replace.py | 8 +- src/main/python/tests/matrix/test_reverse.py | 5 +- src/main/python/tests/matrix/test_roll.py | 4 +- src/main/python/tests/matrix/test_slice.py | 1 + src/main/python/tests/matrix/test_split.py | 37 +- src/main/python/tests/matrix/test_sqrt.py | 18 +- src/main/python/tests/matrix/test_svd.py | 12 +- .../python/tests/matrix/test_to_one_hot.py | 37 +- .../python/tests/matrix/test_transpose.py | 9 +- .../python/tests/matrix/test_trigonometric.py | 43 ++- src/main/python/tests/matrix/test_write.py | 9 +- src/main/python/tests/nn/test_affine.py | 67 ++-- .../python/tests/nn/test_neural_network.py | 42 ++- src/main/python/tests/nn/test_relu.py | 15 +- src/main/python/tests/nn/test_sequential.py | 18 +- .../python/tests/script/test_dml_script.py | 13 +- .../python/tests/source/test_source_01.py | 29 +- .../python/tests/source/test_source_02.py | 16 +- .../python/tests/source/test_source_list.py | 5 +- .../source/test_source_multi_arguments.py | 14 +- .../tests/source/test_source_neural_net.py | 9 +- .../tests/source/test_source_no_return.py | 20 +- .../python/tests/source/test_source_reuse.py | 13 +- .../source/test_source_with_default_values.py | 23 +- 113 files changed, 2072 insertions(+), 1474 deletions(-) diff --git a/src/main/python/systemds/context/systemds_context.py b/src/main/python/systemds/context/systemds_context.py index 6d42ad60b73..9385cb991b9 100644 --- a/src/main/python/systemds/context/systemds_context.py +++ b/src/main/python/systemds/context/systemds_context.py @@ -37,8 +37,16 @@ import numpy as np import pandas as pd from py4j.java_gateway import GatewayParameters, JavaGateway, Py4JNetworkError -from systemds.operator import (Frame, List, Matrix, OperationNode, Scalar, - Source, Combine, MultiReturn) +from systemds.operator import ( + Frame, + List, + Matrix, + OperationNode, + Scalar, + Source, + Combine, + MultiReturn, +) from systemds.script_building import DMLScript from systemds.utils.consts import VALID_INPUT_TYPES from systemds.utils.helpers import get_module_dir, valuetype_from_str @@ -59,11 +67,14 @@ class SystemDSContext(object): __stdout: Queue = None __stderr: Queue = None - def __init__(self, port: int = -1, - capture_statistics: bool = 
False,
-                 capture_stdout: bool = False,
-                 logging_level: int = 20,
-                 py4j_logging_level: int = 50):
+    def __init__(
+        self,
+        port: int = -1,
+        capture_statistics: bool = False,
+        capture_stdout: bool = False,
+        logging_level: int = 20,
+        py4j_logging_level: int = 50,
+    ):
         """Starts a new instance of SystemDSContext, in which the connection to a JVM
         systemds instance is handled. Any new instance of this SystemDSContext starts a separate new JVM.
@@ -79,7 +90,7 @@ def __init__(self, port: int = -1,
             it can be verbose if not set high.
         """
         self.__setup_logging(logging_level, py4j_logging_level)
-        self.__start(port, capture_stdout)
+        self.__start(port, capture_stdout)
         self.capture_stats(capture_statistics)
         self._log.debug("Started JVM and SystemDS python context manager")
@@ -90,7 +101,7 @@ def get_stdout(self, lines: int = -1):
         default -1 prints all current lines in the queue.
         """
         if self.__stdout:
-            if (lines == -1 or self.__stdout.qsize() < lines):
+            if lines == -1 or self.__stdout.qsize() < lines:
                 return [self.__stdout.get() for x in range(self.__stdout.qsize())]
             else:
                 return [self.__stdout.get() for x in range(lines)]
@@ -132,18 +143,24 @@ def exception_and_close(self, exception, trace_back_limit: int = None):
         raise RuntimeError(message)

     def __try_startup(self, command: str, capture_stdout: bool) -> Popen:
-        if(capture_stdout):
+        if capture_stdout:
             process = Popen(command, stdout=PIPE, stdin=PIPE, stderr=PIPE)

             # Handle Std out from the subprocess.
             self.__stdout = Queue()
             self.__stderr = Queue()

-            self.__stdout_thread = Thread(target=self.__enqueue_output, args=(
-                process.stdout, self.__stdout), daemon=True)
+            self.__stdout_thread = Thread(
+                target=self.__enqueue_output,
+                args=(process.stdout, self.__stdout),
+                daemon=True,
+            )

-            self.__stderr_thread = Thread(target=self.__enqueue_output, args=(
-                process.stderr, self.__stderr), daemon=True)
+            self.__stderr_thread = Thread(
+                target=self.__enqueue_output,
+                args=(process.stderr, self.__stderr),
+                daemon=True,
+            )

             self.__stdout_thread.start()
             self.__stderr_thread.start()
@@ -167,30 +184,41 @@ def __build_startup_command(self, port: int):
         root = os.path.join(get_module_dir())

         # Find the SystemDS jar file.
-        if root != None:  # root path was set
+        if root != None:  # root path was set
             self._log.debug("SYSTEMDS_ROOT was set, searching for jar file")
             lib_release = os.path.join(root, "lib")
             systemds_cp = os.path.join(root, "target", "SystemDS.jar")
-            if os.path.exists(lib_release):  # It looks like it was a release path for root.
+            if os.path.exists(
+                lib_release
+            ):  # It looks like it was a release path for root.
classpath = os.path.join(root, "SystemDS.jar")
                 if not os.path.exists(classpath):
                     for f in os.listdir(root):
                         if "systemds" in f:
                             if os.path.exists(classpath):
-                                raise(ValueError("Invalid setup, there were multiple conflicting systemds jar files in " + root))
+                                raise (
+                                    ValueError(
+                                        "Invalid setup, there were multiple conflicting systemds jar files in "
+                                        + root
+                                    )
+                                )
                             else:
                                 classpath = os.path.join(root, f)
                 if not os.path.exists(classpath):
                     raise ValueError(
-                        "Invalid setup did not find SystemDS jar file in " + root)
+                        "Invalid setup did not find SystemDS jar file in " + root
+                    )
             elif os.path.exists(systemds_cp):
                 classpath = cp_separator.join([systemds_cp])
             else:
                 raise ValueError(
-                    "Invalid setup at SYSTEMDS_ROOT env variable path " + root)
-        else:  # root path was not set, use the pip installed SystemDS
-            self._log.warning("SYSTEMDS_ROOT was unset, defaulting to python packaged jar files")
-            systemds_cp = os.path.join(root, "SystemDS.jar")
+                    "Invalid setup at SYSTEMDS_ROOT env variable path " + root
+                )
+        else:  # root path was not set, use the pip installed SystemDS
+            self._log.warning(
+                "SYSTEMDS_ROOT was unset, defaulting to python packaged jar files"
+            )
+            systemds_cp = os.path.join(root, "SystemDS.jar")
             classpath = cp_separator.join([systemds_cp])

         command.append(classpath)
@@ -199,20 +227,25 @@ def __build_startup_command(self, port: int):
         if os.environ.get("LOG4JPROP") == None:
             files = glob(os.path.join(root, "conf", "log4j*.properties"))
             if len(files) > 1:
-                self._log.warning(
-                    "Multiple logging files found selecting: " + files[0])
+                self._log.warning("Multiple logging files found selecting: " + files[0])
             if len(files) == 0:
-                self._log.warning("No log4j file found at: "
-                                  + os.path.join(root, "conf")
-                                  + " therefore using default settings")
+                self._log.warning(
+                    "No log4j file found at: "
+                    + os.path.join(root, "conf")
+                    + " therefore using default settings"
+                )
             else:
                 command.append("-Dlog4j.configuration=file:" + files[0])
         else:
             logging_file = os.environ.get("LOG4JPROP")
             if os.path.exists(logging_file):
-                command.append("-Dlog4j.configuration=file:" +os.environ.get("LOG4JPROP"))
+                command.append(
+                    "-Dlog4j.configuration=file:" + os.environ.get("LOG4JPROP")
+                )
             else:
-                self._log.warning("LOG4JPROP is set but path is invalid: " + str(logging_file))
+                self._log.warning(
+                    "LOG4JPROP is set but path is invalid: " + str(logging_file)
+                )

         # Specify the main function inside SystemDS to launch in java.
command.append("org.apache.sysds.api.PythonDMLScript")
@@ -221,12 +254,13 @@ def __build_startup_command(self, port: int):
         # TODO: refine the choice of configuration file
         files = glob(os.path.join(root, "conf", "SystemDS*.xml"))
         if len(files) > 1:
-            self._log.warning(
-                "Multiple config files found selecting: " + files[0])
+            self._log.warning("Multiple config files found selecting: " + files[0])
         if len(files) == 0:
-            self._log.warning("No xml config file found at: "
-                              + os.path.join(root, "conf")
-                              + " therefore using default settings")
+            self._log.warning(
+                "No xml config file found at: "
+                + os.path.join(root, "conf")
+                + " therefore using default settings"
+            )
         else:
             command.append("-config")
             command.append(files[0])
@@ -239,7 +273,7 @@ def __build_startup_command(self, port: int):
         command.append("--python")
         command.append(str(actual_port))

-        self._log.info("Command " + str(command))
+        self._log.info("Command " + str(command))
         self._log.info("Port used for communication: " + str(actual_port))

         return command, actual_port
@@ -256,8 +290,7 @@ def __start(self, port: int, capture_stdout: bool, retry: int = 0):
         :param retry: The retry number of the current startup.
         """
         if retry > 3:
-            raise Exception(
-                "Failed startup of SystemDS Context after 3 retries")
+            raise Exception("Failed startup of SystemDS Context after 3 retries")

         if port != -1 and self.__is_port_in_use(port):
             port = -1
@@ -265,7 +298,10 @@ def __start(self, port: int, capture_stdout: bool, retry: int = 0):

         # Verify the port intended is available.
         while self.__is_port_in_use(actual_port):
-            command, actual_port, = self.__build_startup_command(actual_port)
+            (
+                command,
+                actual_port,
+            ) = self.__build_startup_command(actual_port)

         process = self.__try_startup(command, capture_stdout)
@@ -280,19 +316,25 @@ def __start(self, port: int, capture_stdout: bool, retry: int = 0):
                 sleep(sleep_time)
                 try:
                     self.java_gateway = JavaGateway(
-                        gateway_parameters=gwp, java_process=process)
+                        gateway_parameters=gwp, java_process=process
+                    )
                     # Successful startup.
                     return
                 except Py4JNetworkError as pe:
                     m = str(pe)
-                    if "An error occurred while trying to connect to the Java server" in m:
+                    if (
+                        "An error occurred while trying to connect to the Java server"
+                        in m
+                    ):
                         # Here the startup failed because the java process is not ready.
connect_retry += 1
                     else:  # unknown new error or java process crashed
                         raise pe
                 except Exception as e:
                     raise Exception(
-                        "Exception hit when connecting to JavaGateway, perhaps the JVM terminated because the port was in use", e)
+                        "Exception hit when connecting to JavaGateway, perhaps the JVM terminated because the port was in use",
+                        e,
+                    )
             raise Exception("Failed to connect to process, making a new JVM")
         except Exception:
             self.__kill_Popen(process)
@@ -308,15 +350,15 @@ def __exit__(self, exc_type, exc_val, exc_tb):

     def close(self):
         """Close the connection to the java process and do necessary cleanup."""
-        if hasattr(self, 'java_gateway'):
+        if hasattr(self, "java_gateway"):
            self.__kill_Popen(self.java_gateway.java_process)
            self.java_gateway.shutdown()
-        if hasattr(self, '__process'):
+        if hasattr(self, "__process"):
            logging.error("Has process variable")
            self.__kill_Popen(self.__process)
-        if hasattr(self, '__stdout_thread') and self.__stdout_thread.is_alive():
+        if hasattr(self, "__stdout_thread") and self.__stdout_thread.is_alive():
            self.__stdout_thread.join(0)
-        if hasattr(self, '__stderr_thread') and self.__stderr_thread.is_alive():
+        if hasattr(self, "__stderr_thread") and self.__stderr_thread.is_alive():
            self.__stderr_thread.join(0)

     def __kill_Popen(self, process: Popen):
@@ -347,7 +389,7 @@ def __is_port_in_use(self, port: int) -> bool:
         :param port: The port to analyze"""
         # https://stackoverflow.com/questions/2470971/fast-way-to-test-if-a-port-is-in-use-using-python
         with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
-            return s.connect_ex(('localhost', port)) == 0
+            return s.connect_ex(("localhost", port)) == 0

     def _execution_completed(self, script: DMLScript):
         """
@@ -373,7 +415,7 @@ def capture_stats_context(self):
         Afterwards, capturing will be reset to the state it was in before.

         Example:
-        
+
         # ```Python
         # with sds.capture_stats_context():
         #     a = some_computation.compute()
@@ -409,11 +451,10 @@ def take_stats(self):
         return stats

     def clear_stats(self):
-        """Clears the captured statistics.
-        """
+        """Clears the captured statistics."""
         self._statistics = ""

-    def full(self, shape: Tuple[int, int], value: Union[float, int]) -> 'Matrix':
+    def full(self, shape: Tuple[int, int], value: Union[float, int]) -> "Matrix":
         """Generates a matrix completely filled with a value

         :param sds_context: SystemDS context
@@ -422,11 +463,15 @@ def full(self, shape: Tuple[int, int], value: Union[float, int]) -> 'Matrix':
         :return: the OperationNode representing this operation
         """
         unnamed_input_nodes = [value]
-        named_input_nodes = {'rows': shape[0], 'cols': shape[1]}
-        return Matrix(self, 'matrix', unnamed_input_nodes, named_input_nodes)
-
-    def seq(self, start: Union[float, int], stop: Union[float, int] = None,
-            step: Union[float, int] = 1) -> 'Matrix':
+        named_input_nodes = {"rows": shape[0], "cols": shape[1]}
+        return Matrix(self, "matrix", unnamed_input_nodes, named_input_nodes)
+
+    def seq(
+        self,
+        start: Union[float, int],
+        stop: Union[float, int] = None,
+        step: Union[float, int] = 1,
+    ) -> "Matrix":
         """Create a single column vector with values from `start` to `stop` and an increment of `step`.
         If no stop is defined and only one parameter is given, then start will be 0 and the parameter will be interpreted as stop.
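As a usage sketch of the two constructors in the hunks above (hedged: this assumes a pip-installed `systemds` package with a local JVM available, and the printed value is illustrative):

```python
from systemds.context import SystemDSContext

# SystemDSContext starts a JVM; the with-block guarantees close() is called.
with SystemDSContext() as sds:
    ones = sds.full((2, 3), 1.0)  # 2x3 matrix, every cell 1.0
    ramp = sds.seq(3)             # column vector 0,1,2,3; start defaults to 0
    print((ones.sum() + ramp.sum()).compute())  # 6.0 + 6.0 = 12.0
```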
@@ -441,12 +486,19 @@ def seq(self, start: Union[float, int], stop: Union[float, int] = None,
             stop = start
             start = 0
         unnamed_input_nodes = [start, stop, step]
-        return Matrix(self, 'seq', unnamed_input_nodes)
-
-    def rand(self, rows: int, cols: int,
-             min: Union[float, int] = None, max: Union[float, int] = None, pdf: str = "uniform",
-             sparsity: Union[float, int] = None, seed: Union[float, int] = None,
-             lamb: Union[float, int] = 1) -> 'Matrix':
+        return Matrix(self, "seq", unnamed_input_nodes)
+
+    def rand(
+        self,
+        rows: int,
+        cols: int,
+        min: Union[float, int] = None,
+        max: Union[float, int] = None,
+        pdf: str = "uniform",
+        sparsity: Union[float, int] = None,
+        seed: Union[float, int] = None,
+        lamb: Union[float, int] = 1,
+    ) -> "Matrix":
         """Generates a matrix filled with random values

         :param sds_context: SystemDS context
@@ -462,35 +514,43 @@ def rand(self, rows: int, cols: int,
         """
         available_pdf = ["uniform", "normal", "poisson"]
         if rows < 0:
-            raise ValueError("In rand statement, can only assign rows a long (integer) value >= 0 "
-                             "-- attempted to assign value: {r}".format(r=rows))
+            raise ValueError(
+                "In rand statement, can only assign rows a long (integer) value >= 0 "
+                "-- attempted to assign value: {r}".format(r=rows)
+            )
         if cols < 0:
-            raise ValueError("In rand statement, can only assign cols a long (integer) value >= 0 "
-                             "-- attempted to assign value: {c}".format(c=cols))
+            raise ValueError(
+                "In rand statement, can only assign cols a long (integer) value >= 0 "
+                "-- attempted to assign value: {c}".format(c=cols)
+            )
         if pdf not in available_pdf:
-            raise ValueError("The pdf passed is invalid! given: {g}, expected: {e}".format(
-                g=pdf, e=available_pdf))
-
-        pdf = '\"' + pdf + '\"'
-        named_input_nodes = {
-            'rows': rows, 'cols': cols, 'pdf': pdf, 'lambda': lamb}
+            raise ValueError(
+                "The pdf passed is invalid! given: {g}, expected: {e}".format(
+                    g=pdf, e=available_pdf
+                )
+            )
+
+        pdf = '"' + pdf + '"'
+        named_input_nodes = {"rows": rows, "cols": cols, "pdf": pdf, "lambda": lamb}
         if min is not None:
-            named_input_nodes['min'] = min
+            named_input_nodes["min"] = min
         if max is not None:
-            named_input_nodes['max'] = max
+            named_input_nodes["max"] = max
         if sparsity is not None:
-            named_input_nodes['sparsity'] = sparsity
+            named_input_nodes["sparsity"] = sparsity
         if seed is not None:
-            named_input_nodes['seed'] = seed
+            named_input_nodes["seed"] = seed

-        return Matrix(self, 'rand', [], named_input_nodes=named_input_nodes)
+        return Matrix(self, "rand", [], named_input_nodes=named_input_nodes)

     def __fix_string_args(self, arg: str) -> str:
         nf = str(arg).replace('"', "").replace("'", "")
         return f'"{nf}"'

-    def read(self, path: os.PathLike, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode:
-        """ Read a file from disk. Supported types include:
+    def read(
+        self, path: os.PathLike, **kwargs: Dict[str, VALID_INPUT_TYPES]
+    ) -> OperationNode:
+        """Read a file from disk. Supported types include:
         CSV, Matrix Market(coordinate), Text(i,j,v), SystemDS Binary, etc.
         See: http://apache.github.io/systemds/site/dml-language-reference#readwrite-built-in-functions for more details
         :return: an OperationNode containing the read data; it can be of type Matrix, Frame, or Scalar.
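To make the reshaped `rand` signature concrete, a minimal sketch (assuming a running context; the keyword names are exactly those in the hunk above, and the seed fixes the draw):

```python
from systemds.context import SystemDSContext

with SystemDSContext() as sds:
    # 5x5 uniform matrix in [0, 1] with roughly 80% non-zero cells.
    m = sds.rand(rows=5, cols=5, min=0.0, max=1.0, sparsity=0.8, seed=42)
    print(m.compute())  # materializes as a numpy array
```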
@@ -509,8 +569,15 @@ def read(self, path: os.PathLike, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> Ope
         if ".csv" in path[-4:]:
             kwargs["format"] = '"csv"'
             self._log.warning(
-                "Guessing '"+path+"' is a csv file, please add a mtd file, or specify in arguments")
+                "Guessing '"
+                + path
+                + "' is a csv file, please add a mtd file, or specify in arguments"
+            )
-        if not ("header" in kwargs) and "data_type" in kwargs and kwargs["data_type"] == "frame":
+        if (
+            not ("header" in kwargs)
+            and "data_type" in kwargs
+            and kwargs["data_type"] == "frame"
+        ):
             kwargs["header"] = True

         data_type = kwargs.get("data_type", None)
@@ -529,18 +596,20 @@ def read(self, path: os.PathLike, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> Ope
                 return Scalar(self, "read", [f'"{path}"'], named_input_nodes=kwargs)
             else:
                 raise ValueError(
-                    "Invalid arguments for reading scalar, value_type must be specified")
+                    "Invalid arguments for reading scalar, value_type must be specified"
+                )
         elif data_type == "list":
             # Reading a list has no extra arguments.
             return List(self, "read", [f'"{path}"'])
         else:
             kwargs["data_type"] = '"matrix"'
             self._log.warning(
-                "Unknown type read, please add a mtd file or specify in arguments, defaulting to matrix")
+                "Unknown type read, please add a mtd file or specify in arguments, defaulting to matrix"
+            )
             return Matrix(self, "read", [f'"{path}"'], named_input_nodes=kwargs)

     def scalar(self, v: Dict[str, VALID_INPUT_TYPES]) -> Scalar:
-        """ Construct a scalar value; it can contain str, float, double, integers and booleans.
+        """Construct a scalar value; it can contain str, float, double, integers and booleans.
         :return: A scalar containing the given value.
         """
         if type(v) is str:
@@ -551,9 +620,12 @@ def scalar(self, v: Dict[str, VALID_INPUT_TYPES]) -> Scalar:
         # therefore the output type is assigned.
         return Scalar(self, v, assign=True)

-    def from_numpy(self, mat: np.array,
-                   *args: Sequence[VALID_INPUT_TYPES],
-                   **kwargs: Dict[str, VALID_INPUT_TYPES]) -> Matrix:
+    def from_numpy(
+        self,
+        mat: np.array,
+        *args: Sequence[VALID_INPUT_TYPES],
+        **kwargs: Dict[str, VALID_INPUT_TYPES],
+    ) -> Matrix:
         """Generate DAGNode representing matrix with data given by a numpy array,
         which will be sent to SystemDS when needed.
@@ -563,22 +635,26 @@ def from_numpy(self, mat: np.array,
         :return: A Matrix
         """

-        unnamed_params = ['\'./tmp/{file_name}\'']
+        unnamed_params = ["'./tmp/{file_name}'"]

         if len(mat.shape) == 2:
-            named_params = {'rows': mat.shape[0], 'cols': mat.shape[1]}
+            named_params = {"rows": mat.shape[0], "cols": mat.shape[1]}
         elif len(mat.shape) == 1:
-            named_params = {'rows': mat.shape[0], 'cols': 1}
+            named_params = {"rows": mat.shape[0], "cols": 1}
         else:
             # TODO Support tensors.
             raise ValueError("Only two dimensional arrays supported")

         unnamed_params.extend(args)
         named_params.update(kwargs)
-        return Matrix(self, 'read', unnamed_params, named_params, local_data=mat)
-
-    def from_pandas(self, df: pd.DataFrame,
-                    *args: Sequence[VALID_INPUT_TYPES], **kwargs: Dict[str, VALID_INPUT_TYPES]) -> Frame:
+        return Matrix(self, "read", unnamed_params, named_params, local_data=mat)
+
+    def from_pandas(
+        self,
+        df: pd.DataFrame,
+        *args: Sequence[VALID_INPUT_TYPES],
+        **kwargs: Dict[str, VALID_INPUT_TYPES],
+    ) -> Frame:
         """Generate DAGNode representing frame with data given by a pandas dataframe,
         which will be sent to SystemDS when needed.
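The two transfer methods reformatted above move local Python data into SystemDS lazily; a small round-trip sketch (assuming numpy and pandas are installed alongside systemds):

```python
import numpy as np
import pandas as pd
from systemds.context import SystemDSContext

with SystemDSContext() as sds:
    arr = np.arange(6, dtype=np.float64).reshape(2, 3)
    print((sds.from_numpy(arr) + 1).compute())  # numpy array, each cell + 1

    df = pd.DataFrame({"a": ["x", "y"], "b": ["1", "2"]})
    print(sds.from_pandas(df).compute())        # comes back as a pandas DataFrame
```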
@@ -590,9 +666,9 @@ def from_pandas(self, df: pd.DataFrame, unnamed_params = ["'./tmp/{file_name}'"] if len(df.shape) == 2: - named_params = {'rows': df.shape[0], 'cols': df.shape[1]} + named_params = {"rows": df.shape[0], "cols": df.shape[1]} elif len(df.shape) == 1: - named_params = {'rows': df.shape[0], 'cols': 1} + named_params = {"rows": df.shape[0], "cols": 1} else: # TODO Support tensors. raise ValueError("Only two dimensional arrays supported") @@ -605,9 +681,13 @@ def from_pandas(self, df: pd.DataFrame, named_params.update(kwargs) return Frame(self, "read", unnamed_params, named_params, local_data=df) - def federated(self, addresses: Iterable[str], - ranges: Iterable[Tuple[Iterable[int], Iterable[int]]], *args, - **kwargs: Dict[str, VALID_INPUT_TYPES]) -> Matrix: + def federated( + self, + addresses: Iterable[str], + ranges: Iterable[Tuple[Iterable[int], Iterable[int]]], + *args, + **kwargs: Dict[str, VALID_INPUT_TYPES], + ) -> Matrix: """Create federated matrix object. :param sds_context: the SystemDS context @@ -617,22 +697,23 @@ def federated(self, addresses: Iterable[str], :param kwargs: named params :return: The Matrix containing the Federated data. """ - addresses_str = 'list(' + \ - ','.join(map(lambda s: f'"{s}"', addresses)) + ')' - ranges_str = 'list(' + addresses_str = "list(" + ",".join(map(lambda s: f'"{s}"', addresses)) + ")" + ranges_str = "list(" for begin, end in ranges: - ranges_str += f'list({",".join(map(str, begin))}), list({",".join(map(str, end))}),' + ranges_str += ( + f'list({",".join(map(str, begin))}), list({",".join(map(str, end))}),' + ) ranges_str = ranges_str[:-1] - ranges_str += ')' - named_params = {'addresses': addresses_str, 'ranges': ranges_str} + ranges_str += ")" + named_params = {"addresses": addresses_str, "ranges": ranges_str} named_params.update(kwargs) - return Matrix(self, 'federated', args, named_params) + return Matrix(self, "federated", args, named_params) def source(self, path: str, name: str) -> Source: """Import methods from a given dml file. The importing is done through the DML command source, and adds all defined methods from - the script to the Source object returned in python. This gives the flexibility to call the methods + the script to the Source object returned in python. This gives the flexibility to call the methods directly on the object returned. In systemds a method called func_01 can then be imported using @@ -646,46 +727,48 @@ def source(self, path: str, name: str) -> Source: """ return Source(self, path, name) - def list(self, *args: Sequence[VALID_INPUT_TYPES], **kwargs: Dict[str, VALID_INPUT_TYPES]) -> List: - """ Create a List object containing the given nodes. + def list( + self, *args: Sequence[VALID_INPUT_TYPES], **kwargs: Dict[str, VALID_INPUT_TYPES] + ) -> List: + """Create a List object containing the given nodes. Note that only a sequence is allowed, or a dictionary, not both at the same time. :param args: A Sequence that will be inserted to a list :param kwargs: A Dictionary that will return a dictionary, (internally handled as a list) - :return: A List + :return: A List """ return List(self, unnamed_input_nodes=args, named_input_nodes=kwargs) def combine(self, *args: Sequence[VALID_INPUT_TYPES]) -> Combine: - """ combine nodes to call compute on multiple operations. + """combine nodes to call compute on multiple operations. 
-    This is useful for the case of having multiple writes in one script and wanting
+    This is useful for the case of having multiple writes in one script and wanting
     to execute all in one execution, reusing intermediates.
     Note this combine does not allow returning anything to the user, so if used,
     please only use nodes that end with either writing or printing elements.

-    :param args: A sequence that will be executed with a call to compute()
+    :param args: A sequence that will be executed with a call to compute()
     """
         return Combine(self, unnamed_input_nodes=args)

     def array(self, *args: Sequence[VALID_INPUT_TYPES]) -> List:
-        """ Create a List object containing the given nodes.
+        """Create a List object containing the given nodes.
         Note that only a sequence is allowed, or a dictionary, not both at the same time.

         :param args: A Sequence that will be inserted to a list
-        :return: A List
+        :return: A List
         """
         return List(self, unnamed_input_nodes=args)

-    def dict(self, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> List:
-        """ Create a List object containing the given nodes.
+    def dict(self, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> List:
+        """Create a List object containing the given nodes.
         Note that only a sequence is allowed, or a dictionary, not both at the same time.

         :param kwargs: A Dictionary that will return a dictionary, (internally handled as a list)
-        :return: A List
+        :return: A List
         """
         return List(self, named_input_nodes=kwargs)
@@ -705,7 +788,8 @@ def __setup_logging(self, level: int, py4j_level: int):
         f_handler = logging.StreamHandler()
         f_handler.setLevel(level)
         f_format = logging.Formatter(
-            '%(asctime)s - SystemDS- %(levelname)s - %(message)s')
+            "%(asctime)s - SystemDS- %(levelname)s - %(message)s"
+        )
         f_handler.setFormatter(f_format)
         self._log.addHandler(f_handler)
         # avoid the logger calling loggers above.
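That closes systemds_context.py. A short sketch tying together the statistics API from this file (`capture_stats_context` and `take_stats`, both shown in the hunks above) with the `rand` generator; hedged, since the exact statistics text depends on the SystemDS build:

```python
from systemds.context import SystemDSContext

with SystemDSContext(capture_statistics=False) as sds:
    x = sds.rand(100, 10, seed=1)
    with sds.capture_stats_context():  # statistics captured only in this block
        x.sum().compute()
    print(sds.take_stats())            # the captured statistics text
```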
diff --git a/src/main/python/systemds/examples/tutorials/adult.py b/src/main/python/systemds/examples/tutorials/adult.py index dae27c874f2..3f207be481e 100644 --- a/src/main/python/systemds/examples/tutorials/adult.py +++ b/src/main/python/systemds/examples/tutorials/adult.py @@ -47,7 +47,9 @@ class DataManager: _data_string_labels: list def __init__(self): - self._data_zip_url = "https://systemds.apache.org/assets/datasets/adult/data.zip" + self._data_zip_url = ( + "https://systemds.apache.org/assets/datasets/adult/data.zip" + ) self._data_zip_loc = "systemds/examples/tutorials/adult/data.zip" self._train_data_loc = "systemds/examples/tutorials/adult/train_data.csv" @@ -55,23 +57,28 @@ def __init__(self): self._jspec_loc = "systemds/examples/tutorials/adult/jspec.json" def get_preprocessed_dataset(self, sds: SystemDSContext) -> List[pd.DataFrame]: - return self.get_train_data(sds), \ - self.get_train_labels(sds), \ - self.get_test_data(sds), \ - self.get_test_labels(sds) - - def get_preprocessed_dataset_pandas(self, sds: SystemDSContext) -> List[pd.DataFrame]: - return self.get_train_data_pandas(sds), \ - self.get_train_labels_pandas(sds), \ - self.get_test_data_pandas(sds), \ - self.get_test_labels_pandas(sds) + return ( + self.get_train_data(sds), + self.get_train_labels(sds), + self.get_test_data(sds), + self.get_test_labels(sds), + ) + + def get_preprocessed_dataset_pandas( + self, sds: SystemDSContext + ) -> List[pd.DataFrame]: + return ( + self.get_train_data_pandas(sds), + self.get_train_labels_pandas(sds), + self.get_test_data_pandas(sds), + self.get_test_labels_pandas(sds), + ) def get_train_data_pandas(self) -> pd.DataFrame: self._get_data(self._train_data_loc) - return self._parse_data(self._train_data_loc)\ - .drop(labels=["income"], axis=1) + return self._parse_data(self._train_data_loc).drop(labels=["income"], axis=1) - def get_train_data(self, sds: SystemDSContext) -> 'Frame': + def get_train_data(self, sds: SystemDSContext) -> "Frame": self._get_data(self._train_data_loc) return sds.read(self._train_data_loc)[:, 0:14] @@ -79,16 +86,15 @@ def get_train_labels_pandas(self) -> pd.DataFrame: self._get_data(self._train_data_loc) return self._parse_data(self._train_data_loc)[["income"]] - def get_train_labels(self, sds: SystemDSContext) -> 'Frame': + def get_train_labels(self, sds: SystemDSContext) -> "Frame": self._get_data(self._train_data_loc) return sds.read(self._train_data_loc)[:, 14] def get_test_data_pandas(self) -> pd.DataFrame: self._get_data(self._test_data_loc) - return self._parse_data(self._test_data_loc)\ - .drop(labels=["income"], axis=1) + return self._parse_data(self._test_data_loc).drop(labels=["income"], axis=1) - def get_test_data(self, sds: SystemDSContext) -> 'Frame': + def get_test_data(self, sds: SystemDSContext) -> "Frame": self._get_data(self._test_data_loc) return sds.read(self._test_data_loc)[:, 0:14] @@ -96,7 +102,7 @@ def get_test_labels_pandas(self) -> pd.DataFrame: self._get_data(self._test_data_loc) return self._parse_data(self._test_data_loc)[["income"]] - def get_test_labels(self, sds: SystemDSContext) -> 'Frame': + def get_test_labels(self, sds: SystemDSContext) -> "Frame": self._get_data(self._test_data_loc) return sds.read(self._test_data_loc)[:, 14] @@ -105,7 +111,7 @@ def get_jspec_string(self) -> str: with open(self._jspec_loc, "r") as f: return f.read() - def get_jspec(self, sds: SystemDSContext) -> 'Scalar': + def get_jspec(self, sds: SystemDSContext) -> "Scalar": self._get_data(self._jspec_loc) return sds.read(self._jspec_loc, 
data_type="scalar", value_type="string") @@ -119,7 +125,7 @@ def _get_data(self, loc): os.makedirs(folder) if not os.path.isfile(self._data_zip_loc): myZip = requests.get(self._data_zip_url) - with open(self._data_zip_loc, 'wb') as f: + with open(self._data_zip_loc, "wb") as f: f.write(myZip.content) with zipfile.ZipFile(self._data_zip_loc) as z: z.extractall(folder) diff --git a/src/main/python/systemds/examples/tutorials/mnist.py b/src/main/python/systemds/examples/tutorials/mnist.py index 88d9ad99283..5e041f2c7b9 100644 --- a/src/main/python/systemds/examples/tutorials/mnist.py +++ b/src/main/python/systemds/examples/tutorials/mnist.py @@ -72,23 +72,26 @@ def get_test_labels(self) -> np.array: return self._parse_data(self._test_labels_loc) def _parse_data(self, loc): - f = gzip.open if os.path.splitext(loc)[1] == '.gz' else open - with f(loc, 'rb') as fd: + f = gzip.open if os.path.splitext(loc)[1] == ".gz" else open + with f(loc, "rb") as fd: return self._parse(fd) def _parse(self, fd): - DATA_TYPES = {0x08: 'B', # unsigned byte - 0x09: 'b', # signed byte - 0x0b: 'h', # short (2 bytes) - 0x0c: 'i', # int (4 bytes) - 0x0d: 'f', # float (4 bytes) - 0x0e: 'd'} # double (8 bytes) + DATA_TYPES = { + 0x08: "B", # unsigned byte + 0x09: "b", # signed byte + 0x0B: "h", # short (2 bytes) + 0x0C: "i", # int (4 bytes) + 0x0D: "f", # float (4 bytes) + 0x0E: "d", + } # double (8 bytes) header = fd.read(4) - zeros, data_type, num_dimensions = struct.unpack('>HBB', header) + zeros, data_type, num_dimensions = struct.unpack(">HBB", header) data_type = DATA_TYPES[data_type] - dimension_sizes = struct.unpack('>' + 'I' * num_dimensions, - fd.read(4 * num_dimensions)) + dimension_sizes = struct.unpack( + ">" + "I" * num_dimensions, fd.read(4 * num_dimensions) + ) data = array.array(data_type, fd.read()) data.byteswap() # looks like array.array reads data as little endian @@ -103,5 +106,5 @@ def _get_data(self, url, loc): folder = os.path.dirname(loc) if not os.path.isdir(folder): os.makedirs(folder) - with open(loc, 'wb') as f: + with open(loc, "wb") as f: f.write(myfile.content) diff --git a/src/main/python/systemds/operator/__init__.py b/src/main/python/systemds/operator/__init__.py index 51a586bc636..80cef43001f 100644 --- a/src/main/python/systemds/operator/__init__.py +++ b/src/main/python/systemds/operator/__init__.py @@ -30,5 +30,15 @@ from systemds.operator.nodes.source import Source from systemds.operator import algorithm -__all__ = ["OperationNode", "algorithm", "Scalar", "List", - "ListAccess", "Matrix", "Frame", "Source", "MultiReturn", "Combine"] +__all__ = [ + "OperationNode", + "algorithm", + "Scalar", + "List", + "ListAccess", + "Matrix", + "Frame", + "Source", + "MultiReturn", + "Combine", +] diff --git a/src/main/python/systemds/operator/nn/affine.py b/src/main/python/systemds/operator/nn/affine.py index 35935871aa5..a8f3442e518 100644 --- a/src/main/python/systemds/operator/nn/affine.py +++ b/src/main/python/systemds/operator/nn/affine.py @@ -31,18 +31,20 @@ def __init__(self, sds_context: SystemDSContext, d, m, seed=-1): """ sds_context: The systemdsContext to construct the layer inside of d: The number of features that are input to the affine layer - m: The number of neurons that are contained in the layer, + m: The number of neurons that are contained in the layer, and the number of features output """ - super().__init__(sds_context, 'affine.dml') + super().__init__(sds_context, "affine.dml") self._X = None # init weight and bias - self.weight = Matrix(sds_context, '') - self.bias = 
Matrix(sds_context, '') - params_dict = {'D': d, 'M': m, 'seed': seed} + self.weight = Matrix(sds_context, "") + self.bias = Matrix(sds_context, "") + params_dict = {"D": d, "M": m, "seed": seed} out = [self.weight, self.bias] - op = MultiReturn(sds_context, "affine::init", output_nodes=out, named_input_nodes=params_dict) + op = MultiReturn( + sds_context, "affine::init", output_nodes=out, named_input_nodes=params_dict + ) self.weight._unnamed_input_nodes = [op] self.bias._unnamed_input_nodes = [op] op._source_node = self._source @@ -59,7 +61,7 @@ def forward(X: Matrix, W: Matrix, b: Matrix): return Affine._source.forward(X, W, b) @staticmethod - def backward(dout:Matrix, X: Matrix, W: Matrix, b: Matrix): + def backward(dout: Matrix, X: Matrix, W: Matrix, b: Matrix): """ dout: The gradient of the output, passed from the upstream X: The input matrix of this layer @@ -69,12 +71,14 @@ def backward(dout:Matrix, X: Matrix, W: Matrix, b: Matrix): """ sds = X.sds_context Affine._create_source(sds, "affine.dml") - params_dict = {'dout': dout, 'X': X, 'W': W, 'b': b} - dX = Matrix(sds, '') - dW = Matrix(sds, '') - db = Matrix(sds, '') + params_dict = {"dout": dout, "X": X, "W": W, "b": b} + dX = Matrix(sds, "") + dW = Matrix(sds, "") + db = Matrix(sds, "") out = [dX, dW, db] - op = MultiReturn(sds, "affine::backward", output_nodes=out, named_input_nodes=params_dict) + op = MultiReturn( + sds, "affine::backward", output_nodes=out, named_input_nodes=params_dict + ) dX._unnamed_input_nodes = [op] dW._unnamed_input_nodes = [op] db._unnamed_input_nodes = [op] diff --git a/src/main/python/systemds/operator/nodes/combine.py b/src/main/python/systemds/operator/nodes/combine.py index b27109509b7..a1a3279f70b 100644 --- a/src/main/python/systemds/operator/nodes/combine.py +++ b/src/main/python/systemds/operator/nodes/combine.py @@ -29,19 +29,25 @@ class Combine(OperationNode): - def __init__(self, sds_context, func='', - unnamed_input_nodes: Iterable[OperationNode] = None): + def __init__( + self, sds_context, func="", unnamed_input_nodes: Iterable[OperationNode] = None + ): for a in unnamed_input_nodes: if not a._datatype_is_none: raise ValueError( - "Cannot combine elements that have outputs, all elements must be instances of print or write") + "Cannot combine elements that have outputs, all elements must be instances of print or write" + ) self._outputs = {} super().__init__(sds_context, func, unnamed_input_nodes, None, False) - def code_line(self, var_name: str, unnamed_input_vars: Sequence[str], - named_input_vars: Dict[str, str]) -> str: - return '' + def code_line( + self, + var_name: str, + unnamed_input_vars: Sequence[str], + named_input_vars: Dict[str, str], + ) -> str: + return "" def compute(self, verbose: bool = False, lineage: bool = False): return super().compute(verbose, lineage) diff --git a/src/main/python/systemds/operator/nodes/frame.py b/src/main/python/systemds/operator/nodes/frame.py index 1adce58685a..2d9e2319541 100644 --- a/src/main/python/systemds/operator/nodes/frame.py +++ b/src/main/python/systemds/operator/nodes/frame.py @@ -22,8 +22,7 @@ __all__ = ["Frame"] import os -from typing import (TYPE_CHECKING, Dict, Iterable, Optional, Sequence, Tuple, - Union) +from typing import TYPE_CHECKING, Dict, Iterable, Optional, Sequence, Tuple, Union import numpy as np import pandas as pd @@ -33,9 +32,12 @@ from systemds.operator.nodes.scalar import Scalar from systemds.operator.nodes.matrix import Matrix from systemds.utils.consts import VALID_INPUT_TYPES -from systemds.utils.converters 
import (frame_block_to_pandas, - pandas_to_frame_block) -from systemds.utils.helpers import check_is_empty_slice, check_no_less_than_zero, get_slice_string +from systemds.utils.converters import frame_block_to_pandas, pandas_to_frame_block +from systemds.utils.helpers import ( + check_is_empty_slice, + check_no_less_than_zero, + get_slice_string, +) if TYPE_CHECKING: # to avoid cyclic dependencies during runtime @@ -46,11 +48,15 @@ class Frame(OperationNode): _pd_dataframe: pd.DataFrame - def __init__(self, sds_context: "SystemDSContext", operation: str, - unnamed_input_nodes: Union[str, - Iterable[VALID_INPUT_TYPES]] = None, - named_input_nodes: Dict[str, VALID_INPUT_TYPES] = None, - local_data: pd.DataFrame = None, brackets: bool = False) -> "Frame": + def __init__( + self, + sds_context: "SystemDSContext", + operation: str, + unnamed_input_nodes: Union[str, Iterable[VALID_INPUT_TYPES]] = None, + named_input_nodes: Dict[str, VALID_INPUT_TYPES] = None, + local_data: pd.DataFrame = None, + brackets: bool = False, + ) -> "Frame": is_python_local_data = False if local_data is not None: self._pd_dataframe = local_data @@ -58,18 +64,33 @@ def __init__(self, sds_context: "SystemDSContext", operation: str, else: self._pd_dataframe = None - super().__init__(sds_context, operation, unnamed_input_nodes, - named_input_nodes, is_python_local_data, brackets, is_datatype_none=False) + super().__init__( + sds_context, + operation, + unnamed_input_nodes, + named_input_nodes, + is_python_local_data, + brackets, + is_datatype_none=False, + ) - def pass_python_data_to_prepared_script(self, sds, var_name: str, prepared_script: JavaObject) -> None: + def pass_python_data_to_prepared_script( + self, sds, var_name: str, prepared_script: JavaObject + ) -> None: assert ( - self.is_python_local_data), "Can only pass data to prepared script if it is python local!" + self.is_python_local_data + ), "Can only pass data to prepared script if it is python local!" if self._is_pandas(): prepared_script.setFrame( var_name, pandas_to_frame_block(sds, self._pd_dataframe), True ) # True for reuse - def code_line(self, var_name: str, unnamed_input_vars: Sequence[str], named_input_vars: Dict[str, str]) -> str: + def code_line( + self, + var_name: str, + unnamed_input_vars: Sequence[str], + named_input_vars: Dict[str, str], + ) -> str: code_line = super().code_line(var_name, unnamed_input_vars, named_input_vars) if self._is_pandas(): code_line = code_line.format(file_name=var_name) @@ -83,7 +104,10 @@ def compute(self, verbose: bool = False, lineage: bool = False) -> pd.DataFrame: return super().compute(verbose, lineage) def _parse_output_result_variables(self, result_variables): - return frame_block_to_pandas(self.sds_context, result_variables.getFrameBlock(self._script.out_var_name[0])) + return frame_block_to_pandas( + self.sds_context, + result_variables.getFrameBlock(self._script.out_var_name[0]), + ) def _is_pandas(self) -> bool: return self._pd_dataframe is not None @@ -111,68 +135,86 @@ def transform_apply(self, spec: "Scalar", meta: "Frame"): params_dict = {"target": self, "spec": spec, "meta": meta} return Matrix(self.sds_context, "transformapply", named_input_nodes=params_dict) - def rbind(self, other) -> 'Frame': + def rbind(self, other) -> "Frame": """ - Row-wise frame concatenation, by concatenating the second frame as additional rows to the first frame. + Row-wise frame concatenation, by concatenating the second frame as additional rows to the first frame. 
:param: The other frame to bind to the right hand side :return: The OperationNode containing the concatenated frames. """ return Frame(self.sds_context, "rbind", [self, other]) - def cbind(self, other) -> 'Frame': + def cbind(self, other) -> "Frame": """ - Column-wise frame concatenation, by concatenating the second frame as additional columns to the first frame. + Column-wise frame concatenation, by concatenating the second frame as additional columns to the first frame. :param: The other frame to bind to the right hand side. :return: The Frame containing the concatenated frames. """ return Frame(self.sds_context, "cbind", [self, other]) - def replace(self, pattern: str, replacement: str) -> 'Frame': + def replace(self, pattern: str, replacement: str) -> "Frame": """ Replace all instances of string with replacement string :param: pattern the string to replace :param: replacement the string to replace with - :return: The Frame containing the replaced values + :return: The Frame containing the replaced values """ - return Frame(self.sds_context, "replace", named_input_nodes={"target": self, "pattern": f"'{pattern}'", "replacement": f"'{replacement}'"}) + return Frame( + self.sds_context, + "replace", + named_input_nodes={ + "target": self, + "pattern": f"'{pattern}'", + "replacement": f"'{replacement}'", + }, + ) - def to_string(self, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> 'Scalar': - """ Converts the input to a string representation. + def to_string(self, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> "Scalar": + """Converts the input to a string representation. :return: `Scalar` containing the string. """ - return Scalar(self.sds_context, 'toString', [self], kwargs) + return Scalar(self.sds_context, "toString", [self], kwargs) def __str__(self): return "FrameNode" - def nRow(self) -> 'Scalar': - return Scalar(self.sds_context, 'nrow', [self]) + def nRow(self) -> "Scalar": + return Scalar(self.sds_context, "nrow", [self]) - def nCol(self) -> 'Scalar': - return Scalar(self.sds_context, 'ncol', [self]) + def nCol(self) -> "Scalar": + return Scalar(self.sds_context, "ncol", [self]) - def __getitem__(self, i) -> 'Frame': + def __getitem__(self, i) -> "Frame": if isinstance(i, tuple) and len(i) > 2: raise ValueError("Maximum of two dimensions are allowed") elif isinstance(i, list): check_no_less_than_zero(i) slice = self.sds_context.from_numpy(np.array(i)) + 1 - select = Matrix(self.sds_context, "table", - [slice, 1, self.nRow(), 1]) - ret = Frame(self.sds_context, "removeEmpty", [], { - 'target': self, 'margin': '"rows"', 'select': select}) + select = Matrix(self.sds_context, "table", [slice, 1, self.nRow(), 1]) + ret = Frame( + self.sds_context, + "removeEmpty", + [], + {"target": self, "margin": '"rows"', "select": select}, + ) return ret elif isinstance(i, tuple) and isinstance(i[0], list) and isinstance(i[1], list): raise NotImplementedError("double slicing is not supported yet") - elif isinstance(i, tuple) and check_is_empty_slice(i[0]) and isinstance(i[1], list): + elif ( + isinstance(i, tuple) + and check_is_empty_slice(i[0]) + and isinstance(i[1], list) + ): check_no_less_than_zero(i[1]) slice = self.sds_context.from_numpy(np.array(i[1])) + 1 - select = Matrix(self.sds_context, "table", - [slice, 1, self.nCol(), 1]) - ret = Frame(self.sds_context, "removeEmpty", [], { - 'target': self, 'margin': '"cols"', 'select': select}) + select = Matrix(self.sds_context, "table", [slice, 1, self.nCol(), 1]) + ret = Frame( + self.sds_context, + "removeEmpty", + [], + {"target": self, "margin": 
'"cols"', "select": select},
+            )
             return ret
         else:
             sliceIns = get_slice_string(i)
-            return Frame(self.sds_context, '', [self, sliceIns], brackets=True)
+            return Frame(self.sds_context, "", [self, sliceIns], brackets=True)
diff --git a/src/main/python/systemds/operator/nodes/list.py b/src/main/python/systemds/operator/nodes/list.py
index 17b03f335ad..133296955ed 100644
--- a/src/main/python/systemds/operator/nodes/list.py
+++ b/src/main/python/systemds/operator/nodes/list.py
@@ -34,17 +34,21 @@
 class List(OperationNode):

-    def __init__(self, sds_context, func='list',
-                 unnamed_input_nodes: Union[str,
-                                            Iterable[VALID_INPUT_TYPES]] = None,
-                 named_input_nodes: Dict[str, VALID_INPUT_TYPES] = None):
+    def __init__(
+        self,
+        sds_context,
+        func="list",
+        unnamed_input_nodes: Union[str, Iterable[VALID_INPUT_TYPES]] = None,
+        named_input_nodes: Dict[str, VALID_INPUT_TYPES] = None,
+    ):
         named = named_input_nodes != None and len(named_input_nodes) != 0
         unnamed = unnamed_input_nodes != None and len(unnamed_input_nodes) != 0
         if func == "list":
             if named and unnamed:
                 raise ValueError(
-                    "A List cannot both contain named and unnamed variables")
+                    "A List cannot both contain named and unnamed variables"
+                )
             elif unnamed:
                 self._outputs = []
                 for v in unnamed_input_nodes:
@@ -57,8 +61,14 @@ def __init__(self, sds_context, func='list',
             # Initialize the outputs as an empty list, and populate it when items are requested.
             self._outputs = {}

-        super().__init__(sds_context, func, unnamed_input_nodes,
-                         named_input_nodes, False, is_datatype_none=False)
+        super().__init__(
+            sds_context,
+            func,
+            unnamed_input_nodes,
+            named_input_nodes,
+            False,
+            is_datatype_none=False,
+        )

     def __getitem__(self, key):
         if key in self._outputs:
@@ -68,11 +78,16 @@ def __getitem__(self, key):
             self._outputs[key] = ent
             return ent

-    def pass_python_data_to_prepared_script(self, sds, var_name: str, prepared_script: JavaObject) -> None:
-        assert self.is_python_local_data, 'Can only pass data to prepared script if it is python local!'
+    def pass_python_data_to_prepared_script(
+        self, sds, var_name: str, prepared_script: JavaObject
+    ) -> None:
+        assert (
+            self.is_python_local_data
+        ), "Can only pass data to prepared script if it is python local!"
if self._is_numpy(): - prepared_script.setMatrix(var_name, numpy_to_matrix_block( - sds, self._np_array), True) # True for reuse + prepared_script.setMatrix( + var_name, numpy_to_matrix_block(sds, self._np_array), True + ) # True for reuse def compute(self, verbose: bool = False, lineage: bool = False) -> np.array: return super().compute(verbose, lineage) diff --git a/src/main/python/systemds/operator/nodes/list_access.py b/src/main/python/systemds/operator/nodes/list_access.py index 7138681214e..13ff9f6e365 100644 --- a/src/main/python/systemds/operator/nodes/list_access.py +++ b/src/main/python/systemds/operator/nodes/list_access.py @@ -30,17 +30,27 @@ class ListAccess(OperationNode): - def __init__(self, sds_context: 'SystemDSContext', list_source: 'List', key): + def __init__(self, sds_context: "SystemDSContext", list_source: "List", key): self._key = key self._list_source = list_source inputs = [list_source] - super().__init__(sds_context, None, unnamed_input_nodes=inputs, - is_datatype_unknown=True, is_datatype_none=False, is_python_local_data=False) + super().__init__( + sds_context, + None, + unnamed_input_nodes=inputs, + is_datatype_unknown=True, + is_datatype_none=False, + is_python_local_data=False, + ) - def code_line(self, var_name: str, unnamed_input_vars: Sequence[str], - named_input_vars: Dict[str, str]) -> str: - return f'{var_name}={self._list_source._dml_name}[{self._key}];' + def code_line( + self, + var_name: str, + unnamed_input_vars: Sequence[str], + named_input_vars: Dict[str, str], + ) -> str: + return f"{var_name}={self._list_source._dml_name}[{self._key}];" def as_matrix(self) -> Matrix: ent = self._list_source[self._key] diff --git a/src/main/python/systemds/operator/nodes/matrix.py b/src/main/python/systemds/operator/nodes/matrix.py index fc55b7846f5..cddfad6d46d 100644 --- a/src/main/python/systemds/operator/nodes/matrix.py +++ b/src/main/python/systemds/operator/nodes/matrix.py @@ -68,7 +68,7 @@ def __init__( named_input_nodes, is_python_local_data, brackets, - is_datatype_none=False + is_datatype_none=False, ) def pass_python_data_to_prepared_script( @@ -502,9 +502,7 @@ def moment(self, moment: int, weights: OperationNode = None) -> "Matrix": if weights is not None: unnamed_inputs.append(weights) unnamed_inputs.append(moment) - return Matrix( - self.sds_context, "moment", unnamed_inputs - ) + return Matrix(self.sds_context, "moment", unnamed_inputs) def cholesky(self, safe: bool = False) -> "Matrix": """Computes the Cholesky decomposition of a symmetric, positive definite matrix @@ -629,9 +627,7 @@ def to_string(self, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> "Scalar": """Converts the input to a string representation. :return: `Scalar` containing the string. """ - return Scalar( - self.sds_context, "toString", [self], kwargs - ) + return Scalar(self.sds_context, "toString", [self], kwargs) def isNA(self) -> "Matrix": """Computes a boolean indicator matrix of the same shape as the input, indicating where NA (not available) @@ -809,20 +805,22 @@ def quantile(self, p, weights: "Matrix" = None) -> "OperationNode": else: raise ValueError("P has to be a Scalar or Matrix") - def fft(self) -> 'MultiReturn': + def fft(self) -> "MultiReturn": """ Performs the Fast Fourier Transform (FFT) on the matrix. :return: A MultiReturn object representing the real and imaginary parts of the FFT output. 
""" - real_output = Matrix(self.sds_context, '') - imag_output = Matrix(self.sds_context, '') + real_output = Matrix(self.sds_context, "") + imag_output = Matrix(self.sds_context, "") - fft_node = MultiReturn(self.sds_context, 'fft', [real_output, imag_output], [self]) + fft_node = MultiReturn( + self.sds_context, "fft", [real_output, imag_output], [self] + ) return fft_node - def ifft(self, imag_input: 'Matrix' = None) -> 'MultiReturn': + def ifft(self, imag_input: "Matrix" = None) -> "MultiReturn": """ Performs the Inverse Fast Fourier Transform (IFFT) on a complex matrix. @@ -830,13 +828,17 @@ def ifft(self, imag_input: 'Matrix' = None) -> 'MultiReturn': :return: A MultiReturn object representing the real and imaginary parts of the IFFT output. """ - real_output = Matrix(self.sds_context, '') - imag_output = Matrix(self.sds_context, '') + real_output = Matrix(self.sds_context, "") + imag_output = Matrix(self.sds_context, "") if imag_input is None: - ifft_node = MultiReturn(self.sds_context, 'ifft', [real_output, imag_output], [self]) + ifft_node = MultiReturn( + self.sds_context, "ifft", [real_output, imag_output], [self] + ) else: - ifft_node = MultiReturn(self.sds_context, 'ifft', [real_output, imag_output], [self, imag_input]) + ifft_node = MultiReturn( + self.sds_context, "ifft", [real_output, imag_output], [self, imag_input] + ) return ifft_node diff --git a/src/main/python/systemds/operator/nodes/multi_return.py b/src/main/python/systemds/operator/nodes/multi_return.py index f5d956b9cdf..a43c478a08c 100644 --- a/src/main/python/systemds/operator/nodes/multi_return.py +++ b/src/main/python/systemds/operator/nodes/multi_return.py @@ -27,41 +27,52 @@ from py4j.java_gateway import JavaObject from systemds.operator import OperationNode from systemds.utils.consts import VALID_INPUT_TYPES -from systemds.utils.converters import (frame_block_to_pandas, - matrix_block_to_numpy) +from systemds.utils.converters import frame_block_to_pandas, matrix_block_to_numpy from systemds.utils.helpers import create_params_string class MultiReturn(OperationNode): - def __init__(self, sds_context, operation, - output_nodes: List[OperationNode], - unnamed_input_nodes: Union[str, - Iterable[VALID_INPUT_TYPES]] = None, - named_input_nodes: Dict[str, VALID_INPUT_TYPES] = None): + def __init__( + self, + sds_context, + operation, + output_nodes: List[OperationNode], + unnamed_input_nodes: Union[str, Iterable[VALID_INPUT_TYPES]] = None, + named_input_nodes: Dict[str, VALID_INPUT_TYPES] = None, + ): self._outputs = output_nodes - super().__init__(sds_context, operation, unnamed_input_nodes, - named_input_nodes, False, is_datatype_none=False) + super().__init__( + sds_context, + operation, + unnamed_input_nodes, + named_input_nodes, + False, + is_datatype_none=False, + ) def __getitem__(self, key): return self._outputs[key] - def code_line(self, var_name: str, unnamed_input_vars: Sequence[str], - named_input_vars: Dict[str, str]) -> str: + def code_line( + self, + var_name: str, + unnamed_input_vars: Sequence[str], + named_input_vars: Dict[str, str], + ) -> str: - inputs_comma_sep = create_params_string( - unnamed_input_vars, named_input_vars) + inputs_comma_sep = create_params_string(unnamed_input_vars, named_input_vars) output = "[" for idx, output_node in enumerate(self._outputs): - name = f'{var_name}_{idx}' + name = f"{var_name}_{idx}" output_node.dml_name = name - output += f'{name},' + output += f"{name}," output = output[:-1] + "]" - return f'{output}={self.operation}({inputs_comma_sep});' + return 
f"{output}={self.operation}({inputs_comma_sep});"

     def _parse_output_result_variables(self, result_variables):
         result_var = []
@@ -70,15 +81,18 @@ def _parse_output_result_variables(self, result_variables):
             output = self._outputs[idx]
             if str(output) == "MatrixNode":
                 result_var.append(
-                    matrix_block_to_numpy(jvmV, result_variables.getMatrixBlock(v)))
+                    matrix_block_to_numpy(jvmV, result_variables.getMatrixBlock(v))
+                )
             elif str(output) == "FrameNode":
                 result_var.append(
-                    frame_block_to_pandas(jvmV, result_variables.getFrameBlock(v)))
+                    frame_block_to_pandas(jvmV, result_variables.getFrameBlock(v))
+                )
             elif str(output) == "ScalarNode":
                 result_var.append(result_variables.getDouble(v))
             else:
                 raise NotImplementedError(
-                    "Not implemented support of type " + str(output))
+                    "Not implemented support of type " + str(output)
+                )
         return result_var

     def __iter__(self):
diff --git a/src/main/python/systemds/operator/nodes/scalar.py b/src/main/python/systemds/operator/nodes/scalar.py
index 674690c0add..1d87ce56377 100644
--- a/src/main/python/systemds/operator/nodes/scalar.py
+++ b/src/main/python/systemds/operator/nodes/scalar.py
@@ -22,36 +22,52 @@
 __all__ = ["Scalar"]

 import os
-from typing import (TYPE_CHECKING, Dict, Iterable, Optional, Sequence, Tuple,
-                    Union)
+from typing import TYPE_CHECKING, Dict, Iterable, Optional, Sequence, Tuple, Union

 import numpy as np
 from py4j.java_gateway import JavaObject, JVMView
 from systemds.operator.operation_node import OperationNode
-from systemds.utils.consts import (BINARY_OPERATIONS, VALID_ARITHMETIC_TYPES,
-                                   VALID_INPUT_TYPES)
+from systemds.utils.consts import (
+    BINARY_OPERATIONS,
+    VALID_ARITHMETIC_TYPES,
+    VALID_INPUT_TYPES,
+)
 from systemds.utils.converters import numpy_to_matrix_block


 class Scalar(OperationNode):
     __assign: bool

-    def __init__(self, sds_context, operation: str,
-                 unnamed_input_nodes: Iterable[VALID_INPUT_TYPES] = None,
-                 named_input_nodes: Dict[str, VALID_INPUT_TYPES] = None,
-                 assign: bool = False) -> 'Scalar':
+    def __init__(
+        self,
+        sds_context,
+        operation: str,
+        unnamed_input_nodes: Iterable[VALID_INPUT_TYPES] = None,
+        named_input_nodes: Dict[str, VALID_INPUT_TYPES] = None,
+        assign: bool = False,
+    ) -> "Scalar":
         self.__assign = assign
-        super().__init__(sds_context, operation, unnamed_input_nodes=unnamed_input_nodes,
-                         named_input_nodes=named_input_nodes, is_datatype_none=False)
-
-    def pass_python_data_to_prepared_script(self, sds, var_name: str, prepared_script: JavaObject) -> None:
-        raise RuntimeError(
-            'Scalar operation nodes should not have Python data input')
-
-    def code_line(self, var_name: str, unnamed_input_vars: Sequence[str],
-                  named_input_vars: Dict[str, str]) -> str:
+        super().__init__(
+            sds_context,
+            operation,
+            unnamed_input_nodes=unnamed_input_nodes,
+            named_input_nodes=named_input_nodes,
+            is_datatype_none=False,
+        )
+
+    def pass_python_data_to_prepared_script(
+        self, sds, var_name: str, prepared_script: JavaObject
+    ) -> None:
+        raise RuntimeError("Scalar operation nodes should not have Python data input")
+
+    def code_line(
+        self,
+        var_name: str,
+        unnamed_input_vars: Sequence[str],
+        named_input_vars: Dict[str, str],
+    ) -> str:
         if self.__assign:
-            return f'{var_name}={self.operation};'
+            return f"{var_name}={self.operation};"
         else:
             return super().code_line(var_name, unnamed_input_vars, named_input_vars)
@@ -71,131 +87,132 @@ def _parse_output_result_variables(self, result_variables):
                 return scalar_object.getBooleanValue()
             else:
                 raise NotImplementedError(
-                    "Not currently support scalar type: "
+ value_type) + "Not currently support scalar type: " + value_type + ) - def __add__(self, other: VALID_ARITHMETIC_TYPES) -> 'Scalar': - return Scalar(self.sds_context, '+', [self, other]) + def __add__(self, other: VALID_ARITHMETIC_TYPES) -> "Scalar": + return Scalar(self.sds_context, "+", [self, other]) # Left hand side - def __radd__(self, other: VALID_ARITHMETIC_TYPES) -> 'Scalar': - return Scalar(self.sds_context, '+', [other, self]) + def __radd__(self, other: VALID_ARITHMETIC_TYPES) -> "Scalar": + return Scalar(self.sds_context, "+", [other, self]) - def __sub__(self, other: VALID_ARITHMETIC_TYPES) -> 'Scalar': - return Scalar(self.sds_context, '-', [self, other]) + def __sub__(self, other: VALID_ARITHMETIC_TYPES) -> "Scalar": + return Scalar(self.sds_context, "-", [self, other]) # Left hand side - def __rsub__(self, other: VALID_ARITHMETIC_TYPES) -> 'Scalar': - return Scalar(self.sds_context, '-', [other, self]) + def __rsub__(self, other: VALID_ARITHMETIC_TYPES) -> "Scalar": + return Scalar(self.sds_context, "-", [other, self]) - def __mul__(self, other: VALID_ARITHMETIC_TYPES) -> 'Scalar': - return Scalar(self.sds_context, '*', [self, other]) + def __mul__(self, other: VALID_ARITHMETIC_TYPES) -> "Scalar": + return Scalar(self.sds_context, "*", [self, other]) - def __rmul__(self, other: VALID_ARITHMETIC_TYPES) -> 'Scalar': - return Scalar(self.sds_context, '*', [other, self]) + def __rmul__(self, other: VALID_ARITHMETIC_TYPES) -> "Scalar": + return Scalar(self.sds_context, "*", [other, self]) - def __truediv__(self, other: VALID_ARITHMETIC_TYPES) -> 'Scalar': - return Scalar(self.sds_context, '/', [self, other]) + def __truediv__(self, other: VALID_ARITHMETIC_TYPES) -> "Scalar": + return Scalar(self.sds_context, "/", [self, other]) - def __rtruediv__(self, other: VALID_ARITHMETIC_TYPES) -> 'Scalar': - return Scalar(self.sds_context, '/', [other, self]) + def __rtruediv__(self, other: VALID_ARITHMETIC_TYPES) -> "Scalar": + return Scalar(self.sds_context, "/", [other, self]) - def __floordiv__(self, other: VALID_ARITHMETIC_TYPES) -> 'Scalar': - return Scalar(self.sds_context, '//', [self, other]) + def __floordiv__(self, other: VALID_ARITHMETIC_TYPES) -> "Scalar": + return Scalar(self.sds_context, "//", [self, other]) - def __rfloordiv__(self, other: VALID_ARITHMETIC_TYPES) -> 'Scalar': - return Scalar(self.sds_context, '//', [other, self]) + def __rfloordiv__(self, other: VALID_ARITHMETIC_TYPES) -> "Scalar": + return Scalar(self.sds_context, "//", [other, self]) - def __lt__(self, other) -> 'Scalar': - return Scalar(self.sds_context, '<', [self, other]) + def __lt__(self, other) -> "Scalar": + return Scalar(self.sds_context, "<", [self, other]) - def __rlt__(self, other) -> 'Scalar': - return Scalar(self.sds_context, '<', [other, self]) + def __rlt__(self, other) -> "Scalar": + return Scalar(self.sds_context, "<", [other, self]) - def __le__(self, other) -> 'Scalar': - return Scalar(self.sds_context, '<=', [self, other]) + def __le__(self, other) -> "Scalar": + return Scalar(self.sds_context, "<=", [self, other]) - def __rle__(self, other) -> 'Scalar': - return Scalar(self.sds_context, '<=', [other, self]) + def __rle__(self, other) -> "Scalar": + return Scalar(self.sds_context, "<=", [other, self]) - def __gt__(self, other) -> 'Scalar': - return Scalar(self.sds_context, '>', [self, other]) + def __gt__(self, other) -> "Scalar": + return Scalar(self.sds_context, ">", [self, other]) - def __rgt__(self, other) -> 'Scalar': - return Scalar(self.sds_context, '>', [other, self]) + def 
__rgt__(self, other) -> "Scalar": + return Scalar(self.sds_context, ">", [other, self]) - def __ge__(self, other) -> 'Scalar': - return Scalar(self.sds_context, '>=', [self, other]) + def __ge__(self, other) -> "Scalar": + return Scalar(self.sds_context, ">=", [self, other]) - def __rge__(self, other) -> 'Scalar': - return Scalar(self.sds_context, '>=', [other, self]) + def __rge__(self, other) -> "Scalar": + return Scalar(self.sds_context, ">=", [other, self]) - def __eq__(self, other) -> 'Scalar': - return Scalar(self.sds_context, '==', [self, other]) + def __eq__(self, other) -> "Scalar": + return Scalar(self.sds_context, "==", [self, other]) - def __req__(self, other) -> 'Scalar': - return Scalar(self.sds_context, '==', [other, self]) + def __req__(self, other) -> "Scalar": + return Scalar(self.sds_context, "==", [other, self]) - def __ne__(self, other) -> 'Scalar': - return Scalar(self.sds_context, '!=', [self, other]) + def __ne__(self, other) -> "Scalar": + return Scalar(self.sds_context, "!=", [self, other]) - def __rne__(self, other) -> 'Scalar': - return Scalar(self.sds_context, '!=', [other, self]) + def __rne__(self, other) -> "Scalar": + return Scalar(self.sds_context, "!=", [other, self]) - def __matmul__(self, other: 'Scalar') -> 'Scalar': - return Scalar(self.sds_context, '%*%', [self, other]) + def __matmul__(self, other: "Scalar") -> "Scalar": + return Scalar(self.sds_context, "%*%", [self, other]) - def sum(self) -> 'Scalar': - return Scalar(self.sds_context, 'sum', [self]) + def sum(self) -> "Scalar": + return Scalar(self.sds_context, "sum", [self]) - def mean(self) -> 'Scalar': - return Scalar(self.sds_context, 'mean', [self]) + def mean(self) -> "Scalar": + return Scalar(self.sds_context, "mean", [self]) - def var(self, axis: int = None) -> 'Scalar': - return Scalar(self.sds_context, 'var', [self]) + def var(self, axis: int = None) -> "Scalar": + return Scalar(self.sds_context, "var", [self]) - def abs(self) -> 'Scalar': + def abs(self) -> "Scalar": """Calculate absolute. :return: `Scalar` representing operation """ - return Scalar(self.sds_context, 'abs', [self]) + return Scalar(self.sds_context, "abs", [self]) - def sqrt(self) -> 'Scalar': + def sqrt(self) -> "Scalar": """Calculate square root. :return: `Scalar` representing operation """ - return Scalar(self.sds_context, 'sqrt', [self]) + return Scalar(self.sds_context, "sqrt", [self]) - def floor(self) -> 'Scalar': + def floor(self) -> "Scalar": """Return the floor of the input, element-wise. :return: `Scalar` representing operation """ - return Scalar(self.sds_context, 'floor', [self]) + return Scalar(self.sds_context, "floor", [self]) - def ceil(self) -> 'Scalar': + def ceil(self) -> "Scalar": """Return the ceiling of the input, element-wise. :return: `Scalar` representing operation """ - return Scalar(self.sds_context, 'ceil', [self]) + return Scalar(self.sds_context, "ceil", [self]) - def log(self) -> 'Scalar': + def log(self) -> "Scalar": """Calculate logarithm. :return: `Scalar` representing operation """ - return Scalar(self.sds_context, 'log', [self]) + return Scalar(self.sds_context, "log", [self]) - def sin(self) -> 'Scalar': + def sin(self) -> "Scalar": """Calculate sin. :return: `Scalar` representing operation """ - return Scalar(self.sds_context, 'sin', [self]) + return Scalar(self.sds_context, "sin", [self]) - def exp(self) -> 'Scalar': + def exp(self) -> "Scalar": """Calculate exponential. 
:return: `Scalar` representing operation @@ -210,91 +227,90 @@ def sign(self) -> "Scalar": """ return Scalar(self.sds_context, "sign", [self]) - def cos(self) -> 'Scalar': + def cos(self) -> "Scalar": """Calculate cos. :return: `Scalar` representing operation """ - return Scalar(self.sds_context, 'cos', [self]) + return Scalar(self.sds_context, "cos", [self]) - def tan(self) -> 'Scalar': + def tan(self) -> "Scalar": """Calculate tan. :return: `Scalar` representing operation """ - return Scalar(self.sds_context, 'tan', [self]) + return Scalar(self.sds_context, "tan", [self]) - def asin(self) -> 'Scalar': + def asin(self) -> "Scalar": """Calculate arcsin. :return: `Scalar` representing operation """ - return Scalar(self.sds_context, 'asin', [self]) + return Scalar(self.sds_context, "asin", [self]) - def acos(self) -> 'Scalar': + def acos(self) -> "Scalar": """Calculate arccos. :return: `Scalar` representing operation """ - return Scalar(self.sds_context, 'acos', [self]) + return Scalar(self.sds_context, "acos", [self]) - def atan(self) -> 'Scalar': + def atan(self) -> "Scalar": """Calculate arctan. :return: `Scalar` representing operation """ - return Scalar(self.sds_context, 'atan', [self]) + return Scalar(self.sds_context, "atan", [self]) - def sinh(self) -> 'Scalar': + def sinh(self) -> "Scalar": """Calculate hyperbolic sine. :return: `Scalar` representing operation """ - return Scalar(self.sds_context, 'sinh', [self]) + return Scalar(self.sds_context, "sinh", [self]) - def cosh(self) -> 'Scalar': + def cosh(self) -> "Scalar": """Calculate hyperbolic cosine. :return: `Scalar` representing operation """ - return Scalar(self.sds_context, 'cosh', [self]) + return Scalar(self.sds_context, "cosh", [self]) - def tanh(self) -> 'Scalar': + def tanh(self) -> "Scalar": """Calculate hyperbolic tangent. :return: `Scalar` representing operation """ - return Scalar(self.sds_context, 'tanh', [self]) + return Scalar(self.sds_context, "tanh", [self]) - def to_string(self, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> 'Scalar': - """ Converts the input to a string representation. + def to_string(self, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> "Scalar": + """Converts the input to a string representation. :return: `Scalar` containing the string. """ - return Scalar(self.sds_context, 'toString', [self], named_input_nodes=kwargs) + return Scalar(self.sds_context, "toString", [self], named_input_nodes=kwargs) - def isNA(self) -> 'Scalar': - """ Computes a boolean indicator matrix of the same shape as the input, indicating where NA (not available) + def isNA(self) -> "Scalar": + """Computes a boolean indicator matrix of the same shape as the input, indicating where NA (not available) values are located. Currently, NA is only capturing NaN values. :return: the OperationNode representing this operation """ - return Scalar(self.sds_context, 'isNA', [self]) + return Scalar(self.sds_context, "isNA", [self]) - def isNaN(self) -> 'Scalar': - """ Computes a boolean indicator matrix of the same shape as the input, indicating where NaN (not a number) + def isNaN(self) -> "Scalar": + """Computes a boolean indicator matrix of the same shape as the input, indicating where NaN (not a number) values are located.
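# --- Editor's example (not part of the patch) ---------------------------------
# A minimal sketch of how the Scalar operators reformatted above compose into a
# lazily evaluated DAG. It assumes a local SystemDS installation; sds.scalar()
# as the context factory for Scalar nodes is an assumption, not shown in this
# patch.
from systemds.context import SystemDSContext

with SystemDSContext() as sds:
    s = sds.scalar(9.0)
    expr = (s.sqrt() + 1.0) * 2.0  # builds "+" and "*" Scalar nodes, nothing runs yet
    print(expr.compute())          # executes the generated DML and prints 8.0
# -------------------------------------------------------------------------------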
:return: the OperationNode representing this operation """ - return Scalar(self.sds_context, 'isNaN', [self]) + return Scalar(self.sds_context, "isNaN", [self]) - def isInf(self) -> 'Scalar': - """ Computes a boolean indicator matrix of the same shape as the input, indicating where Inf (positive or + def isInf(self) -> "Scalar": + """Computes a boolean indicator matrix of the same shape as the input, indicating where Inf (positive or negative infinity) values are located. :return: the OperationNode representing this operation """ - return Scalar(self.sds_context, 'isInf', [self]) - + return Scalar(self.sds_context, "isInf", [self]) def __str__(self): return "ScalarNode" diff --git a/src/main/python/systemds/operator/nodes/source.py b/src/main/python/systemds/operator/nodes/source.py index 8d83a2fba3b..78e24d0e3de 100644 --- a/src/main/python/systemds/operator/nodes/source.py +++ b/src/main/python/systemds/operator/nodes/source.py @@ -28,8 +28,14 @@ # Import more node types than used, # since source dynamically adds code and the import is needed. -from systemds.operator import (List, ListAccess, Matrix, MultiReturn, - OperationNode, Scalar) +from systemds.operator import ( + List, + ListAccess, + Matrix, + MultiReturn, + OperationNode, + Scalar, +) class Func(object): @@ -49,15 +55,19 @@ def get_func(self, sds_context, source_name) -> MethodType: operation = f'"{source_name}::{self._name}"' argument_string, named_arguments = self.parse_inputs() - named_intput_nodes = f'named_arguments = {{{named_arguments}}}' + named_intput_nodes = f"named_arguments = {{{named_arguments}}}" output_object = self.parse_outputs() - definition = f'def {self._name}(self{argument_string}):' - output = f'out = {output_object}(self.sds_context, {operation}, named_input_nodes=named_arguments)' + definition = f"def {self._name}(self{argument_string}):" + output = f"out = {output_object}(self.sds_context, {operation}, named_input_nodes=named_arguments)" - lines = [definition, - named_intput_nodes, output, - "out._source_node = self", "return out"] + lines = [ + definition, + named_intput_nodes, + output, + "out._source_node = self", + "return out", + ] full_function = "\n\t".join(lines) @@ -66,7 +76,7 @@ def get_func(self, sds_context, source_name) -> MethodType: # Use Exec to build the function from the string exec(full_function) # Use eval to return the function build as a function variable. 
- return eval(f'{self._name}') + return eval(f"{self._name}") def parse_inputs(self): argument_string = "" @@ -75,10 +85,10 @@ def parse_inputs(self): if s != "": v, t = self.parse_type_and_name(s) if len(v) == 1: - argument_string += f', {v[0]}:{t}' + argument_string += f", {v[0]}:{t}" named_arguments += f'"{v[0]}":{v[0]}, ' else: - argument_string += f', {v[0]}:{t} = {v[1]}' + argument_string += f", {v[0]}:{t} = {v[1]}" named_arguments += f'"{v[0]}":{v[0]}, ' return (argument_string, named_arguments) @@ -93,28 +103,29 @@ def parse_outputs(self): def parse_type_and_name(self, var: str): var_l = var.lower() - if var_l[0] == 'm' and var_l[7] == 'd': # "matrix[double]" - return (self.split_to_value_and_def(var[14:]), 'Matrix') - elif var_l[0] == 'd': # double - return (self.split_to_value_and_def(var[6:]), 'Scalar') - elif var_l[0] == 'i': # integer + if var_l[0] == "m" and var_l[7] == "d": # "matrix[double]" + return (self.split_to_value_and_def(var[14:]), "Matrix") + elif var_l[0] == "d": # double + return (self.split_to_value_and_def(var[6:]), "Scalar") + elif var_l[0] == "i": # integer if "integer" in var_l: - return (self.split_to_value_and_def(var[7:]), 'Scalar') + return (self.split_to_value_and_def(var[7:]), "Scalar") else: # int - return (self.split_to_value_and_def(var[3:]), 'Scalar') - elif var_l[0] == 'b': # boolean - return (self.split_to_value_and_def(var[7:], True), 'Scalar') - elif var_l[0] == 'l': # list[unknown] - return (self.split_to_value_and_def(var[13:]), 'List') - elif var_l[0] == 's': # string - return (self.split_to_value_and_def(var[6:]), 'Scalar') + return (self.split_to_value_and_def(var[3:]), "Scalar") + elif var_l[0] == "b": # boolean + return (self.split_to_value_and_def(var[7:], True), "Scalar") + elif var_l[0] == "l": # list[unknown] + return (self.split_to_value_and_def(var[13:]), "List") + elif var_l[0] == "s": # string + return (self.split_to_value_and_def(var[6:]), "Scalar") else: raise NotImplementedError( - "Not Implemented type parsing for function def: " + var) + "Not Implemented type parsing for function def: " + var + ) def split_to_value_and_def(self, var: str, b: bool = False): split = var.split("=") - if(len(split) == 1): + if len(split) == 1: return split elif b: if split[1] == "TRUE": @@ -130,8 +141,7 @@ class Source(OperationNode): __name: str def __init__(self, sds_context, path: str, name: str): - super().__init__(sds_context, - f'"{path}"') + super().__init__(sds_context, f'"{path}"') self.__name = name functions = self.__parse_functions_from_script(path) @@ -140,7 +150,6 @@ def __init__(self, sds_context, path: str, name: str): func = f.get_func(sds_context, name) setattr(self, f._name, MethodType(func, self)) - def __parse_functions_from_script(self, path: str) -> Iterable[Func]: lines = self.__parse_lines_with_filter(path) functions = [] @@ -164,7 +173,7 @@ def __parse_lines_with_filter(self, path: str) -> Iterable[str]: insideComment = False for l in file.readlines(): ls = l.strip() - if len(ls) == 0 or ls[0] == '#': + if len(ls) == 0 or ls[0] == "#": continue elif insideComment: if ls.endswith("*/"): @@ -175,19 +184,19 @@ def __parse_lines_with_filter(self, path: str) -> Iterable[str]: continue elif insideBracket > 0: for c in ls: - if c == '{': + if c == "{": insideBracket += 1 - elif c == '}': + elif c == "}": insideBracket -= 1 else: if "source(" in ls: continue - elif '{' in ls: - en = ''.join(ls.split('{')[0].split()) + elif "{" in ls: + en = "".join(ls.split("{")[0].split()) lines.append(en) insideBracket += 1 else: - en = 
''.join(ls.split()) + en = "".join(ls.split()) lines.append(en) filtered_lines = [] @@ -199,12 +208,17 @@ def __parse_lines_with_filter(self, path: str) -> Iterable[str]: return filtered_lines - def code_line(self, var_name: str, unnamed_input_vars: Sequence[str], named_input_vars: Dict[str, str]) -> str: - if platform.system() == 'Windows': - source_path = self.operation.replace("\\","\\\\") + def code_line( + self, + var_name: str, + unnamed_input_vars: Sequence[str], + named_input_vars: Dict[str, str], + ) -> str: + if platform.system() == "Windows": + source_path = self.operation.replace("\\", "\\\\") else: source_path = self.operation - line = f'source({source_path}) as { self.__name}' + line = f"source({source_path}) as { self.__name}" return line def compute(self, verbose: bool = False, lineage: bool = False): diff --git a/src/main/python/systemds/operator/operation_node.py b/src/main/python/systemds/operator/operation_node.py index a73feaa4682..41c40df900c 100644 --- a/src/main/python/systemds/operator/operation_node.py +++ b/src/main/python/systemds/operator/operation_node.py @@ -20,15 +20,17 @@ # ------------------------------------------------------------- from multiprocessing import Process -from typing import (TYPE_CHECKING, Dict, Iterable, Optional, Sequence, Tuple, - Union) +from typing import TYPE_CHECKING, Dict, Iterable, Optional, Sequence, Tuple, Union import numpy as np from py4j.java_gateway import JavaObject, JVMView from systemds.script_building.dag import DAGNode from systemds.script_building.script import DMLScript -from systemds.utils.consts import (BINARY_OPERATIONS, VALID_ARITHMETIC_TYPES, - VALID_INPUT_TYPES) +from systemds.utils.consts import ( + BINARY_OPERATIONS, + VALID_ARITHMETIC_TYPES, + VALID_INPUT_TYPES, +) from systemds.utils.helpers import create_params_string if TYPE_CHECKING: @@ -47,14 +49,17 @@ class OperationNode(DAGNode): _brackets: bool _datatype_is_unknown: bool - def __init__(self, sds_context: 'SystemDSContext', operation: str, - unnamed_input_nodes: Union[str, - Iterable[VALID_INPUT_TYPES]] = None, - named_input_nodes: Dict[str, VALID_INPUT_TYPES] = None, - is_python_local_data: bool = False, - brackets: bool = False, - is_datatype_unknown: bool = False, - is_datatype_none: bool = True): + def __init__( + self, + sds_context: "SystemDSContext", + operation: str, + unnamed_input_nodes: Union[str, Iterable[VALID_INPUT_TYPES]] = None, + named_input_nodes: Dict[str, VALID_INPUT_TYPES] = None, + is_python_local_data: bool = False, + brackets: bool = False, + is_datatype_unknown: bool = False, + is_datatype_none: bool = True, + ): """ Create general `OperationNode` @@ -84,8 +89,9 @@ def __init__(self, sds_context: 'SystemDSContext', operation: str, self._datatype_is_unknown = is_datatype_unknown self._datatype_is_none = is_datatype_none - def compute(self, verbose: bool = False, lineage: bool = False) -> \ - Union[float, np.array, Tuple[Union[float, np.array], str]]: + def compute( + self, verbose: bool = False, lineage: bool = False + ) -> Union[float, np.array, Tuple[Union[float, np.array], str]]: if self._result_var is None or self._lineage_trace is None: self._script = DMLScript(self.sds_context) @@ -95,15 +101,16 @@ def compute(self, verbose: bool = False, lineage: bool = False) -> \ print(self._script.dml_script) if lineage: - result_variables, self._lineage_trace = self._script.execute_with_lineage() + result_variables, self._lineage_trace = ( + self._script.execute_with_lineage() + ) else: result_variables = self._script.execute() 
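# --- Editor's example (not part of the patch) ---------------------------------
# How the Source node defined above is typically used: each function header
# parsed from the DML script becomes a Python method on the Source object. The
# file "my_funcs.dml" and its function "plus_one" are hypothetical stand-ins.
from systemds.context import SystemDSContext

with SystemDSContext() as sds:
    fns = sds.source("my_funcs.dml", "myns")  # parses the function signatures
    out = fns.plus_one(sds.scalar(41.0))      # method attached via MethodType
    print(out.compute())                      # 42.0
# -------------------------------------------------------------------------------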
self.sds_context._execution_completed(self._script) if result_variables is not None: - self._result_var = self._parse_output_result_variables( - result_variables) + self._result_var = self._parse_output_result_variables(result_variables) if verbose: for x in self.sds_context.get_stdout(): @@ -135,34 +142,47 @@ def get_lineage_trace(self) -> str: return self._lineage_trace - def code_line(self, var_name: str, unnamed_input_vars: Sequence[str], - named_input_vars: Dict[str, str]) -> str: + def code_line( + self, + var_name: str, + unnamed_input_vars: Sequence[str], + named_input_vars: Dict[str, str], + ) -> str: if self._brackets: return f'{var_name}={unnamed_input_vars[0]}[{",".join(unnamed_input_vars[1:])}]' if self.operation in BINARY_OPERATIONS: - assert len( - named_input_vars) == 0, 'Named parameters can not be used with binary operations' - assert len( - unnamed_input_vars) == 2, 'Binary Operations need exactly two input variables' - return f'{var_name}={unnamed_input_vars[0]}{self.operation}{unnamed_input_vars[1]}' + assert ( + len(named_input_vars) == 0 + ), "Named parameters can not be used with binary operations" + assert ( + len(unnamed_input_vars) == 2 + ), "Binary Operations need exactly two input variables" + return f"{var_name}={unnamed_input_vars[0]}{self.operation}{unnamed_input_vars[1]}" - inputs_comma_sep = create_params_string( - unnamed_input_vars, named_input_vars) + inputs_comma_sep = create_params_string(unnamed_input_vars, named_input_vars) if self._datatype_is_none: - return f'{self.operation}({inputs_comma_sep});' + return f"{self.operation}({inputs_comma_sep});" else: - return f'{var_name}={self.operation}({inputs_comma_sep});' + return f"{var_name}={self.operation}({inputs_comma_sep});" - def pass_python_data_to_prepared_script(self, jvm: JVMView, var_name: str, prepared_script: JavaObject) -> None: + def pass_python_data_to_prepared_script( + self, jvm: JVMView, var_name: str, prepared_script: JavaObject + ) -> None: raise NotImplementedError( - 'Operation node has no python local data. Missing implementation in derived class?') - - def write(self, destination: str, format: str = "binary", **kwargs: Dict[str, VALID_INPUT_TYPES]) -> 'OperationNode': - """ Write input to disk. - The written format is easily read by SystemDSContext.read(). + "Operation node has no python local data. Missing implementation in derived class?" + ) + + def write( + self, + destination: str, + format: str = "binary", + **kwargs: Dict[str, VALID_INPUT_TYPES], + ) -> "OperationNode": + """Write input to disk. + The written format is easily read by SystemDSContext.read(). There is no return on write. :param destination: The location which the file is stored. Defaulting to HDFS paths if available. @@ -172,11 +192,13 @@ def write(self, destination: str, format: str = "binary", **kwargs: Dict[str, VA unnamed_inputs = [self, f'"{destination}"'] named_parameters = {"format": f'"{format}"'} named_parameters.update(kwargs) - return OperationNode(self.sds_context, 'write', unnamed_inputs, named_parameters) + return OperationNode( + self.sds_context, "write", unnamed_inputs, named_parameters + ) - def print(self, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> 'OperationNode': - """ Prints the given Operation Node. + def print(self, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> "OperationNode": + """Prints the given Operation Node. There is no return on calling. To get the returned string look at the stdout of SystemDSContext. 
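# --- Editor's example (not part of the patch) ---------------------------------
# Sketch of the write()/print() helpers defined in this class; the output path
# and the 2x2 matrix are illustrative. print() emits on the SystemDS side, so
# the text is fetched from the context's stdout queue afterwards.
from systemds.context import SystemDSContext

with SystemDSContext() as sds:
    m = sds.full((2, 2), 1.0)                         # 2x2 matrix of ones
    m.write("/tmp/ones.csv", format="csv").compute()  # write() has no return value
    m.print().compute()
    for line in sds.get_stdout():                     # SystemDS-side output
        print(line)
# -------------------------------------------------------------------------------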
""" - return OperationNode(self.sds_context, 'print', [self], kwargs) + return OperationNode(self.sds_context, "print", [self], kwargs) diff --git a/src/main/python/systemds/project_info.py b/src/main/python/systemds/project_info.py index 4b7aca0a8d0..0a3285f01fb 100644 --- a/src/main/python/systemds/project_info.py +++ b/src/main/python/systemds/project_info.py @@ -1,4 +1,4 @@ -#------------------------------------------------------------- +# ------------------------------------------------------------- # # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file @@ -17,10 +17,10 @@ # specific language governing permissions and limitations # under the License. # -#------------------------------------------------------------- +# ------------------------------------------------------------- # This file can be used to pass maven project properties to python # via string substitutions using the maven-resources-plugin -__project_group_id__ = 'org.apache.systemds' -__project_artifact_id__ = 'systemds' -__project_version__ = '3.3.0-dev' +__project_group_id__ = "org.apache.systemds" +__project_artifact_id__ = "systemds" +__project_version__ = "3.3.0-dev" diff --git a/src/main/python/systemds/script_building/dag.py b/src/main/python/systemds/script_building/dag.py index 3993a7e6994..7f2dcb5de55 100644 --- a/src/main/python/systemds/script_building/dag.py +++ b/src/main/python/systemds/script_building/dag.py @@ -34,10 +34,11 @@ class DAGNode(ABC): """A Node in the directed-acyclic-graph (DAG) defining all operations.""" - sds_context: 'SystemDSContext' - _unnamed_input_nodes: Sequence[Union['DAGNode', str, int, float, bool]] - _named_input_nodes: Dict[str, Union['DAGNode', str, int, float, bool]] - _named_output_nodes: Dict[str, Union['DAGNode', str, int, float, bool]] + + sds_context: "SystemDSContext" + _unnamed_input_nodes: Sequence[Union["DAGNode", str, int, float, bool]] + _named_input_nodes: Dict[str, Union["DAGNode", str, int, float, bool]] + _named_output_nodes: Dict[str, Union["DAGNode", str, int, float, bool]] _source_node: Optional["DAGNode"] _script: Optional["DMLScript"] _is_python_local_data: bool @@ -61,7 +62,12 @@ def get_lineage_trace(self) -> str: # therefore we could cache the result. raise NotImplementedError - def code_line(self, var_name: str, unnamed_input_vars: Sequence[str], named_input_vars: Dict[str, str]) -> str: + def code_line( + self, + var_name: str, + unnamed_input_vars: Sequence[str], + named_input_vars: Dict[str, str], + ) -> str: """Generates the DML code line equal to the intended action of this node. :param var_name: Name of DML-variable this nodes result should be saved in @@ -71,7 +77,9 @@ def code_line(self, var_name: str, unnamed_input_vars: Sequence[str], named_inpu """ raise NotImplementedError - def pass_python_data_to_prepared_script(self, jvm: JVMView, var_name: str, prepared_script: JavaObject) -> None: + def pass_python_data_to_prepared_script( + self, jvm: JVMView, var_name: str, prepared_script: JavaObject + ) -> None: """Passes data from python to the prepared script object. 
:param jvm: the java virtual machine object diff --git a/src/main/python/systemds/script_building/script.py b/src/main/python/systemds/script_building/script.py index 164db5e6b86..37bd4cdca0c 100644 --- a/src/main/python/systemds/script_building/script.py +++ b/src/main/python/systemds/script_building/script.py @@ -19,8 +19,17 @@ # # ------------------------------------------------------------- -from typing import (TYPE_CHECKING, Any, Collection, Dict, KeysView, List, - Optional, Tuple, Union) +from typing import ( + TYPE_CHECKING, + Any, + Collection, + Dict, + KeysView, + List, + Optional, + Tuple, + Union, +) from py4j.protocol import Py4JNetworkError from py4j.java_collections import JavaArray @@ -38,16 +47,17 @@ class DMLScript: """DMLScript is the class used to describe our intended behavior in DML. This script can be then executed to get the results. """ - sds_context: 'SystemDSContext' + + sds_context: "SystemDSContext" dml_script: str inputs: Dict[str, DAGNode] prepared_script: Optional[Any] out_var_name: List[str] _variable_counter: int - def __init__(self, context: 'SystemDSContext') -> None: + def __init__(self, context: "SystemDSContext") -> None: self.sds_context = context - self.dml_script = '' + self.dml_script = "" self.inputs = {} self.prepared_script = None self.out_var_name = [] @@ -58,7 +68,7 @@ def add_code(self, code: str) -> None: :param code: the dml code line """ - self.dml_script += code + '\n' + self.dml_script += code + "\n" def add_input_from_python(self, var_name: str, input_var: DAGNode) -> None: """Add an input for our preparedScript. Should only be executed for data that is python local. @@ -88,7 +98,6 @@ def execute(self) -> JavaObject: exception_str = str(e) trace_back_limit = None self.sds_context.exception_and_close(exception_str, trace_back_limit) - def execute_with_lineage(self) -> Tuple[JavaObject, str]: """If not already created, create a preparedScript from our DMLCode, pass python local data to our prepared @@ -128,10 +137,12 @@ def __prepare_script(self): self.prepared_script = connection.prepareScript( self.dml_script, _list_to_java_array(gateway, input_names), - _list_to_java_array(gateway, self.out_var_name)) - for (name, input_node) in self.inputs.items(): + _list_to_java_array(gateway, self.out_var_name), + ) + for name, input_node in self.inputs.items(): input_node.pass_python_data_to_prepared_script( - self.sds_context, name, self.prepared_script) + self.sds_context, name, self.prepared_script + ) return connection def get_lineage(self) -> str: @@ -143,10 +154,12 @@ def get_lineage(self) -> str: self.prepared_script = connection.prepareScript( self.dml_script, _list_to_java_array(gateway, input_names), - _list_to_java_array(gateway, self.out_var_name)) - for (name, input_node) in self.inputs.items(): + _list_to_java_array(gateway, self.out_var_name), + ) + for name, input_node in self.inputs.items(): input_node.pass_python_data_to_prepared_script( - gateway.jvm, name, self.prepared_script) + gateway.jvm, name, self.prepared_script + ) connection.setLineage(True) @@ -169,12 +182,11 @@ def build_code(self, dag_root: DAGNode) -> None: if str(dag_root) == "MultiReturnNode": self.out_var_name = [] for idx, output_node in enumerate(dag_root._outputs): - self.add_code( - f'write({baseOutVarString}_{idx}, \'./tmp_{idx}\');') - self.out_var_name.append(f'{baseOutVarString}_{idx}') + self.add_code(f"write({baseOutVarString}_{idx}, './tmp_{idx}');") + self.out_var_name.append(f"{baseOutVarString}_{idx}") else: 
self.out_var_name.append(baseOutVarString) - self.add_code(f'write({baseOutVarString}, \'./tmp\');') + self.add_code(f"write({baseOutVarString}, './tmp');") def clear(self, dag_root: DAGNode): self._dfs_clear_dag_nodes(dag_root) @@ -188,7 +200,7 @@ def _dfs_dag_nodes(self, dag_node: VALID_INPUT_TYPES) -> str: """ if not isinstance(dag_node, DAGNode): if isinstance(dag_node, bool): - return 'TRUE' if dag_node else 'FALSE' + return "TRUE" if dag_node else "FALSE" return str(dag_node) # If the node already have a name then it is already defined @@ -220,7 +232,8 @@ def _dfs_dag_nodes(self, dag_node: VALID_INPUT_TYPES) -> str: self.add_input_from_python(dag_node.dml_name, dag_node) code_line = dag_node.code_line( - dag_node.dml_name, unnamed_input_vars, named_input_vars) + dag_node.dml_name, unnamed_input_vars, named_input_vars + ) self.add_code(code_line) return dag_node.dml_name @@ -243,17 +256,19 @@ def _next_unique_var(self) -> str: """ var_id = self._variable_counter self._variable_counter += 1 - return f'V{var_id}' + return f"V{var_id}" # Helper Functions -def _list_to_java_array(gateway: JavaGateway, py_list: Union[Collection[str], KeysView[str]]) -> JavaArray: +def _list_to_java_array( + gateway: JavaGateway, py_list: Union[Collection[str], KeysView[str]] +) -> JavaArray: """Convert python collection to java array. :param py_list: python collection :return: java array """ array = gateway.new_array(gateway.jvm.java.lang.String, len(py_list)) - for (i, e) in enumerate(py_list): + for i, e in enumerate(py_list): array[i] = e return array diff --git a/src/main/python/systemds/scuro/__init__.py b/src/main/python/systemds/scuro/__init__.py index 1ef36539f01..04139b52830 100644 --- a/src/main/python/systemds/scuro/__init__.py +++ b/src/main/python/systemds/scuro/__init__.py @@ -42,30 +42,30 @@ from systemds.scuro.aligner.task import Task -__all__ = ["Representation", - "Average", - "Concatenation", - "Fusion", - "Sum", - "RowMax", - "Multiplication", - "MelSpectrogram", - "ResNet", - "Bert", - "UnimodalRepresentation", - "LSTM", - "NPY", - "Pickle", - "HDF5", - "JSON", - "Model", - "DiscreteModel", - "AlignedModality", - "AudioModality", - "VideoModality", - "TextModality", - "Modality", - "DRSearch", - "Task" - ] - +__all__ = [ + "Representation", + "Average", + "Concatenation", + "Fusion", + "Sum", + "RowMax", + "Multiplication", + "MelSpectrogram", + "ResNet", + "Bert", + "UnimodalRepresentation", + "LSTM", + "NPY", + "Pickle", + "HDF5", + "JSON", + "Model", + "DiscreteModel", + "AlignedModality", + "AudioModality", + "VideoModality", + "TextModality", + "Modality", + "DRSearch", + "Task", +] diff --git a/src/main/python/systemds/scuro/aligner/alignment.py b/src/main/python/systemds/scuro/aligner/alignment.py index 0e89cdf9e8e..e341e1b76bf 100644 --- a/src/main/python/systemds/scuro/aligner/alignment.py +++ b/src/main/python/systemds/scuro/aligner/alignment.py @@ -26,8 +26,13 @@ class Alignment: - def __init__(self, modality_a: Modality, modality_b: Modality, strategy: AlignmentStrategy, - similarity_measure: Measure): + def __init__( + self, + modality_a: Modality, + modality_b: Modality, + strategy: AlignmentStrategy, + similarity_measure: Measure, + ): """ Defines the core of the library where the alignment of two modalities is performed :param modality_a: first modality @@ -39,7 +44,6 @@ def __init__(self, modality_a: Modality, modality_b: Modality, strategy: Alignme self.modality_b = modality_b self.strategy = strategy self.similarity_measure = similarity_measure - + def 
align_modalities(self) -> Modality: return AlignedModality(Representation()) - \ No newline at end of file diff --git a/src/main/python/systemds/scuro/aligner/alignment_strategy.py b/src/main/python/systemds/scuro/aligner/alignment_strategy.py index 70d88ee8a86..698a6d0d982 100644 --- a/src/main/python/systemds/scuro/aligner/alignment_strategy.py +++ b/src/main/python/systemds/scuro/aligner/alignment_strategy.py @@ -24,17 +24,17 @@ class AlignmentStrategy: def __init__(self): pass - + def align_chunk(self, chunk_a, chunk_b, similarity_measure: Measure): - raise 'Not implemented error' + raise NotImplementedError class ChunkedCrossCorrelation(AlignmentStrategy): def __init__(self): super().__init__() - + def align_chunk(self, chunk_a, chunk_b, similarity_measure: Measure): - raise 'Not implemented error' + raise NotImplementedError + - # TODO: Add additional alignment methods diff --git a/src/main/python/systemds/scuro/aligner/dr_search.py b/src/main/python/systemds/scuro/aligner/dr_search.py index b2a92ab75b2..24f3c3236f5 100644 --- a/src/main/python/systemds/scuro/aligner/dr_search.py +++ b/src/main/python/systemds/scuro/aligner/dr_search.py @@ -29,7 +29,7 @@ import warnings -warnings.filterwarnings('ignore') +warnings.filterwarnings("ignore") def get_modalities_by_name(modalities, name): @@ -37,11 +37,16 @@ def get_modalities_by_name(modalities, name): if modality.name == name: return modality - raise 'Modality ' + name + 'not in modalities' + raise ValueError("Modality " + name + " not in modalities") class DRSearch: - def __init__(self, modalities: List[Modality], task: Task, representations: List[Representation]): + def __init__( + self, + modalities: List[Modality], + task: Task, + representations: List[Representation], + ): """ The DRSearch primitive finds the best uni- or multimodal data representation for the given modalities for a specific task @@ -57,8 +62,13 @@ def __init__(self, modalities: List[Modality], task: Task, representations: List self.best_representation = None self.best_score = -1 - def set_best_params(self, modality_name: str, representation: Representation, - scores: List[float], modality_names: List[str]): + def set_best_params( + self, + modality_name: str, + representation: Representation, + scores: List[float], + modality_names: List[str], + ): """ Updates the best parameters for given modalities, representation, and score :param modality_name: The name of the aligned modality @@ -107,7 +117,9 @@ def fit_random(self, seed=-1): modality.combine() scores = self.task.run(modality.data) - self.set_best_params(modality.name, representation, scores, modality.get_modality_names()) + self.set_best_params( + modality.name, representation, scores, modality.get_modality_names() + ) return self.best_representation, self.best_score, self.best_modalities @@ -121,11 +133,18 @@ def fit_enumerate_all(self): for M in range(1, len(self.modalities) + 1): for combination in itertools.combinations(self.modalities, M): for representation in self.representations: - modality = AlignedModality(representation, list(combination)) # noqa + modality = AlignedModality( + representation, list(combination) + ) # noqa modality.combine() scores = self.task.run(modality.data) - self.set_best_params(modality.name, representation, scores, modality.get_modality_names()) + self.set_best_params( + modality.name, + representation, + scores, + modality.get_modality_names(), + ) return self.best_representation, self.best_score, self.best_modalities @@ -138,7 +157,7 @@ def transform(self, modalities: 
List[Modality]): """ if self.best_score == -1: - raise 'Please fit representations first!' + raise "Please fit representations first!" used_modalities = [] diff --git a/src/main/python/systemds/scuro/aligner/task.py b/src/main/python/systemds/scuro/aligner/task.py index efaafce32d7..fcf09528403 100644 --- a/src/main/python/systemds/scuro/aligner/task.py +++ b/src/main/python/systemds/scuro/aligner/task.py @@ -24,7 +24,9 @@ class Task: - def __init__(self, name: str, model: Model, labels, train_indices: List, val_indices: List): + def __init__( + self, name: str, model: Model, labels, train_indices: List, val_indices: List + ): """ Parent class for the prediction task that is performed on top of the aligned representation :param name: Name of the task @@ -44,9 +46,9 @@ def get_train_test_split(self, data): y_train = [self.labels[i] for i in self.train_indices] X_test = [data[i] for i in self.val_indices] y_test = [self.labels[i] for i in self.val_indices] - + return X_train, y_train, X_test, y_test - + def run(self, data): """ The run method need to be implemented by every task class @@ -55,4 +57,3 @@ def run(self, data): :return: the validation accuracy """ pass - \ No newline at end of file diff --git a/src/main/python/systemds/scuro/main.py b/src/main/python/systemds/scuro/main.py index 0648972fd89..f28b271b979 100644 --- a/src/main/python/systemds/scuro/main.py +++ b/src/main/python/systemds/scuro/main.py @@ -36,7 +36,7 @@ class CustomTask(Task): def __init__(self, model, labels, train_indices, val_indices): - super().__init__('CustomTask', model, labels, train_indices, val_indices) + super().__init__("CustomTask", model, labels, train_indices, val_indices) def run(self, data): X_train, y_train, X_test, y_test = self.get_train_test_split(data) @@ -49,9 +49,9 @@ def run(self, data): train_indices = [] val_indices = [] -video_path = '' -audio_path = '' -text_path = '' +video_path = "" +audio_path = "" +text_path = "" # Load modalities (audio, video, text) video = VideoModality(video_path, HDF5(), train_indices) diff --git a/src/main/python/systemds/scuro/modality/aligned_modality.py b/src/main/python/systemds/scuro/modality/aligned_modality.py index 7950ec1919f..839b9d296f8 100644 --- a/src/main/python/systemds/scuro/modality/aligned_modality.py +++ b/src/main/python/systemds/scuro/modality/aligned_modality.py @@ -31,7 +31,7 @@ def __init__(self, representation: Fusion, modalities: List[Modality]): :param representation: The representation for the aligned modality :param modalities: List of modalities to be combined """ - name = '' + name = "" for modality in modalities: name += modality.name super().__init__(representation, modality_name=name) @@ -41,11 +41,11 @@ def combine(self): """ Initiates the call to fuse the given modalities depending on the Fusion type """ - self.data = self.representation.fuse(self.modalities) # noqa + self.data = self.representation.fuse(self.modalities) # noqa def get_modality_names(self): names = [] for modality in self.modalities: names.append(modality.name) - return names \ No newline at end of file + return names diff --git a/src/main/python/systemds/scuro/modality/audio_modality.py b/src/main/python/systemds/scuro/modality/audio_modality.py index 570faaad778..ba849622269 100644 --- a/src/main/python/systemds/scuro/modality/audio_modality.py +++ b/src/main/python/systemds/scuro/modality/audio_modality.py @@ -25,15 +25,21 @@ class AudioModality(Modality): - def __init__(self, file_path: str, representation: UnimodalRepresentation, train_indices=None, 
start_index: int = 0): + def __init__( + self, + file_path: str, + representation: UnimodalRepresentation, + train_indices=None, + start_index: int = 0, + ): """ Creates an audio modality :param file_path: path to file where the audio embeddings are stored :param representation: Unimodal representation that indicates how to extract the data from the file """ - super().__init__(representation, start_index, 'Audio', train_indices) + super().__init__(representation, start_index, "Audio", train_indices) self.file_path = file_path - + def file_sanity_check(self): """ Checks if the file can be found is not empty @@ -42,12 +48,14 @@ def file_sanity_check(self): file_size = os.path.getsize(self.file_path) except: raise (f"Error: File {0} not found!".format(self.file_path)) - + if file_size == 0: raise ("File {0} is empty".format(self.file_path)) - + def read_chunk(self): pass - + def read_all(self, indices=None): - self.data = self.representation.parse_all(self.file_path, indices=indices) # noqa + self.data = self.representation.parse_all( + self.file_path, indices=indices + ) # noqa diff --git a/src/main/python/systemds/scuro/modality/modality.py b/src/main/python/systemds/scuro/modality/modality.py index b15321be405..a899576d5b8 100644 --- a/src/main/python/systemds/scuro/modality/modality.py +++ b/src/main/python/systemds/scuro/modality/modality.py @@ -23,8 +23,14 @@ class Modality: - - def __init__(self, representation: Representation, start_index: int = 0, modality_name='', train_indices=None): + + def __init__( + self, + representation: Representation, + start_index: int = 0, + modality_name="", + train_indices=None, + ): """ Parent class of the different Modalities :param representation: Specifies how the data should be represented for a specific modality @@ -37,13 +43,13 @@ def __init__(self, representation: Representation, start_index: int = 0, modalit self.name = modality_name self.data = None self.train_indices = train_indices - + def read_chunk(self): """ Extracts a data chunk of the modality according to the window size specified in params """ raise NotImplementedError - + def read_all(self, indices): """ Implemented for every unique modality to read all samples from a specified format diff --git a/src/main/python/systemds/scuro/modality/text_modality.py b/src/main/python/systemds/scuro/modality/text_modality.py index ab6d7f0547a..c636de71672 100644 --- a/src/main/python/systemds/scuro/modality/text_modality.py +++ b/src/main/python/systemds/scuro/modality/text_modality.py @@ -25,15 +25,21 @@ class TextModality(Modality): - def __init__(self, file_path: str, representation: UnimodalRepresentation, train_indices=None, start_index: int = 0): + def __init__( + self, + file_path: str, + representation: UnimodalRepresentation, + train_indices=None, + start_index: int = 0, + ): """ Creates a text modality :param file_path: path to file(s) where the text data is stored :param representation: Unimodal representation that indicates how to extract the data from the file """ - super().__init__(representation, start_index, 'Text', train_indices) + super().__init__(representation, start_index, "Text", train_indices) self.file_path = file_path - + def file_sanity_check(self): """ Checks if the file can be found is not empty @@ -42,12 +48,14 @@ def file_sanity_check(self): file_size = os.path.getsize(self.file_path) except: raise (f"Error: File {0} not found!".format(self.file_path)) - + if file_size == 0: raise ("File {0} is empty".format(self.file_path)) - + def read_chunk(self): pass - + def 
read_all(self, indices=None): - self.data = self.representation.parse_all(self.file_path, indices=indices) # noqa + self.data = self.representation.parse_all( + self.file_path, indices=indices + ) # noqa diff --git a/src/main/python/systemds/scuro/modality/video_modality.py b/src/main/python/systemds/scuro/modality/video_modality.py index 110a13ffca2..a6cedf6c86a 100644 --- a/src/main/python/systemds/scuro/modality/video_modality.py +++ b/src/main/python/systemds/scuro/modality/video_modality.py @@ -25,15 +25,21 @@ class VideoModality(Modality): - def __init__(self, file_path: str, representation: UnimodalRepresentation, train_indices=None, start_index: int = 0): + def __init__( + self, + file_path: str, + representation: UnimodalRepresentation, + train_indices=None, + start_index: int = 0, + ): """ Creates a video modality :param file_path: path to file where the video embeddings (for now) are stored :param representation: Unimodal representation that indicates how to extract the data from the file """ - super().__init__(representation, start_index, 'Video', train_indices) + super().__init__(representation, start_index, "Video", train_indices) self.file_path = file_path - + def file_sanity_check(self): """ Checks if the file can be found is not empty @@ -42,12 +48,14 @@ def file_sanity_check(self): file_size = os.path.getsize(self.file_path) except: raise (f"Error: File {0} not found!".format(self.file_path)) - + if file_size == 0: raise ("File {0} is empty".format(self.file_path)) - + def read_chunk(self): pass - + def read_all(self, indices=None): - self.data = self.representation.parse_all(self.file_path, indices=indices) # noqa + self.data = self.representation.parse_all( + self.file_path, indices=indices + ) # noqa diff --git a/src/main/python/systemds/scuro/models/discrete_model.py b/src/main/python/systemds/scuro/models/discrete_model.py index 288643e5d84..7e0bfd87faa 100644 --- a/src/main/python/systemds/scuro/models/discrete_model.py +++ b/src/main/python/systemds/scuro/models/discrete_model.py @@ -26,15 +26,13 @@ def __init__(self): """ Placeholder for a discrete model implementation """ - super().__init__('DiscreteModel') - + super().__init__("DiscreteModel") + def fit(self, X_train, y_train): self.clf = None train_accuracy = 0 return train_accuracy - + def test(self, X_test, y_test): test_accuracy = 0 return test_accuracy - - \ No newline at end of file diff --git a/src/main/python/systemds/scuro/models/model.py b/src/main/python/systemds/scuro/models/model.py index 115bf75ad6b..22d1bbeccfd 100644 --- a/src/main/python/systemds/scuro/models/model.py +++ b/src/main/python/systemds/scuro/models/model.py @@ -19,26 +19,26 @@ # # ------------------------------------------------------------- + class Model: def __init__(self, name: str): """ Parent class for models used to perform a given task :param name: Name of the model - + The classifier (clf) should be set in the fit method of each child class """ self.name = name self.clf = None - + def fit(self, X_train, y_train, X_val=None, y_val=None): """ Fits a model to the training data """ - raise f'Fit method not implemented for {self.name}' - + raise f"Fit method not implemented for {self.name}" + def test(self, X_test, y_test): """ Tests the classifier on a test or validation set """ - raise f'Test method not implemented for {self.name}' - \ No newline at end of file + raise f"Test method not implemented for {self.name}" diff --git a/src/main/python/systemds/scuro/representations/average.py 
b/src/main/python/systemds/scuro/representations/average.py index 708812d21bc..d10778f1136 100644 --- a/src/main/python/systemds/scuro/representations/average.py +++ b/src/main/python/systemds/scuro/representations/average.py @@ -34,14 +34,14 @@ def __init__(self): """ Combines modalities using averaging """ - super().__init__('Average') - + super().__init__("Average") + def fuse(self, modalities: List[Modality]): max_emb_size = self.get_max_embedding_size(modalities) padded_modalities = [] for modality in modalities: - d = pad_sequences(modality.data, maxlen=max_emb_size, dtype='float32') + d = pad_sequences(modality.data, maxlen=max_emb_size, dtype="float32") padded_modalities.append(d) data = padded_modalities[0] diff --git a/src/main/python/systemds/scuro/representations/bert.py b/src/main/python/systemds/scuro/representations/bert.py index 365b39c322e..30bdc24a532 100644 --- a/src/main/python/systemds/scuro/representations/bert.py +++ b/src/main/python/systemds/scuro/representations/bert.py @@ -30,18 +30,18 @@ def read_text_file(file_path): - with open(file_path, 'r', encoding='utf-8') as file: + with open(file_path, "r", encoding="utf-8") as file: text = file.read() return text class Bert(UnimodalRepresentation): def __init__(self, avg_layers=None, output_file=None): - super().__init__('Bert') - + super().__init__("Bert") + self.avg_layers = avg_layers self.output_file = output_file - + def parse_all(self, filepath, indices, get_sequences=False): # Assumes text is stored in .txt files data = [] @@ -49,48 +49,51 @@ def parse_all(self, filepath, indices, get_sequences=False): for filename in os.listdir(filepath): f = os.path.join(filepath, filename) if os.path.isfile(f): - with open(f, 'r') as file: + with open(f, "r") as file: data.append(file.readlines()[0]) else: - with open(filepath, 'r') as file: + with open(filepath, "r") as file: data = file.readlines() - - model_name = 'bert-base-uncased' + + model_name = "bert-base-uncased" tokenizer = BertTokenizer.from_pretrained(model_name) - + if self.avg_layers is not None: model = BertModel.from_pretrained(model_name, output_hidden_states=True) else: model = BertModel.from_pretrained(model_name) - + embeddings = self.create_embeddings(data, model, tokenizer) - + if self.output_file is not None: data = {} for i in range(0, embeddings.shape[0]): data[indices[i]] = embeddings[i] self.save_embeddings(data) - + return embeddings - + def create_embeddings(self, data, model, tokenizer): embeddings = [] for d in data: inputs = tokenizer(d, return_tensors="pt", padding=True, truncation=True) - + with torch.no_grad(): outputs = model(**inputs) - + if self.avg_layers is not None: - cls_embedding = [outputs.hidden_states[i][:, 0, :] for i in range(-self.avg_layers, 0)] + cls_embedding = [ + outputs.hidden_states[i][:, 0, :] + for i in range(-self.avg_layers, 0) + ] cls_embedding = torch.mean(torch.stack(cls_embedding), dim=0) else: cls_embedding = outputs.last_hidden_state[:, 0, :].squeeze().numpy() embeddings.append(cls_embedding) - + embeddings = np.array(embeddings) return embeddings.reshape((embeddings.shape[0], embeddings.shape[-1])) - + def save_embeddings(self, data): - with open(self.output_file, 'wb') as file: + with open(self.output_file, "wb") as file: pickle.dump(data, file) diff --git a/src/main/python/systemds/scuro/representations/concatenation.py b/src/main/python/systemds/scuro/representations/concatenation.py index a61ab69fce2..7694fa68977 100644 --- a/src/main/python/systemds/scuro/representations/concatenation.py +++ 
b/src/main/python/systemds/scuro/representations/concatenation.py @@ -34,7 +34,7 @@ def __init__(self, padding=True): """ Combines modalities using concatenation """ - super().__init__('Concatenation') + super().__init__("Concatenation") self.padding = padding def fuse(self, modalities: List[Modality]): @@ -51,8 +51,16 @@ def fuse(self, modalities: List[Modality]): for modality in modalities: if self.padding: - data = np.concatenate([data, pad_sequences(modality.data, maxlen=max_emb_size, dtype='float32')], axis=-1) + data = np.concatenate( + [ + data, + pad_sequences( + modality.data, maxlen=max_emb_size, dtype="float32" + ), + ], + axis=-1, + ) else: data = np.concatenate([data, modality.data], axis=-1) - return np.array(data) \ No newline at end of file + return np.array(data) diff --git a/src/main/python/systemds/scuro/representations/fusion.py b/src/main/python/systemds/scuro/representations/fusion.py index 04e9ebbb64a..0d5cd347260 100644 --- a/src/main/python/systemds/scuro/representations/fusion.py +++ b/src/main/python/systemds/scuro/representations/fusion.py @@ -41,7 +41,7 @@ def fuse(self, modalities: List[Modality]): :param modalities: List of modalities used in the fusion :return: fused data """ - raise f'Not implemented for Fusion: {self.name}' + raise f"Not implemented for Fusion: {self.name}" def get_max_embedding_size(self, modalities: List[Modality]): """ @@ -53,8 +53,8 @@ def get_max_embedding_size(self, modalities: List[Modality]): for idx in range(1, len(modalities)): curr_shape = modalities[idx].data.shape if len(modalities[idx - 1].data) != curr_shape[0]: - raise f'Modality sizes don\'t match!' + raise f"Modality sizes don't match!" elif curr_shape[1] > max_size: max_size = curr_shape[1] - return max_size \ No newline at end of file + return max_size diff --git a/src/main/python/systemds/scuro/representations/lstm.py b/src/main/python/systemds/scuro/representations/lstm.py index dcdd9b65c1d..3687ff65147 100644 --- a/src/main/python/systemds/scuro/representations/lstm.py +++ b/src/main/python/systemds/scuro/representations/lstm.py @@ -34,7 +34,7 @@ def __init__(self, width=128, depth=1, dropout_rate=0.1): """ Combines modalities using an LSTM """ - super().__init__('LSTM') + super().__init__("LSTM") self.depth = depth self.width = width self.dropout_rate = dropout_rate diff --git a/src/main/python/systemds/scuro/representations/max.py b/src/main/python/systemds/scuro/representations/max.py index 50038d54639..2e1e8644773 100644 --- a/src/main/python/systemds/scuro/representations/max.py +++ b/src/main/python/systemds/scuro/representations/max.py @@ -35,38 +35,47 @@ def __init__(self, split=4): Combines modalities by computing the outer product of a modality combination and taking the row max """ - super().__init__('RowMax') + super().__init__("RowMax") self.split = split - - def fuse(self, modalities: List[Modality],): + + def fuse( + self, + modalities: List[Modality], + ): if len(modalities) < 2: return np.array(modalities[0].data) - + max_emb_size = self.get_max_embedding_size(modalities) - + padded_modalities = [] for modality in modalities: - d = pad_sequences(modality.data, maxlen=max_emb_size, dtype='float32') + d = pad_sequences(modality.data, maxlen=max_emb_size, dtype="float32") padded_modalities.append(d) - + split_rows = int(len(modalities[0].data) / self.split) data = [] - + for combination in itertools.combinations(padded_modalities, 2): combined = None for i in range(0, self.split): start = split_rows * i - end = split_rows * (i + 1) if i < (self.split - 
1) else len(modalities[0].data) - m = np.einsum('bi,bo->bio', combination[0][start:end], combination[1][start:end]) + end = ( + split_rows * (i + 1) + if i < (self.split - 1) + else len(modalities[0].data) + ) + m = np.einsum( + "bi,bo->bio", combination[0][start:end], combination[1][start:end] + ) m = m.max(axis=2) if combined is None: combined = m else: combined = np.concatenate((combined, m), axis=0) data.append(combined) - + data = np.stack(data) data = data.max(axis=0) - + return np.array(data) diff --git a/src/main/python/systemds/scuro/representations/mel_spectrogram.py b/src/main/python/systemds/scuro/representations/mel_spectrogram.py index 15d4277c2c4..27aba8b997d 100644 --- a/src/main/python/systemds/scuro/representations/mel_spectrogram.py +++ b/src/main/python/systemds/scuro/representations/mel_spectrogram.py @@ -31,10 +31,10 @@ class MelSpectrogram(UnimodalRepresentation): def __init__(self, avg=True, output_file=None): - super().__init__('MelSpectrogram') + super().__init__("MelSpectrogram") self.avg = avg self.output_file = output_file - + def parse_all(self, file_path, indices, get_sequences=False): result = [] max_length = 0 @@ -48,19 +48,19 @@ def parse_all(self, file_path, indices, get_sequences=False): if S_dB.shape[-1] > max_length: max_length = S_dB.shape[-1] result.append(S_dB) - + r = [] for elem in result: - d = pad_sequences(elem, maxlen=max_length, dtype='float32') + d = pad_sequences(elem, maxlen=max_length, dtype="float32") r.append(d) - + np_array_r = np.array(r) if not self.avg else np.mean(np.array(r), axis=1) - + if self.output_file is not None: data = {} for i in range(0, np_array_r.shape[0]): data[indices[i]] = np_array_r[i] - with open(self.output_file, 'wb') as file: + with open(self.output_file, "wb") as file: pickle.dump(data, file) - + return np_array_r diff --git a/src/main/python/systemds/scuro/representations/multiplication.py b/src/main/python/systemds/scuro/representations/multiplication.py index 032ae70fe47..18f34bae6f9 100644 --- a/src/main/python/systemds/scuro/representations/multiplication.py +++ b/src/main/python/systemds/scuro/representations/multiplication.py @@ -34,15 +34,18 @@ def __init__(self): """ Combines modalities using elementwise multiply """ - super().__init__('Multiplication') - + super().__init__("Multiplication") + def fuse(self, modalities: List[Modality], train_indices=None): max_emb_size = self.get_max_embedding_size(modalities) - - data = pad_sequences(modalities[0].data, maxlen=max_emb_size, dtype='float32') - + + data = pad_sequences(modalities[0].data, maxlen=max_emb_size, dtype="float32") + for m in range(1, len(modalities)): # scaled = self.scale_data(modalities[m].data, train_indices) - data = np.multiply(data, pad_sequences(modalities[m].data, maxlen=max_emb_size, dtype='float32')) - + data = np.multiply( + data, + pad_sequences(modalities[m].data, maxlen=max_emb_size, dtype="float32"), + ) + return data diff --git a/src/main/python/systemds/scuro/representations/representation.py b/src/main/python/systemds/scuro/representations/representation.py index 13da5d26a5b..92967ed9c5a 100644 --- a/src/main/python/systemds/scuro/representations/representation.py +++ b/src/main/python/systemds/scuro/representations/representation.py @@ -19,6 +19,7 @@ # # ------------------------------------------------------------- + class Representation: def __init__(self, name): self.name = name diff --git a/src/main/python/systemds/scuro/representations/resnet.py b/src/main/python/systemds/scuro/representations/resnet.py index 
52802288deb..75c921184b3 100644 --- a/src/main/python/systemds/scuro/representations/resnet.py +++ b/src/main/python/systemds/scuro/representations/resnet.py @@ -32,46 +32,52 @@ import torchvision.transforms as transforms import numpy as np -DEVICE = 'cpu' +DEVICE = "cpu" class ResNet(UnimodalRepresentation): def __init__(self, output_file=None): - super().__init__('ResNet') - + super().__init__("ResNet") + self.output_file = output_file - + def parse_all(self, file_path, indices, get_sequences=False): resnet = models.resnet152(weights=models.ResNet152_Weights.DEFAULT) resnet.eval() - + for param in resnet.parameters(): param.requires_grad = False - - transform = transforms.Compose([ - transforms.ToPILImage(), - transforms.Resize((224, 224)), - transforms.ToTensor(), - transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) - ]) - + + transform = transforms.Compose( + [ + transforms.ToPILImage(), + transforms.Resize((224, 224)), + transforms.ToTensor(), + transforms.Normalize( + mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] + ), + ] + ) + dataset = ResNetDataset(transform=transform, video_folder_path=file_path) embeddings = {} - + class Identity(torch.nn.Module): def forward(self, input_: torch.Tensor) -> torch.Tensor: return input_ - + resnet.fc = Identity() - + res5c_output = None - - def avg_pool_hook(_module: torch.nn.Module, input_: Tuple[torch.Tensor], _output: Any) -> None: + + def avg_pool_hook( + _module: torch.nn.Module, input_: Tuple[torch.Tensor], _output: Any + ) -> None: nonlocal res5c_output res5c_output = input_[0] - + resnet.avgpool.register_forward_hook(avg_pool_hook) - + for instance in torch.utils.data.DataLoader(dataset): video_id = instance["id"][0] frames = instance["frames"][0].to(DEVICE) @@ -81,33 +87,33 @@ def avg_pool_hook(_module: torch.nn.Module, input_: Tuple[torch.Tensor], _output end_index = min(start_index + batch_size, len(frames)) frame_ids_range = range(start_index, end_index) frame_batch = frames[frame_ids_range] - + avg_pool_value = resnet(frame_batch) - + embeddings[video_id][frame_ids_range] = avg_pool_value.to(DEVICE) - + if self.output_file is not None: - with h5py.File(self.output_file, 'w') as hdf: + with h5py.File(self.output_file, "w") as hdf: for key, value in embeddings.items(): hdf.create_dataset(key, data=value) - - emb = np.zeros((len(indices), 2048), dtype='float32') + + emb = np.zeros((len(indices), 2048), dtype="float32") if indices is not None: for i in indices: emb[i] = embeddings.get(str(i)).mean(dim=0).numpy() else: for i, key in enumerate(embeddings.keys()): emb[i] = embeddings.get(key).mean(dim=0).numpy() - + return emb - + @staticmethod def extract_features_from_video(video_path, model, transform): cap = cv2.VideoCapture(video_path) features = [] count = 0 success, frame = cap.read() - + while success: success, frame = cap.read() transformed_frame = transform(frame).unsqueeze(0) @@ -115,11 +121,11 @@ def extract_features_from_video(video_path, model, transform): with torch.no_grad(): feature_vector = model(transformed_frame) feature_vector = feature_vector.view(-1).numpy() - + features.append(feature_vector) - + count += 1 - + cap.release() return features, count @@ -129,40 +135,43 @@ def __init__(self, video_folder_path: str, transform: Callable = None): self.video_folder_path = video_folder_path self.transform = transform self.video_ids = [] - video_files = [f for f in os.listdir(self.video_folder_path) if - f.lower().endswith(('.mp4', '.avi', '.mov', '.mkv'))] - self.file_extension = 
video_files[0].split('.')[-1] - + video_files = [ + f + for f in os.listdir(self.video_folder_path) + if f.lower().endswith((".mp4", ".avi", ".mov", ".mkv")) + ] + self.file_extension = video_files[0].split(".")[-1] + for video in video_files: - video_id, _ = video.split('/')[-1].split('.') + video_id, _ = video.split("/")[-1].split(".") self.video_ids.append(video_id) - + self.frame_count_by_video_id = {video_id: 0 for video_id in self.video_ids} - + def __getitem__(self, index) -> Dict[str, object]: video_id = self.video_ids[index] - video_path = self.video_folder_path + '/' + video_id + '.' + self.file_extension - + video_path = self.video_folder_path + "/" + video_id + "." + self.file_extension + frames = None count = 0 - + cap = cv2.VideoCapture(video_path) - + success, frame = cap.read() - + num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) self.frame_count_by_video_id[video_id] = num_frames if frames is None and success: frames = torch.empty((num_frames, 3, 224, 224)) - + while success: frame = self.transform(frame) frames[count] = frame # noqa success, frame = cap.read() count += 1 - + cap.release() return {"id": video_id, "frames": frames} - + def __len__(self) -> int: return len(self.video_ids) diff --git a/src/main/python/systemds/scuro/representations/rowmax.py b/src/main/python/systemds/scuro/representations/rowmax.py index b06839b3349..0dc201e2ee1 100644 --- a/src/main/python/systemds/scuro/representations/rowmax.py +++ b/src/main/python/systemds/scuro/representations/rowmax.py @@ -35,7 +35,7 @@ def __init__(self, split=1): Combines modalities by computing the outer product of a modality combination and taking the row max """ - super().__init__('RowMax') + super().__init__("RowMax") self.split = split def fuse(self, modalities: List[Modality], train_indices): @@ -47,7 +47,7 @@ def fuse(self, modalities: List[Modality], train_indices): padded_modalities = [] for modality in modalities: scaled = self.scale_data(modality.data, train_indices) - d = pad_sequences(scaled, maxlen=max_emb_size, dtype='float32') + d = pad_sequences(scaled, maxlen=max_emb_size, dtype="float32") padded_modalities.append(d) split_rows = int(len(modalities[0].data) / self.split) @@ -58,8 +58,14 @@ def fuse(self, modalities: List[Modality], train_indices): combined = None for i in range(0, self.split): start = split_rows * i - end = split_rows * (i + 1) if i < (self.split - 1) else len(modalities[0].data) - m = np.einsum('bi,bo->bio', combination[0][start:end], combination[1][start:end]) + end = ( + split_rows * (i + 1) + if i < (self.split - 1) + else len(modalities[0].data) + ) + m = np.einsum( + "bi,bo->bio", combination[0][start:end], combination[1][start:end] + ) m = m.max(axis=2) if combined is None: combined = m diff --git a/src/main/python/systemds/scuro/representations/sum.py b/src/main/python/systemds/scuro/representations/sum.py index 1f1740c548e..bfb19d4f7d6 100644 --- a/src/main/python/systemds/scuro/representations/sum.py +++ b/src/main/python/systemds/scuro/representations/sum.py @@ -33,14 +33,16 @@ def __init__(self): """ Combines modalities using colum-wise sum """ - super().__init__('Sum') - + super().__init__("Sum") + def fuse(self, modalities: List[Modality]): max_emb_size = self.get_max_embedding_size(modalities) - data = pad_sequences(modalities[0].data, maxlen=max_emb_size, dtype='float32') - + data = pad_sequences(modalities[0].data, maxlen=max_emb_size, dtype="float32") + for m in range(1, len(modalities)): - data += pad_sequences(modalities[m].data, maxlen=max_emb_size, 
dtype='float32') - + data += pad_sequences( + modalities[m].data, maxlen=max_emb_size, dtype="float32" + ) + return data diff --git a/src/main/python/systemds/scuro/representations/unimodal.py b/src/main/python/systemds/scuro/representations/unimodal.py index da0e721a574..ccd61977654 100644 --- a/src/main/python/systemds/scuro/representations/unimodal.py +++ b/src/main/python/systemds/scuro/representations/unimodal.py @@ -28,11 +28,11 @@ def __init__(self, name): :param name: name of the representation """ super().__init__(name) - + def parse_all(self, file_path, indices): - raise f'Not implemented for {self.name}' + raise NotImplementedError(f"Not implemented for {self.name}") class PixelRepresentation(UnimodalRepresentation): def __init__(self): - super().__init__('Pixel') + super().__init__("Pixel") diff --git a/src/main/python/systemds/scuro/representations/utils.py b/src/main/python/systemds/scuro/representations/utils.py index 720c3386d74..bccc3ac4b2a 100644 --- a/src/main/python/systemds/scuro/representations/utils.py +++ b/src/main/python/systemds/scuro/representations/utils.py @@ -31,11 +31,11 @@ class NPY(UnimodalRepresentation): def __init__(self): - super().__init__('NPY') - + super().__init__("NPY") + def parse_all(self, filepath, indices, get_sequences=False): data = np.load(filepath, allow_pickle=True) - + if indices is not None: return np.array([data[index] for index in indices]) else: @@ -44,32 +44,32 @@ class Pickle(UnimodalRepresentation): def __init__(self): - super().__init__('Pickle') - + super().__init__("Pickle") + def parse_all(self, file_path, indices, get_sequences=False): - with open(file_path, 'rb') as f: + with open(file_path, "rb") as f: data = pickle.load(f) - + embeddings = [] for n, idx in enumerate(indices): embeddings.append(data[idx]) - + return np.array(embeddings) class HDF5(UnimodalRepresentation): def __init__(self): - super().__init__('HDF5') - + super().__init__("HDF5") + def parse_all(self, filepath, indices=None, get_sequences=False): data = h5py.File(filepath) - + if get_sequences: max_emb = 0 for index in indices: if max_emb < len(data[index][()]): max_emb = len(data[index][()]) - + emb = [] if indices is not None: for index in indices: @@ -77,7 +77,7 @@ def parse_all(self, filepath, indices=None, get_sequences=False): for i in range(len(emb_i), max_emb): emb_i.append([0 for x in range(0, len(emb_i[0]))]) emb.append(emb_i) - + return np.array(emb) else: if indices is not None: @@ -88,14 +88,14 @@ class JSON(UnimodalRepresentation): def __init__(self): - super().__init__('JSON') - + super().__init__("JSON") + def parse_all(self, filepath, indices): with open(filepath) as file: return json.load(file) -def pad_sequences(sequences, maxlen=None, dtype='float32', value=0): +def pad_sequences(sequences, maxlen=None, dtype="float32", value=0): if maxlen is None: maxlen = max([len(seq) for seq in sequences]) @@ -103,6 +103,6 @@ def pad_sequences(sequences, maxlen=None, dtype='float32', value=0): for i, seq in enumerate(sequences): data = seq[:maxlen] - result[i, :len(data)] = data + result[i, : len(data)] = data return result diff --git a/src/main/python/systemds/utils/__init__.py b/src/main/python/systemds/utils/__init__.py index cc59154f3c3..e66abb4646f 100644 --- a/src/main/python/systemds/utils/__init__.py +++ b/src/main/python/systemds/utils/__init__.py @@ -1,4 +1,4 @@ -#------------------------------------------------------------- +# 
------------------------------------------------------------- # # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file @@ -17,4 +17,4 @@ # specific language governing permissions and limitations # under the License. # -#------------------------------------------------------------- +# ------------------------------------------------------------- diff --git a/src/main/python/systemds/utils/consts.py b/src/main/python/systemds/utils/consts.py index 39789f5459e..9bfde5b418c 100644 --- a/src/main/python/systemds/utils/consts.py +++ b/src/main/python/systemds/utils/consts.py @@ -20,7 +20,7 @@ from typing import Union -MODULE_NAME = 'systemds' -VALID_INPUT_TYPES = Union['DAGNode', str, int, float, bool] -BINARY_OPERATIONS = ['+', '-', '/', '//', '*', '<', '<=', '>', '>=', '==', '!=', '%*%'] -VALID_ARITHMETIC_TYPES = Union['DAGNode', int, float] +MODULE_NAME = "systemds" +VALID_INPUT_TYPES = Union["DAGNode", str, int, float, bool] +BINARY_OPERATIONS = ["+", "-", "/", "//", "*", "<", "<=", ">", ">=", "==", "!=", "%*%"] +VALID_ARITHMETIC_TYPES = Union["DAGNode", int, float] diff --git a/src/main/python/systemds/utils/helpers.py b/src/main/python/systemds/utils/helpers.py index 5f278c807a4..05c9bf0647f 100644 --- a/src/main/python/systemds/utils/helpers.py +++ b/src/main/python/systemds/utils/helpers.py @@ -27,7 +27,9 @@ from systemds.utils.consts import MODULE_NAME -def create_params_string(unnamed_parameters: Iterable[str], named_parameters: Dict[str, str]) -> str: +def create_params_string( + unnamed_parameters: Iterable[str], named_parameters: Dict[str, str] +) -> str: """ Creates a string for providing parameters in dml. Basically converts both named and unnamed parameter to a format which can be used in a dml function call. @@ -36,8 +38,8 @@ def create_params_string(unnamed_parameters: Iterable[str], named_parameters: Di :param named_parameters: a dictionary of parameter names and variable names :return: the string to represent all parameters """ - named_input_strs = (f'{k}={v}' for (k, v) in named_parameters.items()) - return ','.join(chain(unnamed_parameters, named_input_strs)) + named_input_strs = (f"{k}={v}" for (k, v) in named_parameters.items()) + return ",".join(chain(unnamed_parameters, named_input_strs)) def get_module_dir() -> os.PathLike: @@ -54,17 +56,17 @@ def get_slice_string(i): if isinstance(i, list): raise ValueError("Not Supported list query") if isinstance(i, tuple): - return f'{get_slice_string(i[0])},{get_slice_string(i[1])}' + return f"{get_slice_string(i[0])},{get_slice_string(i[1])}" elif isinstance(i, slice): if i.step: raise ValueError("Invalid to slice with step in systemds") elif i.start == None and i.stop == None: - return '' + return "" elif i.start == None or i.stop == None: raise NotImplementedError("Not Implemented slice with dynamic end") else: # + 1 since R and systemDS is 1 indexed. - return f'{i.start + 1}:{i.stop}' + return f"{i.start + 1}:{i.stop}" else: # + 1 since R and systemDS is 1 indexed. 
sliceIns = i + 1 @@ -72,12 +74,14 @@ def get_slice_string(i): def check_is_empty_slice(i): - return isinstance(i, slice) and i.start == None and i.stop == None and i.step == None + return ( + isinstance(i, slice) and i.start == None and i.stop == None and i.step == None + ) def check_no_less_than_zero(i: list): for x in i: - if (x < 0): + if x < 0: raise ValueError("Negative index not supported in systemds") @@ -97,13 +101,13 @@ def get_path_to_script_layers() -> str: def valuetype_from_str(val) -> str: val = val.lower() - if val in ['double', 'float']: + if val in ["double", "float"]: return "double" - elif val in ['string', 'str']: + elif val in ["string", "str"]: return "string" - elif val in ['boolean', 'bool']: + elif val in ["boolean", "bool"]: return "boolean" - elif val in ['integer', 'int']: + elif val in ["integer", "int"]: return "integer" else: return None diff --git a/src/main/python/tests/algorithms/test_gmm.py b/src/main/python/tests/algorithms/test_gmm.py index 37475570daa..cbcccc260d1 100644 --- a/src/main/python/tests/algorithms/test_gmm.py +++ b/src/main/python/tests/algorithms/test_gmm.py @@ -41,7 +41,7 @@ def test_lm_simple(self): a = self.sds.rand(500, 10, -100, 100, pdf="normal", seed=10) features = a # training data all not outliers - notOutliers = self.sds.rand(10, 10, -1, 1, seed=10) # inside a + notOutliers = self.sds.rand(10, 10, -1, 1, seed=10) # inside a outliers = self.sds.rand(10, 10, 1150, 1200, seed=10) # outliers test = outliers.rbind(notOutliers) # testing data half outliers @@ -49,10 +49,12 @@ def test_lm_simple(self): n_gaussian = 4 [_, _, _, _, mu, precision_cholesky, weight] = gmm( - features, n_components=n_gaussian, seed=10) + features, n_components=n_gaussian, seed=10 + ) [_, pp] = gmmPredict( - test, weight, mu, precision_cholesky, model=self.sds.scalar("VVV")) + test, weight, mu, precision_cholesky, model=self.sds.scalar("VVV") + ) outliers = pp.max(axis=1) < 0.99 ret = outliers.compute() diff --git a/src/main/python/tests/algorithms/test_gmm_train_predict.py b/src/main/python/tests/algorithms/test_gmm_train_predict.py index 151b5315490..12c86f6ad00 100644 --- a/src/main/python/tests/algorithms/test_gmm_train_predict.py +++ b/src/main/python/tests/algorithms/test_gmm_train_predict.py @@ -28,7 +28,7 @@ class TestGMM(unittest.TestCase): - model_dir: str = "tests/algorithms/readwrite/" + model_dir: str = "tests/algorithms/readwrite/" model_path: str = model_dir + "model" @classmethod @@ -47,7 +47,8 @@ def train(self): n_gaussian = 4 [_, _, _, _, mu, precision_cholesky, weight] = gmm( - features, n_components=n_gaussian, seed=10) + features, n_components=n_gaussian, seed=10 + ) model = sds_train.list(mu, precision_cholesky, weight) model.write(self.model_path).compute() @@ -58,15 +59,14 @@ def predict(self): mu = model[1].as_matrix() precision_cholesky = model[2].as_matrix() weight = model[3].as_matrix() - notOutliers = sds_predict.rand( - 10, 10, -1, 1, seed=10) # inside a - outliers = sds_predict.rand( - 10, 10, 1150, 1200, seed=10) # outliers + notOutliers = sds_predict.rand(10, 10, -1, 1, seed=10) # inside a + outliers = sds_predict.rand(10, 10, 1150, 1200, seed=10) # outliers test = outliers.rbind(notOutliers) # testing data half outliers [_, pp] = gmmPredict( - test, weight, mu, precision_cholesky, model=sds_predict.scalar("VVV")) + test, weight, mu, precision_cholesky, model=sds_predict.scalar("VVV") + ) outliers = pp.max(axis=1) < 0.99 ret = outliers.compute() diff --git a/src/main/python/tests/algorithms/test_kmeans.py 
b/src/main/python/tests/algorithms/test_kmeans.py index 6369749cee0..9b8ab1ececf 100644 --- a/src/main/python/tests/algorithms/test_kmeans.py +++ b/src/main/python/tests/algorithms/test_kmeans.py @@ -66,8 +66,8 @@ def test_500x2(self): """ features = self.generate_matrices_for_k_means((500, 2), seed=1304) [c, _] = kmeans(features, k=4).compute() - C = self.sds.from_numpy( c) - elm = self.sds.from_numpy( np.array([[1, 1], [-1, 1], [-1, -1], [1, -1]])) + C = self.sds.from_numpy(c) + elm = self.sds.from_numpy(np.array([[1, 1], [-1, 1], [-1, -1], [1, -1]])) res = kmeansPredict(elm, C).compute() corners = set() for x in res: @@ -81,15 +81,14 @@ def test_500x2(self): corners.add("nn") self.assertTrue(len(corners) == 4) - def generate_matrices_for_k_means(self, dims, seed: int = 1234): np.random.seed(seed) mu, sigma = 0, 0.1 - s = np.random.normal(mu, sigma, dims[0] * dims[1]) + s = np.random.normal(mu, sigma, dims[0] * dims[1]) m1 = np.array(s, dtype=np.double) m1 = np.reshape(m1, (dims[0], dims[1])) - return self.sds.from_numpy( m1) + return self.sds.from_numpy(m1) if __name__ == "__main__": diff --git a/src/main/python/tests/algorithms/test_l2svm.py b/src/main/python/tests/algorithms/test_l2svm.py index d91055ed865..d7ea0d862bc 100644 --- a/src/main/python/tests/algorithms/test_l2svm.py +++ b/src/main/python/tests/algorithms/test_l2svm.py @@ -42,23 +42,35 @@ def test_10x10(self): features, labels = self.generate_matrices_for_l2svm(10, seed=1304) model = l2svm(features, labels).compute() # TODO make better verification. - self.assertTrue(np.allclose( - model, - np.array([[-0.03277166], [-0.00820981], [0.00657115], - [0.03228764], [-0.01685067], [0.00892918], - [0.00945636], [0.01514383], [0.0713272], - [-0.05113976]]))) + self.assertTrue( + np.allclose( + model, + np.array( + [ + [-0.03277166], + [-0.00820981], + [0.00657115], + [0.03228764], + [-0.01685067], + [0.00892918], + [0.00945636], + [0.01514383], + [0.0713272], + [-0.05113976], + ] + ), + ) + ) def generate_matrices_for_l2svm(self, dims: int, seed: int = 1234): np.random.seed(seed) - m1 = np.array(np.random.randint( - 100, size=dims * dims) + 1.01, dtype=np.double) + m1 = np.array(np.random.randint(100, size=dims * dims) + 1.01, dtype=np.double) m1.shape = (dims, dims) m2 = np.zeros((dims, 1)) for i in range(dims): if np.random.random() > 0.5: m2[i][0] = 1 - return self.sds.from_numpy( m1), self.sds.from_numpy( m2) + return self.sds.from_numpy(m1), self.sds.from_numpy(m2) if __name__ == "__main__": diff --git a/src/main/python/tests/algorithms/test_lm.py b/src/main/python/tests/algorithms/test_lm.py index 19833eda49f..aad8abac66a 100644 --- a/src/main/python/tests/algorithms/test_lm.py +++ b/src/main/python/tests/algorithms/test_lm.py @@ -57,8 +57,8 @@ def test_lm_simple(self): eps = 1e-03 self.assertTrue( - np.allclose(sds_model_weights, model, eps), - "All elements are not close") + np.allclose(sds_model_weights, model, eps), "All elements are not close" + ) if __name__ == "__main__": diff --git a/src/main/python/tests/algorithms/test_multiLogReg.py b/src/main/python/tests/algorithms/test_multiLogReg.py index 10887ea54b9..597e9d0584b 100644 --- a/src/main/python/tests/algorithms/test_multiLogReg.py +++ b/src/main/python/tests/algorithms/test_multiLogReg.py @@ -46,12 +46,16 @@ def test_simple(self): [X, labels, Y] = self.gen_data() # Call algorithm - bias = multiLogReg(self.sds.from_numpy( - X), self.sds.from_numpy(Y), verbose=False).compute() + bias = multiLogReg( + self.sds.from_numpy(X), self.sds.from_numpy(Y), verbose=False + 
).compute() # Calculate result. - res = np.reshape(np.dot(X, bias[:len(X[0])]) + bias[len(X[0])], (250)) - def f2(x): return (x < 0) + 1 + res = np.reshape(np.dot(X, bias[: len(X[0])]) + bias[len(X[0])], (250)) + + def f2(x): + return (x < 0) + 1 + accuracy = np.sum(labels == f2(res)) / 250 * 100 self.assertTrue(accuracy > 98) @@ -63,11 +67,16 @@ def test_using_predict(self): """ [X, labels, Y] = self.gen_data() # Call algorithm - bias = multiLogReg(self.sds.from_numpy( - X), self.sds.from_numpy(Y), verbose=False).compute() + bias = multiLogReg( + self.sds.from_numpy(X), self.sds.from_numpy(Y), verbose=False + ).compute() - [m, y_pred, acc] = multiLogRegPredict(self.sds.from_numpy( - X), self.sds.from_numpy(bias), Y=self.sds.from_numpy(Y), verbose=False).compute() + [m, y_pred, acc] = multiLogRegPredict( + self.sds.from_numpy(X), + self.sds.from_numpy(bias), + Y=self.sds.from_numpy(Y), + verbose=False, + ).compute() self.assertTrue(acc > 98) @@ -75,9 +84,12 @@ def gen_data(self): np.random.seed(13241) # Generate data mu, sigma = 1, 0.1 - X = np.reshape(np.random.normal(mu, sigma, 500), (2, 250)) + X = np.reshape(np.random.normal(mu, sigma, 500), (2, 250)) + # All over 1 is true - def f(x): return (x[0] > 1) + 1 + def f(x): + return (x[0] > 1) + 1 + labels = f(X) # Y labels as double Y = np.array(labels, dtype=np.double) diff --git a/src/main/python/tests/algorithms/test_pca.py b/src/main/python/tests/algorithms/test_pca.py index 7592334eaf9..cdfbe729028 100644 --- a/src/main/python/tests/algorithms/test_pca.py +++ b/src/main/python/tests/algorithms/test_pca.py @@ -40,16 +40,16 @@ def tearDownClass(cls): def test_500x2(self): """ - This test constructs a line of values in 2d space. + This test constructs a line of values in 2d space. That if fit correctly maps perfectly to 1d space. The check is simply if the input value was positive then the output value should be similar. """ m1 = self.generate_matrices_for_pca(30, seed=1304) - X = self.sds.from_numpy( m1) + X = self.sds.from_numpy(m1) [res, model, _, _] = pca(X, K=1, scale="FALSE", center="FALSE").compute() - for (x, y) in zip(m1, res): + for x, y in zip(m1, res): self.assertTrue((x[0] > 0 and y > 0) or (x[0] < 0 and y < 0)) def test_simple(self): @@ -57,8 +57,9 @@ def test_simple(self): line of numbers. 
Here the pca should return values that are double or close to double of the last value """ m1 = np.array([[1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]) - [res, model, _, _ ] = pca(self.sds.from_numpy( m1), K=1, - scale=False, center=False).compute() + [res, model, _, _] = pca( + self.sds.from_numpy(m1), K=1, scale=False, center=False + ).compute() for x in range(len(m1) - 1): self.assertTrue(abs(res[x + 1] - res[0] * (x + 2)) < 0.001) @@ -66,9 +67,9 @@ def generate_matrices_for_pca(self, dims: int, seed: int = 1234): np.random.seed(seed) mu, sigma = 0, 0.1 - s = np.random.normal(mu, sigma, dims) + s = np.random.normal(mu, sigma, dims) - m1 = np.array(np.c_[np.copy(s) * 1, np.copy(s)*0.3], dtype=np.double) + m1 = np.array(np.c_[np.copy(s) * 1, np.copy(s) * 0.3], dtype=np.double) return m1 diff --git a/src/main/python/tests/algorithms/test_signal.py b/src/main/python/tests/algorithms/test_signal.py index 0680761bdd7..769228fc154 100644 --- a/src/main/python/tests/algorithms/test_signal.py +++ b/src/main/python/tests/algorithms/test_signal.py @@ -1,4 +1,3 @@ - # ------------------------------------------------------------- # # Licensed to the Apache Software Foundation (ASF) under one @@ -45,12 +44,13 @@ def test_create_signal(self): signal = self.sds.seq(0, 2, 1) pi = self.sds.scalar(3.141592654) size = signal.nRow() - n = self.sds.seq(0, size-1) - k = self.sds.seq(0, size-1) - M = (n @ (k.t())) * (2*pi/size) + n = self.sds.seq(0, size - 1) + k = self.sds.seq(0, size - 1) + M = (n @ (k.t())) * (2 * pi / size) Xa = M.cos() @ signal Xb = M.sin() @ signal DFT = signal.cbind(Xa).cbind(Xb).compute() - + + if __name__ == "__main__": - unittest.main(exit=False) \ No newline at end of file + unittest.main(exit=False) diff --git a/src/main/python/tests/basics/test___str__.py b/src/main/python/tests/basics/test___str__.py index 7e2010d6bef..adaa4d23830 100644 --- a/src/main/python/tests/basics/test___str__.py +++ b/src/main/python/tests/basics/test___str__.py @@ -46,20 +46,25 @@ def test_3(self): self.assertTrue("ScalarNode" in str(self.sds.scalar("Hi"))) def test_4(self): - self.assertTrue("ScalarNode" in str( - self.sds.full([1, 2], 3).to_string())) + self.assertTrue("ScalarNode" in str(self.sds.full([1, 2], 3).to_string())) def test_5(self): - self.assertTrue("ListNode" in str(self.sds.list( - self.sds.rand(1, 2, 3, 4), self.sds.scalar(4)))) + self.assertTrue( + "ListNode" + in str(self.sds.list(self.sds.rand(1, 2, 3, 4), self.sds.scalar(4))) + ) def test_6(self): - self.assertTrue("MatrixNode" in str(self.sds.list( - self.sds.rand(1, 2, 3, 4), self.sds.scalar(4))[0])) + self.assertTrue( + "MatrixNode" + in str(self.sds.list(self.sds.rand(1, 2, 3, 4), self.sds.scalar(4))[0]) + ) def test_7(self): - self.assertTrue("ScalarNode" in str(self.sds.list( - self.sds.rand(1, 2, 3, 4), self.sds.scalar(4))[1])) + self.assertTrue( + "ScalarNode" + in str(self.sds.list(self.sds.rand(1, 2, 3, 4), self.sds.scalar(4))[1]) + ) if __name__ == "__main__": diff --git a/src/main/python/tests/basics/test_context_stats.py b/src/main/python/tests/basics/test_context_stats.py index 7c431032407..25a83e18003 100644 --- a/src/main/python/tests/basics/test_context_stats.py +++ b/src/main/python/tests/basics/test_context_stats.py @@ -40,7 +40,7 @@ def tearDownClass(cls): cls.sds.close() def getM(self): - m1 = np.array(np.random.randint(10, size=5*5), dtype=np.int64) + m1 = np.array(np.random.randint(10, size=5 * 5), dtype=np.int64) m1.shape = (5, 5) return m1 @@ -57,9 +57,10 @@ def test_stats_v1(self): stats = self.sds.get_stats() 
self.sds.clear_stats() - instructions = "\n".join(stats.split( - "Heavy hitter instructions:")[1].split("\n")[2:]) - assert("+" in instructions and "*" in instructions and "/" in instructions) + instructions = "\n".join( + stats.split("Heavy hitter instructions:")[1].split("\n")[2:] + ) + assert "+" in instructions and "*" in instructions and "/" in instructions if __name__ == "__main__": diff --git a/src/main/python/tests/docs_test/test_end_to_end_tutorial.py b/src/main/python/tests/docs_test/test_end_to_end_tutorial.py index 85f71bb1f17..1529102e199 100644 --- a/src/main/python/tests/docs_test/test_end_to_end_tutorial.py +++ b/src/main/python/tests/docs_test/test_end_to_end_tutorial.py @@ -1,5 +1,3 @@ - - # ------------------------------------------------------------- # # Licensed to the Apache Software Foundation (ASF) under one diff --git a/src/main/python/tests/examples/tutorials/test_adult.py b/src/main/python/tests/examples/tutorials/test_adult.py index edf1449269c..f0711eeabb1 100644 --- a/src/main/python/tests/examples/tutorials/test_adult.py +++ b/src/main/python/tests/examples/tutorials/test_adult.py @@ -24,8 +24,7 @@ import numpy as np from systemds.context import SystemDSContext from systemds.examples.tutorials.adult import DataManager -from systemds.operator.algorithm import (confusionMatrix, - multiLogReg, multiLogRegPredict) +from systemds.operator.algorithm import confusionMatrix, multiLogReg, multiLogRegPredict class TestAdultStandardML(unittest.TestCase): @@ -38,7 +37,9 @@ class TestAdultStandardML(unittest.TestCase): neural_net_src_path: str = "tests/examples/tutorials/neural_net_source.dml" preprocess_src_path: str = "tests/examples/tutorials/preprocess.dml" dataset_path_train: str = "../../test/resources/datasets/adult/train_data.csv" - dataset_path_train_mtd: str = "../../test/resources/datasets/adult/train_data.csv.mtd" + dataset_path_train_mtd: str = ( + "../../test/resources/datasets/adult/train_data.csv.mtd" + ) dataset_path_test: str = "../../test/resources/datasets/adult/test_data.csv" dataset_path_test_mtd: str = "../../test/resources/datasets/adult/test_data.csv.mtd" dataset_jspec: str = "../../test/resources/datasets/adult/jspec.json" @@ -75,45 +76,70 @@ def test_train_data_pandas_vs_systemds(self): self.assertEqual(pandas.shape, systemds.shape) def test_train_labels_pandas_vs_systemds(self): - # Pandas does not strip the parsed values.. so i have to do it here. - pandas = np.array([x.strip() for x in self.d.get_train_labels_pandas()[0:2000].to_numpy().flatten()]) - systemds = self.d.get_train_labels(self.sds)[0:2000].compute().to_numpy().flatten() + # Pandas does not strip the parsed values, so I have to do it here. + pandas = np.array( + [ + x.strip() + for x in self.d.get_train_labels_pandas()[0:2000].to_numpy().flatten() + ] + ) + systemds = ( + self.d.get_train_labels(self.sds)[0:2000].compute().to_numpy().flatten() + ) comp = pandas == systemds self.assertTrue(comp.all()) def test_test_labels_pandas_vs_systemds(self): # Pandas does not strip the parsed values.. so i have to do it here. 
pandas = np.array( - [x.strip() for x in self.d.get_test_labels_pandas()[0:2000].to_numpy().flatten()]) - systemds = self.d.get_test_labels(self.sds)[0:2000].compute().to_numpy().flatten() + [ + x.strip() + for x in self.d.get_test_labels_pandas()[0:2000].to_numpy().flatten() + ] + ) + systemds = ( + self.d.get_test_labels(self.sds)[0:2000].compute().to_numpy().flatten() + ) comp = pandas == systemds self.assertTrue(comp.all()) def test_transform_encode_train_data(self): jspec = self.d.get_jspec(self.sds) - train_x, M1 = self.d.get_train_data(self.sds)[0:2000].transform_encode(spec=jspec) + train_x, M1 = self.d.get_train_data(self.sds)[0:2000].transform_encode( + spec=jspec + ) train_x_numpy = train_x.compute() self.assertEqual((2000, 101), train_x_numpy.shape) def test_transform_encode_apply_test_data(self): jspec = self.d.get_jspec(self.sds) - train_x, M1 = self.d.get_train_data(self.sds)[0:2000].transform_encode(spec=jspec) - test_x = self.d.get_test_data(self.sds)[0:2000].transform_apply(spec=jspec, meta=M1) + train_x, M1 = self.d.get_train_data(self.sds)[0:2000].transform_encode( + spec=jspec + ) + test_x = self.d.get_test_data(self.sds)[0:2000].transform_apply( + spec=jspec, meta=M1 + ) test_x_numpy = test_x.compute() self.assertEqual((2000, 101), test_x_numpy.shape) def test_transform_encode_train_labels(self): - jspec_dict = {"recode":["income"]} + jspec_dict = {"recode": ["income"]} jspec = self.sds.scalar(f'"{jspec_dict}"') - train_y, M1 = self.d.get_train_labels(self.sds)[0:2000].transform_encode(spec=jspec) + train_y, M1 = self.d.get_train_labels(self.sds)[0:2000].transform_encode( + spec=jspec + ) train_y_numpy = train_y.compute() self.assertEqual((2000, 1), train_y_numpy.shape) def test_transform_encode_test_labels(self): - jspec_dict = {"recode":["income"]} + jspec_dict = {"recode": ["income"]} jspec = self.sds.scalar(f'"{jspec_dict}"') - train_y, M1 = self.d.get_train_labels(self.sds)[0:2000].transform_encode(spec=jspec) - test_y = self.d.get_test_labels(self.sds)[0:2000].transform_apply(spec=jspec, meta=M1) + train_y, M1 = self.d.get_train_labels(self.sds)[0:2000].transform_encode( + spec=jspec + ) + test_y = self.d.get_test_labels(self.sds)[0:2000].transform_apply( + spec=jspec, meta=M1 + ) test_y_numpy = test_y.compute() self.assertEqual((2000, 1), test_y_numpy.shape) diff --git a/src/main/python/tests/examples/tutorials/test_adult_neural.py b/src/main/python/tests/examples/tutorials/test_adult_neural.py index 3b60922ae78..3f5ed34c54e 100644 --- a/src/main/python/tests/examples/tutorials/test_adult_neural.py +++ b/src/main/python/tests/examples/tutorials/test_adult_neural.py @@ -38,7 +38,9 @@ class TestAdultNeural(unittest.TestCase): neural_net_src_path: str = "tests/examples/tutorials/neural_net_source.dml" preprocess_src_path: str = "tests/examples/tutorials/preprocess.dml" dataset_path_train: str = "../../test/resources/datasets/adult/train_data.csv" - dataset_path_train_mtd: str = "../../test/resources/datasets/adult/train_data.csv.mtd" + dataset_path_train_mtd: str = ( + "../../test/resources/datasets/adult/train_data.csv.mtd" + ) dataset_path_test: str = "../../test/resources/datasets/adult/test_data.csv" dataset_path_test_mtd: str = "../../test/resources/datasets/adult/test_data.csv.mtd" dataset_jspec: str = "../../test/resources/datasets/adult/jspec.json" @@ -73,9 +75,9 @@ def test_train_predict(self): def prepare_x(self): jspec = self.d.get_jspec(self.sds) - train_x_frame = self.d.get_train_data(self.sds)[0:self.train_count] + train_x_frame = 
self.d.get_train_data(self.sds)[0 : self.train_count] train_x, M1 = train_x_frame.transform_encode(spec=jspec) - test_x_frame = self.d.get_test_data(self.sds)[0:self.test_count] + test_x_frame = self.d.get_test_data(self.sds)[0 : self.test_count] test_x = test_x_frame.transform_apply(spec=jspec, meta=M1) # Scale and shift .... not needed because of sigmoid layer, # could be useful therefore tested. @@ -86,9 +88,9 @@ def prepare_x(self): def prepare_y(self): jspec_dict = {"recode": ["income"]} jspec_labels = self.sds.scalar(f'"{jspec_dict}"') - train_y_frame = self.d.get_train_labels(self.sds)[0:self.train_count] + train_y_frame = self.d.get_train_labels(self.sds)[0 : self.train_count] train_y, M2 = train_y_frame.transform_encode(spec=jspec_labels) - test_y_frame = self.d.get_test_labels(self.sds)[0:self.test_count] + test_y_frame = self.d.get_test_labels(self.sds)[0 : self.test_count] test_y = test_y_frame.transform_apply(spec=jspec_labels, meta=M2) labels = 2 train_y = train_y.to_one_hot(labels) @@ -109,8 +111,7 @@ def train_neural_net_and_save(self): def train_neural_net_and_predict(self): [train_x, test_x, train_y, test_y] = self.prepare() FFN_package = self.sds.source(self.neural_net_src_path, "fnn") - network = FFN_package.train_paramserv( - train_x, train_y, 4, 16, 0.01, 2, 1) + network = FFN_package.train_paramserv(train_x, train_y, 4, 16, 0.01, 2, 1) probs = FFN_package.predict(test_x, network) accuracy = FFN_package.eval(probs, test_y).compute() # accuracy is returned in percent diff --git a/src/main/python/tests/examples/tutorials/test_mnist.py b/src/main/python/tests/examples/tutorials/test_mnist.py index 89fffe9c4f9..90950094dbf 100644 --- a/src/main/python/tests/examples/tutorials/test_mnist.py +++ b/src/main/python/tests/examples/tutorials/test_mnist.py @@ -65,18 +65,20 @@ def test_multi_log_reg(self): train_count = 5000 test_count = 2000 # Train data - X = self.sds.from_numpy( self.d.get_train_data().reshape( - (60000, 28*28))[:train_count]) - Y = self.sds.from_numpy( self.d.get_train_labels()[:train_count]) + X = self.sds.from_numpy( + self.d.get_train_data().reshape((60000, 28 * 28))[:train_count] + ) + Y = self.sds.from_numpy(self.d.get_train_labels()[:train_count]) Y = Y + 1.0 # Test data - Xt = self.sds.from_numpy( self.d.get_test_data().reshape( - (10000, 28*28))[:test_count]) - Yt = self.sds.from_numpy( self.d.get_test_labels()[:test_count]) + Xt = self.sds.from_numpy( + self.d.get_test_data().reshape((10000, 28 * 28))[:test_count] + ) + Yt = self.sds.from_numpy(self.d.get_test_labels()[:test_count]) Yt = Yt + 1.0 - bias = multiLogReg(X, Y, verbose = False) + bias = multiLogReg(X, Y, verbose=False) [_, _, acc] = multiLogRegPredict(Xt, bias, Y=Yt, verbose=False).compute() self.assertGreater(acc, 80) @@ -84,10 +86,11 @@ def test_multi_log_reg(self): def test_multi_log_reg_with_read(self): train_count = 100 test_count = 100 - X = self.sds.from_numpy( self.d.get_train_data().reshape( - (60000, 28*28))[:train_count]) + X = self.sds.from_numpy( + self.d.get_train_data().reshape((60000, 28 * 28))[:train_count] + ) X.write(self.base_path + "train_data").compute() - Y = self.sds.from_numpy( self.d.get_train_labels()[:train_count]) + 1 + Y = self.sds.from_numpy(self.d.get_train_labels()[:train_count]) + 1 Y.write(self.base_path + "train_labels").compute() Xr = self.sds.read(self.base_path + "train_data") @@ -95,13 +98,14 @@ def test_multi_log_reg_with_read(self): bias = multiLogReg(Xr, Yr, verbose=False) # Test data - Xt = self.sds.from_numpy( self.d.get_test_data().reshape( - 
(10000, 28*28))[:test_count]) - Yt = self.sds.from_numpy( self.d.get_test_labels()[:test_count]) + Xt = self.sds.from_numpy( + self.d.get_test_data().reshape((10000, 28 * 28))[:test_count] + ) + Yt = self.sds.from_numpy(self.d.get_test_labels()[:test_count]) Yt = Yt + 1.0 [_, _, acc] = multiLogRegPredict(Xt, bias, Y=Yt).compute(verbose=False) - + self.assertGreater(acc, 70) diff --git a/src/main/python/tests/federated/test_federated_adult_neural.py b/src/main/python/tests/federated/test_federated_adult_neural.py index df65565c0ba..96ad3456dd1 100644 --- a/src/main/python/tests/federated/test_federated_adult_neural.py +++ b/src/main/python/tests/federated/test_federated_adult_neural.py @@ -36,18 +36,17 @@ def create_schema(dataset): schema = [] for dtype in dataset.dtypes: if pd.api.types.is_integer_dtype(dtype): - schema.append('int64') + schema.append("int64") elif pd.api.types.is_float_dtype(dtype): - schema.append('fp64') + schema.append("fp64") elif pd.api.types.is_bool_dtype(dtype): - schema.append('bool') + schema.append("bool") else: - schema.append('string') - return ','.join(schema) + schema.append("string") + return ",".join(schema) -def create_row_federated_dataset(name, dataset, num_parts=2, - federated_workers=None): +def create_row_federated_dataset(name, dataset, num_parts=2, federated_workers=None): if federated_workers is None: federated_workers = ["localhost:8001", "localhost:8002"] tempdir = "./tests/federated/tmp/test_federated_adult_neural/" @@ -61,30 +60,48 @@ def create_row_federated_dataset(name, dataset, num_parts=2, fed_file_content = [] rows_processed = 0 - for worker_id, address, rows in zip(range(num_parts), itertools.cycle(federated_workers), rs): + for worker_id, address, rows in zip( + range(num_parts), itertools.cycle(federated_workers), rs + ): dataset_part_path = path.join(tempdir, f"{name}{worker_id}.csv") - mtd = {"format": "csv", "header": True, "rows": rows, "cols": c, - "data_type": "frame", "schema": schema} - - dataset_part = dataset[rows_processed:rows_processed + rows] + mtd = { + "format": "csv", + "header": True, + "rows": rows, + "cols": c, + "data_type": "frame", + "schema": schema, + } + + dataset_part = dataset[rows_processed : rows_processed + rows] dataset_part.to_csv(dataset_part_path, index=False) with io.open(f"{dataset_part_path}.mtd", "w", encoding="utf-8") as f: json.dump(mtd, f, ensure_ascii=False) - fed_file_content.append({ - "address": address, - "dataType": "FRAME", - "filepath": dataset_part_path, - "begin": [rows_processed, 0], - "end": [rows_processed + rows, c], - }) + fed_file_content.append( + { + "address": address, + "dataType": "FRAME", + "filepath": dataset_part_path, + "begin": [rows_processed, 0], + "end": [rows_processed + rows, c], + } + ) rows_processed += rows with open(federated_file, "w", encoding="utf-8") as f: json.dump(fed_file_content, f) - with open(federated_file + '.mtd', "w", encoding="utf-8") as f: - json.dump({"format": "federated", "rows": dataset.shape[0], "cols": c, - "data_type": "frame", "schema": schema}, f) + with open(federated_file + ".mtd", "w", encoding="utf-8") as f: + json.dump( + { + "format": "federated", + "rows": dataset.shape[0], + "cols": c, + "data_type": "frame", + "schema": schema, + }, + f, + ) return federated_file @@ -114,14 +131,18 @@ class TestFederatedAdultNeural(unittest.TestCase): def setUpClass(cls): cls.sds = SystemDSContext() cls.d = DataManager() - cls.data_path_train = create_row_federated_dataset("train_data", - cls.d.get_train_data_pandas()[0:cls.train_count]) - 
cls.labels_path_train = create_row_federated_dataset("train_labels", - cls.d.get_train_labels_pandas()[0:cls.train_count]) - cls.data_path_test = create_row_federated_dataset("test_data", - cls.d.get_test_data_pandas()[0:cls.test_count]) - cls.labels_path_test = create_row_federated_dataset("test_labels", - cls.d.get_test_labels_pandas()[0:cls.test_count]) + cls.data_path_train = create_row_federated_dataset( + "train_data", cls.d.get_train_data_pandas()[0 : cls.train_count] + ) + cls.labels_path_train = create_row_federated_dataset( + "train_labels", cls.d.get_train_labels_pandas()[0 : cls.train_count] + ) + cls.data_path_test = create_row_federated_dataset( + "test_data", cls.d.get_test_data_pandas()[0 : cls.test_count] + ) + cls.labels_path_test = create_row_federated_dataset( + "test_labels", cls.d.get_test_labels_pandas()[0 : cls.test_count] + ) shutil.rmtree(cls.network_dir, ignore_errors=True) @classmethod @@ -180,8 +201,7 @@ def train_neural_net_and_save(self): def train_neural_net_and_predict(self): [train_x, test_x, train_y, test_y] = self.prepare() FFN_package = self.sds.source(self.neural_net_src_path, "fnn") - network = FFN_package.train_paramserv( - train_x, train_y, 1, 16, 0.01, 2, 1) + network = FFN_package.train_paramserv(train_x, train_y, 1, 16, 0.01, 2, 1) probs = FFN_package.predict(test_x, network) accuracy = FFN_package.eval(probs, test_y).compute() # accuracy is returned in percent diff --git a/src/main/python/tests/federated/test_federated_aggregations.py b/src/main/python/tests/federated/test_federated_aggregations.py index a209d5909bd..e19d91dc1cb 100644 --- a/src/main/python/tests/federated/test_federated_aggregations.py +++ b/src/main/python/tests/federated/test_federated_aggregations.py @@ -29,7 +29,7 @@ import numpy as np from systemds.context import SystemDSContext -os.environ['SYSDS_QUIET'] = "1" +os.environ["SYSDS_QUIET"] = "1" dim = 5 np.random.seed(132) @@ -39,7 +39,14 @@ m2.shape = (dim, dim) tempdir = "./tests/federated/tmp/test_federated_aggregations/" -mtd = {"format": "csv", "header": True, "rows": dim, "cols": dim, "data_type": "matrix", "value_type": "double" } +mtd = { + "format": "csv", + "header": True, + "rows": dim, + "cols": dim, + "data_type": "matrix", + "value_type": "double", +} # Create the testing directory if it does not exist. 
if not os.path.exists(tempdir): @@ -79,8 +86,8 @@ def test_sum3(self): # [m1,m1,m1,m1,m1,m2,m2,m2,m2,m2]] f_m_a = ( self.sds.federated( - [fed1, fed2], - [([0, 0], [dim, dim]), ([0, dim], [dim, dim * 2])]) + [fed1, fed2], [([0, 0], [dim, dim]), ([0, dim], [dim, dim * 2])] + ) .sum() .compute() ) @@ -88,24 +95,12 @@ def test_sum3(self): self.assertAlmostEqual(f_m_a, m1_m2) def test_sum1(self): - f_m1 = ( - self.sds.federated( - [fed1], - [([0, 0], [dim, dim])]) - .sum() - .compute() - ) + f_m1 = self.sds.federated([fed1], [([0, 0], [dim, dim])]).sum().compute() m1_r = m1.sum() self.assertAlmostEqual(f_m1, m1_r) def test_sum2(self): - f_m2 = ( - self.sds.federated( - [fed2], - [([0, 0], [dim, dim])]) - .sum() - .compute() - ) + f_m2 = self.sds.federated([fed2], [([0, 0], [dim, dim])]).sum().compute() m2_r = m2.sum() self.assertAlmostEqual(f_m2, m2_r) @@ -117,8 +112,8 @@ def test_sum3(self): # [m1,m1,m1,m1,m1,m2,m2,m2,m2,m2]] f_m1_m2 = ( self.sds.federated( - [fed1, fed2], - [([0, 0], [dim, dim]), ([0, dim], [dim, dim * 2])]) + [fed1, fed2], [([0, 0], [dim, dim]), ([0, dim], [dim, dim * 2])] + ) .sum() .compute() ) @@ -139,9 +134,9 @@ def test_sum4(self): # [m2,m2,m2,m2,m2] # [m2,m2,m2,m2,m2]] f_m1_m2 = ( - self.sds.federated( - [fed1, fed2], - [([0, 0], [dim, dim]), ([dim, 0], [dim * 2, dim])]) + self.sds.federated( + [fed1, fed2], [([0, 0], [dim, dim]), ([dim, 0], [dim * 2, dim])] + ) .sum() .compute() ) @@ -162,9 +157,9 @@ def test_sum5(self): # [ 0, 0, 0, 0, 0,m2,m2,m2,m2,m2] # [ 0, 0, 0, 0, 0,m2,m2,m2,m2,m2]] f_m_a = ( - self.sds.federated( - [fed1, fed2], - [([0, 0], [dim, dim]), ([2, dim], [dim + 2, dim * 2])]) + self.sds.federated( + [fed1, fed2], [([0, 0], [dim, dim]), ([2, dim], [dim + 2, dim * 2])] + ) .sum() .compute() ) @@ -181,7 +176,7 @@ def test_sum5(self): # # [ 0, 0, 0,m2,m2,m2,m2,m2] # # [ 0, 0, 0,m2,m2,m2,m2,m2]] # f_m_a = ( - # self.sds.federated( + # self.sds.federated( # [fed1, fed2], [([0, 0], [dim, dim]), ([2, 3], [dim + 2, dim + 3])] # ) # .sum() @@ -200,7 +195,7 @@ def test_sum5(self): # # [m1,m1,m1,m2,m2,m2,m2,m2] # # [ 0, 0, 0,m2,m2,m2,m2,m2] # # [ 0, 0, 0,m2,m2,m2,m2,m2]] - # f_m_a = self.sds.federated( + # f_m_a = self.sds.federated( # [fed1, fed2], [([0, 0], [dim, dim]), ([2, 3], [dim + 2, dim + 3])] # ) # f_m_a = (f_m_a + 1).sum().compute() @@ -220,11 +215,7 @@ def test_sum8(self): # [ 0, 0, 0,m1,m1,m1,m1,m1] # [ 0, 0, 0,m1,m1,m1,m1,m1]] f_m_a = ( - self.sds.federated( - [fed1], - [([2, 3], [dim + 2, dim + 3])]) - .sum() - .compute() + self.sds.federated([fed1], [([2, 3], [dim + 2, dim + 3])]).sum().compute() ) m = m1.sum() diff --git a/src/main/python/tests/federated/test_federated_aggregations_noHeader.py b/src/main/python/tests/federated/test_federated_aggregations_noHeader.py index 25832b648fc..be1a92a8f7d 100644 --- a/src/main/python/tests/federated/test_federated_aggregations_noHeader.py +++ b/src/main/python/tests/federated/test_federated_aggregations_noHeader.py @@ -29,7 +29,7 @@ import numpy as np from systemds.context import SystemDSContext -os.environ['SYSDS_QUIET'] = "1" +os.environ["SYSDS_QUIET"] = "1" dim = 3 @@ -37,19 +37,25 @@ m2 = np.asarray([[2, 2, 2], [3, 3, 3], [4, 4, 4]], dtype=np.int16) tempdir = "./tests/federated/tmp/test_federated_aggregations_noHeader/" -mtd = {"format": "csv", "header": False, "rows": dim, - "cols": dim, "data_type": "matrix", "value_type": "double"} +mtd = { + "format": "csv", + "header": False, + "rows": dim, + "cols": dim, + "data_type": "matrix", + "value_type": "double", +} # Create the testing directory if it 
does not exist. if not os.path.exists(tempdir): os.makedirs(tempdir) # Save data files for the Federated workers. -np.savetxt(tempdir + "m1.csv", m1, delimiter=",",fmt='%d') +np.savetxt(tempdir + "m1.csv", m1, delimiter=",", fmt="%d") with io.open(tempdir + "m1.csv.mtd", "w", encoding="utf-8") as f: f.write(json.dumps(mtd, ensure_ascii=False)) -np.savetxt(tempdir + "m2.csv", m2, delimiter=",",fmt='%d') +np.savetxt(tempdir + "m2.csv", m2, delimiter=",", fmt="%d") with io.open(tempdir + "m2.csv.mtd", "w", encoding="utf-8") as f: f.write(json.dumps(mtd, ensure_ascii=False)) @@ -71,12 +77,7 @@ def tearDownClass(cls): cls.sds.close() def test_equals(self): - f_m = ( - self.sds.federated( - [fed1], - [([0, 0], [dim, dim])]) - .compute() - ) + f_m = self.sds.federated([fed1], [([0, 0], [dim, dim])]).compute() self.assertTrue(np.allclose(f_m, m1)) def test_sum3(self): @@ -87,8 +88,8 @@ def test_sum3(self): # [m1,m1,m1,m1,m1,m2,m2,m2,m2,m2]] f_m_a = ( self.sds.federated( - [fed1, fed2], - [([0, 0], [dim, dim]), ([0, dim], [dim, dim * 2])]) + [fed1, fed2], [([0, 0], [dim, dim]), ([0, dim], [dim, dim * 2])] + ) .sum() .compute() ) @@ -96,24 +97,12 @@ def test_sum3(self): self.assertAlmostEqual(f_m_a, m1_m2) def test_sum1(self): - f_m1 = ( - self.sds.federated( - [fed1], - [([0, 0], [dim, dim])]) - .sum() - .compute() - ) + f_m1 = self.sds.federated([fed1], [([0, 0], [dim, dim])]).sum().compute() m1_r = m1.sum() self.assertAlmostEqual(f_m1, m1_r) def test_sum2(self): - f_m2 = ( - self.sds.federated( - [fed2], - [([0, 0], [dim, dim])]) - .sum() - .compute() - ) + f_m2 = self.sds.federated([fed2], [([0, 0], [dim, dim])]).sum().compute() m2_r = m2.sum() self.assertAlmostEqual(f_m2, m2_r) @@ -125,8 +114,8 @@ def test_sum3(self): # [m1,m1,m1,m1,m1,m2,m2,m2,m2,m2]] f_m1_m2 = ( self.sds.federated( - [fed1, fed2], - [([0, 0], [dim, dim]), ([0, dim], [dim, dim * 2])]) + [fed1, fed2], [([0, 0], [dim, dim]), ([0, dim], [dim, dim * 2])] + ) .sum() .compute() ) @@ -148,8 +137,8 @@ def test_sum4(self): # [m2,m2,m2,m2,m2]] f_m1_m2 = ( self.sds.federated( - [fed1, fed2], - [([0, 0], [dim, dim]), ([dim, 0], [dim * 2, dim])]) + [fed1, fed2], [([0, 0], [dim, dim]), ([dim, 0], [dim * 2, dim])] + ) .sum() .compute() ) @@ -171,8 +160,8 @@ def test_sum5(self): # [ 0, 0, 0, 0, 0,m2,m2,m2,m2,m2]] f_m_a = ( self.sds.federated( - [fed1, fed2], - [([0, 0], [dim, dim]), ([2, dim], [dim + 2, dim * 2])]) + [fed1, fed2], [([0, 0], [dim, dim]), ([2, dim], [dim + 2, dim * 2])] + ) .sum() .compute() ) @@ -188,11 +177,7 @@ def test_sum8(self): # [ 0, 0, 0,m1,m1,m1,m1,m1] # [ 0, 0, 0,m1,m1,m1,m1,m1]] f_m_a = ( - self.sds.federated( - [fed1], - [([2, 3], [dim + 2, dim + 3])]) - .sum() - .compute() + self.sds.federated([fed1], [([2, 3], [dim + 2, dim + 3])]).sum().compute() ) m = m1.sum() diff --git a/src/main/python/tests/federated/test_federated_basic.py b/src/main/python/tests/federated/test_federated_basic.py index e6159a8610b..8910084e989 100644 --- a/src/main/python/tests/federated/test_federated_basic.py +++ b/src/main/python/tests/federated/test_federated_basic.py @@ -29,7 +29,7 @@ import numpy as np from systemds.context import SystemDSContext -os.environ['SYSDS_QUIET'] = "1" +os.environ["SYSDS_QUIET"] = "1" dim = 5 np.random.seed(132) @@ -39,7 +39,14 @@ m2.shape = (dim, dim) tempdir = "./tests/federated/tmp/test_federated_aggregations/" -mtd = {"format": "csv", "header": True, "rows": dim, "cols": dim, "data_type": "matrix", "value_type": "double" } +mtd = { + "format": "csv", + "header": True, + "rows": dim, + "cols": dim, + 
"data_type": "matrix", + "value_type": "double", +} # Create the testing directory if it does not exist. if not os.path.exists(tempdir): @@ -72,12 +79,12 @@ def tearDownClass(cls): cls.sds.close() def test_1(self): - f_m1 = self.sds.federated( [fed1], [([0,0], [dim, dim])]).compute() + f_m1 = self.sds.federated([fed1], [([0, 0], [dim, dim])]).compute() res = np.allclose(f_m1, m1) self.assertTrue(res, "\n" + str(f_m1) + " is not equal to \n" + str(m1)) def test_2(self): - f_m2 = self.sds.federated( [fed2], [([0, 0], [dim, dim])]).compute() + f_m2 = self.sds.federated([fed2], [([0, 0], [dim, dim])]).compute() res = np.allclose(f_m2, m2) self.assertTrue(res) @@ -87,9 +94,8 @@ def test_3(self): # [m1,m1,m1,m1,m1,m2,m2,m2,m2,m2] # [m1,m1,m1,m1,m1,m2,m2,m2,m2,m2] # [m1,m1,m1,m1,m1,m2,m2,m2,m2,m2]] - f_m1_m2 = self.sds.federated( - [fed1, fed2], - [([0, 0], [dim, dim]), ([0, dim], [dim, dim * 2])] + f_m1_m2 = self.sds.federated( + [fed1, fed2], [([0, 0], [dim, dim]), ([0, dim], [dim, dim * 2])] ).compute() m1_m2 = np.concatenate((m1, m2), axis=1) res = np.allclose(f_m1_m2, m1_m2) @@ -106,9 +112,8 @@ def test_4(self): # [m2,m2,m2,m2,m2] # [m2,m2,m2,m2,m2] # [m2,m2,m2,m2,m2]] - f_m1_m2 = self.sds.federated( - [fed1, fed2], - [([0, 0], [dim, dim]), ([dim, 0], [dim * 2, dim])] + f_m1_m2 = self.sds.federated( + [fed1, fed2], [([0, 0], [dim, dim]), ([dim, 0], [dim * 2, dim])] ).compute() m1_m2 = np.concatenate((m1, m2)) res = np.allclose(f_m1_m2, m1_m2) @@ -122,9 +127,8 @@ def test_5(self): # [m1,m1,m1,m1,m1,m2,m2,m2,m2,m2] # [ 0, 0, 0, 0, 0,m2,m2,m2,m2,m2] # [ 0, 0, 0, 0, 0,m2,m2,m2,m2,m2]] - f_m1_m2 = self.sds.federated( - [fed1, fed2], - [([0, 0], [dim, dim]), ([2, dim], [dim + 2, dim * 2])] + f_m1_m2 = self.sds.federated( + [fed1, fed2], [([0, 0], [dim, dim]), ([2, dim], [dim + 2, dim * 2])] ).compute() m1_p = np.concatenate((m1, np.zeros((2, dim)))) @@ -142,7 +146,7 @@ def test_5(self): # # [m1,m1,m1,m2,m2,m2,m2,m2] # # [ 0, 0, 0,m2,m2,m2,m2,m2] # # [ 0, 0, 0,m2,m2,m2,m2,m2]] - # f_m1_m2 = self.sds.federated( + # f_m1_m2 = self.sds.federated( # [fed1, fed2], [([0, 0], [dim, dim]), ([2, 3], [dim + 2, dim + 3])] # ).compute() @@ -161,7 +165,7 @@ def test_5(self): # # [m1,m1,m1,m2,m2,m2,m2,m2] # # [ 0, 0, 0,m2,m2,m2,m2,m2] # # [ 0, 0, 0,m2,m2,m2,m2,m2]] - # f_m1_m2 = self.sds.federated( + # f_m1_m2 = self.sds.federated( # [fed1, fed2], [([0, 0], [dim, dim]), ([2, 3], [dim + 2, dim + 3])] # ) # f_m1_m2 = (f_m1_m2 + 1).compute() @@ -185,10 +189,10 @@ def test_8(self): # [ 0, 0, 0,m1,m1,m1,m1,m1] # [ 0, 0, 0,m1,m1,m1,m1,m1] # [ 0, 0, 0,m1,m1,m1,m1,m1]] - f_m1_m2 = self.sds.federated( [fed1], [([2, 3], [dim + 2, dim + 3])]) + f_m1_m2 = self.sds.federated([fed1], [([2, 3], [dim + 2, dim + 3])]) f_m1_m2 = (f_m1_m2).compute() m1_m2 = np.zeros((dim + 2, dim + 3)) - m1_m2[2: dim + 2, 3: dim + 3] = m1 + m1_m2[2 : dim + 2, 3 : dim + 3] = m1 res = np.allclose(f_m1_m2, m1_m2) if not res: print("Federated:") diff --git a/src/main/python/tests/federated/test_federated_matrix_mult.py b/src/main/python/tests/federated/test_federated_matrix_mult.py index 6551e11356b..dcdeaca42d4 100644 --- a/src/main/python/tests/federated/test_federated_matrix_mult.py +++ b/src/main/python/tests/federated/test_federated_matrix_mult.py @@ -28,7 +28,7 @@ import numpy as np from systemds.context import SystemDSContext -os.environ['SYSDS_QUIET'] = "1" +os.environ["SYSDS_QUIET"] = "1" dim = 3 @@ -39,8 +39,14 @@ m_r3 = np.row_stack((m, m_r2)) tempdir = "./tests/federated/tmp/test_federated_matrixmult/" -mtd = {"format": "csv", "header": 
False, "rows": dim, - "cols": dim, "data_type": "matrix", "value_type": "double"} +mtd = { + "format": "csv", + "header": False, + "rows": dim, + "cols": dim, + "data_type": "matrix", + "value_type": "double", +} # Create the testing directory if it does not exist. if not os.path.exists(tempdir): @@ -56,11 +62,11 @@ fed2 = "localhost:8002/" + tempdir + "m.csv" fed3 = "localhost:8003/" + tempdir + "m.csv" -fed1_file = tempdir+"m1.fed" -fed_c2_file = tempdir+"m_c2.fed" -fed_c3_file = tempdir+"m_c3.fed" -fed_r2_file = tempdir+"m_r2.fed" -fed_r3_file = tempdir+"m_r3.fed" +fed1_file = tempdir + "m1.fed" +fed_c2_file = tempdir + "m_c2.fed" +fed_c3_file = tempdir + "m_c3.fed" +fed_r2_file = tempdir + "m_r2.fed" +fed_r3_file = tempdir + "m_r3.fed" class TestFederatedAggFn(unittest.TestCase): @@ -70,22 +76,31 @@ class TestFederatedAggFn(unittest.TestCase): @classmethod def setUpClass(cls): cls.sds = SystemDSContext() - cls.sds.federated([fed1], [([0, 0], [dim, dim])] - ).write(fed1_file, format="federated").compute() - cls.sds.federated([fed1, fed2], [ - ([0, 0], [dim, dim]), - ([0, dim], [dim, dim*2])]).write(fed_c2_file, format="federated").compute() - cls.sds.federated([fed1, fed2, fed3], [ - ([0, 0], [dim, dim]), - ([0, dim], [dim, dim*2]), - ([0, dim*2], [dim, dim*3])]).write(fed_c3_file, format="federated").compute() - cls.sds.federated([fed1, fed2], [ - ([0, 0], [dim, dim]), - ([dim, 0], [dim*2, dim])]).write(fed_r2_file, format="federated").compute() - cls.sds.federated([fed1, fed2, fed3], [ - ([0, 0], [dim, dim]), - ([dim, 0], [dim*2, dim]), - ([dim*2, 0], [dim*3, dim])]).write(fed_r3_file, format="federated").compute() + cls.sds.federated([fed1], [([0, 0], [dim, dim])]).write( + fed1_file, format="federated" + ).compute() + cls.sds.federated( + [fed1, fed2], [([0, 0], [dim, dim]), ([0, dim], [dim, dim * 2])] + ).write(fed_c2_file, format="federated").compute() + cls.sds.federated( + [fed1, fed2, fed3], + [ + ([0, 0], [dim, dim]), + ([0, dim], [dim, dim * 2]), + ([0, dim * 2], [dim, dim * 3]), + ], + ).write(fed_c3_file, format="federated").compute() + cls.sds.federated( + [fed1, fed2], [([0, 0], [dim, dim]), ([dim, 0], [dim * 2, dim])] + ).write(fed_r2_file, format="federated").compute() + cls.sds.federated( + [fed1, fed2, fed3], + [ + ([0, 0], [dim, dim]), + ([dim, 0], [dim * 2, dim]), + ([dim * 2, 0], [dim * 3, dim]), + ], + ).write(fed_r3_file, format="federated").compute() @classmethod def tearDownClass(cls): @@ -106,15 +121,13 @@ def test_single_fed_left_same_size(self): def test_single_fed_left_plus_one_row(self): f_m = self.sds.read(fed1_file) - m_row_plus1 = np.reshape( - np.arange(1, dim*(dim+1) + 1, 1), (dim+1, dim)) + m_row_plus1 = np.reshape(np.arange(1, dim * (dim + 1) + 1, 1), (dim + 1, dim)) m_s = self.sds.from_numpy(m_row_plus1) self.exec_test(m_row_plus1, m, m_s, f_m) def test_single_fed_left_minus_one_row(self): f_m = self.sds.read(fed1_file) - m_row_minus1 = np.reshape( - np.arange(1, dim*(dim-1) + 1, 1), (dim-1, dim)) + m_row_minus1 = np.reshape(np.arange(1, dim * (dim - 1) + 1, 1), (dim - 1, dim)) m_s = self.sds.from_numpy(m_row_minus1) self.exec_test(m_row_minus1, m, m_s, f_m) @@ -131,15 +144,13 @@ def test_single_fed_right_same_size(self): def test_single_fed_right_plus_one_row(self): f_m = self.sds.read(fed1_file) - m_col_plus1 = np.reshape( - np.arange(1, dim*(dim+1) + 1, 1), (dim, dim+1)) + m_col_plus1 = np.reshape(np.arange(1, dim * (dim + 1) + 1, 1), (dim, dim + 1)) m_s = self.sds.from_numpy(m_col_plus1) self.exec_test(m, m_col_plus1, f_m, m_s) def 
test_single_fed_right_minus_one_row(self): f_m = self.sds.read(fed1_file) - m_col_minus1 = np.reshape( - np.arange(1, dim*(dim-1) + 1, 1), (dim, dim-1)) + m_col_minus1 = np.reshape(np.arange(1, dim * (dim - 1) + 1, 1), (dim, dim - 1)) m_s = self.sds.from_numpy(m_col_minus1) self.exec_test(m, m_col_minus1, f_m, m_s) @@ -155,25 +166,25 @@ def test_single_fed_right_vector(self): def test_two_fed_standard(self): f_m2 = self.sds.read(fed_c2_file) - m = np.reshape(np.arange(1, dim*(dim + dim) + 1, 1), (dim*2, dim)) + m = np.reshape(np.arange(1, dim * (dim + dim) + 1, 1), (dim * 2, dim)) m_s = self.sds.from_numpy(m) self.exec_test(m, m_c2, m_s, f_m2) def test_two_fed_left_minus_one_row(self): f_m2 = self.sds.read(fed_c2_file) - m = np.reshape(np.arange(1, dim*(dim + dim-1)+1, 1), (dim*2 - 1, dim)) + m = np.reshape(np.arange(1, dim * (dim + dim - 1) + 1, 1), (dim * 2 - 1, dim)) m_s = self.sds.from_numpy(m) self.exec_test(m, m_c2, m_s, f_m2) def test_two_fed_left_plus_one_row(self): f_m2 = self.sds.read(fed_c2_file) - m = np.reshape(np.arange(1, dim*(dim + dim+1)+1, 1), (dim*2 + 1, dim)) + m = np.reshape(np.arange(1, dim * (dim + dim + 1) + 1, 1), (dim * 2 + 1, dim)) m_s = self.sds.from_numpy(m) self.exec_test(m, m_c2, m_s, f_m2) def test_two_fed_left_vector_row(self): f_m2 = self.sds.read(fed_c2_file) - m = np.arange(1, dim+1, 1) + m = np.arange(1, dim + 1, 1) m_s = self.sds.from_numpy(m).t() self.exec_test(m, m_c2, m_s, f_m2) @@ -184,21 +195,19 @@ def test_two_fed_right_standard(self): def test_two_fed_right_col_minus_1(self): f_m2 = self.sds.read(fed_c2_file) - m = np.reshape(np.arange(1, (dim-1)*(dim + dim)+1, 1), - (dim * 2, dim-1)) + m = np.reshape(np.arange(1, (dim - 1) * (dim + dim) + 1, 1), (dim * 2, dim - 1)) m_s = self.sds.from_numpy(m) self.exec_test(m_c2, m, f_m2, m_s) def test_two_fed_right_col_plus_1(self): f_m2 = self.sds.read(fed_c2_file) - m = np.reshape(np.arange(1, (dim+1)*(dim + dim)+1, 1), - (dim * 2, dim+1)) + m = np.reshape(np.arange(1, (dim + 1) * (dim + dim) + 1, 1), (dim * 2, dim + 1)) m_s = self.sds.from_numpy(m) self.exec_test(m_c2, m, f_m2, m_s) def test_two_fed_right_vector(self): f_m2 = self.sds.read(fed_c2_file) - m = np.reshape(np.arange(1, (dim + dim)+1, 1), (dim * 2, 1)) + m = np.reshape(np.arange(1, (dim + dim) + 1, 1), (dim * 2, 1)) m_s = self.sds.from_numpy(m) self.exec_test(m_c2, m, f_m2, m_s) @@ -208,25 +217,25 @@ def test_two_fed_right_vector(self): def test_three_fed_standard(self): f_m3 = self.sds.read(fed_c3_file) - m = np.reshape(np.arange(1, dim*(dim * 3) + 1, 1), (dim*3, dim)) + m = np.reshape(np.arange(1, dim * (dim * 3) + 1, 1), (dim * 3, dim)) m_s = self.sds.from_numpy(m) self.exec_test(m, m_c3, m_s, f_m3) def test_three_fed_left_minus_one_row(self): f_m3 = self.sds.read(fed_c3_file) - m = np.reshape(np.arange(1, dim*(dim * 3-1)+1, 1), (dim*3 - 1, dim)) + m = np.reshape(np.arange(1, dim * (dim * 3 - 1) + 1, 1), (dim * 3 - 1, dim)) m_s = self.sds.from_numpy(m) self.exec_test(m, m_c3, m_s, f_m3) def test_three_fed_left_plus_one_row(self): f_m3 = self.sds.read(fed_c3_file) - m = np.reshape(np.arange(1, dim*(dim *3+1)+1, 1), (dim*3 + 1, dim)) + m = np.reshape(np.arange(1, dim * (dim * 3 + 1) + 1, 1), (dim * 3 + 1, dim)) m_s = self.sds.from_numpy(m) self.exec_test(m, m_c3, m_s, f_m3) def test_three_fed_left_vector_row(self): f_m3 = self.sds.read(fed_c3_file) - m = np.arange(1, dim+1, 1) + m = np.arange(1, dim + 1, 1) m_s = self.sds.from_numpy(m).t() self.exec_test(m, m_c3, m_s, f_m3) @@ -237,19 +246,19 @@ def test_three_fed_right_standard(self): def 
test_three_fed_right_col_minus_1(self): f_m3 = self.sds.read(fed_c3_file) - m = np.reshape(np.arange(1, (dim-1)*(dim*3)+1, 1), (dim * 3, dim-1)) + m = np.reshape(np.arange(1, (dim - 1) * (dim * 3) + 1, 1), (dim * 3, dim - 1)) m_s = self.sds.from_numpy(m) self.exec_test(m_c3, m, f_m3, m_s) def test_three_fed_right_col_plus_1(self): f_m3 = self.sds.read(fed_c3_file) - m = np.reshape(np.arange(1, (dim+1)*(dim *3)+1, 1), (dim * 3, dim+1)) + m = np.reshape(np.arange(1, (dim + 1) * (dim * 3) + 1, 1), (dim * 3, dim + 1)) m_s = self.sds.from_numpy(m) self.exec_test(m_c3, m, f_m3, m_s) def test_three_fed_right_vector(self): f_m3 = self.sds.read(fed_c3_file) - m = np.reshape(np.arange(1, (dim *3)+1, 1), (dim * 3, 1)) + m = np.reshape(np.arange(1, (dim * 3) + 1, 1), (dim * 3, 1)) m_s = self.sds.from_numpy(m) self.exec_test(m_c3, m, f_m3, m_s) @@ -267,15 +276,15 @@ def test_federated_row3_binded(self): s_m = self.sds.from_numpy(m_c3) self.exec_test(m_c3, m_r3, s_m, fed) - - - def test_previously_failing(self): # local matrix to multiply with - loc = np.array([ - [1, 2, 3, 4, 5, 6, 7, 8, 9], - [1, 2, 3, 4, 5, 6, 7, 8, 9], - [1, 2, 3, 4, 5, 6, 7, 8, 9]]) + loc = np.array( + [ + [1, 2, 3, 4, 5, 6, 7, 8, 9], + [1, 2, 3, 4, 5, 6, 7, 8, 9], + [1, 2, 3, 4, 5, 6, 7, 8, 9], + ] + ) # Multiply local and federated ret_loc = loc @ m_r3 @@ -284,8 +293,7 @@ def test_previously_failing(self): fed = self.sds.read(fed_r3_file) ret_fed = (loc_systemds @ fed).compute() if not np.allclose(ret_fed, ret_loc): - self.fail( - "not equal outputs of federated matrix multiplications") + self.fail("not equal outputs of federated matrix multiplications") def exec_test(self, left, right, f_left, f_right): fed = f_left @ f_right diff --git a/src/main/python/tests/federated/test_federated_mnist.py b/src/main/python/tests/federated/test_federated_mnist.py index d7cb640157b..cb5e9641165 100644 --- a/src/main/python/tests/federated/test_federated_mnist.py +++ b/src/main/python/tests/federated/test_federated_mnist.py @@ -31,8 +31,7 @@ from systemds.operator.algorithm import kmeans, multiLogReg, multiLogRegPredict -def create_row_federated_dataset(name, dataset, num_parts=2, - federated_workers=None): +def create_row_federated_dataset(name, dataset, num_parts=2, federated_workers=None): if federated_workers is None: federated_workers = ["localhost:8001", "localhost:8002"] tempdir = "./tests/federated/tmp/test_federated_mnist/" @@ -45,30 +44,47 @@ def create_row_federated_dataset(name, dataset, num_parts=2, fed_file_content = [] rows_processed = 0 - for worker_id, address, rows in zip(range(num_parts), itertools.cycle(federated_workers), rs): + for worker_id, address, rows in zip( + range(num_parts), itertools.cycle(federated_workers), rs + ): dataset_part_path = path.join(tempdir, f"{name}{worker_id}.csv") - mtd = {"format": "csv", "rows": rows, "cols": c, - "data_type": "matrix", "value_type": "double"} - - dataset_part = dataset[rows_processed:rows_processed + rows] + mtd = { + "format": "csv", + "rows": rows, + "cols": c, + "data_type": "matrix", + "value_type": "double", + } + + dataset_part = dataset[rows_processed : rows_processed + rows] pd.DataFrame(dataset_part).to_csv(dataset_part_path, index=False, header=False) with io.open(f"{dataset_part_path}.mtd", "w", encoding="utf-8") as f: json.dump(mtd, f, ensure_ascii=False) - fed_file_content.append({ - "address": address, - "dataType": "MATRIX", - "filepath": dataset_part_path, - "begin": [rows_processed, 0], - "end": [rows_processed + rows, c], - }) + fed_file_content.append( + { 
+ "address": address, + "dataType": "MATRIX", + "filepath": dataset_part_path, + "begin": [rows_processed, 0], + "end": [rows_processed + rows, c], + } + ) rows_processed += rows with open(federated_file, "w", encoding="utf-8") as f: json.dump(fed_file_content, f) - with open(federated_file + '.mtd', "w", encoding="utf-8") as f: - json.dump({"format": "federated", "rows": dataset.shape[0], "cols": c, - "data_type": "matrix", "value_type": "double"}, f) + with open(federated_file + ".mtd", "w", encoding="utf-8") as f: + json.dump( + { + "format": "federated", + "rows": dataset.shape[0], + "cols": c, + "data_type": "matrix", + "value_type": "double", + }, + f, + ) return federated_file @@ -114,7 +130,14 @@ def test_multi_log_reg(self): with self.sds.capture_stats_context(): [_, _, acc] = multiLogRegPredict(Xt, bias, Y=Yt).compute() stats = self.sds.take_stats() - for fed_instr in ["fed_contains", "fed_*", "fed_-", "fed_uark+", "fed_r'", "fed_rightIndex"]: + for fed_instr in [ + "fed_contains", + "fed_*", + "fed_-", + "fed_uark+", + "fed_r'", + "fed_rightIndex", + ]: self.assertIn(fed_instr, stats) self.assertGreater(acc, 80) diff --git a/src/main/python/tests/federated/test_federated_read.py b/src/main/python/tests/federated/test_federated_read.py index 6a3c28c28fa..927d5d5fc34 100644 --- a/src/main/python/tests/federated/test_federated_read.py +++ b/src/main/python/tests/federated/test_federated_read.py @@ -29,15 +29,21 @@ import numpy as np from systemds.context import SystemDSContext -os.environ['SYSDS_QUIET'] = "1" +os.environ["SYSDS_QUIET"] = "1" dim = 3 m = np.reshape(np.arange(1, dim * dim + 1, 1), (dim, dim)) tempdir = "./tests/federated/tmp/test_federated_matrixmult/" -mtd = {"format": "csv", "header": False, "rows": dim, - "cols": dim, "data_type": "matrix", "value_type": "double"} +mtd = { + "format": "csv", + "header": False, + "rows": dim, + "cols": dim, + "data_type": "matrix", + "value_type": "double", +} # Create the testing directory if it does not exist. 
if not os.path.exists(tempdir): @@ -53,9 +59,9 @@ fed2 = "localhost:8002/" + tempdir + "m.csv" fed3 = "localhost:8003/" + tempdir + "m.csv" -fed1_file = tempdir+"m1.fed" -fed2_file = tempdir+"m2.fed" -fed3_file = tempdir+"m3.fed" +fed1_file = tempdir + "m1.fed" +fed2_file = tempdir + "m2.fed" +fed3_file = tempdir + "m3.fed" class TestFederatedAggFn(unittest.TestCase): @@ -65,15 +71,20 @@ class TestFederatedAggFn(unittest.TestCase): @classmethod def setUpClass(cls): cls.sds = SystemDSContext() - cls.sds.federated([fed1], [ - ([0, 0], [dim, dim])]).write(fed1_file, format="federated").compute() - cls.sds.federated([fed1, fed2], [ - ([0, 0], [dim, dim]), - ([0, dim], [dim, dim*2])]).write(fed2_file, format="federated").compute() - cls.sds.federated([fed1, fed2, fed3], [ - ([0, 0], [dim, dim]), - ([0, dim], [dim, dim*2]), - ([0, dim*2], [dim, dim*3])]).write(fed3_file, format="federated").compute() + cls.sds.federated([fed1], [([0, 0], [dim, dim])]).write( + fed1_file, format="federated" + ).compute() + cls.sds.federated( + [fed1, fed2], [([0, 0], [dim, dim]), ([0, dim], [dim, dim * 2])] + ).write(fed2_file, format="federated").compute() + cls.sds.federated( + [fed1, fed2, fed3], + [ + ([0, 0], [dim, dim]), + ([0, dim], [dim, dim * 2]), + ([0, dim * 2], [dim, dim * 3]), + ], + ).write(fed3_file, format="federated").compute() @classmethod def tearDownClass(cls): @@ -89,13 +100,13 @@ def test_verify_same_input_if_reading_fed(self): def test_verify_same_input_if_reading_fed2(self): f_m = self.sds.read(fed2_file).compute() - m2 = np.column_stack((m,m)) + m2 = np.column_stack((m, m)) self.assertTrue(np.allclose(f_m, m2)) def test_verify_same_input_if_reading_fed3(self): f_m = self.sds.read(fed3_file).compute() - m2 = np.column_stack((m,m)) - m3 = np.column_stack((m,m2)) + m2 = np.column_stack((m, m)) + m3 = np.column_stack((m, m2)) self.assertTrue(np.allclose(f_m, m3)) diff --git a/src/main/python/tests/frame/test_hyperband.py b/src/main/python/tests/frame/test_hyperband.py index 12dc1c68bfe..1f4973c611b 100644 --- a/src/main/python/tests/frame/test_hyperband.py +++ b/src/main/python/tests/frame/test_hyperband.py @@ -67,7 +67,7 @@ def test_hyperband(self): y_val=y_val, params=params, paramRanges=paramRanges, - verbose=False + verbose=False, ).compute() self.assertTrue(isinstance(best_weights_mat, np.ndarray)) self.assertTrue(best_weights_mat.shape[0] == self.X_train.shape[1]) @@ -77,7 +77,8 @@ def test_hyperband(self): self.assertTrue(opt_hyper_params_df.shape[1] == 1) for i, hyper_param in enumerate(opt_hyper_params_df.values.flatten().tolist()): self.assertTrue( - self.min_max_params[i][0] <= hyper_param <= self.min_max_params[i][1]) + self.min_max_params[i][0] <= hyper_param <= self.min_max_params[i][1] + ) if __name__ == "__main__": diff --git a/src/main/python/tests/frame/test_rIndexing.py b/src/main/python/tests/frame/test_rIndexing.py index 43d6c367734..060edb07c07 100644 --- a/src/main/python/tests/frame/test_rIndexing.py +++ b/src/main/python/tests/frame/test_rIndexing.py @@ -30,7 +30,7 @@ class Test_rIndexing(unittest.TestCase): sds: SystemDSContext = None - #shape (4, 3) + # shape (4, 3) df = pd.DataFrame(np.arange(0, 100).reshape(10, 10)) @classmethod diff --git a/src/main/python/tests/frame/test_r_c_bind.py b/src/main/python/tests/frame/test_r_c_bind.py index 6a6e99a1896..6a69c8e814d 100644 --- a/src/main/python/tests/frame/test_r_c_bind.py +++ b/src/main/python/tests/frame/test_r_c_bind.py @@ -31,21 +31,17 @@ class TestRCBind(unittest.TestCase): # shape (2, 3) df_cb_1 = pd.DataFrame( 
- {"col1": ["col1_hello", "col1_world"], - "col2": [0, 1], "col3": [0.0, 0.1]} + {"col1": ["col1_hello", "col1_world"], "col2": [0, 1], "col3": [0.0, 0.1]} ) # shape (2, 2) - df_cb_2 = pd.DataFrame( - {"col4": ["col4_hello", "col4_world"], "col5": [0, 1]}) - df_cb_3 = pd.DataFrame( - {"col6": ["col6_hello", "col6_world"], "col7": [0, 1]}) + df_cb_2 = pd.DataFrame({"col4": ["col4_hello", "col4_world"], "col5": [0, 1]}) + df_cb_3 = pd.DataFrame({"col6": ["col6_hello", "col6_world"], "col7": [0, 1]}) - #shape (2, 3) + # shape (2, 3) df_rb_1 = pd.DataFrame( - {"col1": ["col1_hello_1", "col1_world_1"], - "col2": [0, 1], "col3": [0.0, 0.1]} + {"col1": ["col1_hello_1", "col1_world_1"], "col2": [0, 1], "col3": [0.0, 0.1]} ) - #shape (4, 3) + # shape (4, 3) df_rb_2 = pd.DataFrame( { "col1": ["col1_hello_2", "col1_world_2", "col1_hello_2", "col1_world_2"], @@ -53,7 +49,7 @@ class TestRCBind(unittest.TestCase): "col3": [0.2, 0.3, 0.4, 0.5], } ) - #shape (3, 3) + # shape (3, 3) df_rb_3 = pd.DataFrame( { "col1": ["col1_hello_3", "col1_world_3", "col1_hello_3"], @@ -85,7 +81,8 @@ def test_r_bind_triple(self): result_df = f1.rbind(f2).rbind(f3).compute() self.assertTrue(isinstance(result_df, pd.DataFrame)) target_df = pd.concat( - [self.df_rb_1, self.df_rb_2, self.df_rb_3], ignore_index=True) + [self.df_rb_1, self.df_rb_2, self.df_rb_3], ignore_index=True + ) self.assertTrue(target_df.equals(result_df)) def test_r_bind_triple_twostep(self): @@ -96,7 +93,8 @@ def test_r_bind_triple_twostep(self): result_df = self.sds.from_pandas(tmp_df).rbind(f3).compute() self.assertTrue(isinstance(result_df, pd.DataFrame)) target_df = pd.concat( - [self.df_rb_1, self.df_rb_2, self.df_rb_3], ignore_index=True) + [self.df_rb_1, self.df_rb_2, self.df_rb_3], ignore_index=True + ) self.assertTrue(target_df.equals(result_df)) def test_c_bind_pair(self): @@ -113,8 +111,7 @@ def test_c_bind_triple(self): f3 = self.sds.from_pandas(self.df_cb_3) result_df = f1.cbind(f2).cbind(f3).compute() self.assertTrue(isinstance(result_df, pd.DataFrame)) - target_df = pd.concat( - [self.df_cb_1, self.df_cb_2, self.df_cb_3], axis=1) + target_df = pd.concat([self.df_cb_1, self.df_cb_2, self.df_cb_3], axis=1) self.assertTrue(target_df.equals(result_df)) def test_c_bind_triple_twostep(self): @@ -124,8 +121,7 @@ def test_c_bind_triple_twostep(self): tmp_df = f1.cbind(f2).compute() result_df = self.sds.from_pandas(tmp_df).cbind(f3).compute() self.assertTrue(isinstance(result_df, pd.DataFrame)) - target_df = pd.concat( - [self.df_cb_1, self.df_cb_2, self.df_cb_3], axis=1) + target_df = pd.concat([self.df_cb_1, self.df_cb_2, self.df_cb_3], axis=1) self.assertTrue(target_df.equals(result_df)) diff --git a/src/main/python/tests/frame/test_replace.py b/src/main/python/tests/frame/test_replace.py index c731a8c8e1a..7adafb3050a 100644 --- a/src/main/python/tests/frame/test_replace.py +++ b/src/main/python/tests/frame/test_replace.py @@ -57,10 +57,14 @@ def test_apply_recode_bin(self): format="csv", header=True, ) - ret = F1.replace("north", "south").replace( - "west", "south").replace("east", "south").compute() + ret = ( + F1.replace("north", "south") + .replace("west", "south") + .replace("east", "south") + .compute() + ) self.assertTrue(any(ret.district == "south")) - self.assertTrue(not(any(ret.district == "north"))) + self.assertTrue(not (any(ret.district == "north"))) if __name__ == "__main__": diff --git a/src/main/python/tests/frame/test_transform_encode.py b/src/main/python/tests/frame/test_transform_encode.py index 5159a61da10..c3ae837a557 100644 
--- a/src/main/python/tests/frame/test_transform_encode.py +++ b/src/main/python/tests/frame/test_transform_encode.py @@ -68,7 +68,6 @@ def test_encode_recode(self): for col_name in JSPEC["recode"]: self.assertTrue(M[col_name].nunique() == pd_F1[col_name].nunique()) - def test_encode_recode_and_use_matrix(self): with open(self.JSPEC_PATH) as jspec_file: JSPEC = json.load(jspec_file) @@ -83,7 +82,8 @@ def test_encode_recode_and_use_matrix(self): X, M = F1.transform_encode(spec=jspec) xm = X.sum() + 1 res = xm.compute() - self.assertTrue(isinstance(res,float)) + self.assertTrue(isinstance(res, float)) + if __name__ == "__main__": unittest.main(exit=False) diff --git a/src/main/python/tests/frame/test_write_read.py b/src/main/python/tests/frame/test_write_read.py index cbbad68c76d..d7fdbd06cba 100644 --- a/src/main/python/tests/frame/test_write_read.py +++ b/src/main/python/tests/frame/test_write_read.py @@ -61,8 +61,7 @@ def test_write_read_binary(self): def test_write_read_csv(self): frame = self.sds.from_pandas(self.df) frame.write(self.temp_dir + "02", header=True, format="csv").compute() - NX = self.sds.read(self.temp_dir + "02", - data_type="frame", format="csv") + NX = self.sds.read(self.temp_dir + "02", data_type="frame", format="csv") result_df = NX.compute() self.assertTrue(isinstance(result_df, pd.DataFrame)) self.assertTrue(self.df.equals(result_df)) diff --git a/src/main/python/tests/iotests/test_io_csv.py b/src/main/python/tests/iotests/test_io_csv.py index 9929658e92a..042e7e308a4 100644 --- a/src/main/python/tests/iotests/test_io_csv.py +++ b/src/main/python/tests/iotests/test_io_csv.py @@ -70,15 +70,13 @@ def test_write_read_data_frame_csv_header(self): def test_write_read_data_frame_csv_header_active(self): filename = self.temp_dir + "data_frame_header_active.csv" self.df.to_csv(filename, index=False, header=True) - result_df = self.sds.read( - filename, data_type="frame", header=True).compute() + result_df = self.sds.read(filename, data_type="frame", header=True).compute() self.compare_frame(result_df, self.df) def test_write_read_data_frame_csv_no_header(self): filename = self.temp_dir + "data_frame_no_header.csv" self.df.to_csv(filename, index=False, header=False) - result_df = self.sds.read( - filename, data_type="frame", header=False).compute() + result_df = self.sds.read(filename, data_type="frame", header=False).compute() self.compare_frame(result_df, self.df) def test_write_read_matrix_csv_no_extra_argument(self): @@ -96,8 +94,7 @@ def test_write_read_matrix_csv_no_extra_argument_header(self): def test_write_read_matrix_csv_no_extra_argument_header_csv(self): filename = self.temp_dir + "data_matrix_header_2.csv" self.df2.to_csv(filename, index=False, header=True) - result_df = (self.sds.read( - filename, format="csv", header=True)).compute() + result_df = (self.sds.read(filename, format="csv", header=True)).compute() self.assertTrue(np.allclose(self.df2.to_numpy(), result_df)) def compare_frame(self, a: pd.DataFrame, b: pd.DataFrame): diff --git a/src/main/python/tests/lineage/test_lineagetrace.py b/src/main/python/tests/lineage/test_lineagetrace.py index d8c325d8f3c..760e26abe10 100644 --- a/src/main/python/tests/lineage/test_lineagetrace.py +++ b/src/main/python/tests/lineage/test_lineagetrace.py @@ -25,11 +25,12 @@ from systemds.context import SystemDSContext -os.environ['SYSDS_QUIET'] = "1" +os.environ["SYSDS_QUIET"] = "1" test_dir = os.path.join("tests", "lineage") temp_dir = os.path.join(test_dir, "temp") -trace_test_1 = os.path.join(test_dir,"trace1.dml") 
+trace_test_1 = os.path.join(test_dir, "trace1.dml") + class TestLineageTrace(unittest.TestCase): @@ -46,28 +47,30 @@ def tearDownClass(cls): def tearDown(self): shutil.rmtree(temp_dir, ignore_errors=True) - @unittest.skipIf("SYSTEMDS_ROOT" not in os.environ, "The test is skipped if SYSTEMDS_ROOT is not set, this is required for this tests since it use the bin/systemds file to execute a reference") + @unittest.skipIf( + "SYSTEMDS_ROOT" not in os.environ, + "The test is skipped if SYSTEMDS_ROOT is not set, this is required for this tests since it use the bin/systemds file to execute a reference", + ) def test_compare_trace1(self): # test getLineageTrace() on an intermediate m = self.sds.full((10, 10), 1) m_res = m + m - python_trace = [x.strip().split("°") - for x in m_res.get_lineage_trace().split("\n")] + python_trace = [ + x.strip().split("°") for x in m_res.get_lineage_trace().split("\n") + ] - sysds_trace = self.create_execute_and_trace_dml(trace_test_1) # It is not guarantied, that the two lists 100% align to be the same. # Therefore for now, we only compare if the command is the same, in same order. python_trace_commands = [x[:1] for x in python_trace] dml_script_commands = [x[:1] for x in sysds_trace] - if(len(python_trace_commands) == 0): + if len(python_trace_commands) == 0: self.fail("Error in pythonscript execution") - if(len(dml_script_commands) == 0): + if len(dml_script_commands) == 0: self.fail("Error in DML script execution") - - self.assertEqual(python_trace_commands[0], dml_script_commands[0]) + self.assertEqual(python_trace_commands[0], dml_script_commands[0]) def create_execute_and_trace_dml(self, script: str): if not os.path.exists(temp_dir): @@ -75,8 +78,7 @@ def create_execute_and_trace_dml(self, script: str): # Call SYSDS! result_file_name = temp_dir + "/tmp_res.txt" - command = "systemds " + script + \ - " > " + result_file_name + " 2> /dev/null" + command = "systemds " + script + " > " + result_file_name + " 2> /dev/null" status = os.system(command) if status < 0: self.fail("systemds call failed.") diff --git a/src/main/python/tests/list/test_list.py b/src/main/python/tests/list/test_list.py index 075589c9c3c..20835286ad1 100644 --- a/src/main/python/tests/list/test_list.py +++ b/src/main/python/tests/list/test_list.py @@ -42,9 +42,9 @@ def test_creation(self): """ Tests the creation of a List object via the SystemDSContext """ - m1 = np.array([1., 2., 3.]) + m1 = np.array([1.0, 2.0, 3.0]) m1p = self.sds.from_numpy(m1) - m2 = np.array([4., 5., 6.]) + m2 = np.array([4.0, 5.0, 6.0]) m2p = self.sds.from_numpy(m2) list_obj = self.sds.array(m1p, m2p) tmp = list_obj[0] + list_obj[1] @@ -55,14 +55,15 @@ def test_addition(self): """ Tests the creation of a List object via the SystemDSContext and adds a value """ - m1 = np.array([1., 2., 3.]) + m1 = np.array([1.0, 2.0, 3.0]) m1p = self.sds.from_numpy(m1) - m2 = np.array([4., 5., 6.]) + m2 = np.array([4.0, 5.0, 6.0]) m2p = self.sds.from_numpy(m2) list_obj = self.sds.array(m1p, m2p) tmp = list_obj[0] + 2 res = tmp.compute().flatten() self.assertTrue(np.allclose(m1 + 2, res)) + if __name__ == "__main__": unittest.main(exit=False) diff --git a/src/main/python/tests/list/test_list_readwrite.py b/src/main/python/tests/list/test_list_readwrite.py index 0ec0cb51a91..f43d0fc3d57 100644 --- a/src/main/python/tests/list/test_list_readwrite.py +++ b/src/main/python/tests/list/test_list_readwrite.py @@ -41,13 +41,13 @@ def tearDownClass(cls): shutil.rmtree(cls.temp_dir) def test_write_followed_by_read(self): - ''' Test write and read 
of lists variables in python. + """Test write and read of lists variables in python. Since we do not support serializing a list (from java to python) yet we read and compute each list element when reading again - ''' - m1 = np.array([[1., 2., 3.]]) + """ + m1 = np.array([[1.0, 2.0, 3.0]]) m1p = self.sds.from_numpy(m1) - m2 = np.array([[4., 5., 6.]]) + m2 = np.array([[4.0, 5.0, 6.0]]) m2p = self.sds.from_numpy(m2) list_obj = self.sds.array(m1p, m2p) diff --git a/src/main/python/tests/manual_tests/multi_log_reg_mnist.py b/src/main/python/tests/manual_tests/multi_log_reg_mnist.py index 080403ea721..3f2920eac4c 100644 --- a/src/main/python/tests/manual_tests/multi_log_reg_mnist.py +++ b/src/main/python/tests/manual_tests/multi_log_reg_mnist.py @@ -29,11 +29,11 @@ with SystemDSContext() as sds: # Train Data - X = sds.from_numpy(d.get_train_data().reshape((60000, 28*28))) + X = sds.from_numpy(d.get_train_data().reshape((60000, 28 * 28))) Y = sds.from_numpy(d.get_train_labels()) + 1.0 bias = multiLogReg(X, Y, tol=0.0001, verbose=False) # Test data - Xt = sds.from_numpy(d.get_test_data().reshape((10000, 28*28))) + Xt = sds.from_numpy(d.get_test_data().reshape((10000, 28 * 28))) Yt = sds.from_numpy(d.get_test_labels()) + 1.0 [_, _, acc] = multiLogRegPredict(Xt, bias, Y=Yt).compute() diff --git a/src/main/python/tests/manual_tests/save_log_reg_mnist_sysds.py b/src/main/python/tests/manual_tests/save_log_reg_mnist_sysds.py index d59b837ec68..ba22f435f15 100644 --- a/src/main/python/tests/manual_tests/save_log_reg_mnist_sysds.py +++ b/src/main/python/tests/manual_tests/save_log_reg_mnist_sysds.py @@ -27,11 +27,11 @@ base_path = "systemds/examples/tutorials/mnist/" with SystemDSContext() as sds: # Train Data - X = sds.from_numpy(d.get_train_data().reshape((60000, 28*28))) + X = sds.from_numpy(d.get_train_data().reshape((60000, 28 * 28))) X.write(base_path + "train_data").compute() Y = sds.from_numpy(d.get_train_labels()) + 1.0 Y.write(base_path + "train_labels").compute() - Xt = sds.from_numpy(d.get_test_data().reshape((10000, 28*28))) + Xt = sds.from_numpy(d.get_test_data().reshape((10000, 28 * 28))) Xt.write(base_path + "test_data").compute() Yt = sds.from_numpy(d.get_test_labels()) + 1.0 Yt.write(base_path + "test_labels").compute() diff --git a/src/main/python/tests/matrix/test_binary_op.py b/src/main/python/tests/matrix/test_binary_op.py index 81699648c39..582cc8b8f5b 100644 --- a/src/main/python/tests/matrix/test_binary_op.py +++ b/src/main/python/tests/matrix/test_binary_op.py @@ -47,109 +47,126 @@ def tearDownClass(cls): cls.sds.close() def test_plus(self): - self.assertTrue(np.allclose( - (self.sds.from_numpy(m1) + self.sds.from_numpy(m2)).compute(), m1 + m2)) + self.assertTrue( + np.allclose( + (self.sds.from_numpy(m1) + self.sds.from_numpy(m2)).compute(), m1 + m2 + ) + ) def test_minus(self): - self.assertTrue(np.allclose( - (self.sds.from_numpy(m1) - self.sds.from_numpy(m2)).compute(), m1 - m2)) + self.assertTrue( + np.allclose( + (self.sds.from_numpy(m1) - self.sds.from_numpy(m2)).compute(), m1 - m2 + ) + ) def test_mul(self): - self.assertTrue(np.allclose( - (self.sds.from_numpy(m1) * self.sds.from_numpy(m2)).compute(), m1 * m2)) + self.assertTrue( + np.allclose( + (self.sds.from_numpy(m1) * self.sds.from_numpy(m2)).compute(), m1 * m2 + ) + ) def test_div(self): - self.assertTrue(np.allclose( - (self.sds.from_numpy(m1) / self.sds.from_numpy(m2)).compute(), m1 / m2)) + self.assertTrue( + np.allclose( + (self.sds.from_numpy(m1) / self.sds.from_numpy(m2)).compute(), m1 / m2 + ) + ) def 
test_plus3_rhs(self): - self.assertTrue(np.allclose( - (self.sds.from_numpy(m1) + s).compute(), m1 + s)) + self.assertTrue(np.allclose((self.sds.from_numpy(m1) + s).compute(), m1 + s)) def test_plus3_lhs(self): - self.assertTrue(np.allclose( - (s + self.sds.from_numpy(m1) ).compute(), s + m1)) + self.assertTrue(np.allclose((s + self.sds.from_numpy(m1)).compute(), s + m1)) def test_minus3_rhs(self): - self.assertTrue(np.allclose( - (self.sds.from_numpy(m1) - s).compute(), m1 - s)) + self.assertTrue(np.allclose((self.sds.from_numpy(m1) - s).compute(), m1 - s)) def test_minus3_lhs(self): - self.assertTrue(np.allclose( - (s - self.sds.from_numpy(m1)).compute(), s - m1 )) + self.assertTrue(np.allclose((s - self.sds.from_numpy(m1)).compute(), s - m1)) def test_mul3_rhs(self): - self.assertTrue(np.allclose( - (self.sds.from_numpy(m1) * s).compute(), m1 * s)) + self.assertTrue(np.allclose((self.sds.from_numpy(m1) * s).compute(), m1 * s)) def test_mul3_lhs(self): - self.assertTrue(np.allclose( - (s * self.sds.from_numpy(m1)).compute(), s * m1)) + self.assertTrue(np.allclose((s * self.sds.from_numpy(m1)).compute(), s * m1)) def test_div3_rhs(self): - self.assertTrue(np.allclose( - (self.sds.from_numpy(m1) / s).compute(), m1 / s)) + self.assertTrue(np.allclose((self.sds.from_numpy(m1) / s).compute(), m1 / s)) def test_div3_lhs(self): - self.assertTrue(np.allclose( - (s / self.sds.from_numpy(m1) ).compute(), s / m1)) + self.assertTrue(np.allclose((s / self.sds.from_numpy(m1)).compute(), s / m1)) def test_matmul(self): - self.assertTrue(np.allclose( - (self.sds.from_numpy(m1) @ self.sds.from_numpy(m2)).compute(), m1.dot(m2))) + self.assertTrue( + np.allclose( + (self.sds.from_numpy(m1) @ self.sds.from_numpy(m2)).compute(), + m1.dot(m2), + ) + ) def test_matmul_chain(self): m3 = np.ones((m2.shape[1], 10), dtype=np.uint8) - m = self.sds.from_numpy(m1) @ self.sds.from_numpy(m2) @ self.sds.from_numpy(m3) + m = self.sds.from_numpy(m1) @ self.sds.from_numpy(m2) @ self.sds.from_numpy(m3) res = (m).compute() np_res = m1.dot(m2).dot(m3) self.assertTrue(np.allclose(res, np_res)) - + def test_matmul_self(self): m = self.sds.from_numpy(m1).t() @ self.sds.from_numpy(m1) res = (m).compute() np_res = np.transpose(m1).dot(m1) self.assertTrue(np.allclose(res, np_res)) - + def test_lt(self): - self.assertTrue(np.allclose( - (self.sds.from_numpy(m1) < self.sds.from_numpy(m2)).compute(), m1 < m2)) + self.assertTrue( + np.allclose( + (self.sds.from_numpy(m1) < self.sds.from_numpy(m2)).compute(), m1 < m2 + ) + ) def test_gt(self): - self.assertTrue(np.allclose( - (self.sds.from_numpy(m1) > self.sds.from_numpy(m2)).compute(), m1 > m2)) + self.assertTrue( + np.allclose( + (self.sds.from_numpy(m1) > self.sds.from_numpy(m2)).compute(), m1 > m2 + ) + ) def test_le(self): - self.assertTrue(np.allclose( - (self.sds.from_numpy(m1) <= self.sds.from_numpy(m2)).compute(), m1 <= m2)) + self.assertTrue( + np.allclose( + (self.sds.from_numpy(m1) <= self.sds.from_numpy(m2)).compute(), m1 <= m2 + ) + ) def test_ge(self): - self.assertTrue(np.allclose( - (self.sds.from_numpy(m1) >= self.sds.from_numpy(m2)).compute(), m1 >= m2)) + self.assertTrue( + np.allclose( + (self.sds.from_numpy(m1) >= self.sds.from_numpy(m2)).compute(), m1 >= m2 + ) + ) def test_abs(self): - self.assertTrue(np.allclose( - self.sds.from_numpy(m1).abs().compute(), np.abs(m1))) + self.assertTrue( + np.allclose(self.sds.from_numpy(m1).abs().compute(), np.abs(m1)) + ) def test_lt3_rhs(self): - self.assertTrue(np.allclose( - (self.sds.from_numpy(m1) <3).compute(), m1 < 3)) 
+ self.assertTrue(np.allclose((self.sds.from_numpy(m1) < 3).compute(), m1 < 3)) def test_lt3_lhs(self): - self.assertTrue(np.allclose( - (3 < self.sds.from_numpy(m1)).compute(), 3 < m1 )) + self.assertTrue(np.allclose((3 < self.sds.from_numpy(m1)).compute(), 3 < m1)) def test_gt3_rhs(self): - self.assertTrue(np.allclose( - (3 > self.sds.from_numpy(m1)).compute(), 3 > m1 )) + self.assertTrue(np.allclose((3 > self.sds.from_numpy(m1)).compute(), 3 > m1)) def test_le3_rhs(self): - self.assertTrue(np.allclose( - (3<= self.sds.from_numpy(m1) ).compute(), 3 <= m1 )) + self.assertTrue(np.allclose((3 <= self.sds.from_numpy(m1)).compute(), 3 <= m1)) def test_ge3_rhs(self): - self.assertTrue(np.allclose( - (3 >= self.sds.from_numpy(m1)).compute(), 3>= m1)) + self.assertTrue(np.allclose((3 >= self.sds.from_numpy(m1)).compute(), 3 >= m1)) + if __name__ == "__main__": unittest.main(exit=False) diff --git a/src/main/python/tests/matrix/test_block_converter.py b/src/main/python/tests/matrix/test_block_converter.py index 25be1203f64..5fe4b205b61 100644 --- a/src/main/python/tests/matrix/test_block_converter.py +++ b/src/main/python/tests/matrix/test_block_converter.py @@ -25,13 +25,11 @@ import numpy as np from py4j.java_gateway import JVMView from systemds.context import SystemDSContext -from systemds.utils.converters import ( - matrix_block_to_numpy, numpy_to_matrix_block) +from systemds.utils.converters import matrix_block_to_numpy, numpy_to_matrix_block class Test_MatrixBlockConverter(unittest.TestCase): - """Test class for testing behavior of the fundamental DMLScript class - """ + """Test class for testing behavior of the fundamental DMLScript class""" sds: SystemDSContext = None diff --git a/src/main/python/tests/matrix/test_diag.py b/src/main/python/tests/matrix/test_diag.py index 7e3f103aeb0..99f74f24c2c 100644 --- a/src/main/python/tests/matrix/test_diag.py +++ b/src/main/python/tests/matrix/test_diag.py @@ -42,14 +42,13 @@ def test_diag_basic1(self): assert np.allclose(sds_result, np_result, 1e-9) def test_diag_basic2(self): - input_matrix = np.array([[1, 2, 3, 4], - [5, 6, 7, 8], - [9, 10, 11, 12], - [13, 14, 15, 16]]) + input_matrix = np.array( + [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]] + ) sds_input = self.sds.from_numpy(input_matrix) sds_result = sds_input.diag().compute() - np_result = np.reshape(np.diag(input_matrix), (-1,1)) + np_result = np.reshape(np.diag(input_matrix), (-1, 1)) assert np.allclose(sds_result, np_result, 1e-9) def test_diag_random1(self): @@ -63,9 +62,9 @@ def test_diag_random2(self): input_matrix = np.random.random((10, 10)) sds_input = self.sds.from_numpy(input_matrix) sds_result = sds_input.diag().compute() - np_result = np.reshape(np.diag(input_matrix), (-1,1)) + np_result = np.reshape(np.diag(input_matrix), (-1, 1)) assert np.allclose(sds_result, np_result, 1e-9) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/src/main/python/tests/matrix/test_eigen.py b/src/main/python/tests/matrix/test_eigen.py index fbf77e53c5c..f036ce73690 100644 --- a/src/main/python/tests/matrix/test_eigen.py +++ b/src/main/python/tests/matrix/test_eigen.py @@ -23,6 +23,7 @@ import numpy as np from systemds.context import SystemDSContext + class TestEigen(unittest.TestCase): def setUp(self): self.sds = SystemDSContext() @@ -32,18 +33,17 @@ def tearDown(self): def test_svd_basic(self): - input_matrix = np.array([[1, 2, 3, 4], - [5, 6, 7, 8], - [9, 10, 11, 12], - [13, 14, 15, 16]]) + input_matrix = np.array( + [[1, 2, 3, 4], [5, 6, 7, 
8], [9, 10, 11, 12], [13, 14, 15, 16]] + ) sds_input = self.sds.from_numpy(input_matrix) eigen_result = sds_input.eigen().compute() - w,V = eigen_result + w, V = eigen_result # TODO add a proper verification # Currently this implementation rely on internal testing only. -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/src/main/python/tests/matrix/test_fft.py b/src/main/python/tests/matrix/test_fft.py index f0055cbb8e2..d4806ab31d5 100644 --- a/src/main/python/tests/matrix/test_fft.py +++ b/src/main/python/tests/matrix/test_fft.py @@ -23,6 +23,7 @@ import numpy as np from systemds.context import SystemDSContext + class TestFFT(unittest.TestCase): def setUp(self): self.sds = SystemDSContext() @@ -32,10 +33,9 @@ def tearDown(self): def test_fft_basic(self): - input_matrix = np.array([[1, 2, 3, 4], - [5, 6, 7, 8], - [9, 10, 11, 12], - [13, 14, 15, 16]]) + input_matrix = np.array( + [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]] + ) sds_input = self.sds.from_numpy(input_matrix) fft_result = sds_input.fft().compute() @@ -44,15 +44,15 @@ def test_fft_basic(self): np_fft_result = np.fft.fft2(input_matrix) expected_real = np.real(np_fft_result) - expected_imag = np.imag(np_fft_result) + expected_imag = np.imag(np_fft_result) np.testing.assert_array_almost_equal(real_part, expected_real, decimal=5) np.testing.assert_array_almost_equal(imag_part, expected_imag, decimal=5) def test_fft_random_1d(self): - np.random.seed(123) + np.random.seed(123) for _ in range(10): - input_matrix = np.random.rand(1, 16) + input_matrix = np.random.rand(1, 16) sds_input = self.sds.from_numpy(input_matrix) @@ -60,17 +60,21 @@ def test_fft_random_1d(self): real_part, imag_part = fft_result - np_fft_result = np.fft.fft(input_matrix[0]) + np_fft_result = np.fft.fft(input_matrix[0]) expected_real = np.real(np_fft_result) - expected_imag = np.imag(np_fft_result) + expected_imag = np.imag(np_fft_result) - np.testing.assert_array_almost_equal(real_part.flatten(), expected_real, decimal=5) - np.testing.assert_array_almost_equal(imag_part.flatten(), expected_imag, decimal=5) + np.testing.assert_array_almost_equal( + real_part.flatten(), expected_real, decimal=5 + ) + np.testing.assert_array_almost_equal( + imag_part.flatten(), expected_imag, decimal=5 + ) def test_fft_2d(self): - np.random.seed(123) + np.random.seed(123) for _ in range(10): - input_matrix = np.random.rand(8, 8) + input_matrix = np.random.rand(8, 8) sds_input = self.sds.from_numpy(input_matrix) @@ -87,22 +91,23 @@ def test_fft_2d(self): def test_fft_non_power_of_two_matrix(self): - input_matrix = np.random.rand(3, 5) + input_matrix = np.random.rand(3, 5) sds_input = self.sds.from_numpy(input_matrix) - with self.assertRaisesRegex(RuntimeError, "This FFT implementation is only defined for matrices with dimensions that are powers of 2."): + with self.assertRaisesRegex( + RuntimeError, + "This FFT implementation is only defined for matrices with dimensions that are powers of 2.", + ): _ = sds_input.fft().compute() def test_ifft_basic(self): - real_input_matrix = np.array([[1, 2, 3, 4], - [5, 6, 7, 8], - [9, 10, 11, 12], - [13, 14, 15, 16]]) + real_input_matrix = np.array( + [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]] + ) - imag_input_matrix = np.array([[1, 2, 3, 4], - [5, 6, 7, 8], - [9, 10, 11, 12], - [13, 14, 15, 16]]) + imag_input_matrix = np.array( + [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]] + ) sds_real_input = self.sds.from_numpy(real_input_matrix) sds_imag_input = 
self.sds.from_numpy(imag_input_matrix) @@ -115,20 +120,17 @@ def test_ifft_basic(self): expected_real = np.real(np_ifft_result) expected_imag = np.imag(np_ifft_result) - np.testing.assert_array_almost_equal(real_part, expected_real, decimal=5) np.testing.assert_array_almost_equal(imag_part, expected_imag, decimal=5) def test_ifft_only_zeros_imag(self): - real_input_matrix = np.array([[1, 2, 3, 4], - [5, 6, 7, 8], - [9, 10, 11, 12], - [13, 14, 15, 16]]) + real_input_matrix = np.array( + [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]] + ) - imag_input_matrix = np.array([[0, 0, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 0]]) + imag_input_matrix = np.array( + [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]] + ) sds_real_input = self.sds.from_numpy(real_input_matrix) sds_imag_input = self.sds.from_numpy(imag_input_matrix) @@ -141,44 +143,47 @@ def test_ifft_only_zeros_imag(self): expected_real = np.real(np_ifft_result) expected_imag = np.imag(np_ifft_result) - np.testing.assert_array_almost_equal(real_part, expected_real, decimal=5) np.testing.assert_array_almost_equal(imag_part, expected_imag, decimal=5) def test_ifft_empty_matrix_imag(self): - real_input_matrix = np.array([[1, 2, 3, 4], - [5, 6, 7, 8], - [9, 10, 11, 12], - [13, 14, 15, 16]]) + real_input_matrix = np.array( + [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]] + ) - imag_input_matrix = np.array([]) + imag_input_matrix = np.array([]) sds_real_input = self.sds.from_numpy(real_input_matrix) sds_imag_input = self.sds.from_numpy(imag_input_matrix) - with self.assertRaisesRegex(RuntimeError, "The second argument to IFFT cannot be an empty matrix. Provide either only a real matrix or a filled real and imaginary one."): + with self.assertRaisesRegex( + RuntimeError, + "The second argument to IFFT cannot be an empty matrix. Provide either only a real matrix or a filled real and imaginary one.", + ): sds_real_input.ifft(sds_imag_input).compute() def test_ifft_empty_2dmatrix_imag(self): - real_input_matrix = np.array([[1, 2, 3, 4], - [5, 6, 7, 8], - [9, 10, 11, 12], - [13, 14, 15, 16]]) + real_input_matrix = np.array( + [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]] + ) - imag_input_matrix = np.array([[]]) + imag_input_matrix = np.array([[]]) sds_real_input = self.sds.from_numpy(real_input_matrix) sds_imag_input = self.sds.from_numpy(imag_input_matrix) - with self.assertRaisesRegex(RuntimeError, "The second argument to IFFT cannot be an empty matrix. Provide either only a real matrix or a filled real and imaginary one."): + with self.assertRaisesRegex( + RuntimeError, + "The second argument to IFFT cannot be an empty matrix. 
Provide either only a real matrix or a filled real and imaginary one.", + ): sds_real_input.ifft(sds_imag_input).compute() def test_ifft_random_1d(self): - np.random.seed(123) + np.random.seed(123) for _ in range(10): - real_part = np.random.rand(1, 16) - imag_part = np.random.rand(1, 16) - complex_input = real_part + 1j * imag_part + real_part = np.random.rand(1, 16) + imag_part = np.random.rand(1, 16) + complex_input = real_part + 1j * imag_part np_fft_result = np.fft.fft(complex_input[0]) @@ -196,12 +201,16 @@ def test_ifft_random_1d(self): expected_real = np.real(expected_ifft) expected_imag = np.imag(expected_ifft) - np.testing.assert_array_almost_equal(real_part_result, expected_real, decimal=5) - np.testing.assert_array_almost_equal(imag_part_result, expected_imag, decimal=5) + np.testing.assert_array_almost_equal( + real_part_result, expected_real, decimal=5 + ) + np.testing.assert_array_almost_equal( + imag_part_result, expected_imag, decimal=5 + ) def test_ifft_real_only_basic(self): - np.random.seed(123) - real = np.array([1, 2, 3, 4, 4, 3, 2, 1]) + np.random.seed(123) + real = np.array([1, 2, 3, 4, 4, 3, 2, 1]) sds_real_input = self.sds.from_numpy(real) @@ -220,9 +229,9 @@ def test_ifft_real_only_basic(self): np.testing.assert_array_almost_equal(imag_part_result, expected_imag, decimal=5) def test_ifft_real_only_random(self): - np.random.seed(123) + np.random.seed(123) for _ in range(10): - input_matrix = np.random.rand(1, 16) + input_matrix = np.random.rand(1, 16) sds_input = self.sds.from_numpy(input_matrix) @@ -230,16 +239,19 @@ def test_ifft_real_only_random(self): real_part, imag_part = ifft_result - np_ifft_result = np.fft.ifft(input_matrix[0]) + np_ifft_result = np.fft.ifft(input_matrix[0]) expected_real = np.real(np_ifft_result) - expected_imag = np.imag(np_ifft_result) - - np.testing.assert_array_almost_equal(real_part.flatten(), expected_real, decimal=5) - np.testing.assert_array_almost_equal(imag_part.flatten(), expected_imag, decimal=5) + expected_imag = np.imag(np_ifft_result) + np.testing.assert_array_almost_equal( + real_part.flatten(), expected_real, decimal=5 + ) + np.testing.assert_array_almost_equal( + imag_part.flatten(), expected_imag, decimal=5 + ) def test_ifft_2d(self): - np.random.seed(123) + np.random.seed(123) for _ in range(10): input_matrix = np.random.rand(8, 8) + 1j * np.random.rand(8, 8) @@ -263,14 +275,18 @@ def test_fft_empty_matrix(self): input_matrix = np.array([]) sds_input = self.sds.from_numpy(input_matrix) - with self.assertRaisesRegex(RuntimeError, "The first argument to FFT cannot be an empty matrix."): + with self.assertRaisesRegex( + RuntimeError, "The first argument to FFT cannot be an empty matrix." + ): _ = sds_input.fft().compute() def test_ifft_empty_matrix(self): input_matrix = np.array([]) sds_input = self.sds.from_numpy(input_matrix) - with self.assertRaisesRegex(RuntimeError, "The first argument to IFFT cannot be an empty matrix."): + with self.assertRaisesRegex( + RuntimeError, "The first argument to IFFT cannot be an empty matrix." 
+ ): _ = sds_input.ifft().compute() def test_fft_single_element(self): @@ -310,24 +326,31 @@ def test_ifft_zeros_matrix(self): np.testing.assert_array_almost_equal(imag_part, np.zeros((4, 4)), decimal=5) def test_ifft_real_and_imaginary_dimensions_check(self): - real_part = np.random.rand(1, 16) - imag_part = np.random.rand(1, 14) + real_part = np.random.rand(1, 16) + imag_part = np.random.rand(1, 14) sds_real_input = self.sds.from_numpy(real_part) sds_imag_input = self.sds.from_numpy(imag_part) - with self.assertRaisesRegex(RuntimeError, "The real and imaginary part of the provided matrix are of different dimensions."): + with self.assertRaisesRegex( + RuntimeError, + "The real and imaginary part of the provided matrix are of different dimensions.", + ): sds_real_input.ifft(sds_imag_input).compute() def test_ifft_non_power_of_two_matrix(self): - real_part = np.random.rand(3, 5) - imag_part = np.random.rand(3, 5) + real_part = np.random.rand(3, 5) + imag_part = np.random.rand(3, 5) sds_real_input = self.sds.from_numpy(real_part) sds_imag_input = self.sds.from_numpy(imag_part) - with self.assertRaisesRegex(RuntimeError, "This IFFT implementation is only defined for matrices with dimensions that are powers of 2."): + with self.assertRaisesRegex( + RuntimeError, + "This IFFT implementation is only defined for matrices with dimensions that are powers of 2.", + ): _ = sds_real_input.ifft(sds_imag_input).compute() -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/src/main/python/tests/matrix/test_order.py b/src/main/python/tests/matrix/test_order.py index cd88ac4b4cd..6319c1451ab 100644 --- a/src/main/python/tests/matrix/test_order.py +++ b/src/main/python/tests/matrix/test_order.py @@ -31,7 +31,7 @@ m = np.random.rand(shape[0], shape[1]) mx = np.random.rand(1, shape[1]) my = np.random.rand(shape[0], 1) -by = random.randrange(1, np.size(m, 1)+1) +by = random.randrange(1, np.size(m, 1) + 1) class TestOrderBase(unittest.TestCase): @@ -50,20 +50,29 @@ def tearDownClass(cls): class TestOrderValid(TestOrderBase): def test_basic(self): - o = self.sds.from_numpy(m).order( - by=by, decreasing=False, index_return=False).compute() - s = m[np.argsort(m[:, by-1])] + o = ( + self.sds.from_numpy(m) + .order(by=by, decreasing=False, index_return=False) + .compute() + ) + s = m[np.argsort(m[:, by - 1])] self.assertTrue(np.allclose(o, s)) def test_index(self): - o = self.sds.from_numpy(m).order( - by=by, decreasing=False, index_return=True).compute() + o = ( + self.sds.from_numpy(m) + .order(by=by, decreasing=False, index_return=True) + .compute() + ) s = np.argsort(m[:, by - 1]) + 1 self.assertTrue(np.allclose(np.transpose(o), s)) def test_decreasing(self): - o = self.sds.from_numpy(m).order( - by=by, decreasing=True, index_return=True).compute() + o = ( + self.sds.from_numpy(m) + .order(by=by, decreasing=True, index_return=True) + .compute() + ) s = np.argsort(-m[:, by - 1]) + 1 self.assertTrue(np.allclose(np.transpose(o), s)) diff --git a/src/main/python/tests/matrix/test_print.py b/src/main/python/tests/matrix/test_print.py index dec3db2fa99..9f921a3708b 100644 --- a/src/main/python/tests/matrix/test_print.py +++ b/src/main/python/tests/matrix/test_print.py @@ -46,12 +46,13 @@ def tearDownClass(cls): def test_print_01(self): self.sds.from_numpy(np.array([1])).to_string().print().compute() sleep(0.2) - self.assertEqual(1,float(self.sds.get_stdout()[0].replace(",", "."))) + self.assertEqual(1, float(self.sds.get_stdout()[0].replace(",", "."))) def test_print_02(self): 
self.sds.scalar(1).print().compute() sleep(0.2) - self.assertEqual(1,float(self.sds.get_stdout()[0])) + self.assertEqual(1, float(self.sds.get_stdout()[0])) + if __name__ == "__main__": unittest.main(exit=False) diff --git a/src/main/python/tests/matrix/test_rIndexing.py b/src/main/python/tests/matrix/test_rIndexing.py index 01401add739..61b337c47ae 100644 --- a/src/main/python/tests/matrix/test_rIndexing.py +++ b/src/main/python/tests/matrix/test_rIndexing.py @@ -61,42 +61,42 @@ def test_3(self): def test_4(self): npA = np.arange(0, 100).reshape(10, 10) m1 = self.sds.from_numpy(npA) - npres = npA[:,4] - res = m1[:,4].compute().flatten() + npres = npA[:, 4] + res = m1[:, 4].compute().flatten() self.assertTrue(np.allclose(res, npres)) def test_5(self): npA = np.arange(0, 100).reshape(10, 10) m1 = self.sds.from_numpy(npA) - npres = npA[:,4:6] - res = m1[:,4:6].compute() + npres = npA[:, 4:6] + res = m1[:, 4:6].compute() self.assertTrue(np.allclose(res, npres)) def test_6(self): npA = np.arange(0, 100).reshape(10, 10) m1 = self.sds.from_numpy(npA) - npres = npA[1:2,4:6] - res = m1[1:2,4:6].compute() + npres = npA[1:2, 4:6] + res = m1[1:2, 4:6].compute() self.assertTrue(np.allclose(res, npres)) def test_7(self): npA = np.arange(0, 100).reshape(10, 10) m1 = self.sds.from_numpy(npA) - npres = npA[1,4:6] - res = m1[1,4:6].compute() + npres = npA[1, 4:6] + res = m1[1, 4:6].compute() self.assertTrue(np.allclose(res, npres)) def test_8(self): npA = np.arange(0, 100).reshape(10, 10) m1 = self.sds.from_numpy(npA) with self.assertRaises(NotImplementedError) as context: - res = m1[1:,4:6].compute() + res = m1[1:, 4:6].compute() def test_9(self): npA = np.arange(0, 100).reshape(10, 10) m1 = self.sds.from_numpy(npA) with self.assertRaises(NotImplementedError) as context: - res = m1[:3,4:6].compute() + res = m1[:3, 4:6].compute() if __name__ == "__main__": diff --git a/src/main/python/tests/matrix/test_rand.py b/src/main/python/tests/matrix/test_rand.py index c05edd3a6c4..3a67d92de6b 100644 --- a/src/main/python/tests/matrix/test_rand.py +++ b/src/main/python/tests/matrix/test_rand.py @@ -55,44 +55,41 @@ def test_rand_shape(self): self.assertTrue(m.shape == shape) def test_rand_min_max(self): - m = ( - self.sds.rand(rows=shape[0], cols=shape[1], - min=min_max[0], max=min_max[1]) - .compute()) + m = self.sds.rand( + rows=shape[0], cols=shape[1], min=min_max[0], max=min_max[1] + ).compute() self.assertTrue((m.min() >= min_max[0]) and (m.max() <= min_max[1])) def test_rand_sparsity(self): - m = self.sds.rand(rows=shape[0], cols=shape[1], - sparsity=sparsity, seed=0).compute() + m = self.sds.rand( + rows=shape[0], cols=shape[1], sparsity=sparsity, seed=0 + ).compute() non_zero_value_percent = np.count_nonzero(m) * 100 / np.prod(m.shape) - self.assertTrue(math.isclose( - non_zero_value_percent, sparsity*100, rel_tol=5)) + self.assertTrue(math.isclose(non_zero_value_percent, sparsity * 100, rel_tol=5)) def test_rand_uniform_distribution(self): - m = ( - self.sds.rand( - rows=dist_shape[0], - cols=dist_shape[1], - pdf="uniform", - min=min_max[0], - max=min_max[1], - seed=0) - .compute()) + m = self.sds.rand( + rows=dist_shape[0], + cols=dist_shape[1], + pdf="uniform", + min=min_max[0], + max=min_max[1], + seed=0, + ).compute() dist = find_best_fit_distribution(m.flatten("F"), distributions) self.assertTrue(dist == "uniform") def test_rand_normal_distribution(self): - m = ( - self.sds.rand( - rows=dist_shape[0], - cols=dist_shape[1], - pdf="normal", - min=min_max[0], - max=min_max[1], - seed=0) - .compute()) + m = 
self.sds.rand( + rows=dist_shape[0], + cols=dist_shape[1], + pdf="normal", + min=min_max[0], + max=min_max[1], + seed=0, + ).compute() dist = find_best_fit_distribution(m.flatten("F"), distributions) self.assertTrue(dist == "norm") diff --git a/src/main/python/tests/matrix/test_replace.py b/src/main/python/tests/matrix/test_replace.py index 85bc3c448e5..331770331e1 100644 --- a/src/main/python/tests/matrix/test_replace.py +++ b/src/main/python/tests/matrix/test_replace.py @@ -50,8 +50,12 @@ def tearDown(self): pass def test_replace_01(self): - m = self.sds.rand(min=0, max=2, rows=shape[0], cols=shape[1], seed=14)\ - .round().replace(1, 2).compute() + m = ( + self.sds.rand(min=0, max=2, rows=shape[0], cols=shape[1], seed=14) + .round() + .replace(1, 2) + .compute() + ) self.assertTrue(1 not in m) self.assertTrue(2 in m) self.assertTrue(0 in m) diff --git a/src/main/python/tests/matrix/test_reverse.py b/src/main/python/tests/matrix/test_reverse.py index 24fac90f803..a1643af257e 100644 --- a/src/main/python/tests/matrix/test_reverse.py +++ b/src/main/python/tests/matrix/test_reverse.py @@ -32,6 +32,7 @@ mx = np.random.rand(1, shape[1]) my = np.random.rand(shape[0], 1) + class TestReverse(unittest.TestCase): sds: SystemDSContext = None @@ -57,7 +58,9 @@ def test_x_axis(self): self.assertTrue(np.allclose(self.sds.from_numpy(mx).rev().compute(), mx)) def test_y_axis(self): - self.assertTrue(np.allclose(self.sds.from_numpy(my).rev().compute(), np.flip(my, 0))) + self.assertTrue( + np.allclose(self.sds.from_numpy(my).rev().compute(), np.flip(my, 0)) + ) if __name__ == "__main__": diff --git a/src/main/python/tests/matrix/test_roll.py b/src/main/python/tests/matrix/test_roll.py index bd5a0f86162..8778e9514d3 100644 --- a/src/main/python/tests/matrix/test_roll.py +++ b/src/main/python/tests/matrix/test_roll.py @@ -32,7 +32,9 @@ m = np.random.rand(shape[0], shape[1]) my = np.random.rand(shape[0], 1) m_empty = np.asarray([[]]) -m_sparse = sparse.random(shape[0], shape[1], density=0.1, format="csr", random_state=5).toarray() +m_sparse = sparse.random( + shape[0], shape[1], density=0.1, format="csr", random_state=5 +).toarray() m_sparse = np.around(m_sparse, decimals=22) diff --git a/src/main/python/tests/matrix/test_slice.py b/src/main/python/tests/matrix/test_slice.py index 2333b85793c..b795de31e1f 100644 --- a/src/main/python/tests/matrix/test_slice.py +++ b/src/main/python/tests/matrix/test_slice.py @@ -80,5 +80,6 @@ def test_slice_row_col_both(self): with self.assertRaises(NotImplementedError): self.sds.from_numpy(m)[[1, 2], [0, 3]] + if __name__ == "__main__": unittest.main(exit=False) diff --git a/src/main/python/tests/matrix/test_split.py b/src/main/python/tests/matrix/test_split.py index 2920821fc8c..f0c984b28ff 100644 --- a/src/main/python/tests/matrix/test_split.py +++ b/src/main/python/tests/matrix/test_split.py @@ -29,6 +29,7 @@ # Seed the randomness. 
np.random.seed(7) + class TestOrder(unittest.TestCase): sds: SystemDSContext = None @@ -50,9 +51,11 @@ def test_basic(self): def test_split(self): X = self.make_matrix() - Y = self.make_matrix(cols = 2) + Y = self.make_matrix(cols=2) - [p1,p2,p3,p4] = split(self.sds.from_numpy(X), self.sds.from_numpy(Y)).compute() + [p1, p2, p3, p4] = split( + self.sds.from_numpy(X), self.sds.from_numpy(Y) + ).compute() exp1 = X[:2] exp2 = X[2:] exp3 = Y[:2] @@ -64,10 +67,12 @@ def test_split(self): def test_split_2(self): rows = 10 - X = self.make_matrix(rows = rows) - Y = self.make_matrix(rows = rows, cols = 2) + X = self.make_matrix(rows=rows) + Y = self.make_matrix(rows=rows, cols=2) - [p1,p2,p3,p4] = split(self.sds.from_numpy(X), self.sds.from_numpy(Y)).compute() + [p1, p2, p3, p4] = split( + self.sds.from_numpy(X), self.sds.from_numpy(Y) + ).compute() exp1 = X[:7] exp2 = X[7:] exp3 = Y[:7] @@ -79,10 +84,12 @@ def test_split_2(self): def test_split_3(self): rows = 100 - X = self.make_matrix(rows = rows) - Y = self.make_matrix(rows = rows, cols = 2) + X = self.make_matrix(rows=rows) + Y = self.make_matrix(rows=rows, cols=2) - [p1,p2,p3,p4] = split(self.sds.from_numpy(X), self.sds.from_numpy(Y)).compute() + [p1, p2, p3, p4] = split( + self.sds.from_numpy(X), self.sds.from_numpy(Y) + ).compute() exp1 = X[:70] exp2 = X[70:] exp3 = Y[:70] @@ -92,13 +99,14 @@ def test_split_3(self): self.assertTrue(np.allclose(p3, exp3)) self.assertTrue(np.allclose(p4, exp4)) - def test_split_4(self): rows = 100 - X = self.make_matrix(rows = rows) - Y = self.make_matrix(rows = rows, cols = 2) + X = self.make_matrix(rows=rows) + Y = self.make_matrix(rows=rows, cols=2) - [p1,p2,p3,p4] = split(self.sds.from_numpy(X), self.sds.from_numpy(Y), f=0.2).compute() + [p1, p2, p3, p4] = split( + self.sds.from_numpy(X), self.sds.from_numpy(Y), f=0.2 + ).compute() exp1 = X[:20] exp2 = X[20:] exp3 = Y[:20] @@ -108,9 +116,8 @@ def test_split_4(self): self.assertTrue(np.allclose(p3, exp3)) self.assertTrue(np.allclose(p4, exp4)) - - def make_matrix(self, rows = 4, cols = 4): - return np.random.rand(rows,cols) + def make_matrix(self, rows=4, cols=4): + return np.random.rand(rows, cols) if __name__ == "__main__": diff --git a/src/main/python/tests/matrix/test_sqrt.py b/src/main/python/tests/matrix/test_sqrt.py index 3f7791f149e..bf46836d6c1 100644 --- a/src/main/python/tests/matrix/test_sqrt.py +++ b/src/main/python/tests/matrix/test_sqrt.py @@ -23,6 +23,7 @@ import numpy as np from systemds.context import SystemDSContext + class TestSQRT(unittest.TestCase): def setUp(self): self.sds = SystemDSContext() @@ -32,31 +33,30 @@ def tearDown(self): def test_sqrt_basic(self): - input_matrix = np.array([[1, 2, 3, 4], - [5, 6, 7, 8], - [9, 10, 11, 12], - [13, 14, 15, 16]]) + input_matrix = np.array( + [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]] + ) sds_input = self.sds.from_numpy(input_matrix) sds_result = sds_input.sqrt().compute() np_result_np = np.sqrt(input_matrix) - assert np.allclose(sds_result, np_result_np,1e-9) + assert np.allclose(sds_result, np_result_np, 1e-9) def test_sqrt_random(self): - input_matrix = np.random.random((10,10)) + input_matrix = np.random.random((10, 10)) sds_input = self.sds.from_numpy(input_matrix) sds_result = sds_input.sqrt().compute() np_result_np = np.sqrt(input_matrix) - assert np.allclose(sds_result, np_result_np,1e-9) + assert np.allclose(sds_result, np_result_np, 1e-9) def test_sqrt_scalar(self): for i in np.random.random(10): sds_input = self.sds.scalar(i) sds_result = 
sds_input.sqrt().compute() np_result_np = np.sqrt(i) - assert np.isclose(sds_result, np_result_np,1e-9) + assert np.isclose(sds_result, np_result_np, 1e-9) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/src/main/python/tests/matrix/test_svd.py b/src/main/python/tests/matrix/test_svd.py index 3429930c44e..2e2dd500f34 100644 --- a/src/main/python/tests/matrix/test_svd.py +++ b/src/main/python/tests/matrix/test_svd.py @@ -23,6 +23,7 @@ import numpy as np from systemds.context import SystemDSContext + class TestSVD(unittest.TestCase): def setUp(self): self.sds = SystemDSContext() @@ -32,18 +33,17 @@ def tearDown(self): def test_svd_basic(self): - input_matrix = np.array([[1, 2, 3, 4], - [5, 6, 7, 8], - [9, 10, 11, 12], - [13, 14, 15, 16]]) + input_matrix = np.array( + [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]] + ) sds_input = self.sds.from_numpy(input_matrix) svd_result = sds_input.svd().compute() - U,S,V = svd_result + U, S, V = svd_result # TODO add a proper verification # Currently this implementation rely on internal testing only. -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/src/main/python/tests/matrix/test_to_one_hot.py b/src/main/python/tests/matrix/test_to_one_hot.py index bd987c4f1f4..c4d673b2a02 100644 --- a/src/main/python/tests/matrix/test_to_one_hot.py +++ b/src/main/python/tests/matrix/test_to_one_hot.py @@ -24,6 +24,7 @@ import numpy as np from systemds.context import SystemDSContext + class TestMatrixOneHot(unittest.TestCase): sds: SystemDSContext = None @@ -38,32 +39,32 @@ def tearDownClass(cls): def test_one_hot_1(self): m1 = np.array([1]) - res = self.sds.from_numpy( m1).to_one_hot(3).compute() + res = self.sds.from_numpy(m1).to_one_hot(3).compute() self.assertTrue((res == [[1, 0, 0]]).all()) def test_one_hot_2(self): m1 = np.array([2]) - res = self.sds.from_numpy( m1).to_one_hot(3).compute() + res = self.sds.from_numpy(m1).to_one_hot(3).compute() self.assertTrue((res == [[0, 1, 0]]).all()) def test_one_hot_3(self): m1 = np.array([2]) - res = self.sds.from_numpy( m1).to_one_hot(2).compute() + res = self.sds.from_numpy(m1).to_one_hot(2).compute() self.assertTrue((res == [[0, 1]]).all()) def test_one_hot_2_2(self): m1 = np.array([2, 2]) - res = self.sds.from_numpy( m1).to_one_hot(2).compute() + res = self.sds.from_numpy(m1).to_one_hot(2).compute() self.assertTrue((res == [[0, 1], [0, 1]]).all()) def test_one_hot_1_2(self): m1 = np.array([1, 2]) - res = self.sds.from_numpy( m1).to_one_hot(2).compute() + res = self.sds.from_numpy(m1).to_one_hot(2).compute() self.assertTrue((res == [[1, 0], [0, 1]]).all()) def test_one_hot_1_2(self): m1 = np.array([1, 2, 2]) - res = self.sds.from_numpy( m1).to_one_hot(2).compute() + res = self.sds.from_numpy(m1).to_one_hot(2).compute() self.assertTrue((res == [[1, 0], [0, 1], [0, 1]]).all()) # TODO make tests for runtime errors, like this one @@ -73,29 +74,29 @@ def test_one_hot_1_2(self): # res = self.sds.from_numpy( m1).to_one_hot(2).compute() def test_one_hot_matrix_1(self): - m1 = np.array([[1],[2],[3]]) - res = self.sds.from_numpy( m1).to_one_hot(3).compute() - self.assertTrue((res == [[1,0,0], [0,1,0], [0,0,1]]).all()) - + m1 = np.array([[1], [2], [3]]) + res = self.sds.from_numpy(m1).to_one_hot(3).compute() + self.assertTrue((res == [[1, 0, 0], [0, 1, 0], [0, 0, 1]]).all()) + def test_one_hot_matrix_2(self): - m1 = np.array([[1],[3],[3]]) - res = self.sds.from_numpy( m1).to_one_hot(3).compute() - self.assertTrue((res == [[1,0,0], [0,0,1], 
[0,0,1]]).all()) + m1 = np.array([[1], [3], [3]]) + res = self.sds.from_numpy(m1).to_one_hot(3).compute() + self.assertTrue((res == [[1, 0, 0], [0, 0, 1], [0, 0, 1]]).all()) def test_one_hot_matrix_3(self): - m1 = np.array([[1],[2],[1]]) - res = self.sds.from_numpy( m1).to_one_hot(2).compute() - self.assertTrue((res == [[1,0], [0,1], [1,0]]).all()) + m1 = np.array([[1], [2], [1]]) + res = self.sds.from_numpy(m1).to_one_hot(2).compute() + self.assertTrue((res == [[1, 0], [0, 1], [1, 0]]).all()) def test_neg_one_hot_numClasses(self): m1 = np.array([1]) with self.assertRaises(ValueError) as context: - res = self.sds.from_numpy( m1).to_one_hot(1).compute() + res = self.sds.from_numpy(m1).to_one_hot(1).compute() def test_neg_one_hot_inputShape(self): m1 = np.array([[1]]) with self.assertRaises(ValueError) as context: - res = self.sds.from_numpy( m1).to_one_hot(1).compute() + res = self.sds.from_numpy(m1).to_one_hot(1).compute() if __name__ == "__main__": diff --git a/src/main/python/tests/matrix/test_transpose.py b/src/main/python/tests/matrix/test_transpose.py index 27f98e19e11..1ed01394291 100644 --- a/src/main/python/tests/matrix/test_transpose.py +++ b/src/main/python/tests/matrix/test_transpose.py @@ -32,6 +32,7 @@ mx = np.random.rand(1, shape[1]) my = np.random.rand(shape[0], 1) + class TestTranspose(unittest.TestCase): sds: SystemDSContext = None @@ -45,19 +46,19 @@ def tearDownClass(cls): cls.sds.close() def test_basic(self): - trans = self.sds.from_numpy( m).t().compute() + trans = self.sds.from_numpy(m).t().compute() self.assertTrue(np.allclose(trans, np.transpose(m))) def test_empty(self): - trans = self.sds.from_numpy( np.asarray([])).t().compute() + trans = self.sds.from_numpy(np.asarray([])).t().compute() self.assertTrue(np.allclose(trans, np.asarray([]))) def test_row(self): - trans = self.sds.from_numpy( mx).t().compute() + trans = self.sds.from_numpy(mx).t().compute() self.assertTrue(np.allclose(trans, np.transpose(mx))) def test_col(self): - trans = self.sds.from_numpy( my).t().compute() + trans = self.sds.from_numpy(my).t().compute() self.assertTrue(np.allclose(trans, np.transpose(my))) diff --git a/src/main/python/tests/matrix/test_trigonometric.py b/src/main/python/tests/matrix/test_trigonometric.py index 257e9a42850..f0da9c8f39b 100644 --- a/src/main/python/tests/matrix/test_trigonometric.py +++ b/src/main/python/tests/matrix/test_trigonometric.py @@ -1,4 +1,4 @@ -#------------------------------------------------------------- +# ------------------------------------------------------------- # # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file @@ -17,7 +17,7 @@ # specific language governing permissions and limitations # under the License. 
 #
-#-------------------------------------------------------------
+# -------------------------------------------------------------
 
 import unittest
 
@@ -27,9 +27,10 @@
 dim = 5
 m1 = np.array(np.random.randint(100, size=dim * dim) + 1.01, dtype=np.double)
 m1.shape = (dim, dim)
-m2 = np.random.choice(np.arange(0.01, 1, 0.1), size=(dim,dim))
+m2 = np.random.choice(np.arange(0.01, 1, 0.1), size=(dim, dim))
 s = 3.02
 
+
 class TestTrigonometricOp(unittest.TestCase):
 
     sds: SystemDSContext = None
@@ -43,31 +44,49 @@ def tearDownClass(cls):
         cls.sds.close()
 
     def test_sin(self):
-        self.assertTrue(np.allclose(self.sds.from_numpy( m1).sin().compute(), np.sin(m1)))
+        self.assertTrue(
+            np.allclose(self.sds.from_numpy(m1).sin().compute(), np.sin(m1))
+        )
 
     def test_cos(self):
-        self.assertTrue(np.allclose(self.sds.from_numpy( m1).cos().compute(), np.cos(m1)))
+        self.assertTrue(
+            np.allclose(self.sds.from_numpy(m1).cos().compute(), np.cos(m1))
+        )
 
     def test_tan(self):
-        self.assertTrue(np.allclose(self.sds.from_numpy( m1).tan().compute(), np.tan(m1)))
+        self.assertTrue(
+            np.allclose(self.sds.from_numpy(m1).tan().compute(), np.tan(m1))
+        )
 
     def test_asin(self):
-        self.assertTrue(np.allclose(self.sds.from_numpy( m2).asin().compute(), np.arcsin(m2)))
+        self.assertTrue(
+            np.allclose(self.sds.from_numpy(m2).asin().compute(), np.arcsin(m2))
+        )
 
     def test_acos(self):
-        self.assertTrue(np.allclose(self.sds.from_numpy( m2).acos().compute(), np.arccos(m2)))
+        self.assertTrue(
+            np.allclose(self.sds.from_numpy(m2).acos().compute(), np.arccos(m2))
+        )
 
     def test_atan(self):
-        self.assertTrue(np.allclose(self.sds.from_numpy( m2).atan().compute(), np.arctan(m2)))
+        self.assertTrue(
+            np.allclose(self.sds.from_numpy(m2).atan().compute(), np.arctan(m2))
+        )
 
     def test_sinh(self):
-        self.assertTrue(np.allclose(self.sds.from_numpy( m1).sinh().compute(), np.sinh(m1)))
+        self.assertTrue(
+            np.allclose(self.sds.from_numpy(m1).sinh().compute(), np.sinh(m1))
+        )
 
     def test_cosh(self):
-        self.assertTrue(np.allclose(self.sds.from_numpy( m1).cosh().compute(), np.cosh(m1)))
+        self.assertTrue(
+            np.allclose(self.sds.from_numpy(m1).cosh().compute(), np.cosh(m1))
+        )
 
     def test_tanh(self):
-        self.assertTrue(np.allclose(self.sds.from_numpy( m1).tanh().compute(), np.tanh(m1)))
+        self.assertTrue(
+            np.allclose(self.sds.from_numpy(m1).tanh().compute(), np.tanh(m1))
+        )
 
 
 if __name__ == "__main__":
diff --git a/src/main/python/tests/matrix/test_write.py b/src/main/python/tests/matrix/test_write.py
index 5e2e1f612b1..d50d1f7b3c4 100644
--- a/src/main/python/tests/matrix/test_write.py
+++ b/src/main/python/tests/matrix/test_write.py
@@ -44,20 +44,21 @@ def tearDown(self):
 
     def test_write_01(self):
         original = np.ones([10, 10])
-        X = self.sds.from_numpy( original)
+        X = self.sds.from_numpy(original)
         X.write(self.temp_dir + "01").compute()
-
+
         NX = self.sds.read(self.temp_dir + "01")
         res = NX.compute()
         self.assertTrue(np.allclose(original, res))
 
     def test_write_02(self):
-        original = np.array([[1,2,3,4,5]])
-        X = self.sds.from_numpy( original)
+        original = np.array([[1, 2, 3, 4, 5]])
+        X = self.sds.from_numpy(original)
         X.write(self.temp_dir + "02").compute()
         NX = self.sds.read(self.temp_dir + "02")
         res = NX.compute()
         self.assertTrue(np.allclose(original, res))
 
+
 if __name__ == "__main__":
     unittest.main(exit=False)
diff --git a/src/main/python/tests/nn/test_affine.py b/src/main/python/tests/nn/test_affine.py
index a7de2c383d6..b13ba39ea39 100644
--- a/src/main/python/tests/nn/test_affine.py
+++ b/src/main/python/tests/nn/test_affine.py
@@ -31,23 +31,35 @@
 dim = 6
 n = 5
 m = 6
-X = np.array([[9., 2., 5., 5., 9., 6.],
-              [0., 8., 8., 0., 5., 7.],
-              [2., 2., 6., 3., 4., 3.],
-              [3., 5., 2., 6., 6., 0.],
-              [3., 8., 5., 2., 5., 2.]])
-
-W = np.array([[8., 3., 7., 2., 0., 1.],
-              [6., 5., 1., 2., 6., 1.],
-              [2., 4., 7., 7., 6., 4.],
-              [3., 8., 9., 3., 5., 6.],
-              [3., 8., 0., 5., 7., 9.],
-              [7., 9., 7., 4., 5., 7.]])
-dout = np.array([[9., 5., 4., 0., 4., 1.],
-                 [1., 2., 2., 3., 3., 9.],
-                 [7., 4., 0., 8., 7., 0.],
-                 [8., 7., 0., 6., 0., 9.],
-                 [1., 6., 5., 8., 8., 9.]])
+X = np.array(
+    [
+        [9.0, 2.0, 5.0, 5.0, 9.0, 6.0],
+        [0.0, 8.0, 8.0, 0.0, 5.0, 7.0],
+        [2.0, 2.0, 6.0, 3.0, 4.0, 3.0],
+        [3.0, 5.0, 2.0, 6.0, 6.0, 0.0],
+        [3.0, 8.0, 5.0, 2.0, 5.0, 2.0],
+    ]
+)
+
+W = np.array(
+    [
+        [8.0, 3.0, 7.0, 2.0, 0.0, 1.0],
+        [6.0, 5.0, 1.0, 2.0, 6.0, 1.0],
+        [2.0, 4.0, 7.0, 7.0, 6.0, 4.0],
+        [3.0, 8.0, 9.0, 3.0, 5.0, 6.0],
+        [3.0, 8.0, 0.0, 5.0, 7.0, 9.0],
+        [7.0, 9.0, 7.0, 4.0, 5.0, 7.0],
+    ]
+)
+dout = np.array(
+    [
+        [9.0, 5.0, 4.0, 0.0, 4.0, 1.0],
+        [1.0, 2.0, 2.0, 3.0, 3.0, 9.0],
+        [7.0, 4.0, 0.0, 8.0, 7.0, 0.0],
+        [8.0, 7.0, 0.0, 6.0, 0.0, 9.0],
+        [1.0, 6.0, 5.0, 8.0, 8.0, 9.0],
+    ]
+)
 
 
 class TestAffine(unittest.TestCase):
@@ -116,14 +128,16 @@ def test_multiple_sourcing(self):
         scripts = DMLScript(sds)
         scripts.build_code(X2)
 
-        self.assertEqual(1, self.count_sourcing(scripts.dml_script, layer_name="affine"))
+        self.assertEqual(
+            1, self.count_sourcing(scripts.dml_script, layer_name="affine")
+        )
         sds.close()
 
     def test_multiple_context(self):
         # This test evaluates if multiple conflicting contexts work.
-        # It is not the 'optimal' nor the intended use
+        # It is not the 'optimal' nor the intended use
         # If it fails in the future, feel free to delete it.
-
+
         # two context
         sds1 = SystemDSContext()
         sds2 = SystemDSContext()
@@ -157,11 +171,14 @@ def count_sourcing(self, script: str, layer_name: str):
         :param layer_name: example: "affine", "relu"
         :return:
         """
-        return len([
-            line for line in script.split("\n")
-            if all([line.startswith("source"), line.endswith(layer_name)])
-        ])
+        return len(
+            [
+                line
+                for line in script.split("\n")
+                if all([line.startswith("source"), line.endswith(layer_name)])
+            ]
+        )
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()
diff --git a/src/main/python/tests/nn/test_neural_network.py b/src/main/python/tests/nn/test_neural_network.py
index 7ff85487953..f13b591cb48 100644
--- a/src/main/python/tests/nn/test_neural_network.py
+++ b/src/main/python/tests/nn/test_neural_network.py
@@ -25,6 +25,7 @@
 from tests.nn.neural_network import NeuralNetwork
 from systemds.script_building.script import DMLScript
 
+
 class TestNeuralNetwork(unittest.TestCase):
 
     sds: SystemDSContext = None
@@ -33,12 +34,22 @@ def setUpClass(cls):
         cls.sds = SystemDSContext()
         np.random.seed(42)
         cls.X = np.random.rand(6, 1)
-        cls.exp_out = np.array([
-            -0.37768756, -0.47785831, -0.95870362,
-            -1.21297214, -0.73814523, -0.933917,
-            -0.60368929, -0.76380049, -0.15732974,
-            -0.19905692, -0.15730542, -0.19902615
-        ])
+        cls.exp_out = np.array(
+            [
+                -0.37768756,
+                -0.47785831,
+                -0.95870362,
+                -1.21297214,
+                -0.73814523,
+                -0.933917,
+                -0.60368929,
+                -0.76380049,
+                -0.15732974,
+                -0.19905692,
+                -0.15730542,
+                -0.19902615,
+            ]
+        )
 
     @classmethod
     def tearDownClass(cls):
@@ -55,7 +66,7 @@ def test_forward_pass(self):
 
         # test forward pass through the network using dynamic calls
         dynamic_out = nn.forward_dynamic_pass(Xm).compute().flatten()
-        self.assertTrue(np.allclose(dynamic_out,self.exp_out))
+        self.assertTrue(np.allclose(dynamic_out, self.exp_out))
 
     def test_multiple_sourcing(self):
         sds = SystemDSContext()
@@ -67,7 +78,9 @@ def test_multiple_sourcing(self):
         scripts = DMLScript(sds)
         scripts.build_code(network_out)
 
-        self.assertEqual(1, self.count_sourcing(scripts.dml_script, layer_name="affine"))
+        self.assertEqual(
+            1, self.count_sourcing(scripts.dml_script, layer_name="affine")
+        )
         self.assertEqual(1, self.count_sourcing(scripts.dml_script, layer_name="relu"))
         sds.close()
 
@@ -81,11 +94,14 @@ def count_sourcing(self, script: str, layer_name: str):
         :param layer_name: example: "affine", "relu"
         :return:
         """
-        return len([
-            line for line in script.split("\n")
-            if all([line.startswith("source"), line.endswith(layer_name)])
-        ])
+        return len(
+            [
+                line
+                for line in script.split("\n")
+                if all([line.startswith("source"), line.endswith(layer_name)])
+            ]
+        )
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()
diff --git a/src/main/python/tests/nn/test_relu.py b/src/main/python/tests/nn/test_relu.py
index 06839ce494b..2fa6306a04c 100644
--- a/src/main/python/tests/nn/test_relu.py
+++ b/src/main/python/tests/nn/test_relu.py
@@ -82,7 +82,7 @@ def test_multiple_sourcing(self):
         scripts = DMLScript(sds)
         scripts.build_code(X2)
 
-        self.assertEqual(1,self.count_sourcing(scripts.dml_script, layer_name="relu"))
+        self.assertEqual(1, self.count_sourcing(scripts.dml_script, layer_name="relu"))
         sds.close()
 
     def count_sourcing(self, script: str, layer_name: str):
@@ -95,11 +95,14 @@ def count_sourcing(self, script: str, layer_name: str):
         :param layer_name: example: "affine", "relu"
         :return:
         """
-        return len([
-            line for line in script.split("\n")
-            if all([line.startswith("source"), line.endswith(layer_name)])
-        ])
+        return len(
+            [
+                line
+                for line in script.split("\n")
+                if all([line.startswith("source"), line.endswith(layer_name)])
+            ]
+        )
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()
diff --git a/src/main/python/tests/nn/test_sequential.py b/src/main/python/tests/nn/test_sequential.py
index a7a361e40fb..12360679d33 100644
--- a/src/main/python/tests/nn/test_sequential.py
+++ b/src/main/python/tests/nn/test_sequential.py
@@ -50,10 +50,12 @@ def __init__(self, sds):
         self.sds = sds
 
     def _instance_forward(self, X: Matrix):
-        return MultiReturn(self.sds, "test.dml", output_nodes=[X, 'some_random_return'])
+        return MultiReturn(self.sds, "test.dml", output_nodes=[X, "some_random_return"])
 
     def _instance_backward(self, dout: Matrix, X: Matrix):
-        return MultiReturn(self.sds, "test.dml", output_nodes=[dout, X, 'some_random_return'])
+        return MultiReturn(
+            self.sds, "test.dml", output_nodes=[dout, X, "some_random_return"]
+        )
 
 
 class TestSequential(unittest.TestCase):
@@ -274,7 +276,9 @@ def test_multireturn_variation_single_to_multiple(self):
         """
        Test that a single return into multiple MultiReturn is handled correctly
         """
-        model = Sequential(TestLayerImpl(1), MultiReturnImpl(self.sds), MultiReturnImpl(self.sds))
+        model = Sequential(
+            TestLayerImpl(1), MultiReturnImpl(self.sds), MultiReturnImpl(self.sds)
+        )
         in_matrix = self.sds.from_numpy(np.array([[1, 2], [3, 4]]))
         out_matrix = model.forward(in_matrix).compute()
         self.assertEqual(out_matrix.tolist(), [[2, 3], [4, 5]])
@@ -285,7 +289,9 @@ def test_multireturn_variation_multiple_to_single(self):
         """
         Test that multiple MultiReturn into a single return are handled correctly
         """
-        model = Sequential(MultiReturnImpl(self.sds), MultiReturnImpl(self.sds), TestLayerImpl(1))
+        model = Sequential(
+            MultiReturnImpl(self.sds), MultiReturnImpl(self.sds), TestLayerImpl(1)
+        )
         in_matrix = self.sds.from_numpy(np.array([[1, 2], [3, 4]]))
         out_matrix = model.forward(in_matrix).compute()
         self.assertEqual(out_matrix.tolist(), [[2, 3], [4, 5]])
@@ -296,7 +302,9 @@ def test_multireturn_variation_sandwich(self):
         """
         Test that a single return between two MultiReturn is handled correctly
         """
-        model = Sequential(MultiReturnImpl(self.sds), TestLayerImpl(1), MultiReturnImpl(self.sds))
+        model = Sequential(
+            MultiReturnImpl(self.sds), TestLayerImpl(1), MultiReturnImpl(self.sds)
+        )
         in_matrix = self.sds.from_numpy(np.array([[1, 2], [3, 4]]))
         out_matrix = model.forward(in_matrix).compute()
         self.assertEqual(out_matrix.tolist(), [[2, 3], [4, 5]])
diff --git a/src/main/python/tests/script/test_dml_script.py b/src/main/python/tests/script/test_dml_script.py
index 809a20cb879..901271c35e6 100644
--- a/src/main/python/tests/script/test_dml_script.py
+++ b/src/main/python/tests/script/test_dml_script.py
@@ -27,8 +27,7 @@
 
 
 class Test_DMLScript(unittest.TestCase):
-    """Test class for testing behavior of the fundamental DMLScript class
-    """
+    """Test class for testing behavior of the fundamental DMLScript class"""
 
     sds: SystemDSContext = None
 
@@ -59,14 +58,14 @@ def test_simple_print_2(self):
         script.execute()
         sleep(0.5)
         stdout = self.sds.get_stdout(100)
-        self.assertListEqual(['Hello', 'World', '!'], stdout)
+        self.assertListEqual(["Hello", "World", "!"], stdout)
 
     def test_multiple_executions_1(self):
         scr_a = DMLScript(self.sds)
-        scr_a.add_code('x = 4')
-        scr_a.add_code('print(x)')
-        scr_a.add_code('y = x + 1')
-        scr_a.add_code('print(y)')
+        scr_a.add_code("x = 4")
+        scr_a.add_code("print(x)")
+        scr_a.add_code("y = x + 1")
+        scr_a.add_code("print(y)")
         scr_a.execute()
         sleep(0.5)
         stdout = self.sds.get_stdout(100)
diff --git a/src/main/python/tests/source/test_source_01.py b/src/main/python/tests/source/test_source_01.py
index 00cb14b8276..ef9b4ee723f 100644
--- a/src/main/python/tests/source/test_source_01.py
+++ b/src/main/python/tests/source/test_source_01.py
@@ -38,25 +38,22 @@ def tearDownClass(cls):
         cls.sds.close()
 
     def test_01_single_call(self):
-        c = self.sds.source("./tests/source/source_01.dml",
-                            "test").test_01()
+        c = self.sds.source("./tests/source/source_01.dml", "test").test_01()
         res = c.compute()
         self.assertEqual(1, self.imports(c.script_str))
         self.assertTrue(np.allclose(np.array([[1]]), res))
 
     def test_01_multi_call_01(self):
-        s = self.sds.source("./tests/source/source_01.dml",
-                            "test")
+        s = self.sds.source("./tests/source/source_01.dml", "test")
         a = s.test_01()
         b = s.test_01()
-        c = a + b
+        c = a + b
         res = c.compute()
         self.assertEqual(1, self.imports(c.script_str))
         self.assertTrue(np.allclose(np.array([[2]]), res))
 
     def test_01_multi_call_02(self):
-        s = self.sds.source("./tests/source/source_01.dml",
-                            "test")
+        s = self.sds.source("./tests/source/source_01.dml", "test")
         a = s.test_01()
         b = s.test_01()
         c = a + b + a
@@ -65,27 +62,27 @@ def test_01_multi_call_02(self):
         self.assertTrue(np.allclose(np.array([[3]]), res))
 
     def test_01_invalid_function(self):
-        s = self.sds.source("./tests/source/source_01.dml",
-                            "test")
+        s = self.sds.source("./tests/source/source_01.dml", "test")
         with self.assertRaises(AttributeError) as context:
             a = s.test_01_NOT_A_REAL_FUNCTION()
 
     def test_01_invalid_arguments(self):
-        s = self.sds.source("./tests/source/source_01.dml",
-                            "test")
-        m = self.sds.full((1,1),2)
+        s = self.sds.source("./tests/source/source_01.dml", "test")
+        m = self.sds.full((1, 1), 2)
         with self.assertRaises(TypeError) as context:
             a = s.test_01(m)
 
     def test_01_sum(self):
-        c = self.sds.source("./tests/source/source_01.dml",
-                            "test").test_01().sum()
+        c = self.sds.source("./tests/source/source_01.dml", "test").test_01().sum()
         res = c.compute()
         self.assertEqual(1, self.imports(c.script_str))
         self.assertTrue(np.allclose(np.array([[1]]), res))
 
-    def imports(self, script:str) -> int:
-        return script.split("\n").count('source("./tests/source/source_01.dml") as test')
+    def imports(self, script: str) -> int:
+        return script.split("\n").count(
+            'source("./tests/source/source_01.dml") as test'
+        )
+
 
 if __name__ == "__main__":
     unittest.main(exit=False)
diff --git a/src/main/python/tests/source/test_source_02.py b/src/main/python/tests/source/test_source_02.py
index dc74efbfcc8..bc6d1168ac5 100644
--- a/src/main/python/tests/source/test_source_02.py
+++ b/src/main/python/tests/source/test_source_02.py
@@ -38,16 +38,14 @@ def tearDownClass(cls):
         cls.sds.close()
 
     def test_func_01(self):
-        c = self.sds.source("./tests/source/source_02.dml",
-                            "test").func_01()
+        c = self.sds.source("./tests/source/source_02.dml", "test").func_01()
         res = c.compute()
         self.assertEqual(1, self.imports(c.script_str))
         self.assertTrue(np.allclose(np.array([[1]]), res))
 
     def test_func_02(self):
         m = self.sds.full((3, 5), 2)
-        c = self.sds.source("./tests/source/source_02.dml",
-                            "test").func_02(m)
+        c = self.sds.source("./tests/source/source_02.dml", "test").func_02(m)
         res = c.compute()
         self.assertEqual(1, self.imports(c.script_str))
         self.assertEqual(1, res.shape[1])
@@ -63,22 +61,22 @@ def test_func_02_call_self(self):
 
     def test_func_02_sum(self):
         m = self.sds.full((3, 5), 2)
-        c = self.sds.source("./tests/source/source_02.dml",
-                            "test").func_02(m)
+        c = self.sds.source("./tests/source/source_02.dml", "test").func_02(m)
         c = c.sum()
         res = c.compute()
         self.assertEqual(1, self.imports(c.script_str))
 
     def test_Preprocess_sum(self):
         m = self.sds.full((3, 5), 2)
-        c = self.sds.source("./tests/source/source_02.dml",
-                            "test").Preprocess(m)
+        c = self.sds.source("./tests/source/source_02.dml", "test").Preprocess(m)
         c = c.sum()
         res = c.compute()
         self.assertEqual(1, self.imports(c.script_str))
 
     def imports(self, script: str) -> int:
-        return script.split("\n").count('source("./tests/source/source_02.dml") as test')
+        return script.split("\n").count(
+            'source("./tests/source/source_02.dml") as test'
+        )
 
 
 if __name__ == "__main__":
diff --git a/src/main/python/tests/source/test_source_list.py b/src/main/python/tests/source/test_source_list.py
index 4a3cc7f60a1..066571a2e1e 100644
--- a/src/main/python/tests/source/test_source_list.py
+++ b/src/main/python/tests/source/test_source_list.py
@@ -43,7 +43,7 @@ def test_single_return(self):
         arr = self.sds.array(self.sds.full((10, 10), 4))
         c = self.sds.source(self.source_path, "test").func(arr)
         res = c.sum().compute()
-        self.assertTrue(res == 10*10*4)
+        self.assertTrue(res == 10 * 10 * 4)
 
     def test_input_multireturn(self):
         m = self.sds.full((10, 10), 2)
@@ -62,5 +62,6 @@ def test_input_multireturn(self):
 #         res = c.sum().compute()
 #         self.assertTrue(res == 10*10*4)
 
+
 if __name__ == "__main__":
-    unittest.main(exit=False)
\ No newline at end of file
+    unittest.main(exit=False)
diff --git a/src/main/python/tests/source/test_source_multi_arguments.py b/src/main/python/tests/source/test_source_multi_arguments.py
index 448392d68d3..555096a9d6a 100644
--- a/src/main/python/tests/source/test_source_multi_arguments.py
+++ b/src/main/python/tests/source/test_source_multi_arguments.py
@@ -39,17 +39,16 @@ def tearDownClass(cls):
         cls.sds.close()
 
     def test_01(self):
-        s = self.sds.source(self.src_path,"test")
+        s = self.sds.source(self.src_path, "test")
 
-        m1 = self.sds.rand(12,1)
-        m2 = self.sds.rand(1,2)
-        m3 = self.sds.rand(23,3)
-        c = s.blaaa_is_a_BAAD_function_name_but_it_works(m1,m2,m3)
+        m1 = self.sds.rand(12, 1)
+        m2 = self.sds.rand(1, 2)
+        m3 = self.sds.rand(23, 3)
+        c = s.blaaa_is_a_BAAD_function_name_but_it_works(m1, m2, m3)
 
         res = c.compute()
         self.assertEqual(1, self.imports(c.script_str))
 
-
     # def test_02(self):
     #     s = self.sds.source(self.src_path,"test")
@@ -60,8 +59,9 @@ def test_01(self):
     #         self.assertEqual(1, self.imports(c.script_str))
    #         self.assertTrue("V3" not in c.script_str, "Only 2 variables should be allocated.")
 
-    def imports(self, script:str) -> int:
+    def imports(self, script: str) -> int:
         return script.split("\n").count(f'source("{self.src_path}") as test')
 
+
 if __name__ == "__main__":
     unittest.main(exit=False)
diff --git a/src/main/python/tests/source/test_source_neural_net.py b/src/main/python/tests/source/test_source_neural_net.py
index 5eff8c5e871..59302b20844 100644
--- a/src/main/python/tests/source/test_source_neural_net.py
+++ b/src/main/python/tests/source/test_source_neural_net.py
@@ -45,9 +45,12 @@ def test_01(self):
 
     def test_test_method(self):
         # Verify that we can call a function.
         m = np.full((1, 2), 1)
-        res = self.sds.source(self.src_path, "test")\
-            .test_function(self.sds.full((1, 2), 1))[1]\
-            .as_matrix().compute()
+        res = (
+            self.sds.source(self.src_path, "test")
+            .test_function(self.sds.full((1, 2), 1))[1]
+            .as_matrix()
+            .compute()
+        )
         self.assertTrue(np.allclose(m, res))
 
diff --git a/src/main/python/tests/source/test_source_no_return.py b/src/main/python/tests/source/test_source_no_return.py
index 237a8d2fbdb..75c6f11e8ea 100644
--- a/src/main/python/tests/source/test_source_no_return.py
+++ b/src/main/python/tests/source/test_source_no_return.py
@@ -24,6 +24,7 @@
 from time import sleep
 from systemds.context import SystemDSContext
 
+
 class TestSource_NoReturn(unittest.TestCase):
 
     sds: SystemDSContext = None
@@ -38,28 +39,29 @@ def tearDownClass(cls):
         cls.sds.close()
 
     def test_01(self):
-        s = self.sds.source(self.src_path,"test")
+        s = self.sds.source(self.src_path, "test")
         c = s.no_return()
         c.compute()
-        sleep(1) # to allow the std buffer to fill
+        sleep(1)  # to allow the std buffer to fill
         stdout = self.sds.get_stdout()
-        self.assertEqual(4.2 + 14 * 2,float(stdout[0]))
+        self.assertEqual(4.2 + 14 * 2, float(stdout[0]))
 
     def test_02(self):
-        s = self.sds.source(self.src_path,"test")
+        s = self.sds.source(self.src_path, "test")
         c = s.no_return(4)
         c.compute()
-        sleep(1) # to allow the std buffer to fill
+        sleep(1)  # to allow the std buffer to fill
         stdout = self.sds.get_stdout()
-        self.assertEqual(4 + 14 * 2,float(stdout[0]))
+        self.assertEqual(4 + 14 * 2, float(stdout[0]))
 
     def test_03(self):
-        s = self.sds.source(self.src_path,"test")
+        s = self.sds.source(self.src_path, "test")
         c = s.no_return(a=14)
         c.compute()
-        sleep(1) # to allow the std buffer to fill
+        sleep(1)  # to allow the std buffer to fill
         stdout = self.sds.get_stdout()
-        self.assertEqual(14 + 14 * 2,float(stdout[0]))
+        self.assertEqual(14 + 14 * 2, float(stdout[0]))
 
+
 if __name__ == "__main__":
     unittest.main(exit=False)
diff --git a/src/main/python/tests/source/test_source_reuse.py b/src/main/python/tests/source/test_source_reuse.py
index f374e78647c..77188af7a0f 100644
--- a/src/main/python/tests/source/test_source_reuse.py
+++ b/src/main/python/tests/source/test_source_reuse.py
@@ -33,8 +33,7 @@ class TestSourceReuse(unittest.TestCase):
 
     @classmethod
     def setUpClass(cls):
         cls.sds = SystemDSContext()
-        cls.source_reuse = cls.sds.source("./tests/source/source_01.dml",
-                                          "test")
+        cls.source_reuse = cls.sds.source("./tests/source/source_01.dml", "test")
 
     @classmethod
     def tearDownClass(cls):
@@ -47,8 +46,7 @@ def test_02_second_call(self):
         self.call()
 
     def test_03_same_function(self):
-        s = self.sds.source("./tests/source/source_01.dml",
-                            "test")
+        s = self.sds.source("./tests/source/source_01.dml", "test")
         c = s.test_01().compute()
         d = s.test_01().compute()
         self.assertTrue(np.allclose(c, d))
@@ -59,10 +57,11 @@ def call(self):
         self.assertEqual(1, self.imports(c.script_str))
         self.assertTrue(np.allclose(np.array([[1]]), res))
 
+    def imports(self, script: str) -> int:
+        return script.split("\n").count(
+            'source("./tests/source/source_01.dml") as test'
+        )
 
-    def imports(self, script:str) -> int:
-        return script.split("\n").count('source("./tests/source/source_01.dml") as test')
-
 if __name__ == "__main__":
     unittest.main(exit=False)
diff --git a/src/main/python/tests/source/test_source_with_default_values.py b/src/main/python/tests/source/test_source_with_default_values.py
index bcee056aa1e..f8229fbd7f9 100644
--- a/src/main/python/tests/source/test_source_with_default_values.py
+++ b/src/main/python/tests/source/test_source_with_default_values.py
@@ -24,6 +24,7 @@
 import numpy as np
 from systemds.context import SystemDSContext
 
+
 class TestSource_DefaultValues(unittest.TestCase):
 
     sds: SystemDSContext = None
@@ -38,34 +39,34 @@ def tearDownClass(cls):
         cls.sds.close()
 
     def test_01(self):
-        s = self.sds.source(self.src_path,"test")
+        s = self.sds.source(self.src_path, "test")
         c = s.d()
         res = c.compute()
-        self.assertEqual(4.2,res)
+        self.assertEqual(4.2, res)
 
     def test_02(self):
-        s = self.sds.source(self.src_path,"test")
+        s = self.sds.source(self.src_path, "test")
         c = s.d(a=self.sds.scalar(5))
         res = c.compute()
-        self.assertEqual(5,res)
+        self.assertEqual(5, res)
 
     def test_03(self):
-        s = self.sds.source(self.src_path,"test")
+        s = self.sds.source(self.src_path, "test")
         c = s.d(a=5)
         res = c.compute()
-        self.assertEqual(5,res)
+        self.assertEqual(5, res)
 
     def test_04(self):
-        s = self.sds.source(self.src_path,"test")
+        s = self.sds.source(self.src_path, "test")
         c = s.d(c=False)
         res = c.compute()
-        self.assertEqual(10,res)
+        self.assertEqual(10, res)
 
     def test_05(self):
-        s = self.sds.source(self.src_path,"test")
-        c = s.d(b = 1, c=False)
+        s = self.sds.source(self.src_path, "test")
+        c = s.d(b=1, c=False)
         res = c.compute()
-        self.assertEqual(1,res)
+        self.assertEqual(1, res)
 
 
 if __name__ == "__main__":