Skip to content

Commit

Permalink
[FORMATTING] apply formatting to the Python API files
Browse files Browse the repository at this point in the history
  • Loading branch information
e-strauss committed Sep 26, 2024
1 parent 7bc6ee4 commit 504e751
Show file tree
Hide file tree
Showing 113 changed files with 2,072 additions and 1,474 deletions.
320 changes: 202 additions & 118 deletions src/main/python/systemds/context/systemds_context.py

Large diffs are not rendered by default.

48 changes: 27 additions & 21 deletions src/main/python/systemds/examples/tutorials/adult.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,56 +47,62 @@ class DataManager:
_data_string_labels: list

def __init__(self):
    """Set the download URL and the local paths used for the adult dataset."""
    # Remote archive and the local path it is stored under.
    self._data_zip_url = (
        "https://systemds.apache.org/assets/datasets/adult/data.zip"
    )
    self._data_zip_loc = "systemds/examples/tutorials/adult/data.zip"

    # Local files read by the getter methods.
    self._train_data_loc = "systemds/examples/tutorials/adult/train_data.csv"
    self._test_data_loc = "systemds/examples/tutorials/adult/test_data.csv"
    self._jspec_loc = "systemds/examples/tutorials/adult/jspec.json"

def get_preprocessed_dataset(self, sds: SystemDSContext) -> List[pd.DataFrame]:
    """Return train data, train labels, test data and test labels, in that order.

    Each element is produced by the matching ``get_*`` method, which reads
    through ``sds``. The four values are returned together as a tuple.
    """
    return (
        self.get_train_data(sds),
        self.get_train_labels(sds),
        self.get_test_data(sds),
        self.get_test_labels(sds),
    )


def get_preprocessed_dataset_pandas(
    self, sds: SystemDSContext
) -> List[pd.DataFrame]:
    """Return train data, train labels, test data and test labels as pandas objects.

    ``sds`` is kept in the signature for backward compatibility but is not
    used: the ``get_*_pandas`` methods take no context argument, so
    forwarding it would raise ``TypeError``.
    """
    return (
        self.get_train_data_pandas(),
        self.get_train_labels_pandas(),
        self.get_test_data_pandas(),
        self.get_test_labels_pandas(),
    )

def get_train_data_pandas(self) -> pd.DataFrame:
    """Return the parsed training file without its "income" column.

    Calls ``_get_data`` on the training path first, which is expected to
    make the file available locally.
    """
    self._get_data(self._train_data_loc)
    return self._parse_data(self._train_data_loc).drop(labels=["income"], axis=1)

def get_train_data(self, sds: SystemDSContext) -> "Frame":
    """Read the training file through ``sds`` and return columns 0 through 13.

    Calls ``_get_data`` on the training path first, which is expected to
    make the file available locally.
    """
    self._get_data(self._train_data_loc)
    return sds.read(self._train_data_loc)[:, 0:14]

def get_train_labels_pandas(self) -> pd.DataFrame:
    """Return only the "income" column of the parsed training file.

    The double-bracket selection keeps the result a one-column DataFrame.
    """
    self._get_data(self._train_data_loc)
    return self._parse_data(self._train_data_loc)[["income"]]

def get_train_labels(self, sds: SystemDSContext) -> "Frame":
    """Read the training file through ``sds`` and return column 14.

    NOTE(review): column 14 is presumably the "income" label that the
    pandas variant selects by name; confirm against the CSV layout.
    """
    self._get_data(self._train_data_loc)
    return sds.read(self._train_data_loc)[:, 14]

def get_test_data_pandas(self) -> pd.DataFrame:
    """Return the parsed test file without its "income" column.

    Calls ``_get_data`` on the test path first, which is expected to make
    the file available locally.
    """
    self._get_data(self._test_data_loc)
    return self._parse_data(self._test_data_loc).drop(labels=["income"], axis=1)

def get_test_data(self, sds: SystemDSContext) -> "Frame":
    """Read the test file through ``sds`` and return columns 0 through 13.

    Calls ``_get_data`` on the test path first, which is expected to make
    the file available locally.
    """
    self._get_data(self._test_data_loc)
    return sds.read(self._test_data_loc)[:, 0:14]

def get_test_labels_pandas(self) -> pd.DataFrame:
    """Return only the "income" column of the parsed test file.

    The double-bracket selection keeps the result a one-column DataFrame.
    """
    self._get_data(self._test_data_loc)
    return self._parse_data(self._test_data_loc)[["income"]]

def get_test_labels(self, sds: SystemDSContext) -> "Frame":
    """Read the test file through ``sds`` and return column 14.

    NOTE(review): column 14 is presumably the "income" label that the
    pandas variant selects by name; confirm against the CSV layout.
    """
    self._get_data(self._test_data_loc)
    return sds.read(self._test_data_loc)[:, 14]

Expand All @@ -105,7 +111,7 @@ def get_jspec_string(self) -> str:
with open(self._jspec_loc, "r") as f:
return f.read()

def get_jspec(self, sds: SystemDSContext) -> "Scalar":
    """Read the jspec file through ``sds`` as a string-valued scalar.

    Calls ``_get_data`` on the jspec path first, which is expected to make
    the file available locally.
    """
    self._get_data(self._jspec_loc)
    return sds.read(self._jspec_loc, data_type="scalar", value_type="string")

Expand All @@ -119,7 +125,7 @@ def _get_data(self, loc):
os.makedirs(folder)
if not os.path.isfile(self._data_zip_loc):
myZip = requests.get(self._data_zip_url)
with open(self._data_zip_loc, 'wb') as f:
with open(self._data_zip_loc, "wb") as f:
f.write(myZip.content)
with zipfile.ZipFile(self._data_zip_loc) as z:
z.extractall(folder)
27 changes: 15 additions & 12 deletions src/main/python/systemds/examples/tutorials/mnist.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,23 +72,26 @@ def get_test_labels(self) -> np.array:
return self._parse_data(self._test_labels_loc)

def _parse_data(self, loc):
    """Open ``loc`` in binary mode and return ``self._parse`` of the stream.

    Files whose extension is ".gz" are opened with ``gzip.open``; every
    other path is opened with the built-in ``open``.
    """
    opener = gzip.open if os.path.splitext(loc)[1] == ".gz" else open
    with opener(loc, "rb") as fd:
        return self._parse(fd)

def _parse(self, fd):
DATA_TYPES = {0x08: 'B', # unsigned byte
0x09: 'b', # signed byte
0x0b: 'h', # short (2 bytes)
0x0c: 'i', # int (4 bytes)
0x0d: 'f', # float (4 bytes)
0x0e: 'd'} # double (8 bytes)
DATA_TYPES = {
0x08: "B", # unsigned byte
0x09: "b", # signed byte
0x0B: "h", # short (2 bytes)
0x0C: "i", # int (4 bytes)
0x0D: "f", # float (4 bytes)
0x0E: "d",
} # double (8 bytes)

header = fd.read(4)
zeros, data_type, num_dimensions = struct.unpack('>HBB', header)
zeros, data_type, num_dimensions = struct.unpack(">HBB", header)
data_type = DATA_TYPES[data_type]
dimension_sizes = struct.unpack('>' + 'I' * num_dimensions,
fd.read(4 * num_dimensions))
dimension_sizes = struct.unpack(
">" + "I" * num_dimensions, fd.read(4 * num_dimensions)
)

data = array.array(data_type, fd.read())
data.byteswap() # looks like array.array reads data as little endian
Expand All @@ -103,5 +106,5 @@ def _get_data(self, url, loc):
folder = os.path.dirname(loc)
if not os.path.isdir(folder):
os.makedirs(folder)
with open(loc, 'wb') as f:
with open(loc, "wb") as f:
f.write(myfile.content)
14 changes: 12 additions & 2 deletions src/main/python/systemds/operator/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,15 @@
from systemds.operator.nodes.source import Source
from systemds.operator import algorithm

# Names exported by `from systemds.operator import *`.
__all__ = [
    "OperationNode",
    "algorithm",
    "Scalar",
    "List",
    "ListAccess",
    "Matrix",
    "Frame",
    "Source",
    "MultiReturn",
    "Combine",
]
28 changes: 16 additions & 12 deletions src/main/python/systemds/operator/nn/affine.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,20 @@ def __init__(self, sds_context: SystemDSContext, d, m, seed=-1):
"""
sds_context: The systemdsContext to construct the layer inside of
d: The number of features that are input to the affine layer
m: The number of neurons that are contained in the layer,
m: The number of neurons that are contained in the layer,
and the number of features output
"""
super().__init__(sds_context, 'affine.dml')
super().__init__(sds_context, "affine.dml")
self._X = None

# init weight and bias
self.weight = Matrix(sds_context, '')
self.bias = Matrix(sds_context, '')
params_dict = {'D': d, 'M': m, 'seed': seed}
self.weight = Matrix(sds_context, "")
self.bias = Matrix(sds_context, "")
params_dict = {"D": d, "M": m, "seed": seed}
out = [self.weight, self.bias]
op = MultiReturn(sds_context, "affine::init", output_nodes=out, named_input_nodes=params_dict)
op = MultiReturn(
sds_context, "affine::init", output_nodes=out, named_input_nodes=params_dict
)
self.weight._unnamed_input_nodes = [op]
self.bias._unnamed_input_nodes = [op]
op._source_node = self._source
Expand All @@ -59,7 +61,7 @@ def forward(X: Matrix, W: Matrix, b: Matrix):
return Affine._source.forward(X, W, b)

@staticmethod
def backward(dout:Matrix, X: Matrix, W: Matrix, b: Matrix):
def backward(dout: Matrix, X: Matrix, W: Matrix, b: Matrix):
"""
dout: The gradient of the output, passed from the upstream
X: The input matrix of this layer
Expand All @@ -69,12 +71,14 @@ def backward(dout:Matrix, X: Matrix, W: Matrix, b: Matrix):
"""
sds = X.sds_context
Affine._create_source(sds, "affine.dml")
params_dict = {'dout': dout, 'X': X, 'W': W, 'b': b}
dX = Matrix(sds, '')
dW = Matrix(sds, '')
db = Matrix(sds, '')
params_dict = {"dout": dout, "X": X, "W": W, "b": b}
dX = Matrix(sds, "")
dW = Matrix(sds, "")
db = Matrix(sds, "")
out = [dX, dW, db]
op = MultiReturn(sds, "affine::backward", output_nodes=out, named_input_nodes=params_dict)
op = MultiReturn(
sds, "affine::backward", output_nodes=out, named_input_nodes=params_dict
)
dX._unnamed_input_nodes = [op]
dW._unnamed_input_nodes = [op]
db._unnamed_input_nodes = [op]
Expand Down
18 changes: 12 additions & 6 deletions src/main/python/systemds/operator/nodes/combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,25 @@

class Combine(OperationNode):

def __init__(
    self, sds_context, func="", unnamed_input_nodes: Iterable[OperationNode] = None
):
    """Create a node that combines several output-less operations.

    sds_context: passed through to the parent constructor
    func: passed through to the parent constructor
    unnamed_input_nodes: the nodes to combine; each must report
        ``_datatype_is_none`` as true

    Raises ValueError if any input node has an output.
    """
    # The parameter defaults to None, so only validate when nodes were
    # actually supplied instead of failing on iteration.
    # NOTE(review): None is still forwarded to the parent constructor
    # unchanged - confirm that OperationNode accepts it.
    if unnamed_input_nodes is not None:
        for node in unnamed_input_nodes:
            if not node._datatype_is_none:
                raise ValueError(
                    "Cannot combine elements that have outputs, all elements must be instances of print or write"
                )

    self._outputs = {}
    super().__init__(sds_context, func, unnamed_input_nodes, None, False)

def code_line(
    self,
    var_name: str,
    unnamed_input_vars: Sequence[str],
    named_input_vars: Dict[str, str],
) -> str:
    """Return the code line for this node, which is always the empty string.

    All three arguments are accepted but ignored.
    """
    return ""

def compute(self, verbose: bool = False, lineage: bool = False):
    """Delegate to the parent class's ``compute``, forwarding both flags unchanged."""
    return super().compute(verbose, lineage)
Expand Down
Loading

0 comments on commit 504e751

Please sign in to comment.