Remove __init__ for Model classes
Replace path preprocessing and column existence/type checks with
SQLAlchemy event listeners and validator helpers.
dbutenhof committed Mar 5, 2021
1 parent 4e167eb commit 18cdc73
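
The commit message above describes swapping hand-written __init__ methods for SQLAlchemy's @validates helpers and an "init" event listener. The following is a minimal sketch of those two hooks, independent of this repository, assuming SQLAlchemy 1.4+ and using an invented Example model with a hypothetical "path" convenience argument:

from sqlalchemy import Column, Integer, String, event
from sqlalchemy.orm import declarative_base, validates

Base = declarative_base()


class Example(Base):
    __tablename__ = "example"
    id = Column(Integer, primary_key=True)
    name = Column(String(255), nullable=False)

    @validates("name")
    def validate_name(self, key, value):
        # Runs on every assignment to "name", including assignments made by
        # the default declarative constructor; replaces type checks that
        # would otherwise live in a custom __init__.
        if not isinstance(value, str) or not value:
            raise ValueError(f"{key} must be a non-empty string")
        return value


@event.listens_for(Example, "init")
def preprocess_kwargs(target, args, kwargs):
    # Runs before the declarative constructor and may edit kwargs in place,
    # e.g. deriving a real column value from a convenience argument and then
    # dropping that argument so SQLAlchemy never sees an unknown column name.
    if "path" in kwargs:
        kwargs.setdefault("name", kwargs["path"].rsplit("/", 1)[-1])
        del kwargs["path"]

The validator fires per column at assignment time, while the listener rewrites constructor keyword arguments up front; the diff below applies the same pair of hooks to the Dataset and Metadata models.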
Showing 2 changed files with 139 additions and 123 deletions.
229 changes: 113 additions & 116 deletions lib/pbench/server/db/models/tracker.py
@@ -11,8 +11,9 @@
Enum,
ForeignKey,
UniqueConstraint,
event,
)
from sqlalchemy.orm import relationship
from sqlalchemy.orm import relationship, validates
from sqlalchemy.exc import SQLAlchemyError

from pbench.server.db.database import Database
@@ -302,7 +303,7 @@ class Dataset(Database.Base):
ANTI_USER = "public" # The "user" that owns unowned datasets

id = Column(Integer, primary_key=True, autoincrement=True)
user = Column(String(255), unique=False, nullable=False)
user = Column(String(255), unique=False, nullable=False, default=ANTI_USER)
controller = Column(String(255), unique=False, nullable=False)
name = Column(String(255), unique=False, nullable=False)

@@ -315,9 +316,9 @@
# This could be improved when we drop `pbench-server-prep-shim-002`
# as server `PUT` does not have the same problem.
md5 = Column(String(255), unique=False, nullable=True)
created = Column(DateTime, nullable=False)
state = Column(Enum(States), unique=False, nullable=False)
transition = Column(DateTime, nullable=False)
created = Column(DateTime, nullable=False, default=datetime.datetime.now())
state = Column(Enum(States), unique=False, nullable=False, default=States.UPLOADING)
transition = Column(DateTime, nullable=False, default=datetime.datetime.now())

# NOTE: this relationship defines a `dataset` property in `Metadata`
# that refers to the parent `Dataset` object.
@@ -334,53 +335,23 @@
# entirely by database and messages, we can improve this.
__table_args__ = (UniqueConstraint("controller", "name"), {})

def __init__(self, **kwargs):
@validates("state")
def validate_state(self, key, value):
"""Validate that the value provided for the Dataset state is a member
of the States ENUM before it's applied by the SQLAlchemy constructor.
"""
Construct a new Dataset object; generally this only makes sense when
taking in a new dataset via PUT, and the state of the dataset defaults
to UPLOADING.
The dataset may be specified by controller and name, or by tarball
file path. If path is specified and either controller or name are
omitted, they'll be filled in from path. (If all three are specified,
path is ignored.)
NOTE: until we can get rid of the old `gold` unit tests, we need to
be able to create a Dataset in an arbitrary state, so that's allowed;
however, deployed server code should never do this!
Parameters (all kwargs):
user User associated with the dataset
controller Controller node
name Name of dataset
path Dataset path name (see _render_path method)
md5 The dataset MD5 hash
created Date the dataset was PUT to server [now]
state The current state of the dataset [UPLOADING]
"""
Dataset._render_path(kwargs)
if "state" in kwargs:
if type(kwargs.get("state")) is not States:
raise DatasetBadParameterType(kwargs.get("state"), States)
super().__init__(**kwargs)
self.user = kwargs.get("user", Dataset.ANTI_USER)
self.controller = kwargs.get("controller")
self.name = kwargs.get("name")
time = datetime.datetime.now()
self.created = time
self.md5 = kwargs.get("md5")
self.state = kwargs.get("state", States.UPLOADING)
self.transition = time
if type(value) is not States:
raise DatasetBadParameterType(value, States)
return value

@staticmethod
def _render_path(kwargs):
def _render_path(patharg=None, controllerarg=None, namearg=None):
"""
_render_path Process a `path` kwarg and convert it into `controller`
and/or `name` kwargs, modifying the kwarg dict directly.
_render_path Process a `path` string and convert it into `controller`
and/or `name` strings.
This pre-processes the kwargs dict before it can be presented to a
query or constructor. If the calling context has only the full file
This pre-processes the controller and name before they are presented to
a query or constructor. If the calling context has only the full file
path of a dataset, this can extract both "controller" and "name" from
the path. It can also be used to construct either "controller" or
"name", as it will fill in either or both if not already present in
@@ -393,34 +364,34 @@ def _render_path(kwargs):
possibly fragile assumptions regarding the structure of the symlink
name.
IMPLEMENTATION NOTE: After pre-processing, whether used or not, the
"path" key will be removed from the dict, as the SQLAlchemy model
superclass would complain about the undefined column.
Args:
kwargs (dict):
"path": A tarball file path from which the controller (host)
name, the tarball dataset name (basename minus extension)
or both will be derived.
"controller": The controller name (hostname) of the dataset;
this is retained if specified, or will be constructed
from "path" if not present.
"name": The dataset name (file path basename minus ".tar.xz");
this is retained if specified, or will be constructed from
"path" if not present
patharg: A tarball file path from which the controller (host)
name, the tarball dataset name (basename minus extension),
or both will be derived.
controllerarg: The controller name (hostname) of the dataset;
this is retained if specified, or will be constructed
from "path" if not present.
namearg: The dataset name (file path basename minus ".tar.xz");
this is retained if specified, or will be constructed from
"path" if not present
Returns:
A tuple of (controller, name) based on the three arguments
"""
if "path" in kwargs:
path = Path(kwargs.get("path"))
controller_result = controllerarg
name_result = namearg

if patharg:
path = Path(patharg)
if path.is_symlink():
path = Path(os.path.realpath(path))
if "name" not in kwargs:
name = path.name
if name.endswith(".tar.xz"):
name = name[:-7]
kwargs["name"] = name
if "controller" not in kwargs:
kwargs["controller"] = path.parent.name
del kwargs["path"] # Don't let the parental unit see this
if not name_result:
name_result = path.name
if name_result.endswith(".tar.xz"):
name_result = name_result[:-7]
if not controller_result:
controller_result = path.parent.name
return controller_result, name_result

@staticmethod
def create(**kwargs):
@@ -432,7 +403,7 @@ def create(**kwargs):
kwargs (dict):
"user": The owner of the dataset; defaults to "public".
"path": A tarball file path from which the controller (host)
name, the tarball dataset name (basename minus extension)
name, the tarball dataset name (basename minus extension),
or both will be derived.
"controller": The controller name (hostname) of the dataset;
this is retained if specified, or will be constructed
@@ -458,27 +429,26 @@ def create(**kwargs):
return dataset

@staticmethod
def attach(**kwargs):
def attach(path=None, controller=None, name=None, state=None):
"""
attach Attempt to fetch dataset for the controller and dataset name,
or using a specified file path (see __init__ and _render_path).
or using a specified file path (see _render_path and the path_init
event listener for details).
If the `state` kwarg is specified, the dataset will be created in that
state or, if it already exists, `attach` will attempt to advance the
dataset to that state.
If state is specified, attach will attempt to advance the dataset to
that state.
Args:
kwargs (dict):
"path": A tarball file path from which the controller (host)
name, the tarball dataset name (basename minus extension)
or both will be derived.
"controller": The controller name (hostname) of the dataset;
this is retained if specified, or will be constructed
from "path" if not present.
"name": The dataset name (file path basename minus ".tar.xz");
this is retained if specified, or will be constructed from
"path" if not present.
"state": The desired state to advance the dataset.
"path": A tarball file path from which the controller (host)
name, the tarball dataset name (basename minus extension),
or both will be derived.
"controller": The controller name (hostname) of the dataset;
this is retained if specified, or will be constructed
from "path" if not present.
"name": The dataset name (file path basename minus ".tar.xz");
this is retained if specified, or will be constructed from
"path" if not present.
"state": The desired state to advance the dataset.
Raises:
DatasetSqlError: problem interacting with Database
@@ -493,9 +463,7 @@ def attach(**kwargs):
Dataset: a dataset object in the desired state (if specified)
"""
# Make sure we have controller and name from path
Dataset._render_path(kwargs)
controller = kwargs.get("controller")
name = kwargs.get("name")
controller, name = Dataset._render_path(path, controller, name)
try:
dataset = (
Database.db_session.query(Dataset)
@@ -507,12 +475,12 @@ def attach(**kwargs):
"Error attaching {}>{}: {}", controller, name, str(e)
)
raise DatasetSqlError("attaching", controller, name) from e
else:
if dataset is None:
Dataset.logger.warning("{}>{} not found", controller, name)
raise DatasetNotFound(controller, name)
elif "state" in kwargs:
dataset.advance(kwargs.get("state"))

if dataset is None:
Dataset.logger.warning("{}>{} not found", controller, name)
raise DatasetNotFound(controller, name)
elif state:
dataset.advance(state)
return dataset

def __str__(self):
@@ -583,6 +551,32 @@ def update(self):
raise DatasetSqlError("updating", self.controller, self.name) from e


@event.listens_for(Dataset, "init")
def path_init(target, args, kwargs):
"""Listen for an init event on a Dataset to process a path before the
SQLAlchemy constructor sees it.
We want the constructor to see both "controller" and "name" parameters in
the kwargs representing the initial SQL column values. This listener allows
us to provide those values by specifying a file path, which is processed
into controller and name using the internal _render_path() helper.
We will remove "path" from kwargs so that SQLAlchemy doesn't see it. (This
is an explicitly allowed side effect of the listener architecture.)
"""
if "path" in kwargs:
controller, name = Dataset._render_path(
patharg=kwargs.get("path"),
controllerarg=kwargs.get("controller"),
namearg=kwargs.get("name"),
)
if "controller" not in kwargs:
kwargs["controller"] = controller
if "name" not in kwargs:
kwargs["name"] = name
del kwargs["path"]


class Metadata(Database.Base):
""" Retain secondary information about datasets
@@ -609,26 +603,14 @@ class Metadata(Database.Base):
Integer, ForeignKey("dataset.id", ondelete="CASCADE"), nullable=False
)

def __init__(self, **kwargs):
@validates("key")
def validate_key(self, key, value):
"""Validate that the value provided for the Metadata key argument is an
allowed name.
"""
Construct a new Metadata object for a dataset
Parameters:
key Metadata key (from METADATA_KEYS)
value String value for key
"""
if "key" not in kwargs:
raise MetadataMissingParameter("key")
key = kwargs.get("key")
if key not in Metadata.METADATA_KEYS:
raise MetadataBadKey(key)
if "value" not in kwargs:
raise MetadataMissingKeyValue(key)
value = kwargs.get("value")
super().__init__(**kwargs)
self.key = key
self.value = value
if value not in Metadata.METADATA_KEYS:
raise MetadataBadKey(value)
return value

@staticmethod
def create(**kwargs):
@@ -735,3 +717,18 @@ def delete(self):
Metadata.logger.exception("Can't delete {} from DB", self)
Database.db_session.rollback()
raise MetadataSqlError("deleting", self.dataset, self.key) from e


@event.listens_for(Metadata, "init")
def check_required(target, args, kwargs):
"""Listen for an init event on Metadata to verify parameters before the
SQLAlchemy constructor gets control.
We want the constructor to see both "key" and "value" parameters; if either
isn't present, give a simpler and more useful error message rather than the
internal SQL constraint failures that would result.
"""
if "key" not in kwargs:
raise MetadataMissingParameter("key")
if "value" not in kwargs:
raise MetadataMissingKeyValue(kwargs.get("key"))
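
A short, hypothetical usage sketch of the reworked models follows (the tarball path is invented, the exception classes are assumed to be importable from the same tracker module as the diff suggests, and nothing here touches a real database):

from pbench.server.db.models.tracker import (
    Dataset,
    DatasetBadParameterType,
    Metadata,
    MetadataMissingParameter,
)

# The path_init listener derives controller ("host1") and name ("mytar")
# from the hypothetical tarball path before the constructor runs.
ds = Dataset(path="/srv/pbench/archive/host1/mytar.tar.xz")
assert ds.controller == "host1" and ds.name == "mytar"

# The @validates("state") helper rejects anything that is not a States member.
try:
    Dataset(controller="host1", name="mytar", state="uploading")
except DatasetBadParameterType:
    pass

# The Metadata "init" listener reports a missing "key" immediately rather
# than failing later on a SQL constraint.
try:
    Metadata(value="something")
except MetadataMissingParameter:
    pass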
