Skip to content

Commit

Permalink
cluster status safeguards (#254)
Browse files Browse the repository at this point in the history
  • Loading branch information
Maxusmusti authored Aug 7, 2023
1 parent a285ef6 commit f34697a
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 7 deletions.
23 changes: 18 additions & 5 deletions src/codeflare_sdk/cluster/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,9 +176,15 @@ def status(
ready = False
status = CodeFlareClusterStatus.FAILED # should deleted be separate
return status, ready # exit early, no need to check ray status
elif appwrapper.status in [AppWrapperStatus.PENDING]:
elif appwrapper.status in [
AppWrapperStatus.PENDING,
AppWrapperStatus.QUEUEING,
]:
ready = False
status = CodeFlareClusterStatus.QUEUED
if appwrapper.status == AppWrapperStatus.PENDING:
status = CodeFlareClusterStatus.QUEUED
else:
status = CodeFlareClusterStatus.QUEUEING
if print_to_console:
pretty_print.print_app_wrappers_status([appwrapper])
return (
Expand Down Expand Up @@ -561,11 +567,18 @@ def _map_to_ray_cluster(rc) -> Optional[RayCluster]:


def _map_to_app_wrapper(aw) -> AppWrapper:
    """Build an ``AppWrapper`` model object from a raw AppWrapper resource.

    Args:
        aw: Mapping describing the AppWrapper resource. ``aw["metadata"]["name"]``
            is always read; ``aw["status"]`` is read only when present.

    Returns:
        An ``AppWrapper`` populated from the resource's ``status`` section when
        that section exists and contains ``"canrun"``. Otherwise a placeholder
        ``AppWrapper`` in the ``QUEUEING`` state with ``can_run=False``.

    Raises:
        KeyError: If ``metadata``/``name`` is missing, or if ``status`` contains
            ``"canrun"`` but lacks ``"state"`` or ``"queuejobstate"``.
        ValueError: If the lower-cased ``state`` is not a valid
            ``AppWrapperStatus`` value.
    """
    if "status" in aw and "canrun" in aw["status"]:
        return AppWrapper(
            name=aw["metadata"]["name"],
            status=AppWrapperStatus(aw["status"]["state"].lower()),
            can_run=aw["status"]["canrun"],
            job_state=aw["status"]["queuejobstate"],
        )
    # No usable status block on the resource. Presumably the controller has not
    # populated it yet (TODO confirm), so report the wrapper as still queueing
    # rather than failing on the missing keys.
    return AppWrapper(
        name=aw["metadata"]["name"],
        status=AppWrapperStatus.QUEUEING,
        can_run=False,
        job_state="Still adding to queue",
    )


Expand Down
6 changes: 4 additions & 2 deletions src/codeflare_sdk/cluster/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class AppWrapperStatus(Enum):
Defines the possible reportable states of an AppWrapper.
"""

QUEUEING = "queueing"
PENDING = "pending"
RUNNING = "running"
FAILED = "failed"
Expand All @@ -55,8 +56,9 @@ class CodeFlareClusterStatus(Enum):
READY = 1
STARTING = 2
QUEUED = 3
FAILED = 4
UNKNOWN = 5
QUEUEING = 4
FAILED = 5
UNKNOWN = 6


@dataclass
Expand Down

0 comments on commit f34697a

Please sign in to comment.