diff --git a/aviary/backend/llm/initializers/hf_transformers/base.py b/aviary/backend/llm/initializers/hf_transformers/base.py index d850aac7..b64e01f3 100644 --- a/aviary/backend/llm/initializers/hf_transformers/base.py +++ b/aviary/backend/llm/initializers/hf_transformers/base.py @@ -71,7 +71,7 @@ def _get_model_location_on_disk(self, model_id: str) -> str: ) model_id_or_path = model_id - if os.path.exists(path): + if os.path.exists(path) and os.path.exists(os.path.join(path, "refs", "main")): with open(os.path.join(path, "refs", "main"), "r") as f: snapshot_hash = f.read().strip() if os.path.exists( diff --git a/aviary/backend/llm/predictor/continuous_batching_predictor.py b/aviary/backend/llm/predictor/continuous_batching_predictor.py index 268df196..7636fe48 100644 --- a/aviary/backend/llm/predictor/continuous_batching_predictor.py +++ b/aviary/backend/llm/predictor/continuous_batching_predictor.py @@ -414,4 +414,5 @@ async def _stream_async( def check_health(self) -> None: super().check_health() - self.scheduler.check_health() + if self.scheduler: + self.scheduler.check_health() diff --git a/aviary/backend/llm/predictor/predictor.py b/aviary/backend/llm/predictor/predictor.py index 710d7817..42fbced9 100644 --- a/aviary/backend/llm/predictor/predictor.py +++ b/aviary/backend/llm/predictor/predictor.py @@ -95,6 +95,7 @@ def __init__(self, llm_config: LLMConfig, world_size: int): self.local_rank = None self.current_device = None self.gpu_memory_fraction = 1.0 + self.pg = None def init_model( self,