explicitly handle timeouts, fixes #35, fixes #31 (#53)

Signed-off-by: Max Pumperla <[email protected]>
ray-project · May 25, 2023 · 0bfe3fd · 0bfe3fd
1 parent b38db6f
commit 0bfe3fd
Show file tree

Hide file tree

Showing 2 changed files with 32 additions and 15 deletions.
diff --git a/aviary/common/backend.py b/aviary/common/backend.py
@@ -1,3 +1,4 @@
+import logging
 from abc import ABC, abstractmethod
 import os
 import requests
@@ -78,23 +79,37 @@ def models(self):
 
     def completions(self, prompt: str, llm: str):
         url = self.backend_url + "query/" + llm.replace("/", "--")
-        response = requests.post(
-            url,
-            headers=self.header,
-            json={"prompt": prompt},
-            timeout=TIMEOUT,
-        )
-        return response.json()[llm]
+        try:
+            response = requests.post(
+                url,
+                headers=self.header,
+                json={"prompt": prompt},
+                timeout=TIMEOUT,
+            )
+            return response.json()[llm]
+        except requests.exceptions.Timeout:
+            logging.warning(f"Request for {llm} timed out")
+            return {"generated_text": "[ERROR] Request timed out",
+                    "total_time": 1,
+                    "num_total_tokens": 1
+                    }
 
     def batch_completions(self, prompts: List[str], llm: str):
         url = self.backend_url + "query/batch/" + llm.replace("/", "--")
-        response = requests.post(
-            url,
-            headers=self.header,
-            json={"prompts": prompts},
-            timeout=TIMEOUT,
-        )
-        return response.json()
+        try:
+            response = requests.post(
+                url,
+                headers=self.header,
+                json={"prompts": prompts},
+                timeout=TIMEOUT,
+            )
+            return response.json()
+        except requests.exceptions.Timeout:
+            logging.warning(f"Request for {llm} timed out")
+            return {"generated_text": "[ERROR] Request timed out",
+                    "total_time": 1,
+                    "num_total_tokens": 1
+                    }
 
 
 class MockBackend(Backend):

diff --git a/aviary/common/constants.py b/aviary/common/constants.py
@@ -2,7 +2,9 @@
 
 NUM_LLM_OPTIONS = 3
 
-TIMEOUT = 120
+# (connect, read) timeouts in seconds. We make the "read" timeout deliberately
+# shorter than in CloudFront or Gradio, so that we can explicitly handle timeouts.
+TIMEOUT = (5, 40)
 
 AVIARY_DESC = """