Multiple quality of life improvements #20

Open
wants to merge 32 commits into master

Changes from 10 commits
Commits (32)
26657c2
Removed spooky stacktrace
vadim0x60 Mar 8, 2024
88cb03c
new programlib
vadim0x60 Mar 8, 2024
26732d9
Revert "Process empty messages from LLMs while extracting code"
vadim0x60 Mar 21, 2024
34c5b83
RL eval
vadim0x60 Mar 25, 2024
18f77fd
Version bump
vadim0x60 Mar 25, 2024
14e6a1e
Fix
vadim0x60 Mar 27, 2024
2834c0d
fix
vadim0x60 Mar 27, 2024
35798f0
Require new programlib
vadim0x60 Mar 27, 2024
cdf2db2
program early shutdown fix
vadim0x60 Apr 5, 2024
1f304bc
RLeval fix
vadim0x60 Apr 9, 2024
31f7ede
RL error fix
vadim0x60 Apr 26, 2024
07cf6e5
Gymnasium eval fix
vadim0x60 May 2, 2024
fae86af
fix
vadim0x60 May 2, 2024
bc6bce6
RLEval fix
vadim0x60 May 31, 2024
3ec24b4
tunable error reward in RLEval
vadim0x60 May 31, 2024
051010b
fix
vadim0x60 May 31, 2024
3d64b0f
clean up processes
vadim0x60 Jun 4, 2024
1c4dbc4
fix
vadim0x60 Jun 4, 2024
560f005
version bump
vadim0x60 Jun 4, 2024
22edaff
editable openai base
vadim0x60 Jun 18, 2024
53b7d26
Version bump
vadim0x60 Jun 18, 2024
d7905b4
fix
vadim0x60 Jun 18, 2024
e6fc81a
version bump
vadim0x60 Jun 18, 2024
d109fb8
Addressing https://github.com/pexpect/pexpect/issues/47
vadim0x60 Jun 22, 2024
bf71a33
version bump
vadim0x60 Jun 22, 2024
bc4308a
Workaround pexpect/pexpect#462
vadim0x60 Jun 24, 2024
3b72715
Anthropic models
vadim0x60 Jun 26, 2024
3255ea1
handle anthropic rate limits
vadim0x60 Jun 28, 2024
30de595
more retries with claude
vadim0x60 Jul 2, 2024
66402fb
smart max_retries for claude
vadim0x60 Jul 2, 2024
ec35e77
anthropic fix
vadim0x60 Jul 2, 2024
30792da
Replaced failed Anthropic backoff with manually set delays
vadim0x60 Jul 4, 2024
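
Note: the last several commits (handle anthropic rate limits, more retries with claude, smart max_retries for claude, and the final one above) all deal with Anthropic rate limiting, but the corresponding diff is not part of the 10-commit view below. As a rough sketch only, the "manually set delays" strategy named in the final commit could look like the following; every name and delay value here is an illustrative assumption, not the PR's actual code:

import time

# Retry a callable with fixed, manually chosen delays instead of
# relying on a client library's built-in backoff.
DELAYS = [1, 5, 15, 60]  # seconds; illustrative values

def call_with_manual_delays(request):
    for delay in DELAYS:
        try:
            return request()
        except Exception:      # e.g. a rate-limit error from the client
            time.sleep(delay)  # wait the preset delay, then retry
    return request()           # final attempt; let any error propagate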
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "seidr"
-version = "3.1.1"
+version = "3.2.3"
description = "Synthesize Execute Instruct Debug Rank"
authors = ["Vadim Liventsev <[email protected]>", "Anastasia Grishina <[email protected]>"]
license = "MIT"
@@ -16,7 +16,7 @@ python = "^3.9"
psb2 = ">=1.1.1"
openai = "<1.0.0"
more-itertools = ">=8.0.0,<9.0.0"
-programlib = ">=9.0.2,<10.0.0"
+programlib = ">=11.0.0"
wandb = "<1.0.0"
gitpython = ">=3.0.0,<4.0.0"
tenacity = ">=8.0.0,<9.0.0"
50 changes: 49 additions & 1 deletion seidr/eval.py
@@ -10,7 +10,7 @@ class Evaluation(ABC):
    Produces a binary pass/fail result, a float score, and a text report
    """

-    def __init__(self, SUT: Program, passing_score: float = 1.):
+    def __init__(self, SUT, passing_score: float = 1.):
Review comment (Collaborator):
Isn't SUT of type Program?
SUT is an abbreviation, and it's nice to have some explanation and typing hints.

        """
        SUT: System Under Test
        passing_score: float score required to pass the evaluation
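
A note on the question above: the Gymnasium evaluation added further down builds its system under test as Program(code, language=language).spawn(), i.e. it passes the spawned agent rather than the Program itself, which may be why the annotation was dropped. If typing were kept, one option would be a sketch like the following ("Agent" is a hypothetical placeholder for whatever Program.spawn() returns, not a confirmed programlib type):

from typing import Union
from programlib import Program

class Evaluation:
    # Sketch only: a type hint that admits both a Program and a spawned
    # agent ("Agent" is a placeholder alias, quoted so it need not exist).
    def __init__(self, SUT: Union[Program, "Agent"], passing_score: float = 1.0):
        """SUT: System Under Test; passing_score: score required to pass."""
        self.SUT = SUT
        self.passing_score = passing_score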
@@ -97,3 +97,51 @@ def pen_report(self) -> str:
        else:
            self.output = "\n".join(self.output) if type(self.output) == list else self.output
        return self.output

+class Gymnasium(Evaluation):
+    def __init__(self, env, code, language, passing_score):
+        agent = Program(code, language=language).spawn()
+        super().__init__(agent, passing_score)
+
+        self.env = env
+        self.tot_reward = 0
+        self.tot_txt = ''
+        self.done = False
+
+    def __del__(self):
+        self.SUT.close()
+
+    def play(self):
+        if self.done:
+            return
+
+        try:
+            observation, info = self.env.reset()
+            self.tot_txt += info.get('memos', '')
+            terminated = False
+            truncated = False
+
+            while not (terminated or truncated):
+                if 'ascii' in self.env.metadata.get('render.modes', []):
+                    ascii_render = self.env.render(mode='ascii')
+                    self.tot_txt += ascii_render
+
+                action, _ = self.SUT.predict(observation, deterministic=True)
+
+                observation, reward, terminated, truncated, info = self.env.step(action)
+                self.tot_reward += reward
+                self.tot_txt += info.get('memos', '')
+        except RuntimeError as e:
+            self.tot_reward = -1000
+            self.tot_txt = str(e)
+
+        self.done = True
+
+    def score(self):
+        self.play()
+        return self.tot_reward
+
+    def pen_report(self):
+        self.play()
+        self.tot_txt += f'\nFinal reward: {self.tot_reward}'
+        return self.tot_txt
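
For context, the new class might be exercised like this. This is a hypothetical usage sketch: it assumes a gymnasium-compatible environment, and the environment name, agent source, and language value are illustrative, not taken from the PR:

import gymnasium as gym
from seidr.eval import Gymnasium

env = gym.make("CartPole-v1")   # illustrative environment
agent_code = "..."              # agent program source in a supported language

evaluation = Gymnasium(env, code=agent_code, language="Python", passing_score=0.0)
print(evaluation.score())       # total episode reward, or -1000 after a RuntimeError
print(evaluation.pen_report())  # rollout transcript plus "Final reward: ..."

Note the except RuntimeError branch in play(): a crashing agent is scored -1000 rather than aborting the evaluation.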
2 changes: 1 addition & 1 deletion seidr/github.py
@@ -52,7 +52,7 @@ def ensure_repo(remote: str, path: pathlib.Path | str, branch: str = None) -> Repo
        if branch:
            repo.git.checkout(branch)
    except GitError as e:
-        logging.info(f'Git error in ensure repo {e}. \n{traceback.print_stack()}')
+        logging.info(f'Git error in ensure repo {e}.')
        shutil.rmtree(path, ignore_errors=True)
        repo = Repo.clone_from(remote, path)

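This one-line change is the "spooky stacktrace" removal from the first commit: traceback.print_stack() writes the stack to stderr and returns None, so interpolating it into an f-string logs a trailing "None" while a full stack trace leaks to the console as a side effect. A small demonstration:

import traceback

# Evaluating the f-string calls print_stack(), which dumps a stack
# trace to stderr and returns None; None is what the string captures.
message = f"Git error in ensure repo. \n{traceback.print_stack()}"
print(message)  # -> "Git error in ensure repo. " followed by "None"
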
4 changes: 2 additions & 2 deletions seidr/llm.py
@@ -22,9 +22,9 @@ def extract_codes(
    language: Language | str
) -> str:
    """Extract code out of a message and (if Python) format it with black"""

    try:
        code_blocks = list(extract_from_buffer(StringIO(message_content)))
        code_blocks = [code for code in code_blocks if not bool(code)]
    except RuntimeError as e:
        code_blocks = []

@@ -90,7 +90,7 @@ def query_llm(
    # Assistants are trained to respond with one message.
    # it is theoretically possible to get more than one message, but it is very unlikely.
    assert all(len(r) == 1 for r in result.generations), "The models are expected to respond with one message"
-    result = [r[0].message.content for r in result.generations if r[0].message.content]
+    result = [r[0].message.content for r in result.generations]

    if mode == "repair":
        logging.info(f"Generating repair candidates for bug summary: \n{kwargs['bug_summary']}\n")
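For readers unfamiliar with the shape the assert above relies on: in LangChain, result.generations holds one list per prompt, and each inner list contains candidate generations wrapping a message. The second change stops filtering out empty message contents, so they now flow through to extract_codes. A plain-Python illustration of the shapes (dictionaries stand in for LangChain's objects; this is an assumption-level sketch, not the library's API):

# One inner list per prompt; each inner list holds a single candidate.
generations = [
    [{"message": {"content": "print('hello')"}}],
    [{"message": {"content": ""}}],  # empty completions are now kept
]

assert all(len(r) == 1 for r in generations)
contents = [r[0]["message"]["content"] for r in generations]
print(contents)  # ["print('hello')", ""]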