Update for release

CarperAI · Jul 10, 2023 · 163adfb · 163adfb
1 parent 3c9c8be
commit 163adfb
Show file tree

Hide file tree

Showing 12 changed files with 370 additions and 142 deletions.
diff --git a/OpenELM_Paper.pdf b/OpenELM_Paper.pdf
diff --git a/README.md b/README.md
@@ -10,6 +10,9 @@ We want to support users with many different compute profiles!
 3. Provide a simple interface to a range of example environments for evolutionary search, to let users adapt these easily for their domain.
 4. Demonstrate the potential of evolution with LLMs.
 
+# Install
+`pip install openelm`
+
 # Features
 
 ### LLM integration with evolutionary algorithms
@@ -43,7 +46,7 @@ Roughly, ELM consists of a pipeline of different components:
 All options for these classes are defined in `configs.py`, via dataclasses which are registered as a `hydra` config, and can be overriden via the command line when running one of the example scripts such as `run_elm.py`.
 
 ## Running ELM
-`python run_elm.py` will start an ELM evolutionary search using the defaults listed in `configs.py`. These can be overriden via the command line.
+`python run_elm.py` will start an ELM evolutionary search using the defaults listed in `configs.py`. These can be overridden via the command line. For example, you can use `python run_elm.py env=image_evolution` to run the Image Evolution environment.
 
 ## Sandbox
 To use the code execution sandbox, see the [sandboxing readme](https://github.com/CarperAI/OpenELM/blob/main/src/openelm/sandbox/README.md) for instructions to set it up in a Docker container with the gVisor runtime.

diff --git a/run_p3.py b/run_p3.py
@@ -3,19 +3,16 @@
 import pathlib
 import time
 from collections import Counter
-from typing import List
 
 import hydra
 import requests
 from hydra.core.hydra_config import HydraConfig
 from omegaconf import OmegaConf
 
 from openelm.codegen.codegen_utilities import set_seed
-from openelm.environments import P3Problem, P3ProbSol
-from openelm.mutation_model import DiffModel, MutationModel, PromptModel
 from openelm.configs import P3Config
-from openelm.environments import P3Problem, p3_long_init_args, p3_med_init_args
-from openelm.mutation_model import DiffModel, MutationModel, PromptModel
+from openelm.environments.p3.p3 import P3Problem, P3ProbSol
+from openelm.mutation_model import MutationModel, PromptModel
 from openelm.sandbox.server.sandbox_codex_execute import ExecResult
 from openelm.utils.code_eval import pass_at_k
 
@@ -30,6 +27,7 @@
 python run_p3.py probsol=True model.model_path=Salesforce/codegen-2B-mono env.batch_size=8 iterations_per_puzzle=16
 """
 
+
 class P3:
     def __init__(self, config: P3Config) -> None:
         """
@@ -38,78 +36,86 @@ def __init__(self, config: P3Config) -> None:
         self.config: P3Config = config
 
         # Model
-        if self.config.model.model_name == 'prompt':
+        if self.config.model.model_name == "prompt":
             self.mutation_model: MutationModel = PromptModel(self.config.model)
         # elif self.config.model.model_name == 'diff':
         #     self.mutation_model: MutationModel = DiffModel(self.config.model)
 
         self.log_dir = self.cfg.output_dir
 
-
     def run(self):
         """
         Query PromptModel to generate
             self.config.probsol=False: solutions to given programming puzzle problems
             self.config.probsol=True:  new problem+solution pairs
         """
-        puzzles = requests.get("https://raw.githubusercontent.com/microsoft/PythonProgrammingPuzzles/v0.2/puzzles/puzzles.json").json()
+        puzzles = requests.get(
+            "https://raw.githubusercontent.com/microsoft/PythonProgrammingPuzzles/v0.2/puzzles/puzzles.json"
+        ).json()
         run_start_time = time.time()
         for puzzle_id in self.config.starting_seeds:
             self.config.env.starting_seed = puzzle_id
 
             puzzle = puzzles[puzzle_id]
             puzzle_start_time = time.time()
-            puzzle_dict = {'name': puzzle['name']}
-            logging.info(puzzle['name'])
+            puzzle_dict = {"name": puzzle["name"]}
+            logging.info(puzzle["name"])
 
             if self.config.probsol:
-                env = P3ProbSol(config=self.config.env, mutation_model=self.mutation_model)
+                env = P3ProbSol(
+                    config=self.config.env, mutation_model=self.mutation_model
+                )
             else:
-                env = P3Problem(config=self.config.env, mutation_model=self.mutation_model)
+                env = P3Problem(
+                    config=self.config.env, mutation_model=self.mutation_model
+                )
 
             # Run
             solutions = []
             assert self.config.iterations_per_puzzle >= self.config.env.batch_size
-            for i in range(self.config.iterations_per_puzzle // self.config.env.batch_size):
-                set_seed(i) # Change seed for each query
+            for i in range(
+                self.config.iterations_per_puzzle // self.config.env.batch_size
+            ):
+                set_seed(i)  # Change seed for each query
 
                 solutions += env.random()
 
             # Evaluate fitness of solutions
             res_sols_list = []
             solved = False
             for sol in solutions:
-                res_sol_dict = {'program_str': sol.program_str}
+                res_sol_dict = {"program_str": sol.program_str}
                 if self.config.save_result_obj is not None:
                     if isinstance(sol.result_obj, ExecResult):
-                        res_sol_dict['result_obj'] = sol.result_obj.name
+                        res_sol_dict["result_obj"] = sol.result_obj.name
                     else:
-                        res_sol_dict['result_obj'] = sol.result_obj
+                        res_sol_dict["result_obj"] = sol.result_obj
 
                 fitness = env.fitness(sol)
 
                 res_sol_dict["fitness"] = fitness
                 res_sols_list.append(res_sol_dict)
                 if fitness == 1.0:
-                    solved = True # just want to save if the current problem is solved by any attempt
+                    solved = True  # just want to save if the current problem is solved by any attempt
 
-            puzzle_dict['config'] = OmegaConf.to_container(self.config)
-            puzzle_dict['solutions'] = res_sols_list
-            puzzle_dict['solved'] = solved
-            puzzle_dict['time_elapsed'] = time.time() - puzzle_start_time
+            puzzle_dict["config"] = OmegaConf.to_container(self.config)
+            puzzle_dict["solutions"] = res_sols_list
+            puzzle_dict["solved"] = solved
+            puzzle_dict["time_elapsed"] = time.time() - puzzle_start_time
 
             # Save results
             if self.config.save_results:
                 dir = f'{self.log_dir}/{puzzle_dict["name"]}/{run_start_time}'
                 pathlib.Path(dir).mkdir(parents=True, exist_ok=True)
 
-                with open(f'{dir}/results.json', 'w') as file:
+                with open(f"{dir}/results.json", "w") as file:
                     file.write(json.dumps(puzzle_dict))
 
-        logging.info(f'Successfully ran on {len(self.config.starting_seeds)}' +
-                        f'/{len(self.config.starting_seeds)}' +
-                        f' puzzles and saved any results to {self.log_dir}')
-
+        logging.info(
+            f"Successfully ran on {len(self.config.starting_seeds)}"
+            + f"/{len(self.config.starting_seeds)}"
+            + f" puzzles and saved any results to {self.log_dir}"
+        )
 
     def eval_pass_at_k(self, timestamp: str, k: int):
         """
@@ -122,7 +128,7 @@ def eval_pass_at_k(self, timestamp: str, k: int):
         """
 
         path = pathlib.Path(self.log_dir)
-        puzzle_paths = sorted(list(path.iterdir())) # Get all logged puzzles
+        puzzle_paths = sorted(list(path.iterdir()))  # Get all logged puzzles
         paks = []
         for p in puzzle_paths:
             n = 0
@@ -131,7 +137,7 @@ def eval_pass_at_k(self, timestamp: str, k: int):
             if len(timestamp) == 0:
                 # Get latest run
                 path = pathlib.Path(p)
-                run_paths = sorted(list(path.iterdir())) # Get all the runs per puzzle
+                run_paths = sorted(list(path.iterdir()))  # Get all the runs per puzzle
                 run_path = run_paths[-1]
             else:
                 # Get 'timestamp' run

diff --git a/src/openelm/algorithms/genetic.py b/src/openelm/algorithms/genetic.py
@@ -0,0 +1,159 @@
+import os
+import pickle
+import random
+from pathlib import Path
+from typing import Optional, Tuple
+
+import numpy as np
+
+from openelm.configs import QDConfig
+from openelm.environments import BaseEnvironment, Genotype
+
+Phenotype = Optional[np.ndarray]
+MapIndex = Optional[tuple]
+Individual = Tuple[np.ndarray, float]
+
+
+class Pool:
+    """The pool stores a set of solutions or individuals."""
+
+    def __init__(self, pool_size: int):
+        """Initializes an empty pool.
+
+        Args:
+            pool_size (int): The maximum number of solutions to store in
+                the pool; once it is reached, `add` removes an entry before
+                appending a new one.
+        """
+        self.pool_size = pool_size
+        self.pool = []
+
+    def add(self, solution, fitness):
+        """Adds a solution to the pool.
+
+        If the pool is full, the entry at index 0 is removed first. Because each add
+        re-sorts the pool best-first, that is the fittest entry, not the oldest.
+
+        Args:
+            solution: The solution to add to the pool.
+        """
+        # add only if new fitness beats the last entry; IndexError if the pool is empty
+        if fitness > self.pool[-1][1]:
+            if len(self.pool) >= self.pool_size:
+                self.pool.pop(0)
+            self.pool.append((solution, fitness))
+            # sort the pool by fitness
+            self.pool.sort(key=lambda x: x[1], reverse=True)
+
+
+class MAPElitesBase:
+    """
+    Base class for a genetic algorithm
+    """
+
+    def __init__(
+        self,
+        env,
+        config: QDConfig,
+        init_pool: Optional[Pool] = None,
+    ):
+        """
+        The base class for a genetic algorithm, implementing common functions and search.
+
+        Args:
+            env (BaseEnvironment): The environment to evaluate solutions in. This
+            should be a subclass of `BaseEnvironment`, and should implement
+            methods to generate random solutions, mutate existing solutions,
+            and evaluate solutions for their fitness in the environment.
+            config (QDConfig): The configuration for the algorithm.
+            init_pool (Pool, optional): A pool to use for the algorithm. If not passed,
+            a new pool will be created. Defaults to None.
+        """
+        self.env: BaseEnvironment = env
+        self.config: QDConfig = config
+        self.save_history = self.config.save_history
+        self.save_snapshot_interval = self.config.save_snapshot_interval
+        self.start_step = 0
+        self.save_np_rng_state = self.config.save_np_rng_state
+        self.load_np_rng_state = self.config.load_np_rng_state
+        self.rng = np.random.default_rng(self.config.seed)
+        self.rng_generators = None
+
+        self._init_pool(init_pool, self.config.log_snapshot_dir)
+
+    def to_mapindex(self, b: Phenotype) -> MapIndex:
+        """Converts a phenotype (position in behaviour space) to a map index."""
+        raise NotImplementedError
+
+    def _init_pool(
+        self, init_map: Optional[Pool] = None, log_snapshot_dir: Optional[str] = None
+    ):
+        if init_map is None and log_snapshot_dir is None:
+            self.pool = Pool(self.config.pool_size)
+        elif init_map is not None and log_snapshot_dir is None:
+            self.pool = init_map
+        elif init_map is None and log_snapshot_dir is not None:
+            self.pool = Pool(self.config.pool_size)
+            log_path = Path(log_snapshot_dir)
+            if log_snapshot_dir and os.path.isdir(log_path):
+                stem_dir = log_path.stem
+
+                assert (
+                    "step_" in stem_dir
+                ), f"loading directory ({stem_dir}) doesn't contain 'step_' in name"
+                self.start_step = (
+                    int(stem_dir.replace("step_", "")) + 1
+                )  # add 1 to correct the iteration steps to run
+
+                snapshot_path = log_path / "pool.pkl"
+                assert os.path.isfile(
+                    snapshot_path
+                ), f'{log_path} does not contain map snapshot "pool.pkl"'
+                # Replace the empty pool created above with the pickled
+                # snapshot stored in pool.pkl
+                with open(snapshot_path, "rb") as f:
+                    self.pool = pickle.load(f)
+
+        print("Loading finished")
+
+    def random_selection(self) -> MapIndex:
+        """Randomly select a (solution, fitness) entry from the pool."""
+        return random.choice(self.pool.pool)
+
+    def search(self, init_steps: int, total_steps: int, atol: float = 0.0) -> str:
+        """
+        Run the genetic algorithm.
+
+        Args:
+            init_steps (int): Number of initial steps that generate random solutions.
+            total_steps (int): Total number of steps to run the algorithm for,
+                including initial steps.
+            atol (float, optional): Tolerance for how close the best performing
+                solution has to be to the maximum possible fitness before the
+                search stops early. Defaults to 0.0. Currently unused by this method.
+
+        Returns:
+            str: A string representation of the best performing solution.
+                NOTE: as shown in this diff, the method body has no `return`
+                statement and never sets `current_max_genome`.
+        """
+        total_steps = int(total_steps)
+        for n_steps in range(total_steps):
+            if n_steps < init_steps:
+                # Initialise by generating initsteps random solutions
+                new_individuals: list[Genotype] = self.env.random()
+            else:
+                # Randomly select a batch of individuals
+                batch: list[Genotype] = []
+                for _ in range(self.env.batch_size):
+                    item = self.random_selection()
+                    batch.append(item)
+                # Mutate
+                new_individuals = self.env.mutate(batch)
+
+            for individual in new_individuals:
+                # Evaluate fitness
+                fitness = self.env.fitness(individual)
+                if np.isinf(fitness):
+                    continue
+                self.pool.add(individual, fitness)