Skip to content

Commit

Permalink
Improve docs generation (#255)
Browse files Browse the repository at this point in the history
  • Loading branch information
younik authored Nov 2, 2024
1 parent 9a48df0 commit 3f1618e
Show file tree
Hide file tree
Showing 4 changed files with 165 additions and 117 deletions.
240 changes: 128 additions & 112 deletions docs/_scripts/gen_dataset_md.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,26 +9,85 @@
import warnings
from collections import defaultdict
from multiprocessing import Pool
from typing import Dict, OrderedDict
from typing import OrderedDict

import generate_env_table
import generate_gif
from gymnasium.envs.registration import EnvSpec
from md_utils import dict_to_table

import minari
from minari.dataset.minari_dataset import gen_dataset_id, parse_dataset_id
from minari.namespace import download_namespace_metadata, get_namespace_metadata
from minari.utils import get_dataset_spec_dict, get_env_spec_dict
from minari.utils import get_dataset_spec_dict


DATASET_FOLDER = pathlib.Path(__file__).parent.parent.joinpath("datasets")
NAMESPACE_CONTENTS = defaultdict(OrderedDict)

NO_ENV_MSG = """
```{eval-rst}
.. warning::
This dataset doesn't contain an `env_spec`, neither an `eval_env_spec` attribute. Any call to :func:`minari.MinariDataset.recover_environment` will throw an error.
```
"""
NO_TRAIN_ENV_MSG = """
```{eval-rst}
def _md_table(table_dict: Dict[str, str]) -> str:
markdown = "| | |\n |----|----|"
for key, value in table_dict.items():
markdown += f"\n| {key} | {value} |"
return markdown
.. warning::
This dataset doesn't contain an `env_spec` attribute. Calling :func:`minari.MinariDataset.recover_environment` with `eval_env=False` will throw an error.
```
"""
NO_EVAL_ENV_MSG = """
```{{eval-rst}}
.. note::
This dataset doesn't contain an `eval_env_spec` attribute which means that the specs of the environment used for evaluation are the same as the specs of the environment used for creating the dataset. The following calls will return the same environment:
.. code-block::
import minari
dataset = minari.load_dataset('{}')
env = dataset.recover_environment()
eval_env = dataset.recover_environment(eval_env=True)
assert env.spec == eval_env.spec
```
"""
PRE_TRAIN_ENV_MSG = """
```{{eval-rst}}
.. note::
The following table rows correspond to the Gymnasium environment specifications used to generate the dataset.
To read more about what each parameter means you can have a look at the Gymnasium documentation https://gymnasium.farama.org/api/registry/#gymnasium.envs.registration.EnvSpec
This environment can be recovered from the Minari dataset as follows:
.. code-block::
import minari
dataset = minari.load_dataset('{}')
env = dataset.recover_environment()
```
"""
PRE_EVAL_ENV_MSG = """
```{{eval-rst}}
.. note::
This environment can be recovered from the Minari dataset as follows:
.. code-block::
import minari
dataset = minari.load_dataset('{}')
eval_env = dataset.recover_environment(eval_env=True)
```
"""


def main():
Expand Down Expand Up @@ -78,24 +137,23 @@ def _generate_dataset_page(arg):
_, dataset_name, version = parse_dataset_id(dataset_id)
versioned_name = gen_dataset_id(None, dataset_name, version)

description = metadata.get("description")
try:
venv.create(dataset_id, with_pip=True)

requirements = [
"minari[gcs,hdf5] @ git+https://github.com/Farama-Foundation/Minari.git",
"imageio",
"absl-py",
]
requirements.extend(metadata.get("requirements", []))
pip_path = pathlib.Path(dataset_id) / "bin" / "pip"
req_args = [pip_path, "install", *requirements]
subprocess.check_call(req_args, stdout=subprocess.DEVNULL)
logging.info(f"Installed requirements for {dataset_id}")
venv_name = f"venv_{dataset_id.replace('/', '_')}"
venv.create(venv_name, with_pip=True)
python_path = pathlib.Path(venv_name) / "bin" / "python"
pip_path = pathlib.Path(venv_name) / "bin" / "pip"

requirements = [
"minari[gcs,hdf5] @ git+https://github.com/Farama-Foundation/Minari.git",
"imageio",
"absl-py",
]
requirements.extend(metadata.get("requirements", []))
req_args = [pip_path, "install", *requirements]
subprocess.check_call(req_args, stdout=subprocess.DEVNULL)
logging.info(f"Installed requirements for {dataset_id}")

try:
minari.download_dataset(dataset_id)

python_path = pathlib.Path(dataset_id) / "bin" / "python"
subprocess.check_call(
[
python_path,
Expand All @@ -105,7 +163,6 @@ def _generate_dataset_page(arg):
]
)
minari.delete_dataset(dataset_id)
shutil.rmtree(dataset_id)
img_link_str = f'<img src="../{versioned_name}.gif" width="200" style="display: block; margin:0 auto"/>'
except Exception as e:
warnings.warn(f"Failed to generate gif for {dataset_id}: {e}")
Expand All @@ -116,109 +173,67 @@ def _generate_dataset_page(arg):
eval_env_spec = metadata.get("eval_env_spec")

if env_spec is None and eval_env_spec is None:
env_docs += """
```{eval-rst}
.. warning::
This dataset doesn't contain an `env_spec`, neither an `eval_env_spec` attribute. Any call to :func:`minari.MinariDataset.recover_environment` will throw an error.
env_docs += NO_ENV_MSG

```
"""
else:
env_docs += """
## Environment Specs
"""
env_docs += "\n## Environment Specs\n"
if env_spec is None:
env_docs += """
```{eval-rst}
.. warning::
This dataset doesn't contain an `env_spec` attribute. Calling :func:`minari.MinariDataset.recover_environment` with `eval_env=False` will throw an error.
```
"""
env_docs += NO_TRAIN_ENV_MSG
else:
env_docs += f"""
```{{eval-rst}}
.. note::
The following table rows correspond to (in addition to the action and observation space) the Gymnasium environment specifications used to generate the dataset.
To read more about what each parameter means you can have a look at the Gymnasium documentation https://gymnasium.farama.org/api/registry/#gymnasium.envs.registration.EnvSpec
This environment can be recovered from the Minari dataset as follows:
.. code-block::
import minari
dataset = minari.load_dataset('{dataset_id}')
env = dataset.recover_environment()
```
{_md_table(get_env_spec_dict(EnvSpec.from_json(env_spec)))}
"""

env_docs += """
## Evaluation Environment Specs
"""
env_docs += PRE_TRAIN_ENV_MSG.format(dataset_id)
env_docs += "\n"

train_spec_file = f"train_spec_{dataset_id.replace('/', '_')}.md"
subprocess.check_call(
[
python_path,
generate_env_table.__file__,
f"--env_spec={env_spec}",
f"--file_name={train_spec_file}",
]
)

env_docs += pathlib.Path(train_spec_file).read_text()
env_docs += "\n"

env_docs += """\n## Evaluation Environment Specs\n"""
if eval_env_spec is None:
env_docs += f"""
```{{eval-rst}}
.. note::
This dataset doesn't contain an `eval_env_spec` attribute which means that the specs of the environment used for evaluation are the same as the specs of the environment used for creating the dataset. The following calls will return the same environment:
.. code-block::
import minari
dataset = minari.load_dataset('{dataset_id}')
env = dataset.recover_environment()
eval_env = dataset.recover_environment(eval_env=True)
assert env.spec == eval_env.spec
```
"""
env_docs += NO_EVAL_ENV_MSG.format(dataset_id)
else:
env_docs += f"""
```{{eval-rst}}
.. note::
This environment can be recovered from the Minari dataset as follows:
.. code-block::
import minari
dataset = minari.load_dataset('{dataset_id}')
eval_env = dataset.recover_environment(eval_env=True)
```
{_md_table(get_env_spec_dict(EnvSpec.from_json(eval_env_spec)))}
"""

content = f"""---
autogenerated:
title: {dataset_name.title()}
---
# {dataset_name.title()}
"""
env_docs += PRE_EVAL_ENV_MSG.format(dataset_id)
env_docs += "\n"

eval_spec_file = f"eval_spec_{dataset_id.replace('/', '_')}.md"
subprocess.check_call(
[
python_path,
generate_env_table.__file__,
f"--env_spec={env_spec}",
f"--file_name={eval_spec_file}",
]
)

env_docs += pathlib.Path(eval_spec_file).read_text()
env_docs += "\n"

content = "---\nautogenerated:\n"
content += f"title: {dataset_name.title()}"
content += "---\n\n"
content += f"# {dataset_name.title()}"
content += "\n\n"

if img_link_str is not None:
content += img_link_str
content += "\n\n"
if description is not None:
if "description" in metadata:
content += "## Description"
content += "\n\n"
content += description
content += metadata["description"]
content += "\n\n"

content += "## Dataset Specs"
content += "\n\n"
content += _md_table(get_dataset_spec_dict(metadata))
content += dict_to_table(get_dataset_spec_dict(metadata))
content += "\n\n"
content += env_docs

Expand All @@ -228,6 +243,7 @@ def _generate_dataset_page(arg):
file.close()

logging.info(f"Generated dataset page for {dataset_id}")
shutil.rmtree(venv_name)


def _generate_namespace_page(namespace: str, namespace_content):
Expand Down
22 changes: 22 additions & 0 deletions docs/_scripts/generate_env_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from absl import app, flags
from gymnasium.envs.registration import EnvSpec
from md_utils import dict_to_table

from minari.utils import get_env_spec_dict


FLAGS = flags.FLAGS
# --env_spec carries the spec itself (it is passed to EnvSpec.from_json),
# not a path to a file.
flags.DEFINE_string("env_spec", None, "Environment spec as a JSON string")
flags.DEFINE_string("file_name", None, "File name to save the md file")
# Both flags default to None and main() uses them unconditionally, so fail
# early with a clear usage error when either one is missing.
flags.mark_flags_as_required(["env_spec", "file_name"])


def main(argv):
    """Write the Markdown table for the env spec given in ``--env_spec``.

    The value of ``FLAGS.env_spec`` is parsed with ``EnvSpec.from_json``,
    converted with ``get_env_spec_dict`` and ``dict_to_table``, and the
    resulting text is written to the path in ``FLAGS.file_name``.
    """
    del argv  # Positional arguments are unused; inputs come from flags.
    spec = EnvSpec.from_json(FLAGS.env_spec)
    table_text = dict_to_table(get_env_spec_dict(spec))
    with open(FLAGS.file_name, "w") as out_file:
        out_file.write(table_text)


# When executed as a script, hand main over to absl's app.run.
if __name__ == "__main__":
    app.run(main)
8 changes: 8 additions & 0 deletions docs/_scripts/md_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from typing import Dict


def dict_to_table(table_dict: Dict[str, str]) -> str:
    """Render a mapping as a two-column Markdown table.

    The output starts with an empty header row and its separator row; each
    item of ``table_dict`` then becomes one ``| key | value |`` row, in
    iteration order.
    """
    header = "| | |\n |----|----|"
    rows = [f"| {name} | {cell} |" for name, cell in table_dict.items()]
    return "\n".join([header, *rows])
12 changes: 7 additions & 5 deletions docs/content/dataset_standards.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,13 @@ The Minari storage format supports the following observation and action spaces:

| Space | Description |
| ------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------- |
| [Discrete](https://github.com/Farama-Foundation/Gymnasium/blob/main/gymnasium/spaces/discrete.py) |Describes a discrete space where `{0, 1, ..., n-1}` are the possible values our observation can take. An optional argument can be used to shift the values to `{a, a+1, ..., a+n-1}`.|
| [Box](https://github.com/Farama-Foundation/Gymnasium/blob/main/gymnasium/spaces/box.py) |An n-dimensional continuous space. The `upper` and `lower` arguments can be used to define bounded spaces.|
| [Tuple](https://github.com/Farama-Foundation/Gymnasium/blob/main/gymnasium/spaces/tuple.py) |Represents a tuple of spaces. |
| [Dict](https://github.com/Farama-Foundation/Gymnasium/blob/main/gymnasium/spaces/dict.py) |Represents a dictionary of spaces. |
| [Text](https://github.com/Farama-Foundation/Gymnasium/blob/main/gymnasium/spaces/text.py) |The elements of this space are bounded strings from a charset. Note: at the moment, we don't guarantee support for all surrogate pairs. | |
| [Box](https://gymnasium.farama.org/api/spaces/fundamental/#gymnasium.spaces.Box) |An n-dimensional continuous space. The `upper` and `lower` arguments can be used to define bounded spaces.|
| [Discrete](https://gymnasium.farama.org/api/spaces/fundamental/#gymnasium.spaces.Discrete) |Describes a discrete space where `{0, 1, ..., n-1}` are the possible values our observation can take. An optional argument can be used to shift the values to `{a, a+1, ..., a+n-1}`.|
| [MultiDiscrete](https://gymnasium.farama.org/api/spaces/fundamental/#gymnasium.spaces.MultiDiscrete) |Represents the cartesian product of arbitrary Discrete spaces.|
| [MultiBinary](https://gymnasium.farama.org/api/spaces/fundamental/#gymnasium.spaces.MultiBinary) |A binary space. The elements are binary arrays.|
| [Tuple](https://gymnasium.farama.org/api/spaces/composite/#gymnasium.spaces.Tuple) |Represents a tuple of spaces. |
| [Dict](https://gymnasium.farama.org/api/spaces/composite/#gymnasium.spaces.Dict) |Represents a dictionary of spaces. |
| [Text](https://gymnasium.farama.org/api/spaces/fundamental/#gymnasium.spaces.Text) |The elements of this space are bounded strings from a charset. Note: at the moment, we don't guarantee support for all surrogate pairs. |

Spaces are serialized to a JSON format when saving to disk. This serialization supports all space types supported by Minari, and aims to be both human- and machine-readable.

Expand Down

0 comments on commit 3f1618e

Please sign in to comment.