PKU-Alignment · Gaiejj · Apr 17, 2024 · Apr 30, 2024 · May 3, 2024 · May 6, 2024
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -29,7 +29,7 @@ repos:
       - id: debug-statements
       - id: double-quote-string-fixer
   - repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: v0.4.2
+    rev: v0.5.0
     hooks:
       - id: ruff
         args: [--fix, --exit-non-zero-on-fix]
@@ -54,7 +54,7 @@ repos:
       - id: pyupgrade
         args: [--py38-plus] # sync with requires-python
   - repo: https://github.com/pycqa/flake8
-    rev: 7.0.0
+    rev: 7.1.0
     hooks:
       - id: flake8
         additional_dependencies:
@@ -114,6 +114,7 @@ repos:
             ^tests/|
             ^setup.py$|
             ^omnisafe/envs/classic_control/envs_from_crabs.py$|
+            ^omnisafe/envs/classic_control/envs_from_rcbf.py$|
             ^omnisafe/common/control_barrier_function/crabs/models.py$|
             ^omnisafe/common/control_barrier_function/crabs/optimizers.py$|
             ^omnisafe/common/control_barrier_function/crabs/utils.py$|

diff --git a/conftest.py b/conftest.py
@@ -10,6 +10,4 @@
 
 
 def pytest_ignore_collect(path, config):
-    if os.path.basename(path) == 'meta_drive_env.py' and not meta_drive_env_available:
-        return True
-    return False
+    return os.path.basename(path) == 'meta_drive_env.py' and not meta_drive_env_available
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -394,6 +394,7 @@ this project, don't hesitate to ask your question on `the GitHub issue page <htt
     saferl/pcpo
     saferl/focops
     saferl/lag
+    saferl/cbf
 
 .. toctree::
     :hidden:

diff --git a/docs/source/saferl/cbf.rst b/docs/source/saferl/cbf.rst
@@ -0,0 +1,136 @@
+Control Barrier Functions Methods
+=================================
+
+Quick Facts
+-----------
+
+.. card::
+    :class-card: sd-outline-info  sd-rounded-1
+    :class-body: sd-font-weight-bold
+
+    #. The Control Barrier Function (CBF) method maps :bdg-danger-line:`unsafe` actions to :bdg-info-line:`safe` actions.
+    #. The CBF method is tightly :bdg-info-line:`coupled` with the environment.
+    #. Currently, OmniSafe provides implementations of four algorithms: *DDPGCBF*, *TRPOCBF*, *SACRCBF*, and *CRABS*.
+    #. Here we introduce the :bdg-info-line:`interface` to extend the CBF method to :bdg-info-line:`custom environments`.
+
+Introduction
+------------
+
+This section introduces how CBF methods are implemented in OmniSafe, using DDPGCBF, which is based on the classic CBF paper `End-to-End Safe Reinforcement Learning through Barrier Functions for Safety-Critical Continuous Control Tasks <https://arxiv.org/pdf/1903.08792>`_, as an example.
+
+The CBF method implementation in OmniSafe revolves around the ``Adapter``, which decouples and integrates the two core components: ``dynamics model`` and ``solver``. The former predicts the dynamic changes of the environment, while the latter maps the current action to a safe space based on the given environment dynamics.
+
+CBF Adapter
+-----------
+
+.. currentmodule:: omnisafe.adapter
+
+.. card::
+    :class-header: sd-bg-success sd-text-white
+    :class-card: sd-outline-success  sd-rounded-1
+
+    Documentation
+    ^^^
+
+    .. autoclass:: OffPolicyBarrierFunctionAdapter
+        :members:
+
+Core Components
+---------------
+
+Dynamics Model
+""""""""""""""
+
+The dynamics model of a CBF method must be designed for a specific environment. For example, for the ``Pendulum-v1`` environment, the dynamics are computed from physical parameters such as mass, length, and gravitational acceleration.
+
+.. code-block:: python
+    :linenos:
+
+    def get_dynamics(self, obs: list[float], original_action: float) -> np.ndarray:
+        dt = 0.05
+        # gravitational constant
+        G = 10
+        # mass
+        m = 2
+        # length
+        length = 2
+        # calculate the angle
+        theta = np.arctan2(obs[1], obs[0])
+        # angular velocity
+        theta_dot = obs[2]
+        # dynamics equations
+        f = np.array(
+            [
+                -3 * G / (2 * length) * np.sin(theta + np.pi) * dt**2
+                + theta_dot * dt
+                + theta
+                + 3 / (m * length**2) * original_action * dt**2,
+                theta_dot
+                - 3 * G / (2 * length) * np.sin(theta + np.pi) * dt
+                + 3 / (m * length**2) * original_action * dt,
+            ],
+        )
+        return np.squeeze(f)
+
+Mainstream implementations often combine several Gaussian Process (GP) models to fit the environment dynamics. The corresponding API documentation is as follows:
+
+.. currentmodule:: omnisafe.common
+
+.. card::
+    :class-header: sd-bg-success sd-text-white
+    :class-card: sd-outline-success  sd-rounded-1
+
+    Documentation
+    ^^^
+
+    .. autoclass:: DynamicsModel
+        :members:
+        :private-members:
+
+The ``solver`` takes the feedback from the ``dynamics model`` and maps the often unsafe actions generated by the agent to safe ones.
+
+CBF Solver
+""""""""""
+
+.. currentmodule:: omnisafe.common
+
+.. card::
+    :class-header: sd-bg-success sd-text-white
+    :class-card: sd-outline-success  sd-rounded-1
+
+    Documentation
+    ^^^
+
+    .. autoclass:: PendulumSolver
+        :members:
+        :private-members:
+
+Architecture of methods
+"""""""""""""""""""""""
+
+-  ``DDPGCBF.learn()``
+
+   - ``DDPGCBF._env.rollout()``
+
+     - ``DDPGCBF._env.get_safe_action()``
+
+       - ``DDPGCBF._env.dynamics_model.get_gp_dynamics()``
+       - ``DDPGCBF._env.solver.control_barrier()``
+
+     - ``DDPGCBF._env.dynamics_model.update_gp_dynamics()``
+
+   - ``DDPGCBF._update()``
+
+
+Further Discussion
+""""""""""""""""""
+
+For details on the implementation, performance, reproducible scripts, and related discussions of algorithms including DDPGCBF, please refer to: https://github.com/PKU-Alignment/omnisafe/pull/323
+
+
+References
+----------
+
+-  `End-to-End Safe Reinforcement Learning through Barrier Functions for Safety-Critical Continuous Control Tasks <https://arxiv.org/pdf/1903.08792>`__
+-  `Safe Reinforcement Learning Using Robust Control Barrier Functions <https://arxiv.org/pdf/2110.05415>`__
+-  `Learning Barrier Certificates: Towards Safe Reinforcement Learning with Zero Training-time Violations <https://arxiv.org/pdf/2108.01846>`__
diff --git a/docs/source/spelling_wordlist.txt b/docs/source/spelling_wordlist.txt
@@ -486,3 +486,31 @@ UpdateDynamics
 mathbb
 meger
 Jupyter
+compensator
+CBF
+Vectorize
+gp
+optim
+cvx
+QP
+gpytorch
+ExactGP
+RBF
+parallelization
+compensators
+thetadot
+VK
+Sharma
+Kosaraju
+Seetharaman
+Sadler
+Suttle
+Cheng
+Orosz
+JW
+Burdick
+Vipul
+Sivaranjani
+Vijay
+suttle
+regressor
diff --git a/examples/plot.py b/examples/plot.py
@@ -1,4 +1,4 @@
-# Copyright 2023 OmniSafe Team. All Rights Reserved.
+# Copyright 2024 OmniSafe Team. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -35,6 +35,27 @@
     parser.add_argument('--select', nargs='*')
     parser.add_argument('--exclude', nargs='*')
     parser.add_argument('--estimator', default='mean')
+    parser.add_argument(
+        '--reward-metrics',
+        type=str,
+        choices=[
+            'Metrics/TestEpRet',
+            'Metrics/EpRet',
+        ],
+        default='Metrics/EpRet',
+        help='Specify the reward metric to be used.',
+    )
+    parser.add_argument(
+        '--cost-metrics',
+        type=str,
+        choices=[
+            'Metrics/Max_angle_violation',
+            'Metrics/TestEpCost',
+            'Metrics/EpCost',
+        ],
+        default='Metrics/EpCost',
+        help='Specify the cost metric to be used.',
+    )
     args = parser.parse_args()
 
     plotter = Plotter()
@@ -48,4 +69,6 @@
         select=args.select,
         exclude=args.exclude,
         estimator=args.estimator,
+        cost_metrics=args.cost_metrics,
+        reward_metrics=args.reward_metrics,
     )
diff --git a/omnisafe/adapter/__init__.py b/omnisafe/adapter/__init__.py
@@ -14,11 +14,15 @@
 # ==============================================================================
 """Adapter for the environment and the algorithm."""
 
+from omnisafe.adapter.barrier_function_adapter import BarrierFunctionAdapter
+from omnisafe.adapter.beta_barrier_function_adapter import BetaBarrierFunctionAdapter
 from omnisafe.adapter.early_terminated_adapter import EarlyTerminatedAdapter
 from omnisafe.adapter.modelbased_adapter import ModelBasedAdapter
 from omnisafe.adapter.offline_adapter import OfflineAdapter
 from omnisafe.adapter.offpolicy_adapter import OffPolicyAdapter
+from omnisafe.adapter.offpolicy_barrier_function_adapter import OffPolicyBarrierFunctionAdapter
 from omnisafe.adapter.online_adapter import OnlineAdapter
 from omnisafe.adapter.onpolicy_adapter import OnPolicyAdapter
+from omnisafe.adapter.robust_barrier_function_adapter import RobustBarrierFunctionAdapter
 from omnisafe.adapter.saute_adapter import SauteAdapter
 from omnisafe.adapter.simmer_adapter import SimmerAdapter