Add Schelling diagram payoffs computation for game master

PiperOrigin-RevId: 631039346 Change-Id: I3fcd85b01c74fe02de89d73bd2d465dd136b24c5
google-deepmind · May 6, 2024 · 00d72ae · 00d72ae
1 parent 93710ed
commit 00d72ae
Show file tree

Hide file tree

Showing 2 changed files with 246 additions and 2 deletions.
diff --git a/concordia/components/game_master/__init__.py b/concordia/components/game_master/__init__.py
@@ -12,8 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
-"""Library of components specifically for generative game master."""
+"""Library of components specifically for generative game masters."""
 
 from concordia.components.game_master import conversation
 from concordia.components.game_master import current_scene
@@ -22,4 +21,5 @@
 from concordia.components.game_master import player_status
 from concordia.components.game_master import relevant_events
 from concordia.components.game_master import schedule
+from concordia.components.game_master import schelling_diagram_payoffs
 from concordia.components.game_master import time_display
diff --git a/concordia/components/game_master/schelling_diagram_payoffs.py b/concordia/components/game_master/schelling_diagram_payoffs.py
@@ -0,0 +1,244 @@
+# Copyright 2022 DeepMind Technologies Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""A component for computing and delivering payoffs using a Schelling diagram.
+"""
+
+from collections.abc import Callable, Mapping, Sequence
+import datetime
+
+from concordia.agents import basic_agent
+from concordia.associative_memory import associative_memory
+from concordia.components.game_master import current_scene
+from concordia.language_model import language_model
+from concordia.typing import component
+import numpy as np
+
+import termcolor
+
+
+class SchellingPayoffs(component.Component):
+  """Define payoffs for minigames using a Schelling diagram.
+
+  Schelling diagrams are a game representation described in:
+
+  Schelling, T.C., 1973. Hockey helmets, concealed weapons, and daylight saving:
+  A study of binary choices with externalities. Journal of Conflict resolution,
+  17(3), pp.381-428.
+
+  Each acting player's choice is recorded per stage. When the current scene is
+  the resolution scene and every acting player has chosen, the choices are
+  binarized (cooperate or defect), rewards are computed from the number of
+  cooperators, scores are accumulated and outcome messages are delivered.
+  """
+
+  def __init__(
+      self,
+      model: language_model.LanguageModel,
+      memory: associative_memory.AssociativeMemory,
+      cooperative_option: str,
+      resolution_scene: str,
+      cooperator_reward_fn: Callable[[int], float],
+      defector_reward_fn: Callable[[int], float],
+      players: Sequence[basic_agent.BasicAgent],
+      acting_player_names: Sequence[str],
+      outcome_summarization_fn: Callable[[Mapping[str, int],
+                                          Mapping[str, float]],
+                                         Mapping[str, str]],
+      clock_now: Callable[[], datetime.datetime],
+      name: str = 'scoring function',
+      verbose: bool = False,
+  ):
+    """Initialize a scoring function component.
+
+    Args:
+      model: a language model
+      memory: an associative memory
+      cooperative_option: which option choice constitutes cooperation
+      resolution_scene: on which scene type should this component be updated
+        after the event, i.e. when to check the joint action and compute results
+      cooperator_reward_fn: reward obtained by each cooperator as a function of
+        the total number of cooperators in the joint action
+      defector_reward_fn: reward obtained by each defector as a function of the
+        total number of cooperators in the joint action
+      players: sequence of agents (a superset of the active players)
+      acting_player_names: sequence of names of players who act each stage
+      outcome_summarization_fn: function of binarized joint actions and
+        rewards which returns an outcome description message for each player
+      clock_now: Function to call to get current time.
+      name: name of this component
+      verbose: whether to print the state after checking a resolution scene
+    """
+    self._model = model
+    self._memory = memory
+    self._cooperative_option = cooperative_option
+    self._cooperator_reward_fn = cooperator_reward_fn
+    self._defector_reward_fn = defector_reward_fn
+    self._players = players
+    self._acting_player_names = acting_player_names
+    self._outcome_summarization_fn = outcome_summarization_fn
+    self._clock_now = clock_now
+    self._name = name
+    self._verbose = verbose
+
+    self._history = []
+    self._state = ''
+    # Start far enough in the past that the first `update_after_event` call is
+    # never mistaken for a repeated call at the same clock time.
+    self._last_update = self._clock_now() - datetime.timedelta(days=365)
+    self._partial_states = {player.name: '' for player in self._players}
+    self._player_scores = {player.name: 0 for player in self._players}
+
+    self._resolution_scene = resolution_scene
+    self._current_scene = current_scene.CurrentScene(
+        name='current scene type',
+        memory=self._memory,
+        clock_now=self._clock_now,
+        verbose=self._verbose,
+    )
+
+    self.reset()
+    # Set the initial state's string representation.
+    self.update()
+
+  def reset(self) -> None:
+    """Restarts stage tracking; scores and history are left untouched."""
+    self._stage_idx = 0
+    # Per stage, map each player's name to their component of the joint action.
+    self._stage_to_joint_action = [
+        {name: None for name in self._acting_player_names}]
+
+  def name(self) -> str:
+    """Returns the name of this component."""
+    return self._name
+
+  def get_last_log(self):
+    """Returns a copy of the latest log entry, or None if there is none."""
+    if self._history:
+      return self._history[-1].copy()
+    return None
+
+  def get_history(self):
+    """Returns a shallow copy of the list of log entries."""
+    return self._history.copy()
+
+  def state(self) -> str:
+    """Returns the game master's view of the latest outcome."""
+    return self._state
+
+  def partial_state(
+      self,
+      player_name: str,
+  ) -> str:
+    """Return a player-specific view of the component's state."""
+    return self._partial_states[player_name]
+
+  def update(self) -> None:
+    """Updates the nested component that tracks the current scene type."""
+    self._current_scene.update()
+
+  def _joint_action_is_complete(self, joint_action: Mapping[str, str]) -> bool:
+    """Returns True iff every acting player has a recorded choice."""
+    return all(joint_action[player_name] is not None
+               for player_name in self._acting_player_names)
+
+  def _binarize_joint_action(
+      self,
+      joint_action: Mapping[str, str]) -> Mapping[str, int]:
+    """Maps each player's choice to whether it is the cooperative option.
+
+    Args:
+      joint_action: mapping from player name to the chosen option string.
+
+    Returns:
+      Mapping from player name to True (cooperated) or False (defected). A
+      choice counts as cooperation only if it equals `cooperative_option`.
+    """
+    return {name: act == self._cooperative_option
+            for name, act in joint_action.items()}
+
+  def _get_rewards_from_joint_action(
+      self, binary_joint_action: Mapping[str, int]) -> Mapping[str, float]:
+    """Computes each player's reward from the binarized joint action.
+
+    Args:
+      binary_joint_action: mapping from player name to a truthy value if that
+        player cooperated and a falsy value if they defected.
+
+    Returns:
+      Mapping from player name to reward. Cooperators are paid by
+      `cooperator_reward_fn` and defectors by `defector_reward_fn`, both
+      evaluated at the total number of cooperators.
+    """
+    # For now, this only supports "Schelling style" (binary choice with
+    # externalities) type of game representations. This means the critical
+    # factor is the number of players picking the cooperate option.
+    # Cast to a plain int since the reward functions are typed to take an int.
+    num_cooperators = int(np.sum(list(binary_joint_action.values())))
+
+    # Iterate over (name, flag) pairs. Iterating the mapping directly would
+    # yield only the names; a non-empty name is truthy, so every player would
+    # be paid as a cooperator.
+    rewards = {}
+    for player_name, is_cooperator in binary_joint_action.items():
+      reward_fn = (self._cooperator_reward_fn if is_cooperator
+                   else self._defector_reward_fn)
+      rewards[player_name] = reward_fn(num_cooperators)
+
+    return rewards
+
+  def _set_outcome_messages(
+      self,
+      binary_joint_action: Mapping[str, int],
+      rewards: Mapping[str, float],
+  ) -> None:
+    """Builds the outcome state, stores it in memory and informs the players.
+
+    Args:
+      binary_joint_action: mapping from player name to whether they cooperated.
+      rewards: mapping from player name to the reward earned this stage.
+    """
+    # Only the game master sees the accumulated score of every player.
+    game_master_private_state = '\n'.join(
+        [f'{player.name}: {self._player_scores[player.name]}'
+         for player in self._players])
+    # Players see a text-based summarization of the events, which may or may not
+    # include the actual reward values.
+    partial_states = self._outcome_summarization_fn(binary_joint_action,
+                                                    rewards)
+    common_view_of_player_obs = '\n'.join(
+        [f'{name} observed: {observation}' for name, observation
+         in partial_states.items()])
+
+    # State is only observed by the game master since players get
+    # their observations from `partial_states`.
+    self._state = f'{common_view_of_player_obs}\n{game_master_private_state}'
+
+    # The game master gets a memory of the state.
+    self._memory.add(self._state)
+    # Active players observe their own partial state description and inactive
+    # players get the common description.
+    for player in self._players:
+      if player.name in self._acting_player_names:
+        player.observe(partial_states[player.name])
+      else:
+        player.observe(common_view_of_player_obs)
+
+  def update_before_event(self, player_action_attempt: str) -> None:
+    """Records a player's choice for the current stage.
+
+    Args:
+      player_action_attempt: string formatted as "name: attempt".
+
+    Raises:
+      ValueError: if `player_action_attempt` does not contain ': '.
+    """
+    # Split on the first separator only, so that a choice which itself contains
+    # ': ' stays intact instead of breaking the two-way unpacking.
+    player_name, choice_str = player_action_attempt.split(': ', 1)
+    self._stage_to_joint_action[self._stage_idx][player_name] = choice_str
+
+  def update_after_event(
+      self,
+      event_statement: str,
+  ) -> None:
+    """Resolves the current stage if possible and logs the result.
+
+    Does nothing if already called at the current clock time. Otherwise, if
+    the current scene type equals the resolution scene and all acting players
+    have chosen, computes rewards, accumulates scores, delivers outcome
+    messages and opens a new stage. A log entry is appended in every case.
+
+    Args:
+      event_statement: description of the event; not used by this component.
+    """
+    if self._clock_now() == self._last_update:
+      return
+    self._last_update = self._clock_now()
+
+    current_scene_type = self._current_scene.state()
+    joint_action = []
+    if current_scene_type == self._resolution_scene:
+      # Check if all players have acted so far in the current stage game.
+      joint_action = self._stage_to_joint_action[self._stage_idx]
+      if self._joint_action_is_complete(joint_action):
+        # Map the joint action to rewards per player.
+        binary_joint_action = self._binarize_joint_action(joint_action)
+        rewards = self._get_rewards_from_joint_action(binary_joint_action)
+
+        # Accumulate the rewards per player.
+        for name in self._acting_player_names:
+          self._player_scores[name] += rewards[name]
+
+        # Determine summary messages for each player and the GM.
+        self._set_outcome_messages(binary_joint_action, rewards)
+
+        # Advance to the next stage.
+        self._stage_idx += 1
+        self._stage_to_joint_action.append(
+            {name: None for name in self._acting_player_names})
+
+      if self._verbose:
+        print(termcolor.colored(self.state(), 'yellow'))
+
+    update_log = {
+        'date': self._clock_now(),
+        'Summary': self.name(),
+        'Schelling diagram payoffs': self.state(),
+        'Joint action': str(joint_action),
+    }
+    # NOTE(review): the state is written to memory on every non-duplicate call,
+    # including calls where no stage was resolved and calls where
+    # `_set_outcome_messages` has just added the same text. Confirm that this
+    # repetition is intended.
+    self._memory.extend([self._state,])
+    self._history.append(update_log)