Add Schelling diagram payoffs computation for game master
PiperOrigin-RevId: 631039346
Change-Id: I3fcd85b01c74fe02de89d73bd2d465dd136b24c5
jzleibo authored and copybara-github committed May 6, 2024
1 parent 93710ed commit 00d72ae
Showing 2 changed files with 246 additions and 2 deletions.
4 changes: 2 additions & 2 deletions concordia/components/game_master/__init__.py
@@ -12,8 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
-"""Library of components specifically for generative game master."""
+"""Library of components specifically for generative game masters."""
 
 from concordia.components.game_master import conversation
 from concordia.components.game_master import current_scene
@@ -22,4 +21,5 @@
 from concordia.components.game_master import player_status
 from concordia.components.game_master import relevant_events
 from concordia.components.game_master import schedule
+from concordia.components.game_master import schelling_diagram_payoffs
 from concordia.components.game_master import time_display
244 changes: 244 additions & 0 deletions concordia/components/game_master/schelling_diagram_payoffs.py
@@ -0,0 +1,244 @@
# Copyright 2022 DeepMind Technologies Limited.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""A component for computing and delivering payoffs using a Schelling diagram.
"""

from collections.abc import Callable, Mapping, Sequence
import datetime

from concordia.agents import basic_agent
from concordia.associative_memory import associative_memory
from concordia.components.game_master import current_scene
from concordia.language_model import language_model
from concordia.typing import component
import numpy as np

import termcolor


class SchellingPayoffs(component.Component):
  """Define payoffs for minigames using a Schelling diagram.

  Schelling diagrams are a game representation described in:
  Schelling, T.C., 1973. Hockey helmets, concealed weapons, and daylight
  saving: A study of binary choices with externalities. Journal of Conflict
  Resolution, 17(3), pp.381-428.
  """

  def __init__(
      self,
      model: language_model.LanguageModel,
      memory: associative_memory.AssociativeMemory,
      cooperative_option: str,
      resolution_scene: str,
      cooperator_reward_fn: Callable[[int], float],
      defector_reward_fn: Callable[[int], float],
      players: Sequence[basic_agent.BasicAgent],
      acting_player_names: Sequence[str],
      outcome_summarization_fn: Callable[[Mapping[str, int],
                                          Mapping[str, float]],
                                         Mapping[str, str]],
      clock_now: Callable[[], datetime.datetime],
      name: str = 'scoring function',
      verbose: bool = False,
  ):
    """Initialize a scoring function component.

    Args:
      model: a language model
      memory: an associative memory
      cooperative_option: which option choice constitutes cooperation
      resolution_scene: on which scene type should this component be updated
        after the event, i.e. when to check the joint action and compute
        results
      cooperator_reward_fn: reward obtained by cooperators as a function of
        the number of other cooperators
      defector_reward_fn: reward obtained by defectors as a function of the
        number of other defectors
      players: sequence of agents (a superset of the active players)
      acting_player_names: sequence of names of players who act each stage
      outcome_summarization_fn: function of binarized joint actions and
        rewards which returns an outcome description message for each player
      clock_now: Function to call to get current time.
      name: name of this component e.g. Possessions, Account, Property, etc
      verbose: whether to print the full update chain of thought or not
    """
    self._model = model
    self._memory = memory
    self._cooperative_option = cooperative_option
    self._cooperator_reward_fn = cooperator_reward_fn
    self._defector_reward_fn = defector_reward_fn
    self._players = players
    self._acting_player_names = acting_player_names
    self._outcome_summarization_fn = outcome_summarization_fn
    self._clock_now = clock_now
    self._name = name
    self._verbose = verbose

    self._history = []
    self._state = ''
    self._last_update = self._clock_now() - datetime.timedelta(days=365)
    self._partial_states = {player.name: '' for player in self._players}
    self._player_scores = {player.name: 0 for player in self._players}

    self._resolution_scene = resolution_scene
    self._current_scene = current_scene.CurrentScene(
        name='current scene type',
        memory=self._memory,
        clock_now=self._clock_now,
        verbose=self._verbose,
    )

    self.reset()
    # Set the initial state's string representation.
    self.update()

  def reset(self) -> None:
    self._stage_idx = 0
    # Per stage, map each player's name to their component of the joint action.
    self._stage_to_joint_action = [
        {name: None for name in self._acting_player_names}]

  def name(self) -> str:
    """Returns the name of this component."""
    return self._name

  def get_last_log(self):
    if self._history:
      return self._history[-1].copy()

  def get_history(self):
    return self._history.copy()

  def state(self) -> str:
    return self._state

  def partial_state(
      self,
      player_name: str,
  ) -> str:
    """Return a player-specific view of the component's state."""
    return self._partial_states[player_name]

  def update(self) -> None:
    self._current_scene.update()

  def _joint_action_is_complete(self, joint_action: Mapping[str, str]) -> bool:
    for acting_player_name in self._acting_player_names:
      if joint_action[acting_player_name] is None:
        return False
    return True

  def _binarize_joint_action(
      self,
      joint_action: Mapping[str, str]) -> Mapping[str, int]:
    binary_joint_action = {name: act == self._cooperative_option
                           for name, act in joint_action.items()}
    return binary_joint_action

  def _get_rewards_from_joint_action(
      self, binary_joint_action: Mapping[str, int]) -> Mapping[str, float]:
    # For now, this only supports "Schelling style" (binary choice with
    # externalities) type of game representations. This means the critical
    # factor is the number of players picking the cooperate option.
    num_cooperators = np.sum(list(binary_joint_action.values()))

    rewards = {}
    for player_name, is_cooperator in binary_joint_action.items():
      if is_cooperator:
        rewards[player_name] = self._cooperator_reward_fn(num_cooperators)
      else:
        rewards[player_name] = self._defector_reward_fn(num_cooperators)

    return rewards

  def _set_outcome_messages(
      self,
      binary_joint_action: Mapping[str, int],
      rewards: Mapping[str, float],
  ) -> None:
    # Only the game master sees the actual reward values.
    game_master_private_state = '\n'.join(
        [f'{player.name}: {self._player_scores[player.name]}'
         for player in self._players])
    # Players see a text-based summarization of the events, which may or may
    # not include the actual reward values.
    partial_states = self._outcome_summarization_fn(binary_joint_action,
                                                    rewards)
    common_view_of_player_obs = '\n'.join(
        [f'{name} observed: {observation}' for name, observation
         in partial_states.items()])

    # State is only observed by the game master since players get
    # their observations from `partial_states`.
    self._state = f'{common_view_of_player_obs}\n{game_master_private_state}'

    # The game master gets a memory of the state.
    self._memory.add(self._state)
    # Active players observe their own partial state description and inactive
    # players get the common description.
    for player in self._players:
      if player.name in self._acting_player_names:
        player.observe(partial_states[player.name])
      else:
        player.observe(common_view_of_player_obs)

  def update_before_event(self, player_action_attempt: str) -> None:
    # `player_action_attempt` is formatted as "name: attempt".
    player_name, choice_str = player_action_attempt.split(': ')
    self._stage_to_joint_action[self._stage_idx][player_name] = choice_str

  def update_after_event(
      self,
      event_statement: str,
  ) -> None:
    if self._clock_now() == self._last_update:
      return
    self._last_update = self._clock_now()

    current_scene_type = self._current_scene.state()
    joint_action = []
    if current_scene_type == self._resolution_scene:
      # Check if all players have acted so far in the current stage game.
      joint_action = self._stage_to_joint_action[self._stage_idx]
      if self._joint_action_is_complete(joint_action):
        # Map the joint action to rewards per player.
        binary_joint_action = self._binarize_joint_action(joint_action)
        rewards = self._get_rewards_from_joint_action(binary_joint_action)

        # Accumulate the rewards per player.
        for name in self._acting_player_names:
          self._player_scores[name] += rewards[name]

        # Determine summary messages for each player and the GM.
        self._set_outcome_messages(binary_joint_action, rewards)

        # Advance to the next stage.
        self._stage_idx += 1
        self._stage_to_joint_action.append(
            {name: None for name in self._acting_player_names})

    if self._verbose:
      print(termcolor.colored(self.state(), 'yellow'))

    update_log = {
        'date': self._clock_now(),
        'Summary': self.name(),
        'Schelling diagram payoffs': self.state(),
        'Joint action': str(joint_action),
    }
    self._memory.extend([self._state,])
    self._history.append(update_log)
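
The sketch below shows one way the new component might be wired up; it is illustrative only. The model, memory, clock, and players passed to build_schelling_payoffs are assumed to be constructed elsewhere in a Concordia simulation, and the cooperative option name, resolution scene type, and reward curves are made-up examples rather than values taken from this change.

# Illustrative wiring for SchellingPayoffs. The option/scene strings and the
# reward curves below are assumptions made for the sake of the example.
from collections.abc import Mapping

from concordia.components.game_master import schelling_diagram_payoffs


def cooperator_reward(num_cooperators: int) -> float:
  # Example Schelling-style curve: a cooperator's payoff grows with the
  # number of players who picked the cooperative option.
  return 1.0 * num_cooperators


def defector_reward(num_cooperators: int) -> float:
  # Defectors free-ride on the same externality plus a private bonus, so
  # defecting is individually tempting even though all-cooperate beats
  # all-defect.
  return 1.0 * num_cooperators + 2.0


def summarize_outcome(
    binary_joint_action: Mapping[str, int],
    rewards: Mapping[str, float],
) -> Mapping[str, str]:
  # Players see only a qualitative description, not the numeric rewards.
  return {
      name: (
          ('you cooperated' if was_cooperative else 'you defected')
          + (' and it went well.' if rewards[name] > 0
             else ' and it went badly.')
      )
      for name, was_cooperative in binary_joint_action.items()
  }


def build_schelling_payoffs(model, memory, clock, players):
  # model, memory, clock, and players (basic_agent.BasicAgent instances) are
  # assumed to come from the caller; clock is assumed to expose a now()
  # method returning a datetime.
  return schelling_diagram_payoffs.SchellingPayoffs(
      model=model,
      memory=memory,
      cooperative_option='cooperation',
      resolution_scene='minigame',
      cooperator_reward_fn=cooperator_reward,
      defector_reward_fn=defector_reward,
      players=players,
      acting_player_names=[player.name for player in players],
      outcome_summarization_fn=summarize_outcome,
      clock_now=clock.now,
      name='scoring function',
      verbose=False,
  )

With a setup along these lines, whenever the current scene type matches resolution_scene and every name in acting_player_names has submitted a choice, the component binarizes the joint action against cooperative_option, applies the two reward curves, accumulates per-player scores, and delivers the text produced by summarize_outcome to the players as observations.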
