-
Notifications
You must be signed in to change notification settings - Fork 161
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add Schelling diagram payoffs computation for game master
PiperOrigin-RevId: 631039346 Change-Id: I3fcd85b01c74fe02de89d73bd2d465dd136b24c5
- Loading branch information
1 parent
93710ed
commit 00d72ae
Showing
2 changed files
with
246 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
244 changes: 244 additions & 0 deletions
244
concordia/components/game_master/schelling_diagram_payoffs.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,244 @@ | ||
# Copyright 2022 DeepMind Technologies Limited. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# https://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
"""A component for computing and delivering payoffs using a Schelling diagram. | ||
""" | ||
|
||
from collections.abc import Callable, Mapping, Sequence | ||
import datetime | ||
|
||
from concordia.agents import basic_agent | ||
from concordia.associative_memory import associative_memory | ||
from concordia.components.game_master import current_scene | ||
from concordia.language_model import language_model | ||
from concordia.typing import component | ||
import numpy as np | ||
|
||
import termcolor | ||
|
||
|
||
class SchellingPayoffs(component.Component):
  """Define payoffs for minigames using a Schelling diagram.

  Schelling diagrams are a game representation described in:

  Schelling, T.C., 1973. Hockey helmets, concealed weapons, and daylight saving:
  A study of binary choices with externalities. Journal of Conflict resolution,
  17(3), pp.381-428.

  The component records one choice per acting player for the current stage
  (`update_before_event`). Once the current scene is the resolution scene and
  every acting player has a recorded choice, it converts the joint action into
  per-player rewards, accumulates scores, notifies the players and moves on to
  the next stage (`update_after_event`).
  """

  def __init__(
      self,
      model: language_model.LanguageModel,
      memory: associative_memory.AssociativeMemory,
      cooperative_option: str,
      resolution_scene: str,
      cooperator_reward_fn: Callable[[int], float],
      defector_reward_fn: Callable[[int], float],
      players: Sequence[basic_agent.BasicAgent],
      acting_player_names: Sequence[str],
      outcome_summarization_fn: Callable[[Mapping[str, int],
                                          Mapping[str, float]],
                                         Mapping[str, str]],
      clock_now: Callable[[], datetime.datetime],
      name: str = 'scoring function',
      verbose: bool = False,
  ):
    """Initialize a scoring function component.

    Args:
      model: a language model (stored, but not used by this class itself)
      memory: an associative memory; receives the game master's view of each
        outcome
      cooperative_option: which option choice constitutes cooperation; a
        recorded choice counts as cooperation only if it equals this string
        exactly
      resolution_scene: on which scene type should this component be updated
        after the event, i.e. when to check the joint action and compute results
      cooperator_reward_fn: reward obtained by each cooperator; called with the
        total number of players who chose the cooperative option in the stage
      defector_reward_fn: reward obtained by each defector; called with the
        total number of players who chose the cooperative option in the stage
      players: sequence of agents (a superset of the active players)
      acting_player_names: sequence of names of players who act each stage
      outcome_summarization_fn: function of binarized joint actions and
        rewards which returns an outcome description message for each player
      clock_now: Function to call to get current time.
      name: name of this component e.g. Possessions, Account, Property, etc
      verbose: whether to print the state after each update or not
    """
    self._model = model
    self._memory = memory
    self._cooperative_option = cooperative_option
    self._cooperator_reward_fn = cooperator_reward_fn
    self._defector_reward_fn = defector_reward_fn
    self._players = players
    self._acting_player_names = acting_player_names
    self._outcome_summarization_fn = outcome_summarization_fn
    self._clock_now = clock_now
    self._name = name
    self._verbose = verbose

    self._history = []
    self._state = ''
    # Start a year in the past so the first `update_after_event` call is never
    # skipped by the "already updated at this time" guard.
    self._last_update = self._clock_now() - datetime.timedelta(days=365)
    self._partial_states = {player.name: '' for player in self._players}
    self._player_scores = {player.name: 0 for player in self._players}

    self._resolution_scene = resolution_scene
    self._current_scene = current_scene.CurrentScene(
        name='current scene type',
        memory=self._memory,
        clock_now=self._clock_now,
        verbose=self._verbose,
    )

    self.reset()
    # Set the initial state's string representation.
    self.update()

  def reset(self) -> None:
    """Restart stage tracking with a single, empty joint action.

    Accumulated player scores and the update history are left untouched.
    """
    self._stage_idx = 0
    # Per stage, map each player's name to their component of the joint action.
    # `None` marks a player who has not acted yet in that stage.
    self._stage_to_joint_action = [
        {name: None for name in self._acting_player_names}]

  def name(self) -> str:
    """Returns the name of this component."""
    return self._name

  def get_last_log(self):
    """Returns a copy of the latest log entry, or None if nothing is logged."""
    if not self._history:
      return None
    return self._history[-1].copy()

  def get_history(self):
    """Returns a shallow copy of the list of all log entries."""
    return self._history.copy()

  def state(self) -> str:
    """Returns the game master's view of the most recent outcome."""
    return self._state

  def partial_state(
      self,
      player_name: str,
  ) -> str:
    """Return a player-specific view of the component's state.

    NOTE(review): within this class `_partial_states` is only ever assigned in
    `__init__`, so this returns '' for every known player; outcome messages are
    delivered through `player.observe` instead. Confirm this is intended.

    Args:
      player_name: name of one of the agents passed as `players`.

    Returns:
      The stored partial state string for that player.

    Raises:
      KeyError: if `player_name` is not the name of a known player.
    """
    return self._partial_states[player_name]

  def update(self) -> None:
    """Refreshes the tracked current scene type."""
    self._current_scene.update()

  def _joint_action_is_complete(self, joint_action: Mapping[str, str]) -> bool:
    """Returns True once every acting player has a recorded (non-None) choice."""
    return all(joint_action[acting_player_name] is not None
               for acting_player_name in self._acting_player_names)

  def _binarize_joint_action(
      self,
      joint_action: Mapping[str, str]) -> Mapping[str, int]:
    """Maps each player's choice to whether it equals the cooperative option.

    Args:
      joint_action: mapping from player name to the recorded choice string.

    Returns:
      Mapping from player name to a boolean, True meaning cooperation.
    """
    return {player_name: choice == self._cooperative_option
            for player_name, choice in joint_action.items()}

  def _get_rewards_from_joint_action(
      self, binary_joint_action: Mapping[str, int]) -> Mapping[str, float]:
    """Computes the reward of every acting player for one stage.

    Args:
      binary_joint_action: mapping from player name to a truthy value if that
        player cooperated and a falsy value otherwise.

    Returns:
      Mapping from acting player name to the reward for this stage.

    Raises:
      KeyError: if an acting player is missing from `binary_joint_action`.
    """
    # For now, this only supports "Schelling style" (binary choice with
    # externalities) type of game representations. This means the critical
    # factor is the number of players picking the cooperate option.
    # A plain `int` is passed on, matching the `Callable[[int], float]` contract
    # of the reward functions.
    num_cooperators = sum(
        bool(cooperated) for cooperated in binary_joint_action.values())

    rewards = {}
    for player_name in self._acting_player_names:
      # Look the flag up by name. Iterating the mapping itself would yield its
      # keys (non-empty names, always truthy) and classify every player as a
      # cooperator.
      if binary_joint_action[player_name]:
        rewards[player_name] = self._cooperator_reward_fn(num_cooperators)
      else:
        rewards[player_name] = self._defector_reward_fn(num_cooperators)

    return rewards

  def _set_outcome_messages(
      self,
      binary_joint_action: Mapping[str, int],
      rewards: Mapping[str, float],
  ) -> None:
    """Publishes the outcome of a stage to the game master and the players.

    Updates `self._state`, adds it to the game master's memory and makes every
    player observe an outcome message.

    Args:
      binary_joint_action: mapping from player name to whether they cooperated.
      rewards: mapping from player name to the reward for this stage.
    """
    # Only the game master sees the actual reward values. These are the
    # cumulative scores, so they must be accumulated before this is called.
    game_master_private_state = '\n'.join(
        f'{player.name}: {self._player_scores[player.name]}'
        for player in self._players)
    # Players see a text-based summarization of the events, which may or may not
    # include the actual reward values.
    partial_states = self._outcome_summarization_fn(binary_joint_action,
                                                    rewards)
    common_view_of_player_obs = '\n'.join(
        f'{name} observed: {observation}'
        for name, observation in partial_states.items())

    # State is only observed by the game master since players get
    # their observations from `partial_states`.
    self._state = f'{common_view_of_player_obs}\n{game_master_private_state}'

    # The game master gets a memory of the state.
    self._memory.add(self._state)
    # Active players observe their own partial state description and inactive
    # players get the common description.
    for player in self._players:
      if player.name in self._acting_player_names:
        player.observe(partial_states[player.name])
      else:
        player.observe(common_view_of_player_obs)

  def update_before_event(self, player_action_attempt: str) -> None:
    """Records a player's choice for the current stage.

    Args:
      player_action_attempt: string formatted as "name: attempt". Only the
        first ': ' separates the name from the attempt, so the attempt itself
        may contain further ': ' occurrences.

    Raises:
      ValueError: if `player_action_attempt` contains no ': ' separator.
    """
    player_name, separator, choice_str = player_action_attempt.partition(': ')
    if not separator:
      raise ValueError(
          'Expected a player action attempt formatted as "name: attempt", '
          f'got: {player_action_attempt!r}')
    self._stage_to_joint_action[self._stage_idx][player_name] = choice_str

  def update_after_event(
      self,
      event_statement: str,
  ) -> None:
    """Resolves the current stage if it is ready, then logs the state.

    Does nothing if the clock has not changed since the previous call.
    Otherwise, when the current scene type equals the resolution scene and all
    acting players have acted, rewards are computed and accumulated, outcome
    messages are delivered and a new stage is started. In every non-skipped
    call the current state is appended to memory and to the history log.

    Args:
      event_statement: description of the event; not used by this component.
    """
    # Read the clock once so the guard, the stored timestamp and the log entry
    # all refer to the same instant.
    now = self._clock_now()
    if now == self._last_update:
      return
    self._last_update = now

    current_scene_type = self._current_scene.state()
    # Logged as '[]' whenever the scene is not the resolution scene.
    joint_action = []
    if current_scene_type == self._resolution_scene:
      # Check if all players have acted so far in the current stage game.
      joint_action = self._stage_to_joint_action[self._stage_idx]
      if self._joint_action_is_complete(joint_action):
        # Map the joint action to rewards per player.
        binary_joint_action = self._binarize_joint_action(joint_action)
        rewards = self._get_rewards_from_joint_action(binary_joint_action)

        # Accumulate the rewards per player.
        for name in self._acting_player_names:
          self._player_scores[name] += rewards[name]

        # Determine summary messages for each player and the GM.
        self._set_outcome_messages(binary_joint_action, rewards)

        # Advance to the next stage.
        self._stage_idx += 1
        self._stage_to_joint_action.append(
            {name: None for name in self._acting_player_names})

    if self._verbose:
      print(termcolor.colored(self.state(), 'yellow'))

    update_log = {
        'date': now,
        'Summary': self.name(),
        'Schelling diagram payoffs': self.state(),
        'Joint action': str(joint_action),
    }
    # NOTE(review): on a resolving update the same state string was already
    # added by `_set_outcome_messages`; on other updates this re-adds the state
    # of the last resolved stage. Confirm the memory handles duplicates.
    self._memory.extend([self._state,])
    self._history.append(update_log)