-
Notifications
You must be signed in to change notification settings - Fork 109
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
a class for computing and delivering payoffs in a coordination game.
PiperOrigin-RevId: 653985895 Change-Id: Ib22fcd71e743668addee9b3069c75720950dc9de
- Loading branch information
1 parent
1e273c1
commit 66b4daa
Showing
1 changed file
with
256 additions
and
0 deletions.
There are no files selected for viewing
256 changes: 256 additions & 0 deletions
256
concordia/components/game_master/coordination_payoffs.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,256 @@ | ||
# Copyright 2022 DeepMind Technologies Limited. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# https://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
"""A component for computing and delivering payoffs in a coordination game.""" | ||
|
||
from collections.abc import Callable, Mapping, Sequence | ||
import datetime | ||
|
||
from concordia.agents import basic_agent | ||
from concordia.associative_memory import associative_memory | ||
from concordia.components.game_master import current_scene | ||
from concordia.language_model import language_model | ||
from concordia.typing import component | ||
import numpy as np | ||
import termcolor | ||
|
||
|
||
class CoordinationPayoffs(component.Component):
  """Define payoffs for coordination games.

  A player's reward for a stage is the number of entries in the joint action
  that match their own choice (their own entry included), multiplied by the
  option's multiplier and by the player's own multiplier for that option, and
  divided by the total number of players.
  """

  def __init__(
      self,
      model: language_model.LanguageModel,
      memory: associative_memory.AssociativeMemory,
      option_multipliers: Mapping[str, float],
      player_multipliers: Mapping[str, Mapping[str, float]],
      resolution_scene: str,
      players: Sequence[basic_agent.BasicAgent],
      acting_player_names: Sequence[str],
      outcome_summarization_fn: Callable[
          [Mapping[str, str], Mapping[str, float]], Mapping[str, str]
      ],
      clock_now: Callable[[], datetime.datetime],
      name: str = 'scoring function',
      verbose: bool = False,
  ):
    """Initialize a scoring function component.

    Args:
      model: a language model
      memory: an associative memory
      option_multipliers: per option multipliers of rewards
      player_multipliers: per player multipliers of rewards, indexed first by
        player name and then by option
      resolution_scene: on which scene type should this component be updated
        after the event, i.e. when to check the joint action and compute results
      players: sequence of agents (a superset of the active players)
      acting_player_names: sequence of names of players who act each stage
      outcome_summarization_fn: function of joint actions and rewards
        which returns an outcome description message for each player
      clock_now: Function to call to get current time.
      name: name of this component e.g. Possessions, Account, Property, etc
      verbose: whether to print the full update chain of thought or not
    """
    self._model = model
    self._memory = memory

    self._option_multipliers = option_multipliers
    self._player_multipliers = player_multipliers

    self._players = players
    self._acting_player_names = acting_player_names
    self._outcome_summarization_fn = outcome_summarization_fn
    self._clock_now = clock_now
    self._name = name
    self._verbose = verbose

    self._history = []
    self._state = ''
    # NOTE(review): nothing in this class writes to `_partial_states` after
    # construction, so `partial_state` always returns ''. Players receive
    # their outcome descriptions through `observe` instead.
    self._partial_states = {player.name: '' for player in self._players}
    self._player_scores = {player.name: 0 for player in self._players}

    self._resolution_scene = resolution_scene
    self._current_scene = current_scene.CurrentScene(
        name='current scene type',
        memory=self._memory,
        clock_now=self._clock_now,
        verbose=self._verbose,
    )

    self.reset()
    # Set the initial state's string representation.
    self.update()

  def reset(self) -> None:
    """Restart at stage zero with an empty joint action."""
    self._stage_idx = 0
    # Map each player's name to their component of the joint action.
    self._partial_joint_action = {
        name: None for name in self._acting_player_names
    }

  def name(self) -> str:
    """Returns the name of this component."""
    return self._name

  def get_last_log(self):
    """Returns a copy of the latest log entry, or None if there is none."""
    if self._history:
      return self._history[-1].copy()
    return None

  def get_history(self):
    """Returns a shallow copy of the list of log entries."""
    return self._history.copy()

  def state(self) -> str:
    """Returns the game master's view of the latest outcome."""
    return self._state

  def partial_state(
      self,
      player_name: str,
  ) -> str:
    """Return a player-specific view of the component's state."""
    return self._partial_states[player_name]

  def update(self) -> None:
    """Refreshes the tracked scene type."""
    self._current_scene.update()

  def _joint_action_is_complete(self, joint_action: Mapping[str, str]) -> bool:
    """Returns True once every acting player has a choice recorded."""
    return all(
        joint_action[acting_player_name] is not None
        for acting_player_name in self._acting_player_names
    )

  def _count_string_occurrences(self, target_string, dictionary):
    """Returns how many values of `dictionary` equal `target_string`."""
    return sum(1 for value in dictionary.values() if value == target_string)

  def _get_rewards_from_joint_action(
      self, joint_action: Mapping[str, str]
  ) -> Mapping[str, float]:
    """Maps a joint action to a reward for each player who made a choice.

    Args:
      joint_action: mapping from player name to the option they chose.

    Returns:
      A mapping from player name to reward. Players with no choice in
      `joint_action` (e.g. players who do not act this stage) are omitted.
    """
    rewards = {}
    num_players = len(self._players)
    for player in self._players:
      # `self._players` may be a superset of the acting players, so a player
      # can legitimately be missing from the joint action; such a player made
      # no choice and therefore earns no reward this stage.
      player_action = joint_action.get(player.name)
      if player_action is None:
        continue
      same_choice = self._count_string_occurrences(player_action, joint_action)
      player_preference = self._player_multipliers[player.name][player_action]
      option_multiplier = self._option_multipliers[player_action]
      rewards[player.name] = (
          same_choice * player_preference * option_multiplier / num_players
      )

    return rewards

  def _set_outcome_messages(
      self,
      joint_action: Mapping[str, str],
      rewards: Mapping[str, float],
  ) -> None:
    """Builds the outcome state, stores it in memory and informs players.

    Args:
      joint_action: mapping from player name to the option they chose.
      rewards: mapping from player name to the reward for this stage.
    """
    # Only the game master sees the actual reward values.
    game_master_private_state = '\n'.join([
        f'{player.name}: {self._player_scores[player.name]}'
        for player in self._players
    ])
    # Players see a text-based summarization of the events, which may or may not
    # include the actual reward values.
    partial_states = self._outcome_summarization_fn(joint_action, rewards)
    common_view_of_player_obs = '\n'.join([
        f'{name} observed: {observation}'
        for name, observation in partial_states.items()
    ])

    # State is only observed by the game master since players get
    # their observations from `partial_states`.
    self._state = f'{common_view_of_player_obs}\n{game_master_private_state}'

    # The game master gets a memory of the state.
    self._memory.add(self._state)
    # Active players observe their own partial state description and inactive
    # players get the common description.
    for player in self._players:
      if player.name in self._acting_player_names:
        player.observe(partial_states[player.name])
      else:
        player.observe(common_view_of_player_obs)

  def update_before_event(self, player_action_attempt: str) -> None:
    """Records one player's choice for the current stage.

    Args:
      player_action_attempt: formatted as "name: attempt". Only the first
        ': ' separates the name from the choice, so the choice itself may
        contain ': '.

    Raises:
      ValueError: if `player_action_attempt` contains no ': ' separator.
    """
    player_name, separator, choice_str = player_action_attempt.partition(': ')
    if not separator:
      raise ValueError(
          'Expected an action attempt formatted as "name: attempt", got: '
          f'{player_action_attempt!r}'
      )
    self._partial_joint_action[player_name] = choice_str
    self._state = ''

  def update_after_event(
      self,
      event_statement: str,
  ) -> None:
    """Resolves the stage if the joint action is complete and logs the step.

    Rewards are only computed when the current scene type equals the
    resolution scene and every acting player has chosen. A log entry is
    appended on every call regardless.

    Args:
      event_statement: the event description; not used by this component.
    """
    current_scene_type = self._current_scene.state()
    payoffs_for_log = ''
    joint_action_for_log = ''
    finished = False
    if current_scene_type == self._resolution_scene:
      # Check if all players have acted so far in the current stage game.
      joint_action = self._partial_joint_action.copy()
      if self._joint_action_is_complete(joint_action):
        # Map the joint action to rewards per player.
        rewards = self._get_rewards_from_joint_action(joint_action)

        # Accumulate the rewards per player.
        for name in self._acting_player_names:
          self._player_scores[name] += rewards[name]

        # Use the outcome summarization function to get the state.
        self._set_outcome_messages(joint_action, rewards)
        # NOTE(review): `_set_outcome_messages` has already added the state
        # to memory, so this stores it a second time. Kept as-is to preserve
        # existing behavior; confirm whether the duplicate is intended.
        self._memory.extend([
            self.state(),
        ])

        joint_action_for_log = str(self._partial_joint_action)
        payoffs_for_log = self.state()
        finished = True

        if self._verbose:
          print(termcolor.colored(self.state(), 'yellow'))

    num_players_already_acted = sum(
        value is not None for value in self._partial_joint_action.values()
    )
    total_num_players_to_act = len(self._partial_joint_action)
    update_log = {
        'date': self._clock_now(),
        'Summary': self.name(),
        'Stage index': self._stage_idx,
        'How many players acted so far this stage': (
            f'{num_players_already_acted}/{total_num_players_to_act}'
        ),
        'Payoffs': payoffs_for_log,
        'Joint action': joint_action_for_log,
    }
    self._history.append(update_log)

    if finished:
      # Advance to the next stage.
      self._stage_idx += 1
      self._partial_joint_action = {
          name: None for name in self._acting_player_names
      }

  def get_scores(self) -> Mapping[str, float]:
    """Return the cumulative score for each player."""
    return self._player_scores