From 473037a57a3547e213558093fc8a69a72de97b3c Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Mon, 10 Jun 2024 12:35:50 +0200 Subject: [PATCH] WIP --- kloppy/domain/models/common.py | 70 +++++++++++++++- kloppy/domain/models/event.py | 17 +++- kloppy/domain/models/time.py | 82 +++++++++++++------ .../domain/services/aggregators/aggregator.py | 10 +++ .../services/aggregators/minutes_played.py | 72 ++++++++++++++++ .../infra/serializers/event/deserializer.py | 28 +++++++ .../event/statsbomb/deserializer.py | 21 +++-- .../event/statsbomb/specification.py | 1 + .../event/statsperform/deserializer.py | 2 +- .../event/statsperform/parsers/base.py | 2 +- .../event/statsperform/parsers/f7_xml.py | 20 ++--- .../event/statsperform/parsers/ma1_json.py | 10 ++- .../event/statsperform/parsers/ma1_xml.py | 10 ++- .../infra/serializers/tracking/skillcorner.py | 18 ++-- .../serializers/tracking/statsperform.py | 8 +- .../serializers/tracking/tracab/tracab_dat.py | 14 ++-- kloppy/tests/test_opta.py | 6 +- kloppy/tests/test_statsbomb.py | 4 +- kloppy/tests/test_statsperform.py | 2 +- kloppy/tests/test_time.py | 72 +++++++++++++--- 20 files changed, 379 insertions(+), 90 deletions(-) create mode 100644 kloppy/domain/services/aggregators/aggregator.py create mode 100644 kloppy/domain/services/aggregators/minutes_played.py diff --git a/kloppy/domain/models/common.py b/kloppy/domain/models/common.py index 5b1dd842..d2b59aee 100644 --- a/kloppy/domain/models/common.py +++ b/kloppy/domain/models/common.py @@ -17,9 +17,9 @@ overload, Iterable, NamedTuple, + Tuple, ) - if sys.version_info >= (3, 8): from typing import Literal else: @@ -128,6 +128,10 @@ class Position: def __str__(self): return self.name + @classmethod + def unknown(cls) -> "Position": + return cls(position_id="", name="Unknown") + @dataclass(frozen=True) class Player: @@ -152,8 +156,9 @@ class Player: last_name: str = None # match specific - starting: bool = None - position: Position = None + positions: TimeContainer[Position] = field( + default_factory=TimeContainer, compare=False + ) attributes: Optional[Dict] = field(default_factory=dict, compare=False) @@ -165,6 +170,25 @@ def full_name(self): return f"{self.first_name} {self.last_name}" return f"{self.team.ground}_{self.jersey_no}" + @property + def position(self) -> Optional[Position]: + try: + return self.positions.last() + except KeyError: + return None + + @property + def starting(self) -> bool: + """Return if the player has a position at the beginning of the match.""" + return self.starting_position is not None + + @property + def starting_position(self): + try: + return self.positions.at_start() + except KeyError: + return None + def __str__(self): return self.full_name @@ -176,6 +200,46 @@ def __eq__(self, other): return False return self.player_id == other.player_id + @classmethod + def build( + cls, + player_id: str, + team: "Team", + jersey_no: Optional[int], + name: str = None, + first_name: str = None, + last_name: str = None, + starting_position: Optional[Position] = None, + periods: Optional[List[Period]] = None, + attributes: Optional[dict] = None, + ): + + if attributes is None: + attributes = {} + + positions = TimeContainer() + if starting_position: + if not periods: + raise KloppyError( + "You must pass periods when using starting_position" + ) + + positions.set(periods[0].start_time, starting_position) + + return cls( + player_id=player_id, + team=team, + jersey_no=jersey_no, + name=name, + first_name=first_name, + last_name=last_name, + positions=positions, + attributes=attributes, + ) + + def set_position(self, time: Time, position: Optional[Position]): + self.positions.set(time, position) + @dataclass class Team: diff --git a/kloppy/domain/models/event.py b/kloppy/domain/models/event.py index f4802ee5..1e3a6329 100644 --- a/kloppy/domain/models/event.py +++ b/kloppy/domain/models/event.py @@ -10,12 +10,14 @@ Callable, Optional, TYPE_CHECKING, + NamedTuple, ) from kloppy.domain.models.common import ( DatasetType, AttackingDirection, OrientationError, + Position, ) from kloppy.utils import ( camelcase_to_snakecase, @@ -29,7 +31,7 @@ from .formation import FormationType from .pitch import Point -from ...exceptions import OrphanedRecordError, InvalidFilterError +from ...exceptions import OrphanedRecordError, InvalidFilterError, KloppyError if TYPE_CHECKING: from .tracking import Frame @@ -879,6 +881,7 @@ class SubstitutionEvent(Event): """ replacement_player: Player + position: Optional[Position] = None event_type: EventType = EventType.SUBSTITUTION event_name: str = "substitution" @@ -1113,6 +1116,18 @@ def generic_record_converter(event: Event): map(generic_record_converter, self.records) ) + def aggregate(self, type_: str) -> List[Any]: + if type_ == "minutes_played": + from kloppy.domain.services.aggregators.minutes_played import ( + MinutesPlayedAggregator, + ) + + aggregator = MinutesPlayedAggregator() + else: + raise KloppyError(f"No aggregator {type_} not found") + + return aggregator.aggregate(self) + __all__ = [ "EnumQualifier", diff --git a/kloppy/domain/models/time.py b/kloppy/domain/models/time.py index f60ef4b5..4209dcbf 100644 --- a/kloppy/domain/models/time.py +++ b/kloppy/domain/models/time.py @@ -9,9 +9,10 @@ List, Tuple, NamedTuple, + Literal, ) -from sortedcontainers import SortedList +from sortedcontainers import SortedDict from kloppy.exceptions import KloppyError @@ -51,11 +52,13 @@ def contains(self, timestamp: datetime): @property def start_time(self) -> "Time": - return Time(period=self, timestamp=self.start_timestamp) + return Time(period=self, timestamp=timedelta(0)) @property def end_time(self) -> "Time": - return Time(period=self, timestamp=self.end_timestamp) + return Time( + period=self, timestamp=self.end_timestamp - self.start_timestamp + ) @property def duration(self) -> timedelta: @@ -94,6 +97,17 @@ class Time: period: "Period" timestamp: timedelta + @classmethod + def from_period( + cls, + period: Period, + type_: Union[Literal["start"], Literal["end"]] = "start", + ): + return cls( + period=period, + timestamp=timedelta(0) if type_ == "start" else period.duration, + ) + @overload def __sub__(self, other: timedelta) -> "Time": ... @@ -178,46 +192,60 @@ def __str__(self): m, s = divmod(self.timestamp.total_seconds(), 60) return f"P{self.period.id}T{m:02.0f}:{s:02.0f}" - -T = TypeVar("T") + def __hash__(self): + return hash((self.period.id, self.timestamp.total_seconds())) -class Pair(NamedTuple): - key: Time - item: T +T = TypeVar("T") class TimeContainer(Generic[T]): def __init__(self): - self.items: SortedList = SortedList(key=lambda pair: pair.key) + self.items: SortedDict = SortedDict() - def add(self, time: Time, item: T): - self.items.add(Pair(key=time, item=item)) + def set(self, time: Time, item: Optional[T]): + self.items[time] = item # Pair(key=time, item=item) - def value_at(self, time: Time) -> T: - idx = self.items.bisect_left(Pair(key=time, item=None)) - 1 + def value_at(self, time: Time) -> Optional[T]: + idx = self.items.bisect_right(time) - 1 if idx < 0: - raise ValueError("Not found") - return self.items[idx].item + raise KeyError("Not found") + return self.items.values()[idx] + + def __getitem__(self, item: Time): + return self.value_at(item) + + def __setitem__(self, key: Time, value: Optional[T]): + self.set(key, value) - def ranges(self, add_end: bool = True) -> List[Tuple[Time, Time, T]]: + def ranges(self) -> List[Tuple[Time, Time, T]]: items = list(self.items) if not items: return [] - if add_end: - items.append( - Pair( - # Ugly way to get us to the end of the last period - key=items[0].key + timedelta(seconds=10_000_000), - item=None, - ) - ) - if len(items) < 2: raise ValueError("Cannot create ranges when length < 2") ranges_ = [] - for start_pair, end_pair in zip(items[:-1], items[1:]): - ranges_.append((start_pair.key, end_pair.key, start_pair.item)) + for start_time, end_time in zip(items[:-1], items[1:]): + ranges_.append((start_time, end_time, self.items[start_time])) return ranges_ + + def last(self): + if not len(self.items): + raise KeyError + + return self.items[self.items.keys()[-1]] + + def at_start(self): + """Return the value at the beginning of the match""" + if not self.items: + raise KeyError + + first_item: Time = self.items.keys()[0] + + tmp_period = first_item.period + while tmp_period.prev_period: + tmp_period = tmp_period.prev_period + + return self.value_at(Time.from_period(tmp_period, "start")) diff --git a/kloppy/domain/services/aggregators/aggregator.py b/kloppy/domain/services/aggregators/aggregator.py new file mode 100644 index 00000000..60fa8707 --- /dev/null +++ b/kloppy/domain/services/aggregators/aggregator.py @@ -0,0 +1,10 @@ +from abc import ABC, abstractmethod +from typing import Dict, Any, Hashable, List, NamedTuple + +from kloppy.domain import EventDataset + + +class EventDatasetAggregator(ABC): + @abstractmethod + def aggregate(self, dataset: EventDataset) -> List[NamedTuple]: + raise NotImplementedError diff --git a/kloppy/domain/services/aggregators/minutes_played.py b/kloppy/domain/services/aggregators/minutes_played.py new file mode 100644 index 00000000..a3c6013b --- /dev/null +++ b/kloppy/domain/services/aggregators/minutes_played.py @@ -0,0 +1,72 @@ +from datetime import timedelta +from typing import Dict, List, NamedTuple, Union, Optional + +from kloppy.domain import EventDataset, Player, Position, Time +from kloppy.domain.services.aggregators.aggregator import ( + EventDatasetAggregator, +) + + +class MinutesPlayed(NamedTuple): + player: Player + start_time: Time + end_time: Time + duration: timedelta + + +class MinutesPlayedPerPosition(NamedTuple): + player: Player + position: Position + start_time: Time + end_time: Time + duration: timedelta + + +class MinutesPlayedAggregator(EventDatasetAggregator): + def __init__(self, aggregate_position: bool = True): + self.aggregate_position = aggregate_position + + def aggregate( + self, dataset: EventDataset + ) -> List[Union[MinutesPlayedPerPosition, MinutesPlayed]]: + items = [] + + for team in dataset.metadata.teams: + for player in team.players: + if self.aggregate_position: + _start_time = None + end_time = None + for ( + start_time, + end_time, + position, + ) in player.positions.ranges(): + if not _start_time: + _start_time = start_time + + if _start_time: + items.append( + MinutesPlayed( + player=player, + start_time=_start_time, + end_time=_start_time, + duration=end_time - _start_time, + ) + ) + else: + for ( + start_time, + end_time, + position, + ) in player.positions.ranges(): + items.append( + MinutesPlayedPerPosition( + player=player, + position=position, + start_time=start_time, + end_time=end_time, + duration=end_time - start_time, + ) + ) + + return items diff --git a/kloppy/infra/serializers/event/deserializer.py b/kloppy/infra/serializers/event/deserializer.py index 7a670772..8b5cf1c8 100644 --- a/kloppy/infra/serializers/event/deserializer.py +++ b/kloppy/infra/serializers/event/deserializer.py @@ -10,6 +10,10 @@ EventFactory, DatasetType, DatasetTransformerBuilder, + Team, + SubstitutionEvent, + Position, + Period, ) T = TypeVar("T") @@ -56,6 +60,30 @@ def get_transformer( pitch_width=pitch_width, ) + def _update_player_positions( + self, teams: List[Team], events: List[Event], periods: List[Period] + ): + for event in events: + if isinstance(event, SubstitutionEvent): + event: SubstitutionEvent + + event.player.set_position(event.time, None) + + event.replacement_player.set_position( + event.time, Position.unknown() + ) + + # Set all player positions to None at end of match + end_of_match = periods[-1].end_time + for team in teams: + for player in team.players: + try: + if player.positions.value_at(end_of_match) is not None: + player.positions.set(end_of_match, None) + except KeyError: + # Was not in the pitch + pass + @property @abstractmethod def provider(self) -> Provider: diff --git a/kloppy/infra/serializers/event/statsbomb/deserializer.py b/kloppy/infra/serializers/event/statsbomb/deserializer.py index 8d426825..e9a4ee3f 100644 --- a/kloppy/infra/serializers/event/statsbomb/deserializer.py +++ b/kloppy/infra/serializers/event/statsbomb/deserializer.py @@ -55,14 +55,14 @@ def deserialize(self, inputs: StatsBombInputs) -> EventDataset: data_version.xy_fidelity_version, ) - # Create teams and players - with performance_logging("parse teams ans players", logger=logger): - teams = self.create_teams_and_players(raw_events, lineups) - # Create periods with performance_logging("parse periods", logger=logger): periods = self.create_periods(raw_events) + # Create teams and players + with performance_logging("parse teams ans players", logger=logger): + teams = self.create_teams_and_players(raw_events, lineups, periods) + # Create events with performance_logging("parse events", logger=logger): events = [] @@ -108,6 +108,8 @@ def deserialize(self, inputs: StatsBombInputs) -> EventDataset: ) events.append(event) + self._update_player_positions(teams, events, periods) + metadata = Metadata( teams=teams, periods=periods, @@ -160,7 +162,7 @@ def load_data(self, inputs: StatsBombInputs): return raw_events, lineups, three_sixty_data, version - def create_teams_and_players(self, raw_events, lineups): + def create_teams_and_players(self, raw_events, lineups, periods): it_events = iter(raw_events.values()) starting_xi_events = [ next(it_events).raw_event, @@ -199,13 +201,16 @@ def create_team(lineup, ground_type): starting_formation=starting_formations[lineup["team_id"]], ) team.players = [ - Player( + Player.build( player_id=str(player["player_id"]), team=team, name=player["player_name"], jersey_no=int(player["jersey_number"]), - starting=str(player["player_id"]) in player_positions, - position=player_positions.get(str(player["player_id"])), + # Consider a way to pass this without the periods + starting_position=player_positions.get( + str(player["player_id"]) + ), + periods=periods, ) for player in lineup["lineup"] ] diff --git a/kloppy/infra/serializers/event/statsbomb/specification.py b/kloppy/infra/serializers/event/statsbomb/specification.py index 2196e47f..a9a378ee 100644 --- a/kloppy/infra/serializers/event/statsbomb/specification.py +++ b/kloppy/infra/serializers/event/statsbomb/specification.py @@ -20,6 +20,7 @@ PassQualifier, PassResult, PassType, + Position, SetPieceQualifier, SetPieceType, ShotResult, diff --git a/kloppy/infra/serializers/event/statsperform/deserializer.py b/kloppy/infra/serializers/event/statsperform/deserializer.py index dc7fbb87..1d6aebb8 100644 --- a/kloppy/infra/serializers/event/statsperform/deserializer.py +++ b/kloppy/infra/serializers/event/statsperform/deserializer.py @@ -609,7 +609,7 @@ def deserialize(self, inputs: StatsPerformInputs) -> EventDataset: with performance_logging("parse data", logger=logger): periods = metadata_parser.extract_periods() score = metadata_parser.extract_score() - teams = metadata_parser.extract_lineups() + teams = metadata_parser.extract_lineups(periods) raw_events = [ event for event in events_parser.extract_events() diff --git a/kloppy/infra/serializers/event/statsperform/parsers/base.py b/kloppy/infra/serializers/event/statsperform/parsers/base.py index 9bb60f43..ed5ffdb7 100644 --- a/kloppy/infra/serializers/event/statsperform/parsers/base.py +++ b/kloppy/infra/serializers/event/statsperform/parsers/base.py @@ -53,7 +53,7 @@ def extract_score(self) -> Optional[Score]: """Return the score of the game.""" return None - def extract_lineups(self) -> Tuple[Team, Team]: + def extract_lineups(self, periods: List[Period]) -> Tuple[Team, Team]: """Return the home and away team.""" raise NotImplementedError diff --git a/kloppy/infra/serializers/event/statsperform/parsers/f7_xml.py b/kloppy/infra/serializers/event/statsperform/parsers/f7_xml.py index d40e68a5..d2122f78 100644 --- a/kloppy/infra/serializers/event/statsperform/parsers/f7_xml.py +++ b/kloppy/infra/serializers/event/statsperform/parsers/f7_xml.py @@ -95,7 +95,7 @@ def extract_score(self) -> Optional[Score]: return None return Score(home=home_score, away=away_score) - def extract_lineups(self) -> Tuple[Team, Team]: + def extract_lineups(self, periods) -> Tuple[Team, Team]: """Return a dictionary with all available teams. Returns @@ -109,9 +109,9 @@ def extract_lineups(self) -> Tuple[Team, Team]: away_team = None for team_elm in team_elms: if team_elm.attrib["Side"] == "Home": - home_team = self._team_from_xml_elm(team_elm) + home_team = self._team_from_xml_elm(team_elm, periods) elif team_elm.attrib["Side"] == "Away": - away_team = self._team_from_xml_elm(team_elm) + away_team = self._team_from_xml_elm(team_elm, periods) else: raise DeserializationError( f"Unknown side: {team_elm.attrib['Side']}" @@ -124,7 +124,7 @@ def extract_lineups(self) -> Tuple[Team, Team]: raise DeserializationError("Lineup incomplete") return home_team, away_team - def _team_from_xml_elm(self, team_elm: Any) -> Team: + def _team_from_xml_elm(self, team_elm: Any, periods) -> Team: # This should not happen here team_name, team_players = self._parse_team_players( team_elm.attrib["TeamRef"] @@ -142,7 +142,7 @@ def _team_from_xml_elm(self, team_elm: Any) -> Team: ), ) team.players = [ - Player( + Player.build( player_id=player_elm.attrib["PlayerRef"].lstrip("p"), team=team, jersey_no=int(player_elm.attrib["ShirtNumber"]), @@ -152,14 +152,14 @@ def _team_from_xml_elm(self, team_elm: Any) -> Team: last_name=team_players[player_elm.attrib["PlayerRef"]][ "last_name" ], - starting=( - True if player_elm.attrib["Status"] == "Start" else False - ), - position=Position( + starting_position=Position( position_id=player_elm.attrib["Formation_Place"], name=player_elm.attrib["Position"], coordinates=None, - ), + ) + if player_elm.attrib["Status"] == "Start" + else None, + periods=periods, ) for player_elm in team_elm.find("PlayerLineUp").iterchildren( "MatchPlayer" diff --git a/kloppy/infra/serializers/event/statsperform/parsers/ma1_json.py b/kloppy/infra/serializers/event/statsperform/parsers/ma1_json.py index fbfe0e48..88cae3a1 100644 --- a/kloppy/infra/serializers/event/statsperform/parsers/ma1_json.py +++ b/kloppy/infra/serializers/event/statsperform/parsers/ma1_json.py @@ -32,7 +32,7 @@ def extract_periods(self) -> List[Period]: def extract_score(self) -> Optional[Score]: return None - def extract_lineups(self) -> Tuple[Team, Team]: + def extract_lineups(self, periods) -> Tuple[Team, Team]: teams = {} for parsed_team in self._parse_teams(): team_id = parsed_team["team_id"] @@ -48,15 +48,17 @@ def extract_lineups(self) -> Tuple[Team, Team]: player_id = parsed_player["player_id"] team_id = parsed_player["team_id"] team = teams[team_id] - player = Player( + player = Player.build( player_id=player_id, team=team, jersey_no=parsed_player["jersey_no"], name=parsed_player["name"], first_name=parsed_player["first_name"], last_name=parsed_player["last_name"], - starting=parsed_player["starting"], - position=parsed_player["position"], + starting_position=parsed_player["position"] + if parsed_player["starting"] + else None, + periods=periods, ) team.players.append(player) diff --git a/kloppy/infra/serializers/event/statsperform/parsers/ma1_xml.py b/kloppy/infra/serializers/event/statsperform/parsers/ma1_xml.py index 54c06bb8..9d7d1b9d 100644 --- a/kloppy/infra/serializers/event/statsperform/parsers/ma1_xml.py +++ b/kloppy/infra/serializers/event/statsperform/parsers/ma1_xml.py @@ -33,7 +33,7 @@ def extract_periods(self) -> List[Period]: def extract_score(self) -> Optional[Score]: return None - def extract_lineups(self) -> Tuple[Team, Team]: + def extract_lineups(self, periods: List[Period]) -> Tuple[Team, Team]: teams = {} for parsed_team in self._parse_teams(): team_id = parsed_team["team_id"] @@ -49,15 +49,17 @@ def extract_lineups(self) -> Tuple[Team, Team]: player_id = parsed_player["player_id"] team_id = parsed_player["team_id"] team = teams[team_id] - player = Player( + player = Player.build( player_id=player_id, team=team, jersey_no=parsed_player["jersey_no"], name=parsed_player["name"], first_name=parsed_player["first_name"], last_name=parsed_player["last_name"], - starting=parsed_player["starting"], - position=parsed_player["position"], + starting_position=parsed_player["position"] + if parsed_player["starting"] + else None, + periods=periods, ) team.players.append(player) diff --git a/kloppy/infra/serializers/tracking/skillcorner.py b/kloppy/infra/serializers/tracking/skillcorner.py index 39d07c1c..8404e746 100644 --- a/kloppy/infra/serializers/tracking/skillcorner.py +++ b/kloppy/infra/serializers/tracking/skillcorner.py @@ -275,15 +275,13 @@ def __create_anon_player(cls, teams, frame_record): f"anonymous player with track_id `{track_id}` does not have a specified group_name." ) - return Player( + return Player.build( player_id=f"{team.ground}_anon_{track_id}", team=team, jersey_no=None, name=f"Anon_{track_id}", first_name="Anon", last_name=track_id, - starting=None, - position=None, attributes={}, ) @@ -340,6 +338,8 @@ def deserialize(self, inputs: SkillCornerInputs) -> TrackingDataset: ) teams = [home_team, away_team] + periods_list = sorted(periods.values(), key=lambda p: p.id) + for player_track_obj_id, player in player_dict.items(): team_id = player["team_id"] @@ -350,19 +350,21 @@ def deserialize(self, inputs: SkillCornerInputs) -> TrackingDataset: team_string = "AWAY" team = away_team - players[team_string][player_track_obj_id] = Player( + players[team_string][player_track_obj_id] = Player.build( player_id=f"{player['id']}", team=team, jersey_no=player["number"], name=f"{player['first_name']} {player['last_name']}", first_name=player["first_name"], last_name=player["last_name"], - starting=player["start_time"] == "00:00:00", - position=Position( + starting_position=Position( position_id=player["player_role"].get("id"), name=player["player_role"].get("name"), coordinates=None, - ), + ) + if player["start_time"] == "00:00:00" + else None, + periods=periods_list, attributes={}, ) @@ -428,7 +430,7 @@ def _iter(): metadata = Metadata( teams=teams, - periods=sorted(periods.values(), key=lambda p: p.id), + periods=periods_list, pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions, score=Score( home=metadata["home_team_score"], diff --git a/kloppy/infra/serializers/tracking/statsperform.py b/kloppy/infra/serializers/tracking/statsperform.py index 88d47965..d86adc73 100644 --- a/kloppy/infra/serializers/tracking/statsperform.py +++ b/kloppy/infra/serializers/tracking/statsperform.py @@ -135,7 +135,13 @@ def deserialize(self, inputs: StatsPerformInputs) -> TrackingDataset: period.id: period for period in meta_data_parser.extract_periods() } - teams_list = list(meta_data_parser.extract_lineups()) + teams_list = list( + meta_data_parser.extract_lineups( + list( + sorted(periods.values(), key=lambda period: period.id) + ) + ) + ) with performance_logging("Loading tracking data", logger=logger): tracking_data = inputs.raw_data.read().decode("ascii").splitlines() diff --git a/kloppy/infra/serializers/tracking/tracab/tracab_dat.py b/kloppy/infra/serializers/tracking/tracab/tracab_dat.py index d32642dc..9db0b96c 100644 --- a/kloppy/infra/serializers/tracking/tracab/tracab_dat.py +++ b/kloppy/infra/serializers/tracking/tracab/tracab_dat.py @@ -23,6 +23,7 @@ Player, Provider, PlayerData, + Position, ) from kloppy.exceptions import DeserializationError @@ -131,7 +132,7 @@ def __validate_inputs(inputs: Dict[str, Readable]): raise ValueError("Please specify a value for 'raw_data'") @staticmethod - def create_team(team_data, ground, start_frame_id): + def create_team(team_data, ground, start_frame_id, periods): team = Team( team_id=str(team_data["TeamId"]), name=html.unescape(team_data["ShortName"]), @@ -139,7 +140,7 @@ def create_team(team_data, ground, start_frame_id): ) team.players = [ - Player( + Player.build( player_id=str(player["PlayerId"]), team=team, first_name=html.unescape(player["FirstName"]), @@ -148,9 +149,10 @@ def create_team(team_data, ground, start_frame_id): player["FirstName"] + " " + player["LastName"] ), jersey_no=int(player["JerseyNo"]), - starting=True + starting_position=Position.unknown() if player["StartFrameCount"] == start_frame_id - else False, + else None, + periods=periods, ) for player in team_data["Players"]["Player"] ] @@ -183,10 +185,10 @@ def deserialize(self, inputs: TRACABInputs) -> TrackingDataset: ) home_team = self.create_team( - meta_data["HomeTeam"], Ground.HOME, start_frame_id + meta_data["HomeTeam"], Ground.HOME, start_frame_id, periods ) away_team = self.create_team( - meta_data["AwayTeam"], Ground.AWAY, start_frame_id + meta_data["AwayTeam"], Ground.AWAY, start_frame_id, periods ) teams = [home_team, away_team] diff --git a/kloppy/tests/test_opta.py b/kloppy/tests/test_opta.py index 175fc97b..4e0e33d8 100644 --- a/kloppy/tests/test_opta.py +++ b/kloppy/tests/test_opta.py @@ -111,11 +111,9 @@ def test_player_position(self, dataset): ) assert player.starting - # Substituted players have a "Substitute" position + # Substituted players don't have a position sub_player = dataset.metadata.teams[0].get_player_by_id("88022") - assert sub_player.position == Position( - position_id="0", name="Substitute", coordinates=None - ) + assert sub_player.position is None assert not sub_player.starting def test_periods(self, dataset): diff --git a/kloppy/tests/test_statsbomb.py b/kloppy/tests/test_statsbomb.py index 929d3da8..17b3fc11 100644 --- a/kloppy/tests/test_statsbomb.py +++ b/kloppy/tests/test_statsbomb.py @@ -138,7 +138,9 @@ def test_player_position(self, dataset): """It should set the correct player position from the events""" # Starting players get their position from the STARTING_XI event player = dataset.metadata.teams[0].get_player_by_id("3089") - assert player.position == Position( + player.positions.at_start() + + assert player.starting_position == Position( position_id="18", name="Right Attacking Midfield", coordinates=None ) assert player.starting diff --git a/kloppy/tests/test_statsperform.py b/kloppy/tests/test_statsperform.py index cb829ae3..db4c2677 100644 --- a/kloppy/tests/test_statsperform.py +++ b/kloppy/tests/test_statsperform.py @@ -119,7 +119,7 @@ def test_teams(self, tracking_dataset: TrackingDataset): away_substitute = away_team.players[15] assert away_substitute.jersey_no == 18 - assert away_substitute.position == "Substitute" + assert away_substitute.position is None assert not away_substitute.starting assert away_substitute.team == away_team diff --git a/kloppy/tests/test_time.py b/kloppy/tests/test_time.py index 10ad20e5..f43c4470 100644 --- a/kloppy/tests/test_time.py +++ b/kloppy/tests/test_time.py @@ -119,7 +119,7 @@ def test_add_timedelta_spans_periods(self, periods): period=period2, timestamp=timedelta(seconds=700) ) - def test_statsbomb(self, base_dir): + def test_statsbomb_formation_changes(self, base_dir): dataset = statsbomb.load( lineup_data=base_dir / "files/statsbomb_lineup.json", event_data=base_dir / "files/statsbomb_event.json", @@ -138,6 +138,55 @@ def test_statsbomb(self, base_dir): ) assert diff == timedelta(seconds=5067.367) + def test_statsbomb_minuted_played(self, base_dir): + dataset = statsbomb.load( + lineup_data=base_dir / "files/statsbomb_lineup.json", + event_data=base_dir / "files/statsbomb_event.json", + ) + + minutes_played = dataset.aggregate("minutes_played") + + for item in minutes_played: + print(f"{item.player} - {item.duration}") + + home_team, away_team = dataset.metadata.teams + + minutes_played_map = { + item.player: item.duration for item in minutes_played + } + + """ + 3109 - 0:00:00.000000 - Malcom + 3501 - 0:47:32.053000 - Coutinho + 5203 - 1:24:12.343000 - Busquets + 5211 - 1:32:37.320000 - Ramos + """ + + # Didn't play + player_malcon = home_team.get_player_by_id(3109) + assert player_malcon not in minutes_played_map + + # Started second half + player_coutinho = home_team.get_player_by_id(3501) + assert minutes_played_map[player_coutinho] == timedelta( + seconds=2852.053 + ) + + # Replaced in second half + player_busquets = home_team.get_player_by_id(5203) + assert minutes_played_map[player_busquets] == timedelta( + seconds=5052.343 + ) + + # Played entire match + player_ramos = home_team.get_player_by_id(5211) + assert minutes_played_map[player_ramos] == ( + dataset.metadata.periods[0].duration + + dataset.metadata.periods[1].duration + ) + + # assert + class TestAbsTimeContainer: def test_value_at(self, periods): @@ -145,7 +194,7 @@ def test_value_at(self, periods): time1 = Time(period=period1, timestamp=timedelta(seconds=800)) container = TimeContainer() - container.add(time1, 10) + container[time1] = 10 value = container.value_at(time1 + timedelta(seconds=1)) assert value == 10 @@ -153,26 +202,29 @@ def test_value_at(self, periods): value = container.value_at(time1 + timedelta(seconds=10000)) assert value == 10 - with pytest.raises(ValueError): + with pytest.raises(KeyError): container.value_at(time1 - timedelta(seconds=1)) def test_ranges(self, periods): period1, period2, _ = periods - time1 = Time(period=period1, timestamp=timedelta(seconds=15 * 60)) container = TimeContainer() # Player gets on the pitch - container.add(time1, "LB") + substitution_time = Time( + period=period1, timestamp=timedelta(seconds=15 * 60) + ) + container.set(substitution_time, "LB") # Switches from LB to RB - container.add(time1 + timedelta(seconds=40 * 60), "RB") + container.set(substitution_time + timedelta(seconds=40 * 60), "RB") # Player gets of the pitch - container.add( + container.set( Time(period=period2, timestamp=timedelta(seconds=20 * 60)), None ) - print("") - for start, end, item in container.ranges(add_end=False): - print(f"{start} - {end} = {end - start} -> {item}") + for start, end, position in container.ranges(): + print(f"{start} - {end} = {end - start} -> {position}") + + assert container.last() is None