Skip to content

Commit

Permalink
Enriched metadata with date, game_week, game_id and coach
Browse files Browse the repository at this point in the history
  • Loading branch information
SportsDynamicsDS authored and BenSarfatiSD committed Aug 22, 2024
1 parent 923fca2 commit d81ae7d
Show file tree
Hide file tree
Showing 26 changed files with 449 additions and 33 deletions.
2 changes: 2 additions & 0 deletions kloppy/_providers/statsbomb.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ def load(
event_types: Optional[List[str]] = None,
coordinates: Optional[str] = None,
event_factory: Optional[EventFactory] = None,
additional_metadata: dict = {}
) -> EventDataset:
"""
Load StatsBomb event data into a [`EventDataset`][kloppy.domain.models.event.EventDataset]
Expand Down Expand Up @@ -48,6 +49,7 @@ def load(
lineup_data=lineup_data_fp,
three_sixty_data=three_sixty_data_fp,
),
additional_metadata=additional_metadata,
)


Expand Down
9 changes: 9 additions & 0 deletions kloppy/domain/models/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1003,6 +1003,10 @@ class Metadata:
orientation: See [`Orientation`][kloppy.domain.models.common.Orientation]
flags:
provider: See [`Provider`][kloppy.domain.models.common.Provider]
date: Date of the game.
game_week: Game week (or match day) of the game. For a cup or play-off game
it can instead be the stage (e.g. "8th Finals").
game_id: Id of the game as assigned by the provider.
home_coach: Coach of the home team.
away_coach: Coach of the away team.
"""

teams: List[Team]
Expand All @@ -1014,6 +1018,11 @@ class Metadata:
coordinate_system: CoordinateSystem
score: Optional[Score] = None
frame_rate: Optional[float] = None
date: Optional[datetime] = None
game_week: Optional[str] = None
game_id: Optional[str] = None
home_coach: Optional[str] = None
away_coach: Optional[str] = None
attributes: Optional[Dict] = field(default_factory=dict, compare=False)

def __post_init__(self):
Expand Down
17 changes: 17 additions & 0 deletions kloppy/infra/serializers/event/datafactory/deserializer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
import logging
from datetime import timedelta, datetime, timezone
from dateutil.parser import parse, _parser
from dataclasses import replace
from typing import Dict, List, Tuple, Union, IO, NamedTuple

Expand Down Expand Up @@ -453,6 +454,19 @@ def deserialize(self, inputs: DatafactoryInputs) -> EventDataset:
periods[half], end_timestamp=timestamp
)

try:
date = match["date"]
if date:
date = parse(date).astimezone(timezone.utc)
except _parser.ParserError:
date = None
game_week = match.get("week", None)
if game_week:
game_week = str(game_week)
game_id = match.get("matchId", None)
if game_id:
game_id = str(game_id)

# exclude goals, already listed as shots too
incidences.pop(DF_EVENT_CLASS_GOALS)
raw_events = [
Expand Down Expand Up @@ -613,6 +627,9 @@ def deserialize(self, inputs: DatafactoryInputs) -> EventDataset:
score=score,
provider=Provider.DATAFACTORY,
coordinate_system=transformer.get_to_coordinate_system(),
date=date,
game_week=game_week,
game_id=game_id,
)

return EventDataset(
Expand Down
24 changes: 24 additions & 0 deletions kloppy/infra/serializers/event/sportec/deserializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ class SportecMetadata(NamedTuple):
x_max: float
y_max: float
fps: int
home_coach: str
away_coach: str


def sportec_metadata_from_xml_elm(match_root) -> SportecMetadata:
Expand All @@ -99,10 +101,17 @@ def sportec_metadata_from_xml_elm(match_root) -> SportecMetadata:

home_team = away_team = None
for team_elm in team_elms:
head_coach = [
trainer.attrib["Shortname"]
for trainer in team_elm.TrainerStaff.iterchildren("Trainer")
if trainer.attrib["Role"] == "headcoach"
]
if team_elm.attrib["Role"] == "home":
home_team = _team_from_xml_elm(team_elm)
home_coach = head_coach[0] if len(head_coach) else None
elif team_elm.attrib["Role"] == "guest":
away_team = _team_from_xml_elm(team_elm)
away_coach = head_coach[0] if len(head_coach) else None
else:
raise DeserializationError(
f"Unknown side: {team_elm.attrib['Role']}"
Expand Down Expand Up @@ -194,6 +203,8 @@ def sportec_metadata_from_xml_elm(match_root) -> SportecMetadata:
x_max=x_max,
y_max=y_max,
fps=SPORTEC_FPS,
home_coach=home_coach,
away_coach=away_coach,
)


Expand Down Expand Up @@ -404,12 +415,20 @@ def deserialize(self, inputs: SportecEventDataInputs) -> EventDataset:
event_root = objectify.fromstring(inputs.event_data.read())

with performance_logging("parse data", logger=logger):
date = parse(
match_root.MatchInformation.General.attrib["KickoffTime"]
).astimezone(timezone.utc)
game_week = match_root.MatchInformation.General.attrib["MatchDay"]
game_id = match_root.MatchInformation.General.attrib["MatchId"]

sportec_metadata = sportec_metadata_from_xml_elm(match_root)
teams = home_team, away_team = sportec_metadata.teams
transformer = self.get_transformer(
pitch_length=sportec_metadata.x_max,
pitch_width=sportec_metadata.y_max,
)
home_coach = sportec_metadata.home_coach
away_coach = sportec_metadata.away_coach

periods = []
period_id = 0
Expand Down Expand Up @@ -632,6 +651,11 @@ def deserialize(self, inputs: SportecEventDataInputs) -> EventDataset:
flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM),
provider=Provider.SPORTEC,
coordinate_system=transformer.get_to_coordinate_system(),
date=date,
game_week=game_week,
game_id=game_id,
home_coach=home_coach,
away_coach=away_coach,
)

return EventDataset(
Expand Down
3 changes: 2 additions & 1 deletion kloppy/infra/serializers/event/statsbomb/deserializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class StatsBombDeserializer(EventDataDeserializer[StatsBombInputs]):
def provider(self) -> Provider:
return Provider.STATSBOMB

def deserialize(self, inputs: StatsBombInputs) -> EventDataset:
def deserialize(self, inputs: StatsBombInputs, additional_metadata) -> EventDataset:
# Initialize coordinate system transformer
self.transformer = self.get_transformer()

Expand Down Expand Up @@ -118,6 +118,7 @@ def deserialize(self, inputs: StatsBombInputs) -> EventDataset:
score=None,
provider=Provider.STATSBOMB,
coordinate_system=self.transformer.get_to_coordinate_system(),
**additional_metadata
)
return EventDataset(metadata=metadata, records=events)

Expand Down
6 changes: 6 additions & 0 deletions kloppy/infra/serializers/event/statsperform/deserializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,6 +614,9 @@ def deserialize(self, inputs: StatsPerformInputs) -> EventDataset:
periods = metadata_parser.extract_periods()
score = metadata_parser.extract_score()
teams = metadata_parser.extract_lineups()
date = events_parser.extract_date()
game_week = events_parser.extract_game_week()
game_id = events_parser.extract_game_id()
raw_events = [
event
for event in events_parser.extract_events()
Expand Down Expand Up @@ -827,6 +830,9 @@ def deserialize(self, inputs: StatsPerformInputs) -> EventDataset:
if inputs.event_feed.upper() == "F24"
else Provider.STATSPERFORM,
coordinate_system=transformer.get_to_coordinate_system(),
date=date,
game_week=game_week,
game_id=game_id,
)

return EventDataset(
Expand Down
13 changes: 12 additions & 1 deletion kloppy/infra/serializers/event/statsperform/parsers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@

from datetime import datetime
from dataclasses import dataclass, field
from typing import List, Optional


@dataclass
Expand Down Expand Up @@ -53,6 +52,18 @@ def extract_score(self) -> Optional[Score]:
"""Return the score of the game."""
return None

def extract_date(self) -> Optional[datetime]:
    """Return the date of the game.

    This default implementation carries no date information and always
    returns ``None``.
    """
    # NOTE(review): format-specific parsers presumably override this and
    # return a datetime — confirm against the parser subclasses.
    return None

def extract_game_week(self) -> Optional[str]:
    """Return the game_week of the game.

    This default implementation carries no game week information and
    always returns ``None``.
    """
    return None

def extract_game_id(self) -> Optional[str]:
    """Return the game_id of the game.

    This default implementation carries no game id information and always
    returns ``None``.
    """
    return None

def extract_lineups(self) -> Tuple[Team, Team]:
"""Return the home and away team."""
raise NotImplementedError
Expand Down
30 changes: 27 additions & 3 deletions kloppy/infra/serializers/event/statsperform/parsers/f24_xml.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""XML parser for Opta F24 feeds."""
import pytz
from datetime import datetime
from typing import List
from datetime import datetime, timezone
from typing import List, Optional
from dateutil.parser import parse

from kloppy.domain import Period
from .base import OptaXMLParser, OptaEvent


Expand Down Expand Up @@ -53,3 +53,27 @@ def extract_events(self) -> List[OptaEvent]:
)
for event in game_elm.iterchildren("Event")
]

def extract_date(self) -> Optional[datetime]:
    """Return the date of the game as a timezone-aware UTC datetime.

    Reads the ``game_date`` attribute of the first ``Game`` element found
    under ``self.root``.

    Returns:
        The parsed date expressed in UTC, or ``None`` when there is no
        ``Game`` element or it has no ``game_date`` attribute.
    """
    game_elm = self.root.find("Game")
    # Compare with None explicitly: the truth value of an XML element
    # reflects whether it has children, not whether it was found.
    if game_elm is None or "game_date" not in game_elm.attrib:
        return None

    game_date = parse(game_elm.attrib["game_date"])
    if game_date.tzinfo is None:
        # NOTE(review): a value without an offset is assumed to already be
        # UTC — confirm against the feed specification. Calling
        # ``astimezone`` on a naive datetime would interpret it in the
        # host's local timezone, making the result machine-dependent.
        return game_date.replace(tzinfo=timezone.utc)
    return game_date.astimezone(timezone.utc)

def extract_game_week(self) -> Optional[str]:
    """Return the game_week of the game.

    Reads the ``matchday`` attribute of the first ``Game`` element found
    under ``self.root``.

    Returns:
        The attribute value, or ``None`` when there is no ``Game`` element
        or it has no ``matchday`` attribute.
    """
    game_elm = self.root.find("Game")
    # Compare with None explicitly: the truth value of an XML element
    # reflects whether it has children, not whether it was found.
    if game_elm is None:
        return None
    return game_elm.attrib.get("matchday")

def extract_game_id(self) -> Optional[str]:
    """Return the game_id of the game.

    Reads the ``id`` attribute of the first ``Game`` element found under
    ``self.root``.

    Returns:
        The attribute value, or ``None`` when there is no ``Game`` element
        or it has no ``id`` attribute.
    """
    game_elm = self.root.find("Game")
    # Compare with None explicitly: the truth value of an XML element
    # reflects whether it has children, not whether it was found.
    if game_elm is None:
        return None
    return game_elm.attrib.get("id")
33 changes: 31 additions & 2 deletions kloppy/infra/serializers/event/statsperform/parsers/ma1_json.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""JSON parser for Stats Perform MA1 feeds."""
import pytz
from datetime import datetime
from datetime import datetime, timezone
from typing import Any, Optional, List, Tuple, Dict

from kloppy.domain import Period, Score, Team, Ground, Player
Expand Down Expand Up @@ -30,7 +30,13 @@ def extract_periods(self) -> List[Period]:
return parsed_periods

def extract_score(self) -> Optional[Score]:
    """Return the total score of the game.

    Reads ``liveData -> matchDetails -> scores -> total`` from the parsed
    feed in ``self.root``.

    Returns:
        A ``Score`` built from the ``home`` and ``away`` totals, or
        ``None`` when either value (or any level of the path leading to
        it) is missing.
    """
    match_details = self.root.get("liveData", {}).get("matchDetails", {})
    # NOTE(review): the score section is presumably absent for games that
    # have not been played yet; missing keys are treated as "no score"
    # instead of raising KeyError — confirm against the feed specification.
    total = match_details.get("scores", {}).get("total", {})
    home_score = total.get("home")
    away_score = total.get("away")
    if home_score is None or away_score is None:
        return None
    return Score(home=home_score, away=away_score)

def extract_lineups(self) -> Tuple[Team, Team]:
teams = {}
Expand Down Expand Up @@ -76,6 +82,29 @@ def extract_lineups(self) -> Tuple[Team, Team]:
raise DeserializationError("Lineup incomplete")
return home_team, away_team

def extract_date(self) -> Optional[datetime]:
    """Return the date of the game as a timezone-aware UTC datetime.

    Reads ``matchInfo -> date`` from the parsed feed in ``self.root`` and
    parses it with the ``"%Y-%m-%dZ"`` format.

    Returns:
        The date at midnight UTC, or ``None`` when ``matchInfo`` or its
        ``date`` entry is missing.

    Raises:
        ValueError: If the value does not match the expected format.
    """
    if "matchInfo" not in self.root or "date" not in self.root["matchInfo"]:
        return None
    naive_date = datetime.strptime(self.root["matchInfo"]["date"], "%Y-%m-%dZ")
    # The format matches a literal trailing "Z", so strptime yields a naive
    # datetime. Attach UTC with replace(); astimezone() would first
    # interpret the naive value in the host's local timezone and shift it.
    return naive_date.replace(tzinfo=timezone.utc)

def extract_game_week(self) -> Optional[str]:
    """Return the game week stored under ``matchInfo -> week``.

    Returns ``None`` when the feed has no ``matchInfo`` section or that
    section has no ``week`` entry.
    """
    feed = self.root
    if "matchInfo" not in feed:
        return None
    match_info = feed["matchInfo"]
    if "week" not in match_info:
        return None
    return match_info["week"]

def extract_game_id(self) -> Optional[str]:
    """Return the game id stored under ``matchInfo -> id``.

    Returns ``None`` when the feed has no ``matchInfo`` section or that
    section has no ``id`` entry.
    """
    feed = self.root
    if "matchInfo" not in feed:
        return None
    match_info = feed["matchInfo"]
    if "id" not in match_info:
        return None
    return match_info["id"]

def _parse_teams(self) -> List[Dict[str, Any]]:
parsed_teams = []
match_info = self.root["matchInfo"]
Expand Down
4 changes: 4 additions & 0 deletions kloppy/infra/serializers/event/wyscout/deserializer_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,9 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset:
for wyId, team in teams.items()
]
)
game_id = raw_events["events"][0].get("matchId", None)
if game_id:
game_id = str(game_id)

events = []

Expand Down Expand Up @@ -730,6 +733,7 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset:
flags=None,
provider=Provider.WYSCOUT,
coordinate_system=transformer.get_to_coordinate_system(),
game_id=game_id,
)

return EventDataset(metadata=metadata, records=events)
28 changes: 26 additions & 2 deletions kloppy/infra/serializers/event/wyscout/deserializer_v3.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import json
import logging
from dataclasses import replace
from datetime import timedelta
from typing import Dict, List, Tuple, NamedTuple, IO
from datetime import timedelta, timezone
from dateutil.parser import parse
from typing import Dict, List

from kloppy.domain import (
BallOutEvent,
Expand Down Expand Up @@ -536,6 +537,24 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset:
for wyId, team in teams.items()
]
)
date = raw_events["match"].get("dateutc")
if date:
date = parse(date).astimezone(timezone.utc)
game_week = raw_events["match"].get("gameweek")
if game_week:
game_week = str(game_week)
game_id = raw_events["events"][0].get("matchId")
if game_id:
game_id = str(game_id)
coaches = raw_events["coaches"]
if home_team_id in coaches and "coach" in coaches[home_team_id]:
home_coach = coaches[home_team_id]["coach"].get("shortName")
else:
home_coach = None
if away_team_id in coaches and "coach" in coaches[away_team_id]:
away_coach = coaches[away_team_id]["coach"].get("shortName")
else:
away_coach = None

events = []

Expand Down Expand Up @@ -757,6 +776,11 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset:
flags=None,
provider=Provider.WYSCOUT,
coordinate_system=transformer.get_to_coordinate_system(),
date=date,
game_week=game_week,
game_id=game_id,
home_coach=home_coach,
away_coach=away_coach,
)

return EventDataset(metadata=metadata, records=events)
Loading

0 comments on commit d81ae7d

Please sign in to comment.