Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enriched metadata with date, game_week and game_id #340

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions kloppy/_providers/statsbomb.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ def load(
event_types: Optional[List[str]] = None,
coordinates: Optional[str] = None,
event_factory: Optional[EventFactory] = None,
additional_metadata: dict = {}
) -> EventDataset:
"""
Load StatsBomb event data into a [`EventDataset`][kloppy.domain.models.event.EventDataset]
Expand Down Expand Up @@ -48,6 +49,7 @@ def load(
lineup_data=lineup_data_fp,
three_sixty_data=three_sixty_data_fp,
),
additional_metadata=additional_metadata,
)


Expand Down
9 changes: 9 additions & 0 deletions kloppy/domain/models/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1003,6 +1003,10 @@ class Metadata:
orientation: See [`Orientation`][kloppy.domain.models.common.Orientation]
flags:
provider: See [`Provider`][kloppy.domain.models.common.Provider]
date: Date of the game.
game_week: Game week (or match day) of the game. It can also be the stage
(ex: "8th Finals"), if the game is happening during a cup or a play-off.
game_id: Game id of the game from the provider.
"""

teams: List[Team]
Expand All @@ -1014,6 +1018,11 @@ class Metadata:
coordinate_system: CoordinateSystem
score: Optional[Score] = None
frame_rate: Optional[float] = None
date: Optional[datetime] = None
game_week: Optional[str] = None
game_id: Optional[str] = None
home_coach: Optional[str] = None
away_coach: Optional[str] = None
attributes: Optional[Dict] = field(default_factory=dict, compare=False)

def __post_init__(self):
Expand Down
17 changes: 17 additions & 0 deletions kloppy/infra/serializers/event/datafactory/deserializer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
import logging
from datetime import timedelta, datetime, timezone
from dateutil.parser import parse, _parser
from dataclasses import replace
from typing import Dict, List, Tuple, Union, IO, NamedTuple

Expand Down Expand Up @@ -453,6 +454,19 @@ def deserialize(self, inputs: DatafactoryInputs) -> EventDataset:
periods[half], end_timestamp=timestamp
)

try:
date = match["date"]
if date:
date = parse(date).astimezone(timezone.utc)
except _parser.ParserError:
date = None
game_week = match.get("week", None)
if game_week:
game_week = str(game_week)
game_id = match.get("matchId", None)
if game_id:
game_id = str(game_id)

# exclude goals, already listed as shots too
incidences.pop(DF_EVENT_CLASS_GOALS)
raw_events = [
Expand Down Expand Up @@ -613,6 +627,9 @@ def deserialize(self, inputs: DatafactoryInputs) -> EventDataset:
score=score,
provider=Provider.DATAFACTORY,
coordinate_system=transformer.get_to_coordinate_system(),
date=date,
game_week=game_week,
game_id=game_id,
)

return EventDataset(
Expand Down
24 changes: 24 additions & 0 deletions kloppy/infra/serializers/event/sportec/deserializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ class SportecMetadata(NamedTuple):
x_max: float
y_max: float
fps: int
home_coach: str
away_coach: str


def sportec_metadata_from_xml_elm(match_root) -> SportecMetadata:
Expand All @@ -99,10 +101,17 @@ def sportec_metadata_from_xml_elm(match_root) -> SportecMetadata:

home_team = away_team = None
for team_elm in team_elms:
head_coach = [
trainer.attrib["Shortname"]
for trainer in team_elm.TrainerStaff.iterchildren("Trainer")
if trainer.attrib["Role"] == "headcoach"
]
if team_elm.attrib["Role"] == "home":
home_team = _team_from_xml_elm(team_elm)
home_coach = head_coach[0] if len(head_coach) else None
elif team_elm.attrib["Role"] == "guest":
away_team = _team_from_xml_elm(team_elm)
away_coach = head_coach[0] if len(head_coach) else None
else:
raise DeserializationError(
f"Unknown side: {team_elm.attrib['Role']}"
Expand Down Expand Up @@ -194,6 +203,8 @@ def sportec_metadata_from_xml_elm(match_root) -> SportecMetadata:
x_max=x_max,
y_max=y_max,
fps=SPORTEC_FPS,
home_coach=home_coach,
away_coach=away_coach,
)


Expand Down Expand Up @@ -404,12 +415,20 @@ def deserialize(self, inputs: SportecEventDataInputs) -> EventDataset:
event_root = objectify.fromstring(inputs.event_data.read())

with performance_logging("parse data", logger=logger):
date = parse(
match_root.MatchInformation.General.attrib["KickoffTime"]
).astimezone(timezone.utc)
game_week = match_root.MatchInformation.General.attrib["MatchDay"]
game_id = match_root.MatchInformation.General.attrib["MatchId"]

sportec_metadata = sportec_metadata_from_xml_elm(match_root)
teams = home_team, away_team = sportec_metadata.teams
transformer = self.get_transformer(
pitch_length=sportec_metadata.x_max,
pitch_width=sportec_metadata.y_max,
)
home_coach = sportec_metadata.home_coach
away_coach = sportec_metadata.away_coach

periods = []
period_id = 0
Expand Down Expand Up @@ -632,6 +651,11 @@ def deserialize(self, inputs: SportecEventDataInputs) -> EventDataset:
flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM),
provider=Provider.SPORTEC,
coordinate_system=transformer.get_to_coordinate_system(),
date=date,
game_week=game_week,
game_id=game_id,
home_coach=home_coach,
away_coach=away_coach,
)

return EventDataset(
Expand Down
3 changes: 2 additions & 1 deletion kloppy/infra/serializers/event/statsbomb/deserializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class StatsBombDeserializer(EventDataDeserializer[StatsBombInputs]):
def provider(self) -> Provider:
return Provider.STATSBOMB

def deserialize(self, inputs: StatsBombInputs) -> EventDataset:
def deserialize(self, inputs: StatsBombInputs, additional_metadata) -> EventDataset:
# Initialize coordinate system transformer
self.transformer = self.get_transformer()

Expand Down Expand Up @@ -118,6 +118,7 @@ def deserialize(self, inputs: StatsBombInputs) -> EventDataset:
score=None,
provider=Provider.STATSBOMB,
coordinate_system=self.transformer.get_to_coordinate_system(),
**additional_metadata
)
return EventDataset(metadata=metadata, records=events)

Expand Down
6 changes: 6 additions & 0 deletions kloppy/infra/serializers/event/statsperform/deserializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,6 +614,9 @@ def deserialize(self, inputs: StatsPerformInputs) -> EventDataset:
periods = metadata_parser.extract_periods()
score = metadata_parser.extract_score()
teams = metadata_parser.extract_lineups()
date = events_parser.extract_date()
game_week = events_parser.extract_game_week()
game_id = events_parser.extract_game_id()
raw_events = [
event
for event in events_parser.extract_events()
Expand Down Expand Up @@ -827,6 +830,9 @@ def deserialize(self, inputs: StatsPerformInputs) -> EventDataset:
if inputs.event_feed.upper() == "F24"
else Provider.STATSPERFORM,
coordinate_system=transformer.get_to_coordinate_system(),
date=date,
game_week=game_week,
game_id=game_id,
)

return EventDataset(
Expand Down
13 changes: 12 additions & 1 deletion kloppy/infra/serializers/event/statsperform/parsers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@

from datetime import datetime
from dataclasses import dataclass, field
from typing import List, Optional


@dataclass
Expand Down Expand Up @@ -53,6 +52,18 @@ def extract_score(self) -> Optional[Score]:
"""Return the score of the game."""
return None

def extract_date(self) -> Optional[datetime]:
    """Return the date of the game.

    Default implementation: no date is available, so this always returns
    None. Parsers that can read the kick-off date from their feed provide
    their own version returning a ``datetime``.
    """
    return None

def extract_game_week(self) -> Optional[str]:
    """Return the game week (match day) of the game.

    Default implementation: the value is unknown here, so the result is
    always None. Parsers that can read it from their feed provide their
    own version.
    """
    return None

def extract_game_id(self) -> Optional[str]:
    """Return the provider's identifier for the game.

    Default implementation: the value is unknown here, so the result is
    always None. Parsers that can read it from their feed provide their
    own version.
    """
    return None

def extract_lineups(self) -> Tuple[Team, Team]:
    """Return the home team and the away team, in that order.

    There is no default implementation: calling this version always
    raises ``NotImplementedError``.
    """
    raise NotImplementedError
Expand Down
30 changes: 27 additions & 3 deletions kloppy/infra/serializers/event/statsperform/parsers/f24_xml.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""XML parser for Opta F24 feeds."""
import pytz
from datetime import datetime
from typing import List
from datetime import datetime, timezone
from typing import List, Optional
from dateutil.parser import parse

from kloppy.domain import Period
from .base import OptaXMLParser, OptaEvent


Expand Down Expand Up @@ -53,3 +53,27 @@ def extract_events(self) -> List[OptaEvent]:
)
for event in game_elm.iterchildren("Event")
]

def extract_date(self) -> Optional[datetime]:
    """Return the date of the game as a UTC datetime.

    Reads the ``game_date`` attribute of the ``Game`` element.

    Returns:
        The parsed date converted to UTC, or None when there is no
        ``Game`` element or it carries no ``game_date`` attribute.
    """
    game_elm = self.root.find("Game")
    # Compare with None explicitly: the truth value of an XML element
    # reflects its children rather than its existence, so a plain
    # `if game_elm` can reject an element that was actually found.
    if game_elm is None or "game_date" not in game_elm.attrib:
        return None
    # NOTE(review): when `game_date` carries no UTC offset, astimezone()
    # interprets it in the machine's local timezone - confirm which
    # timezone the feed uses.
    return parse(game_elm.attrib["game_date"]).astimezone(timezone.utc)

def extract_game_week(self) -> Optional[str]:
    """Return the game week of the game.

    Reads the ``matchday`` attribute of the ``Game`` element.

    Returns:
        The attribute value, or None when there is no ``Game`` element
        or it carries no ``matchday`` attribute.
    """
    game_elm = self.root.find("Game")
    # Compare with None explicitly: the truth value of an XML element
    # reflects its children rather than its existence, so a plain
    # `if game_elm` can reject an element that was actually found.
    if game_elm is None or "matchday" not in game_elm.attrib:
        return None
    return game_elm.attrib["matchday"]

def extract_game_id(self) -> Optional[str]:
    """Return the provider's identifier for the game.

    Reads the ``id`` attribute of the ``Game`` element.

    Returns:
        The attribute value, or None when there is no ``Game`` element
        or it carries no ``id`` attribute.
    """
    game_elm = self.root.find("Game")
    # Compare with None explicitly: the truth value of an XML element
    # reflects its children rather than its existence, so a plain
    # `if game_elm` can reject an element that was actually found.
    if game_elm is None or "id" not in game_elm.attrib:
        return None
    return game_elm.attrib["id"]
33 changes: 31 additions & 2 deletions kloppy/infra/serializers/event/statsperform/parsers/ma1_json.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""JSON parser for Stats Perform MA1 feeds."""
import pytz
from datetime import datetime
from datetime import datetime, timezone
from typing import Any, Optional, List, Tuple, Dict

from kloppy.domain import Period, Score, Team, Ground, Player
Expand Down Expand Up @@ -30,7 +30,13 @@ def extract_periods(self) -> List[Period]:
return parsed_periods

def extract_score(self) -> Optional[Score]:
    """Return the total score of the game, if the feed provides one.

    Reads ``liveData.matchDetails.scores.total`` from the parsed feed.

    Returns:
        A ``Score`` built from the home and away totals, or None when
        any part of that path, or either total, is missing.
    """
    # Walk the nested structure defensively so that a feed without
    # scores yields None instead of raising KeyError.
    live_data = self.root.get("liveData") or {}
    match_details = live_data.get("matchDetails") or {}
    scores = match_details.get("scores") or {}
    total = scores.get("total") or {}

    home_score = total.get("home")
    away_score = total.get("away")
    if home_score is None or away_score is None:
        return None
    return Score(home=home_score, away=away_score)

def extract_lineups(self) -> Tuple[Team, Team]:
teams = {}
Expand Down Expand Up @@ -76,6 +82,29 @@ def extract_lineups(self) -> Tuple[Team, Team]:
raise DeserializationError("Lineup incomplete")
return home_team, away_team

def extract_date(self) -> Optional[datetime]:
    """Return the date of the game as a timezone-aware UTC datetime.

    Reads ``matchInfo.date`` from the parsed feed; the value is parsed
    with the format ``%Y-%m-%dZ`` (for example ``2023-01-15Z``).

    Returns:
        Midnight UTC on the match date, or None when the feed has no
        ``matchInfo`` section or no ``date`` entry in it.

    Raises:
        ValueError: If the value does not match ``%Y-%m-%dZ``.
    """
    if "matchInfo" not in self.root or "date" not in self.root["matchInfo"]:
        return None
    naive_date = datetime.strptime(self.root["matchInfo"]["date"], "%Y-%m-%dZ")
    # The trailing "Z" marks the value as UTC, so attach the zone directly.
    # Calling astimezone() on a naive datetime would first interpret it in
    # the machine's local timezone and could shift the calendar date.
    return naive_date.replace(tzinfo=timezone.utc)

def extract_game_week(self) -> Optional[str]:
    """Return the game week stored under ``matchInfo.week``.

    Returns None when the feed has no ``matchInfo`` section or that
    section has no ``week`` entry.
    """
    feed = self.root
    if "matchInfo" not in feed:
        return None
    match_info = feed["matchInfo"]
    if "week" not in match_info:
        return None
    return match_info["week"]

def extract_game_id(self) -> Optional[str]:
    """Return the game identifier stored under ``matchInfo.id``.

    Returns None when the feed has no ``matchInfo`` section or that
    section has no ``id`` entry.
    """
    feed = self.root
    if "matchInfo" not in feed:
        return None
    match_info = feed["matchInfo"]
    if "id" not in match_info:
        return None
    return match_info["id"]

def _parse_teams(self) -> List[Dict[str, Any]]:
parsed_teams = []
match_info = self.root["matchInfo"]
Expand Down
4 changes: 4 additions & 0 deletions kloppy/infra/serializers/event/wyscout/deserializer_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,9 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset:
for wyId, team in teams.items()
]
)
game_id = raw_events["events"][0].get("matchId", None)
if game_id:
game_id = str(game_id)

events = []

Expand Down Expand Up @@ -730,6 +733,7 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset:
flags=None,
provider=Provider.WYSCOUT,
coordinate_system=transformer.get_to_coordinate_system(),
game_id=game_id,
)

return EventDataset(metadata=metadata, records=events)
28 changes: 26 additions & 2 deletions kloppy/infra/serializers/event/wyscout/deserializer_v3.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import json
import logging
from dataclasses import replace
from datetime import timedelta
from typing import Dict, List, Tuple, NamedTuple, IO
from datetime import timedelta, timezone
from dateutil.parser import parse
from typing import Dict, List

from kloppy.domain import (
BallOutEvent,
Expand Down Expand Up @@ -536,6 +537,24 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset:
for wyId, team in teams.items()
]
)
date = raw_events["match"].get("dateutc")
if date:
date = parse(date).astimezone(timezone.utc)
game_week = raw_events["match"].get("gameweek")
if game_week:
game_week = str(game_week)
game_id = raw_events["events"][0].get("matchId")
if game_id:
game_id = str(game_id)
coaches = raw_events["coaches"]
if home_team_id in coaches and "coach" in coaches[home_team_id]:
home_coach = coaches[home_team_id]["coach"].get("shortName")
else:
home_coach = None
if away_team_id in coaches and "coach" in coaches[away_team_id]:
away_coach = coaches[away_team_id]["coach"].get("shortName")
else:
away_coach = None

events = []

Expand Down Expand Up @@ -757,6 +776,11 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset:
flags=None,
provider=Provider.WYSCOUT,
coordinate_system=transformer.get_to_coordinate_system(),
date=date,
game_week=game_week,
game_id=game_id,
home_coach=home_coach,
away_coach=away_coach,
)

return EventDataset(metadata=metadata, records=events)
Loading