Skip to content

Commit

Permalink
Enriched metadata with date, game_week and game_id
Browse files Browse the repository at this point in the history
  • Loading branch information
SportsDynamicsDS committed Jul 19, 2024
1 parent d001eb0 commit 8eaeb0b
Show file tree
Hide file tree
Showing 23 changed files with 336 additions and 31 deletions.
7 changes: 7 additions & 0 deletions kloppy/domain/models/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1003,6 +1003,10 @@ class Metadata:
orientation: See [`Orientation`][kloppy.domain.models.common.Orientation]
flags:
provider: See [`Provider`][kloppy.domain.models.common.Provider]
date: Date of the game.
game_week: Game week (or match day) of the game. It can also be the stage
(ex: "8th Finals"), if the game is happening during a cup or a play-off.
game_id: Game id of the game from the provider.
"""

teams: List[Team]
Expand All @@ -1014,6 +1018,9 @@ class Metadata:
coordinate_system: CoordinateSystem
score: Optional[Score] = None
frame_rate: Optional[float] = None
date: Optional[datetime] = None
game_week: Optional[str] = None
game_id: Optional[str] = None
attributes: Optional[Dict] = field(default_factory=dict, compare=False)

def __post_init__(self):
Expand Down
17 changes: 17 additions & 0 deletions kloppy/infra/serializers/event/datafactory/deserializer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
import logging
from datetime import timedelta, datetime, timezone
from dateutil.parser import parse, _parser
from dataclasses import replace
from typing import Dict, List, Tuple, Union, IO, NamedTuple

Expand Down Expand Up @@ -453,6 +454,19 @@ def deserialize(self, inputs: DatafactoryInputs) -> EventDataset:
periods[half], end_timestamp=timestamp
)

try:
date = match["date"]
if date:
date = parse(date).astimezone(timezone.utc)
except _parser.ParserError:
date = None
game_week = match.get("week", None)
if game_week:
game_week = str(game_week)
game_id = match.get("matchId", None)
if game_id:
game_id = str(game_id)

# exclude goals, already listed as shots too
incidences.pop(DF_EVENT_CLASS_GOALS)
raw_events = [
Expand Down Expand Up @@ -613,6 +627,9 @@ def deserialize(self, inputs: DatafactoryInputs) -> EventDataset:
score=score,
provider=Provider.DATAFACTORY,
coordinate_system=transformer.get_to_coordinate_system(),
date=date,
game_week=game_week,
game_id=game_id,
)

return EventDataset(
Expand Down
9 changes: 9 additions & 0 deletions kloppy/infra/serializers/event/sportec/deserializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,12 @@ def deserialize(self, inputs: SportecEventDataInputs) -> EventDataset:
event_root = objectify.fromstring(inputs.event_data.read())

with performance_logging("parse data", logger=logger):
date = parse(
match_root.MatchInformation.General.attrib["KickoffTime"]
).astimezone(timezone.utc)
game_week = match_root.MatchInformation.General.attrib["MatchDay"]
game_id = match_root.MatchInformation.General.attrib["MatchId"]

sportec_metadata = sportec_metadata_from_xml_elm(match_root)
teams = home_team, away_team = sportec_metadata.teams
transformer = self.get_transformer(
Expand Down Expand Up @@ -632,6 +638,9 @@ def deserialize(self, inputs: SportecEventDataInputs) -> EventDataset:
flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM),
provider=Provider.SPORTEC,
coordinate_system=transformer.get_to_coordinate_system(),
date=date,
game_week=game_week,
game_id=game_id,
)

return EventDataset(
Expand Down
6 changes: 6 additions & 0 deletions kloppy/infra/serializers/event/statsperform/deserializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,6 +614,9 @@ def deserialize(self, inputs: StatsPerformInputs) -> EventDataset:
periods = metadata_parser.extract_periods()
score = metadata_parser.extract_score()
teams = metadata_parser.extract_lineups()
date = events_parser.extract_date()
game_week = events_parser.extract_game_week()
game_id = events_parser.extract_game_id()
raw_events = [
event
for event in events_parser.extract_events()
Expand Down Expand Up @@ -827,6 +830,9 @@ def deserialize(self, inputs: StatsPerformInputs) -> EventDataset:
if inputs.event_feed.upper() == "F24"
else Provider.STATSPERFORM,
coordinate_system=transformer.get_to_coordinate_system(),
date=date,
game_week=game_week,
game_id=game_id,
)

return EventDataset(
Expand Down
13 changes: 12 additions & 1 deletion kloppy/infra/serializers/event/statsperform/parsers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@

from datetime import datetime
from dataclasses import dataclass, field
from typing import List, Optional


@dataclass
Expand Down Expand Up @@ -53,6 +52,18 @@ def extract_score(self) -> Optional[Score]:
"""Return the score of the game."""
return None

def extract_date(self) -> Optional[datetime]:
    """Return the date of the game.

    Returns:
        A datetime when the feed provides one. This default implementation
        has no date information and always returns ``None``.
    """
    return None

def extract_game_week(self) -> Optional[str]:
    """Return the game week of the game.

    Default implementation: no game-week information is available here,
    so the result is always ``None``.
    """
    return None

def extract_game_id(self) -> Optional[str]:
    """Return the provider's identifier of the game.

    Default implementation: no game id is available here, so the result
    is always ``None``.
    """
    return None

def extract_lineups(self) -> Tuple[Team, Team]:
    """Return the home team and the away team.

    Raises:
        NotImplementedError: Always; this implementation provides no
            lineup extraction.
    """
    raise NotImplementedError()
Expand Down
30 changes: 27 additions & 3 deletions kloppy/infra/serializers/event/statsperform/parsers/f24_xml.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""XML parser for Opta F24 feeds."""
import pytz
from datetime import datetime
from typing import List
from datetime import datetime, timezone
from typing import List, Optional
from dateutil.parser import parse

from kloppy.domain import Period
from .base import OptaXMLParser, OptaEvent


Expand Down Expand Up @@ -53,3 +53,27 @@ def extract_events(self) -> List[OptaEvent]:
)
for event in game_elm.iterchildren("Event")
]

def extract_date(self) -> Optional[datetime]:
    """Return the date of the game, converted to UTC.

    Returns:
        The ``game_date`` attribute of the ``Game`` element parsed into a
        timezone-aware datetime in UTC, or ``None`` when the element or
        the attribute is missing.
    """
    game_elm = self.root.find("Game")
    # Compare against None explicitly: the truth value of an XML element
    # reflects whether it has children, not whether find() located it.
    if game_elm is None or "game_date" not in game_elm.attrib:
        return None
    # NOTE(review): if game_date carries no UTC offset, astimezone()
    # interprets it in the machine's local timezone -- confirm which
    # timezone the provider uses for this attribute.
    return parse(game_elm.attrib["game_date"]).astimezone(timezone.utc)

def extract_game_week(self) -> Optional[str]:
    """Return the game week of the game.

    Returns:
        The ``matchday`` attribute of the ``Game`` element, or ``None``
        when the element or the attribute is missing.
    """
    game_elm = self.root.find("Game")
    # Compare against None explicitly: the truth value of an XML element
    # reflects whether it has children, not whether find() located it.
    if game_elm is None:
        return None
    return game_elm.attrib.get("matchday")

def extract_game_id(self) -> Optional[str]:
    """Return the provider's identifier of the game.

    Returns:
        The ``id`` attribute of the ``Game`` element, or ``None`` when the
        element or the attribute is missing.
    """
    game_elm = self.root.find("Game")
    # Compare against None explicitly: the truth value of an XML element
    # reflects whether it has children, not whether find() located it.
    if game_elm is None:
        return None
    return game_elm.attrib.get("id")
33 changes: 31 additions & 2 deletions kloppy/infra/serializers/event/statsperform/parsers/ma1_json.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""JSON parser for Stats Perform MA1 feeds."""
import pytz
from datetime import datetime
from datetime import datetime, timezone
from typing import Any, Optional, List, Tuple, Dict

from kloppy.domain import Period, Score, Team, Ground, Player
Expand Down Expand Up @@ -30,7 +30,13 @@ def extract_periods(self) -> List[Period]:
return parsed_periods

def extract_score(self) -> Optional[Score]:
    """Return the score of the game.

    Returns:
        A ``Score`` built from ``liveData.matchDetails.scores.total``, or
        ``None`` when either the home or the away total is ``None``.

    Raises:
        KeyError: If any key on the path to the totals is missing.
    """
    # The leftover unconditional ``return None`` that preceded this logic
    # made everything below unreachable; it has been removed.
    totals = self.root["liveData"]["matchDetails"]["scores"]["total"]
    home_score = totals["home"]
    away_score = totals["away"]
    if home_score is None or away_score is None:
        return None
    return Score(home=home_score, away=away_score)

def extract_lineups(self) -> Tuple[Team, Team]:
teams = {}
Expand Down Expand Up @@ -76,6 +82,29 @@ def extract_lineups(self) -> Tuple[Team, Team]:
raise DeserializationError("Lineup incomplete")
return home_team, away_team

def extract_date(self) -> Optional[datetime]:
    """Return the date of the game as a timezone-aware UTC datetime.

    Returns:
        Midnight UTC of ``matchInfo.date`` (expected as ``YYYY-MM-DDZ``),
        or ``None`` when the feed has no ``matchInfo`` or no ``date`` entry.

    Raises:
        ValueError: If the date string does not match ``%Y-%m-%dZ``.
    """
    if "matchInfo" not in self.root or "date" not in self.root["matchInfo"]:
        return None
    naive = datetime.strptime(self.root["matchInfo"]["date"], "%Y-%m-%dZ")
    # strptime() yields a naive datetime; astimezone() would treat it as
    # machine-local time and shift it by the local UTC offset. The trailing
    # "Z" marks the value as UTC, so attach the timezone directly instead.
    return naive.replace(tzinfo=timezone.utc)

def extract_game_week(self) -> Optional[str]:
    """Return the game week of the game.

    Returns:
        The ``week`` entry of ``matchInfo``, or ``None`` when the feed has
        no ``matchInfo`` or no ``week`` entry.
    """
    feed = self.root
    if "matchInfo" not in feed:
        return None
    match_info = feed["matchInfo"]
    if "week" not in match_info:
        return None
    return match_info["week"]

def extract_game_id(self) -> Optional[str]:
    """Return the provider's identifier of the game.

    Returns:
        The ``id`` entry of ``matchInfo``, or ``None`` when the feed has no
        ``matchInfo`` or no ``id`` entry.
    """
    feed = self.root
    if "matchInfo" not in feed:
        return None
    match_info = feed["matchInfo"]
    if "id" not in match_info:
        return None
    return match_info["id"]

def _parse_teams(self) -> List[Dict[str, Any]]:
parsed_teams = []
match_info = self.root["matchInfo"]
Expand Down
4 changes: 4 additions & 0 deletions kloppy/infra/serializers/event/wyscout/deserializer_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,9 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset:
for wyId, team in teams.items()
]
)
game_id = raw_events["events"][0].get("matchId", None)
if game_id:
game_id = str(game_id)

events = []

Expand Down Expand Up @@ -730,6 +733,7 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset:
flags=None,
provider=Provider.WYSCOUT,
coordinate_system=transformer.get_to_coordinate_system(),
game_id=game_id,
)

return EventDataset(metadata=metadata, records=events)
17 changes: 15 additions & 2 deletions kloppy/infra/serializers/event/wyscout/deserializer_v3.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import json
import logging
from dataclasses import replace
from datetime import timedelta
from typing import Dict, List, Tuple, NamedTuple, IO
from datetime import timedelta, timezone
from dateutil.parser import parse
from typing import Dict, List

from kloppy.domain import (
BallOutEvent,
Expand Down Expand Up @@ -536,6 +537,15 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset:
for wyId, team in teams.items()
]
)
date = raw_events["match"].get("dateutc", None)
if date:
date = parse(date).astimezone(timezone.utc)
game_week = raw_events["match"].get("gameweek", None)
if game_week:
game_week = str(game_week)
game_id = raw_events["events"][0].get("matchId", None)
if game_id:
game_id = str(game_id)

events = []

Expand Down Expand Up @@ -757,6 +767,9 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset:
flags=None,
provider=Provider.WYSCOUT,
coordinate_system=transformer.get_to_coordinate_system(),
date=date,
game_week=game_week,
game_id=game_id,
)

return EventDataset(metadata=metadata, records=events)
25 changes: 22 additions & 3 deletions kloppy/infra/serializers/tracking/secondspectrum.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import json
import logging
from datetime import timedelta
from datetime import datetime, timedelta, timezone
import warnings
from typing import Tuple, Dict, Optional, Union, NamedTuple, IO
from typing import Dict, Optional, Union, NamedTuple, IO

from lxml import objectify

Expand All @@ -23,6 +23,7 @@
Player,
Provider,
PlayerData,
Score,
)

from kloppy.utils import Readable, performance_logging
Expand Down Expand Up @@ -290,16 +291,34 @@ def _iter():
)
orientation = Orientation.NOT_SET

if metadata:
score = Score(
home=metadata["homeScore"], away=metadata["awayScore"]
)
year, month, day = (
metadata["year"],
metadata["month"],
metadata["day"],
)
date = datetime(year, month, day, 0, 0, tzinfo=timezone.utc)
game_id = metadata["ssiId"]
else:
score = None
date = None
game_id = None

metadata = Metadata(
teams=teams,
periods=periods,
pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions,
score=None,
score=score,
frame_rate=frame_rate,
orientation=orientation,
provider=Provider.SECONDSPECTRUM,
flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE,
coordinate_system=transformer.get_to_coordinate_system(),
date=date,
game_id=game_id,
)

return TrackingDataset(
Expand Down
16 changes: 13 additions & 3 deletions kloppy/infra/serializers/tracking/skillcorner.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import logging
from datetime import timedelta
from datetime import timedelta, timezone
from dateutil.parser import parse
import warnings
from typing import List, Dict, Tuple, NamedTuple, IO, Optional, Union
from enum import Enum, Flag
from typing import NamedTuple, IO, Optional, Union
from collections import Counter
import numpy as np
import json
Expand Down Expand Up @@ -340,6 +340,14 @@ def deserialize(self, inputs: SkillCornerInputs) -> TrackingDataset:
)
teams = [home_team, away_team]

date = metadata.get("date_time", None)
if date:
date = parse(date).astimezone(timezone.utc)

game_id = metadata.get("id", None)
if game_id:
game_id = str(game_id)

for player_track_obj_id, player in player_dict.items():
team_id = player["team_id"]

Expand Down Expand Up @@ -439,6 +447,8 @@ def _iter():
provider=Provider.SKILLCORNER,
flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM),
coordinate_system=transformer.get_to_coordinate_system(),
date=date,
game_id=game_id
)

return TrackingDataset(
Expand Down
Loading

0 comments on commit 8eaeb0b

Please sign in to comment.