| """Base schemas used by all event stream serializers. |
| |
| Each serializer should create dataframes that contain at least the fields |
| included in these base schemas. Each serializer can add different additional |
| fields on top. |
| |
| """ |
|
|
| import pandas as pd |
| import pandera as pa |
| from pandera.typing import DateTime, Object, Series |
|
|
|
|
class CompetitionSchema(pa.SchemaModel):
    """Schema of a dataframe that lists competitions and their seasons.

    Identifier columns are typed as ``Object``; name columns are strings.
    """

    season_id: Series[Object] = pa.Field()
    """Unique identifier of the season."""
    season_name: Series[str] = pa.Field()
    """Name of the season."""
    competition_id: Series[Object] = pa.Field()
    """Unique identifier of the competition."""
    competition_name: Series[str] = pa.Field()
    """Name of the competition."""

    # pandera options: coerce columns to the declared dtypes and reject
    # columns that are not declared in the schema.
    class Config:
        coerce = True
        strict = True
|
|
|
|
class GameSchema(pa.SchemaModel):
    """Schema of a dataframe that lists games.

    Identifier columns are typed as ``Object``. ``game_day`` is the only
    nullable column and uses the pandas nullable integer dtype.
    """

    game_id: Series[Object] = pa.Field()
    """Unique identifier of the game."""
    season_id: Series[Object] = pa.Field()
    """Unique identifier of the season."""
    competition_id: Series[Object] = pa.Field()
    """Unique identifier of the competition."""
    game_day: Series[pd.Int64Dtype] = pa.Field(nullable=True)
    """Number of weeks or rounds into the competition at which this game takes place."""
    game_date: Series[DateTime] = pa.Field()
    """Date on which the game was played."""
    home_team_id: Series[Object] = pa.Field()
    """Unique identifier of the home team in this game."""
    away_team_id: Series[Object] = pa.Field()
    """Unique identifier of the away team in this game."""

    # pandera options: coerce columns to the declared dtypes and reject
    # columns that are not declared in the schema.
    class Config:
        coerce = True
        strict = True
|
|
|
|
class TeamSchema(pa.SchemaModel):
    """Schema of a dataframe that lists the teams of a game."""

    team_id: Series[Object] = pa.Field()
    """Unique identifier of the team."""
    team_name: Series[str] = pa.Field()
    """Name of the team."""

    # pandera options: coerce columns to the declared dtypes and reject
    # columns that are not declared in the schema.
    class Config:
        coerce = True
        strict = True
|
|
|
|
class PlayerSchema(pa.SchemaModel):
    """Schema of a dataframe that lists the players on the teamsheet of a game.

    Identifier columns are typed as ``Object``; none of the columns is
    declared nullable.
    """

    game_id: Series[Object] = pa.Field()
    """Unique identifier of the game."""
    team_id: Series[Object] = pa.Field()
    """Unique identifier of the player's team."""
    player_id: Series[Object] = pa.Field()
    """Unique identifier of the player."""
    player_name: Series[str] = pa.Field()
    """Name of the player."""
    is_starter: Series[bool] = pa.Field()
    """Whether the player is part of the starting lineup."""
    minutes_played: Series[int] = pa.Field()
    """Number of minutes the player played in the game."""
    jersey_number: Series[int] = pa.Field()
    """Jersey number of the player."""

    # pandera options: coerce columns to the declared dtypes and reject
    # columns that are not declared in the schema.
    class Config:
        coerce = True
        strict = True
|
|
|
|
class EventSchema(pa.SchemaModel):
    """Schema of a dataframe that holds the event stream data of a game.

    ``team_id`` and ``player_id`` are the only nullable columns.
    """

    game_id: Series[Object] = pa.Field()
    """Unique identifier of the game."""
    event_id: Series[Object] = pa.Field()
    """Unique identifier of the event."""
    period_id: Series[int] = pa.Field()
    """Unique identifier of the part of the game in which the event took place."""
    team_id: Series[Object] = pa.Field(nullable=True)
    """Unique identifier of the team this event relates to."""
    player_id: Series[Object] = pa.Field(nullable=True)
    """Unique identifier of the player this event relates to."""
    type_id: Series[int] = pa.Field()
    """Unique identifier of the type of this event."""
    type_name: Series[str] = pa.Field()
    """Name of the type of this event."""

    # pandera options: coerce columns to the declared dtypes and reject
    # columns that are not declared in the schema.
    class Config:
        coerce = True
        strict = True
|
|