diff --git a/impectPy/__init__.py b/impectPy/__init__.py index cb32a31..74cf7fa 100644 --- a/impectPy/__init__.py +++ b/impectPy/__init__.py @@ -8,6 +8,6 @@ from .player_scores import getPlayerMatchScores, getPlayerIterationScores from .squad_scores import getSquadMatchScores, getSquadIterationScores from .player_profile_scores import getPlayerProfileScores -from .sportscode_xml import generateSportsCodeXML +from .sportscode_xml import generateXML from .set_pieces import getSetPieces from .squad_ratings import getSquadRatings \ No newline at end of file diff --git a/impectPy/sportscode_xml.py b/impectPy/sportscode_xml.py index a2a2ecd..b2bc127 100644 --- a/impectPy/sportscode_xml.py +++ b/impectPy/sportscode_xml.py @@ -1,6 +1,7 @@ -# load packages -from xml.etree import ElementTree as ET +import numpy as np import pandas as pd +import sys +from xml.etree import ElementTree as ET ###### # @@ -8,43 +9,183 @@ # ###### +#define allowed KPIs, labels and codes +allowed_labels = [ + {"order": "00 | ", "name": "eventId"}, + {"order": "01 | ", "name": "matchId"}, + {"order": "02 | ", "name": "periodId"}, + {"order": "03 | ", "name": "phase"}, + {"order": "04 | ", "name": "gameState"}, + {"order": "05 | ", "name": "playerPosition"}, + {"order": "06 | ", "name": "action"}, + {"order": "07 | ", "name": "actionType"}, + {"order": "08a | ", "name": "bodyPart"}, + {"order": "08b | ", "name": "bodyPartExtended"}, + {"order": "08c | ", "name": "previousPassHeight"}, + {"order": "09 | ", "name": "actionTypeResult"}, + {"order": "10 | ", "name": "startPackingZone"}, + {"order": "11 | ", "name": "startPackingZoneGroup"}, + {"order": "12 | ", "name": "startPitchPosition"}, + {"order": "13 | ", "name": "startLane"}, + {"order": "14 | ", "name": "endPackingZone"}, + {"order": "15 | ", "name": "endPackingZoneGroup"}, + {"order": "16 | ", "name": "endPitchPosition"}, + {"order": "17 | ", "name": "endLane"}, + {"order": "18 | ", "name": "opponents"}, + {"order": "19 | ", "name": "pressure"}, + {"order": "20 | ", "name": "pxTTeam"}, + {"order": "21 | ", "name": "pressingPlayerName"}, + {"order": "22 | ", "name": "duelType"}, + {"order": "23 | ", "name": "duelPlayerName"}, + {"order": "24 | ", "name": "fouledPlayerName"}, + {"order": "25 | ", "name": "passDistance"}, + {"order": "26 | ", "name": "passReceiverPlayerName"}, + {"order": "27 | ", "name": "leadsToShot"}, + {"order": "28 | ", "name": "leadsToGoal"}, + {"order": "29 | ", "name": "squadName"}, + {"order": "30 | ", "name": "playerName"}, + {"order": "31 | ", "name": "pxTTeamStart"}, + {"order": "32 | ", "name": "pxTTeamEnd"}, +] + +allowed_kpis = [ + {"order": "KPI: ", "name": "PXT_DELTA"}, + {"order": "KPI: ", "name": "BYPASSED_OPPONENTS"}, + {"order": "KPI: ", "name": "BYPASSED_DEFENDERS"}, + {"order": "KPI: ", "name": "BYPASSED_OPPONENTS_RECEIVING"}, + {"order": "KPI: ", "name": "BYPASSED_DEFENDERS_RECEIVING"}, + {"order": "KPI: ", "name": "BALL_LOSS_ADDED_OPPONENTS"}, + {"order": "KPI: ", "name": "BALL_LOSS_REMOVED_TEAMMATES"}, + {"order": "KPI: ", "name": "BALL_WIN_ADDED_TEAMMATES"}, + {"order": "KPI: ", "name": "BALL_WIN_REMOVED_OPPONENTS"}, + {"order": "KPI: ", "name": "REVERSE_PLAY_ADDED_OPPONENTS"}, + {"order": "KPI: ", "name": "REVERSE_PLAY_ADDED_OPPONENTS_DEFENDERS"}, + {"order": "KPI: ", "name": "BYPASSED_OPPONENTS_RAW"}, + {"order": "KPI: ", "name": "BYPASSED_OPPONENTS_DEFENDERS_RAW"}, + {"order": "KPI: ", "name": "SHOT_XG"}, + {"order": "KPI: ", "name": "POSTSHOT_XG"}, + {"order": "KPI: ", "name": "PACKING_XG"} +] + +allowed_codes = [ + "playerName", + "squadName", + "actionType", + "action" +] + +# define allowed label/code combinations +combinations = { + "eventId": {"playerName": True, "team": False, "action": True, "actionType": True}, + "matchId": {"playerName": True, "team": True, "action": True, "actionType": True}, + "periodId": {"playerName": True, "team": True, "action": True, "actionType": True}, + "phase": {"playerName": True, "team": False, "action": True, "actionType": True}, + "gameState": {"playerName": True, "team": True, "action": True, "actionType": True}, + "playerPosition": {"playerName": True, "team": False, "action": True, "actionType": True}, + "action": {"playerName": True, "team": False, "action": False, "actionType": True}, + "actionType": {"playerName": True, "team": False, "action": True, "actionType": False}, + "bodyPart": {"playerName": True, "team": False, "action": True, "actionType": True}, + "bodyPartExtended": {"playerName": True, "team": False, "action": True, "actionType": True}, + "previousPassHeight": {"playerName": True, "team": False, "action": True, "actionType": True}, + "actionTypeResult": {"playerName": True, "team": False, "action": True, "actionType": True}, + "startPackingZone": {"playerName": True, "team": False, "action": True, "actionType": True}, + "startPackingZoneGroup": {"playerName": True, "team": False, "action": True, "actionType": True}, + "startPitchPosition": {"playerName": True, "team": False, "action": True, "actionType": True}, + "startLane": {"playerName": True, "team": False, "action": True, "actionType": True}, + "endPackingZone": {"playerName": True, "team": False, "action": True, "actionType": True}, + "endPackingZoneGroup": {"playerName": True, "team": False, "action": True, "actionType": True}, + "endPitchPosition": {"playerName": True, "team": False, "action": True, "actionType": True}, + "endLane": {"playerName": True, "team": False, "action": True, "actionType": True}, + "opponents": {"playerName": True, "team": False, "action": True, "actionType": True}, + "pressure": {"playerName": True, "team": False, "action": True, "actionType": True}, + "pxTTeam": {"playerName": True, "team": False, "action": True, "actionType": True}, + "pressingPlayerName": {"playerName": True, "team": False, "action": True, "actionType": True}, + "duelType": {"playerName": True, "team": False, "action": True, "actionType": True}, + "duelPlayerName": {"playerName": True, "team": False, "action": True, "actionType": True}, + "fouledPlayerName": {"playerName": True, "team": False, "action": True, "actionType": True}, + "passDistance": {"playerName": True, "team": False, "action": True, "actionType": True}, + "passReceiverPlayerName": {"playerName": True, "team": False, "action": True, "actionType": True}, + "leadsToShot": {"playerName": True, "team": True, "action": True, "actionType": True}, + "leadsToGoal": {"playerName": True, "team": True, "action": True, "actionType": True}, + "squadName": {"playerName": True, "team": False, "action": True, "actionType": True}, + "playerName": {"playerName": False, "team": True, "action": True, "actionType": True}, + "pxTTeamStart": {"playerName": False, "team": True, "action": False, "actionType": False}, + "pxTTeamEnd": {"playerName": False, "team": True, "action": False, "actionType": False} +} + + +# define function to generate xml +def generateXML( + events: pd.DataFrame, + lead: int, + lag: int, + p1Start: int, + p2Start: int, + p3Start: int, + p4Start: int, + p5Start: int, + codeTag: str, + labels=None, + kpis=None, + labelSorting: bool = True, + sequencing: bool = True, + buckets: bool = True +) -> ET.ElementTree: + + # handle kpis and labels defaults + if labels is None: + labels = [label["name"] for label in allowed_labels if combinations.get(label.get("name")).get(codeTag)] + if kpis is None: + kpis = [kpi["name"] for kpi in allowed_kpis] + + # check for invalid kpis + invalid_kpis = [kpi for kpi in kpis if kpi not in [kpi["name"] for kpi in allowed_kpis]] + if len(invalid_kpis) > 0: + raise ValueError(f"Invalid KPIs: {invalid_kpis}") + + # check for invalid labels + invalid_labels = [lbl for lbl in labels if lbl not in [label["name"] for label in allowed_labels]] + if len(invalid_labels) > 0: + raise ValueError(f"Invalid Labels: {invalid_labels}") + + # check for invalid code tag + if not codeTag in allowed_codes: + raise ValueError(f"Invalid Code: {codeTag}") + + # keep only : + # - if KPI in kpis + # - if Label in labels + # - if code matches legend + labels_and_kpis = [] + invalid_labels = [] + for label in allowed_labels: + if label.get("name") in labels and label.get("name") != codeTag: # ensure code attribute is not repeated as a label + if combinations.get(label.get("name")).get(codeTag): + labels_and_kpis.append(label) + else: + invalid_labels.append(label.get("name")) + + if len(invalid_labels) > 0: + raise ValueError( + f"With the selected code ('{codeTag}') following labels are invalid:\n{', '.join(invalid_labels)}" + ) + + for kpi in allowed_kpis: + if kpi.get("name") in kpis: + labels_and_kpis.append(kpi) + + if labelSorting: + labels_and_kpis = sorted(labels_and_kpis, key=lambda x: x["order"]) -# define function -def generateSportsCodeXML(events: pd.DataFrame, - lead: int, - lag: int, - p1Start: int, - p2Start: int, - p3Start: int, - p4Start: int, - p5Start: int, - sequencing: bool = True, - apply_buckets: bool = True) -> ET.ElementTree: - # define parameters # compile periods start times into dict - offsets = {"p1": p1Start, - "p2": p2Start, - "p3": p3Start, - "p4": p4Start, - "p5": p5Start} - - # define list of kpis to be included - kpis = ["BYPASSED_OPPONENTS", - "BYPASSED_DEFENDERS", - "BYPASSED_OPPONENTS_RECEIVING", - "BYPASSED_DEFENDERS_RECEIVING", - "BALL_LOSS_ADDED_OPPONENTS", - "BALL_LOSS_REMOVED_TEAMMATES", - "BALL_WIN_ADDED_TEAMMATES", - "BALL_WIN_REMOVED_OPPONENTS", - "REVERSE_PLAY_ADDED_OPPONENTS", - "REVERSE_PLAY_ADDED_OPPONENTS_DEFENDERS", - "BYPASSED_OPPONENTS_RAW", - "BYPASSED_OPPONENTS_DEFENDERS_RAW", - "SHOT_XG", - "POSTSHOT_XG", - "PACKING_XG"] + offsets = { + "p1": p1Start, + "p2": p2Start, + "p3": p3Start, + "p4": p4Start, + "p5": p5Start + } # create empty dict to store bucket definitions for kpis kpi_buckets = {} @@ -62,7 +203,8 @@ def generateSportsCodeXML(events: pd.DataFrame, "max": 5}, {"label": "[5,∞]", "min": 5, - "max": 50}] + "max": 50} + ] bucket_shotxg = [ {"label": "[0,0.02[", @@ -133,6 +275,40 @@ def generateSportsCodeXML(events: pd.DataFrame, "max": 1.1} ] + # define delta pxt bucket + bucket_pxt = [ + {"label": "[0%,1%[", + "min": 0, + "max": 0.01}, + {"label": "[1%,2%[", + "min": 0.01, + "max": 0.02}, + {"label": "[2%,5%[", + "min": 0.02, + "max": 0.05}, + {"label": "[5%,10%[", + "min": 0.05, + "max": 0.1}, + {"label": "[10%,100%]", + "min": 0.1, + "max": 1.01}, + {"label": "[-1%,0%[", + "min": -0.01, + "max": 0}, + {"label": "[-2%,-1%[", + "min": -0.02, + "max": -0.01}, + {"label": "[-5%,-2%[", + "min": -0.05, + "max": -0.02}, + {"label": "[-10%,-5%[", + "min": -0.1, + "max": -0.05}, + {"label": "[-100%,-10%[", + "min": -1., + "max": -0.1} + ] + # iterate over kpis and add buckets to dict for kpi in kpis: if kpi == "SHOT_XG": @@ -141,191 +317,135 @@ def generateSportsCodeXML(events: pd.DataFrame, kpi_buckets[kpi] = bucket_postshotxg elif kpi == "PACKING_XG": kpi_buckets[kpi] = bucket_packingxg + elif kpi == "PXT_DELTA": + kpi_buckets[kpi] = bucket_pxt else: kpi_buckets[kpi] = buckets_packing # define pressure buckets - pressure_buckets = [{"label": "[0,30[", - "min": -1, - "max": 30}, - {"label": "[30,70[", - "min": 30, - "max": 70}, - {"label": "[70,100]", - "min": 70, - "max": 101}] + pressure_buckets = [ + {"label": "[0,30[", + "min": -1, + "max": 30}, + {"label": "[30,70[", + "min": 30, + "max": 70}, + {"label": "[70,100]", + "min": 70, + "max": 101} + ] # define opponent buckets - opponent_buckets = [{"label": "[0,5[", - "min": -1, - "max": 5}, - {"label": "[5,9[", - "min": 5, - "max": 9}, - {"label": "[9,11]", - "min": 9, - "max": 12}] - - # define delta pxt bucket - pxt_buckets = [{"label": "[0%,1%[", - "min": 0, - "max": 0.01}, - {"label": "[1%,2%[", - "min": 0.01, - "max": 0.02}, - {"label": "[2%,5%[", - "min": 0.02, - "max": 0.05}, - {"label": "[5%,10%[", - "min": 0.05, - "max": 0.1}, - {"label": "[10%,100%]", - "min": 0.1, - "max": 1.01}, - {"label": "[-1%,0%[", - "min": -0.01, - "max": 0}, - {"label": "[-2%,-1%[", - "min": -0.02, - "max": -0.01}, - {"label": "[-5%,-2%[", - "min": -0.05, - "max": -0.02}, - {"label": "[-10%,-5%[", - "min": -0.1, - "max": -0.05}, - {"label": "[-100%,-10%[", - "min": -1., - "max": -0.1}] + opponent_buckets = [ + {"label": "[0,5[", + "min": -1, + "max": 5}, + {"label": "[5,9[", + "min": 5, + "max": 9}, + {"label": "[9,11]", + "min": 9, + "max": 12} + ] # define pass length buckets - pass_buckets = [{"label": "[0,5[", - "min": 0, - "max": 5}, - {"label": "[5,15[", - "min": 5, - "max": 15}, - {"label": "[15,25[", - "min": 15, - "max": 25}, - {"label": "[25,∞]", - "min": 25, - "max": 200}] + pass_buckets = [ + {"label": "[0,5[", + "min": 0, + "max": 5}, + {"label": "[5,15[", + "min": 5, + "max": 15}, + {"label": "[15,25[", + "min": 15, + "max": 25}, + {"label": "[25,∞]", + "min": 25, + "max": 200} + ] # define color schemes - home_colors = {"r": "62929", - "g": "9225", - "b": "105"} + home_colors = { + "r": "62929", + "g": "9225", + "b": "105" + } - away_colors = {"r": "13171", - "g": "20724", - "b": "40300"} + away_colors = { + "r": "13171", + "g": "20724", + "b": "40300" + } - neutral_colors = {"r": "13001", - "g": "13001", - "b": "13001"} + neutral_colors = { + "r": "13001", + "g": "13001", + "b": "13001" + } # combine pxT kpis into single score for players (incl. PXT_REC) and team (excl. PXT_REC) - events["PXT_PLAYER_DELTA"] = events[["PXT_BLOCK", "PXT_DRIBBLE", "PXT_FOUL", - "PXT_BALL_WIN", "PXT_PASS", "PXT_REC", - "PXT_SHOT", "PXT_SETPIECE"]].sum(axis=1) + events["PXT_PLAYER_DELTA"] = events[ + ["PXT_BLOCK", "PXT_DRIBBLE", "PXT_FOUL", "PXT_BALL_WIN", "PXT_PASS", "PXT_REC", "PXT_SHOT", "PXT_SETPIECE"] + ].sum(axis=1) - # events["PXT_TEAM_DELTA"] = events.apply( - events["PXT_TEAM_DELTA"] = events[["PXT_BLOCK", "PXT_DRIBBLE", "PXT_FOUL", - "PXT_BALL_WIN", "PXT_PASS", "PXT_SHOT", - "PXT_SETPIECE"]].sum(axis=1) + events["PXT_TEAM_DELTA"] = events[ + ["PXT_BLOCK", "PXT_DRIBBLE", "PXT_FOUL", "PXT_BALL_WIN", "PXT_PASS", "PXT_SHOT", "PXT_SETPIECE"] + ].sum(axis=1) # add grouping for packing zones - - # define zone groups - zone_groups = { - 'AMC': 'AM', - 'AML': 'AM', - 'AMR': 'AM', - 'CBC': 'CB', - 'CBL': 'CB', - 'CBR': 'CB', - 'CMC': 'CM', - 'CML': 'CM', - 'CMR': 'CM', - 'DMC': 'DM', - 'DML': 'DM', - 'DMR': 'DM', - 'FBL': 'FBL', - 'FBR': 'FBR', - 'GKC': 'GK', - 'GKR': 'GK', - 'GKL': 'GK', - 'IBC': 'IBC', - 'IBR': 'IBC', - 'IBL': 'IBC', - 'IBWL': 'IBWL', - 'IBWR': 'IBWR', - 'WL': 'WL', - 'WR': 'WR', - 'OPP_AMC': 'OPP_AM', - 'OPP_AML': 'OPP_AM', - 'OPP_AMR': 'OPP_AM', - 'OPP_CBC': 'OPP_CB', - 'OPP_CBL': 'OPP_CB', - 'OPP_CBR': 'OPP_CB', - 'OPP_CMC': 'OPP_CM', - 'OPP_CML': 'OPP_CM', - 'OPP_CMR': 'OPP_CM', - 'OPP_DMC': 'OPP_DM', - 'OPP_DML': 'OPP_DM', - 'OPP_DMR': 'OPP_DM', - 'OPP_FBL': 'OPP_FBL', - 'OPP_FBR': 'OPP_FBR', - 'OPP_GKC': 'OPP_GK', - 'OPP_GKR': 'OPP_GK', - 'OPP_GKL': 'OPP_GK', - 'OPP_IBC': 'OPP_IBC', - 'OPP_IBR': 'OPP_IBC', - 'OPP_IBL': 'OPP_IBC', - 'OPP_IBWL': 'OPP_IBWL', - 'OPP_IBWR': 'OPP_IBWR', - 'OPP_WL': 'OPP_WL', - 'OPP_WR': 'OPP_WR' + base_zone_groups = { + 'AM': ['AMC', 'AML', 'AMR'], + 'CB': ['CBC', 'CBL', 'CBR'], + 'CM': ['CMC', 'CML', 'CMR'], + 'DM': ['DMC', 'DML', 'DMR'], + 'FBL': ['FBL'], + 'FBR': ['FBR'], + 'GK': ['GKC', 'GKR', 'GKL'], + 'IBC': ['IBC', 'IBR', 'IBL'], + 'IBWL': ['IBWL'], + 'IBWR': ['IBWR'], + 'WL': ['WL'], + 'WR': ['WR'], } + # build mapping dictionary + zone_groups = {} + for group, zones in base_zone_groups.items(): + for zone in zones: + zone_groups[zone] = group + zone_groups[f'OPP_{zone}'] = f'OPP_{group}' + # add new columns - events["startPackingZoneGroup"] = events.apply( - lambda x: zone_groups[x.startPackingZone] if x.startPackingZone in zone_groups.keys() else x.startPackingZone, - axis=1 - ) - events["endPackingZoneGroup"] = events.apply( - lambda x: zone_groups[x.endPackingZone] if x.endPackingZone in zone_groups.keys() else x.endPackingZone, - axis=1 - ) + events["startPackingZoneGroup"] = events["startPackingZone"].map(zone_groups).fillna(events["startPackingZone"]) + events["endPackingZoneGroup"] = events["endPackingZone"].map(zone_groups).fillna(events["endPackingZone"]) # determine video timestamps - # define function to calculate start time - def start_time(gameTimeInSec, periodId): - # get period offset - offset = offsets[f"p{periodId}"] - # calculate and return start time - return max(gameTimeInSec - (periodId - 1) * 10000 + offset - lead, 0) - - # define function to calculate end time - def end_time(gameTimeInSec, periodId, duration): - # get period offset - offset = offsets[f"p{periodId}"] - # calculate and return end time - return gameTimeInSec - (periodId - 1) * 10000 + offset + duration + lag - - # apply start and end time functions - events["start"] = events.apply( - lambda x: start_time(x["gameTimeInSec"], x["periodId"]), axis=1) - events["end"] = events.apply( - lambda x: end_time(x["gameTimeInSec"], x["periodId"], x["duration"]), axis=1) + # vectorize period offset lookup + period_ids = events["periodId"] + offsets_series = period_ids.map(lambda period_id: offsets[f"p{period_id}"]) + + # Compute start and end time + events["start"] = (events["gameTimeInSec"] + - (period_ids - 1) * 10000 + + offsets_series + - lead).clip(lower=0) + + events["end"] = (events["gameTimeInSec"] + - (period_ids - 1) * 10000 + + offsets_series + + events["duration"] + + lag) # fix end time for final whistles # (The duration of first half final whistles is always extremely high, as it is computed using the # gameTimeInSec of the FINAL_WHISTLE event (e.g. 2730) and the gameTimeInSec of the next KICKOFF event # (e.g. 10000).) - events.end = events.apply(lambda x: x.end if x.action != "FINAL_WHISTLE" else x.start + lead + lag, axis=1) + events["end"] = np.where( + events["action"] != "FINAL_WHISTLE", + events["end"], + events["start"] + lead + lag + ) # Group sequential plays by same player @@ -343,8 +463,9 @@ def end_time(gameTimeInSec, periodId, duration): players["playerId_lag"] = players.playerId.shift(1, fill_value=0) # detect changes in playerId compared to previous event using lag column - players["player_change_flag"] = players.apply( - lambda x: 0 if x.playerId == x.playerId_lag else 1, axis=1) + players["player_change_flag"] = np.where( + players["playerId"] == players["playerId_lag"], 0, 1 + ) # apply cumulative sum function to phase_change_flag to create ID column players["sequence_id"] = players.player_change_flag.cumsum() @@ -358,12 +479,19 @@ def end_time(gameTimeInSec, periodId, duration): # calculate game state # detect goals scored - players["goal_home"] = players.apply( - lambda x: 1 if (x.action == "GOAL" and x.squadId == x.homeSquadId) - or (x.action == "OWN_GOAL" and x.squadId == x.awaySquadId) else 0, axis=1) - players["goal_away"] = players.apply( - lambda x: 1 if (x.action == "GOAL" and x.squadId == x.awaySquadId) - or (x.action == "OWN_GOAL" and x.squadId == x.homeSquadId) else 0, axis=1) + players["goal_home"] = np.where( + ((players["action"] == "GOAL") & (players["squadId"] == players["homeSquadId"])) | + ((players["action"] == "OWN_GOAL") & (players["squadId"] == players["awaySquadId"])), + 1, + 0 + ) + + players["goal_away"] = np.where( + ((players["action"] == "GOAL") & (players["squadId"] == players["awaySquadId"])) | + ((players["action"] == "OWN_GOAL") & (players["squadId"] == players["homeSquadId"])), + 1, + 0 + ) # create lag column for goals because the game state should change after the goal is scored not on the # goal event itself @@ -375,18 +503,37 @@ def end_time(gameTimeInSec, periodId, duration): players["goal_away_sum"] = players.goal_away_lag.cumsum() # calculate teamGoals and opponentGoals - players["teamGoals"] = players.apply( - lambda x: x.goal_home_sum if x.squadId == x.homeSquadId else ( - x.goal_away_sum if x.squadId == x.awaySquadId else None), axis=1) - players["opponentGoals"] = players.apply( - lambda x: x.goal_home_sum if x.squadId == x.awaySquadId else ( - x.goal_away_sum if x.squadId == x.homeSquadId else None), axis=1) + players["teamGoals"] = np.where( + players["squadId"] == players["homeSquadId"], + players["goal_home_sum"], + np.where( + players["squadId"] == players["awaySquadId"], + players["goal_away_sum"], + np.nan + ) + ) + + players["opponentGoals"] = np.where( + players["squadId"] == players["awaySquadId"], + players["goal_home_sum"], + np.where( + players["squadId"] == players["homeSquadId"], + players["goal_away_sum"], + np.nan + ) + ) # calculate game state - players["gameState"] = players.apply( - lambda x: "tied" if x.teamGoals == x.opponentGoals else ( - "leading" if x.teamGoals > x.opponentGoals else ("trailing" if x.teamGoals < x.opponentGoals else None)), - axis=1) + players["gameState"] = np.where( + players["teamGoals"] == players["opponentGoals"], "tied", + np.where( + players["teamGoals"] > players["opponentGoals"], "leading", + np.where( + players["teamGoals"] < players["opponentGoals"], "trailing", + np.NaN + ) + ) + ) # group possession phases @@ -397,15 +544,18 @@ def end_time(gameTimeInSec, periodId, duration): players["attackingSquadId_lag"] = players.attackingSquadId.shift(1) # detect changes in attackingSquadName compared to previous event using lag column - players["possession_change_flag"] = players.apply( - lambda x: 0 if x.attackingSquadId == x.attackingSquadId_lag else 1, axis=1) + players["possession_change_flag"] = np.where( + players["attackingSquadId"] == players["attackingSquadId_lag"], 0, 1 + ) # apply cumulative sum function to possession_change_flag to create ID column players["possession_id"] = players.possession_change_flag.cumsum() # create columns to detect shots and goal - players["is_shot"] = players.apply(lambda x: 1 if x.actionType == "SHOT" else 0, axis=1) - players["is_goal"] = players.apply(lambda x: 1 if x.actionType == "SHOT" and x.result == "SUCCESS" else 0, axis=1) + players["is_shot"] = np.where(players["actionType"] == "SHOT", 1, 0) + players["is_goal"] = np.where( + (players["actionType"] == "SHOT") & (players["result"] == "SUCCESS"), 1, 0 + ) # create separate df to aggregate possession results possession_results = players.copy().groupby("possession_id").agg( @@ -414,15 +564,17 @@ def end_time(gameTimeInSec, periodId, duration): ) # convert sum of goals/shots to boolean type - possession_results["leadsToShot"] = possession_results.apply(lambda x: True if x.is_shot > 0 else False, axis=1) - possession_results["leadsToGoal"] = possession_results.apply(lambda x: True if x.is_goal > 0 else False, axis=1) + possession_results["leadsToShot"] = possession_results["is_shot"] > 0 + possession_results["leadsToGoal"] = possession_results["is_goal"] > 0 # add possession result to players df - players = pd.merge(players, - possession_results, - how="left", - left_on=["possession_id"], - right_on=["possession_id"]) + players = pd.merge( + players, + possession_results, + how="left", + left_on=["possession_id"], + right_on=["possession_id"] + ) # group phases on team level @@ -438,8 +590,11 @@ def end_time(gameTimeInSec, periodId, duration): phases["squadId_lag"] = phases.squadId.shift(1) # detect changes in either phase or squadId compared to previous event using lag columns - phases["phase_change_flag"] = phases.apply( - lambda x: 0 if x.phase == x.phase_lag and x.squadId == x.squadId_lag else 1, axis=1) + phases["phase_change_flag"] = np.where( + (phases["phase"] == phases["phase_lag"]) & (phases["squadId"] == phases["squadId_lag"]), + 0, + 1 + ) # apply cumulative sum function to phase_change_flag to create ID column phases["phase_id"] = phases.phase_change_flag.cumsum() @@ -449,8 +604,10 @@ def end_time(gameTimeInSec, periodId, duration): phases["pxTTeamEnd"] = phases.pxTTeam # create columns to detect shots and goal - phases["is_shot"] = phases.apply(lambda x: 1 if x.actionType == "SHOT" else 0, axis=1) - phases["is_goal"] = phases.apply(lambda x: 1 if x.actionType == "SHOT" and x.result == "SUCCESS" else 0, axis=1) + phases["is_shot"] = np.where(phases["actionType"] == "SHOT", 1, 0) + phases["is_goal"] = np.where( + (phases["actionType"] == "SHOT") & (phases["result"] == "SUCCESS"), 1, 0 + ) # groupy by and aggregate phases = phases.groupby(["phase_id", "phase", "squadId", "squadName"]).agg( @@ -483,14 +640,14 @@ def end_time(gameTimeInSec, periodId, duration): ) # convert sum of goals/shots to boolean type - phases["leadsToShot"] = phases.apply(lambda x: True if x.is_shot > 0 else False, axis=1) - phases["leadsToGoal"] = phases.apply(lambda x: True if x.is_goal > 0 else False, axis=1) + phases["leadsToShot"] = phases["is_shot"] > 0 + phases["leadsToGoal"] = phases["is_goal"] > 0 # reset index phases.reset_index(inplace=True) # merge phase and squadName into one column to later pass into code tag - phases["teamPhase"] = phases.apply(lambda x: x["squadName"] + " - " + x["phase"].replace("_", " "), axis=1) + phases["teamPhase"] = phases["squadName"] + " - " + phases["phase"].str.replace("_", " ") # get period starts @@ -513,7 +670,7 @@ def end_time(gameTimeInSec, periodId, duration): phases = phases.rename(columns={"PXT_TEAM_DELTA": "PXT_DELTA"}) # apply bucket logic - if apply_buckets: + if buckets: # define function to apply bucket logic for events def get_bucket(bucket, value, zero_value, error_value): # check if value is 0.0 @@ -545,28 +702,27 @@ def get_bucket(bucket, value, zero_value, error_value): # apply pass length bucket players.passDistance = players.passDistance.apply(lambda x: get_bucket(pass_buckets, x, "<15", None)) - # apply pxt bucket to PXT_DELTA - players.PXT_DELTA = players.PXT_DELTA.apply(lambda x: get_bucket(pxt_buckets, x, None, None)) - # apply pxT Team bucket - players.pxTTeam = players.pxTTeam.apply(lambda x: get_bucket(pxt_buckets, x, "[0%,1%[", None)) + players.pxTTeam = players.pxTTeam.apply(lambda x: get_bucket(bucket_pxt, x, "[0%,1%[", None)) # apply on team level # apply pxt bucket to PXT_DELTA - phases.PXT_DELTA = phases.PXT_DELTA.apply(lambda x: get_bucket(pxt_buckets, x, "[0%,1%[", None)) + phases.PXT_DELTA = phases.PXT_DELTA.apply(lambda x: get_bucket(bucket_pxt, x, "[0%,1%[", None)) # apply pxT Team bucket - phases.pxTTeamStart = phases.pxTTeamStart.apply(lambda x: get_bucket(pxt_buckets, x, "[0%,1%[", None)) - phases.pxTTeamEnd = phases.pxTTeamEnd.apply(lambda x: get_bucket(pxt_buckets, x, "[0%,1%[", None)) + phases.pxTTeamStart = phases.pxTTeamStart.apply(lambda x: get_bucket(bucket_pxt, x, "[0%,1%[", None)) + phases.pxTTeamEnd = phases.pxTTeamEnd.apply(lambda x: get_bucket(bucket_pxt, x, "[0%,1%[", None)) # iterate over kpis and apply buckets for kpi in kpis: + if kpi == "PXT_DELTA": + continue # get bucket for column bucket = kpi_buckets[kpi] # apply function phases[kpi] = phases[kpi].apply(lambda x: get_bucket(bucket, x, None, None)) - # convert to sportscode xml + # convert to xml # build a tree structure root = ET.Element("file") @@ -617,306 +773,153 @@ def get_bucket(bucket, value, zero_value, error_value): max_id = max(kickoffs.periodId.tolist()) - 1 # concatenate actionType and result into one column if result exists - players["actionTypeResult"] = players.apply(lambda x: x.actionType + "_" + x.result if x.result else None, axis=1) - - # define labels to be added - labels = [{"order": "00 | ", - "name": "eventId"}, - {"order": "01 | ", - "name": "matchId"}, - {"order": "02 | ", - "name": "periodId"}, - {"order": "03 | ", - "name": "phase"}, - {"order": "04 | ", - "name": "gameState"}, - {"order": "05 | ", - "name": "playerPosition"}, - {"order": "06 | ", - "name": "action"}, - {"order": "07 | ", - "name": "actionType"}, - {"order": "08 | ", - "name": "bodyPart"}, - {"order": "09a | ", - "name": "bodyPartExtended"}, - {"order": "09b | ", - "name": "previousPassHeight"}, - {"order": "09c | ", - "name": "actionTypeResult"}, - {"order": "10 | ", - "name": "startPackingZone"}, - {"order": "11 | ", - "name": "startPackingZoneGroup"}, - {"order": "12 | ", - "name": "startPitchPosition"}, - {"order": "13 | ", - "name": "startLane"}, - {"order": "14 | ", - "name": "endPackingZone"}, - {"order": "15 | ", - "name": "endPackingZoneGroup"}, - {"order": "16 | ", - "name": "endPitchPosition"}, - {"order": "17 | ", - "name": "endLane"}, - {"order": "18 | ", - "name": "opponents"}, - {"order": "19 | ", - "name": "pressure"}, - {"order": "20 | ", - "name": "pxTTeam"}, - {"order": "21 | ", - "name": "pressingPlayerName"}, - {"order": "22 | ", - "name": "duelType"}, - {"order": "23 | ", - "name": "duelPlayerName"}, - {"order": "24 | ", - "name": "fouledPlayerName"}, - {"order": "25 | ", - "name": "passDistance"}, - {"order": "26 | ", - "name": "passReceiverPlayerName"}, - {"order": "27 | ", - "name": "leadsToShot"}, - {"order": "28 | ", - "name": "leadsToGoal"}, - {"order": "29 | ", - "name": "squadName"}, - {"order": "KPI: ", - "name": "PXT_DELTA"}, - {"order": "KPI: ", - "name": "BYPASSED_OPPONENTS"}, - {"order": "KPI: ", - "name": "BYPASSED_DEFENDERS"}, - {"order": "KPI: ", - "name": "BYPASSED_OPPONENTS_RECEIVING"}, - {"order": "KPI: ", - "name": "BYPASSED_DEFENDERS_RECEIVING"}, - {"order": "KPI: ", - "name": "BALL_LOSS_ADDED_OPPONENTS"}, - {"order": "KPI: ", - "name": "BALL_LOSS_REMOVED_TEAMMATES"}, - {"order": "KPI: ", - "name": "BALL_WIN_ADDED_TEAMMATES"}, - {"order": "KPI: ", - "name": "BALL_WIN_REMOVED_OPPONENTS"}, - {"order": "KPI: ", - "name": "REVERSE_PLAY_ADDED_OPPONENTS"}, - {"order": "KPI: ", - "name": "REVERSE_PLAY_ADDED_OPPONENTS_DEFENDERS"}, - {"order": "KPI: ", - "name": "BYPASSED_OPPONENTS_RAW"}, - {"order": "KPI: ", - "name": "BYPASSED_OPPONENTS_DEFENDERS_RAW"}, - {"order": "KPI: ", - "name": "SHOT_XG"}, - {"order": "KPI: ", - "name": "POSTSHOT_XG"}, - {"order": "KPI: ", - "name": "PACKING_XG"}] + players["actionTypeResult"] = np.where( + players["result"].notna(), + players["actionType"] + "_" + players["result"], + np.NaN + ) # add data to xml structure # the idea is to still iterate over each event separately but chose between # creating a new instance and appending to the existing instance if sequencing: - for index, event in players.iterrows(): + seq_id_current = None - # skip row if no player (e.g. no video, referee interception, etc.) - if pd.notnull(event.playerName): + # If the selected code attribute is "squadName", generate XML entries from the `phases` DataFrame + if codeTag == "squadName": + for index, phase in phases.iterrows(): + # Create a new XML instance for each team phase + instance = ET.SubElement(instances, "instance") - # if first iteration set seq_id_current to 1 - if index == 0: - seq_id_current = 0 - else: - pass + # Set unique ID using phase_id offset by max_id + event_id = ET.SubElement(instance, "ID") + event_id.text = str(phase.phase_id + max_id) - # get new sequence_id - seq_id_new = event.sequence_id + # Define the time range of the instance + start = ET.SubElement(instance, "start") + start.text = str(round(phase.start, 2)) + end = ET.SubElement(instance, "end") + end.text = str(round(phase.end, 2)) - # check if new sequence_id or first iteration - if seq_id_new != seq_id_current or index == 0: - # add instance - instance = ET.SubElement(instances, "instance") - # add event id - event_id = ET.SubElement(instance, "ID") - event_id.text = str(event.sequence_id + max_id) - # add start time - start = ET.SubElement(instance, "start") - start.text = str(round(sequence_timing.at[seq_id_new - 1, "start"], 2)) - # add end time - end = ET.SubElement(instance, "end") - end.text = str(round(sequence_timing.at[seq_id_new - 1, "end"], 2)) - # add player as code - code = ET.SubElement(instance, "code") - code.text = event.playerName - # add description - free_text = ET.SubElement(instance, "free_text") - free_text.text = f"({event.gameTime}) {event.playerName}: {event.action.lower().replace('_', ' ')}" - else: - # append current action to existing description - free_text.text += f" | {event.action.lower().replace('_', ' ')}" - - # add labels - for label in labels: - # check for nan and None (those values should be omitted and not added as label) - if (value := str(event[label["name"]])) not in ["None", "nan"]: - # get value from previous event to compare if the value remains the same (and can be omitted - # or if the value changed and therefore has to be added) - try: - prev_value = str(players.at[index - 1, label["name"]]) - # if the key doesn't exist (previous to first row), assign current value - except KeyError: - prev_value = event[label["name"]] - # check if first event of a sequence or the value is unequal to previous row - if seq_id_new != seq_id_current or value != prev_value: - # add label - wrapper = ET.SubElement(instance, "label") - group = ET.SubElement(wrapper, "group") - group.text = label["order"] + label["name"] - text = ET.SubElement(wrapper, "text") - text.text = value - else: - # don't add label - pass + # Set the code to the team phase + code = ET.SubElement(instance, "code") + code.text = phase.teamPhase + + # Add labels to the instance + for label in labels_and_kpis: + if label["name"] not in phase: + continue + value = str(phase[label["name"]]) + if value not in ["None", "nan"]: + wrapper = ET.SubElement(instance, "label") + group = ET.SubElement(wrapper, "group") + group.text = label["order"] + label["name"] if labelSorting else label["name"] + text = ET.SubElement(wrapper, "text") + text.text = value - # update current sequence_id - seq_id_current = seq_id_new + # If not team-level, use player-level data from `players` DataFrame + else: + for index, event in players.iterrows(): + # Skip entries without valid player name + if pd.notnull(event.playerName): + # Set first sequence_id + if index == 0: + seq_id_current = 0 + + seq_id_new = event.sequence_id + + # Start new clip if new sequence or first event + if seq_id_new != seq_id_current or index == 0: + instance = ET.SubElement(instances, "instance") + + event_id = ET.SubElement(instance, "ID") + event_id.text = str(event.sequence_id + max_id) + + start = ET.SubElement(instance, "start") + start.text = str(round(sequence_timing.at[seq_id_new - 1, "start"], 2)) + end = ET.SubElement(instance, "end") + end.text = str(round(sequence_timing.at[seq_id_new - 1, "end"], 2)) + + # Use selected attribute (e.g., playerName, action) as the main code + code = ET.SubElement(instance, "code") + code.text = str(event[codeTag]) + + # Free-text description showing action sequence + free_text = ET.SubElement(instance, "free_text") + free_text.text = f"({event.gameTime}) {event.playerName}: {event.action.lower().replace('_', ' ')}" + else: + # Append to existing free-text if still same sequence + free_text.text += f" | {event.action.lower().replace('_', ' ')}" + + # Add labels to the instance + for label in labels_and_kpis: + if label["name"] not in event: + continue + value = str(event[label["name"]]) + if value not in ["None", "nan"]: + try: + prev_value = str(players.at[index - 1, label["name"]]) + except KeyError: + prev_value = value + # Only add label if it changed or is the first event of the sequence + if seq_id_new != seq_id_current or value != prev_value: + wrapper = ET.SubElement(instance, "label") + group = ET.SubElement(wrapper, "group") + group.text = label["order"] + label["name"] if labelSorting else label["name"] + text = ET.SubElement(wrapper, "text") + text.text = value + + # Update current sequence ID + seq_id_current = seq_id_new else: - for index, event in players.iterrows(): - - # skip row if no player (e.g. no video, referee interception, etc.) - if pd.notnull(event.playerName): - - # add instance + # Same logic as above, but without sequencing (i.e., one clip per row) + if codeTag == "squadName": + for index, phase in phases.iterrows(): instance = ET.SubElement(instances, "instance") - # add event id event_id = ET.SubElement(instance, "ID") - event_id.text = str(event.eventNumber + max_id) - # add start time + event_id.text = str(phase.phase_id + max_id) start = ET.SubElement(instance, "start") - start.text = str(round(event.start, 2)) - # add end time + start.text = str(round(phase.start, 2)) end = ET.SubElement(instance, "end") - end.text = str(round(event.end, 2)) - # add player as code + end.text = str(round(phase.end, 2)) code = ET.SubElement(instance, "code") - code.text = event.playerName - # add description - free_text = ET.SubElement(instance, "free_text") - free_text.text = f"({event.gameTime}) {event.playerName}: {event.action.lower().replace('_', ' ')}" - - # add labels - for label in labels: - # check for nan and None (those values should be omitted and not added as label) - if (value := str(event[label["name"]])) not in ["None", "nan"]: + code.text = phase.teamPhase - # add label + for label in labels_and_kpis: + if label["name"] not in phase: + continue + value = str(phase[label["name"]]) + if value not in ["None", "nan"]: wrapper = ET.SubElement(instance, "label") group = ET.SubElement(wrapper, "group") - group.text = label["order"] + label["name"] + group.text = label["order"] + label["name"] if labelSorting else label["name"] text = ET.SubElement(wrapper, "text") text.text = value + else: + for index, event in players.iterrows(): + if pd.notnull(event.playerName): + instance = ET.SubElement(instances, "instance") + event_id = ET.SubElement(instance, "ID") + event_id.text = str(event.eventNumber + max_id) + start = ET.SubElement(instance, "start") + start.text = str(round(event.start, 2)) + end = ET.SubElement(instance, "end") + end.text = str(round(event.end, 2)) + code = ET.SubElement(instance, "code") + code.text = str(event[codeTag]) + free_text = ET.SubElement(instance, "free_text") + free_text.text = f"({event.gameTime}) {event.playerName}: {event.action.lower().replace('_', ' ')}" - - # add team level data - - # define labels - labels = [{"order": "01 | ", - "name": "matchId"}, - {"order": "02 | ", - "name": "periodId"}, - {"order": "04 | ", - "name": "gameState"}, - {"order": "29 | ", - "name": "playerName"}, - {"order": "30 | ", - "name": "pxTTeamStart"}, - {"order": "31 | ", - "name": "pxTTeamEnd"}, - {"order": "27 | ", - "name": "leadsToShot"}, - {"order": "28 | ", - "name": "leadsToGoal"}, - {"order": "KPI: ", - "name": "PXT_DELTA"}, - {"order": "KPI: ", - "name": "BYPASSED_OPPONENTS"}, - {"order": "KPI: ", - "name": "BYPASSED_DEFENDERS"}, - {"order": "KPI: ", - "name": "BYPASSED_OPPONENTS_RECEIVING"}, - {"order": "KPI: ", - "name": "BYPASSED_DEFENDERS_RECEIVING"}, - {"order": "KPI: ", - "name": "BALL_LOSS_ADDED_OPPONENTS"}, - {"order": "KPI: ", - "name": "BALL_LOSS_REMOVED_TEAMMATES"}, - {"order": "KPI: ", - "name": "BALL_WIN_ADDED_TEAMMATES"}, - {"order": "KPI: ", - "name": "BALL_WIN_REMOVED_OPPONENTS"}, - {"order": "KPI: ", - "name": "REVERSE_PLAY_ADDED_OPPONENTS"}, - {"order": "KPI: ", - "name": "REVERSE_PLAY_ADDED_OPPONENTS_DEFENDERS"}, - {"order": "KPI: ", - "name": "BYPASSED_OPPONENTS_RAW"}, - {"order": "KPI: ", - "name": "BYPASSED_OPPONENTS_DEFENDERS_RAW"}, - {"order": "KPI: ", - "name": "SHOT_XG"}, - {"order": "KPI: ", - "name": "POSTSHOT_XG"}, - {"order": "KPI: ", - "name": "PACKING_XG"}] - - if sequencing: - # update max id after adding players - max_id += players.sequence_id.max() + 1 - else: - # update max id after adding players - max_id += event.eventNumber + 1 - - - # add to xml structure - for index, phase in phases.iterrows(): - # add instance - instance = ET.SubElement(instances, "instance") - # add event id - event_id = ET.SubElement(instance, "ID") - event_id.text = str(phase.phase_id + max_id) - # add start time - start = ET.SubElement(instance, "start") - start.text = str(round(phase.start, 2)) - # add end time - end = ET.SubElement(instance, "end") - end.text = str(round(phase.end, 2)) - # add teamPhase as code - code = ET.SubElement(instance, "code") - code.text = phase.teamPhase - # add labels - for label in labels: - # check for label - if label["name"] == "playerName": - # for label "playerName" the list of players involved need to be unpacked - for player in phase[label["name"]]: - wrapper = ET.SubElement(instance, "label") - group = ET.SubElement(wrapper, "group") - group.text = "27 | playerInvolved" - text = ET.SubElement(wrapper, "text") - text.text = player - else: - # check for nan or None (those values should be omitted and not added as label) - if (value := str(phase[label["name"]])) not in ["None", "nan"]: - wrapper = ET.SubElement(instance, "label") - group = ET.SubElement(wrapper, "group") - group.text = label["order"] + label["name"] - text = ET.SubElement(wrapper, "text") - text.text = value - else: - pass + for label in labels_and_kpis: + if label["name"] not in event: + continue + value = str(event[label["name"]]) + if value not in ["None", "nan"]: + wrapper = ET.SubElement(instance, "label") + group = ET.SubElement(wrapper, "group") + group.text = label["order"] + label["name"] if labelSorting else label["name"] + text = ET.SubElement(wrapper, "text") + text.text = value # create row order @@ -954,28 +957,34 @@ def row(value, colors): # add entries for kickoffs for each period row("Start", neutral_colors) - # add entries for away team players - for player in away_players: - # call function - row(player, away_colors) + if codeTag == "playerName": + # add entries for away team players + for player in away_players: + # call function + row(player, away_colors) - # add entries for home team players - for player in home_players: - # call function - row(player, home_colors) + # add entries for home team players + for player in home_players: + # call function + row(player, home_colors) - # add entries for away team phases - for phase in away_phases: - # call function - row(phase, away_colors) + elif codeTag == "squadName": + # add entries for away team phases + for phase in away_phases: + # call function + row(phase, away_colors) - # add entries for home team phases - for phase in home_phases: - # call function - row(phase, home_colors) + # add entries for home team phases + for phase in home_phases: + # call function + row(phase, home_colors) # wrap into ElementTree and save as XML tree = ET.ElementTree(root) + # only apply indent if Python version >= 3.9 + if sys.version_info >= (3, 9): + ET.indent(tree, space=" ") + # return xml tree return tree \ No newline at end of file