diff --git a/.gitignore b/.gitignore index e2aa141..bb0bbd2 100644 --- a/.gitignore +++ b/.gitignore @@ -130,6 +130,8 @@ celerybeat.pid # Environments .venv +venv-testpypi +venv-pypi env/ venv/ ENV/ @@ -409,4 +411,7 @@ $RECYCLE.BIN/ # End of https://www.toptal.com/developers/gitignore/api/windows,macos,pycharm+all,python,flask # Tests -tests/ \ No newline at end of file +tests/ + +# Maintenance checklist +maintenance_checklist.md \ No newline at end of file diff --git a/NEWS.md b/NEWS.md index ce7b269..7e9a47d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,11 +1,29 @@ +# impectPy 2.5.0 + +## Major Changes +* Use new endpoints to drastically improve performance of `getPlayerMatchScores()` and `getPlayerIterationScores()`. The argument `positions` is no longer required. If it is not supplied, the function defaults to the new endpoints and returns all unique player-position-squad combinations. +* Add coaches to the following functions: + * `getEvents()` + * `getPlayerMatchSums()` + * `getSquadMatchSums()` + * `getPlayerMatchScores()` + * `getSquadMatchScores()` +* Add function `getSquadCoefficients()` to retrieve detailed model coefficients to enable match predictions + +## Minor Changes +* Fix error in `getPlayerIterationAverages()` regarding type conversions +* Use `NA` as fill value instead of 0 for score related functions +* Minor fixes to enable PyPI submission +* Improve error handling + # impectPy 2.4.5 -# Minor Changes +## Minor Changes * fix bug in `getPlayerIterationAverages()`function # impectPy 2.4.4 -# Major Changes +## Major Changes * Rename function `generateSportsCodeXML()` to `generateXML()` * Add proper xml structure to the `generateXML()` function for Python versions >= 3.9 * Significantly improve customization options for new `generateXML()` function with new function arguments @@ -14,7 +32,7 @@ * `codeTag`: Customize code tag selection * `labelSorting`: Enable/Disable label sorting -# Minor Changes +## Minor Changes * fix bug in `getEvents()` that 
prevented the column `duelPlayerName`from being populated correctly # impectPy 2.4.3 diff --git a/README.md b/README.md index d42a21f..3e688a6 100644 --- a/README.md +++ b/README.md @@ -2,13 +2,13 @@ A package provided by: Impect GmbH -Version: v2.4.5 +Version: v2.5.0 -**Updated: June 26th 2025** +**Updated: October 15th 2025** --- -**Supported API Version: V5**
+**Supported API Version: V5** For older versions, please see list below: - API V4: https://github.com/ImpectAPI/impectPy/tree/v1.0.3 @@ -26,11 +26,16 @@ match and season level. ## Installation -You can install the latest version of impectPy from -[GitHub](https://github.com/) with: +You can install the latest version of impectPy from PyPI with: -``` cmd -pip install git+https://github.com/ImpectAPI/impectPy.git@v2.4.5 +```cmd +pip install impectPy +``` + +You can also install it from [GitHub](https://github.com/) with: + +```cmd +pip install git+https://github.com/ImpectAPI/impectPy.git@v2.5.0 ``` ## Usage @@ -41,7 +46,7 @@ Before accessing any data via our API, you will need to request a bearer token for authorization. You can get this authorization token using the following code snippet: -``` python +```python import impectPy as ip import pandas as pd @@ -59,7 +64,7 @@ competition iterations that are enabled for your account. ### Retrieve Basic Information -``` python +```python # get list of iterations iterations = ip.getIterations(token=token) @@ -72,7 +77,7 @@ your sales representative. Now let’s assume you are interested in data for 2022/23 season of the 1. Bundesliga (iteration = 518). The following snippet gets you a list of matches for this iteration: -``` python +```python # get matches for iteration matchplan = ip.getMatches(iteration=518, token=token) @@ -91,7 +96,7 @@ as team formation, starting position and substitution data. As the functions all for multiple games to be requested at once, we need to wrap the matchId into a list. Hence, to request data for this game, run the following code snippet: -``` python +```python # define matches to get event data for matches = [84344] @@ -120,7 +125,7 @@ Scouting and Analysis portals. 
On player level, these are calculated across positions which is why you have to supply the function with a list of positions your want to retrieve data for: -``` python +```python # define matches to get further data for matches = [84344] @@ -152,7 +157,7 @@ the following method to do so in order to minimize the amount of requests sent to the API. Let’s also get the event data for the RB Leipzig vs FSV Mainz 05 game (matchId = 84350) from the same day: -``` python +```python # define list of matches matches = [84344, 84350] @@ -179,8 +184,8 @@ positions = ["LEFT_WINGBACK_DEFENDER", "RIGHT_WINGBACK_DEFENDER"] # get player scores and ratios for match and positions per player playerMatchScores = ip.getPlayerMatchScores( matches=matches, - positions=positions, - token=token + token=token, + positions=positions # optional ) # get squad scores and ratios for match per squad @@ -201,7 +206,7 @@ your want to retrieve data for. Let's assume you were interested in wing backs in the 2022/2023 Bundesliga season, then you could use this code snippet: -``` python +```python # define iteration ID iteration = 518 @@ -223,8 +228,8 @@ squadIterationAverages = ip.getSquadIterationAverages( # get player scores and ratios for iteration and positions playerIterationScores = ip.getPlayerIterationScores( iteration=iteration, - positions=positions, - token=token + token=token, + positions=positions # optional ) # get squad scores and ratios for iteration @@ -232,9 +237,19 @@ squadIterationScores = ip.getSquadIterationScores( iteration=iteration, token=token ) +``` + +The squad rating values that you can find on the league ranking in the Scouting portal can +also be retrieved from the API. In addition, we also provide you with the more detailed squad +coefficients that can be used to make match predictions. See [this example script](https://github.com/ImpectAPI/impectPy/blob/release/examples/predict_matches.ipynb) +for further details. 
+```python # get squad rating for iteration -squadRatings = ip.getSquadRatings(iteration=iteration, token=token +squadRatings = ip.getSquadRatings(iteration=iteration, token=token) + +# get squad coefficients for iteration +squadCoefficients = ip.getSquadCoefficients(iteration=iteration, token=token) ``` You can now also retrieve the positional profile scores for players via our API. This @@ -243,7 +258,7 @@ positional input that determines which matchShares to consider when computing th In the below example, all matchShares that a player played as either a left back or a right back are included for profile score calculation. -``` python +```python # define iteration ID iteration = 518 @@ -251,7 +266,11 @@ iteration = 518 positions = ["LEFT_WINGBACK_DEFENDER", "RIGHT_WINGBACK_DEFENDER"] # get player profile scores -playerProfileScores = getPlayerProfileScores(iteration, positions, token) +playerProfileScores = ip.getPlayerProfileScores( + iteration=iteration, + positions=positions, + token=token +) ``` Please keep in mind that Impect enforces a rate limit of 10 requests per second @@ -278,7 +297,7 @@ please see the beginning of the [function definition](https://github.com/ImpectA Please make sure to only retrieve event data for one game at a time. 
Let's use the Bayern vs Dortmund game from earlier as an example: -``` python +```python # define matchId matches = [84344] @@ -360,51 +379,56 @@ positions = ["LEFT_WINGBACK_DEFENDER", "RIGHT_WINGBACK_DEFENDER"] iterations = api.getIterations() # get squad ratings -ratings = api.getSquadRatings(iteration) +ratings = api.getSquadRatings(iteration=iteration) + +# get squad coefficients +coefficients = api.getSquadCoefficients(iteration=iteration) # get matches -matchplan = api.getMatches(iteration) +matchplan = api.getMatches(iteration=iteration) # get match info -formations = api.getFormations(matches) -substitutions = api.getSubstitutions(matches) -startingPositions = api.getStartingPositions(matches) +formations = api.getFormations(matches=matches) +substitutions = api.getSubstitutions(matches=matches) +startingPositions = api.getStartingPositions(matches=matches) # get match events -events = api.getEvents(matches, include_kpis=True, include_set_pieces=True) +events = api.getEvents(matches=matches, include_kpis=False, include_set_pieces=False) # get set pieces -setPieces = api.getSetPieces(matches) +set_pieces = api.getSetPieces(matches=matches) # get player iteration averages -playerIterationAverages = api.getPlayerIterationAverages(iteration) +playerIterationAverages = api.getPlayerIterationAverages(iteration=iteration) # get player matchsums -playerMatchsums = api.getPlayerMatchsums(matches) +playerMatchsums = api.getPlayerMatchsums(matches=matches) # get squad iteration averages -squadIterationAverages = api.getSquadIterationAverages(iteration) +squadIterationAverages = api.getSquadIterationAverages(iteration=iteration) # get squad matchsums -squadMatchsums = api.getSquadMatchsums(matches) +squadMatchsums = api.getSquadMatchsums(matches=matches) # get player match scores -playerMatchScores = api.getPlayerMatchScores(matches, positions) +playerMatchScores = api.getPlayerMatchScores(matches=matches, positions=positions) # specific positions +playerMatchScoresAll 
= api.getPlayerMatchScores(matches=matches) # all positions # get squad match scores -squadMatchScores = api.getSquadMatchScores(matches) +squadMatchScores = api.getSquadMatchScores(matches=matches) # get player iteration scores -playerIterationScores = api.getPlayerIterationScores(iteration, positions) +playerIterationScores = api.getPlayerIterationScores(iteration=iteration, positions=positions) # specific positions +playerIterationScoresAll = api.getPlayerIterationScores(iteration=iteration) # all positions # get squad iteration scores -squadIterationScores = api.getSquadIterationScores(iteration) +squadIterationScores = api.getSquadIterationScores(iteration=iteration) # get player profile scores -playerProfileScores = api.getPlayerProfileScores(iteration, positions) +playerProfileScores = api.getPlayerProfileScores(iteration=iteration, positions=positions) ``` ## Final Notes Further documentation on the data and explanations of variables can be -found in our [glossary](https://glossary.impect.com/). \ No newline at end of file +found in our [Glossary](https://glossary.impect.com/). 
\ No newline at end of file diff --git a/examples/predict_matches.ipynb b/examples/predict_matches.ipynb new file mode 100644 index 0000000..b9cc1b8 --- /dev/null +++ b/examples/predict_matches.ipynb @@ -0,0 +1,157 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# load packages\n", + "import impectPy\n", + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Set login credentials\n", + "username = \"yourUsername\"\n", + "password = \"yourPassword\"\n", + "\n", + "# create Impect instance and login\n", + "api = impectPy.Impect()\n", + "api.login(username=username, password=password)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# set iterationId\n", + "iteration = 1385\n", + "\n", + "# fetch matches for iteration\n", + "matches = api.getMatches(iteration=iteration)\n", + "\n", + "# show matches dataframe\n", + "matches.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# fetch prediction model coefficients\n", + "coefficients = api.getSquadCoefficients(iteration=iteration)\n", + "\n", + "# show coefficients\n", + "coefficients.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# prepare columns for merging\n", + "matches[\"date\"] = pd.to_datetime(matches[\"scheduledDate\"]).dt.tz_localize(None).dt.normalize()\n", + "matches[\"homeSquadId\"] = matches[\"homeSquadId\"].astype(\"int64\")\n", + "matches[\"awaySquadId\"] = matches[\"awaySquadId\"].astype(\"int64\")\n", + "coefficients[\"date\"] = pd.to_datetime(coefficients[\"date\"]).dt.normalize()\n", + "coefficients[\"squadId\"] = coefficients[\"squadId\"].astype(\"int64\")\n", + "\n", + "# sort by date\n", + "matches = 
matches.sort_values(\"date\")\n", + "coefficients = coefficients.sort_values(\"date\")\n", + "\n", + "# merge competition-specific coefficients using the most recent date\n", + "matches = pd.merge_asof(\n", + " matches,\n", + " coefficients[\n", + " [\"date\", \"interceptCoefficient\", \"homeCoefficient\", \"competitionCoefficient\"]\n", + " ].drop_duplicates(\"date\"),\n", + " on=\"date\",\n", + " direction=\"backward\"\n", + ")\n", + "\n", + "# merge squad-specific coefficients using the most recent date\n", + "def get_squad_coeffs(row, coeff_df, squad_id_col) -> pd.Series:\n", + " squad_id = row[squad_id_col]\n", + " match_date = row[\"date\"]\n", + " squad_coeffs = coeff_df[(coeff_df[\"squadId\"] == squad_id) & (coeff_df[\"date\"] <= match_date)]\n", + " if len(squad_coeffs) > 0:\n", + " latest = squad_coeffs.sort_values(\"date\").iloc[-1]\n", + " return pd.Series({\"attack\": latest[\"attackCoefficient\"], \"defense\": latest[\"defenseCoefficient\"]})\n", + " return pd.Series({\"attack\": None, \"defense\": None})\n", + "\n", + "# merge homeSquad coefficients\n", + "home_coeffs = matches.apply(lambda row: get_squad_coeffs(row, coefficients, \"homeSquadId\"), axis=1)\n", + "matches[\"attackCoefficientHome\"] = home_coeffs[\"attack\"]\n", + "matches[\"defenseCoefficientHome\"] = home_coeffs[\"defense\"]\n", + "\n", + "# merge awaySquad coefficients\n", + "away_coeffs = matches.apply(lambda row: get_squad_coeffs(row, coefficients, \"awaySquadId\"), axis=1)\n", + "matches[\"attackCoefficientAway\"] = away_coeffs[\"attack\"]\n", + "matches[\"defenseCoefficientAway\"] = away_coeffs[\"defense\"]\n", + "\n", + "# show new matches dataframe\n", + "matches.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# compute predictions\n", + "matches[\"predHome\"] = np.exp(\n", + " matches[\"interceptCoefficient\"] +\n", + " matches[\"homeCoefficient\"] +\n", + " matches[\"competitionCoefficient\"] +\n", + 
" matches[\"attackCoefficientHome\"] +\n", + " matches[\"defenseCoefficientAway\"]\n", + ")\n", + "matches[\"predAway\"] = np.exp(\n", + " matches[\"interceptCoefficient\"] +\n", + " matches[\"competitionCoefficient\"] +\n", + " matches[\"attackCoefficientAway\"] +\n", + " matches[\"defenseCoefficientHome\"]\n", + ")\n", + "\n", + "# show matches including predictions\n", + "matches.head()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/impectPy/__init__.py b/impectPy/__init__.py index 7c3cd89..2f3e671 100644 --- a/impectPy/__init__.py +++ b/impectPy/__init__.py @@ -1,3 +1,6 @@ +# define version attribute +__version__ = "2.5.0" + # import modules from .access_token import getAccessToken from .iterations import getIterations @@ -11,6 +14,7 @@ from .xml import generateXML from .set_pieces import getSetPieces from .squad_ratings import getSquadRatings +from .squad_coefficients import getSquadCoefficients from .match_info import getFormations, getSubstitutions, getStartingPositions from .config import Config as Config from .impect import Impect as Impect \ No newline at end of file diff --git a/impectPy/events.py b/impectPy/events.py index 7ab69b5..6807b69 100644 --- a/impectPy/events.py +++ b/impectPy/events.py @@ -38,18 +38,18 @@ def getEventsFromHost( raise Exception("Argument 'matches' must be a list of integers.") # get match info - iterations = pd.concat( + match_data = pd.concat( map(lambda match: connection.make_api_request_limited( url=f"{host}/v5/customerapi/matches/{match}", method="GET" ).process_response( - endpoint="Iterations" + endpoint="Match Info" ), matches), 
ignore_index=True) # filter for matches that are unavailable - fail_matches = iterations[iterations.lastCalculationDate.isnull()].id.drop_duplicates().to_list() + fail_matches = match_data[match_data.lastCalculationDate.isnull()].id.drop_duplicates().to_list() # drop matches that are unavailable from list of matches matches = [match for match in matches if match not in fail_matches] @@ -62,7 +62,7 @@ def getEventsFromHost( print(f"The following matches are not available yet and were ignored:\n{fail_matches}") # extract iterationIds - iterations = list(iterations[iterations.lastCalculationDate.notnull()].iterationId.unique()) + iterations = list(match_data[match_data.lastCalculationDate.notnull()].iterationId.unique()) # get match events events = pd.concat( @@ -118,6 +118,18 @@ def getEventsFromHost( iterations), ignore_index=True)[["id", "name"]].drop_duplicates() + # get coaches + coaches = pd.concat( + map(lambda iteration: connection.make_api_request_limited( + url=f"{host}/v5/customerapi/iterations/{iteration}/coaches", + method="GET" + ).process_response( + endpoint="Coaches", + raise_exception=False + ), + iterations), + ignore_index=True)[["id", "name"]].drop_duplicates() + # get matches matchplan = pd.concat( map(lambda iteration: getMatchesFromHost( @@ -188,7 +200,7 @@ def getEventsFromHost( # start merging dfs - # merge events with squads + # merge events with secondary data events = events.merge( squads[["id", "name"]].rename(columns={"id": "squadId", "name": "squadName"}), left_on="squadId", @@ -201,10 +213,7 @@ def getEventsFromHost( right_on="squadId", how="left", suffixes=("", "_away") - ) - - # merge events with players - events = events.merge( + ).merge( players[["id", "commonname"]].rename(columns={"id": "playerId", "commonname": "playerName"}), left_on="playerId", right_on="playerId", @@ -243,19 +252,32 @@ def getEventsFromHost( right_on="dribbleOpponentPlayerId", how="left", suffixes=("", "_right") - ) - - # merge with matches info - events = 
events.merge( + ).merge( matchplan, left_on="matchId", right_on="id", how="left", suffixes=("", "_right") - ) - - # merge with competition info - events = events.merge( + ).merge( + match_data[["id", "squadHomeCoachId", "squadAwayCoachId"]].rename( + columns={"squadHomeCoachId": "homeSquadCoachId", "squadAwayCoachId": "awaySquadCoachId"}), + left_on="matchId", + right_on="id", + how="left", + suffixes=("", "_right") + ).merge( + coaches[["id", "name"]].rename(columns={"id": "homeCoachId", "name": "homeCoachName"}), + left_on="homeSquadCoachId", + right_on="homeCoachId", + how="left", + suffixes=("", "_right") + ).merge( + coaches[["id", "name"]].rename(columns={"id": "awayCoachId", "name": "awayCoachName"}), + left_on="awaySquadCoachId", + right_on="awayCoachId", + how="left", + suffixes=("", "_right") + ).merge( iterations, left_on="iterationId", right_on="id", @@ -374,12 +396,16 @@ def getEventsFromHost( "homeSquadName", "homeSquadCountryId", "homeSquadCountryName", + "homeCoachId", + "homeCoachName", "homeSquadType", "awaySquadId", "awaySquadName", "awaySquadCountryId", "awaySquadCountryName", "awaySquadType", + "awayCoachId", + "awayCoachName", "eventId", "eventNumber", "sequenceIndex", diff --git a/impectPy/helpers.py b/impectPy/helpers.py index 033b335..365c608 100644 --- a/impectPy/helpers.py +++ b/impectPy/helpers.py @@ -109,13 +109,22 @@ def make_api_request( f", retrying in {retry_delay} seconds...") time.sleep(retry_delay) # check status code and terminate if 401 or 403 - elif response.status_code in [401, 403]: + elif response.status_code == 401: raise Exception(f"Received status code {response.status_code} " - f"({response.json().get('message', 'Unauthorized')})") + f"(You do not have API access.)\n" + f"Request-ID: {response.headers['x-request-id']} " + f"(Make sure to include this in any support request.)") + elif response.status_code == 403: + raise Exception(f"Received status code {response.status_code} " + f"(You do not have access to this 
resource.)\n" + f"Request-ID: {response.headers['x-request-id']} " + f"(Make sure to include this in any support request.)") # check status code and terminate if other error else: raise Exception(f"Received status code {response.status_code} " - f"({response.json().get('message', 'Unknown error')})") + f"({response.json().get('message', 'Unknown error')})\n" + f"Request-ID: {response.headers['x-request-id']} " + f"(Make sure to include this in any support request.)") ###### diff --git a/impectPy/impect.py b/impectPy/impect.py index d8b8334..1fe0f4c 100644 --- a/impectPy/impect.py +++ b/impectPy/impect.py @@ -12,13 +12,14 @@ from .xml import generateXML from .set_pieces import getSetPiecesFromHost from .squad_ratings import getSquadRatingsFromHost +from .squad_coefficients import getSquadCoefficientsFromHost from .match_info import getFormationsFromHost, getSubstitutionsFromHost, getStartingPositionsFromHost import pandas as pd from xml.etree import ElementTree as ET class Impect: - def __init__(self,config: Config = Config(), connection: RateLimitedAPI = RateLimitedAPI()): + def __init__(self, config: Config = Config(), connection: RateLimitedAPI = RateLimitedAPI()): self.__config = config self.connection = connection @@ -68,14 +69,14 @@ def getSquadIterationAverages(self, iteration: int) -> pd.DataFrame: iteration, self.connection, self.__config.HOST ) - def getPlayerMatchScores(self, matches: list, positions: list) -> pd.DataFrame: + def getPlayerMatchScores(self, matches: list, positions: list = None) -> pd.DataFrame: return getPlayerMatchScoresFromHost( - matches, positions, self.connection, self.__config.HOST + matches, self.connection, self.__config.HOST, positions ) - def getPlayerIterationScores(self, iteration: int, positions: list) -> pd.DataFrame: + def getPlayerIterationScores(self, iteration: int, positions: list = None) -> pd.DataFrame: return getPlayerIterationScoresFromHost( - iteration, positions, self.connection, self.__config.HOST + iteration, 
self.connection, self.__config.HOST, positions ) def getSquadMatchScores(self, matches: list) -> pd.DataFrame: @@ -103,6 +104,11 @@ def getSquadRatings(self, iteration: int) -> pd.DataFrame: iteration, self.connection, self.__config.HOST ) + def getSquadCoefficients(self, iteration: int) -> pd.DataFrame: + return getSquadCoefficientsFromHost( + iteration, self.connection, self.__config.HOST + ) + def getFormations(self, matches: list) -> pd.DataFrame: return getFormationsFromHost( matches, self.connection, self.__config.HOST diff --git a/impectPy/iteration_averages.py b/impectPy/iteration_averages.py index eee1c5e..73df156 100644 --- a/impectPy/iteration_averages.py +++ b/impectPy/iteration_averages.py @@ -114,10 +114,6 @@ def getPlayerIterationAveragesFromHost( suffixes=("", "_right") ) - # get matchShares - match_shares_raw = averages_raw[ - ["iterationId", "squadId", "playerId", "position", "playDuration", "matchShare"]].drop_duplicates() - # fill missing values in the "name" column with a default value to ensure players without scorings don't get lost if len(averages_raw["name"][averages_raw["name"].isnull()]) > 0: averages_raw["name"] = averages_raw["name"].fillna("-1") @@ -133,6 +129,10 @@ def getPlayerIterationAveragesFromHost( # fill join cols with placeholder averages_raw.loc[mask] = averages_raw.loc[mask].fillna(-1) + # get matchShares + match_shares_raw = averages_raw[ + ["iterationId", "squadId", "playerId", "position", "playDuration", "matchShare"]].drop_duplicates() + # pivot kpi values averages_raw = pd.pivot_table( averages_raw, diff --git a/impectPy/matchsums.py b/impectPy/matchsums.py index 4d68461..9db651c 100644 --- a/impectPy/matchsums.py +++ b/impectPy/matchsums.py @@ -30,18 +30,18 @@ def getPlayerMatchsumsFromHost(matches: list, connection: RateLimitedAPI, host: raise Exception("Argument 'matches' must be a list of integers.") # get match info - iterations = pd.concat( + match_data = pd.concat( map(lambda match: 
connection.make_api_request_limited( url=f"{host}/v5/customerapi/matches/{match}", method="GET" ).process_response( - endpoint="Iterations" + endpoint="Match Info" ), matches), ignore_index=True) # filter for matches that are unavailable - fail_matches = iterations[iterations.lastCalculationDate.isnull()].id.drop_duplicates().to_list() + fail_matches = match_data[match_data.lastCalculationDate.isnull()].id.drop_duplicates().to_list() # drop matches that are unavailable from list of matches matches = [match for match in matches if match not in fail_matches] @@ -54,7 +54,7 @@ def getPlayerMatchsumsFromHost(matches: list, connection: RateLimitedAPI, host: print(f"The following matches are not available yet and were ignored:\n{fail_matches}") # extract iterationIds - iterations = list(iterations[iterations.lastCalculationDate.notnull()].iterationId.unique()) + iterations = list(match_data[match_data.lastCalculationDate.notnull()].iterationId.unique()) # get player match sums matchsums_raw = pd.concat( @@ -101,6 +101,18 @@ def getPlayerMatchsumsFromHost(matches: list, connection: RateLimitedAPI, host: iterations), ignore_index=True)[["id", "name"]].drop_duplicates() + # get coaches + coaches = pd.concat( + map(lambda iteration: connection.make_api_request_limited( + url=f"{host}/v5/customerapi/iterations/{iteration}/coaches", + method="GET" + ).process_response( + endpoint="Coaches", + raise_exception=False + ), + iterations), + ignore_index=True)[["id", "name"]].drop_duplicates() + # get kpis kpis = connection.make_api_request_limited( url=f"{host}/v5/customerapi/kpis", @@ -202,6 +214,15 @@ def getPlayerMatchsumsFromHost(matches: list, connection: RateLimitedAPI, host: right_on="id", how="left", suffixes=("", "_right") + ).merge( + pd.concat([ + match_data[["id","squadHomeId", "squadHomeCoachId"]].rename(columns={"squadHomeId": "squadId", "squadHomeCoachId": "coachId"}), + match_data[["id","squadAwayId", "squadAwayCoachId"]].rename(columns={"squadAwayId": "squadId", 
"squadAwayCoachId": "coachId"}) + ], ignore_index=True), + left_on=["matchId", "squadId"], + right_on=["id", "squadId"], + how="left", + suffixes=("", "_right") ).merge( iterations[["id", "competitionId", "competitionName", "competitionType", "season"]], left_on="iterationId", @@ -227,6 +248,14 @@ def getPlayerMatchsumsFromHost(matches: list, connection: RateLimitedAPI, host: right_on="id", how="left", suffixes=("", "_right") + ).merge( + coaches[["id", "name"]].rename( + columns={"id": "coachId", "name": "coachName"} + ), + left_on="coachId", + right_on="coachId", + how="left", + suffixes=("", "_right") ).merge( countries.rename(columns={"fifaName": "playerCountry"}), left_on="countryId", @@ -254,6 +283,8 @@ def getPlayerMatchsumsFromHost(matches: list, connection: RateLimitedAPI, host: "matchDayName", "squadId", "squadName", + "coachId", + "coachName", "playerId", "wyscoutId", "heimSpielId", @@ -313,18 +344,18 @@ def getSquadMatchsumsFromHost(matches: list, connection: RateLimitedAPI, host: s raise Exception("Input vor matches argument must be a list of integers") # get match info - iterations = pd.concat( + match_data = pd.concat( map(lambda match: connection.make_api_request_limited( url=f"{host}/v5/customerapi/matches/{match}", method="GET" ).process_response( - endpoint="Iterations" + endpoint="Match Info" ), matches), ignore_index=True) # filter for matches that are unavailable - fail_matches = iterations[iterations.lastCalculationDate.isnull()].id.drop_duplicates().to_list() + fail_matches = match_data[match_data.lastCalculationDate.isnull()].id.drop_duplicates().to_list() # drop matches that are unavailable from list of matches matches = [match for match in matches if match not in fail_matches] @@ -337,7 +368,7 @@ def getSquadMatchsumsFromHost(matches: list, connection: RateLimitedAPI, host: s print(f"The following matches are not available yet and were ignored:\n{fail_matches}") # extract iterationIds - iterations = 
list(iterations[iterations.lastCalculationDate.notnull()].iterationId.unique()) + iterations = list(match_data[match_data.lastCalculationDate.notnull()].iterationId.unique()) # get squad match sums matchsums_raw = pd.concat( @@ -363,6 +394,18 @@ def getSquadMatchsumsFromHost(matches: list, connection: RateLimitedAPI, host: s iterations), ignore_index=True)[["id", "name", "idMappings"]] + # get coaches + coaches = pd.concat( + map(lambda iteration: connection.make_api_request_limited( + url=f"{host}/v5/customerapi/iterations/{iteration}/coaches", + method="GET" + ).process_response( + endpoint="Coaches", + raise_exception=False + ), + iterations), + ignore_index=True)[["id", "name"]].drop_duplicates() + # unnest mappings squads = unnest_mappings_df(squads, "idMappings").drop(["idMappings"], axis=1).drop_duplicates() @@ -436,6 +479,15 @@ def getSquadMatchsumsFromHost(matches: list, connection: RateLimitedAPI, host: s right_on="id", how="left", suffixes=("", "_right") + ).merge( + pd.concat([ + match_data[["id","squadHomeId", "squadHomeCoachId"]].rename(columns={"squadHomeId": "squadId", "squadHomeCoachId": "coachId"}), + match_data[["id","squadAwayId", "squadAwayCoachId"]].rename(columns={"squadAwayId": "squadId", "squadAwayCoachId": "coachId"}) + ], ignore_index=True), + left_on=["matchId", "squadId"], + right_on=["id", "squadId"], + how="left", + suffixes=("", "_right") ).merge( iterations[["id", "competitionId", "competitionName", "competitionType", "season"]], left_on="iterationId", @@ -450,6 +502,14 @@ def getSquadMatchsumsFromHost(matches: list, connection: RateLimitedAPI, host: s right_on="squadId", how="left", suffixes=("", "_home") + ).merge( + coaches[["id", "name"]].rename( + columns={"id": "coachId", "name": "coachName"} + ), + left_on="coachId", + right_on="coachId", + how="left", + suffixes=("", "_right") ) # rename some columns @@ -472,7 +532,9 @@ def getSquadMatchsumsFromHost(matches: list, connection: RateLimitedAPI, host: s "wyscoutId", 
"heimSpielId", "skillCornerId", - "squadName" + "squadName", + "coachId", + "coachName" ] # add kpiNames to order diff --git a/impectPy/player_scores.py b/impectPy/player_scores.py index a07c1a3..1bfa7ad 100644 --- a/impectPy/player_scores.py +++ b/impectPy/player_scores.py @@ -28,7 +28,7 @@ def getPlayerMatchScores( - matches: list, positions: list, token: str, session: requests.Session = requests.Session() + matches: list, token: str, positions: list = None, session: requests.Session = requests.Session() ) -> pd.DataFrame: # create an instance of RateLimitedAPI @@ -37,39 +37,40 @@ def getPlayerMatchScores( # construct header with access token connection.session.headers.update({"Authorization": f"Bearer {token}"}) - return getPlayerMatchScoresFromHost(matches, positions, connection, "https://api.impect.com") + return getPlayerMatchScoresFromHost(matches, connection, "https://api.impect.com", positions) -def getPlayerMatchScoresFromHost(matches: list, positions: list, connection: RateLimitedAPI, host: str) -> pd.DataFrame: +def getPlayerMatchScoresFromHost(matches: list, connection: RateLimitedAPI, host: str, positions: list = None) -> pd.DataFrame: # check input for matches argument if not isinstance(matches, list): raise Exception("Argument 'matches' must be a list of integers.") # check input for positions argument - if not isinstance(positions, list): + if not isinstance(positions, list) and positions is not None: raise Exception("Input for positions argument must be a list") # check if the input positions are valid - invalid_positions = [position for position in positions if position not in allowed_positions] - if len(invalid_positions) > 0: - raise Exception( - f"Invalid position(s): {', '.join(invalid_positions)}." 
- f"\nChoose one or more of: {', '.join(allowed_positions)}" - ) + if positions is not None: + invalid_positions = [position for position in positions if position not in allowed_positions] + if len(invalid_positions) > 0: + raise Exception( + f"Invalid position(s): {', '.join(invalid_positions)}." + f"\nChoose one or more of: {', '.join(allowed_positions)}" + ) # get match info - iterations = pd.concat( + match_data = pd.concat( map(lambda match: connection.make_api_request_limited( url=f"{host}/v5/customerapi/matches/{match}", method="GET" ).process_response( - endpoint="Iterations" + endpoint="Match Info" ), matches), ignore_index=True) # filter for matches that are unavailable - fail_matches = iterations[iterations.lastCalculationDate.isnull()].id.drop_duplicates().to_list() + fail_matches = match_data[match_data.lastCalculationDate.isnull()].id.drop_duplicates().to_list() # drop matches that are unavailable from list of matches matches = [match for match in matches if match not in fail_matches] @@ -82,24 +83,40 @@ def getPlayerMatchScoresFromHost(matches: list, positions: list, connection: Rat print(f"The following matches are not available yet and were ignored:\n{fail_matches}") # extract iterationIds - iterations = list(iterations[iterations.lastCalculationDate.notnull()].iterationId.unique()) - - # compile list of positions - position_string = ",".join(positions) + iterations = list(match_data[match_data.lastCalculationDate.notnull()].iterationId.unique()) # get player scores - scores_raw = pd.concat( - map(lambda match: connection.make_api_request_limited( - url=f"{host}/v5/customerapi/matches/{match}/positions/{position_string}/player-scores", - method="GET" - ).process_response( - endpoint="PlayerMatchScores" - ).assign( - matchId=match, - positions=position_string - ), - matches), - ignore_index=True) + if positions is None: + # query positions at once + scores_raw = pd.concat( + map(lambda match: connection.make_api_request_limited( + 
url=f"{host}/v5/customerapi/matches/{match}/player-scores", + method="GET" + ).process_response( + endpoint="PlayerMatchScores" + ).assign( + matchId=match, + ), + matches), + ignore_index=True) + else: + + # compile list of positions + position_string = ",".join(positions) + + # query positions individually + scores_raw = pd.concat( + map(lambda match: connection.make_api_request_limited( + url=f"{host}/v5/customerapi/matches/{match}/positions/{position_string}/player-scores", + method="GET" + ).process_response( + endpoint="PlayerMatchScores" + ).assign( + matchId=match, + positions=position_string + ), + matches), + ignore_index=True) # get players players = pd.concat( @@ -133,6 +150,18 @@ def getPlayerMatchScoresFromHost(matches: list, positions: list, connection: Rat iterations), ignore_index=True)[["id", "name"]].drop_duplicates() + # get coaches + coaches = pd.concat( + map(lambda iteration: connection.make_api_request_limited( + url=f"{host}/v5/customerapi/iterations/{iteration}/coaches", + method="GET" + ).process_response( + endpoint="Coaches", + raise_exception=False + ), + iterations), + ignore_index=True)[["id", "name"]].drop_duplicates() + # get player scores scores = connection.make_api_request_limited( url=f"{host}/v5/customerapi/player-scores", @@ -159,7 +188,7 @@ def getPlayerMatchScoresFromHost(matches: list, positions: list, connection: Rat url=f"{host}/v5/customerapi/countries", method="GET" ).process_response( - endpoint="KPIs" + endpoint="Countries" ) # create empty df to store player scores @@ -184,16 +213,26 @@ def getPlayerMatchScoresFromHost(matches: list, positions: list, connection: Rat continue # convert to pandas df - temp = pd.DataFrame(temp).assign( - matchId=scores_raw.matchId.loc[i], - squadId=scores_raw[side.replace("Players", "Id")].loc[i], - positions=scores_raw.positions.loc[i] - ) - - # extract matchshares - matchshares = temp[["matchId", "squadId", "id", "matchShare", "playDuration"]].drop_duplicates().assign( - 
positions=position_string - ) + if positions is None: + temp = pd.DataFrame(temp).assign( + matchId=scores_raw.matchId.loc[i], + squadId=scores_raw[side.replace("Players", "Id")].loc[i], + ) + + # extract matchshares + matchshares = temp[["matchId", "squadId", "id", "matchShare", "playDuration", "position"]].drop_duplicates() + + else: + temp = pd.DataFrame(temp).assign( + matchId=scores_raw.matchId.loc[i], + squadId=scores_raw[side.replace("Players", "Id")].loc[i], + positions=scores_raw.positions.loc[i] + ) + + # extract matchshares + matchshares = temp[["matchId", "squadId", "id", "matchShare", "playDuration"]].drop_duplicates().assign( + positions=position_string + ) # explode kpis column temp = temp.explode("playerScores") @@ -215,25 +254,46 @@ def getPlayerMatchScoresFromHost(matches: list, positions: list, connection: Rat ) # pivot data - temp = pd.pivot_table( - temp, - values="value", - index=["matchId", "squadId", "positions", "id"], - columns="name", - aggfunc="sum", - fill_value=0, - dropna=False - ).reset_index() - - # inner join with matchshares - temp = pd.merge( - temp, - matchshares, - left_on=["matchId", "squadId", "id", "positions"], - right_on=["matchId", "squadId", "id", "positions"], - how="inner", - suffixes=("", "_right") - ) + if positions is None: + temp = pd.pivot_table( + temp, + values="value", + index=["matchId", "squadId", "position", "id"], + columns="name", + aggfunc="sum", + fill_value=0, + dropna=False + ).reset_index() + + # inner join with matchshares + temp = pd.merge( + temp, + matchshares, + left_on=["matchId", "squadId", "id", "position"], + right_on=["matchId", "squadId", "id", "position"], + how="inner", + suffixes=("", "_right") + ) + else: + temp = pd.pivot_table( + temp, + values="value", + index=["matchId", "squadId", "positions", "id"], + columns="name", + aggfunc="sum", + fill_value=0, + dropna=False + ).reset_index() + + # inner join with matchshares + temp = pd.merge( + temp, + matchshares, + left_on=["matchId", 
"squadId", "id", "positions"], + right_on=["matchId", "squadId", "id", "positions"], + how="inner", + suffixes=("", "_right") + ) # append to match_player_scores match_player_scores = pd.concat([match_player_scores, temp]) @@ -256,6 +316,15 @@ def getPlayerMatchScoresFromHost(matches: list, positions: list, connection: Rat right_on="id", how="left", suffixes=("", "_right") + ).merge( + pd.concat([ + match_data[["id","squadHomeId", "squadHomeCoachId"]].rename(columns={"squadHomeId": "squadId", "squadHomeCoachId": "coachId"}), + match_data[["id","squadAwayId", "squadAwayCoachId"]].rename(columns={"squadAwayId": "squadId", "squadAwayCoachId": "coachId"}) + ], ignore_index=True), + left_on=["matchId", "squadId"], + right_on=["id", "squadId"], + how="left", + suffixes=("", "_right") ).merge( iterations[["id", "competitionId", "competitionName", "competitionType", "season"]], left_on="iterationId", @@ -281,6 +350,14 @@ def getPlayerMatchScoresFromHost(matches: list, positions: list, connection: Rat right_on="id", how="left", suffixes=("", "_right") + ).merge( + coaches[["id", "name"]].rename( + columns={"id": "coachId", "name": "coachName"} + ), + left_on="coachId", + right_on="coachId", + how="left", + suffixes=("", "_right") ).merge( countries.rename(columns={"fifaName": "playerCountry"}), left_on="countryId", @@ -308,6 +385,8 @@ def getPlayerMatchScoresFromHost(matches: list, positions: list, connection: Rat "matchDayName", "squadId", "squadName", + "coachId", + "coachName", "playerId", "wyscoutId", "heimSpielId", @@ -319,7 +398,7 @@ def getPlayerMatchScoresFromHost(matches: list, positions: list, connection: Rat "birthplace", "playerCountry", "leg", - "positions", + "positions" if positions is not None else "position", "matchShare", "playDuration", ] @@ -351,7 +430,7 @@ def getPlayerMatchScoresFromHost(matches: list, positions: list, connection: Rat def getPlayerIterationScores( - iteration: int, positions: list, token: str, session: requests.Session = 
requests.Session() + iteration: int, token: str, positions: list = None, session: requests.Session = requests.Session() ) -> pd.DataFrame: # create an instance of RateLimitedAPI @@ -360,10 +439,10 @@ def getPlayerIterationScores( # construct header with access token connection.session.headers.update({"Authorization": f"Bearer {token}"}) - return getPlayerIterationScoresFromHost(iteration, positions, connection, "https://api.impect.com") + return getPlayerIterationScoresFromHost(iteration, connection, "https://api.impect.com", positions) def getPlayerIterationScoresFromHost( - iteration: int, positions: list, connection: RateLimitedAPI, host: str + iteration: int, connection: RateLimitedAPI, host: str, positions: list = None ) -> pd.DataFrame: # check input for iteration argument @@ -371,16 +450,17 @@ def getPlayerIterationScoresFromHost( raise Exception("Input for iteration argument must be an integer") # check input for positions argument - if not isinstance(positions, list): + if not isinstance(positions, list) and positions is not None: raise Exception("Input for positions argument must be a list") # check if the input positions are valid - invalid_positions = [position for position in positions if position not in allowed_positions] - if len(invalid_positions) > 0: - raise Exception( - f"Invalid position(s): {', '.join(invalid_positions)}." - f"\nChoose one or more of: {', '.join(allowed_positions)}" - ) + if positions is not None: + invalid_positions = [position for position in positions if position not in allowed_positions] + if len(invalid_positions) > 0: + raise Exception( + f"Invalid position(s): {', '.join(invalid_positions)}." 
+ f"\nChoose one or more of: {', '.join(allowed_positions)}" + ) # get squads squads = connection.make_api_request_limited( @@ -393,25 +473,44 @@ def getPlayerIterationScoresFromHost( # get squadIds squad_ids = squads[squads.access].id.to_list() - # compile position string - position_string = ",".join(positions) - # get player iteration averages per squad - scores_raw = pd.concat( - map(lambda squadId: connection.make_api_request_limited( - url=f"{host}/v5/customerapi/iterations/{iteration}/" - f"squads/{squadId}/positions/{position_string}/player-scores", - method="GET" - ).process_response( - endpoint="PlayerIterationScores", - raise_exception=False - ).assign( - iterationId=iteration, - squadId=squadId, - positions=position_string - ), - squad_ids), - ignore_index=True) + if positions is None: + + scores_raw = pd.concat( + map(lambda squadId: connection.make_api_request_limited( + url=f"{host}/v5/customerapi/iterations/{iteration}/" + f"squads/{squadId}/player-scores", + method="GET" + ).process_response( + endpoint="PlayerIterationScores", + raise_exception=False + ).assign( + iterationId=iteration, + squadId=squadId + ), + squad_ids), + ignore_index=True) + + else: + + # compile position string + position_string = ",".join(positions) + + scores_raw = pd.concat( + map(lambda squadId: connection.make_api_request_limited( + url=f"{host}/v5/customerapi/iterations/{iteration}/" + f"squads/{squadId}/positions/{position_string}/player-scores", + method="GET" + ).process_response( + endpoint="PlayerIterationScores", + raise_exception=False + ).assign( + iterationId=iteration, + squadId=squadId, + positions=position_string + ), + squad_ids), + ignore_index=True) # raise exception if no player played at given positions in entire iteration if len(scores_raw) == 0: @@ -476,36 +575,71 @@ def getPlayerIterationScoresFromHost( ) # get matchShares - match_shares = averages[ - ["iterationId", "squadId", "playerId", "positions", "playDuration", "matchShare"]].drop_duplicates() 
- - # fill missing values in the "name" column with a default value to ensure players without scorings don't get lost - if len(averages["name"][averages["name"].isnull()]) > 0: - averages["name"] = averages["name"].fillna("-1") - - # pivot kpi values - averages = pd.pivot_table( - averages, - values="value", - index=["iterationId", "squadId", "playerId", "positions"], - columns="name", - aggfunc="sum", - fill_value=0, - dropna=False - ).reset_index() - - # drop "-1" column - if "-1" in averages.columns: - averages.drop(["-1"], inplace=True, axis=1) - - # merge with playDuration and matchShare - averages = averages.merge( - match_shares, - left_on=["iterationId", "squadId", "playerId", "positions"], - right_on=["iterationId", "squadId", "playerId", "positions"], - how="inner", - suffixes=("", "_right") - ) + if positions is None: + match_shares = averages[ + ["iterationId", "squadId", "playerId", "position", "playDuration", "matchShare"] + ].drop_duplicates() + + # fill missing values in the "name" column with a default value to ensure players without scorings don't get lost + if len(averages["name"][averages["name"].isnull()]) > 0: + averages["name"] = averages["name"].fillna("-1") + + # pivot kpi values + averages = pd.pivot_table( + averages, + values="value", + index=["iterationId", "squadId", "playerId", "position"], + columns="name", + aggfunc="sum", + fill_value=0, + dropna=False + ).reset_index() + + # drop "-1" column + if "-1" in averages.columns: + averages.drop(["-1"], inplace=True, axis=1) + + # merge with playDuration and matchShare + averages = averages.merge( + match_shares, + left_on=["iterationId", "squadId", "playerId", "position"], + right_on=["iterationId", "squadId", "playerId", "position"], + how="inner", + suffixes=("", "_right") + ) + else: + match_shares = averages[ + ["iterationId", "squadId", "playerId", "positions", "playDuration", "matchShare"] + ].drop_duplicates() + + # fill missing values in the "name" column with a default value to 
ensure players without scorings don't get lost + if len(averages["name"][averages["name"].isnull()]) > 0: + averages["name"] = averages["name"].fillna("-1") + + # pivot kpi values + averages = pd.pivot_table( + averages, + values="value", + index=["iterationId", "squadId", "playerId", "positions"], + columns="name", + aggfunc="sum", + fill_value=0, + dropna=False + ).reset_index() + + # drop "-1" column + if "-1" in averages.columns: + averages.drop(["-1"], inplace=True, axis=1) + + # merge with playDuration and matchShare + averages = averages.merge( + match_shares, + left_on=["iterationId", "squadId", "playerId", "positions"], + right_on=["iterationId", "squadId", "playerId", "positions"], + how="inner", + suffixes=("", "_right") + ) + # merge with other data averages = averages.merge( iterations[["id", "competitionName", "season"]], @@ -566,7 +700,7 @@ def getPlayerIterationScoresFromHost( "birthplace", "playerCountry", "leg", - "positions", + "positions" if positions is not None else "position", "matchShare", "playDuration" ] diff --git a/impectPy/squad_coefficients.py b/impectPy/squad_coefficients.py new file mode 100644 index 0000000..cc57555 --- /dev/null +++ b/impectPy/squad_coefficients.py @@ -0,0 +1,135 @@ +# load packages +import pandas as pd +import requests +from impectPy.helpers import RateLimitedAPI, unnest_mappings_df +from .iterations import getIterationsFromHost + +###### +# +# This function returns a pandas dataframe that contains all squad ratings for a given iteration +# +###### + + +# define function +def getSquadCoefficients(iteration: int, token: str, session: requests.Session = requests.Session()) -> pd.DataFrame: + + # create an instance of RateLimitedAPI + connection = RateLimitedAPI(session) + + # construct header with access token + connection.session.headers.update({"Authorization": f"Bearer {token}"}) + + return getSquadCoefficientsFromHost(iteration, connection, "https://api.impect.com") + +def getSquadCoefficientsFromHost(iteration: 
int, connection: RateLimitedAPI, host: str) -> pd.DataFrame: + + # check input for iteration argument + if not isinstance(iteration, int): + raise Exception("Argument 'iteration' must be an integer.") + + # get iterations + iterations = getIterationsFromHost(connection=connection, host=host) + + # raise exception if provided iteration id doesn't exist + if iteration not in list(iterations.id): + raise Exception("The supplied iteration id does not exist. Execution stopped.") + + # get squads + squads = connection.make_api_request_limited( + url=f"{host}/v5/customerapi/iterations/{iteration}/squads", + method="GET" + ).process_response( + endpoint="Squads" + )[["id", "name", "idMappings"]] + + # unnest mappings + squads = unnest_mappings_df(squads, "idMappings").drop(["idMappings"], axis=1).drop_duplicates() + + # get squad coefficients + coefficients_raw = connection.make_api_request_limited( + url=f"{host}/v5/customerapi/iterations/{iteration}/predictions/model-coefficients", + method="GET" + ).process_response( + endpoint="Squad Coefficients" + ) + + # extract JSON from the column + nested_data = coefficients_raw["entries"][0] + + # flatten coefficients df + coefficients = [] + for entry in nested_data: + date = entry["date"] + for squad in entry["squads"]: + coefficients.append({ + "iterationId": iteration, + "date": date, + "interceptCoefficient": entry["competition"]["intercept"], + "homeCoefficient": entry["competition"]["home"], + "competitionCoefficient": entry["competition"]["comp"], + "squadId": squad["id"], + "attackCoefficient": squad["att"], + "defenseCoefficient": squad["def"] + }) + + # convert to df + coefficients = pd.DataFrame(coefficients) + + # merge with competition info + coefficients = coefficients.merge( + iterations[["id", "competitionId", "competitionName", "competitionType", "season", "competitionGender"]], + left_on="iterationId", + right_on="id", + how="left", + suffixes=("", "_right") + ) + + # merge coefficients with squads + coefficients =
coefficients.merge( + squads[["id", "wyscoutId", "heimSpielId", "skillCornerId", "name"]].rename( + columns={"id": "squadId", "name": "squadName"} + ), + left_on="squadId", + right_on="squadId", + how="left", + suffixes=("", "_home") + ) + + # fix some column types + coefficients["iterationId"] = coefficients["iterationId"].astype("Int64") + coefficients["competitionId"] = coefficients["competitionId"].astype("Int64") + coefficients["squadId"] = coefficients["squadId"].astype("Int64") + coefficients["wyscoutId"] = coefficients["wyscoutId"].astype("Int64") + coefficients["heimSpielId"] = coefficients["heimSpielId"].astype("Int64") + coefficients["skillCornerId"] = coefficients["skillCornerId"].astype("Int64") + + # define desired column order + order = [ + "iterationId", + "competitionId", + "competitionName", + "competitionType", + "season", + "competitionGender", + "interceptCoefficient", + "homeCoefficient", + "competitionCoefficient", + "date", + "squadId", + "wyscoutId", + "heimSpielId", + "skillCornerId", + "squadName", + "attackCoefficient", + "defenseCoefficient", + ] + + # reorder data + coefficients = coefficients[order] + + # reorder rows + coefficients = coefficients.sort_values(["date", "squadId"]) + + # return coefficients + return coefficients \ No newline at end of file diff --git a/impectPy/squad_scores.py b/impectPy/squad_scores.py index 139c533..b2e732a 100644 --- a/impectPy/squad_scores.py +++ b/impectPy/squad_scores.py @@ -24,38 +24,38 @@ def getSquadMatchScores(matches: list, token: str, session: requests.Session = r return getSquadMatchScoresFromHost(matches, connection, "https://api.impect.com") def getSquadMatchScoresFromHost(matches: list, connection: RateLimitedAPI, host: str) -> pd.DataFrame: - + # check input for matches argument if not isinstance(matches, list): raise Exception("Argument 'matches' must be a list of integers.") - + # get match info - iterations = pd.concat( + match_data = pd.concat( map(lambda match:
connection.make_api_request_limited( url=f"{host}/v5/customerapi/matches/{match}", method="GET" ).process_response( - endpoint="Iterations" + endpoint="Match Info" ), matches), ignore_index=True) - + # filter for matches that are unavailable - fail_matches = iterations[iterations.lastCalculationDate.isnull()].id.drop_duplicates().to_list() - + fail_matches = match_data[match_data.lastCalculationDate.isnull()].id.drop_duplicates().to_list() + # drop matches that are unavailable from list of matches matches = [match for match in matches if match not in fail_matches] - + # raise warnings if len(fail_matches) > 0: if len(matches) == 0: raise Exception("All supplied matches are unavailable. Execution stopped.") else: print(f"The following matches are not available yet and were ignored:\n{fail_matches}") - + # extract iterationIds - iterations = list(iterations[iterations.lastCalculationDate.notnull()].iterationId.unique()) - + iterations = list(match_data[match_data.lastCalculationDate.notnull()].iterationId.unique()) + # get squad scores scores_raw = pd.concat( map(lambda match: connection.make_api_request_limited( @@ -68,7 +68,7 @@ def getSquadMatchScoresFromHost(matches: list, connection: RateLimitedAPI, host: ), matches), ignore_index=True) - + # get squads squads = pd.concat( map(lambda iteration: connection.make_api_request_limited( @@ -80,6 +80,18 @@ def getSquadMatchScoresFromHost(matches: list, connection: RateLimitedAPI, host: iterations), ignore_index=True)[["id", "name", "idMappings"]] + # get coaches + coaches = pd.concat( + map(lambda iteration: connection.make_api_request_limited( + url=f"{host}/v5/customerapi/iterations/{iteration}/coaches", + method="GET" + ).process_response( + endpoint="Coaches", + raise_exception=False + ), + iterations), + ignore_index=True)[["id", "name"]].drop_duplicates() + # unnest mappings squads = unnest_mappings_df(squads, "idMappings").drop(["idMappings"], axis=1).drop_duplicates() @@ -154,6 +166,15 @@ def 
getSquadMatchScoresFromHost(matches: list, connection: RateLimitedAPI, host: right_on="id", how="left", suffixes=("", "_right") + ).merge( + pd.concat([ + match_data[["id","squadHomeId", "squadHomeCoachId"]].rename(columns={"squadHomeId": "squadId", "squadHomeCoachId": "coachId"}), + match_data[["id","squadAwayId", "squadAwayCoachId"]].rename(columns={"squadAwayId": "squadId", "squadAwayCoachId": "coachId"}) + ], ignore_index=True), + left_on=["matchId", "squadId"], + right_on=["id", "squadId"], + how="left", + suffixes=("", "_right") ).merge( iterations[["id", "competitionId", "competitionName", "competitionType", "season"]], left_on="iterationId", @@ -168,6 +189,14 @@ def getSquadMatchScoresFromHost(matches: list, connection: RateLimitedAPI, host: right_on="squadId", how="left", suffixes=("", "_right") + ).merge( + coaches[["id", "name"]].rename( + columns={"id": "coachId", "name": "coachName"} + ), + left_on="coachId", + right_on="coachId", + how="left", + suffixes=("", "_right") ) # rename some columns @@ -190,7 +219,9 @@ def getSquadMatchScoresFromHost(matches: list, connection: RateLimitedAPI, host: "wyscoutId", "heimSpielId", "skillCornerId", - "squadName" + "squadName", + "coachId", + "coachName" ] # add scoreNames to order @@ -208,7 +239,7 @@ def getSquadMatchScoresFromHost(matches: list, connection: RateLimitedAPI, host: squad_scores["wyscoutId"] = squad_scores["wyscoutId"].astype("Int64") squad_scores["heimSpielId"] = squad_scores["heimSpielId"].astype("Int64") squad_scores["skillCornerId"] = squad_scores["skillCornerId"].astype("Int64") - + # return data return squad_scores @@ -231,11 +262,11 @@ def getSquadIterationScores(iteration: int, token: str, session: requests.Sessio return getSquadIterationScoresFromHost(iteration, connection, "https://api.impect.com") def getSquadIterationScoresFromHost(iteration: int, connection: RateLimitedAPI, host: str) -> pd.DataFrame: - + # check input for matches argument if not isinstance(iteration, int): raise 
Exception("Input for iteration argument must be an integer") - + # get squads squads = connection.make_api_request_limited( url=f"{host}/v5/customerapi/iterations/{iteration}/squads", @@ -246,7 +277,7 @@ def getSquadIterationScoresFromHost(iteration: int, connection: RateLimitedAPI, # unnest mappings squads = unnest_mappings_df(squads, "idMappings").drop(["idMappings"], axis=1).drop_duplicates() - + # get squad iteration averages scores_raw = connection.make_api_request_limited( url=f"{host}/v5/customerapi/iterations/{iteration}/squad-scores", @@ -254,7 +285,7 @@ def getSquadIterationScoresFromHost(iteration: int, connection: RateLimitedAPI, ).process_response( endpoint="SquadIterationScores" ).assign(iterationId=iteration) - + # get scores scores_definitions = connection.make_api_request_limited( url=f"{host}/v5/customerapi/squad-scores", @@ -262,22 +293,22 @@ def getSquadIterationScoresFromHost(iteration: int, connection: RateLimitedAPI, ).process_response( endpoint="scoreDefinitions" )[["id", "name"]] - + # get iterations iterations = getIterationsFromHost(connection=connection, host=host) - + # get matches played matches = scores_raw[["squadId", "matches"]].drop_duplicates() - + # unnest scores scores = scores_raw.explode("squadScores").reset_index(drop=True) - + # unnest dictionary in kpis column scores = pd.concat( [scores.drop(["squadScores"], axis=1), pd.json_normalize(scores["squadScores"])], axis=1 ) - + # merge with kpis to ensure all kpis are present scores = scores.merge( scores_definitions, @@ -286,7 +317,7 @@ def getSquadIterationScoresFromHost(iteration: int, connection: RateLimitedAPI, how="outer", suffixes=("", "_right") ) - + # pivot kpi values scores = pd.pivot_table( scores, @@ -297,7 +328,7 @@ def getSquadIterationScoresFromHost(iteration: int, connection: RateLimitedAPI, fill_value=0, dropna=False ).reset_index() - + # inner join with matches played scores = pd.merge( scores, @@ -307,7 +338,7 @@ def getSquadIterationScoresFromHost(iteration: 
int, connection: RateLimitedAPI, how="inner", suffixes=("", "_right") ) - + # merge with other data scores = scores.merge( iterations[["id", "competitionId", "competitionName", "competitionType", "season"]], @@ -324,10 +355,10 @@ def getSquadIterationScoresFromHost(iteration: int, connection: RateLimitedAPI, how="left", suffixes=("", "_right") ) - + # remove NA rows averages = scores[scores.iterationId.notnull()] - + # fix column types averages["matches"] = averages["matches"].astype("Int64") averages["iterationId"] = averages["iterationId"].astype("Int64") @@ -335,7 +366,7 @@ def getSquadIterationScoresFromHost(iteration: int, connection: RateLimitedAPI, averages["wyscoutId"] = averages["wyscoutId"].astype("Int64") averages["heimSpielId"] = averages["heimSpielId"].astype("Int64") averages["skillCornerId"] = averages["skillCornerId"].astype("Int64") - + # define column order order = [ "iterationId", @@ -348,12 +379,12 @@ def getSquadIterationScoresFromHost(iteration: int, connection: RateLimitedAPI, "squadName", "matches" ] - + # add scoreNames to order order = order + scores_definitions.name.to_list() - + # select columns averages = averages[order] - + # return result return averages \ No newline at end of file diff --git a/setup.py b/setup.py index d0a8eb1..bfaf7b5 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( # Needed to silence warnings (and to be a worthwhile package) name="impectPy", - url="tbd", + url="https://github.com/ImpectAPI/impectPy", author="Impect", author_email="info@impect.com", # Needed to actually package something @@ -17,9 +17,10 @@ "pandas>=2.0.0", "numpy>=1.24.2,<2.0"], # *strongly* suggested for sharing - version="2.4.5", + version="2.5.0", # The license can be anything you like license="MIT", description="A Python package to facilitate interaction with the Impect customer API", long_description=README, + long_description_content_type="text/markdown", ) \ No newline at end of file