In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
import matplotlib.colors as mcolors
import matplotlib.patheffects as path_effects
from mplsoccer import Pitch, VerticalPitch, lines
from PIL import Image
import urllib
import socceraction
import socceraction.atomic.spadl as atomicspadl
import socceraction.spadl as spadl
In [2]:
# Load custom fonts for visualization
fe_regular = fm.FontEntry(
fname='/Users/davidegualano/Documents/Python FTBLData/SourceSansPro-Regular.ttf',
name='SourceSansPro-Regular'
)
semibold_font = fm.FontProperties(fname='/Users/davidegualano/Documents/Python FTBLData/SourceSansPro-SemiBold.ttf')
# Insert both fonts into the font manager
fm.fontManager.ttflist.insert(0, fe_regular)
# Set the font family to the custom regular font
matplotlib.rcParams['font.family'] = fe_regular.name
In [3]:
season = 2425
In [4]:
# Load datasets from CSV files
VAEP = pd.read_csv("aVAEPactions.csv", index_col = 0)
xP = pd.read_csv("xPactions.csv", index_col = 0)
fb = pd.read_csv("teamsFOTMOB.csv", index_col = 0)
players = pd.read_csv(f"players{season}.csv", index_col = 0)
games = pd.read_csv(f"games{season}.csv", index_col = 0)
aactions = pd.read_csv(f"atomic_actions{season}.csv", index_col = 0)
actions = pd.read_csv(f"actions{season}.csv", index_col = 0)
chains = pd.read_csv(f"possession_chains_info{season}.csv", index_col = 0)
recoveries = pd.read_csv(f"recoveries_id{season}.csv", index_col = 0)
recoveries.rename(columns = {'event_id':'original_event_id'}, inplace = True)
In [5]:
# Add descriptive action names to the atomic actions DataFrame
aactions = atomicspadl.add_names(aactions)
In [6]:
# Add descriptive action names to the actions DataFrame
actions = spadl.add_names(actions)
In [7]:
# Merge datasets to create a unified DataFrame
dfa = (
aactions
.merge(players, how="left")
.merge(fb, how="left")
.merge(VAEP, how="left"))
In [8]:
# Merge datasets to create a unified DataFrame
dfb = (
actions
.merge(players, how="left")
.merge(fb, how="left")
.merge(xP, how="left")
.merge(chains, how="left"))
In [9]:
#Adding on both frames a set of features that we need to define progressive actions
dfa["beginning_distance"] = np.sqrt(np.square(105-dfa['x_a0']) + np.square(34-dfa['y_a0'])).round(2)
dfa["end_distance"] = np.sqrt(np.square(105-dfa['end_x']) + np.square(34-dfa['end_y'])).round(2)
dfa["length"] = dfa["end_distance"] - dfa["beginning_distance"]
dfa['length'] = dfa['length'].abs()
dfa["angle"] = np.arctan2(dfa["end_y"] - dfa["y_a0"], dfa["end_x"] - dfa["x_a0"])
dfa['angle_degrees'] = np.degrees(dfa['angle']) % 360
In [10]:
#Adding features for next actions we'll use for calculations and plotting later on
#In the def action + pass map
dfa["next_type_name"] = dfa.shift(-1, fill_value=0)["type_name"]
dfa["next_team_name"] = dfa.shift(-1, fill_value=0)["team_name"]
dfa["next_starting_position"] = dfa.shift(-1, fill_value=0)["starting_position"]
dfa["next_player_name"] = dfa.shift(-1, fill_value=0)["player_name"]
dfa["next_original_event_id"] = dfa.shift(-1, fill_value=0)["original_event_id"]
dfa["next_start_x"] = dfa.shift(-1, fill_value=0)["x_a0"]
dfa["next_start_y"] = dfa.shift(-1, fill_value=0)["y_a0"]
dfa["next_end_x"] = dfa.shift(-1, fill_value=0)["end_x"]
dfa["next_end_y"] = dfa.shift(-1, fill_value=0)["end_y"]
dfa["next_vaep_value"] = dfa.shift(-1, fill_value=0)["vaep_value"]
In [11]:
#Flagging progressive actions given a custom defintion of passes not starting in the box, with lenght of more than 5m
#not backwards or horizontal and the pass closes the distance from starting point to center of the goal by at least 17.5%
dfa['progressive'] = np.where(
((dfa['beginning_distance'] - dfa['end_distance']) / dfa['beginning_distance'] >= 0.175) & (dfa['length'] > 5) &
(((dfa['angle_degrees'] >= 0) & (dfa['angle_degrees'] <= 60)) | ((dfa['angle_degrees'] >= 260) & (dfa['angle_degrees'] <= 360))) &
~((dfa['x_a0'] >= 88.5) & (dfa['y_a0'] >= 13.885) & (dfa['y_a0'] <= 54.115)),
True, False)
In [12]:
# Function to format season ID into a readable format
def format_season_id(season_id):
# Convert to integer if it's a float
season_id = int(season_id)
# Extract the last two digits of the year
start_year = str(season_id -1)[-2:]
# Calculate the end year
end_year = str(season_id)[-2:]
# Format as 20/21
formatted_season = f"{start_year}/{end_year}"
return formatted_season
In [13]:
#Keeping only passes in both dataframe
df1a = dfa[dfa["type_name"] == 'pass']
df1b = dfb[dfb["type_name"] == 'pass']
#Creating an outcome column and then calculating completed (1) and not (0) passes minus probability (xP)
df1b['outcome'] = np.where((df1b["result_name"] == 'success'), 1, 0)
/var/folders/ns/3wxdg4g57h77vxwmr4wzmvt40000gn/T/ipykernel_7391/1603123889.py:6: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df1b['outcome'] = np.where((df1b["result_name"] == 'success'), 1, 0)
In [14]:
#Get the whole list of players in the data
playerlist = df1a['player_name'].unique().tolist()
cleaned_playerlist = [name for name in playerlist if pd.notna(name)]
cleaned_playerlist.sort()
In [15]:
from IPython.display import display, HTML
# Generate the HTML dropdown to easily search for players
options_html = ''.join([f'<option value="{name}">{name}</option>' for name in cleaned_playerlist])
dropdown_html = f"""
<input list="players" id="dropdown" oninput="handleInput()" placeholder="Choose Someone">
<datalist id="players">
{options_html}
</datalist>
<p id="output"></p>
<script>
function handleInput() {{
var input = document.getElementById("dropdown").value;
var output = document.getElementById("output");
output.innerHTML = "Selected: " + input;
}}
</script>
"""
# Display the dropdown
display(HTML(dropdown_html))
In [16]:
#Selecting player and filtering down both dataframes
player = 'Dean Huijsen'
df2a = df1a[df1a["player_name"] == player]
df2b = df1b[df1b["player_name"] == player]
#Check the teams he played for in the season/dataset to filter down eventually
df2a.team_name.unique()
Out[16]:
array(['Bournemouth'], dtype=object)
In [17]:
# Apply the function to the 'season_id' column
df2a['formatted_season'] = df2a['season_id'].apply(format_season_id)
df2b['formatted_season'] = df2b['season_id'].apply(format_season_id)
/var/folders/ns/3wxdg4g57h77vxwmr4wzmvt40000gn/T/ipykernel_7391/4271577116.py:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df2a['formatted_season'] = df2a['season_id'].apply(format_season_id) /var/folders/ns/3wxdg4g57h77vxwmr4wzmvt40000gn/T/ipykernel_7391/4271577116.py:3: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df2b['formatted_season'] = df2b['season_id'].apply(format_season_id)
In [18]:
#Of course we keep this lines if we want to keep action for specific teams
df3a = df2a[df2a["team_name"] == "Bournemouth"]
df3b = df2b[df2b["team_name"] == "Bournemouth"]
In [19]:
#Keeping a dataframe from each filtered dataframe for merging reasons later on
fa = df3a.filter(items=['game_id', 'original_event_id', 'team_id', 'vaep_value'])
fb = df3b.filter(items=['game_id', 'original_event_id', 'result_name', 'team_id', 'xP', 'PAx', 'outcome'])
In [20]:
#Reformatting original event id column in all dataframes which is needed for merging
fa['original_event_id'] = fa['original_event_id'].astype(int)
fb['original_event_id'] = fb['original_event_id'].astype(int)
df3a['original_event_id'] = df3a['original_event_id'].astype(int)
df3b['original_event_id'] = df3b['original_event_id'].astype(int)
In [21]:
#Actual merging operations
Xa = df3a.merge(fb, how="left")
Xb = df3b.merge(fa, how="left")
In [22]:
#Selecting only progressive passes
df4a = Xa[Xa["progressive"] == True]
df4a['PAx'] = df4a['outcome'] - df4a['xP']
# Group once and calculate all metrics in a single operation
metrics = (df4a.groupby(
["player_id", "player_name", "team_name", "season_id"],
observed=True
).agg(
expected_passes=("xP", "sum"),
attempted_passes=("outcome", "count"),
successful_passes=("outcome", "sum")
).reset_index())
# Calculate PAxpp (Passes above expectations per pass, normalized per 100 passes)
metrics['PAxpp'] = (
((metrics['successful_passes'] - metrics['expected_passes']) /
metrics['attempted_passes']) * 100
).round(3)
/var/folders/ns/3wxdg4g57h77vxwmr4wzmvt40000gn/T/ipykernel_7391/2894729846.py:3: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df4a['PAx'] = df4a['outcome'] - df4a['xP']
In [23]:
# setting up the pitch, bins and figure
pitch = Pitch(pitch_type='custom', pitch_width=68, pitch_length=105, goal_type='box', linewidth=2, line_color='black', half=False)
bins = (9, 6)
fig, ax = pitch.draw(figsize=(16, 11), constrained_layout=True, tight_layout=False)
# plot the heatmap - more intense red = more passes originating from that square
cmap = mcolors.LinearSegmentedColormap.from_list("custom_red", ["#D7D1CF", "#FF0000"])
bs_heatmap = pitch.bin_statistic(df4a.x_a0, df4a.y_a0, statistic='count', bins=bins)
hm = pitch.heatmap(bs_heatmap, ax=ax, cmap=cmap, zorder = 3, alpha = 0.8)
# plot the underliying passes
am = pitch.arrows(df4a.x_a0, df4a.y_a0, df4a.end_x, df4a.end_y, width=0.8, alpha = 0.5, zorder = 1,
headwidth = 10, headlength = 8, color = '#000000', label = 'successful passes', ax=ax)
# Create bin statistics for different metrics
bs_successful = pitch.bin_statistic(df4a.end_x, df4a.end_y,
values=df4a.outcome, statistic='sum', bins=bins)
bs_expected = pitch.bin_statistic(df4a.end_x, df4a.end_y,
values=df4a.xP, statistic='sum', bins=bins)
bs_attempts = pitch.bin_statistic(df4a.end_x, df4a.end_y,
statistic='count', bins=bins)
bs_pax = pitch.bin_statistic(df4a.end_x, df4a.end_y,
values=df4a.PAx, statistic='sum', bins=bins)
# Calculate PAxpp for each bin (normalize per 100 passes)
bs_paxpp = {
'statistic': np.zeros_like(bs_pax['statistic']),
'cx': bs_pax['cx'],
'cy': bs_pax['cy']}
# Calculate PAxpp for each bin (successful - expected)
for i in range(bs_attempts['statistic'].shape[0]):
for j in range(bs_attempts['statistic'].shape[1]):
if bs_attempts['statistic'][i, j] > 0: # Avoid division by zero
bs_paxpp['statistic'][i, j] = (bs_successful['statistic'][i, j] - bs_expected['statistic'][i, j])
total_passes = bs_heatmap['statistic'].sum() # Total number of passes
# Then in your annotation loop, replace bs_pax with bs_paxpp:
for i in range(bs_heatmap['statistic'].shape[0]):
for j in range(bs_heatmap['statistic'].shape[1]):
count = bs_heatmap['statistic'][i, j]
if count > 0: # Annotate only non-empty bins
# Get the PAxpp value for this bin
paxpp_value = bs_paxpp['statistic'][i, j]
percentage = (count / total_passes) * 100
# Only annotate bins that contain at least 2% of the total passes
if percentage > 2:
bin_center_x = bs_heatmap['cx'][i, j]
bin_center_y = bs_heatmap['cy'][i, j]
# Display the PAxpp value for this bin
text = ax.text(bin_center_x, bin_center_y, f"{paxpp_value:.2f}", color="#FFFFFF",
ha="center", va="center", fontsize=18, zorder=4)
# Add path effects for the edge
text.set_path_effects([path_effects.Stroke(linewidth=4, foreground='#000000'),
path_effects.Normal()])
#Adding notes and titles
ax.text(0.5, 1.06, f"{player}'s progressive passes for {df3a.team_name.unique()[0]}", fontsize=25, va='center', ha='center', transform=ax.transAxes, fontproperties=semibold_font)
ax.text(0.5, 1.01, f"Passes : {(df4a['type_name'].count())} | Passes above expectations (PAx) %: {metrics['PAxpp'].unique()[0]} | {', '.join(df3a['competition_id'].unique())} {', '.join(df3a['formatted_season'].unique())}\nHeatmap: amount of passes from that zone | Annotations: PAx from zone",
fontsize=12, va='center', ha='center', transform=ax.transAxes)
ax.text(0.5, 0.02, f'X: @gualanodavide | Bluesky: @gualanodavide.bsky.social | Linkedin: www.linkedin.com/in/davide-gualano-a2454b187 | Newsletter: the-cutback.beehiiv.com',
fontsize=10, va='center', ha='center', transform=ax.transAxes)
ax.annotate(text="", xy=(65, 69.5), xytext=(-200, 0), textcoords="offset points", size=27, color="#000000",
arrowprops=dict(arrowstyle="-|>", shrinkA=0, color="black", linewidth=2))
#Adding team logo
fotmob_url = "https://images.fotmob.com/image_resources/logo/teamlogo/"
logo_ax = fig.add_axes([.135, 0.85, 0.09, 0.09], zorder=1)
club_icon = Image.open(urllib.request.urlopen(f"{fotmob_url}{df4a['fotmob_id'].iloc[0]}.png"))
logo_ax.imshow(club_icon)
logo_ax.axis("off")
# Save the figure with adjusted face color and transparency
plt.savefig(f'{player}-progressive-{season}.png', dpi=500, facecolor="#D7D1CF", bbox_inches="tight", transparent=True)
In [24]:
# Selecting only buildup passes
df6a = Xb[Xb["is_buildup"] == True]
df6a['PAx'] = df6a['outcome'] - df6a['xP']
# Group once and calculate all metrics in a single operation
metrics = (df6a.groupby(
["player_id", "player_name", "team_name", "season_id"],
observed=True
).agg(
expected_passes=("xP", "sum"),
attempted_passes=("outcome", "count"),
successful_passes=("outcome", "sum")
).reset_index())
# Calculate PAxpp (Passes above expectations per pass, normalized per 100 passes)
metrics['PAxpp'] = (
((metrics['successful_passes'] - metrics['expected_passes']) /
metrics['attempted_passes']) * 100
).round(3)
/var/folders/ns/3wxdg4g57h77vxwmr4wzmvt40000gn/T/ipykernel_7391/1780603104.py:3: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df6a['PAx'] = df6a['outcome'] - df6a['xP']
In [25]:
# setting up the pitch, bins and figure
pitch = Pitch(pitch_type='custom', pitch_width=68, pitch_length=105, goal_type='box', linewidth=2, line_color='black', half=False)
bins = (9, 6)
fig, ax = pitch.draw(figsize=(16, 11), constrained_layout=True, tight_layout=False)
# plot the heatmap - more intense red = more passes originating from that square
cmap = mcolors.LinearSegmentedColormap.from_list("custom_red", ["#D7D1CF", "#FF0000"])
bs_heatmap = pitch.bin_statistic(df6a.end_x_a0, df6a.end_y_a0, statistic='count', bins=bins)
hm = pitch.heatmap(bs_heatmap, ax=ax, cmap=cmap, zorder = 3, alpha = 0.8)
# plot the underliying passes
am = pitch.arrows(df6a.start_x_a0, df6a.start_y_a0, df6a.end_x_a0, df6a.end_y_a0, width=0.8, alpha = 0.5, zorder = 1,
headwidth = 10, headlength = 8, color = '#000000', label = 'successful passes', ax=ax)
# Create bin statistics for different metrics
bs_successful = pitch.bin_statistic(df6a.end_x_a0, df6a.end_y_a0,
values=df6a.outcome, statistic='sum', bins=bins)
bs_expected = pitch.bin_statistic(df6a.end_x_a0, df6a.end_y_a0,
values=df6a.xP, statistic='sum', bins=bins)
bs_attempts = pitch.bin_statistic(df6a.end_x_a0, df6a.end_y_a0,
statistic='count', bins=bins)
bs_pax = pitch.bin_statistic(df6a.end_x_a0, df6a.end_y_a0,
values=df6a.PAx, statistic='sum', bins=bins)
# Calculate PAxpp for each bin (normalize per 100 passes)
bs_paxpp = {
'statistic': np.zeros_like(bs_pax['statistic']),
'cx': bs_pax['cx'],
'cy': bs_pax['cy']
}
# Calculate PAxpp for each bin (successful - expected)
for i in range(bs_attempts['statistic'].shape[0]):
for j in range(bs_attempts['statistic'].shape[1]):
if bs_attempts['statistic'][i, j] > 0: # Avoid division by zero
bs_paxpp['statistic'][i, j] = (bs_successful['statistic'][i, j] - bs_expected['statistic'][i, j])
total_passes = bs_heatmap['statistic'].sum() # Total number of passes
# Then in your annotation loop, replace bs_pax with bs_paxpp:
for i in range(bs_heatmap['statistic'].shape[0]):
for j in range(bs_heatmap['statistic'].shape[1]):
count = bs_heatmap['statistic'][i, j]
if count > 0: # Annotate only non-empty bins
# Get the PAxpp value for this bin
paxpp_value = bs_paxpp['statistic'][i, j]
percentage = (count / total_passes) * 100
# Only annotate bins that contain at least 2% of the total passes
if percentage > 2:
bin_center_x = bs_heatmap['cx'][i, j]
bin_center_y = bs_heatmap['cy'][i, j]
# Display the PAxpp value for this bin
text = ax.text(bin_center_x, bin_center_y, f"{paxpp_value:.2f}", color="#FFFFFF",
ha="center", va="center", fontsize=18, zorder=4)
# Add path effects for the edge
text.set_path_effects([path_effects.Stroke(linewidth=4, foreground='#000000'),
path_effects.Normal()])
#Adding notes and titles
ax.text(0.5, 1.06, f"{player}'s buildup passes for {df3a.team_name.unique()[0]}", fontsize=25, va='center', ha='center', transform=ax.transAxes, fontproperties=semibold_font)
ax.text(0.5, 1.01, f"Passes : {(df6a['type_name'].count())} | Passes above expectations (PAx) %: {metrics['PAxpp'].unique()[0]} | {', '.join(df3a['competition_id'].unique())} {', '.join(df3a['formatted_season'].unique())}\nHeatmap: amount of passes into that zone | Annotations: PAx into zone",
fontsize=12, va='center', ha='center', transform=ax.transAxes)
ax.text(0.5, 0.02, f'X: @gualanodavide | Bluesky: @gualanodavide.bsky.social | Linkedin: www.linkedin.com/in/davide-gualano-a2454b187 | Newsletter: the-cutback.beehiiv.com',
fontsize=10, va='center', ha='center', transform=ax.transAxes)
ax.annotate(text="", xy=(65, 69.5), xytext=(-200, 0), textcoords="offset points", size=27, color="#000000",
arrowprops=dict(arrowstyle="-|>", shrinkA=0, color="black", linewidth=2))
#Adding team logo
fotmob_url = "https://images.fotmob.com/image_resources/logo/teamlogo/"
logo_ax = fig.add_axes([.135, 0.85, 0.09, 0.09], zorder=1)
club_icon = Image.open(urllib.request.urlopen(f"{fotmob_url}{df6a['fotmob_id'].iloc[0]}.png"))
logo_ax.imshow(club_icon)
logo_ax.axis("off")
# Save the figure with adjusted face color and transparency
plt.savefig(f'{player}-buildup-{season}.png', dpi=500, facecolor="#D7D1CF", bbox_inches="tight", transparent=True)
In [ ]:
In [ ]:
In [ ]:
In [26]:
#Selecting the defensive actions and cleaning the original event id from data source
def_actions = dfa[dfa["type_name"].isin(['interception', 'clearance', 'tackle', 'foul', 'dribble'])]
def_actions = def_actions.dropna(subset=['original_event_id'])
def_actions['original_event_id'] = def_actions['original_event_id'].astype(int)
In [27]:
#Creating a column to flag carries that are recoveries for the data collection company
def_actions['is_recovery'] = def_actions['original_event_id'].isin(recoveries['original_event_id'])
In [28]:
#Filtering the dataframe to include the defensive actions we want + successive pass
#We do all of this as the atomic dataframe has the atomic vaep feature but not the xP feature
#And in both dataframes we have recoveries flagged as carries
df7a = def_actions[~((def_actions['type_name'] == 'dribble') & (def_actions['is_recovery'] == False))]
df8a = df7a[df7a["next_type_name"] == 'pass']
df8a['vaep_net'] = df8a['vaep_value']+df8a['next_vaep_value']
df9a = df8a[(df8a["player_name"] == player) & (df8a["next_player_name"] == player)]
/var/folders/ns/3wxdg4g57h77vxwmr4wzmvt40000gn/T/ipykernel_7391/2554384895.py:6: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df8a['vaep_net'] = df8a['vaep_value']+df8a['next_vaep_value']
In [29]:
#We actually merge the xP values on the atomic dataframe that contains the atomic vaep feature to have the final dataframe to work with
df9a['next_original_event_id'] = df9a['next_original_event_id'].astype(int)
fb['next_original_event_id'] = fb['original_event_id']
fb = fb.filter(items=['game_id', 'next_original_event_id', 'team_id', 'xP', 'PAx', 'outcome'])
df10a = df9a.merge(fb, how="left")
/var/folders/ns/3wxdg4g57h77vxwmr4wzmvt40000gn/T/ipykernel_7391/2302839364.py:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df9a['next_original_event_id'] = df9a['next_original_event_id'].astype(int)
In [30]:
#Checking that we actually have only passes where the defensive action + passes is made from the same player
print(df10a.player_name.unique())
print(df10a.next_player_name.unique())
['Dean Huijsen'] ['Dean Huijsen']
In [31]:
df10a['PAx'] = df10a['outcome'] - df10a['xP']
# Group once and calculate all metrics in a single operation
metrics = (df10a.groupby(
["player_id", "player_name", "team_name", "season_id"],
observed=True
).agg(
expected_passes=("xP", "sum"),
attempted_passes=("outcome", "count"),
successful_passes=("outcome", "sum")
).reset_index())
# Calculate PAxpp (Passes above expectations per pass, normalized per 100 passes)
metrics['PAxpp'] = (
((metrics['successful_passes'] - metrics['expected_passes']) /
metrics['attempted_passes']) * 100
).round(3)
In [32]:
# setting up the pitch, bins and figure
pitch = Pitch(pitch_type='custom', pitch_width=68, pitch_length=105, goal_type='box', linewidth=2, line_color='black', half=False)
bins = (9, 6)
fig, ax = pitch.draw(figsize=(16, 11), constrained_layout=True, tight_layout=False)
# plot the heatmap - darker colors = more passes originating from that square
cmap = mcolors.LinearSegmentedColormap.from_list("custom_red", ["#D7D1CF", "#FF0000"])
bs_heatmap = pitch.bin_statistic(df10a.next_start_x, df10a.next_start_y, values=df10a.vaep_net, statistic='count', bins=bins)
hm = pitch.heatmap(bs_heatmap, ax=ax, cmap=cmap, zorder = 3, alpha = 0.8)
# plot the underliying passes
am = pitch.arrows(df10a.next_start_x, df10a.next_start_y, df10a.next_end_x, df10a.next_end_y, width=0.8, alpha = 0.5, zorder = 1,
headwidth = 10, headlength = 8, color = '#000000', label = 'successful passes', ax=ax)
# Calculate sum value of vaep_net for each bin
bs_vaep = pitch.bin_statistic(df10a.next_start_x, df10a.next_start_y, values=df10a.vaep_net, statistic='sum', bins=bins)
# Add text annotations for each bin with the sum vaep_net value
for i in range(bs_vaep['statistic'].shape[0]):
for j in range(bs_vaep['statistic'].shape[1]):
# Get the bin center coordinates
bin_center_x, bin_center_y = bs_vaep['cx'][i, j], bs_vaep['cy'][i, j]
# Get the sum value of vaep_net for the bin
sum_vaep = bs_vaep['statistic'][i, j]
# Add annotation if sum value is not close to zero
if (sum_vaep < -0.01) | (sum_vaep > 0.01):
text = ax.text(bin_center_x, bin_center_y, f"{sum_vaep:.3f}", color="#FFFFFF", ha="center", va="center",
fontsize=20, zorder=4)
# Add path effects for the edge
text.set_path_effects([path_effects.Stroke(linewidth=4, foreground='#000000'), path_effects.Normal()])
#Adding notes and titles
ax.text(0.5, 1.06, f"{player}'s passes after tackle, interception or recovery for {df3a.team_name.unique()[0]}", fontsize=21.5, va='center', ha='center', transform=ax.transAxes, fontproperties=semibold_font)
ax.text(0.5, 1.01, f"Passes : {(df10a['type_name'].count())} | Passes above expectations (PAx) %: {metrics['PAxpp'].unique()[0]} | {', '.join(df3a['competition_id'].unique())} {', '.join(df3a['formatted_season'].unique())}\nHeatmap: amount of passes from that zone | Annotations: sum of atomic vaep value of def action + pass, when passes originate from zone",
fontsize=12, va='center', ha='center', transform=ax.transAxes)
ax.text(0.5, 0.02, f'X: @gualanodavide | Bluesky: @gualanodavide.bsky.social | Linkedin: www.linkedin.com/in/davide-gualano-a2454b187 | Newsletter: the-cutback.beehiiv.com',
fontsize=10, va='center', ha='center', transform=ax.transAxes)
ax.annotate(text="", xy=(65, 69.5), xytext=(-200, 0), textcoords="offset points", size=27, color="#000000",
arrowprops=dict(arrowstyle="-|>", shrinkA=0, color="black", linewidth=2))
#Adding team logo
fotmob_url = "https://images.fotmob.com/image_resources/logo/teamlogo/"
logo_ax = fig.add_axes([.135, 0.85, 0.08, 0.08], zorder=1)
club_icon = Image.open(urllib.request.urlopen(f"{fotmob_url}{df10a['fotmob_id'].iloc[0]}.png"))
logo_ax.imshow(club_icon)
logo_ax.axis("off")
# Save the figure with adjusted face color and transparency
plt.savefig(f'{player}-passesafterdef-{season}.png', dpi=500, facecolor="#D7D1CF", bbox_inches="tight", transparent=True)
In [ ]:
In [ ]:
In [ ]: