In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
import matplotlib.colors as mcolors
import matplotlib.patheffects as path_effects

from mplsoccer import Pitch, VerticalPitch, lines
from PIL import Image
import urllib
import socceraction
import socceraction.atomic.spadl as atomicspadl
import socceraction.spadl as spadl
In [2]:
# Load custom fonts for visualization
fe_regular = fm.FontEntry(
    fname='/Users/davidegualano/Documents/Python FTBLData/SourceSansPro-Regular.ttf',
    name='SourceSansPro-Regular'
)

semibold_font = fm.FontProperties(fname='/Users/davidegualano/Documents/Python FTBLData/SourceSansPro-SemiBold.ttf')

# Insert both fonts into the font manager
fm.fontManager.ttflist.insert(0, fe_regular)

# Set the font family to the custom regular font
matplotlib.rcParams['font.family'] = fe_regular.name
In [3]:
season = 2425
In [4]:
# Load datasets from CSV files
VAEP = pd.read_csv("aVAEPactions.csv", index_col = 0)
xP = pd.read_csv("xPactions.csv", index_col = 0)
fb = pd.read_csv("teamsFOTMOB.csv", index_col = 0)
players = pd.read_csv(f"players{season}.csv", index_col = 0)
games = pd.read_csv(f"games{season}.csv", index_col = 0)
aactions = pd.read_csv(f"atomic_actions{season}.csv", index_col = 0)
actions = pd.read_csv(f"actions{season}.csv", index_col = 0)
chains = pd.read_csv(f"possession_chains_info{season}.csv", index_col = 0)
recoveries = pd.read_csv(f"recoveries_id{season}.csv", index_col = 0)
recoveries.rename(columns = {'event_id':'original_event_id'}, inplace = True)
In [5]:
# Add descriptive action names to the atomic actions DataFrame
aactions = atomicspadl.add_names(aactions)
In [6]:
# Add descriptive action names to the actions DataFrame
actions = spadl.add_names(actions)
In [7]:
# Merge datasets to create a unified DataFrame
dfa = (
    aactions
    .merge(players, how="left")
    .merge(fb, how="left")
    .merge(VAEP, how="left"))
In [8]:
# Merge datasets to create a unified DataFrame
dfb = (
    actions
    .merge(players, how="left")
    .merge(fb, how="left")
    .merge(xP, how="left")
    .merge(chains, how="left"))
In [9]:
#Adding on both frames a set of features that we need to define progressive actions
dfa["beginning_distance"] = np.sqrt(np.square(105-dfa['x_a0']) + np.square(34-dfa['y_a0'])).round(2)
dfa["end_distance"] = np.sqrt(np.square(105-dfa['end_x']) + np.square(34-dfa['end_y'])).round(2)
dfa["length"] = dfa["end_distance"] - dfa["beginning_distance"]
dfa['length'] = dfa['length'].abs()
dfa["angle"] = np.arctan2(dfa["end_y"] - dfa["y_a0"], dfa["end_x"] - dfa["x_a0"])
dfa['angle_degrees'] = np.degrees(dfa['angle']) % 360
In [10]:
#Adding features for next actions we'll use for calculations and plotting later on
#In the def action + pass map
dfa["next_type_name"] = dfa.shift(-1, fill_value=0)["type_name"]
dfa["next_team_name"] = dfa.shift(-1, fill_value=0)["team_name"]
dfa["next_starting_position"] = dfa.shift(-1, fill_value=0)["starting_position"]
dfa["next_player_name"] = dfa.shift(-1, fill_value=0)["player_name"]
dfa["next_original_event_id"] = dfa.shift(-1, fill_value=0)["original_event_id"]
dfa["next_start_x"] = dfa.shift(-1, fill_value=0)["x_a0"]
dfa["next_start_y"] = dfa.shift(-1, fill_value=0)["y_a0"]
dfa["next_end_x"] = dfa.shift(-1, fill_value=0)["end_x"]
dfa["next_end_y"] = dfa.shift(-1, fill_value=0)["end_y"]
dfa["next_vaep_value"] = dfa.shift(-1, fill_value=0)["vaep_value"]
In [11]:
#Flagging progressive actions given a custom defintion of passes not starting in the box, with lenght of more than 5m 
#not backwards or horizontal and the pass closes the distance from starting point to center of the goal by at least 17.5%
dfa['progressive'] = np.where(
    ((dfa['beginning_distance'] - dfa['end_distance']) / dfa['beginning_distance'] >= 0.175) & (dfa['length'] > 5) & 
    (((dfa['angle_degrees'] >= 0) & (dfa['angle_degrees'] <= 60)) | ((dfa['angle_degrees'] >= 260) & (dfa['angle_degrees'] <= 360))) &
    ~((dfa['x_a0'] >= 88.5) & (dfa['y_a0'] >= 13.885) & (dfa['y_a0'] <= 54.115)),
    True, False)
In [12]:
# Function to format season ID into a readable format
def format_season_id(season_id):
    # Convert to integer if it's a float
    season_id = int(season_id)
    # Extract the last two digits of the year
    start_year = str(season_id -1)[-2:]
    # Calculate the end year
    end_year = str(season_id)[-2:]
    # Format as 20/21
    formatted_season = f"{start_year}/{end_year}"
    return formatted_season
In [13]:
#Keeping only passes in both dataframe
df1a = dfa[dfa["type_name"] == 'pass']
df1b = dfb[dfb["type_name"] == 'pass']

#Creating an outcome column and then calculating completed (1) and not (0) passes minus probability (xP)
df1b['outcome'] = np.where((df1b["result_name"] == 'success'), 1, 0)
/var/folders/ns/3wxdg4g57h77vxwmr4wzmvt40000gn/T/ipykernel_7391/1603123889.py:6: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1b['outcome'] = np.where((df1b["result_name"] == 'success'), 1, 0)
In [14]:
#Get the whole list of players in the data
playerlist = df1a['player_name'].unique().tolist()
cleaned_playerlist = [name for name in playerlist if pd.notna(name)]
cleaned_playerlist.sort()
In [15]:
from IPython.display import display, HTML

# Generate the HTML dropdown to easily search for players
options_html = ''.join([f'<option value="{name}">{name}</option>' for name in cleaned_playerlist])

dropdown_html = f"""
<input list="players" id="dropdown" oninput="handleInput()" placeholder="Choose Someone">
<datalist id="players">
    {options_html}
</datalist>
<p id="output"></p>
<script>
function handleInput() {{
    var input = document.getElementById("dropdown").value;
    var output = document.getElementById("output");
    output.innerHTML = "Selected: " + input;
}}
</script>
"""

# Display the dropdown
display(HTML(dropdown_html))

In [16]:
#Selecting player and filtering down both dataframes
player = 'Dean Huijsen'
df2a = df1a[df1a["player_name"] == player]
df2b = df1b[df1b["player_name"] == player]

#Check the teams he played for in the season/dataset to filter down eventually
df2a.team_name.unique()
Out[16]:
array(['Bournemouth'], dtype=object)
In [17]:
# Apply the function to the 'season_id' column
df2a['formatted_season'] = df2a['season_id'].apply(format_season_id)
df2b['formatted_season'] = df2b['season_id'].apply(format_season_id)
/var/folders/ns/3wxdg4g57h77vxwmr4wzmvt40000gn/T/ipykernel_7391/4271577116.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2a['formatted_season'] = df2a['season_id'].apply(format_season_id)
/var/folders/ns/3wxdg4g57h77vxwmr4wzmvt40000gn/T/ipykernel_7391/4271577116.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2b['formatted_season'] = df2b['season_id'].apply(format_season_id)
In [18]:
#Of course we keep this lines if we want to keep action for specific teams
df3a = df2a[df2a["team_name"] == "Bournemouth"]
df3b = df2b[df2b["team_name"] == "Bournemouth"]
In [19]:
#Keeping a dataframe from each filtered dataframe for merging reasons later on
fa = df3a.filter(items=['game_id', 'original_event_id', 'team_id', 'vaep_value'])
fb = df3b.filter(items=['game_id', 'original_event_id', 'result_name', 'team_id', 'xP', 'PAx', 'outcome'])
In [20]:
#Reformatting original event id column in all dataframes which is needed for merging
fa['original_event_id'] = fa['original_event_id'].astype(int)
fb['original_event_id'] = fb['original_event_id'].astype(int)
df3a['original_event_id'] = df3a['original_event_id'].astype(int)
df3b['original_event_id'] = df3b['original_event_id'].astype(int)
In [21]:
#Actual merging operations
Xa = df3a.merge(fb, how="left")
Xb = df3b.merge(fa, how="left")
In [22]:
#Selecting only progressive passes
df4a = Xa[Xa["progressive"] == True]
df4a['PAx'] = df4a['outcome'] - df4a['xP']

# Group once and calculate all metrics in a single operation
metrics = (df4a.groupby(
    ["player_id", "player_name", "team_name", "season_id"], 
    observed=True
).agg(
    expected_passes=("xP", "sum"),
    attempted_passes=("outcome", "count"),
    successful_passes=("outcome", "sum")
).reset_index())

# Calculate PAxpp (Passes above expectations per pass, normalized per 100 passes)
metrics['PAxpp'] = (
    ((metrics['successful_passes'] - metrics['expected_passes']) / 
     metrics['attempted_passes']) * 100
).round(3)
/var/folders/ns/3wxdg4g57h77vxwmr4wzmvt40000gn/T/ipykernel_7391/2894729846.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4a['PAx'] = df4a['outcome'] - df4a['xP']
In [23]:
# setting up the pitch, bins and figure
pitch = Pitch(pitch_type='custom', pitch_width=68, pitch_length=105, goal_type='box', linewidth=2, line_color='black', half=False)
bins = (9, 6)
fig, ax = pitch.draw(figsize=(16, 11), constrained_layout=True, tight_layout=False)

# plot the heatmap - more intense red = more passes originating from that square
cmap = mcolors.LinearSegmentedColormap.from_list("custom_red", ["#D7D1CF", "#FF0000"])
bs_heatmap = pitch.bin_statistic(df4a.x_a0, df4a.y_a0, statistic='count', bins=bins)
hm = pitch.heatmap(bs_heatmap, ax=ax, cmap=cmap, zorder = 3, alpha = 0.8)

# plot the underliying passes
am = pitch.arrows(df4a.x_a0, df4a.y_a0, df4a.end_x, df4a.end_y, width=0.8, alpha = 0.5, zorder = 1,
             headwidth = 10, headlength = 8, color = '#000000', label = 'successful passes', ax=ax)

# Create bin statistics for different metrics
bs_successful = pitch.bin_statistic(df4a.end_x, df4a.end_y, 
                                   values=df4a.outcome, statistic='sum', bins=bins)
bs_expected = pitch.bin_statistic(df4a.end_x, df4a.end_y, 
                                 values=df4a.xP, statistic='sum', bins=bins)
bs_attempts = pitch.bin_statistic(df4a.end_x, df4a.end_y, 
                                 statistic='count', bins=bins)
bs_pax = pitch.bin_statistic(df4a.end_x, df4a.end_y, 
                            values=df4a.PAx, statistic='sum', bins=bins)

# Calculate PAxpp for each bin (normalize per 100 passes)
bs_paxpp = {
    'statistic': np.zeros_like(bs_pax['statistic']),
    'cx': bs_pax['cx'],
    'cy': bs_pax['cy']}

# Calculate PAxpp for each bin (successful - expected)
for i in range(bs_attempts['statistic'].shape[0]):
    for j in range(bs_attempts['statistic'].shape[1]):
        if bs_attempts['statistic'][i, j] > 0:  # Avoid division by zero
            bs_paxpp['statistic'][i, j] = (bs_successful['statistic'][i, j] - bs_expected['statistic'][i, j])

total_passes = bs_heatmap['statistic'].sum()  # Total number of passes
# Then in your annotation loop, replace bs_pax with bs_paxpp:
for i in range(bs_heatmap['statistic'].shape[0]):
    for j in range(bs_heatmap['statistic'].shape[1]):
        count = bs_heatmap['statistic'][i, j]
        if count > 0:  # Annotate only non-empty bins
            # Get the PAxpp value for this bin
            paxpp_value = bs_paxpp['statistic'][i, j]
            percentage = (count / total_passes) * 100
            
            # Only annotate bins that contain at least 2% of the total passes
            if percentage > 2:
                bin_center_x = bs_heatmap['cx'][i, j]
                bin_center_y = bs_heatmap['cy'][i, j]
                
                # Display the PAxpp value for this bin
                text = ax.text(bin_center_x, bin_center_y, f"{paxpp_value:.2f}", color="#FFFFFF", 
                              ha="center", va="center", fontsize=18, zorder=4)
                
                # Add path effects for the edge
                text.set_path_effects([path_effects.Stroke(linewidth=4, foreground='#000000'),
                                      path_effects.Normal()])

#Adding notes and titles
ax.text(0.5, 1.06, f"{player}'s progressive passes for {df3a.team_name.unique()[0]}", fontsize=25, va='center', ha='center', transform=ax.transAxes, fontproperties=semibold_font)
ax.text(0.5, 1.01, f"Passes : {(df4a['type_name'].count())}  |  Passes above expectations (PAx) %: {metrics['PAxpp'].unique()[0]}  |  {', '.join(df3a['competition_id'].unique())} {', '.join(df3a['formatted_season'].unique())}\nHeatmap: amount of passes from that zone  |  Annotations: PAx from zone",
        fontsize=12, va='center', ha='center', transform=ax.transAxes)
ax.text(0.5, 0.02, f'X: @gualanodavide | Bluesky: @gualanodavide.bsky.social | Linkedin: www.linkedin.com/in/davide-gualano-a2454b187 | Newsletter: the-cutback.beehiiv.com',
        fontsize=10, va='center', ha='center', transform=ax.transAxes)
ax.annotate(text="", xy=(65, 69.5), xytext=(-200, 0), textcoords="offset points", size=27, color="#000000", 
        arrowprops=dict(arrowstyle="-|>", shrinkA=0, color="black", linewidth=2))

#Adding team logo
fotmob_url = "https://images.fotmob.com/image_resources/logo/teamlogo/"
logo_ax = fig.add_axes([.135, 0.85, 0.09, 0.09], zorder=1)
club_icon = Image.open(urllib.request.urlopen(f"{fotmob_url}{df4a['fotmob_id'].iloc[0]}.png"))
logo_ax.imshow(club_icon)
logo_ax.axis("off")

# Save the figure with adjusted face color and transparency
plt.savefig(f'{player}-progressive-{season}.png', dpi=500, facecolor="#D7D1CF", bbox_inches="tight", transparent=True)
No description has been provided for this image
In [24]:
# Selecting only buildup passes 
df6a = Xb[Xb["is_buildup"] == True]
df6a['PAx'] = df6a['outcome'] - df6a['xP']

# Group once and calculate all metrics in a single operation
metrics = (df6a.groupby(
    ["player_id", "player_name", "team_name", "season_id"], 
    observed=True
).agg(
    expected_passes=("xP", "sum"),
    attempted_passes=("outcome", "count"),
    successful_passes=("outcome", "sum")
).reset_index())

# Calculate PAxpp (Passes above expectations per pass, normalized per 100 passes)
metrics['PAxpp'] = (
    ((metrics['successful_passes'] - metrics['expected_passes']) / 
     metrics['attempted_passes']) * 100
).round(3)
/var/folders/ns/3wxdg4g57h77vxwmr4wzmvt40000gn/T/ipykernel_7391/1780603104.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df6a['PAx'] = df6a['outcome'] - df6a['xP']
In [25]:
# setting up the pitch, bins and figure
pitch = Pitch(pitch_type='custom', pitch_width=68, pitch_length=105, goal_type='box', linewidth=2, line_color='black', half=False)
bins = (9, 6)
fig, ax = pitch.draw(figsize=(16, 11), constrained_layout=True, tight_layout=False)

# plot the heatmap - more intense red = more passes originating from that square
cmap = mcolors.LinearSegmentedColormap.from_list("custom_red", ["#D7D1CF", "#FF0000"])
bs_heatmap = pitch.bin_statistic(df6a.end_x_a0, df6a.end_y_a0, statistic='count', bins=bins)
hm = pitch.heatmap(bs_heatmap, ax=ax, cmap=cmap, zorder = 3, alpha = 0.8)

# plot the underliying passes
am = pitch.arrows(df6a.start_x_a0, df6a.start_y_a0, df6a.end_x_a0, df6a.end_y_a0, width=0.8, alpha = 0.5, zorder = 1,
             headwidth = 10, headlength = 8, color = '#000000', label = 'successful passes', ax=ax)

# Create bin statistics for different metrics
bs_successful = pitch.bin_statistic(df6a.end_x_a0, df6a.end_y_a0, 
                                   values=df6a.outcome, statistic='sum', bins=bins)
bs_expected = pitch.bin_statistic(df6a.end_x_a0, df6a.end_y_a0, 
                                 values=df6a.xP, statistic='sum', bins=bins)
bs_attempts = pitch.bin_statistic(df6a.end_x_a0, df6a.end_y_a0, 
                                 statistic='count', bins=bins)
bs_pax = pitch.bin_statistic(df6a.end_x_a0, df6a.end_y_a0, 
                            values=df6a.PAx, statistic='sum', bins=bins)

# Calculate PAxpp for each bin (normalize per 100 passes)
bs_paxpp = {
    'statistic': np.zeros_like(bs_pax['statistic']),
    'cx': bs_pax['cx'],
    'cy': bs_pax['cy']
}

# Calculate PAxpp for each bin (successful - expected)
for i in range(bs_attempts['statistic'].shape[0]):
    for j in range(bs_attempts['statistic'].shape[1]):
        if bs_attempts['statistic'][i, j] > 0:  # Avoid division by zero
            bs_paxpp['statistic'][i, j] = (bs_successful['statistic'][i, j] - bs_expected['statistic'][i, j])

total_passes = bs_heatmap['statistic'].sum()  # Total number of passes
# Then in your annotation loop, replace bs_pax with bs_paxpp:
for i in range(bs_heatmap['statistic'].shape[0]):
    for j in range(bs_heatmap['statistic'].shape[1]):
        count = bs_heatmap['statistic'][i, j]
        if count > 0:  # Annotate only non-empty bins
            # Get the PAxpp value for this bin
            paxpp_value = bs_paxpp['statistic'][i, j]
            percentage = (count / total_passes) * 100
            
            # Only annotate bins that contain at least 2% of the total passes
            if percentage > 2:
                bin_center_x = bs_heatmap['cx'][i, j]
                bin_center_y = bs_heatmap['cy'][i, j]
                
                # Display the PAxpp value for this bin
                text = ax.text(bin_center_x, bin_center_y, f"{paxpp_value:.2f}", color="#FFFFFF", 
                              ha="center", va="center", fontsize=18, zorder=4)
                
                # Add path effects for the edge
                text.set_path_effects([path_effects.Stroke(linewidth=4, foreground='#000000'),
                                      path_effects.Normal()])

#Adding notes and titles
ax.text(0.5, 1.06, f"{player}'s buildup passes for {df3a.team_name.unique()[0]}", fontsize=25, va='center', ha='center', transform=ax.transAxes, fontproperties=semibold_font)
ax.text(0.5, 1.01, f"Passes : {(df6a['type_name'].count())}  |  Passes above expectations (PAx) %: {metrics['PAxpp'].unique()[0]}  |  {', '.join(df3a['competition_id'].unique())} {', '.join(df3a['formatted_season'].unique())}\nHeatmap: amount of passes into that zone  |  Annotations: PAx into zone",
        fontsize=12, va='center', ha='center', transform=ax.transAxes)
ax.text(0.5, 0.02, f'X: @gualanodavide | Bluesky: @gualanodavide.bsky.social | Linkedin: www.linkedin.com/in/davide-gualano-a2454b187 | Newsletter: the-cutback.beehiiv.com', 
        fontsize=10, va='center', ha='center', transform=ax.transAxes)
ax.annotate(text="", xy=(65, 69.5), xytext=(-200, 0), textcoords="offset points", size=27, color="#000000", 
        arrowprops=dict(arrowstyle="-|>", shrinkA=0, color="black", linewidth=2))

#Adding team logo
fotmob_url = "https://images.fotmob.com/image_resources/logo/teamlogo/"
logo_ax = fig.add_axes([.135, 0.85, 0.09, 0.09], zorder=1)
club_icon = Image.open(urllib.request.urlopen(f"{fotmob_url}{df6a['fotmob_id'].iloc[0]}.png"))
logo_ax.imshow(club_icon)
logo_ax.axis("off")

# Save the figure with adjusted face color and transparency
plt.savefig(f'{player}-buildup-{season}.png', dpi=500, facecolor="#D7D1CF", bbox_inches="tight", transparent=True)
No description has been provided for this image
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [26]:
#Selecting the defensive actions and cleaning the original event id from data source
def_actions = dfa[dfa["type_name"].isin(['interception', 'clearance', 'tackle', 'foul', 'dribble'])]
def_actions = def_actions.dropna(subset=['original_event_id'])
def_actions['original_event_id'] = def_actions['original_event_id'].astype(int)
In [27]:
#Creating a column to flag carries that are recoveries for the data collection company
def_actions['is_recovery'] = def_actions['original_event_id'].isin(recoveries['original_event_id'])
In [28]:
#Filtering the dataframe to include the defensive actions we want + successive pass
#We do all of this as the atomic dataframe has the atomic vaep feature but not the xP feature
#And in both dataframes we have recoveries flagged as carries
df7a = def_actions[~((def_actions['type_name'] == 'dribble') & (def_actions['is_recovery'] == False))]
df8a = df7a[df7a["next_type_name"] == 'pass']
df8a['vaep_net'] = df8a['vaep_value']+df8a['next_vaep_value']
df9a = df8a[(df8a["player_name"] == player) & (df8a["next_player_name"] == player)]
/var/folders/ns/3wxdg4g57h77vxwmr4wzmvt40000gn/T/ipykernel_7391/2554384895.py:6: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df8a['vaep_net'] = df8a['vaep_value']+df8a['next_vaep_value']
In [29]:
#We actually merge the xP values on the atomic dataframe that contains the atomic vaep feature to have the final dataframe to work with
df9a['next_original_event_id'] = df9a['next_original_event_id'].astype(int)
fb['next_original_event_id'] = fb['original_event_id']
fb = fb.filter(items=['game_id', 'next_original_event_id', 'team_id', 'xP', 'PAx', 'outcome'])
df10a = df9a.merge(fb, how="left")
/var/folders/ns/3wxdg4g57h77vxwmr4wzmvt40000gn/T/ipykernel_7391/2302839364.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df9a['next_original_event_id'] = df9a['next_original_event_id'].astype(int)
In [30]:
#Checking that we actually have only passes where the defensive action + passes is made from the same player
print(df10a.player_name.unique())
print(df10a.next_player_name.unique())
['Dean Huijsen']
['Dean Huijsen']
In [31]:
df10a['PAx'] = df10a['outcome'] - df10a['xP']

# Group once and calculate all metrics in a single operation
metrics = (df10a.groupby(
    ["player_id", "player_name", "team_name", "season_id"], 
    observed=True
).agg(
    expected_passes=("xP", "sum"),
    attempted_passes=("outcome", "count"),
    successful_passes=("outcome", "sum")
).reset_index())

# Calculate PAxpp (Passes above expectations per pass, normalized per 100 passes)
metrics['PAxpp'] = (
    ((metrics['successful_passes'] - metrics['expected_passes']) / 
     metrics['attempted_passes']) * 100
).round(3)
In [32]:
# setting up the pitch, bins and figure
pitch = Pitch(pitch_type='custom', pitch_width=68, pitch_length=105, goal_type='box', linewidth=2, line_color='black', half=False)
bins = (9, 6)
fig, ax = pitch.draw(figsize=(16, 11), constrained_layout=True, tight_layout=False)

# plot the heatmap - darker colors = more passes originating from that square
cmap = mcolors.LinearSegmentedColormap.from_list("custom_red", ["#D7D1CF", "#FF0000"])
bs_heatmap = pitch.bin_statistic(df10a.next_start_x, df10a.next_start_y, values=df10a.vaep_net, statistic='count', bins=bins)
hm = pitch.heatmap(bs_heatmap, ax=ax, cmap=cmap, zorder = 3, alpha = 0.8)

# plot the underliying passes
am = pitch.arrows(df10a.next_start_x, df10a.next_start_y, df10a.next_end_x, df10a.next_end_y, width=0.8, alpha = 0.5, zorder = 1,
             headwidth = 10, headlength = 8, color = '#000000', label = 'successful passes', ax=ax)

# Calculate sum value of vaep_net for each bin
bs_vaep = pitch.bin_statistic(df10a.next_start_x, df10a.next_start_y, values=df10a.vaep_net, statistic='sum', bins=bins)

# Add text annotations for each bin with the sum vaep_net value
for i in range(bs_vaep['statistic'].shape[0]):
    for j in range(bs_vaep['statistic'].shape[1]):
        # Get the bin center coordinates
        bin_center_x, bin_center_y = bs_vaep['cx'][i, j], bs_vaep['cy'][i, j]
        # Get the sum value of vaep_net for the bin
        sum_vaep = bs_vaep['statistic'][i, j]
        # Add annotation if sum value is not close to zero
        if (sum_vaep < -0.01) | (sum_vaep > 0.01):
            text = ax.text(bin_center_x, bin_center_y, f"{sum_vaep:.3f}", color="#FFFFFF", ha="center", va="center",
                               fontsize=20, zorder=4)
            # Add path effects for the edge
            text.set_path_effects([path_effects.Stroke(linewidth=4, foreground='#000000'), path_effects.Normal()])

#Adding notes and titles
ax.text(0.5, 1.06, f"{player}'s passes after tackle, interception or recovery for {df3a.team_name.unique()[0]}", fontsize=21.5, va='center', ha='center', transform=ax.transAxes, fontproperties=semibold_font)
ax.text(0.5, 1.01, f"Passes : {(df10a['type_name'].count())}  |  Passes above expectations (PAx) %: {metrics['PAxpp'].unique()[0]}  |  {', '.join(df3a['competition_id'].unique())} {', '.join(df3a['formatted_season'].unique())}\nHeatmap: amount of passes from that zone  |  Annotations: sum of atomic vaep value of def action + pass, when passes originate from zone",
        fontsize=12, va='center', ha='center', transform=ax.transAxes)
ax.text(0.5, 0.02, f'X: @gualanodavide | Bluesky: @gualanodavide.bsky.social | Linkedin: www.linkedin.com/in/davide-gualano-a2454b187 | Newsletter: the-cutback.beehiiv.com',
        fontsize=10, va='center', ha='center', transform=ax.transAxes)
ax.annotate(text="", xy=(65, 69.5), xytext=(-200, 0), textcoords="offset points", size=27, color="#000000", 
        arrowprops=dict(arrowstyle="-|>", shrinkA=0, color="black", linewidth=2))

#Adding team logo
fotmob_url = "https://images.fotmob.com/image_resources/logo/teamlogo/"
logo_ax = fig.add_axes([.135, 0.85, 0.08, 0.08], zorder=1)
club_icon = Image.open(urllib.request.urlopen(f"{fotmob_url}{df10a['fotmob_id'].iloc[0]}.png"))
logo_ax.imshow(club_icon)
logo_ax.axis("off")

# Save the figure with adjusted face color and transparency
plt.savefig(f'{player}-passesafterdef-{season}.png', dpi=500, facecolor="#D7D1CF", bbox_inches="tight", transparent=True)
No description has been provided for this image
In [ ]:
 
In [ ]:
 
In [ ]: