In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import math
import matplotlib
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from highlight_text import fig_text, ax_text
import socceraction
import socceraction.spadl as spadl
In [2]:
# Register the two Source Sans Pro faces used by the figures in this notebook.
# Each FontEntry ties a family name to the .ttf file it is loaded from.
# NOTE(review): the paths are absolute and machine-specific; the fonts resolve only
# where these files exist.
fe_regular = fm.FontEntry(
    name='SourceSansPro-Regular',
    fname='/Users/davidegualano/Documents/Python FTBLData/SourceSansPro-Regular.ttf',
)
fe_semibold = fm.FontEntry(
    name='SourceSansPro-SemiBold',
    fname='/Users/davidegualano/Documents/Python FTBLData/SourceSansPro-SemiBold.ttf',
)

# Place both entries at the front of matplotlib's font list
# (regular at index 0, semibold at index 1)
for _slot, _entry in enumerate((fe_regular, fe_semibold)):
    fm.fontManager.ttflist.insert(_slot, _entry)

# Make the regular face the default font family for all text
matplotlib.rcParams['font.family'] = fe_regular.name
In [3]:
# Season code interpolated into the players/games/actions CSV file names when the data is loaded
season = 2425
In [4]:
# Read every input table, using the first CSV column as the DataFrame index.
def _read_table(path):
    """Load one CSV file with its first column as the index."""
    return pd.read_csv(path, index_col=0)


# Inputs whose file names do not depend on the season
xP = _read_table("xPactions.csv")
fb = _read_table("teamsFOTMOB.csv")
positions = _read_table("clustered_position.csv")

# Inputs whose file names carry the season code
players = _read_table(f"players{season}.csv")
games = _read_table(f"games{season}.csv")
actions = _read_table(f"actions{season}.csv")
In [5]:
# Identifying columns kept from the games table
game_columns = ["game_id", "competition_id", "season_id"]
games0 = games.loc[:, game_columns]

# Identifying columns kept from the players table
player_columns = ['game_id', 'team_id', 'player_id', 'player_name', 'season_id', 'competition_id']
players_info = players.loc[:, player_columns]

# Let socceraction attach its descriptive name columns to the actions
# (type_name and result_name are the ones used further down)
actions = spadl.add_names(actions)
In [6]:
# Build the unified table: start from the actions and left-join each lookup table in turn.
# No `on=` is passed, so pandas joins on whatever columns the two frames have in common.
df = actions
for _lookup in (players_info, fb, xP):
    df = df.merge(_lookup, how="left")
In [7]:
# Keep only the passes. The explicit .copy() makes df1 an independent DataFrame, so adding
# columns to it no longer raises SettingWithCopyWarning.
df1 = df[df["type_name"] == 'pass'].copy()

# Binary outcome flag needed for the analysis: 1 for a successful pass, 0 otherwise
df1['outcome'] = np.where(df1["result_name"] == 'success', 1, 0)
/var/folders/ns/3wxdg4g57h77vxwmr4wzmvt40000gn/T/ipykernel_7043/4087633236.py:5: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['outcome'] = np.where((df1["result_name"] == 'success'), 1, 0)
In [8]:
# Per-player aggregates over all passes: one row per player / team / season combination.
player_keys = ["player_id", "player_name", "team_name", "season_id"]
passes_by_player = df1.groupby(player_keys, observed=True)

# Total xP: the sum of the xP value of every pass
X0 = passes_by_player["xP"].sum().reset_index()

# Attempted passes: number of passes in the group
Y0 = passes_by_player["outcome"].count().reset_index(name='attempted_passes')

# Successful passes: sum of the 0/1 outcome flag
K0 = passes_by_player["outcome"].sum().reset_index(name='successful_passes')
In [9]:
# Final per-player table: total xP, attempted passes, successful passes and the position
# table, left-joined one after the other on their shared columns.
W0 = X0.merge(Y0, how="left")
W0 = W0.merge(K0, how="left")
W0 = W0.merge(positions, how="left")
In [10]:
attempts = W0['attempted_passes']
completed = W0['successful_passes']
expected = W0['xP']

# PAx100: passes completed above expectation, normalised per 100 attempted passes
W0['PAx100'] = completed.sub(expected).div(attempts).mul(100).round(3)

# Expected and actual completion percentage
W0['xP%'] = expected.div(attempts).mul(100)
W0['Passes %'] = completed.div(attempts).mul(100)
In [11]:
# List the distinct position labels to see which values are available for filtering
W0['position'].unique()
Out[11]:
array(['GK', nan, 'SV', 'ST', 'LCB', 'RWB', 'RW', 'SS', 'AMR', 'RCB',
       'LW', 'CB', 'LWB', 'CM', 'AM', 'AML', 'DM'], dtype=object)
In [12]:
# Narrow the table down to the players we want to explore:
# 1) players with at least 500 attempted passes
U1 = W0.loc[W0['attempted_passes'].ge(500)]
# 2) centre-back positions only
U2 = U1.loc[U1['position'].isin(['CB', 'LCB', 'RCB'])]
# 3) the ten highest PAx100 values, best first
U3 = (
    U2
    .sort_values(by="PAx100", ascending=False)
    .reset_index(drop=True)
    .head(10)
)
In [13]:
# Columns kept for display, re-sorted in ascending PAx100 so the best value sits in the last row
table_columns = ['player_name', 'player_id', 'team_name', 'PAx100']
U4 = (
    U3
    .filter(items=table_columns)
    .sort_values(by="PAx100", ascending=True)
    .reset_index(drop=True)
)
U4
Out[13]:
player_name player_id team_name PAx100
0 Auston Trusty 298687.0 Celtic 8.245
1 Ezri Konsa 301440.0 Aston Villa 8.362
2 Carl Starfelt 378626.0 Celta Vigo 8.376
3 Nico Elvedi 243534.0 Borussia M.Gladbach 8.653
4 Federico Gatti 439909.0 Juventus 8.787
5 Leny Yoro 437299.0 Man Utd 8.803
6 Mujaid Sadick 357387.0 Genk 8.834
7 Manuel Akanji 297390.0 Man City 8.929
8 Pierre Kalulu 391836.0 Juventus 9.036
9 Jalen Neal 433616.0 L.A. Galaxy 9.144
In [14]:
# Table-style figure: one text row per player in U4, with the PAx100 column highlighted.
# The figure is 1800x1800 px at 500 dpi so everything fits pleasingly.
fig = plt.figure(figsize=(1800/500, 1800/500), dpi=500)
ax = plt.subplot()

ncols = U4.shape[1]
nrows = U4.shape[0]

# One unit per row/column plus one unit of head-room
ax.set_xlim(0, ncols + 1)
ax.set_ylim(0, nrows + 1)

# x coordinate of each displayed column. Deliberately NOT called `positions`: that name
# holds the positions DataFrame loaded earlier, and overwriting it with a list breaks any
# re-run of the cell that merges the position table.
column_x = [0.1, 2.8, 4.2]
columns = ['player_name', 'team_name', 'PAx100']

# Text style per column: (font size, colour, font name)
column_style = {
    'player_name': (9, '#000000', fe_semibold.name),
    'team_name': (4, '#4E616C', fe_regular.name),
    'PAx100': (10, '#FFFFFF', fe_semibold.name),
}

# Write every cell; row i is centred at y = i + 0.5, the first column is left-aligned
for i in range(nrows):
    for j, column in enumerate(columns):
        ha = 'left' if j == 0 else 'center'
        fontsize, color, fontname = column_style[column]
        ax.annotate(
            xy=(column_x[j], i + .5), text=str(U4[column].iloc[i]), ha=ha, va='center',
            fontsize=fontsize, color=color, fontname=fontname)

# Dividing lines: thick rules above and below the table, thin rules between rows
x_min, x_max = ax.get_xlim()
ax.plot([x_min, x_max], [nrows, nrows], lw=1.5, color='black', marker='', zorder=4)
ax.plot([x_min, x_max], [0, 0], lw=1.5, color='black', marker='', zorder=4)
for y in range(1, nrows):
    ax.plot([x_min, x_max], [y, y], lw=0.5, color='gray', ls='-', zorder=3, marker='')

# Coloured band behind the highlighted column, drawn exactly once.
# It used to sit inside the loop above, which stacked nrows-1 half-transparent copies
# (making alpha meaningless) and drew nothing at all for a single-row table.
# Raise alpha if a more saturated band is wanted.
ax.fill_between(x=[3.7, 4.7], y1=nrows, y2=0, color='#D32F2F', alpha=0.5, ec='None')

# Title and subtitle, positioned in figure coordinates
ax.text(0.5, 0.86, 'Passes above expectations (PAx)', transform=fig.transFigure,
        horizontalalignment='center', fontsize=12, fontfamily='SourceSansPro-SemiBold')
ax.text(0.5, 0.83, 'Centerbacks | Minimum 500 attempted passes | Passes above expectations %',
        transform=fig.transFigure, horizontalalignment='center', fontsize=4, color='#4E616C')

# Footer with the author's contacts (suptitle placed near the bottom of the figure)
fig.suptitle('X: @gualanodavide | Bluesky: @gualanodavide.bsky.social | Linkedin: www.linkedin.com/in/davide-gualano-a2454b187 | Newsletter: the-cutback.beehiiv.com',
             horizontalalignment='center', x=0.5, y=0.09, fontsize=3, color="#000000")

ax.set_axis_off()
plt.savefig('PAx100.png', dpi=500, facecolor="#D7D1CF", bbox_inches="tight", transparent=True)
No description has been provided for this image
In [15]:
# Scatter of expected vs actual pass completion: every qualifying centre-back (U2) in blue,
# with the top 10 by PAx100 (U3) drawn again in red on top.
fig = plt.figure(figsize=(1800/500, 1800/500), dpi=500, facecolor='#D7D1CF')
ax = plt.subplot(111, facecolor='#D7D1CF')

# Spines: hide top/right, recolour bottom/left
ax.spines["top"].set(visible=False)
ax.spines["right"].set(visible=False)
ax.spines["bottom"].set_color('#ACA7A5')
ax.spines["left"].set_color('#ACA7A5')

# Thin grid on both axes
ax.grid(lw=0.1, color="#ACA7A5", axis='x', ls="-")
ax.grid(lw=0.1, color="#ACA7A5", axis='y', ls="-")

# Both axes show percentages between 50 and 100
ax.set_ylim(50, 100)
ax.set_xlim(50, 100)

# The two point clouds share every style setting except the face colour
for frame, face_colour in ((U2, '#1565C0'), (U3, '#D32F2F')):
    ax.scatter(
        frame['xP%'],
        frame['Passes %'],
        zorder=3,
        s=15,
        fc=face_colour,  # The background color
        ec="#000000",    # The edge color
        alpha=0.70,      # Transparency (from zero to 1)
        lw=0.5)

# Major ticks every 10 percentage points
ax.xaxis.set_major_locator(ticker.MultipleLocator(10))
ax.yaxis.set_major_locator(ticker.MultipleLocator(10))

# Keep tick labels and axis labels at the same font size
ax.tick_params(axis='both', labelsize=7, color='#ACA7A5', labelcolor='#ACA7A5')

# Axis labels
ax.yaxis.set_label_text("passes completion %", size=7, color="#4E616C")
ax.xaxis.set_label_text("expected passes completion %", size=7, color="#4E616C")

# Line of identity: points above it completed more passes than expected
ax.plot([100, 0], [100, 0], ls="-", lw=0.8, color='#ACA7A5')

# Notes explaining the direction of the x axis
ax.text(
    x=61, y=51,
    s="More difficult passes",
    color="#4E616C",
    size=4)
ax.text(
    x=92, y=51,
    s="Less difficult passes",
    color="#4E616C",
    size=4)

# Title and subtitle, placed above the axes in data coordinates
ax.text(
    x=50, y=104,
    s="Passes above expectations (PAx)",
    color="#000000",
    size=12, fontfamily='SourceSansPro-SemiBold')
ax.text(
    x=50, y=102,
    s="Top 10 in PAx % vs the Rest | Difference between actual and expected completion",
    color="#4E616C",
    size=5)

# Endnote anchored below the axes in axes-fraction coordinates.
# `textcoords='offset points'` was dropped: without `xytext` it only triggered a UserWarning
# and nudged the text by a fraction of a point.
ax.annotate(
    'X: @gualanodavide | Bluesky: @gualanodavide.bsky.social | Linkedin: www.linkedin.com/in/davide-gualano-a2454b187 | Newsletter: the-cutback.beehiiv.com',
    xy=(0.5, -0.15),
    xycoords='axes fraction',
    color="#000000",
    va='top',
    ha='center',
    size=3)

plt.savefig("PassingProfile.png", dpi=500, bbox_inches="tight")
/var/folders/ns/3wxdg4g57h77vxwmr4wzmvt40000gn/T/ipykernel_7043/2475199569.py:79: UserWarning: You have used the `textcoords` kwarg, but not the `xytext` kwarg.  This can lead to surprising results.
  plt.annotate('X: @gualanodavide | Bluesky: @gualanodavide.bsky.social | Linkedin: www.linkedin.com/in/davide-gualano-a2454b187 | Newsletter: the-cutback.beehiiv.com',
No description has been provided for this image
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [16]:
# Bin edges on pass difficulty (xP), used to compare players bin by bin.
# pd.cut builds right-closed intervals by default, e.g. (0, 0.3], so an xP of exactly 0
# falls outside every bin and gets NaN.
bins = [0, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.85, 0.9, 0.95, 1]

# assign() returns a new DataFrame instead of writing a column into what pandas may still
# treat as a slice of another frame, which avoids SettingWithCopyWarning.
df1 = df1.assign(bin=pd.cut(df1['xP'], bins))

# Same three aggregates as the per-player tables, now split by difficulty bin as well
bin_keys = ["player_id", "player_name", "team_name", "season_id", "bin"]
passes_by_bin = df1.groupby(bin_keys, observed=True)

# Total xP
X1 = passes_by_bin["xP"].sum().reset_index()

# Attempted passes
Y1 = passes_by_bin["outcome"].count().reset_index(name='attempted_passes')

# Successful passes
K1 = passes_by_bin["outcome"].sum().reset_index(name='successful_passes')
/var/folders/ns/3wxdg4g57h77vxwmr4wzmvt40000gn/T/ipykernel_7043/2436291344.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['bin'] = pd.cut(df1['xP'], bins)
In [17]:
# Combine the three per-bin aggregates into a single table (left joins on shared columns)
W1 = X1.merge(Y1, how="left")
W1 = W1.merge(K1, how="left")
In [18]:
# Expected and actual completion percentage within each difficulty bin
bin_attempts = W1['attempted_passes']
W1['xP%'] = W1['xP'].div(bin_attempts).mul(100)
W1['Passes %'] = W1['successful_passes'].div(bin_attempts).mul(100)
In [19]:
# Every distinct player name in the binned table...
playerlist = W1['player_name'].unique().tolist()
# ...with missing names removed and the rest in alphabetical order
cleaned_playerlist = sorted(name for name in playerlist if not pd.isna(name))
In [20]:
import html

from IPython.display import display, HTML

# Build one <option> per player to make names easy to search.
# Names come straight from the data, so they are HTML-escaped (quotes included) before
# being placed in the value attribute and the option text; a name containing ", & or <
# would otherwise break the markup.
options_html = ''.join(
    f'<option value="{html.escape(str(name), quote=True)}">{html.escape(str(name))}</option>'
    for name in cleaned_playerlist
)

# The script only echoes the typed/selected value in the page; nothing is sent back to Python.
# textContent (rather than innerHTML) keeps that echo from being interpreted as markup.
dropdown_html = f"""
<input list="players" id="dropdown" oninput="handleInput()" placeholder="Choose Someone">
<datalist id="players">
    {options_html}
</datalist>
<p id="output"></p>
<script>
function handleInput() {{
    var input = document.getElementById("dropdown").value;
    var output = document.getElementById("output");
    output.textContent = "Selected: " + input;
}}
</script>
"""

# Display the dropdown
display(HTML(dropdown_html))

In [21]:
# Pick the two players to compare; printing the ids shows whether each name maps to a single player
R0a = W1.loc[W1['player_name'].eq('Pierre Kalulu')]
print(R0a['player_id'].unique())
R0b = W1.loc[W1['player_name'].eq('Federico Gatti')]
print(R0b['player_id'].unique())
[391836.]
[439909.]
In [22]:
# Player-vs-player figure: expected vs actual completion per difficulty bin for R0a (blue)
# and R0b (red).

# Names for the title. R0a / R0b were selected by player name, so every row carries the
# same name and the first row is enough. Fail early on an empty selection instead of
# leaving title1/title2 undefined (or holding a stale value from an earlier run).
if R0a.empty or R0b.empty:
    raise ValueError(
        "R0a and R0b must each contain at least one row; check the player names used to select them")
title1 = R0a['player_name'].iloc[0]
title2 = R0b['player_name'].iloc[0]

# Creating the figure
fig = plt.figure(figsize=(1800/500, 1800/500), dpi=500, facecolor='#D7D1CF')
ax = plt.subplot(111, facecolor='#D7D1CF')

# Spines: hide top/right, recolour bottom/left
ax.spines["top"].set(visible=False)
ax.spines["right"].set(visible=False)
ax.spines["bottom"].set_color('#ACA7A5')
ax.spines["left"].set_color('#ACA7A5')

# Thin grid on both axes
ax.grid(lw=0.1, color="#ACA7A5", axis='x', ls="-")
ax.grid(lw=0.1, color="#ACA7A5", axis='y', ls="-")

# Percent axes with a little head-room above 100
ax.set_ylim(0, 105)
ax.set_xlim(0, 105)

# One point per difficulty bin and player; only the face colour differs between the two
for frame, face_colour in ((R0a, '#1565C0'), (R0b, '#D32F2F')):
    ax.scatter(
        frame['xP%'],
        frame['Passes %'],
        zorder=3,
        s=20,
        fc=face_colour,  # The background color
        ec="#000000",    # The edge color
        alpha=0.70,      # Transparency (from zero to 1)
        lw=0.5)

# Major ticks every 20 percentage points
ax.xaxis.set_major_locator(ticker.MultipleLocator(20))
ax.yaxis.set_major_locator(ticker.MultipleLocator(20))

# Keep tick labels and axis labels at the same font size
ax.tick_params(axis='both', labelsize=7, color='#ACA7A5', labelcolor='#ACA7A5')

# Axis labels
ax.yaxis.set_label_text("passes completion %", size=7, color="#4E616C")
ax.xaxis.set_label_text("expected passes completion %", size=7, color="#4E616C")

# Line of identity: points above it completed more passes than expected
ax.plot([105, 0], [105, 0], ls="-", lw=0.8, color='#ACA7A5')

# One-line title; each <...> segment takes the matching entry of highlight_textprops,
# so each player's name is drawn in the colour of their points
fig_text(
    x=0.5, y=0.95,
    s=f"<{title1}> vs <{title2}>",
    highlight_textprops=[
        {'color': '#1565C0', 'fontfamily': 'SourceSansPro-SemiBold', 'size': '12'},
        {'color': '#D32F2F', 'fontfamily': 'SourceSansPro-SemiBold', 'size': '12'}
    ],
    color="#000000",
    size=12,
    fontfamily='SourceSansPro-SemiBold',
    ha='center',
    va='center',
    annotationbbox_kw={"xycoords": "figure fraction"})

# Subtitle, centred above the axes in data coordinates
title_ = ax.text(
    x=50, y=109,
    s="Difference between actual and expected completion | Passes grouped for expected completion rate",
    color="#4E616C", ha='center',
    size=4.5)

# Footer/endnote anchored below the axes in axes-fraction coordinates.
# `textcoords='offset points'` was dropped: without `xytext` it only triggered a UserWarning
# and nudged the text by a fraction of a point.
ax.annotate(
    'X: @gualanodavide | Bluesky: @gualanodavide.bsky.social | Linkedin: www.linkedin.com/in/davide-gualano-a2454b187 | Newsletter: the-cutback.beehiiv.com',
    xy=(0.5, -0.15),
    xycoords='axes fraction',
    color="#000000",
    va='top',
    ha='center',
    size=3)

plt.savefig("Passing-Performance-VS.png", dpi=500, bbox_inches="tight")
/var/folders/ns/3wxdg4g57h77vxwmr4wzmvt40000gn/T/ipykernel_7043/3785998667.py:85: UserWarning: You have used the `textcoords` kwarg, but not the `xytext` kwarg.  This can lead to surprising results.
  plt.annotate('X: @gualanodavide | Bluesky: @gualanodavide.bsky.social | Linkedin: www.linkedin.com/in/davide-gualano-a2454b187 | Newsletter: the-cutback.beehiiv.com',
No description has been provided for this image
In [ ]: