In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import math
import matplotlib
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from highlight_text import fig_text, ax_text
import socceraction
import socceraction.spadl as spadl
In [2]:
# Load custom fonts for visualization
fe_regular = fm.FontEntry(
fname='/Users/davidegualano/Documents/Python FTBLData/SourceSansPro-Regular.ttf',
name='SourceSansPro-Regular'
)
fe_semibold = fm.FontEntry(
fname='/Users/davidegualano/Documents/Python FTBLData/SourceSansPro-SemiBold.ttf',
name='SourceSansPro-SemiBold'
)
# Insert both fonts into the font manager
fm.fontManager.ttflist.insert(0, fe_regular)
fm.fontManager.ttflist.insert(1, fe_semibold)
# Set the font family to the custom regular font
matplotlib.rcParams['font.family'] = fe_regular.name
In [3]:
season = 2425
In [4]:
# Load datasets from CSV files
xP = pd.read_csv("xPactions.csv", index_col = 0)
fb = pd.read_csv("teamsFOTMOB.csv", index_col=0)
positions = pd.read_csv("clustered_position.csv", index_col = 0)
players = pd.read_csv(f"players{season}.csv", index_col = 0)
games = pd.read_csv(f"games{season}.csv", index_col = 0)
actions = pd.read_csv(f"actions{season}.csv", index_col = 0)
In [5]:
# Select relevant columns from games dataset
games0 = games[["game_id", "competition_id", "season_id"]]
# Select relevant player information
players_info = players[['game_id', 'team_id', 'player_id', 'player_name', 'season_id', 'competition_id']]
# Add descriptive action names to the actions DataFrame
actions = spadl.add_names(actions)
In [6]:
# Merge datasets to create a unified DataFrame
df = (
actions
.merge(players_info, how="left")
.merge(fb, how="left")
.merge(xP, how="left")
)
In [7]:
#Selecting only passes
df1 = df[df["type_name"] == 'pass']
#Creating necessary columns for our analysis
df1['outcome'] = np.where((df1["result_name"] == 'success'), 1, 0)
/var/folders/ns/3wxdg4g57h77vxwmr4wzmvt40000gn/T/ipykernel_7043/4087633236.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df1['outcome'] = np.where((df1["result_name"] == 'success'), 1, 0)
In [8]:
#Creating different tables for our analysis
#Total xP
X0 = (df1.groupby(["player_id", "player_name", "team_name", "season_id"], observed = True)["xP"].sum().reset_index())
#Attempted passes
Y0 = (df1.groupby(["player_id", "player_name", "team_name", "season_id"], observed = True)["outcome"].count().reset_index(name='attempted_passes'))
#Successful passes
K0 = (df1.groupby(["player_id", "player_name", "team_name", "season_id"], observed = True)["outcome"].sum().reset_index(name='successful_passes'))
In [9]:
#Creating the final table
W0 = (X0
.merge(Y0, how="left")
.merge(K0, how="left")
.merge(positions, how="left")
)
In [10]:
#Creating the column Paxpp (Passes above expectations per pass normalized for 100 passes)
W0['PAx100'] = (((W0['successful_passes'] - W0['xP']) / W0['attempted_passes']) * 100).round(3)
#Creating also expected and actual completion percentage
W0['xP%'] = (W0['xP'] / W0['attempted_passes']) * 100
W0['Passes %'] = (W0['successful_passes'] / W0['attempted_passes']) * 100
In [11]:
#We watch unique positions for filtering
W0.position.unique()
Out[11]:
array(['GK', nan, 'SV', 'ST', 'LCB', 'RWB', 'RW', 'SS', 'AMR', 'RCB', 'LW', 'CB', 'LWB', 'CM', 'AM', 'AML', 'DM'], dtype=object)
In [12]:
#We do actually filter the table so to explore what we want to
U1 = W0[W0['attempted_passes'] >= 500]
U2 = U1[U1['position'].isin(['CB', 'LCB', 'RCB'])]
U3 = U2.sort_values(by = ["PAx100"], ascending = False).reset_index(drop = True).head(10)
In [13]:
U4 = U3.filter(items=['player_name', 'player_id', 'team_name', 'PAx100']).sort_values(by = ["PAx100"], ascending = True).reset_index(drop = True)
U4
Out[13]:
player_name | player_id | team_name | PAx100 | |
---|---|---|---|---|
0 | Auston Trusty | 298687.0 | Celtic | 8.245 |
1 | Ezri Konsa | 301440.0 | Aston Villa | 8.362 |
2 | Carl Starfelt | 378626.0 | Celta Vigo | 8.376 |
3 | Nico Elvedi | 243534.0 | Borussia M.Gladbach | 8.653 |
4 | Federico Gatti | 439909.0 | Juventus | 8.787 |
5 | Leny Yoro | 437299.0 | Man Utd | 8.803 |
6 | Mujaid Sadick | 357387.0 | Genk | 8.834 |
7 | Manuel Akanji | 297390.0 | Man City | 8.929 |
8 | Pierre Kalulu | 391836.0 | Juventus | 9.036 |
9 | Jalen Neal | 433616.0 | L.A. Galaxy | 9.144 |
In [14]:
#Setting the figure, the axes and the dimension of the figure to make it all fit pleasingly
fig = plt.figure(figsize=(1800/500, 1800/500), dpi=500)
ax = plt.subplot()
ncols = U4.shape[1]
nrows = U4.shape[0]
ax.set_xlim(0, ncols + 1)
ax.set_ylim(0, nrows + 1)
positions = [0.1, 2.8, 4.2]
columns = ['player_name', 'team_name', 'PAx100']
#Conditioning for names in different columns
for i in range(nrows):
for j, column in enumerate(columns):
if j == 0:
ha = 'left'
else:
ha = 'center'
if column == 'PAx100':
fontsize = 10
color = '#FFFFFF'
fontname = fe_semibold.name
elif column == 'team_name':
fontsize = 4
color = '#4E616C'
fontname = fe_regular.name
else:
fontsize = 9
color = '#000000'
fontname = fe_semibold.name
ax.annotate(
xy=(positions[j], i + .5), text=str(U4[column].iloc[i]), ha=ha, va='center', fontsize=fontsize, color=color, fontname=fontname)
# Add dividing lines and color for the column to highlight
ax.plot([ax.get_xlim()[0], ax.get_xlim()[1]], [nrows, nrows], lw=1.5, color='black', marker='', zorder=4)
ax.plot([ax.get_xlim()[0], ax.get_xlim()[1]], [0, 0], lw=1.5, color='black', marker='', zorder=4)
for x in range(1, nrows):
ax.plot([ax.get_xlim()[0], ax.get_xlim()[1]], [x, x], lw=0.5, color='gray', ls='-', zorder=3 , marker='')
ax.fill_between(x=[3.7, 4.7], y1=nrows, y2=0, color='#D32F2F', alpha=0.5, ec='None')
#Adding notes and titles
plt.text(0.5, 0.86, f'Passes above expectations (PAx)', transform=fig.transFigure,
horizontalalignment='center', fontsize = 12, fontfamily='SourceSansPro-SemiBold')
plt.text(0.5, 0.83, f'Centerbacks | Minimum 500 attempted passes | Passes above expectations %',
transform=fig.transFigure, horizontalalignment='center', fontsize = 4, color = '#4E616C')
fig.suptitle(f'X: @gualanodavide | Bluesky: @gualanodavide.bsky.social | Linkedin: www.linkedin.com/in/davide-gualano-a2454b187 | Newsletter: the-cutback.beehiiv.com',
horizontalalignment='center', x = 0.5, y = 0.09, fontsize=3, color = "#000000")
ax.set_axis_off()
plt.savefig(f'PAx100.png', dpi=500, facecolor = "#D7D1CF", bbox_inches = "tight", transparent = True)
In [15]:
#Creating the figure
fig = plt.figure(figsize=(1800/500, 1800/500), dpi=500, facecolor = '#D7D1CF')
ax = plt.subplot(111, facecolor = '#D7D1CF')
#Customization of the spines
ax.spines["top"].set(visible = False)
ax.spines["right"].set(visible = False)
ax.spines["bottom"].set_color('#ACA7A5')
ax.spines["left"].set_color('#ACA7A5')
#Customization of the grid
ax.grid(lw = 0.1, color = "#ACA7A5", axis = 'x', ls = "-")
ax.grid(lw = 0.1, color = "#ACA7A5", axis = 'y', ls = "-")
#Adding limits to the plot
ax.set_ylim(50, 100)
ax.set_xlim(50, 100)
#Creating the scatters
ax.scatter(
U2['xP%'],
U2['Passes %'],
zorder = 3,
s = 15,
fc = '#1565C0', # The background color
ec = "#000000", # The edge color
alpha = 0.70, # Transparency (from zero to 1)
lw = 0.5)
ax.scatter(
U3['xP%'],
U3['Passes %'],
zorder = 3,
s = 15,
fc = '#D32F2F', # The background color
ec = "#000000", # The edge color
alpha = 0.70, # Transparency (from zero to 1)
lw = 0.5)
# We set the major tick positions every integer
ax.xaxis.set_major_locator(ticker.MultipleLocator(10))
ax.yaxis.set_major_locator(ticker.MultipleLocator(10))
# Create the consistency in font size between the legend & ticks
ax.tick_params(axis = 'both', labelsize = 7, color = '#ACA7A5', labelcolor = '#ACA7A5')
# Add axes legends
ax.yaxis.set_label_text("passes completion %", size = 7, color = "#4E616C")
ax.xaxis.set_label_text("expected passes completion %", size = 7, color = "#4E616C")
#Adding line of identity to compare expected and actual completion
ax.plot([100, 0], [100, 0], ls="-", lw=0.8, color = '#ACA7A5')
#Adding notes
ax.text(
x = 61, y = 51,
s = "More difficult passes",
color = "#4E616C",
size = 4)
ax.text(
x = 92, y = 51,
s = "Less difficult passes",
color = "#4E616C",
size = 4)
#Adding titles
title_ = ax.text(
x = 50, y = 104,
s = "Passes above expectations (PAx)",
color = "#000000",
size = 12, fontfamily='SourceSansPro-SemiBold')
title_ = ax.text(
x = 50, y = 102,
s = "Top 10 in PAx % vs the Rest | Difference between actual and expected completion",
color = "#4E616C",
size = 5)
#Adding endnote
plt.annotate('X: @gualanodavide | Bluesky: @gualanodavide.bsky.social | Linkedin: www.linkedin.com/in/davide-gualano-a2454b187 | Newsletter: the-cutback.beehiiv.com',
(0.5,-0.15),
xycoords='axes fraction',
textcoords='offset points',
color = "#000000",
va='top',
ha='center',
size = 3)
plt.savefig(f"PassingProfile.png", dpi=500, bbox_inches = "tight")
/var/folders/ns/3wxdg4g57h77vxwmr4wzmvt40000gn/T/ipykernel_7043/2475199569.py:79: UserWarning: You have used the `textcoords` kwarg, but not the `xytext` kwarg. This can lead to surprising results. plt.annotate('X: @gualanodavide | Bluesky: @gualanodavide.bsky.social | Linkedin: www.linkedin.com/in/davide-gualano-a2454b187 | Newsletter: the-cutback.beehiiv.com',
In [ ]:
In [ ]:
In [ ]:
In [16]:
#Creating the bins division to do comparison player on player
bins = [0, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.85, 0.9, 0.95, 1]
df1['bin'] = pd.cut(df1['xP'], bins)
#Creating new tables as before
#xP
X1 = (df1.groupby(["player_id", "player_name", "team_name", "season_id", "bin"], observed = True)["xP"].sum().reset_index())
#Attempted Passes
Y1 = (df1.groupby(["player_id", "player_name", "team_name", "season_id", "bin"], observed = True)["outcome"].count().reset_index(name='attempted_passes'))
#Successful Passes
K1 = (df1.groupby(["player_id", "player_name", "team_name", "season_id", "bin"], observed = True)["outcome"].sum().reset_index(name='successful_passes'))
/var/folders/ns/3wxdg4g57h77vxwmr4wzmvt40000gn/T/ipykernel_7043/2436291344.py:3: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df1['bin'] = pd.cut(df1['xP'], bins)
In [17]:
#Merging new tables into one
W1 = (X1
.merge(Y1, how="left")
.merge(K1, how="left"))
In [18]:
#Creating the columns for actual and expected completion here too
W1['xP%'] = (W1['xP'] / W1['attempted_passes']) * 100
W1['Passes %'] = (W1['successful_passes'] / W1['attempted_passes']) * 100
In [19]:
#Get the whole list of players in the data
playerlist = W1['player_name'].unique().tolist()
cleaned_playerlist = [name for name in playerlist if pd.notna(name)]
cleaned_playerlist.sort()
In [20]:
from IPython.display import display, HTML
# Generate the HTML dropdown to easily search for players
options_html = ''.join([f'<option value="{name}">{name}</option>' for name in cleaned_playerlist])
dropdown_html = f"""
<input list="players" id="dropdown" oninput="handleInput()" placeholder="Choose Someone">
<datalist id="players">
{options_html}
</datalist>
<p id="output"></p>
<script>
function handleInput() {{
var input = document.getElementById("dropdown").value;
var output = document.getElementById("output");
output.innerHTML = "Selected: " + input;
}}
</script>
"""
# Display the dropdown
display(HTML(dropdown_html))
In [21]:
#Selecting the players to compare and checking they are unique players
R0a = W1[W1['player_name'] == 'Pierre Kalulu']
print(R0a.player_id.unique())
R0b = W1[W1['player_name'] == 'Federico Gatti']
print(R0b.player_id.unique())
[391836.] [439909.]
In [22]:
#Taking the names of the players for plotting purposes
nrows = R0a.shape[0]
for y in range(nrows):
title1 = R0a['player_name'].iloc[y]
nrows = R0b.shape[0]
for y in range(nrows):
title2 = R0b['player_name'].iloc[y]
#Creating the figure
fig = plt.figure(figsize=(1800/500, 1800/500), dpi=500, facecolor = '#D7D1CF')
ax = plt.subplot(111, facecolor = '#D7D1CF')
#Customization of the spines
ax.spines["top"].set(visible = False)
ax.spines["right"].set(visible = False)
ax.spines["bottom"].set_color('#ACA7A5')
ax.spines["left"].set_color('#ACA7A5')
#Customization of the grid
ax.grid(lw = 0.1, color = "#ACA7A5", axis = 'x', ls = "-")
ax.grid(lw = 0.1, color = "#ACA7A5", axis = 'y', ls = "-")
#Adding limits to the plot
ax.set_ylim(0, 105)
ax.set_xlim(0, 105)
#Creating the scatters
ax.scatter(
R0a['xP%'],
R0a['Passes %'],
zorder = 3,
s = 20,
fc = '#1565C0', # The background color
ec = "#000000", # The edge color
alpha = 0.70, # Transparency (from zero to 1)
lw = 0.5)
ax.scatter(
R0b['xP%'],
R0b['Passes %'],
zorder = 3,
s = 20,
fc = '#D32F2F', # The background color
ec = "#000000", # The edge color
alpha = 0.70, # Transparency (from zero to 1)
lw = 0.5)
# We set the major tick positions every integer
ax.xaxis.set_major_locator(ticker.MultipleLocator(20))
ax.yaxis.set_major_locator(ticker.MultipleLocator(20))
# Create the consistency in font size between the legend & ticks.
ax.tick_params(axis = 'both', labelsize = 7, color = '#ACA7A5', labelcolor = '#ACA7A5')
# Add axes legends
ax.yaxis.set_label_text("passes completion %", size = 7, color = "#4E616C")
ax.xaxis.set_label_text("expected passes completion %", size = 7, color = "#4E616C")
#Adding line of identity to compare expected and actual completion
ax.plot([105, 0], [105, 0], ls="-", lw=0.8, color = '#ACA7A5')
# Adding the one-liner title with different colors for player names
fig_text(
x = 0.5, y = 0.95,
s = f"<{title1}> vs <{title2}>",
highlight_textprops = [
{'color': '#1565C0', 'fontfamily': 'SourceSansPro-SemiBold', 'size': '12'},
{'color': '#D32F2F', 'fontfamily': 'SourceSansPro-SemiBold', 'size': '12'}
],
color = "#000000",
size = 12,
fontfamily = 'SourceSansPro-SemiBold',
ha = 'center',
va = 'center',
annotationbbox_kw={"xycoords": "figure fraction"})
# Subtitle text
title_ = ax.text(
x = 50, y = 109,
s = "Difference between actual and expected completion | Passes grouped for expected completion rate",
color = "#4E616C", ha='center',
size = 4.5)
# Footer/endnote
plt.annotate('X: @gualanodavide | Bluesky: @gualanodavide.bsky.social | Linkedin: www.linkedin.com/in/davide-gualano-a2454b187 | Newsletter: the-cutback.beehiiv.com',
(0.5,-0.15),
xycoords='axes fraction',
textcoords='offset points',
color = "#000000",
va='top',
ha='center',
size = 3)
plt.savefig(f"Passing-Performance-VS.png", dpi=500, bbox_inches = "tight")
/var/folders/ns/3wxdg4g57h77vxwmr4wzmvt40000gn/T/ipykernel_7043/3785998667.py:85: UserWarning: You have used the `textcoords` kwarg, but not the `xytext` kwarg. This can lead to surprising results. plt.annotate('X: @gualanodavide | Bluesky: @gualanodavide.bsky.social | Linkedin: www.linkedin.com/in/davide-gualano-a2454b187 | Newsletter: the-cutback.beehiiv.com',
In [ ]: