In [1]:
import urllib
import urllib.request

import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
import matplotlib.ticker as ticker
import matplotlib.patheffects as path_effects
from matplotlib.colors import LinearSegmentedColormap, NoNorm
from highlight_text import fig_text, ax_text
import socceraction
import socceraction.spadl as spadl
from PIL import Image
In [2]:
# Register the two Source Sans Pro weights used by the figure.
# NOTE(review): these are absolute paths on the author's machine; the cell
# fails elsewhere unless the font files are placed at the same location.
fe_regular = fm.FontEntry(
    name='SourceSansPro-Regular',
    fname='/Users/davidegualano/Documents/Python FTBLData/SourceSansPro-Regular.ttf',
)
fe_semibold = fm.FontEntry(
    name='SourceSansPro-SemiBold',
    fname='/Users/davidegualano/Documents/Python FTBLData/SourceSansPro-SemiBold.ttf',
)

# Put both entries at the head of matplotlib's font list (regular first,
# semibold second) so they can be selected by name.
for slot, font_entry in enumerate((fe_regular, fe_semibold)):
    fm.fontManager.ttflist.insert(slot, font_entry)

# Make the regular weight the default family for all text
matplotlib.rcParams.update({'font.family': fe_regular.name})
In [3]:
# Read every input table from CSV; the first column of each file is used as the index.

# Season-independent tables: expected-goals values and the FOTMOB/Whoscored id mapping
xG, fb = (
    pd.read_csv(csv_name, index_col=0)
    for csv_name in ("xGactions.csv", "teamsFOTMOB.csv")
)

# 2023/24 season: players, games and events
players0, games0, actions0 = (
    pd.read_csv(csv_name, index_col=0)
    for csv_name in ("players2324.csv", "games2324.csv", "actions2324.csv")
)

# 2024/25 season: players, games and events
players1, games1, actions1 = (
    pd.read_csv(csv_name, index_col=0)
    for csv_name in ("players2425.csv", "games2425.csv", "actions2425.csv")
)
In [4]:
# Columns kept from the games and players tables
game_cols = ["game_id", "competition_id", "season_id"]
player_cols = ['game_id', 'team_id', 'player_id', 'player_name', 'season_id', 'competition_id']

# Trim the games tables down to the identifying columns
games0, games1 = games0[game_cols], games1[game_cols]

# Build the slimmed-down player lookup for each season
players_info0, players_info1 = players0[player_cols], players1[player_cols]

# Let socceraction attach its descriptive name columns to the events
# (the later cells rely on the resulting `type_name` column)
actions0, actions1 = spadl.add_names(actions0), spadl.add_names(actions1)
In [5]:
def _join_season(actions, games, players_info):
    """Left-join one season's events with the lookup tables.

    The joins run in this order: team id mapping (`fb`), expected goals
    (`xG`), games, players. No `on=` is given, so each merge joins on the
    columns the two frames have in common.
    """
    joined = actions
    for lookup in (fb, xG, games, players_info):
        joined = joined.merge(lookup, how="left")
    return joined


# One unified frame per season
df0 = _join_season(actions0, games0, players_info0)
df1 = _join_season(actions1, games1, players_info1)
In [6]:
# Action types kept as shots: open-play shots and free-kick shots (penalties excluded)
shot_types = ['shot', 'shot_freekick']
# Competition kept: Italian Serie A
kept_competitions = ['ITA-Serie A']

# Step 1: keep only the selected shot types
df0a = df0.loc[df0['type_name'].isin(shot_types)]
df1a = df1.loc[df1['type_name'].isin(shot_types)]

# Step 2: keep only shots from the selected competition
df0b = df0a.loc[df0a['competition_id'].isin(kept_competitions)]
df1b = df1a.loc[df1a['competition_id'].isin(kept_competitions)]
In [7]:
def _team_xg_summary(shots, season_tag):
    """Aggregate one season's shots into one row per team.

    Parameters
    ----------
    shots : DataFrame
        Shot rows with `team_name`, `fotmob_id`, `season_id`, `xG` and
        `game_id` columns.
    season_tag : str
        Suffix appended to the output columns (e.g. "2324").

    Returns
    -------
    DataFrame with `team_name`, `fotmob_id`, `season_id_<tag>`,
    `xG_<tag>` (summed xG) and `games_played_<tag>` (distinct game ids).
    """
    return (
        shots
        .groupby(["team_name", "fotmob_id", "season_id"], observed=True)
        .agg(**{
            f"xG_{season_tag}": ('xG', 'sum'),
            f"games_played_{season_tag}": ('game_id', 'nunique'),
        })
        .reset_index()
        .rename(columns={"season_id": f"season_id_{season_tag}"})
    )


# Creating the necessary dataframe for the viz
dfA = _team_xg_summary(df0b, "2324")
dfB = _team_xg_summary(df1b, "2425")

# Keep only teams present in both seasons. An explicit inner join on the team
# keys replaces the former left-merge + dropna: it selects the same rows but
# does not upcast the 2024/25 integer columns to float via temporary NaNs.
# `validate` fails loudly if a team appears more than once on either side
# (e.g. an input file that contains several seasons).
dfX = dfA.merge(
    dfB,
    how='inner',
    on=["team_name", "fotmob_id"],
    validate='one_to_one',
).reset_index(drop=True)
In [8]:
# Convert the season totals to per-game values, compute the season-over-season
# change, and order teams from biggest drop to biggest gain.
# NOTE: this cell overwrites dfX, so running it twice without re-running the
# previous cell divides by the games played a second time.
dfX = (
    dfX
    .assign(
        xG_2324=lambda t: t['xG_2324'] / t['games_played_2324'],
        xG_2425=lambda t: t['xG_2425'] / t['games_played_2425'],
        # evaluated after the two lines above, so it uses the per-game values
        xG_difference=lambda t: t['xG_2425'] - t['xG_2324'],
    )
    .sort_values(by='xG_difference')
)
In [9]:
# Display the per-team comparison table (sorted by xG_difference in the previous cell)
dfX
Out[9]:
team_name | fotmob_id | season_id_2324 | xG_2324 | games_played_2324 | season_id_2425 | xG_2425 | games_played_2425 | xG_difference | |
---|---|---|---|---|---|---|---|---|---|
11 | Monza | 6504 | 2324 | 0.993045 | 38 | 2425.0 | 0.690324 | 29.0 | -0.302720 |
0 | AC Milan | 8564 | 2324 | 1.550125 | 38 | 2425.0 | 1.408875 | 29.0 | -0.141249 |
5 | Fiorentina | 8535 | 2324 | 1.316467 | 38 | 2425.0 | 1.178957 | 29.0 | -0.137510 |
8 | Juventus | 9885 | 2324 | 1.416085 | 38 | 2425.0 | 1.308571 | 29.0 | -0.107515 |
10 | Lecce | 9888 | 2324 | 0.945089 | 38 | 2425.0 | 0.849355 | 29.0 | -0.095734 |
4 | Empoli | 8534 | 2324 | 0.807251 | 38 | 2425.0 | 0.752375 | 29.0 | -0.054876 |
12 | Napoli | 9875 | 2324 | 1.427827 | 38 | 2425.0 | 1.426459 | 29.0 | -0.001367 |
14 | Torino | 9804 | 2324 | 1.037767 | 38 | 2425.0 | 1.059406 | 29.0 | 0.021640 |
2 | Bologna | 9857 | 2324 | 1.180622 | 38 | 2425.0 | 1.208334 | 29.0 | 0.027712 |
16 | Verona | 9876 | 2324 | 0.791179 | 38 | 2425.0 | 0.852616 | 29.0 | 0.061438 |
7 | Inter | 8636 | 2324 | 1.612623 | 38 | 2425.0 | 1.708535 | 28.0 | 0.095911 |
6 | Genoa | 10233 | 2324 | 0.885958 | 38 | 2425.0 | 0.983407 | 29.0 | 0.097449 |
3 | Cagliari | 8529 | 2324 | 0.911231 | 38 | 2425.0 | 1.011990 | 29.0 | 0.100758 |
15 | Udinese | 8600 | 2324 | 0.955441 | 38 | 2425.0 | 1.070051 | 29.0 | 0.114609 |
13 | Roma | 8686 | 2324 | 1.089352 | 38 | 2425.0 | 1.278095 | 29.0 | 0.188743 |
9 | Lazio | 8543 | 2324 | 1.144830 | 38 | 2425.0 | 1.392538 | 29.0 | 0.247708 |
1 | Atalanta | 8524 | 2324 | 1.537138 | 38 | 2425.0 | 1.812275 | 29.0 | 0.275137 |
In [10]:
def _draw_gradient_bars(ax, values, height, image_zorder, alpha=None):
    """Draw one horizontal gradient-filled bar per entry of `values`.

    Invisible `barh` patches provide the bar geometry; each one is then
    painted with an `imshow` strip using the 'YlOrRd' colormap. The gradient
    of a bar ends at its share of the largest value in `values`, so shorter
    bars stop at a lighter colour.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    values : pandas Series of bar lengths; bar ``k`` is drawn at y=k.
    height : bar thickness in data units.
    image_zorder : z-order given to the gradient images.
    alpha : optional transparency passed to `imshow`.
    """
    peak = values.max()  # loop-invariant, computed once
    bars = ax.barh(np.arange(len(values)), values, height=height, color='none', edgecolor='none')
    for bar in bars:
        bar.set_zorder(1)
        x, y = bar.get_xy()
        w, h = bar.get_width(), bar.get_height()
        grad = np.atleast_2d(np.linspace(0, w / peak, 256))
        ax.imshow(
            grad, extent=[x, x + w, y, y + h],
            aspect='auto', zorder=image_zorder,
            norm=NoNorm(vmin=0, vmax=1), cmap='YlOrRd', alpha=alpha)


fig = plt.figure(figsize=(8, 8), dpi=300)
ax = fig.add_subplot(111)

for side in ('left', 'right', 'top'):
    ax.spines[side].set_visible(False)
ax.set_yticks([])
ax.xaxis.set_label_text('npxG 2023/24 vs npxG 2024/25', size=7)
ax.tick_params(labelsize=7)
ax.grid(axis='x', color='#ACA7A5', ls=':')

# Fix both axis limits before drawing. While autoscaling is on, every imshow
# call resets the limits to that image's extent, so the x-range would end up
# being the length of whichever bar is drawn last and longer bars would be
# clipped. Setting the limits explicitly turns autoscaling off.
ax.set_ylim(-0.5, len(dfX) - 0.5)
ax.set_xlim(0, dfX[['xG_2324', 'xG_2425']].to_numpy().max())

# 2023/24 bars: thicker and semi-transparent, drawn underneath
_draw_gradient_bars(ax, dfX['xG_2324'], height=0.65, image_zorder=2, alpha=0.45)
# 2024/25 bars: thinner and fully opaque, drawn on top
_draw_gradient_bars(ax, dfX['xG_2425'], height=0.3, image_zorder=3)

# Coordinate transformations: data -> display pixels -> figure fraction
DC_to_FC = ax.transData.transform
FC_to_NFC = fig.transFigure.inverted().transform


def DC_to_NFC(point):
    """Convert a point from data coordinates to figure-fraction coordinates."""
    return FC_to_NFC(DC_to_FC(point))


fotmob_url = 'https://images.fotmob.com/image_resources/logo/teamlogo/'
ax_size = 0.035   # logo axes width/height, as a fraction of the figure
logo_x = -0.1     # left edge of the logos, in data coordinates (left of the bars)

# Add team logos and difference annotations
for i, (idx, row) in enumerate(dfX.iterrows()):
    # Small axes for the logo, vertically centred on the team's bar
    logo_pos = DC_to_NFC((logo_x, i))
    image_ax = fig.add_axes(
        [logo_pos[0], logo_pos[1] - ax_size / 2, ax_size, ax_size],
        fc='None', anchor='C'
    )
    try:
        # int() keeps the URL free of a trailing ".0" should the id column be float
        logo_url = f"{fotmob_url}{int(row['fotmob_id'])}.png"
        # The context manager closes the HTTP response; convert() forces the
        # image to be fully read before that happens.
        with urllib.request.urlopen(logo_url, timeout=10) as response:
            team_logo = Image.open(response).convert('RGBA')
    except Exception:
        # Best-effort: if the logo cannot be fetched or decoded, drop the
        # empty logo axes and write the team name next to the bar instead.
        image_ax.remove()
        ax.text(-0.02, i, row['team_name'], fontsize=6, ha='right', va='center')
    else:
        image_ax.imshow(team_logo)
        image_ax.axis("off")

    # Signed per-game difference, printed just right of the 2024/25 bar
    diff_xg = row['xG_difference']
    text_sign = '+' if diff_xg > 0 else ''
    text_ = ax.annotate(
        xy=(row['xG_2425'], i),
        xytext=(5, 0),
        text=f'{text_sign}{diff_xg:.2f}',
        size=8,
        ha='left',
        va='center',
        textcoords='offset points',
        fontfamily='SourceSansPro-SemiBold'
    )
    # White outline keeps the number legible over the grid lines
    text_.set_path_effects([path_effects.Stroke(linewidth=2, foreground='white'), path_effects.Normal()])

# Title
fig.text(
    x=0.18, y=.9,
    s="Serie A | npxG per game produced | comparison 2023/24 vs 2024/25",
    va="bottom", ha="left",
    fontsize=12, color="black", fontfamily='SourceSansPro-SemiBold',
)
# Footer with author credits
fig.text(
    x=0.5, y=0.05,
    s="X: @gualanodavide | Bluesky: @gualanodavide.bsky.social | Linkedin: www.linkedin.com/in/davide-gualano-a2454b187 | Newsletter: the-cutback.beehiiv.com",
    va="bottom", ha="center",
    fontsize=6, color="#4E616C",
)
fig.savefig('doublebardifferential.png', dpi=600, facecolor="#D7D1CF", bbox_inches="tight", transparent=True)
In [ ]: