In [1]:
import urllib
import urllib.request

import matplotlib
import matplotlib.font_manager as fm
import matplotlib.patheffects as path_effects
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
import socceraction
import socceraction.spadl as spadl
from highlight_text import fig_text, ax_text
from matplotlib.colors import LinearSegmentedColormap, NoNorm
from PIL import Image
In [2]:
# Register the two Source Sans Pro faces used by the charts.
# Each entry is (path to the .ttf file, name the font is registered under).
font_specs = [
    (
        '/Users/davidegualano/Documents/Python FTBLData/SourceSansPro-Regular.ttf',
        'SourceSansPro-Regular',
    ),
    (
        '/Users/davidegualano/Documents/Python FTBLData/SourceSansPro-SemiBold.ttf',
        'SourceSansPro-SemiBold',
    ),
]
fe_regular, fe_semibold = [
    fm.FontEntry(fname=font_path, name=font_name)
    for font_path, font_name in font_specs
]

# Put both entries at the front of matplotlib's font list (regular first)
for slot, entry in enumerate((fe_regular, fe_semibold)):
    fm.fontManager.ttflist.insert(slot, entry)

# The regular face becomes the default family for all text
matplotlib.rcParams['font.family'] = fe_regular.name
In [3]:
def _read_indexed(csv_name):
    """Read a CSV file, using its first column as the DataFrame index."""
    return pd.read_csv(csv_name, index_col=0)


# Tables shared by both seasons
xG = _read_indexed("xGactions.csv")    # Expected goals data
fb = _read_indexed("teamsFOTMOB.csv")  # Ids mapping from FOTMOB and Whoscored

# 2023/24 season: players, games, events
players0 = _read_indexed("players2324.csv")
games0 = _read_indexed("games2324.csv")
actions0 = _read_indexed("actions2324.csv")

# 2024/25 season: players, games, events
players1 = _read_indexed("players2425.csv")
games1 = _read_indexed("games2425.csv")
actions1 = _read_indexed("actions2425.csv")
In [4]:
# Columns kept from the games and players tables for the later merges
game_cols = ["game_id", "competition_id", "season_id"]
player_cols = ['game_id', 'team_id', 'player_id', 'player_name', 'season_id', 'competition_id']

# Trim the games tables to the identifying columns
games0, games1 = games0[game_cols], games1[game_cols]

# Per-game player information for each season
players_info0, players_info1 = players0[player_cols], players1[player_cols]

# Let socceraction attach its descriptive name columns to the action tables
actions0, actions1 = (spadl.add_names(frame) for frame in (actions0, actions1))
In [5]:
def _join_context(actions, games, players_info):
    """Left-join the team-id mapping, xG values, games and player info onto actions.

    Each merge relies on pandas' default key selection (the columns the two
    frames have in common), applied in the order fb -> xG -> games -> players.
    """
    merged = actions
    for right in (fb, xG, games, players_info):
        merged = merged.merge(right, how="left")
    return merged


# One unified DataFrame per season
df0 = _join_context(actions0, games0, players_info0)
df1 = _join_context(actions1, games1, players_info1)
In [6]:
# Action types counted as non-penalty shots, and the competition to keep
shot_types = ['shot', 'shot_freekick']
league_ids = ['ITA-Serie A']

# Step 1: keep only the non-penalty shots of each season
df0a = df0.loc[df0['type_name'].isin(shot_types)]
df1a = df1.loc[df1['type_name'].isin(shot_types)]

# Step 2: keep only the Italian Serie A rows
df0b = df0a.loc[df0a['competition_id'].isin(league_ids)]
df1b = df1a.loc[df1a['competition_id'].isin(league_ids)]
In [7]:
def _per_team_totals(shots, xg_col, games_col, season_col):
    """Sum xG and count distinct games per (team_name, fotmob_id, season_id).

    The output columns are named by the caller so the two seasons can sit
    side by side after merging; ``season_id`` is renamed to ``season_col``.
    """
    totals = shots.groupby(["team_name", "fotmob_id", "season_id"], observed=True).agg(
        **{xg_col: ('xG', 'sum'), games_col: ('game_id', 'nunique')}
    )
    return totals.reset_index().rename(columns={"season_id": season_col})


# Season totals per team
dfA = _per_team_totals(df0b, 'xG_2324', 'games_played_2324', 'season_id_2324')
dfB = _per_team_totals(df1b, 'xG_2425', 'games_played_2425', 'season_id_2425')

# Left-join 2024/25 onto 2023/24; dropna() then removes every team that has
# no matching 2024/25 row, leaving only teams present in both seasons
dfX = pd.merge(dfA, dfB, how='left').dropna().reset_index(drop=True)
In [8]:
# Turn the season totals into per-game averages, compute the season-on-season
# change (2024/25 minus 2023/24), and order teams by that change.
# NOTE: the xG columns are overwritten with their per-game values, so running
# this cell twice divides twice -- re-run the aggregation cell first.
dfX = (
    dfX
    .assign(
        xG_2324=lambda d: d['xG_2324'] / d['games_played_2324'],
        xG_2425=lambda d: d['xG_2425'] / d['games_played_2425'],
        xG_difference=lambda d: d['xG_2425'] - d['xG_2324'],
    )
    .sort_values(by='xG_difference')
)
In [9]:
# Show the per-team comparison table, ordered by xG_difference (ascending)
dfX
Out[9]:
team_name fotmob_id season_id_2324 xG_2324 games_played_2324 season_id_2425 xG_2425 games_played_2425 xG_difference
11 Monza 6504 2324 0.993045 38 2425.0 0.690324 29.0 -0.302720
0 AC Milan 8564 2324 1.550125 38 2425.0 1.408875 29.0 -0.141249
5 Fiorentina 8535 2324 1.316467 38 2425.0 1.178957 29.0 -0.137510
8 Juventus 9885 2324 1.416085 38 2425.0 1.308571 29.0 -0.107515
10 Lecce 9888 2324 0.945089 38 2425.0 0.849355 29.0 -0.095734
4 Empoli 8534 2324 0.807251 38 2425.0 0.752375 29.0 -0.054876
12 Napoli 9875 2324 1.427827 38 2425.0 1.426459 29.0 -0.001367
14 Torino 9804 2324 1.037767 38 2425.0 1.059406 29.0 0.021640
2 Bologna 9857 2324 1.180622 38 2425.0 1.208334 29.0 0.027712
16 Verona 9876 2324 0.791179 38 2425.0 0.852616 29.0 0.061438
7 Inter 8636 2324 1.612623 38 2425.0 1.708535 28.0 0.095911
6 Genoa 10233 2324 0.885958 38 2425.0 0.983407 29.0 0.097449
3 Cagliari 8529 2324 0.911231 38 2425.0 1.011990 29.0 0.100758
15 Udinese 8600 2324 0.955441 38 2425.0 1.070051 29.0 0.114609
13 Roma 8686 2324 1.089352 38 2425.0 1.278095 29.0 0.188743
9 Lazio 8543 2324 1.144830 38 2425.0 1.392538 29.0 0.247708
1 Atalanta 8524 2324 1.537138 38 2425.0 1.812275 29.0 0.275137
In [10]:
# Figure: npxG per game by team. 2023/24 is drawn as a wide, faded bar and
# 2024/25 as a narrow, solid bar on the same row; the season-on-season change
# is annotated at the end of the 2024/25 bar and a team logo sits on the left.

LOGO_BASE_URL = 'https://images.fotmob.com/image_resources/logo/teamlogo/'
LOGO_X = -0.1      # left edge of each logo, in x data units
LOGO_SIZE = 0.035  # logo width and height, as a fraction of the figure
LABEL_X = -0.02    # right edge of the team-name fallback, in x data units


def _draw_gradient_bars(ax, widths, height, zorder, alpha=None):
    """Draw one horizontal gradient-filled bar per value, on rows 0, 1, 2, ...

    Each bar is a 1 x 256 image stretched over [0, width] and centred on its
    row. The gradient runs from 0 up to width / max(widths), so shorter bars
    stop at a paler colour of the 'YlOrRd' map than the longest bar.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    widths : iterable of bar lengths in x data units.
    height : bar thickness in y data units.
    zorder : drawing order passed to ``imshow``.
    alpha : optional opacity passed to ``imshow``.
    """
    widths = list(widths)
    if not widths:
        return
    widest = max(widths)  # hoisted: identical for every bar
    if widest <= 0:
        return  # nothing visible to draw; also avoids dividing by zero
    for row_pos, width in enumerate(widths):
        grad = np.atleast_2d(np.linspace(0, width / widest, 256))
        ax.imshow(
            grad,
            extent=[0, width, row_pos - height / 2, row_pos + height / 2],
            aspect='auto', zorder=zorder,
            norm=NoNorm(vmin=0, vmax=1), cmap='YlOrRd', alpha=alpha)


def _fetch_logo(fotmob_id, timeout=10):
    """Download a team logo and return it as an RGBA image, or None on failure.

    Only OSError is caught: urllib's URLError/HTTPError, socket timeouts and
    Pillow's UnidentifiedImageError are all OSError subclasses. Anything else
    (for example a programming error) is allowed to surface.
    """
    logo_url = f"{LOGO_BASE_URL}{fotmob_id}.png"
    try:
        # The context manager closes the HTTP response once the image is decoded
        with urllib.request.urlopen(logo_url, timeout=timeout) as response:
            return Image.open(response).convert('RGBA')
    except OSError as err:
        print(f"Logo unavailable for FotMob id {fotmob_id}: {err}")
        return None


fig = plt.figure(figsize=(8, 8), dpi=300)
ax = plt.subplot(111)
for side in ('left', 'right', 'top'):
    ax.spines[side].set_visible(False)
ax.set_yticks([])
ax.xaxis.set_label_text('npxG 2023/24 vs npxG 2024/25', size=7)
ax.tick_params(labelsize=7)
ax.grid(axis='x', color='#ACA7A5', ls=':')

# Pin both axis ranges before drawing. While x autoscaling is on, every
# imshow(extent=...) call resets the x-limits to that image's extent, so the
# final range would depend on whichever bar happens to be drawn last.
ax.set_ylim(-0.5, len(dfX) - 0.5)
if not dfX.empty:
    ax.set_xlim(0, 1.05 * dfX[['xG_2324', 'xG_2425']].to_numpy().max())

# 2023/24 bars: wider and semi-transparent
_draw_gradient_bars(ax, dfX['xG_2324'], height=0.65, zorder=2, alpha=0.45)

# 2024/25 bars: narrower, fully opaque, drawn on top
_draw_gradient_bars(ax, dfX['xG_2425'], height=0.3, zorder=3)

# Coordinate transforms: data coords -> display coords -> figure fraction
DC_to_FC = ax.transData.transform
FC_to_NFC = fig.transFigure.inverted().transform
DC_to_NFC = lambda x: FC_to_NFC(DC_to_FC(x))

# Add team logos and difference annotations, one row per team
for i, (idx, row) in enumerate(dfX.iterrows()):
    team_logo = _fetch_logo(row['fotmob_id'])

    if team_logo is not None:
        # Small inset axes, vertically centred on the row, just left of x=0
        logo_pos = DC_to_NFC((LOGO_X, i))
        image_ax = fig.add_axes(
            [logo_pos[0], logo_pos[1] - LOGO_SIZE / 2, LOGO_SIZE, LOGO_SIZE],
            fc='None', anchor='C'
        )
        image_ax.imshow(team_logo)
        image_ax.axis("off")
    else:
        # Fallback: write the team name where the logo would have been
        ax.text(LABEL_X, i, row['team_name'], fontsize=6, ha='right', va='center')

    # Difference annotation; negative values already carry their own sign
    diff_xg = row['xG_difference']
    text_sign = '+' if diff_xg > 0 else ''

    text_ = ax.annotate(
        xy=(row['xG_2425'], i),
        xytext=(5, 0),
        text=f'{text_sign}{diff_xg:.2f}',
        size=8,
        ha='left',
        va='center',
        textcoords='offset points',
        fontfamily='SourceSansPro-SemiBold'
    )
    # White outline keeps the number readable where it overlaps the wider bar
    text_.set_path_effects([path_effects.Stroke(linewidth=2, foreground='white'), path_effects.Normal()])

# Title
fig.text(
    x=0.18, y=.9,
    s="Serie A | npxG per game produced | comparison 2023/24 vs 2024/25",
    va="bottom", ha="left",
    fontsize=12, color="black", fontfamily='SourceSansPro-SemiBold',
)
# Credits footer
fig.text(
    x=0.5, y=0.05,
    s="X: @gualanodavide | Bluesky: @gualanodavide.bsky.social | Linkedin: www.linkedin.com/in/davide-gualano-a2454b187 | Newsletter: the-cutback.beehiiv.com",
    va="bottom", ha="center",
    fontsize=6, color="#4E616C",
)

fig.savefig('doublebardifferential.png', dpi=600, facecolor="#D7D1CF", bbox_inches="tight", transparent=True)
No description has been provided for this image
In [ ]: