In [1]:
# Import necessary libraries for data manipulation, visualization, and analysis
import socceraction
import socceraction.spadl as spadl
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.ticker as ticker
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
from highlight_text import fig_text, ax_text
from PIL import Image
import urllib
from scipy.signal import savgol_filter
In [2]:
# Load custom fonts for visualization
fe_regular = fm.FontEntry(
    fname='/Users/davidegualano/Documents/Python FTBLData/SourceSansPro-Regular.ttf',
    name='SourceSansPro-Regular'
)
fe_semibold = fm.FontEntry(
    fname='/Users/davidegualano/Documents/Python FTBLData/SourceSansPro-SemiBold.ttf',
    name='SourceSansPro-SemiBold'
)

# Insert both fonts into the font manager
fm.fontManager.ttflist.insert(0, fe_regular)
fm.fontManager.ttflist.insert(1, fe_semibold)

# Set the font family to the custom regular font
matplotlib.rcParams['font.family'] = fe_regular.name
In [3]:
# Load datasets from CSV files
xG = pd.read_csv("xGactions.csv", index_col = 0)
fb = pd.read_csv("teamsFOTMOB.csv", index_col = 0)
aactions = pd.read_csv("actions2425.csv", index_col = 0)
games = pd.read_csv("games2425.csv", index_col = 0)
In [4]:
#1st phase work dataframe creation 
actions0 = spadl.add_names(aactions) #spadl function for more infos
actions1 = actions0.merge(xG, how="left") #merging xG data
In [5]:
#2nd phase work dataframe creation getting game infos + merging on the dataframe
games_info = games[['game_id', "game_date", 'competition_id', 'season_id']]
actions2 = actions1.merge(games_info, how="left")
In [6]:
#Getting non penalty shots and identifying goals
actions = actions2[actions2['type_name'].isin(['shot', 'shot_freekick'])]
actions['is_goal'] = np.where(actions['result_name'] == 'success', 1, 0)
/var/folders/ns/3wxdg4g57h77vxwmr4wzmvt40000gn/T/ipykernel_9546/410113042.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  actions['is_goal'] = np.where(actions['result_name'] == 'success', 1, 0)
In [7]:
#Creating tables with xG and goals sums
dfa = actions.groupby(["game_id", "team_id", "game_date", "competition_id", "season_id"])["xG"].sum().reset_index(name='xG')
dfb = actions.groupby(["game_id", "team_id", "game_date", "competition_id", "season_id"])["is_goal"].sum().reset_index(name='is_goal')

#Creating the final table and the value we want to plot
df = dfb.merge(dfa)
df['weighted_xG'] = (df['xG'] * (1 - 0.3072911590783282)) + (df['is_goal'] * 0.3072911590783282) #using weighting  
df = df[['game_id', 'team_id', 'game_date', 'competition_id', 'season_id', 'weighted_xG']]
In [8]:
# Create a team number (1, 2, ...) for each row within the same game_id
df['team_num'] = df.groupby('game_id').cumcount() + 1

# Pivot the DataFrame using a multi-index that includes the extra columns
df_wide = df.pivot(
    index=['game_id', 'game_date', 'competition_id', 'season_id'],
    columns='team_num',
    values=['team_id', 'weighted_xG']
).reset_index()

# Flatten the multi-level columns
df_wide.columns = [
    f"{col[0]}" if isinstance(col, tuple) and col[1] == "" 
    else f"{col[0]}_{col[1]}" if isinstance(col, tuple) 
    else col 
    for col in df_wide.columns
]
In [9]:
#Creating the table for when teams are at home so we have their metrics and opponents rating
home_df = df_wide.copy()
home_df = home_df.melt(id_vars = ["game_id",  "game_date", "competition_id", "season_id", "team_id_1", "team_id_2"])
home_df.rename(columns = {"team_id_1":"team_id", "team_id_2":"opponent_id"}, inplace = True)
home_df.replace({"variable":{"weighted_xG_1":"weighted_xG_for", "weighted_xG_2":"weighted_xG_ag"}}, inplace = True)
In [10]:
#Creating the table for when teams are away so we have their metrics and opponents rating
away_df = df_wide.copy()
away_df = away_df.melt(id_vars = ["game_id",  "game_date", "competition_id", "season_id", "team_id_1", "team_id_2"])
away_df.rename(columns = {"team_id_2":"team_id", "team_id_1":"opponent_id"}, inplace = True)
away_df.replace({"variable":{"weighted_xG_2":"weighted_xG_for", "weighted_xG_1":"weighted_xG_ag"}}, inplace = True)
In [11]:
#Merging home and away in one final table
dfx0 = pd.concat([home_df, away_df]).reset_index(drop=True)
In [12]:
# Function to format season ID into a readable format
def format_season_id(season_id):
    # Convert to integer if it's a float
    season_id = int(season_id)
    # Extract the last two digits of the year
    start_year = str(season_id -1)[-2:]
    # Calculate the end year
    end_year = str(season_id)[-2:]
    # Format as 20/21
    formatted_season = f"{start_year}/{end_year}"
    return formatted_season
In [13]:
#Transform gamedate format
dfx0['game_date'] = pd.to_datetime(dfx0['game_date'])
In [14]:
#Sort dataframe for game dates and add teams' ids mapping from Whoscored to Fotmob
dfx1 = dfx0.sort_values(by='game_date').reset_index(drop=True)
dfx = dfx1.merge(fb, how='left')
In [15]:
#Creating a list of teams to search them easily with the following cell
teamlist = dfx['team_name'].unique().tolist()
cleaned_teamlist = [name for name in teamlist if pd.notna(name)]
cleaned_teamlist.sort()
In [16]:
from IPython.display import display, HTML

# Step 3: Generate the HTML dropdown
options_html = ''.join([f'<option value="{name}">{name}</option>' for name in cleaned_teamlist])

dropdown_html = f"""
<input list="players" id="dropdown" oninput="handleInput()" placeholder="Choose Someone">
<datalist id="players">
    {options_html}
</datalist>
<p id="output"></p>
<script>
function handleInput() {{
    var input = document.getElementById("dropdown").value;
    var output = document.getElementById("output");
    output.innerHTML = "Selected: " + input;
}}
</script>
"""

# Display the dropdown
display(HTML(dropdown_html))

In [17]:
#Selecting the team to plot
club = 'Liverpool'
In [18]:
#Filter for the team
dfy = dfx[dfx["team_name"] == club]
# Apply the function to the 'season_id' column
dfy['season_id'] = dfy['season_id'].apply(format_season_id)
/var/folders/ns/3wxdg4g57h77vxwmr4wzmvt40000gn/T/ipykernel_9546/3097605033.py:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfy['season_id'] = dfy['season_id'].apply(format_season_id)
In [19]:
#Get Fotmob id
fotmob = dfy.fotmob_id.iloc[0].astype(int)
In [20]:
#Cleaning the team dataframe
dfy = dfy.dropna()
dfy = dfy.drop_duplicates()
In [21]:
# xG conceded and xG created
Y_for = dfy[dfy["variable"] == "weighted_xG_for"].reset_index(drop = True)
Y_ag = dfy[dfy["variable"] == "weighted_xG_ag"].reset_index(drop = True)
X = pd.to_datetime(Y_for['game_date']) 
In [22]:
#Creating the rolling calculated metric for the visualization
Y_for['rolling'] = Y_for['value'].rolling(window=3, min_periods=1, center=True).mean()
Y_ag['rolling'] = Y_ag['value'].rolling(window=3, min_periods=1, center=True).mean()

# Define smoothing parameters
window_length = 5  # Choose an odd number close to 10% of total matches
polyorder = 2  # A quadratic fit is ideal for xG trends

# Apply smoothing
Y_for['smoothed'] = savgol_filter(Y_for['rolling'], window_length=window_length, polyorder=polyorder, mode="nearest")
Y_ag['smoothed'] = savgol_filter(Y_ag['rolling'], window_length=window_length, polyorder=polyorder, mode="nearest")
In [23]:
#Creating the figure and ax
fig = plt.figure(figsize=(8, 2.5), dpi = 300, facecolor = "#D7D1CF")
ax = plt.subplot(111, facecolor = "#D7D1CF")
# Remove top & right spines and change the color.
ax.spines[["top", "right"]].set_visible(False)
ax.spines[["left", "bottom"]].set_color("#ACA7A5")
# Set the grid
ax.grid(
    axis='y',
    lw = 0.3,
    ls = ":",
    color = "#ACA7A5"
)

#Plotting the lines
line_1 = ax.plot(X, Y_for['smoothed'], color = "#39FF14", zorder=4, lw=0.9)
line_2 = ax.plot(X, Y_ag['smoothed'], color = "#FF1439", zorder = 4, lw = 0.9)

# Fill between the lines based on which one is higher/bigger
ax.fill_between(
    X,
    Y_ag['smoothed'],
    Y_for['smoothed'],
    where=Y_for['smoothed'] >= Y_ag['smoothed'],
    interpolate=True,
    alpha=0.85,
    zorder=3,
    color=line_1[0].get_color()
)

ax.fill_between(
    X,
    Y_ag['smoothed'],
    Y_for['smoothed'],
    where=Y_ag['smoothed'] > Y_for['smoothed'],
    interpolate=True,
    alpha=0.8,
    color=line_2[0].get_color()
)

# Customize the ticks to match spine color and adjust label size.
ax.tick_params(
    color = "#ACA7A5", 
    length = 5, 
    which = "both", 
    labelsize = 5,
    labelcolor = "#ACA7A5",
    zorder = 3
)

# Define padding (e.g., 7 days before first match, 7 days after last match)
padding_days = 7
x_min = X.min() - pd.Timedelta(days=padding_days)
x_max = X.max() + pd.Timedelta(days=padding_days)

# Set x-ticks at game dates
ax.set_xticks(X) 

# Format x-tick labels correctly
ax.set_xticklabels(X.dt.strftime('%b %d'), rotation=45, ha="right", fontsize=4, color="#000000")

# Set y-axis major tick positions to only 0.5 xG multiples.
ax.yaxis.set_major_locator(ticker.MultipleLocator(0.5))

# Title and subtitle for the legend
fig_text(
    x = 0.12, y = 1.08,
    s = club,
    color = "#000000",
    weight = "bold",
    size = 10,
    annotationbbox_kw={"xycoords": "figure fraction"},
    fontfamily='SourceSansPro-SemiBold'
)

competition_ids = ', '.join(dfy['competition_id'].unique())
formatted_season = dfy['season_id'].iloc[0]

fig_text(
    x = 0.12, y = 1.02,
    s = f"Weighted non penalty xG   <created>   and   <conceded>   | 3-match rolling average then Savitzky-Golay smoothed\n{competition_ids} {formatted_season}",
    highlight_textprops = [       
        {'size':'6', 'bbox': {'edgecolor': line_1[0].get_color(), 'facecolor': line_1[0].get_color(), 'pad': 1}, 'color': '#000000'}, 
        {'size':'6', 'bbox': {'edgecolor': line_2[0].get_color(), 'facecolor': line_2[0].get_color(), 'pad': 1}, 'color': '#FFFFFF'}
    ],
    color = "#000000",
    size = 6,
    annotationbbox_kw={"xycoords": "figure fraction"})

fig.text(0.5, -0.1, "X: @gualanodavide | Bluesky: @gualanodavide.bsky.social | Linkedin: www.linkedin.com/in/davide-gualano-a2454b187 | Newsletter: the-cutback.beehiiv.com", 
         ha='center', fontsize=5, color="#000000")

#Adding the logo and saving
fotmob_url = "https://images.fotmob.com/image_resources/logo/teamlogo/"
logo_ax = fig.add_axes([0.75, .92, 0.2, 0.15], zorder=1)
club_icon = Image.open(urllib.request.urlopen(f"{fotmob_url}{fotmob}.png"))
logo_ax.imshow(club_icon)
logo_ax.axis("off")
plt.savefig(f'{club}rollingxG.png', dpi=600, bbox_inches = "tight")
No description has been provided for this image
In [ ]: