In [1]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.colors as colors
from matplotlib.colors import LinearSegmentedColormap
import matplotlib.font_manager as fm
import matplotlib.cm as cm
from scipy.ndimage import gaussian_filter
from scipy.spatial.distance import cosine
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.manifold import TSNE, MDS
from mplsoccer import Pitch, VerticalPitch
import socceraction
import socceraction.spadl as spadl
import seaborn as sns
import tqdm
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides pandas/NumPy deprecation and chained-assignment
# warnings that later cells might raise — consider narrowing the filter.
warnings.filterwarnings('ignore')
# Set plotting style
plt.style.use('fivethirtyeight')
# Seaborn "notebook" context with fonts scaled by 1.2
sns.set_context("notebook", font_scale=1.2)
In [2]:
# Describe the two Source Sans Pro weights under explicit names so that either
# one can be selected by name.
# NOTE(review): the font files are read from an absolute, user-specific path.
fe_regular = fm.FontEntry(
    name='SourceSansPro-Regular',
    fname='/Users/davidegualano/Documents/Python FTBLData/SourceSansPro-Regular.ttf',
)
fe_semibold = fm.FontEntry(
    name='SourceSansPro-SemiBold',
    fname='/Users/davidegualano/Documents/Python FTBLData/SourceSansPro-SemiBold.ttf',
)
# Put both entries at the front of the font manager's list (regular first)
fm.fontManager.ttflist[:0] = [fe_regular, fe_semibold]
# The regular weight becomes the default font family
matplotlib.rcParams.update({'font.family': fe_regular.name})
In [3]:
# Load data
def _read_indexed_csv(filename):
    """Read a CSV export, using its first column as the index."""
    return pd.read_csv(filename, index_col=0)


fb = _read_indexed_csv("teamsFOTMOB.csv")
positions = _read_indexed_csv("clustered_position.csv")
players = _read_indexed_csv("players2425.csv")
games = _read_indexed_csv("games2425.csv")
actions = _read_indexed_csv("actions2425.csv")
chains = _read_indexed_csv("possession_chains_info2425.csv")
In [4]:
# Attach the possession-chain information to the events
# (default merge: inner join on every column the two frames share)
actions = pd.merge(actions, chains)
In [5]:
# Enrich the player table with the team lookup and the clustered positions,
# then keep only the join keys plus the position label
playersA = pd.merge(players, fb, how="left")
playersB = pd.merge(playersA, positions, how="left")
players_info = playersB.loc[:, ['game_id', 'team_id', 'player_id', 'position']]
In [6]:
# Columns shared by the home and the away view of a game
_game_keys = ["game_id", "competition_id", "season_id"]
# Home side: one row per game with generic team_id / manager column names
gamesA = games[_game_keys + ['home_team_id', 'home_manager']].rename(
    columns={'home_team_id': 'team_id', 'home_manager': 'manager'}
)
# Away side: same layout, built from the away columns
gamesB = games[_game_keys + ['away_team_id', 'away_manager']].rename(
    columns={'away_team_id': 'team_id', 'away_manager': 'manager'}
)
In [7]:
# Stack the home and away views into one team/manager-per-game table; it is merged
# onto the events later to build the team identifier
gamesX = pd.concat([gamesA, gamesB], axis=0).sort_values(by='game_id')
In [8]:
# Process actions data: replace the stored action_id with a fresh 0..n-1 counter
# that follows the current row order, then add the SPADL name columns
actions = (
    actions
    .drop(columns=['action_id'])
    .reset_index(drop=True)
    .reset_index()
    .rename(columns={'index': 'action_id'})
)
actions = spadl.add_names(actions)
In [9]:
# Merge all data: left-join the game/manager table, the player positions and the
# team lookup onto the events, one after the other.
# NOTE(review): no explicit `on=` keys — each join uses every column the two
# frames have in common.
df = actions
for _lookup in (gamesX, players_info, fb):
    df = df.merge(_lookup, how="left")
In [10]:
# Clean data: rows missing any part of the identifier are dropped and the three
# parts are cast to text
_identifier_parts = ['team_name', 'season_id', 'manager']
df = df.dropna(subset=_identifier_parts).astype(dict.fromkeys(_identifier_parts, str))
# Create team identifier with manager between team_name and season_id
df = df.assign(
    team_identifier=lambda frame: frame['team_name'] + '-' + frame['manager'] + '-' + frame['season_id']
)
In [11]:
# Competitions available in the merged event frame
df['competition_id'].unique()
Out[11]:
array(['BEL-Jupiler Pro League', 'BRA-Brasileirão', 'ENG-Championship', 'ENG-FA Cup', 'ENG-League Cup', 'ENG-League One', 'ENG-League Two', 'ENG-Premier League', 'ESP-La Liga', 'EU-Champions League', 'EU-Europa League', 'FRA-Ligue 1', 'GER-Bundesliga', 'ITA-Serie A', 'NED-Eredivisie', 'POR-Liga Portugal', 'RUS-Premier League', 'SCO-Premiership', 'USA-Major League Soccer'], dtype=object)
In [12]:
# Restrict the analysis to a single competition
dfx = df.loc[df['competition_id'] == 'ITA-Serie A']
In [13]:
# Calculate touches: number of actions (non-null type_name) per team identifier
touches = (
    dfx
    .groupby("team_identifier", observed=True)["type_name"]
    .count()
    .reset_index()
)
# Per-game action counts for every team; these are paired up by game to obtain
# the touches against
_per_game_keys = ["team_identifier", "team_id", "team_name", "game_id"]
against0 = (
    dfx
    .groupby(_per_game_keys, observed=True)["type_name"]
    .count()
    .reset_index(name='count')
)
In [14]:
# Calculate touches against each team: pair every team-game row with the rows of
# the same game, keep the pairs formed by two different teams, and sum the
# opponent's count per team identifier
_paired = against0.merge(against0, on='game_id', suffixes=('_team', '_opponent'))
merged_df = _paired[_paired['team_id_team'] != _paired['team_id_opponent']]
result_df = merged_df[['team_identifier_team', 'count_opponent']].rename(
    columns={'team_identifier_team': 'team_identifier', 'count_opponent': 'touches_against'}
)
against = (
    result_df
    .groupby("team_identifier", observed=True)["touches_against"]
    .sum()
    .reset_index()
)
In [15]:
# Setup pitch: custom 105 x 68 pitch with box goals and black lines
pitch = Pitch(
    pitch_type='custom',
    pitch_length=105,
    pitch_width=68,
    goal_type='box',
    line_color='#000000',
    linewidth=2,
)
In [16]:
# Cell 1: Filter by specific action types

# Position groups used to split passes and carries; the order matches the a-e
# suffixes of the frames below.
# NOTE(review): 'LCM' is listed but 'RCM' is not — confirm against the labels
# actually present in the `position` column.
_POSITION_GROUPS = (
    ['GK', 'CB', 'RCB', 'LCB'],
    ['LWB', 'RWB'],
    ['DM', 'CM', 'LCM'],
    ['AWL', 'AWR'],
    ['ST', 'SS'],
)


def _split_by_position(frame):
    """Return the five position-group subsets of `frame`, in a-e order."""
    return tuple(frame[frame['position'].isin(group)] for group in _POSITION_GROUPS)


def _nth_action_per_chain(frame, n):
    """Return the n-th (0-based) action of every possession chain in `frame`.

    Rows are ordered by action_id inside each chain and returned whole.
    `GroupBy.first()` is deliberately avoided: it picks the first non-null value
    column by column, so its result can combine values from different actions.
    Rows without a possession_chain are ignored; chains with fewer than n + 1
    rows contribute nothing.
    NOTE(review): assumes possession_chain values are unique across games.
    """
    ordered = (
        frame
        .dropna(subset=['possession_chain'])
        .sort_values(['possession_chain', 'action_id'])
    )
    rank_in_chain = ordered.groupby('possession_chain').cumcount()
    return ordered[rank_in_chain == n].reset_index(drop=True)


# Reusable row masks
_action_type = dfx['type_name']
_is_carry = _action_type.isin(['dribble', 'take_on'])
_is_pass = _action_type.isin(['pass'])
_in_buildup = dfx['is_buildup'] == True
_in_consolidate = dfx['is_consolidate'] == True

# Single action types
df0 = dfx[_action_type == 'bad_touch']
df2 = dfx[_action_type == 'shot']
df4 = dfx[_action_type == 'goalkick']
df5 = dfx[_action_type == 'cross']
# Ball-progressing actions flagged as in-box
df6 = dfx[_action_type.isin(['dribble', 'pass', 'take_on']) & (dfx['is_inbox'] == True)]

# Dribbles / take-ons in build-up and consolidation, split by position group
df7 = dfx[_is_carry & _in_buildup]
df7a, df7b, df7c, df7d, df7e = _split_by_position(df7)
df8 = dfx[_is_carry & _in_consolidate]
df8a, df8b, df8c, df8d, df8e = _split_by_position(df8)

# Passes in build-up and consolidation, split by position group
df9 = dfx[_is_pass & _in_buildup]
df9a, df9b, df9c, df9d, df9e = _split_by_position(df9)
df10 = dfx[_is_pass & _in_consolidate]
df10a, df10b, df10c, df10d, df10e = _split_by_position(df10)

df11 = dfx[dfx['is_transition'] == True]
df12 = dfx[_action_type.isin(['tackle', 'interception', 'clearance', 'foul'])]

# Regains: first and second flagged action of each possession chain
df13 = dfx[dfx['is_regain'] == True]
df13a = _nth_action_per_chain(df13, 0)
df13b = _nth_action_per_chain(df13, 1)

# Long-ball chains: chains whose first action (by action_id) is flagged is_longball
first_rows = _nth_action_per_chain(dfx, 0).set_index('possession_chain')
valid_chains = first_rows[first_rows['is_longball'] == True].index.tolist()
longball_df = dfx[dfx['possession_chain'].isin(valid_chains)]
# First and second action of each of those chains
df14a = _nth_action_per_chain(longball_df, 0)
df14b = _nth_action_per_chain(longball_df, 1)
In [17]:
# Event configuration: event name -> (frame, x column, y column, category).
# The "_start" / "_end" suffix of the name tells which coordinate pair is binned.
def _at_start(frame, category):
    """Config entry that bins the start coordinates of `frame`."""
    return (frame, 'start_x_a0', 'start_y_a0', category)


def _at_end(frame, category):
    """Config entry that bins the end coordinates of `frame`."""
    return (frame, 'end_x_a0', 'end_y_a0', category)


dfp_dict_combined = {
    # Possession loss events
    'bad_touch_start': _at_start(df0, 'possession_loss'),
    # Attacking events
    'shots_and_goals_start': _at_start(df2, 'attacking'),
    'crosses_start': _at_start(df5, 'attacking'),
    'crosses_end': _at_end(df5, 'attacking'),
    # Possession restart
    'goalkicks_end': _at_end(df4, 'possession_restart'),
    'regains_start': _at_start(df13a, 'possession_restart'),
    'regains_end': _at_end(df13a, 'possession_restart'),
    # Inbox/Intobox events
    'box_start': _at_start(df6, 'box'),
    'box_end': _at_end(df6, 'box'),
    # Transition events
    'transition_start': _at_start(df11, 'transition'),
    'transition_end': _at_end(df11, 'transition'),
    # Defensive events
    'defensive_actions_start': _at_start(df12, 'defensive'),
    # Pass events: build-up
    'pass_buildup_def_start': _at_start(df9a, 'passes_events'),
    'pass_buildup_def_end': _at_end(df9a, 'passes_events'),
    'pass_buildup_wb_start': _at_start(df9b, 'passes_events'),
    'pass_buildup_wb_end': _at_end(df9b, 'passes_events'),
    'pass_buildup_cm_start': _at_start(df9c, 'passes_events'),
    'pass_buildup_cm_end': _at_end(df9c, 'passes_events'),
    'pass_buildup_am_start': _at_start(df9d, 'passes_events'),
    'pass_buildup_am_end': _at_end(df9d, 'passes_events'),
    'pass_buildup_st_start': _at_start(df9e, 'passes_events'),
    'pass_buildup_st_end': _at_end(df9e, 'passes_events'),
    # Pass events: consolidation
    'pass_cons_def_start': _at_start(df10a, 'passes_events'),
    'pass_cons_def_end': _at_end(df10a, 'passes_events'),
    'pass_cons_wb_start': _at_start(df10b, 'passes_events'),
    'pass_cons_wb_end': _at_end(df10b, 'passes_events'),
    'pass_cons_cm_start': _at_start(df10c, 'passes_events'),
    'pass_cons_cm_end': _at_end(df10c, 'passes_events'),
    'pass_cons_am_start': _at_start(df10d, 'passes_events'),
    'pass_cons_am_end': _at_end(df10d, 'passes_events'),
    'pass_cons_st_start': _at_start(df10e, 'passes_events'),
    'pass_cons_st_end': _at_end(df10e, 'passes_events'),
    # Dribbles + take-ons: build-up
    'drb_buildup_def_start': _at_start(df7a, 'dribbles_events'),
    'drb_buildup_def_end': _at_end(df7a, 'dribbles_events'),
    'drb_buildup_wb_start': _at_start(df7b, 'dribbles_events'),
    'drb_buildup_wb_end': _at_end(df7b, 'dribbles_events'),
    'drb_buildup_cm_start': _at_start(df7c, 'dribbles_events'),
    'drb_buildup_cm_end': _at_end(df7c, 'dribbles_events'),
    'drb_buildup_am_start': _at_start(df7d, 'dribbles_events'),
    'drb_buildup_am_end': _at_end(df7d, 'dribbles_events'),
    'drb_buildup_st_start': _at_start(df7e, 'dribbles_events'),
    'drb_buildup_st_end': _at_end(df7e, 'dribbles_events'),
    # Dribbles + take-ons: consolidation
    'drb_cons_def_start': _at_start(df8a, 'dribbles_events'),
    'drb_cons_def_end': _at_end(df8a, 'dribbles_events'),
    'drb_cons_wb_start': _at_start(df8b, 'dribbles_events'),
    'drb_cons_wb_end': _at_end(df8b, 'dribbles_events'),
    'drb_cons_cm_start': _at_start(df8c, 'dribbles_events'),
    'drb_cons_cm_end': _at_end(df8c, 'dribbles_events'),
    'drb_cons_am_start': _at_start(df8d, 'dribbles_events'),
    'drb_cons_am_end': _at_end(df8d, 'dribbles_events'),
    'drb_cons_st_start': _at_start(df8e, 'dribbles_events'),
    'drb_cons_st_end': _at_end(df8e, 'dribbles_events'),
    # Longball chain events
    'longball_chain_start': _at_start(df14a, 'longballs_events'),
    'longball_chain_end': _at_end(df14a, 'longballs_events'),
    'longball_chain_second_end': _at_end(df14b, 'longballs_events'),
}
In [18]:
# Result containers, keyed by event name without its "_start" / "_end" suffix:
# in-possession maps (start / end coordinates) ...
arrays_start, arrays_end = {}, {}
# ... and out-of-possession maps (start / end coordinates)
arrays_out_start, arrays_out_end = {}, {}
# Filled in once the team identifiers are known
team_ids = None
def calculate_bin_statistics(df, x_column, y_column, bins=(105, 68), statistic='count',
                             pitch_length=105, pitch_width=68):
    """Bin the (x, y) coordinates of `df` on the module-level `pitch`.

    Parameters
    ----------
    df : DataFrame holding the coordinate columns.
    x_column, y_column : names of the coordinate columns to bin.
    bins : (x_bins, y_bins) pair, or a single int used for both axes.
    statistic : statistic forwarded to `pitch.bin_statistic`.
    pitch_length, pitch_width : rows with coordinates outside
        [0, pitch_length] x [0, pitch_width] are discarded before binning.

    Returns
    -------
    dict with a 'statistic' array oriented (x_bins, y_bins). When there is
    nothing to bin, the dict holds only 'statistic', an all-zero grid of that
    shape (it used to be (105, 68) regardless of `bins`). Otherwise it is the
    dict returned by `pitch.bin_statistic` with 'statistic' transposed if needed.
    """
    n_x, n_y = (bins, bins) if np.isscalar(bins) else tuple(bins)
    grid_shape = (int(n_x), int(n_y))

    # Handle empty dataframe (checked first so missing columns do not matter here)
    if df.empty:
        return {'statistic': np.zeros(grid_shape)}

    # Keep only rows with both coordinates present and inside the pitch
    df_valid = df.dropna(subset=[x_column, y_column])
    on_pitch = (
        df_valid[x_column].between(0, pitch_length)
        & df_valid[y_column].between(0, pitch_width)
    )
    df_valid = df_valid[on_pitch]
    if df_valid.empty:
        return {'statistic': np.zeros(grid_shape)}

    bin_stat = pitch.bin_statistic(df_valid[x_column], df_valid[y_column],
                                   statistic=statistic, bins=bins)
    # Flip a (y_bins, x_bins) result to (x_bins, y_bins). For a square grid the two
    # orientations cannot be told apart by shape, so the result is left as returned.
    if grid_shape[0] != grid_shape[1] and bin_stat['statistic'].shape == grid_shape[::-1]:
        bin_stat['statistic'] = bin_stat['statistic'].T
    return bin_stat
# Gaussian smoothing of a binned map
def fixed_smooth(bin_statistic, sigma=2.5):
    """Return a shallow copy of `bin_statistic` whose 'statistic' grid has been
    blurred with a Gaussian filter (sigma defaults to 2.5); the input is not modified."""
    smoothed = bin_statistic.copy()
    # Filter a copy of the grid so the caller's array is never touched
    smoothed['statistic'] = gaussian_filter(bin_statistic['statistic'].copy(), sigma)
    return smoothed
# Events that will use the simplified calculation (no normalization)
simplified_events = ['bad_touch_start',
                     'shots_and_goals_start', 'crosses_start', 'goalkicks_end', 'crosses_end']
# Frame of the first configured event (kept because earlier versions defined it)
sample_event = list(dfp_dict_combined.values())[0][0]
# Take the team list from the full competition frame rather than from one event
# frame: a team with no rows in that particular event would otherwise be missing
# from every result array.
team_ids = np.sort(dfx['team_identifier'].unique())
# Calculate total iterations
total_iterations = len(dfp_dict_combined) * len(team_ids)
print(f"Processing {total_iterations} team-event combinations for both possession and out-of-possession...")
# Process each event for both possession and out-of-possession analysis

def _normalise_by_baseline(event_counts, baseline_counts):
    """Divide event counts by baseline counts bin by bin.

    Bins where the baseline is zero, and bins where the ratio is NaN, are set to 0.
    """
    event_counts = np.asarray(event_counts, dtype=float)
    baseline_counts = np.asarray(baseline_counts, dtype=float)
    share = np.zeros_like(event_counts)
    np.divide(event_counts, baseline_counts, out=share, where=baseline_counts != 0)
    share[np.isnan(share)] = 0
    return share


def _smoothed_map(event_frame, x_col, y_col, baseline_counts=None):
    """Bin `event_frame`, optionally normalise by `baseline_counts`, then smooth."""
    stat = calculate_bin_statistics(event_frame, x_col, y_col)
    if baseline_counts is not None:
        stat['statistic'] = _normalise_by_baseline(stat['statistic'], baseline_counts)
    return fixed_smooth(stat)['statistic']


# Games played by each team, taken from the full action frame. Deriving them from
# the event frame would drop every game in which the team itself produced none of
# that event, and with it the opponent's actions in those games.
team_game_ids = {
    team_id: dfx.loc[dfx['team_identifier'] == team_id, 'game_id'].unique()
    for team_id in team_ids
}

# All-action baseline grids do not depend on the event, only on the team and the
# coordinate pair, so they are computed once and reused.
_baseline_cache = {}


def _baseline_counts(side, team_id, x_col, y_col):
    """All-action bin counts of the team ('own') or of its opponents ('opp')."""
    key = (side, team_id, x_col, y_col)
    if key not in _baseline_cache:
        is_team = dfx['team_identifier'] == team_id
        if side == 'own':
            frame = dfx[is_team]
        else:
            frame = dfx[dfx['game_id'].isin(team_game_ids[team_id]) & ~is_team]
        _baseline_cache[key] = calculate_bin_statistics(frame, x_col, y_col)['statistic']
    return _baseline_cache[key]


# Events binned as raw counts: the simplified ones plus the defensive actions
_raw_count_events = set(simplified_events) | {"defensive_actions_start"}

with tqdm.tqdm(total=total_iterations, desc="Processing Events and Teams") as pbar:
    for event_name, (dfp_event, x_col, y_col, _event_type) in dfp_dict_combined.items():
        array_event = []      # For possession (team's own actions)
        array_out_event = []  # For out-of-possession (opponent actions)
        normalise = event_name not in _raw_count_events

        for team_id in team_ids:
            is_team_event = dfp_event['team_identifier'] == team_id

            # POSSESSION ANALYSIS (team's own actions)
            own_baseline = _baseline_counts('own', team_id, x_col, y_col) if normalise else None
            array_event.append(
                _smoothed_map(dfp_event[is_team_event], x_col, y_col, own_baseline)
            )

            # OUT-OF-POSSESSION ANALYSIS (opponent actions in the team's games)
            df_opponent_event = dfp_event[
                dfp_event['game_id'].isin(team_game_ids[team_id]) & ~is_team_event
            ]
            if event_name == "defensive_actions_start":
                # Opponent defensive actions are mirrored along the pitch length
                df_opponent_event = df_opponent_event.assign(
                    x_a0_inverted=105 - df_opponent_event[x_col]
                )
                out_map = _smoothed_map(df_opponent_event, 'x_a0_inverted', y_col)
            else:
                opp_baseline = _baseline_counts('opp', team_id, x_col, y_col) if normalise else None
                out_map = _smoothed_map(df_opponent_event, x_col, y_col, opp_baseline)
            array_out_event.append(out_map)

            # Update the progress bar
            pbar.update(1)

        # Store the results under the event name without its coordinate suffix
        if event_name.endswith('_start'):
            base_name = event_name[:-len('_start')]
            arrays_start[base_name] = array_event
            arrays_out_start[base_name] = array_out_event
        elif event_name.endswith('_end'):
            base_name = event_name[:-len('_end')]
            arrays_end[base_name] = array_event
            arrays_out_end[base_name] = array_out_event
# Individual variables kept for backward compatibility.
# In-possession maps, start coordinates (an empty list when the event is missing)
array0a, array3a, array5a, array12a = (
    arrays_start.get(key, [])
    for key in ('bad_touch', 'shots_and_goals', 'crosses', 'defensive_actions')
)
# Pass events (start coordinates)
(pass_buildup_def_start, pass_buildup_wb_start, pass_buildup_cm_start,
 pass_buildup_am_start, pass_buildup_st_start) = (
    arrays_start.get(f'pass_buildup_{role}', []) for role in ('def', 'wb', 'cm', 'am', 'st')
)
(pass_cons_def_start, pass_cons_wb_start, pass_cons_cm_start,
 pass_cons_am_start, pass_cons_st_start) = (
    arrays_start.get(f'pass_cons_{role}', []) for role in ('def', 'wb', 'cm', 'am', 'st')
)
# Dribble events (start coordinates)
(drb_buildup_def_start, drb_buildup_wb_start, drb_buildup_cm_start,
 drb_buildup_am_start, drb_buildup_st_start) = (
    arrays_start.get(f'drb_buildup_{role}', []) for role in ('def', 'wb', 'cm', 'am', 'st')
)
(drb_cons_def_start, drb_cons_wb_start, drb_cons_cm_start,
 drb_cons_am_start, drb_cons_st_start) = (
    arrays_start.get(f'drb_cons_{role}', []) for role in ('def', 'wb', 'cm', 'am', 'st')
)
# Box, transition, regain and longball events
box_start, transition_start, regains_start, longball_chain_start = (
    arrays_start.get(key, [])
    for key in ('box', 'transition', 'regains', 'longball_chain')
)
# In-possession maps, end coordinates (an empty list when the event is missing)
array4b, array5b = (arrays_end.get(key, []) for key in ('goalkicks', 'crosses'))
# Pass events (end coordinates)
(pass_buildup_def_end, pass_buildup_wb_end, pass_buildup_cm_end,
 pass_buildup_am_end, pass_buildup_st_end) = (
    arrays_end.get(f'pass_buildup_{role}', []) for role in ('def', 'wb', 'cm', 'am', 'st')
)
(pass_cons_def_end, pass_cons_wb_end, pass_cons_cm_end,
 pass_cons_am_end, pass_cons_st_end) = (
    arrays_end.get(f'pass_cons_{role}', []) for role in ('def', 'wb', 'cm', 'am', 'st')
)
# Dribble events (end coordinates)
(drb_buildup_def_end, drb_buildup_wb_end, drb_buildup_cm_end,
 drb_buildup_am_end, drb_buildup_st_end) = (
    arrays_end.get(f'drb_buildup_{role}', []) for role in ('def', 'wb', 'cm', 'am', 'st')
)
(drb_cons_def_end, drb_cons_wb_end, drb_cons_cm_end,
 drb_cons_am_end, drb_cons_st_end) = (
    arrays_end.get(f'drb_cons_{role}', []) for role in ('def', 'wb', 'cm', 'am', 'st')
)
# Box, transition, regain and longball events (end)
(box_end, transition_end, regains_end,
 longball_chain_end, longball_chain_second_end) = (
    arrays_end.get(key, [])
    for key in ('box', 'transition', 'regains', 'longball_chain', 'longball_chain_second')
)
# OUT-OF-POSSESSION ARRAYS
# Start coordinates (an empty list when the event is missing)
array0a_out, array3a_out, array5a_out, array12a_out = (
    arrays_out_start.get(key, [])
    for key in ('bad_touch', 'shots_and_goals', 'crosses', 'defensive_actions')
)
# Pass events (start coordinates) - out of possession
(pass_buildup_def_start_out, pass_buildup_wb_start_out, pass_buildup_cm_start_out,
 pass_buildup_am_start_out, pass_buildup_st_start_out) = (
    arrays_out_start.get(f'pass_buildup_{role}', []) for role in ('def', 'wb', 'cm', 'am', 'st')
)
(pass_cons_def_start_out, pass_cons_wb_start_out, pass_cons_cm_start_out,
 pass_cons_am_start_out, pass_cons_st_start_out) = (
    arrays_out_start.get(f'pass_cons_{role}', []) for role in ('def', 'wb', 'cm', 'am', 'st')
)
# Dribble events (start coordinates) - out of possession
(drb_buildup_def_start_out, drb_buildup_wb_start_out, drb_buildup_cm_start_out,
 drb_buildup_am_start_out, drb_buildup_st_start_out) = (
    arrays_out_start.get(f'drb_buildup_{role}', []) for role in ('def', 'wb', 'cm', 'am', 'st')
)
(drb_cons_def_start_out, drb_cons_wb_start_out, drb_cons_cm_start_out,
 drb_cons_am_start_out, drb_cons_st_start_out) = (
    arrays_out_start.get(f'drb_cons_{role}', []) for role in ('def', 'wb', 'cm', 'am', 'st')
)
# Box, transition, regain and longball events - out of possession
(box_start_out, transition_start_out, regains_start_out,
 longball_chain_start_out) = (
    arrays_out_start.get(key, [])
    for key in ('box', 'transition', 'regains', 'longball_chain')
)
# Out-of-possession maps, end coordinates (an empty list when the event is missing)
array4b_out, array5b_out = (arrays_out_end.get(key, []) for key in ('goalkicks', 'crosses'))
# Pass events (end coordinates) - out of possession
(pass_buildup_def_end_out, pass_buildup_wb_end_out, pass_buildup_cm_end_out,
 pass_buildup_am_end_out, pass_buildup_st_end_out) = (
    arrays_out_end.get(f'pass_buildup_{role}', []) for role in ('def', 'wb', 'cm', 'am', 'st')
)
(pass_cons_def_end_out, pass_cons_wb_end_out, pass_cons_cm_end_out,
 pass_cons_am_end_out, pass_cons_st_end_out) = (
    arrays_out_end.get(f'pass_cons_{role}', []) for role in ('def', 'wb', 'cm', 'am', 'st')
)
# Dribble events (end coordinates) - out of possession
(drb_buildup_def_end_out, drb_buildup_wb_end_out, drb_buildup_cm_end_out,
 drb_buildup_am_end_out, drb_buildup_st_end_out) = (
    arrays_out_end.get(f'drb_buildup_{role}', []) for role in ('def', 'wb', 'cm', 'am', 'st')
)
(drb_cons_def_end_out, drb_cons_wb_end_out, drb_cons_cm_end_out,
 drb_cons_am_end_out, drb_cons_st_end_out) = (
    arrays_out_end.get(f'drb_cons_{role}', []) for role in ('def', 'wb', 'cm', 'am', 'st')
)
# Box, transition, regain and longball events (end) - out of possession.
# The longball variables were previously assigned twice in a row with identical
# values; they are assigned once here.
(box_end_out, transition_end_out, regains_end_out,
 longball_chain_end_out, longball_chain_second_end_out) = (
    arrays_out_end.get(key, [])
    for key in ('box', 'transition', 'regains', 'longball_chain', 'longball_chain_second')
)
print("Processing completed for both possession and out-of-possession.")
print(f"Results stored in arrays_start/end ({len(arrays_start)} events) and arrays_out_start/end ({len(arrays_out_start)} events).")
Processing 1485 team-event combinations for both possession and out-of-possession...
Processing Events and Teams: 100%|██████████| 1485/1485 [00:38<00:00, 38.10it/s]
Processing completed for both possession and out-of-possession. Results stored in arrays_start/end (28 events) and arrays_out_start/end (28 events).
In [19]:
# Containers for the similarity tables: in possession (start / end) ...
similarity_scores_start, similarity_scores_end = {}, {}
# ... and out of possession (start / end)
similarity_scores_out_start, similarity_scores_out_end = {}, {}
# Team-by-team similarity of the spatial maps
def calculate_similarity_metrics(arrays_dict, team_ids):
    """Build one team x team cosine-similarity table per event.

    `arrays_dict` maps an event name to a list of 2-D maps, one per entry of
    `team_ids`. Each map is flattened and compared with every other one. If
    anything fails for an event, the error is printed and an identity matrix is
    stored for it instead. Returns a dict of DataFrames labelled with `team_ids`
    on both axes (index named 'team_identifier').
    """
    tables = {}
    for name, grids in arrays_dict.items():
        try:
            vectors = [grid.flatten() for grid in grids]
            table = pd.DataFrame(cosine_similarity(vectors), index=team_ids, columns=team_ids)
        except Exception as e:
            print(f"Error calculating similarity for {name}: {e}")
            # Fallback: every team is only similar to itself
            table = pd.DataFrame(np.eye(len(team_ids)), index=team_ids, columns=team_ids)
        table.index.name = 'team_identifier'
        tables[name] = table
    return tables
# Similarity tables for the possession maps (start, end) and then for the
# out-of-possession maps (start, end), computed in that order
(similarity_scores_start, similarity_scores_end,
 similarity_scores_out_start, similarity_scores_out_end) = (
    calculate_similarity_metrics(maps, team_ids)
    for maps in (arrays_start, arrays_end, arrays_out_start, arrays_out_end)
)
# Lookup helper with an identity-matrix fallback
def safe_get_values(similarity_dict, key):
    """Return the values of `similarity_dict[key]` as an array.

    When `key` is missing, a warning is printed and an identity matrix sized by
    the module-level `team_ids` is returned instead.
    """
    if key not in similarity_dict:
        print(f"Warning: {key} not found in similarity dictionary. Using identity matrix.")
        return np.eye(len(team_ids))
    return similarity_dict[key].values
# Map to individual similarity variables for backward compatibility - UPDATED FOR NEW STRUCTURE
# Every legacy module-level ``similarity*`` name is generated from the lookup
# tables below and bound through globals(). Per source dictionary the lookups
# run in this order: numbered basic events, pass/dribble events, special events.

# Pass ('pass') and dribble ('drb') event keys crossed with the 'buildup'/'cons'
# tag and the position tag: pass_buildup_def ... pass_cons_st, drb_buildup_def ... drb_cons_st
_role_events = [
    f"{action}_{tag}_{group}"
    for action in ('pass', 'drb')
    for tag in ('buildup', 'cons')
    for group in ('def', 'wb', 'cm', 'am', 'st')
]

# Numbered legacy codes -> event key (start-location and end-location dictionaries)
_basic_start = {'0a': 'bad_touch', '3a': 'shots_and_goals', '5a': 'crosses', '12a': 'defensive_actions'}
_basic_end = {'4b': 'goalkicks', '5b': 'crosses'}

# Special events; the end-location dictionaries carry one extra key
_special_start = ['box', 'transition', 'regains', 'longball_chain']
_special_end = _special_start + ['longball_chain_second']


def _similarity_bindings(scores, basic_codes, special_events, location, suffix):
    """Return {variable name: value array} for one similarity dictionary.

    Basic events become ``similarity<code><suffix>``; all other events become
    ``similarity_<event>_<location><suffix>``. Values come from safe_get_values.
    """
    bindings = {
        f"similarity{code}{suffix}": safe_get_values(scores, event)
        for code, event in basic_codes.items()
    }
    bindings.update(
        (f"similarity_{event}_{location}{suffix}", safe_get_values(scores, event))
        for event in _role_events + special_events
    )
    return bindings


# In-possession (no suffix) first, then out-of-possession ('_out' suffix);
# start locations before end locations within each.
for _scores, _basic, _special, _location, _suffix in (
    (similarity_scores_start, _basic_start, _special_start, 'start', ''),
    (similarity_scores_end, _basic_end, _special_end, 'end', ''),
    (similarity_scores_out_start, _basic_start, _special_start, 'start', '_out'),
    (similarity_scores_out_end, _basic_end, _special_end, 'end', '_out'),
):
    globals().update(_similarity_bindings(_scores, _basic, _special, _location, _suffix))

# Keep old position-based variables but initialize them with identity matrices for backward compatibility
# You can remove these if they're not needed
# NOTE: all of these names are bound to the SAME array object (identity_matrix).
identity_matrix = np.eye(len(team_ids))
_legacy_codes = ['1a', '6a', '7a', '8a', '9a', '10a', '11a',
                 '6b', '7b', '8b', '9b', '10b', '11b']
globals().update({
    f"similarity{code}{suffix}": identity_matrix
    for suffix in ('', '_out')
    for code in _legacy_codes
})

print("Similarity calculations completed for both possession and out-of-possession metrics.")
Similarity calculations completed for both possession and out-of-possession metrics.
In [20]:
# DataFrame Organization with Team Information
# Team name = the part of the team_identifier before the first hyphen
# (the whole identifier when it contains no hyphen)
team_name_map = {identifier: identifier.partition('-')[0] for identifier in team_ids}

# Collect every similarity DataFrame in one dictionary, keyed
# "similarity_<event>_<tag>", reusing the dictionaries computed earlier.
similarity_dataframes = {}
for scores, tag in (
    (similarity_scores_start, "start"),          # in possession, start locations
    (similarity_scores_end, "end"),              # in possession, end locations
    (similarity_scores_out_start, "out_start"),  # out of possession, start locations
    (similarity_scores_out_end, "out_end"),      # out of possession, end locations
):
    for event_name, frame in scores.items():
        similarity_dataframes[f"similarity_{event_name}_{tag}"] = frame

# Give the index of every collected frame the same name
for frame in similarity_dataframes.values():
    if frame is not None:
        frame.index.name = 'team_identifier'

# Summary of what is available
event_types_start = list(similarity_scores_start)
event_types_end = list(similarity_scores_end)
print(f"Team information mapped for {len(team_ids)} teams")
print(f"Start events: {', '.join(event_types_start)}")
print(f"End events: {', '.join(event_types_end)}")
print("DataFrames organized with team information.")
Team information mapped for 27 teams Start events: bad_touch, shots_and_goals, crosses, regains, box, transition, defensive_actions, pass_buildup_def, pass_buildup_wb, pass_buildup_cm, pass_buildup_am, pass_buildup_st, pass_cons_def, pass_cons_wb, pass_cons_cm, pass_cons_am, pass_cons_st, drb_buildup_def, drb_buildup_wb, drb_buildup_cm, drb_buildup_am, drb_buildup_st, drb_cons_def, drb_cons_wb, drb_cons_cm, drb_cons_am, drb_cons_st, longball_chain End events: crosses, goalkicks, regains, box, transition, pass_buildup_def, pass_buildup_wb, pass_buildup_cm, pass_buildup_am, pass_buildup_st, pass_cons_def, pass_cons_wb, pass_cons_cm, pass_cons_am, pass_cons_st, drb_buildup_def, drb_buildup_wb, drb_buildup_cm, drb_buildup_am, drb_buildup_st, drb_cons_def, drb_cons_wb, drb_cons_cm, drb_cons_am, drb_cons_st, longball_chain, longball_chain_second DataFrames organized with team information.
In [21]:
# Build the two overall style matrices: dfPoss (in possession) and
# dfOutPoss (out of possession), each the plain average of its per-event matrices.
print("Creating final possession style matrices...")


def _mean_similarity(frames):
    """Average the frames, force the diagonal to 1.0 and clip to [0, 1]."""
    averaged = sum(frames) / len(frames)
    for team in averaged.index:
        averaged.loc[team, team] = 1.0
    return averaged.clip(0, 1)


def _identity_similarity():
    """Fallback matrix: identity over team_ids, index named 'team_identifier'."""
    fallback = pd.DataFrame(np.eye(len(team_ids)), index=team_ids, columns=team_ids)
    fallback.index.name = 'team_identifier'
    return fallback


# Start-location matrices first, then end-location matrices
poss_dfs = [*similarity_scores_start.values(), *similarity_scores_end.values()]
out_poss_dfs = [*similarity_scores_out_start.values(), *similarity_scores_out_end.values()]

# In possession
if poss_dfs:
    dfPoss = _mean_similarity(poss_dfs)
    print(f"Created in-possession similarity matrix (dfPoss) with {len(dfPoss)} teams.")
else:
    print("Warning: No possession data available.")
    dfPoss = _identity_similarity()

# Out of possession
if out_poss_dfs:
    dfOutPoss = _mean_similarity(out_poss_dfs)
    print(f"Created out-of-possession similarity matrix (dfOutPoss) with {len(dfOutPoss)} teams.")
else:
    print("Warning: No out-of-possession data available.")
    dfOutPoss = _identity_similarity()
Creating final possession style matrices... Created in-possession similarity matrix (dfPoss) with 27 teams. Created out-of-possession similarity matrix (dfOutPoss) with 27 teams.
In [22]:
def plot_team_similarity_matrix(team_matrix, title="Team Style Similarity Matrix",
                                possession_type="in", max_teams=None):
    """
    Plot a heatmap of team style similarities with dynamic sizing.

    Only the cells strictly below the diagonal are drawn; the diagonal and the
    upper triangle are masked.

    Args:
        team_matrix: DataFrame containing team similarity scores (teams on both axes)
        title: Title for the plot
        possession_type: 'in' for possession; any other value is labelled
            out-of-possession
        max_teams: Maximum number of teams to show (None or 0 for all). When the
            matrix is larger, only the first ``max_teams`` teams are plotted.

    Returns:
        None. The figure is displayed with plt.show(). An empty/None matrix or a
        plotting failure is reported with print() instead of raising.
    """
    # Handle empty matrix
    if team_matrix is None or team_matrix.empty:
        print("Error: Empty similarity matrix")
        return

    # Create a subset matrix if needed
    if max_teams and len(team_matrix) > max_teams:
        # Just take the first max_teams
        subset_teams = list(team_matrix.index[:max_teams])
        matrix = team_matrix.loc[subset_teams, subset_teams].copy()
        subset_note = f" (showing {len(matrix)} of {len(team_matrix)} teams)"
    else:
        matrix = team_matrix.copy()
        subset_note = ""

    # Figure scale and annotation font size both depend on the number of teams
    n_teams = len(matrix)
    # Base figure size that works well for ~15 teams
    base_width, base_height = 12, 10
    if n_teams <= 10:
        scale, annot_fontsize = 0.8, 10   # Smaller figure, larger labels
    elif n_teams <= 20:
        scale, annot_fontsize = 1.0, 8    # Default size
    elif n_teams <= 30:
        scale, annot_fontsize = 1.3, 6    # Larger
    else:
        scale, annot_fontsize = 1.5, 5    # Much larger
    figsize = (base_width * scale, base_height * scale)

    try:
        plt.figure(figsize=figsize)

        # Boolean mask: True hides a cell. Hides the upper triangle and the diagonal.
        mask = np.triu(np.ones(matrix.shape, dtype=bool))

        # Add possession type to title
        poss_label = " (In Possession)" if possession_type == "in" else " (Out of Possession)"
        full_title = f"{title}{poss_label}{subset_note}"

        # Colour scale: from the overall minimum up to the largest off-diagonal
        # value, so the diagonal does not flatten the colour range.
        vmin_value = matrix.min().min()
        # DataFrame.mask returns a new frame with NaN on the diagonal. This avoids
        # writing through ``matrix.values``, which is not guaranteed to be a
        # writable view of the frame (e.g. under pandas Copy-on-Write).
        on_diagonal = np.eye(*matrix.shape, dtype=bool)
        max_non_diag = matrix.mask(on_diagonal).max().max()
        # No off-diagonal value available (e.g. a single team): fall back to 1
        vmax_value = max_non_diag if not np.isnan(max_non_diag) else 1

        # Annotations use 3 decimal places
        sns.heatmap(matrix, cmap="YlGnBu", annot=True, fmt=".3f",
                    square=True, mask=mask, vmin=vmin_value, vmax=vmax_value,
                    annot_kws={"size": annot_fontsize})
        plt.title(full_title, fontsize=16)
        plt.tight_layout()
        plt.show()
    except Exception as e:
        print(f"Error creating similarity matrix visualization: {e}")
In [23]:
def plot_team_heatmap(team_id, event_type, arrays_dict, possession_type="in", team_name_map=None):
    """
    Plot heatmap for a specific team and event type.

    Args:
        team_id: Entry of the global ``team_ids`` identifying the team
        event_type: Key of ``arrays_dict`` to plot
        arrays_dict: Mapping of event name -> sequence of per-team grids, indexed
            by the team's position in ``team_ids``. Grids are drawn as (68, 105);
            a (105, 68) grid is transposed first. Pass the notebook's own
            ``arrays_end`` / ``arrays_out_end`` object to get an
            "End Locations" title; anything else is titled "Start Locations".
        possession_type: 'in' for possession; any other value is treated as
            out-of-possession
        team_name_map: Optional dictionary mapping team IDs to display names

    Returns:
        None. The figure is displayed with plt.show(); unknown events/teams and
        plotting failures are reported with print().
    """
    # Check if the event is in the provided arrays_dict
    if event_type not in arrays_dict:
        print(f"Event type {event_type} not found in the provided arrays dictionary.")
        return

    # Debug info
    print(f"Plotting heatmap for {team_id}, event {event_type}, using {'in-possession' if possession_type=='in' else 'out-of-possession'} data")

    # Get the team index
    try:
        team_idx = list(team_ids).index(team_id)
    except ValueError:
        print(f"Team ID {team_id} not found.")
        return

    # Safely access team array
    try:
        team_array = arrays_dict[event_type][team_idx]
        print(f"Team array shape: {team_array.shape}")
    except IndexError:
        print(f"Error: Team index {team_idx} out of bounds for event {event_type}")
        return
    except Exception as e:
        print(f"Error accessing team array: {e}")
        return

    # Get the team name if mapping is provided
    team_name = team_name_map.get(team_id, str(team_id)) if team_name_map else str(team_id)

    # Determine if this is an end event based on the dictionary.
    # Identity check on purpose: ``==`` would compare the dictionaries' lists of
    # NumPy arrays element by element, which raises "truth value of an array is
    # ambiguous" whenever two dictionaries share the same keys and list lengths.
    end_arrays = arrays_end if possession_type == "in" else arrays_out_end
    is_end_event = arrays_dict is end_arrays

    # Create pitch and plot heatmap
    try:
        # Define custom colormap
        cmap = LinearSegmentedColormap.from_list(
            'custom_cmap',
            ['#D7D1CF', '#FFFFFF', '#FFFF80', '#FF8000', '#800000', '#000000'],
            N=256)

        # Set the pitch color to #D7D1CF
        pitch = Pitch(pitch_type='custom', pitch_width=68, pitch_length=105, goal_type='box',
                      linewidth=2, pitch_color='#D7D1CF', line_color='#000000')

        # Create figure with specified facecolor
        fig = plt.figure(figsize=(10, 7), facecolor='#D7D1CF')
        ax = fig.add_subplot(111)
        ax.set_facecolor('#D7D1CF')

        # Draw the pitch
        pitch.draw(ax=ax)

        # Create meshgrid for plotting: one sample per grid column (x) and row (y)
        x = np.linspace(0, 105, 105)
        y = np.linspace(0, 68, 68)
        X, Y = np.meshgrid(x, y)

        # Transpose a (length, width) grid so it matches the (68, 105) meshgrid
        if team_array.shape == (105, 68):
            print(f"Transposing array from {team_array.shape} to match expected (68, 105)")
            team_array = team_array.T

        # Plot using pcolormesh directly with nearest shading and custom colormap
        hm = ax.pcolormesh(X, Y, team_array, cmap=cmap, alpha=0.9, shading='nearest')

        # Add a colorbar
        cbar = fig.colorbar(hm, ax=ax)
        cbar.set_label('Event Density')

        # Title carries the possession type and whether these are start or end locations
        poss_label = "In Possession" if possession_type == "in" else "Out of Possession"
        location_label = "End Locations" if is_end_event else "Start Locations"
        ax.set_title(f"{team_name} - {event_type.replace('_', ' ').title()} ({poss_label}, {location_label})", fontsize=14)

        plt.tight_layout()
        plt.show()
    except Exception as e:
        print(f"Error creating heatmap visualization: {e}")
        import traceback
        traceback.print_exc()
In [24]:
def compare_team_heatmaps(team_id1, team_id2, event_type, arrays_dict, possession_type="in", team_name_map=None):
    """
    Plot heatmaps for two teams side by side for direct comparison.

    Args:
        team_id1: Entry of the global ``team_ids`` for the left-hand panel
        team_id2: Entry of the global ``team_ids`` for the right-hand panel
        event_type: Key of ``arrays_dict`` to plot
        arrays_dict: Mapping of event name -> sequence of per-team grids, indexed
            by the team's position in ``team_ids``. Grids are drawn as (68, 105);
            a (105, 68) grid is transposed first. Pass the notebook's own
            ``arrays_end`` / ``arrays_out_end`` object to get "End Locations"
            titles and the end-location similarity score.
        possession_type: 'in' for possession; any other value is treated as
            out-of-possession
        team_name_map: Optional dictionary mapping team IDs to display names

    Returns:
        None. The figure is displayed with plt.show(); unknown events/teams and
        plotting failures are reported with print().
    """
    # Check if the event is in the provided arrays_dict
    if event_type not in arrays_dict:
        print(f"Event type {event_type} not found in the provided arrays dictionary.")
        return

    # Get the team indices
    try:
        known_teams = list(team_ids)
        team_idx1 = known_teams.index(team_id1)
        team_idx2 = known_teams.index(team_id2)
    except ValueError:
        print("Team ID not found.")
        return

    # Safely access team arrays
    try:
        team_array1 = arrays_dict[event_type][team_idx1]
        team_array2 = arrays_dict[event_type][team_idx2]
        print(f"Team 1 array shape: {team_array1.shape}")
        print(f"Team 2 array shape: {team_array2.shape}")
    except IndexError:
        print(f"Error: Team index out of bounds for event {event_type}")
        return
    except Exception as e:
        print(f"Error accessing team array: {e}")
        return

    # Get the team names if mapping is provided
    team_name1 = team_name_map.get(team_id1, str(team_id1)) if team_name_map else str(team_id1)
    team_name2 = team_name_map.get(team_id2, str(team_id2)) if team_name_map else str(team_id2)

    # Determine if this is an end event based on the dictionary.
    # Identity check on purpose: ``==`` would compare the dictionaries' lists of
    # NumPy arrays element by element, which raises "truth value of an array is
    # ambiguous" whenever two dictionaries share the same keys and list lengths.
    in_possession = possession_type == "in"
    is_end_event = arrays_dict is (arrays_end if in_possession else arrays_out_end)

    # Labels shared by both panels
    poss_label = "In Possession" if in_possession else "Out of Possession"
    location_label = "End Locations" if is_end_event else "Start Locations"

    # Define custom colormap
    cmap = LinearSegmentedColormap.from_list(
        'custom_cmap',
        ['#D7D1CF', '#FFFFFF', '#FFFF80', '#FF8000', '#800000', '#000000'],
        N=256)

    # Create figure with two subplots and specified facecolor
    fig = plt.figure(figsize=(20, 8), facecolor='#D7D1CF')
    axs = [fig.add_subplot(1, 2, i + 1) for i in range(2)]
    for ax in axs:
        ax.set_facecolor('#D7D1CF')

    try:
        # Create pitch object with new color (reused for both panels)
        pitch = Pitch(pitch_type='custom', pitch_width=68, pitch_length=105, goal_type='box',
                      linewidth=2, pitch_color='#D7D1CF', line_color='#000000')

        # Create meshgrid for plotting: one sample per grid column (x) and row (y)
        x = np.linspace(0, 105, 105)
        y = np.linspace(0, 68, 68)
        X, Y = np.meshgrid(x, y)

        # Draw one panel per team: pitch, heatmap, title, colorbar
        panels = ((1, team_name1, team_array1), (2, team_name2, team_array2))
        for ax, (panel_no, panel_name, panel_array) in zip(axs, panels):
            pitch.draw(ax=ax)

            # Transpose a (length, width) grid so it matches the (68, 105) meshgrid
            if panel_array.shape == (105, 68):
                print(f"Transposing team {panel_no} array from {panel_array.shape} to match expected (68, 105)")
                panel_array = panel_array.T

            # Plot using pcolormesh directly with nearest shading and custom colormap
            hm = ax.pcolormesh(X, Y, panel_array, cmap=cmap, alpha=0.9, shading='nearest')
            ax.set_title(f"{panel_name} - {event_type.replace('_', ' ').title()} ({poss_label}, {location_label})", fontsize=14)

            cbar = fig.colorbar(hm, ax=ax)
            cbar.set_label('Event Density')

        # Figure title; the similarity score is appended only when it can be looked up
        headline = f"Comparison: {team_name1} vs {team_name2}"
        try:
            # Get the right similarity dictionary based on possession and location
            if in_possession:
                similarity_dict = similarity_scores_end if is_end_event else similarity_scores_start
            else:
                similarity_dict = similarity_scores_out_end if is_end_event else similarity_scores_out_start

            similarity_matrix = similarity_dict.get(event_type)
            if (similarity_matrix is not None
                    and team_id1 in similarity_matrix.index
                    and team_id2 in similarity_matrix.columns):
                similarity_score = similarity_matrix.loc[team_id1, team_id2]
                headline = f"{headline} - Similarity: {similarity_score:.3f}"
        except Exception as e:
            print(f"Error calculating similarity: {e}")
        fig.suptitle(headline, fontsize=16)

        plt.tight_layout()
        plt.show()
    except Exception as e:
        print(f"Error creating comparison visualization: {e}")
        import traceback
        traceback.print_exc()
In [25]:
def analyze_team_style_signature(team_id, possession_type="in", team_name_map=None):
    """
    Create a visual signature of a team's playing style across all event types.

    Draws one small pitch heatmap per (event, start/end location) from the
    notebook's global array dictionaries, then prints the five teams with the
    highest overall similarity (dfPoss / dfOutPoss) to this team.

    Args:
        team_id: Team identifier (an entry of the global ``team_ids``)
        possession_type: 'in' for possession; any other value selects the
            out-of-possession data
        team_name_map: Optional dictionary mapping team IDs to team names

    Returns:
        Series of similarity scores to all other teams, sorted descending, or
        None when the team is unknown, no events exist, the team is missing from
        the similarity matrix, or plotting fails.
    """
    # Select the appropriate arrays_dict based on possession type
    if possession_type == "in":
        # Include both start and end arrays for a complete picture
        arrays_dict_start = arrays_start
        arrays_dict_end = arrays_end
        similarity_matrix = dfPoss
        poss_label = "In Possession"
    else:
        # Include both start and end arrays for a complete picture
        arrays_dict_start = arrays_out_start
        arrays_dict_end = arrays_out_end
        similarity_matrix = dfOutPoss
        poss_label = "Out of Possession"

    # Get team index
    try:
        team_idx = list(team_ids).index(team_id)
    except ValueError:
        print(f"Team ID {team_id} not found.")
        return

    # Get team name if mapping is provided
    team_name = team_name_map.get(team_id, str(team_id)) if team_name_map else str(team_id)

    # Get all available events from both start and end dictionaries
    start_events = list(arrays_dict_start.keys())
    end_events = list(arrays_dict_end.keys())
    if not start_events and not end_events:
        print(f"No event types found for {poss_label.lower()} analysis")
        return

    # Define custom colormap
    cmap = LinearSegmentedColormap.from_list(
        'custom_cmap',
        ['#D7D1CF', '#FFFFFF', '#FFFF80', '#FF8000', '#800000', '#000000'],
        N=256)

    # Create a figure for all events
    try:
        # All (event, location) panels: start-location events first, then end-location events
        all_events = ([(event, "start") for event in start_events]
                      + [(event, "end") for event in end_events])

        # Grid layout: between 2 and 4 columns, as many rows as needed
        n_events = len(all_events)
        n_cols = min(4, max(2, int(np.ceil(np.sqrt(n_events)))))
        n_rows = int(np.ceil(n_events / n_cols))

        # Create figure with specified facecolor
        fig = plt.figure(figsize=(n_cols * 5, n_rows * 4), facecolor='#D7D1CF')

        # Add a title to the figure with possession type
        fig.suptitle(f"Style Signature: {team_name} ({poss_label})", fontsize=20)

        # Create a pitch for drawing with updated color
        pitch = Pitch(pitch_type='custom', pitch_width=68, pitch_length=105, goal_type='box',
                      linewidth=2, pitch_color='#D7D1CF', line_color='#000000')

        # Create meshgrid for plotting: one sample per grid column (x) and row (y)
        x = np.linspace(0, 105, 105)
        y = np.linspace(0, 68, 68)
        X, Y = np.meshgrid(x, y)

        # Plot each available event
        for i, (event, location_type) in enumerate(all_events):
            # Create subplot
            ax = fig.add_subplot(n_rows, n_cols, i + 1)
            ax.set_facecolor('#D7D1CF')

            # Draw the pitch
            pitch.draw(ax=ax)

            try:
                # Get the right array dictionary based on location type
                arrays_dict = arrays_dict_start if location_type == "start" else arrays_dict_end

                # Get the event array
                team_array = arrays_dict[event][team_idx]

                # Transpose a (length, width) grid so it matches the (68, 105) meshgrid
                if team_array.shape == (105, 68):
                    team_array = team_array.T

                # Plot using pcolormesh directly with nearest shading and custom colormap
                ax.pcolormesh(X, Y, team_array, cmap=cmap, alpha=0.9, shading='nearest')

                # Set title
                location_label = "End" if location_type == "end" else "Start"
                ax.set_title(f"{event.replace('_', ' ').title()} ({location_label})", fontsize=12)
            except Exception as e:
                # A single broken panel is flagged in red; the others are still drawn
                print(f"Error plotting event {event}: {e}")
                ax.set_title(f"{event.replace('_', ' ').title()} - Error", fontsize=12, color='red')

        plt.tight_layout(rect=[0, 0, 1, 0.96])  # Adjust layout to make room for figure title
        plt.show()

        # Calculate and display similarity to other teams
        if similarity_matrix is not None and team_id in similarity_matrix.index:
            similarity_to_others = similarity_matrix.loc[team_id].sort_values(ascending=False)

            # Remove self (should be 1.0)
            if team_id in similarity_to_others.index:
                similarity_to_others = similarity_to_others.drop(team_id)

            # Display top 5 most similar teams
            print(f"Teams most similar to {team_name} ({poss_label}):")
            for rank, (other_team, similarity) in enumerate(similarity_to_others.head(5).items(), start=1):
                # Show the mapped name when a mapping is given, like the header line above
                other_name = team_name_map.get(other_team, other_team) if team_name_map else other_team
                print(f"{rank}. {other_name}: {similarity:.3f}")

            # Return the sorted similarity scores for further analysis
            return similarity_to_others
    except Exception as e:
        print(f"Error creating team style signature: {e}")
        import traceback
        traceback.print_exc()
    return None
In [26]:
def create_improved_team_style_explorer():
    """
    Create an interactive widget to explore team styles for both possession types.

    The user picks one or two teams, an event type, a location (start/end), a
    possession phase and a view type; clicking the button renders the matching
    visualization into an output widget.

    The function reads these notebook-level names, which must exist before it
    is called: ``widgets`` (ipywidgets), ``team_ids``, ``team_name_map``, the
    heatmap dictionaries ``arrays_start`` / ``arrays_end`` /
    ``arrays_out_start`` / ``arrays_out_end``, the similarity matrices
    ``dfPoss`` / ``dfOutPoss`` and the plotting helpers ``plot_team_heatmap``,
    ``compare_team_heatmaps``, ``plot_team_similarity_matrix`` and
    ``analyze_team_style_signature``.

    Returns:
        widgets.VBox: the assembled UI, ready to be passed to ``display``.
    """
    import traceback
    from html import escape  # used to render exception text safely inside the HTML widget

    # Union of the event types available in any of the four heatmap dictionaries
    all_event_types = sorted(
        set(arrays_start.keys())
        | set(arrays_end.keys())
        | set(arrays_out_start.keys())
        | set(arrays_out_end.keys())
    )

    # Dropdown options show (and return) the full team identifier
    team_options = [(team_id, team_id) for team_id in team_ids]
    description_style = {'description_width': 'initial'}

    # Create widgets
    team1_dropdown = widgets.Dropdown(
        options=team_options,
        description='Team 1:',
        style=description_style
    )
    team2_dropdown = widgets.Dropdown(
        options=team_options,
        description='Team 2:',
        style=description_style
    )
    event_dropdown = widgets.Dropdown(
        options=all_event_types,
        description='Event Type:',
        style=description_style
    )
    view_type = widgets.RadioButtons(
        options=['Single Team', 'Compare Teams', 'Team Similarity', 'Team Style Signature'],
        description='View Type:',
        style=description_style
    )
    location_type = widgets.RadioButtons(
        options=['Start Location', 'End Location'],
        description='Location:',
        style=description_style
    )
    possession_type = widgets.RadioButtons(
        options=['In Possession', 'Out of Possession'],
        description='Possession:',
        style=description_style
    )

    # Max teams slider for the similarity view.
    # The bounds are clamped so that min never exceeds max: with fewer than
    # 5 teams the original fixed min=5 would be rejected by the slider.
    slider_max = max(1, min(50, len(team_ids)))
    slider_min = min(5, slider_max)
    max_teams = widgets.IntSlider(
        value=min(20, slider_max),
        min=slider_min,
        max=slider_max,
        step=5,
        description='Max Teams:',
        disabled=True,  # Only enabled while the 'Team Similarity' view is selected
        style=description_style
    )

    # Progress indicator (rendered as HTML)
    progress = widgets.HTML(
        value="",
        description=""
    )
    # Output widget that receives prints and figures
    output = widgets.Output()
    # Button that triggers a redraw
    button = widgets.Button(
        description='Update Visualization',
        button_style='primary',
        tooltip='Click to update the visualization'
    )

    def on_view_change(change):
        """Enable the max-teams slider only for the 'Team Similarity' view."""
        max_teams.disabled = change['new'] != 'Team Similarity'

    view_type.observe(on_view_change, names='value')

    def _select_arrays(event, poss_type, location):
        """
        Pick the heatmap dictionary for the current selection.

        Uses the requested location when the event exists there and otherwise
        falls back to the other location of the same possession phase.
        Returns ``(arrays_dict, is_end_event)``.
        """
        # Looked up at click time so re-computed dictionaries are picked up
        if poss_type == "in":
            by_location = {"start": arrays_start, "end": arrays_end}
            phase_label, note_suffix = "in-possession", ""
        else:
            by_location = {"start": arrays_out_start, "end": arrays_out_end}
            phase_label, note_suffix = "out-of-possession", " for out-of-possession"

        wanted = "end" if location == "End Location" else "start"
        fallback = "start" if wanted == "end" else "end"

        if event in by_location[wanted]:
            print(f"Using {wanted} locations for {phase_label} event {event}")
            return by_location[wanted], wanted == "end"

        print(f"Note: Event {event} not found in {wanted} locations{note_suffix}, using {fallback} locations instead.")
        return by_location[fallback], fallback == "end"

    def update_visualization(_):
        """Button callback: read the widget state and render the requested view."""
        with output:
            output.clear_output()
            # Get values from widgets
            team1 = team1_dropdown.value
            team2 = team2_dropdown.value
            event = event_dropdown.value
            view = view_type.value
            location = location_type.value
            poss_type = "in" if possession_type.value == "In Possession" else "out"
            num_teams = max_teams.value
            # Show progress message
            progress.value = f"<b>Processing {view} visualization...</b>"
            try:
                # Log the current selection
                print(f"Selected: Team1={team1}, Team2={team2}, Event={event}")
                print(f"View={view}, Location={location}, Possession={possession_type.value}")

                # Select the dictionary based on both possession type AND location
                arrays_dict, is_end_event = _select_arrays(event, poss_type, location)
                print(f"Selected array dictionary contains these events: {list(arrays_dict.keys())}")

                # Overall similarity matrix for the chosen possession phase
                similarity_matrix = dfPoss if poss_type == "in" else dfOutPoss

                # Heatmap views need the event to exist in the selected dictionary
                if view in ('Single Team', 'Compare Teams') and event not in arrays_dict:
                    print(f"Event '{event}' is not available for {location.lower()} in {possession_type.value.lower()}. Please try another combination.")
                    progress.value = "<b style='color:red'>Error: Event not available</b>"
                    return

                if view == 'Single Team':
                    print(f"Plotting {event} from {'end' if is_end_event else 'start'} location")
                    print(f"Event data shape: {np.shape(arrays_dict[event])}")
                    # Pass the heatmap arrays themselves, not similarity scores
                    plot_team_heatmap(team1, event, arrays_dict, poss_type, team_name_map)
                elif view == 'Compare Teams':
                    # Pass the heatmap arrays themselves, not similarity scores
                    compare_team_heatmaps(team1, team2, event, arrays_dict, poss_type, team_name_map)
                elif view == 'Team Similarity':
                    if len(team_ids) > num_teams:
                        # Restrict the matrix to the teams closest to the selected one
                        similar_teams = similarity_matrix.loc[team1].sort_values(ascending=False).head(num_teams).index.tolist()
                        if team1 not in similar_teams:
                            similar_teams.append(team1)
                        matrix_subset = similarity_matrix.loc[similar_teams, similar_teams]
                        plot_team_similarity_matrix(matrix_subset, f"Teams Similar to {team1}", poss_type)
                    else:
                        plot_team_similarity_matrix(similarity_matrix, "Team Playing Style Similarity", poss_type)
                elif view == 'Team Style Signature':
                    analyze_team_style_signature(team1, poss_type, team_name_map)

                progress.value = "<b style='color:green'>Visualization complete</b>"
            except Exception as e:
                print(f"Error generating visualization: {e}")
                # Escape the message: it is rendered as HTML and may contain '<', '>' or '&'
                progress.value = f"<b style='color:red'>Error: {escape(str(e))}</b>"
                traceback.print_exc()

    # Connect the button click to the update function
    button.on_click(update_visualization)

    # Create the layout
    ui = widgets.VBox([
        widgets.HBox([team1_dropdown, team2_dropdown]),
        widgets.HBox([event_dropdown, location_type]),
        widgets.HBox([view_type, possession_type]),
        max_teams,
        button,
        progress,
        output
    ])
    # Outer container kept so the returned widget structure is unchanged
    full_ui = widgets.VBox([ui])
    return full_ui
In [27]:
import ipywidgets as widgets
from IPython.display import display
# Build the interactive team style explorer and render it in this cell's output
explorer = create_improved_team_style_explorer()
display(explorer)
VBox(children=(VBox(children=(HBox(children=(Dropdown(description='Team 1:', options=(('AC Milan-Paulo Alexand…
In [ ]:
In [ ]:
In [ ]:
In [28]:
# Distinct team identifiers found in dfx (may still contain missing values)
playerlist = dfx['team_identifier'].unique().tolist()
# Drop missing entries and order the remaining identifiers alphabetically
cleaned_playerlist = sorted(name for name in playerlist if pd.notna(name))
In [29]:
from html import escape
from IPython.display import display, HTML
# Build one <option> per identifier. Names are HTML-escaped so characters such
# as &, <, > or quotes cannot break the attribute value or the markup.
options_html = ''.join(
    f'<option value="{escape(str(name))}">{escape(str(name))}</option>'
    for name in cleaned_playerlist
)
# Searchable input backed by a <datalist>; the script echoes the current value.
# textContent (not innerHTML) is used so typed text is never parsed as HTML.
dropdown_html = f"""
<input list="players" id="dropdown" oninput="handleInput()" placeholder="Choose Someone">
<datalist id="players">
{options_html}
</datalist>
<p id="output"></p>
<script>
function handleInput() {{
var input = document.getElementById("dropdown").value;
var output = document.getElementById("output");
output.textContent = "Selected: " + input;
}}
</script>
"""
# Display the dropdown
display(HTML(dropdown_html))
In [30]:
# Team identifier whose similarity rankings are computed in the following cells
team = "Como-Cesc Fàbregas-2425"
In [31]:
# In-possession similarity of every team to `team`, most similar first
dfPoss_sorted = dfPoss.transpose().sort_values(team, ascending=False)
# Keep only the selected team's column, rounded to 4 decimals
dfPoss_filtered = dfPoss_sorted.filter(items=[team]).round(4)
In [32]:
# Out-of-possession similarity of every team to `team`, most similar first
dfOutPoss_sorted = dfOutPoss.transpose().sort_values(team, ascending=False)
# Keep only the selected team's column, rounded to 4 decimals
dfOutPoss_filtered = dfOutPoss_sorted.filter(items=[team]).round(4)
In [33]:
# Overall similarity: element-wise mean of the in- and out-of-possession matrices
dfMain = dfPoss.add(dfOutPoss).div(2)
# Rank every team by overall similarity to `team`, most similar first
dfMain_sorted = dfMain.transpose().sort_values(team, ascending=False)
# Keep only the selected team's column, rounded to 4 decimals
dfMain_filtered = dfMain_sorted.filter(items=[team]).round(4)
In [34]:
# Put the overall, in-possession and out-of-possession scores side by side
merged_df = pd.concat([dfMain_filtered, dfPoss_filtered, dfOutPoss_filtered], axis=1)
merged_df.columns = ['General', 'In_Possession', 'Out_Possession']
# Remove the selected team itself by label instead of assuming it is the first
# row (ties or a non-1.0 self-similarity would otherwise drop the wrong team)
merged_df = merged_df.drop(index=team, errors='ignore')
# Keep the first 10 rows (the input frames were sorted descending before concatenation)
filtered_df = merged_df.head(10).reset_index()
# Show identifiers with spaces instead of hyphens; literal replacement, not a regex
filtered_df['index'] = filtered_df['index'].str.replace('-', ' ', regex=False)
# Ascending order so the most similar team ends up on the top row of the table plot
filtered_df = filtered_df.sort_values(by='General', ascending=True)
In [35]:
# Draw the top-10 similarity table as a figure: one text row per team
fig = plt.figure(figsize=(1800/500, 1800/500), dpi=500)
ax = plt.subplot()
ncols = filtered_df.shape[1]
nrows = filtered_df.shape[0]
ax.set_xlim(0, ncols + 1)
ax.set_ylim(0, nrows + 1)
fig.patch.set_facecolor('#D7D1CF')

# x-coordinate of each table column. Named `column_x` (not `positions`) so the
# `positions` DataFrame loaded at the top of the notebook is not overwritten.
column_x = [0.1, 3.3, 3.8, 4.5]
columns = ['index', 'In_Possession', 'Out_Possession', 'General']

# Text styling per column; the team name column uses the default style
small_style = dict(fontsize=6, color='#000000', fontname=fe_regular.name)
text_styles = {
    # Semibold white text only for the General column (drawn on the red band)
    'General': dict(fontsize=10, color='#FFFFFF', fontname=fe_semibold.name),
    'In_Possession': small_style,
    'Out_Possession': small_style,
}
default_style = dict(fontsize=8, color='#000000', fontname=fe_regular.name)

for i in range(nrows):
    for j, column in enumerate(columns):
        ax.annotate(
            xy=(column_x[j], i + .5),
            text=str(filtered_df[column].iloc[i]),
            ha='left' if j == 0 else 'center',  # first column is left-aligned
            va='center',
            **text_styles.get(column, default_style)
        )

# Add dividing lines (x-limits were fixed above, so read them once)
x_left, x_right = ax.get_xlim()
ax.plot([x_left, x_right], [nrows, nrows], lw=1.5, color='#000000', marker='', zorder=4)
ax.plot([x_left, x_right], [0, 0], lw=1.5, color='#000000', marker='', zorder=4)
for row in range(1, nrows):
    ax.plot([x_left, x_right], [row, row], lw=0.5, color='#000000', ls='-', zorder=3, marker='')

# Add colored background for the General column
ax.fill_between(x=[4.1, 4.9], y1=nrows, y2=0, color='red', alpha=1, ec='None')

# Title, subtitle and credits
plt.text(0.5, 0.88, f"Top 10 {team}'s\nStyle Similarity Ratings", transform=fig.transFigure, horizontalalignment='center', fontsize=10,
         fontname=fe_semibold.name, color='#000000')
plt.text(0.5, 0.84, 'In Possession, Out of Possession, and Overall Rating', transform=fig.transFigure, horizontalalignment='center',
         fontsize=7, color='#4E616C', fontname=fe_regular.name)
plt.text(0, -0.5, 'X: @gualanodavide | Bluesky: @gualanodavide.bsky.social | Linkedin: www.linkedin.com/in/davide-gualano-a2454b187 | Newsletter: the-cutback.beehiiv.com',
         horizontalalignment='left', fontsize=3)
ax.set_axis_off()
In [ ]:
In [ ]:
In [ ]:
In [63]:
# Write the merged similarity table (General / In_Possession / Out_Possession) to CSV.
# NOTE(review): the file name refers to Bologna 23/24 while `team` is set to a
# Como 24/25 identifier earlier in the notebook — confirm the intended output path.
merged_df.to_csv("Bologna2324similarity.csv")