In [1]:
import os
import warnings
from pathlib import Path

import matplotlib
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import socceraction
import socceraction.spadl as spadl
import tqdm
from matplotlib.colors import LinearSegmentedColormap
from mplsoccer import Pitch, VerticalPitch
from scipy.ndimage import gaussian_filter
from scipy.spatial.distance import cosine
from sklearn.manifold import TSNE, MDS
from sklearn.metrics.pairwise import cosine_similarity
# Silence every warning category for the rest of the kernel session.
# NOTE(review): this blanket filter also hides any deprecation or
# chained-assignment warnings later cells may raise — consider narrowing it.
warnings.filterwarnings('ignore')

# Set plotting style: matplotlib's 'fivethirtyeight' sheet plus seaborn's
# "notebook" context with fonts scaled by 1.2.
plt.style.use('fivethirtyeight')
sns.set_context("notebook", font_scale=1.2)
In [2]:
# Folder holding the Source Sans Pro .ttf files. Set the FTBL_FONT_DIR
# environment variable to point elsewhere; the fallback is the path this
# notebook was originally written with.
FONT_DIR = Path(os.environ.get(
    'FTBL_FONT_DIR', '/Users/davidegualano/Documents/Python FTBLData'))

fe_regular = fm.FontEntry(
    fname=str(FONT_DIR / 'SourceSansPro-Regular.ttf'),
    name='SourceSansPro-Regular'
)
fe_semibold = fm.FontEntry(
    fname=str(FONT_DIR / 'SourceSansPro-SemiBold.ttf'),
    name='SourceSansPro-SemiBold'
)

_missing_fonts = [entry.fname for entry in (fe_regular, fe_semibold)
                  if not Path(entry.fname).is_file()]

if _missing_fonts:
    # Do not register entries that point at files which are not there, and
    # leave the default font family alone. print() is used because warnings
    # are globally silenced in the first cell.
    print(f"Font file(s) not found, keeping matplotlib's default font: {_missing_fonts}")
else:
    # Insert both fonts at the front of the font manager's list, skipping any
    # entry that is already present so re-running the cell adds no duplicates.
    for _slot, _entry in enumerate((fe_regular, fe_semibold)):
        if _entry not in fm.fontManager.ttflist:
            fm.fontManager.ttflist.insert(_slot, _entry)

    # Set the font family
    matplotlib.rcParams['font.family'] = fe_regular.name  # Default to Regular
In [3]:
# Load data
def _read_indexed_csv(filename):
    """Read `filename` with pandas, using its first column as the index."""
    return pd.read_csv(filename, index_col=0)


fb = _read_indexed_csv("teamsFOTMOB.csv")
positions = _read_indexed_csv("clustered_position.csv")
players = _read_indexed_csv("players2425.csv")
games = _read_indexed_csv("games2425.csv")
actions = _read_indexed_csv("actions2425.csv")
chains = _read_indexed_csv("possession_chains_info2425.csv")
In [4]:
# Attach the possession-chain information to the events.
# No keys are given, so pandas joins on every column the two frames share,
# with its default inner join.
actions = pd.merge(actions, chains)
In [5]:
# Enrich the players table with the team table and the clustered positions
# (both left joins on the shared columns), then keep only the lookup columns.
playersA = pd.merge(players, fb, how="left")
playersB = pd.merge(playersA, positions, how="left")
_player_lookup_cols = ['game_id', 'team_id', 'player_id', 'position']
players_info = playersB[_player_lookup_cols]
In [6]:
# Columns both the home and the away view carry over unchanged.
_game_key_cols = ["game_id", "competition_id", "season_id"]

# Home view: one row per game with the home side exposed as team_id / manager.
gamesA = (
    games[_game_key_cols + ['home_team_id', 'home_manager']]
    .rename(columns={'home_team_id': 'team_id'})
    .rename(columns={'home_manager': 'manager'})
)

# Away view: same layout, built from the away side.
gamesB = (
    games[_game_key_cols + ['away_team_id', 'away_manager']]
    .rename(columns={'away_team_id': 'team_id'})
    .rename(columns={'away_manager': 'manager'})
)
In [7]:
# Stack the home and away views into one game/team/manager table, ordered by
# game_id; it is merged onto the events below to build the team identifier.
_home_and_away = pd.concat([gamesA, gamesB])
gamesX = _home_and_away.sort_values('game_id')
In [8]:
# Process actions data
# Replace the existing action_id with a fresh 0..n-1 counter that follows the
# current row order of the merged frame.
actions = (
    actions
    .drop(columns=['action_id'])
    .reset_index(drop=True)
    .reset_index()
    .rename(columns={'index': 'action_id'})
)
# Let socceraction add its name columns (type_name is relied on further down).
actions = spadl.add_names(actions)
In [9]:
# Merge all data: left-join the manager table, the player lookup and the team
# table onto the events, each on whatever columns it shares with the result so far.
df = actions
for _lookup in (gamesX, players_info, fb):
    df = df.merge(_lookup, how="left")
In [10]:
# Clean data: rows missing any identifier component are dropped, and the
# components are cast to str so they can be concatenated.
_identifier_parts = ['team_name', 'season_id', 'manager']
df = df.dropna(subset=_identifier_parts)
for _part in _identifier_parts:
    df[_part] = df[_part].astype(str)

# Create team identifier with manager between team_name and season_id
df['team_identifier'] = df['team_name'].str.cat([df['manager'], df['season_id']], sep='-')
In [11]:
# Competitions present after cleaning (displayed as the cell output).
df['competition_id'].unique()
Out[11]:
array(['BEL-Jupiler Pro League', 'BRA-Brasileirão', 'ENG-Championship',
       'ENG-FA Cup', 'ENG-League Cup', 'ENG-League One', 'ENG-League Two',
       'ENG-Premier League', 'ESP-La Liga', 'EU-Champions League',
       'EU-Europa League', 'FRA-Ligue 1', 'GER-Bundesliga', 'ITA-Serie A',
       'NED-Eredivisie', 'POR-Liga Portugal', 'RUS-Premier League',
       'SCO-Premiership', 'USA-Major League Soccer'], dtype=object)
In [12]:
# Restrict the analysis to the competitions listed here.
_in_scope = df['competition_id'].isin(['ITA-Serie A'])
dfx = df[_in_scope]
In [13]:
# Calculate touches: number of non-null type_name rows per team identifier.
touches = (
    dfx
    .groupby(["team_identifier"], observed=True)["type_name"]
    .count()
    .reset_index()
)

# Calculate touches against: first the per-team, per-game counts that the
# next cell pairs up with the opponent's row of the same game.
against0 = (
    dfx
    .groupby(["team_identifier", "team_id", "team_name", "game_id"], observed=True)["type_name"]
    .count()
    .rename('count')
    .reset_index()
)
In [14]:
# Calculate touches against each team.
# Self-join the per-game counts on game_id so each team row sits next to every
# team row of the same game, then drop the pairings of a team with itself.
_game_pairs = pd.merge(against0, against0, on='game_id', suffixes=('_team', '_opponent'))
merged_df = _game_pairs[_game_pairs['team_id_team'] != _game_pairs['team_id_opponent']]

# Keep the team's identifier and the opponent's count under their final names.
result_df = merged_df[['team_identifier_team', 'count_opponent']].rename(
    columns={'team_identifier_team': 'team_identifier', 'count_opponent': 'touches_against'}
)

# Total opponent touches per team identifier.
against = (
    result_df
    .groupby(["team_identifier"], observed=True)["touches_against"]
    .sum()
    .reset_index()
)
In [15]:
# Setup pitch: custom 105 x 68 pitch, used below for binning.
pitch = Pitch(
    pitch_type='custom',
    pitch_length=105,
    pitch_width=68,
    goal_type='box',
    linewidth=2,
    line_color='#000000',
)
In [16]:
# Cell 1: Filter by specific action types

# Position labels grouped into the five role buckets used for the pass and
# dribble splits below (a = def, b = wb, c = cm, d = am, e = st).
# NOTE(review): the midfield bucket lists 'LCM' but no 'RCM' — confirm against
# the labels actually present in clustered_position.csv.
POSITION_GROUPS = {
    'def': ['GK', 'CB', 'RCB', 'LCB'],
    'wb': ['LWB', 'RWB'],
    'cm': ['DM', 'CM', 'LCM'],
    'am': ['AWL', 'AWR'],
    'st': ['ST', 'SS'],
}


def _split_by_role(frame):
    """Return one filtered frame per bucket of POSITION_GROUPS, in dict order."""
    return tuple(frame[frame['position'].isin(labels)]
                 for labels in POSITION_GROUPS.values())


def _nth_action_per_chain(frame, n):
    """Return the n-th (0-based) action of every possession chain as whole rows.

    Rows are ordered by action_id inside each possession_chain and picked by
    their position in that order. Unlike GroupBy.first(), which takes the first
    non-null value of each column separately, every output row is one real
    action. Rows without a possession_chain are ignored, as groupby would do.

    NOTE(review): chains are grouped by possession_chain alone, exactly as the
    original code did; this assumes the ids are unique across games.
    """
    ordered = (
        frame
        .dropna(subset=['possession_chain'])
        .sort_values(['possession_chain', 'action_id'])
    )
    position_in_chain = ordered.groupby('possession_chain').cumcount()
    return ordered[position_in_chain == n].reset_index(drop=True)


df0 = dfx[dfx['type_name'] == 'bad_touch']
df2 = dfx[dfx['type_name'] == 'shot']
df4 = dfx[dfx['type_name'] == 'goalkick']
df5 = dfx[dfx['type_name'] == 'cross']
df6 = dfx[(dfx['type_name'].isin(['dribble', 'pass', 'take_on'])) & (dfx['is_inbox'] == True)]

# Dribbles / take-ons during build-up, split by role
df7 = dfx[(dfx['type_name'].isin(['dribble', 'take_on'])) & (dfx['is_buildup'] == True)]
df7a, df7b, df7c, df7d, df7e = _split_by_role(df7)

# Dribbles / take-ons during consolidation, split by role
df8 = dfx[(dfx['type_name'].isin(['dribble', 'take_on'])) & (dfx['is_consolidate'] == True)]
df8a, df8b, df8c, df8d, df8e = _split_by_role(df8)

# Passes during build-up, split by role
df9 = dfx[(dfx['type_name'].isin(['pass'])) & (dfx['is_buildup'] == True)]
df9a, df9b, df9c, df9d, df9e = _split_by_role(df9)

# Passes during consolidation, split by role
df10 = dfx[(dfx['type_name'].isin(['pass'])) & (dfx['is_consolidate'] == True)]
df10a, df10b, df10c, df10d, df10e = _split_by_role(df10)

df11 = dfx[dfx['is_transition'] == True]
df12 = dfx[dfx['type_name'].isin(['tackle', 'interception', 'clearance', 'foul'])]
df13 = dfx[dfx['is_regain'] == True]

# For each possession_chain, keep only the first regain action in df13a and
# the second one in df13b.
sorted_df13 = df13.sort_values(['possession_chain', 'action_id'])
df13a = _nth_action_per_chain(df13, 0)
df13b = _nth_action_per_chain(df13, 1)

# Possession chains whose first action has is_longball == True.
sorted_dfx = dfx.sort_values(['possession_chain', 'action_id'])
# First action of each chain, indexed by possession_chain
first_rows = _nth_action_per_chain(dfx, 0).set_index('possession_chain')
valid_chains = first_rows[first_rows['is_longball'] == True].index.tolist()
# All actions belonging to those chains
longball_df = dfx[dfx['possession_chain'].isin(valid_chains)]
# df14a is the first action of each such chain, df14b the second
df14a = _nth_action_per_chain(longball_df, 0)
df14b = _nth_action_per_chain(longball_df, 1)
In [17]:
# Enhanced combined dictionary with more contextual information.
# Every value is (event frame, x column, y column, event category); the key's
# suffix says whether start or end coordinates are binned.
_START_XY = ('start_x_a0', 'start_y_a0')
_END_XY = ('end_x_a0', 'end_y_a0')


def _spec(frame, xy_cols, category):
    """Build one dictionary value: (frame, x column, y column, category)."""
    x_name, y_name = xy_cols
    return (frame, x_name, y_name, category)


dfp_dict_combined = {
    # Possession loss events
    'bad_touch_start': _spec(df0, _START_XY, 'possession_loss'),

    # Attacking events
    'shots_and_goals_start': _spec(df2, _START_XY, 'attacking'),
    'crosses_start': _spec(df5, _START_XY, 'attacking'),
    'crosses_end': _spec(df5, _END_XY, 'attacking'),

    # Possession restart
    'goalkicks_end': _spec(df4, _END_XY, 'possession_restart'),
    'regains_start': _spec(df13a, _START_XY, 'possession_restart'),
    'regains_end': _spec(df13a, _END_XY, 'possession_restart'),

    # Inbox/Intobox events
    'box_start': _spec(df6, _START_XY, 'box'),
    'box_end': _spec(df6, _END_XY, 'box'),

    # Transition event
    'transition_start': _spec(df11, _START_XY, 'transition'),
    'transition_end': _spec(df11, _END_XY, 'transition'),

    # Defensive events
    'defensive_actions_start': _spec(df12, _START_XY, 'defensive'),
}

# Passes and dribbles/take-ons: one start and one end entry per role, for the
# build-up and consolidation phases. Insertion order matches the role order
# def, wb, cm, am, st within each family.
_role_families = (
    ('pass_buildup', 'passes_events', (df9a, df9b, df9c, df9d, df9e)),
    ('pass_cons', 'passes_events', (df10a, df10b, df10c, df10d, df10e)),
    ('drb_buildup', 'dribbles_events', (df7a, df7b, df7c, df7d, df7e)),
    ('drb_cons', 'dribbles_events', (df8a, df8b, df8c, df8d, df8e)),
)
for _family, _category, _frames in _role_families:
    for _role, _frame in zip(('def', 'wb', 'cm', 'am', 'st'), _frames):
        dfp_dict_combined[f'{_family}_{_role}_start'] = _spec(_frame, _START_XY, _category)
        dfp_dict_combined[f'{_family}_{_role}_end'] = _spec(_frame, _END_XY, _category)

# Longball chains events
dfp_dict_combined['longball_chain_start'] = _spec(df14a, _START_XY, 'longballs_events')
dfp_dict_combined['longball_chain_end'] = _spec(df14a, _END_XY, 'longballs_events')
dfp_dict_combined['longball_chain_second_end'] = _spec(df14b, _END_XY, 'longballs_events')
In [18]:
# Containers for the per-event lists of team grids, in this order:
# in-possession start / end coordinates, out-of-possession start / end coordinates.
arrays_start, arrays_end, arrays_out_start, arrays_out_end = ({} for _ in range(4))

# Filled in once the team list has been extracted.
team_ids = None

def _grid_shape(bins):
    """Return (n_x, n_y) for `bins` given as one int or an (n_x, n_y) pair.

    Anything else (for example explicit bin edges) falls back to (105, 68).
    """
    try:
        if np.ndim(bins) == 0:
            return int(bins), int(bins)
        if np.ndim(bins) == 1 and len(bins) == 2:
            return int(bins[0]), int(bins[1])
    except (TypeError, ValueError):
        pass
    return 105, 68


def calculate_bin_statistics(df, x_column, y_column, bins=(105, 68), statistic='count'):
    """Bin the points of `df` on the module-level `pitch` and return the result dict.

    Parameters
    ----------
    df : pandas.DataFrame
        Events to bin.
    x_column, y_column : str
        Names of the coordinate columns.
    bins : int or (int, int)
        Number of bins along x and y. The default gives one bin per unit of
        the 105 x 68 range checked below.
    statistic : str
        Passed through to `pitch.bin_statistic`.

    Returns
    -------
    dict
        Its 'statistic' entry is an array of shape (n_x, n_y). When there is
        nothing to bin, the dict contains only an all-zero 'statistic' of that
        shape. Otherwise it is the dict returned by `pitch.bin_statistic`, with
        'statistic' transposed to x-major; the other entries are left as
        returned, so they still refer to the untransposed layout.
    """
    n_x, n_y = _grid_shape(bins)

    # Handle empty dataframe. The zero grid follows `bins`, so it always has
    # the same shape as a grid produced from real data.
    if df.empty:
        return {'statistic': np.zeros((n_x, n_y))}

    # Keep only rows with both coordinates present and inside 0..105 x 0..68
    df_valid = df.dropna(subset=[x_column, y_column])
    df_valid = df_valid[(df_valid[x_column] >= 0) & (df_valid[x_column] <= 105) &
                        (df_valid[y_column] >= 0) & (df_valid[y_column] <= 68)]

    if df_valid.empty:
        return {'statistic': np.zeros((n_x, n_y))}

    # Calculate bin statistics using pitch dimensions
    bin_stat = pitch.bin_statistic(df_valid[x_column], df_valid[y_column],
                                   statistic=statistic, bins=bins)

    # The pitch returns the grid as (n_y, n_x); flip it to (n_x, n_y).
    if bin_stat['statistic'].shape == (n_y, n_x):
        bin_stat['statistic'] = bin_stat['statistic'].T

    return bin_stat

# Gaussian smoothing of a binned grid
def fixed_smooth(bin_statistic, sigma=2.5):
    """Return a copy of `bin_statistic` whose 'statistic' grid is Gaussian-blurred.

    `sigma` is handed to scipy's gaussian_filter and defaults to 2.5. The input
    mapping and its array are left unmodified; every other entry is carried
    over by a shallow copy.
    """
    blurred = gaussian_filter(bin_statistic['statistic'].copy(), sigma)

    result = bin_statistic.copy()
    result['statistic'] = blurred
    return result

# Events that will use the simplified calculation (no normalization)
simplified_events = ['bad_touch_start',
                    'shots_and_goals_start', 'crosses_start', 'goalkicks_end', 'crosses_end']

# Frame of the first event in the dictionary (name kept for later cells).
sample_event = next(iter(dfp_dict_combined.values()))[0]

# Take the team list from the full filtered frame rather than from one event
# frame: a team with no rows in that particular event would otherwise be
# missing from every grid and from the similarity matrices.
team_ids = np.sort(dfx['team_identifier'].unique())

# Calculate total iterations
total_iterations = len(dfp_dict_combined) * len(team_ids)

print(f"Processing {total_iterations} team-event combinations for both possession and out-of-possession...")

# Process each event for both possession and out-of-possession analysis

def _smoothed_grid(event_df, x_col, y_col, reference_counts=None):
    """Bin `event_df`, optionally turn counts into shares, then smooth.

    When `reference_counts` is given, each bin becomes event count divided by
    reference count, with 0 wherever the reference is 0 or the ratio is NaN.
    Returns the smoothed 'statistic' array.
    """
    event_stat = calculate_bin_statistics(event_df, x_col, y_col)
    if reference_counts is not None:
        with np.errstate(divide='ignore', invalid='ignore'):
            share = np.where(reference_counts != 0,
                             event_stat['statistic'] / reference_counts, 0)
        event_stat['statistic'] = np.where(np.isnan(share), 0, share)
    return fixed_smooth(event_stat)['statistic']


# Per-team frames that do not depend on the event, built once instead of once
# per event: the team's own actions, the games it played, and everything its
# opponents did in those games.
team_frames = {}
for team_id in team_ids:
    own_actions = dfx[dfx['team_identifier'] == team_id]
    team_games = own_actions['game_id'].unique()
    team_frames[team_id] = {
        'own': own_actions,
        'games': team_games,
        'opponent': dfx[dfx['game_id'].isin(team_games) & (dfx['team_identifier'] != team_id)],
    }

# Reference (all-action) counts depend only on team, side and coordinate
# columns, so they are computed once and reused across events.
reference_cache = {}


def _reference_counts(team_id, side, x_col, y_col):
    """Return the cached all-action count grid for one team, side and coordinate pair."""
    key = (team_id, side, x_col, y_col)
    if key not in reference_cache:
        reference_cache[key] = calculate_bin_statistics(
            team_frames[team_id][side], x_col, y_col)['statistic']
    return reference_cache[key]


with tqdm.tqdm(total=total_iterations, desc="Processing Events and Teams") as pbar:
    # Process each DataFrame in the combined dictionary
    for event_name, (dfp_event, x_col, y_col, event_type) in dfp_dict_combined.items():
        array_event = []       # For possession (team's own actions)
        array_out_event = []   # For out-of-possession (opponent actions)

        # Defensive actions and the "simplified" events are smoothed raw
        # counts; everything else is normalised by all actions in each bin.
        is_defensive = event_name == "defensive_actions_start"
        use_raw_counts = is_defensive or any(event_key in event_name for event_key in simplified_events)

        # Loop through each team identifier for the current event
        for team_id in team_ids:
            frames = team_frames[team_id]

            # POSSESSION ANALYSIS (team's own actions)
            df_event_team = dfp_event[dfp_event['team_identifier'] == team_id]
            own_reference = None if use_raw_counts else _reference_counts(team_id, 'own', x_col, y_col)
            array_event.append(_smoothed_grid(df_event_team, x_col, y_col, own_reference))

            # OUT-OF-POSSESSION ANALYSIS (opponent actions)
            # Opponent events come from every game the team played. Using only
            # the games in which the team itself produced this event would drop
            # opponent data for rare events, or leave the grid all zero.
            df_opponent_event = dfp_event[dfp_event['game_id'].isin(frames['games'])
                                          & (dfp_event['team_identifier'] != team_id)]

            if is_defensive:
                # Mirror the opponent's x coordinate along the pitch length.
                # NOTE(review): only x is mirrored, y is left as is — confirm
                # this is the intended change of perspective.
                df_opponent_event = df_opponent_event.copy()
                df_opponent_event['x_a0_inverted'] = 105 - df_opponent_event[x_col]
                grid_out = _smoothed_grid(df_opponent_event, 'x_a0_inverted', y_col)
            else:
                opponent_reference = (None if use_raw_counts
                                      else _reference_counts(team_id, 'opponent', x_col, y_col))
                grid_out = _smoothed_grid(df_opponent_event, x_col, y_col, opponent_reference)

            array_out_event.append(grid_out)

            # Update the progress bar
            pbar.update(1)

        # Store the results under the event name without its coordinate suffix
        if event_name.endswith('_start'):
            base_name = event_name[:-len('_start')]
            arrays_start[base_name] = array_event
            arrays_out_start[base_name] = array_out_event
        elif event_name.endswith('_end'):
            base_name = event_name[:-len('_end')]
            arrays_end[base_name] = array_event
            arrays_out_end[base_name] = array_out_event

# Map arrays to individual variables for backward compatibility.
# Every name falls back to an empty list when its event is absent.
_ROLE_SUFFIXES = ('def', 'wb', 'cm', 'am', 'st')


def _role_grids(store, family):
    """Look up '<family>_<role>' in `store` for the five roles, defaulting each to []."""
    return [store.get(f'{family}_{role}', []) for role in _ROLE_SUFFIXES]


# ---------------------------------------------------------------------------
# In-possession arrays (start coordinates)
# ---------------------------------------------------------------------------
array0a = arrays_start.get('bad_touch', [])
array3a = arrays_start.get('shots_and_goals', [])
array5a = arrays_start.get('crosses', [])
array12a = arrays_start.get('defensive_actions', [])

# Pass events (start coordinates)
(pass_buildup_def_start, pass_buildup_wb_start, pass_buildup_cm_start,
 pass_buildup_am_start, pass_buildup_st_start) = _role_grids(arrays_start, 'pass_buildup')
(pass_cons_def_start, pass_cons_wb_start, pass_cons_cm_start,
 pass_cons_am_start, pass_cons_st_start) = _role_grids(arrays_start, 'pass_cons')

# Dribble events (start coordinates)
(drb_buildup_def_start, drb_buildup_wb_start, drb_buildup_cm_start,
 drb_buildup_am_start, drb_buildup_st_start) = _role_grids(arrays_start, 'drb_buildup')
(drb_cons_def_start, drb_cons_wb_start, drb_cons_cm_start,
 drb_cons_am_start, drb_cons_st_start) = _role_grids(arrays_start, 'drb_cons')

# Box, transition, regain and longball events (start)
box_start = arrays_start.get('box', [])
transition_start = arrays_start.get('transition', [])
regains_start = arrays_start.get('regains', [])
longball_chain_start = arrays_start.get('longball_chain', [])

# ---------------------------------------------------------------------------
# In-possession arrays (end coordinates)
# ---------------------------------------------------------------------------
array4b = arrays_end.get('goalkicks', [])
array5b = arrays_end.get('crosses', [])

# Pass events (end coordinates)
(pass_buildup_def_end, pass_buildup_wb_end, pass_buildup_cm_end,
 pass_buildup_am_end, pass_buildup_st_end) = _role_grids(arrays_end, 'pass_buildup')
(pass_cons_def_end, pass_cons_wb_end, pass_cons_cm_end,
 pass_cons_am_end, pass_cons_st_end) = _role_grids(arrays_end, 'pass_cons')

# Dribble events (end coordinates)
(drb_buildup_def_end, drb_buildup_wb_end, drb_buildup_cm_end,
 drb_buildup_am_end, drb_buildup_st_end) = _role_grids(arrays_end, 'drb_buildup')
(drb_cons_def_end, drb_cons_wb_end, drb_cons_cm_end,
 drb_cons_am_end, drb_cons_st_end) = _role_grids(arrays_end, 'drb_cons')

# Box, transition, regain and longball events (end)
box_end = arrays_end.get('box', [])
transition_end = arrays_end.get('transition', [])
regains_end = arrays_end.get('regains', [])
longball_chain_end = arrays_end.get('longball_chain', [])
longball_chain_second_end = arrays_end.get('longball_chain_second', [])

# ---------------------------------------------------------------------------
# Out-of-possession arrays (start coordinates)
# ---------------------------------------------------------------------------
array0a_out = arrays_out_start.get('bad_touch', [])
array3a_out = arrays_out_start.get('shots_and_goals', [])
array5a_out = arrays_out_start.get('crosses', [])
array12a_out = arrays_out_start.get('defensive_actions', [])

# Pass events (start coordinates) - out of possession
(pass_buildup_def_start_out, pass_buildup_wb_start_out, pass_buildup_cm_start_out,
 pass_buildup_am_start_out, pass_buildup_st_start_out) = _role_grids(arrays_out_start, 'pass_buildup')
(pass_cons_def_start_out, pass_cons_wb_start_out, pass_cons_cm_start_out,
 pass_cons_am_start_out, pass_cons_st_start_out) = _role_grids(arrays_out_start, 'pass_cons')

# Dribble events (start coordinates) - out of possession
(drb_buildup_def_start_out, drb_buildup_wb_start_out, drb_buildup_cm_start_out,
 drb_buildup_am_start_out, drb_buildup_st_start_out) = _role_grids(arrays_out_start, 'drb_buildup')
(drb_cons_def_start_out, drb_cons_wb_start_out, drb_cons_cm_start_out,
 drb_cons_am_start_out, drb_cons_st_start_out) = _role_grids(arrays_out_start, 'drb_cons')

# Box, transition, regain and longball events (start) - out of possession
box_start_out = arrays_out_start.get('box', [])
transition_start_out = arrays_out_start.get('transition', [])
regains_start_out = arrays_out_start.get('regains', [])
longball_chain_start_out = arrays_out_start.get('longball_chain', [])

# ---------------------------------------------------------------------------
# Out-of-possession arrays (end coordinates)
# ---------------------------------------------------------------------------
array4b_out = arrays_out_end.get('goalkicks', [])
array5b_out = arrays_out_end.get('crosses', [])

# Pass events (end coordinates) - out of possession
(pass_buildup_def_end_out, pass_buildup_wb_end_out, pass_buildup_cm_end_out,
 pass_buildup_am_end_out, pass_buildup_st_end_out) = _role_grids(arrays_out_end, 'pass_buildup')
(pass_cons_def_end_out, pass_cons_wb_end_out, pass_cons_cm_end_out,
 pass_cons_am_end_out, pass_cons_st_end_out) = _role_grids(arrays_out_end, 'pass_cons')

# Dribble events (end coordinates) - out of possession
(drb_buildup_def_end_out, drb_buildup_wb_end_out, drb_buildup_cm_end_out,
 drb_buildup_am_end_out, drb_buildup_st_end_out) = _role_grids(arrays_out_end, 'drb_buildup')
(drb_cons_def_end_out, drb_cons_wb_end_out, drb_cons_cm_end_out,
 drb_cons_am_end_out, drb_cons_st_end_out) = _role_grids(arrays_out_end, 'drb_cons')

# Box, transition, regain and longball events (end) - out of possession
box_end_out = arrays_out_end.get('box', [])
transition_end_out = arrays_out_end.get('transition', [])
regains_end_out = arrays_out_end.get('regains', [])
longball_chain_end_out = arrays_out_end.get('longball_chain', [])
longball_chain_second_end_out = arrays_out_end.get('longball_chain_second', [])

print("Processing completed for both possession and out-of-possession.")
print(f"Results stored in arrays_start/end ({len(arrays_start)} events) and arrays_out_start/end ({len(arrays_out_start)} events).")
Processing 1485 team-event combinations for both possession and out-of-possession...
Processing Events and Teams: 100%|██████████| 1485/1485 [00:38<00:00, 38.10it/s]
Processing completed for both possession and out-of-possession.
Results stored in arrays_start/end (28 events) and arrays_out_start/end (28 events).

In [19]:
# Containers for the similarity tables, in this order: in-possession start /
# end, out-of-possession start / end. Each gets its own empty dict.
(similarity_scores_start,
 similarity_scores_end,
 similarity_scores_out_start,
 similarity_scores_out_end) = ({} for _ in range(4))

# Pairwise team similarity per event
def calculate_similarity_metrics(arrays_dict, team_ids):
    """Build one team-by-team cosine-similarity table per event.

    Parameters
    ----------
    arrays_dict : dict
        Maps an event name to a list of grids, one per entry of `team_ids`
        and in the same order.
    team_ids : sequence
        Labels used for both the rows and the columns of every table.

    Returns
    -------
    dict
        Event name -> DataFrame whose index is named 'team_identifier'.

    Notes
    -----
    Any exception raised while building a table is printed and replaced by an
    identity matrix, so a failed event looks like "each team is similar only to
    itself" rather than stopping the run.
    """
    def _labelled(matrix):
        # Wrap a square matrix with the team labels on both axes.
        table = pd.DataFrame(matrix, index=team_ids, columns=team_ids)
        return table.rename_axis('team_identifier')

    similarity_dict = {}
    for event_name, array_list in arrays_dict.items():
        try:
            # One flat vector per team, compared pairwise
            vectors = [grid.flatten() for grid in array_list]
            table = _labelled(cosine_similarity(vectors))
        except Exception as e:
            print(f"Error calculating similarity for {event_name}: {e}")
            # Identity matrix as fallback
            table = _labelled(np.eye(len(team_ids)))
        similarity_dict[event_name] = table

    return similarity_dict

# Similarity tables for the in-possession grids (start, then end coordinates)
similarity_scores_start, similarity_scores_end = (
    calculate_similarity_metrics(_grids, team_ids)
    for _grids in (arrays_start, arrays_end)
)

# Similarity tables for the out-of-possession grids (start, then end coordinates)
similarity_scores_out_start, similarity_scores_out_end = (
    calculate_similarity_metrics(_grids, team_ids)
    for _grids in (arrays_out_start, arrays_out_end)
)

# Helper function to safely get values
def safe_get_values(similarity_dict, key):
    """Return the raw array behind ``similarity_dict[key]``.

    When ``key`` is absent, a warning is printed and an identity matrix sized
    by the module-level ``team_ids`` is returned instead.
    """
    if key not in similarity_dict:
        print(f"Warning: {key} not found in similarity dictionary. Using identity matrix.")
        return np.eye(len(team_ids))

    return similarity_dict[key].values

# Map each per-event similarity table to an individual module-level variable
# (kept for backward compatibility with code that uses the flat names).
# Every value comes from safe_get_values(): the raw array behind the DataFrame,
# or an identity matrix (plus a printed warning) when the event key is missing.
# Naming: "<...>_start" / "<...>_end" select the start or end dictionary, and a
# trailing "_out" marks the out-of-possession dictionaries.

# In-possession similarities for basic events (start dictionary)
similarity0a = safe_get_values(similarity_scores_start, 'bad_touch')
similarity3a = safe_get_values(similarity_scores_start, 'shots_and_goals')
similarity5a = safe_get_values(similarity_scores_start, 'crosses')
similarity12a = safe_get_values(similarity_scores_start, 'defensive_actions')

# Pass events similarities (start): build-up keys
similarity_pass_buildup_def_start = safe_get_values(similarity_scores_start, 'pass_buildup_def')
similarity_pass_buildup_wb_start = safe_get_values(similarity_scores_start, 'pass_buildup_wb')
similarity_pass_buildup_cm_start = safe_get_values(similarity_scores_start, 'pass_buildup_cm')
similarity_pass_buildup_am_start = safe_get_values(similarity_scores_start, 'pass_buildup_am')
similarity_pass_buildup_st_start = safe_get_values(similarity_scores_start, 'pass_buildup_st')

# Pass events similarities (start): "cons" keys
similarity_pass_cons_def_start = safe_get_values(similarity_scores_start, 'pass_cons_def')
similarity_pass_cons_wb_start = safe_get_values(similarity_scores_start, 'pass_cons_wb')
similarity_pass_cons_cm_start = safe_get_values(similarity_scores_start, 'pass_cons_cm')
similarity_pass_cons_am_start = safe_get_values(similarity_scores_start, 'pass_cons_am')
similarity_pass_cons_st_start = safe_get_values(similarity_scores_start, 'pass_cons_st')

# Dribble events similarities (start): build-up keys
similarity_drb_buildup_def_start = safe_get_values(similarity_scores_start, 'drb_buildup_def')
similarity_drb_buildup_wb_start = safe_get_values(similarity_scores_start, 'drb_buildup_wb')
similarity_drb_buildup_cm_start = safe_get_values(similarity_scores_start, 'drb_buildup_cm')
similarity_drb_buildup_am_start = safe_get_values(similarity_scores_start, 'drb_buildup_am')
similarity_drb_buildup_st_start = safe_get_values(similarity_scores_start, 'drb_buildup_st')

# Dribble events similarities (start): "cons" keys
similarity_drb_cons_def_start = safe_get_values(similarity_scores_start, 'drb_cons_def')
similarity_drb_cons_wb_start = safe_get_values(similarity_scores_start, 'drb_cons_wb')
similarity_drb_cons_cm_start = safe_get_values(similarity_scores_start, 'drb_cons_cm')
similarity_drb_cons_am_start = safe_get_values(similarity_scores_start, 'drb_cons_am')
similarity_drb_cons_st_start = safe_get_values(similarity_scores_start, 'drb_cons_st')

# Special events similarities (start)
similarity_box_start = safe_get_values(similarity_scores_start, 'box')
similarity_transition_start = safe_get_values(similarity_scores_start, 'transition')
similarity_regains_start = safe_get_values(similarity_scores_start, 'regains')
similarity_longball_chain_start = safe_get_values(similarity_scores_start, 'longball_chain')

# In-possession similarities (end dictionary); 'goalkicks' is only read from the end side
similarity4b = safe_get_values(similarity_scores_end, 'goalkicks')
similarity5b = safe_get_values(similarity_scores_end, 'crosses')

# Pass events similarities (end): build-up keys
similarity_pass_buildup_def_end = safe_get_values(similarity_scores_end, 'pass_buildup_def')
similarity_pass_buildup_wb_end = safe_get_values(similarity_scores_end, 'pass_buildup_wb')
similarity_pass_buildup_cm_end = safe_get_values(similarity_scores_end, 'pass_buildup_cm')
similarity_pass_buildup_am_end = safe_get_values(similarity_scores_end, 'pass_buildup_am')
similarity_pass_buildup_st_end = safe_get_values(similarity_scores_end, 'pass_buildup_st')

# Pass events similarities (end): "cons" keys
similarity_pass_cons_def_end = safe_get_values(similarity_scores_end, 'pass_cons_def')
similarity_pass_cons_wb_end = safe_get_values(similarity_scores_end, 'pass_cons_wb')
similarity_pass_cons_cm_end = safe_get_values(similarity_scores_end, 'pass_cons_cm')
similarity_pass_cons_am_end = safe_get_values(similarity_scores_end, 'pass_cons_am')
similarity_pass_cons_st_end = safe_get_values(similarity_scores_end, 'pass_cons_st')

# Dribble events similarities (end): build-up keys
similarity_drb_buildup_def_end = safe_get_values(similarity_scores_end, 'drb_buildup_def')
similarity_drb_buildup_wb_end = safe_get_values(similarity_scores_end, 'drb_buildup_wb')
similarity_drb_buildup_cm_end = safe_get_values(similarity_scores_end, 'drb_buildup_cm')
similarity_drb_buildup_am_end = safe_get_values(similarity_scores_end, 'drb_buildup_am')
similarity_drb_buildup_st_end = safe_get_values(similarity_scores_end, 'drb_buildup_st')

# Dribble events similarities (end): "cons" keys
similarity_drb_cons_def_end = safe_get_values(similarity_scores_end, 'drb_cons_def')
similarity_drb_cons_wb_end = safe_get_values(similarity_scores_end, 'drb_cons_wb')
similarity_drb_cons_cm_end = safe_get_values(similarity_scores_end, 'drb_cons_cm')
similarity_drb_cons_am_end = safe_get_values(similarity_scores_end, 'drb_cons_am')
similarity_drb_cons_st_end = safe_get_values(similarity_scores_end, 'drb_cons_st')

# Special events similarities (end); 'longball_chain_second' is only read from the end side
similarity_box_end = safe_get_values(similarity_scores_end, 'box')
similarity_transition_end = safe_get_values(similarity_scores_end, 'transition')
similarity_regains_end = safe_get_values(similarity_scores_end, 'regains')
similarity_longball_chain_end = safe_get_values(similarity_scores_end, 'longball_chain')
similarity_longball_chain_second_end = safe_get_values(similarity_scores_end, 'longball_chain_second')

# OUT-OF-POSSESSION SIMILARITIES
# Same keys as above, read from the out-of-possession dictionaries; every
# variable name carries an "_out" suffix.

# Basic events (start)
similarity0a_out = safe_get_values(similarity_scores_out_start, 'bad_touch')
similarity3a_out = safe_get_values(similarity_scores_out_start, 'shots_and_goals')
similarity5a_out = safe_get_values(similarity_scores_out_start, 'crosses')
similarity12a_out = safe_get_values(similarity_scores_out_start, 'defensive_actions')

# Pass events similarities (start) - out of possession
similarity_pass_buildup_def_start_out = safe_get_values(similarity_scores_out_start, 'pass_buildup_def')
similarity_pass_buildup_wb_start_out = safe_get_values(similarity_scores_out_start, 'pass_buildup_wb')
similarity_pass_buildup_cm_start_out = safe_get_values(similarity_scores_out_start, 'pass_buildup_cm')
similarity_pass_buildup_am_start_out = safe_get_values(similarity_scores_out_start, 'pass_buildup_am')
similarity_pass_buildup_st_start_out = safe_get_values(similarity_scores_out_start, 'pass_buildup_st')

similarity_pass_cons_def_start_out = safe_get_values(similarity_scores_out_start, 'pass_cons_def')
similarity_pass_cons_wb_start_out = safe_get_values(similarity_scores_out_start, 'pass_cons_wb')
similarity_pass_cons_cm_start_out = safe_get_values(similarity_scores_out_start, 'pass_cons_cm')
similarity_pass_cons_am_start_out = safe_get_values(similarity_scores_out_start, 'pass_cons_am')
similarity_pass_cons_st_start_out = safe_get_values(similarity_scores_out_start, 'pass_cons_st')

# Dribble events similarities (start) - out of possession
similarity_drb_buildup_def_start_out = safe_get_values(similarity_scores_out_start, 'drb_buildup_def')
similarity_drb_buildup_wb_start_out = safe_get_values(similarity_scores_out_start, 'drb_buildup_wb')
similarity_drb_buildup_cm_start_out = safe_get_values(similarity_scores_out_start, 'drb_buildup_cm')
similarity_drb_buildup_am_start_out = safe_get_values(similarity_scores_out_start, 'drb_buildup_am')
similarity_drb_buildup_st_start_out = safe_get_values(similarity_scores_out_start, 'drb_buildup_st')

similarity_drb_cons_def_start_out = safe_get_values(similarity_scores_out_start, 'drb_cons_def')
similarity_drb_cons_wb_start_out = safe_get_values(similarity_scores_out_start, 'drb_cons_wb')
similarity_drb_cons_cm_start_out = safe_get_values(similarity_scores_out_start, 'drb_cons_cm')
similarity_drb_cons_am_start_out = safe_get_values(similarity_scores_out_start, 'drb_cons_am')
similarity_drb_cons_st_start_out = safe_get_values(similarity_scores_out_start, 'drb_cons_st')

# Special events similarities (start) - out of possession
similarity_box_start_out = safe_get_values(similarity_scores_out_start, 'box')
similarity_transition_start_out = safe_get_values(similarity_scores_out_start, 'transition')
similarity_regains_start_out = safe_get_values(similarity_scores_out_start, 'regains')
similarity_longball_chain_start_out = safe_get_values(similarity_scores_out_start, 'longball_chain')

# Out-of-possession similarities (end)
similarity4b_out = safe_get_values(similarity_scores_out_end, 'goalkicks')
similarity5b_out = safe_get_values(similarity_scores_out_end, 'crosses')

# Pass events similarities (end) - out of possession
similarity_pass_buildup_def_end_out = safe_get_values(similarity_scores_out_end, 'pass_buildup_def')
similarity_pass_buildup_wb_end_out = safe_get_values(similarity_scores_out_end, 'pass_buildup_wb')
similarity_pass_buildup_cm_end_out = safe_get_values(similarity_scores_out_end, 'pass_buildup_cm')
similarity_pass_buildup_am_end_out = safe_get_values(similarity_scores_out_end, 'pass_buildup_am')
similarity_pass_buildup_st_end_out = safe_get_values(similarity_scores_out_end, 'pass_buildup_st')

similarity_pass_cons_def_end_out = safe_get_values(similarity_scores_out_end, 'pass_cons_def')
similarity_pass_cons_wb_end_out = safe_get_values(similarity_scores_out_end, 'pass_cons_wb')
similarity_pass_cons_cm_end_out = safe_get_values(similarity_scores_out_end, 'pass_cons_cm')
similarity_pass_cons_am_end_out = safe_get_values(similarity_scores_out_end, 'pass_cons_am')
similarity_pass_cons_st_end_out = safe_get_values(similarity_scores_out_end, 'pass_cons_st')

# Dribble events similarities (end) - out of possession
similarity_drb_buildup_def_end_out = safe_get_values(similarity_scores_out_end, 'drb_buildup_def')
similarity_drb_buildup_wb_end_out = safe_get_values(similarity_scores_out_end, 'drb_buildup_wb')
similarity_drb_buildup_cm_end_out = safe_get_values(similarity_scores_out_end, 'drb_buildup_cm')
similarity_drb_buildup_am_end_out = safe_get_values(similarity_scores_out_end, 'drb_buildup_am')
similarity_drb_buildup_st_end_out = safe_get_values(similarity_scores_out_end, 'drb_buildup_st')

similarity_drb_cons_def_end_out = safe_get_values(similarity_scores_out_end, 'drb_cons_def')
similarity_drb_cons_wb_end_out = safe_get_values(similarity_scores_out_end, 'drb_cons_wb')
similarity_drb_cons_cm_end_out = safe_get_values(similarity_scores_out_end, 'drb_cons_cm')
similarity_drb_cons_am_end_out = safe_get_values(similarity_scores_out_end, 'drb_cons_am')
similarity_drb_cons_st_end_out = safe_get_values(similarity_scores_out_end, 'drb_cons_st')

# Special events similarities (end) - out of possession
similarity_box_end_out = safe_get_values(similarity_scores_out_end, 'box')
similarity_transition_end_out = safe_get_values(similarity_scores_out_end, 'transition')
similarity_regains_end_out = safe_get_values(similarity_scores_out_end, 'regains')
similarity_longball_chain_end_out = safe_get_values(similarity_scores_out_end, 'longball_chain')
similarity_longball_chain_second_end_out = safe_get_values(similarity_scores_out_end, 'longball_chain_second')

# Legacy position-based variables, kept only for backward compatibility and
# initialised to identity matrices. They can be removed once nothing reads them.
# Each name gets its OWN copy: binding them all to one ndarray would mean an
# in-place edit through any single name silently changes every other one.
identity_matrix = np.eye(len(team_ids))

# In possession, start locations
similarity1a = identity_matrix.copy()
similarity6a = identity_matrix.copy()
similarity7a = identity_matrix.copy()
similarity8a = identity_matrix.copy()
similarity9a = identity_matrix.copy()
similarity10a = identity_matrix.copy()
similarity11a = identity_matrix.copy()

# In possession, end locations
similarity6b = identity_matrix.copy()
similarity7b = identity_matrix.copy()
similarity8b = identity_matrix.copy()
similarity9b = identity_matrix.copy()
similarity10b = identity_matrix.copy()
similarity11b = identity_matrix.copy()

# Out of possession, start locations
similarity1a_out = identity_matrix.copy()
similarity6a_out = identity_matrix.copy()
similarity7a_out = identity_matrix.copy()
similarity8a_out = identity_matrix.copy()
similarity9a_out = identity_matrix.copy()
similarity10a_out = identity_matrix.copy()
similarity11a_out = identity_matrix.copy()

# Out of possession, end locations
similarity6b_out = identity_matrix.copy()
similarity7b_out = identity_matrix.copy()
similarity8b_out = identity_matrix.copy()
similarity9b_out = identity_matrix.copy()
similarity10b_out = identity_matrix.copy()
similarity11b_out = identity_matrix.copy()

print("Similarity calculations completed for both possession and out-of-possession metrics.")
Similarity calculations completed for both possession and out-of-possession metrics.
In [20]:
# Team lookup plus a single flat registry of every similarity DataFrame

# Display name per team: whatever precedes the first hyphen of the identifier
# (the whole identifier when it contains no hyphen).
team_name_map = {}
for team_id in team_ids:
    team_name = team_id.partition('-')[0]
    team_name_map[team_id] = team_name

# Flat registry keyed "similarity_<event>_<suffix>". It reuses the DataFrames
# already held in the four per-event dictionaries; no copies are made.
similarity_dataframes = {}
labelled_sources = (
    ("start", similarity_scores_start),          # in possession, start
    ("end", similarity_scores_end),              # in possession, end
    ("out_start", similarity_scores_out_start),  # out of possession, start
    ("out_end", similarity_scores_out_end),      # out of possession, end
)
for suffix, source in labelled_sources:
    for event_name, df in source.items():
        key = f"similarity_{event_name}_{suffix}"
        similarity_dataframes[key] = df

# Give every registered frame the same index name
for key, df in similarity_dataframes.items():
    if df is None:
        continue
    df.index.name = 'team_identifier'

# Short summary of what was organised
event_types_start = list(similarity_scores_start)
event_types_end = list(similarity_scores_end)
print(f"Team information mapped for {len(team_ids)} teams")
print(f"Start events: {', '.join(event_types_start)}")
print(f"End events: {', '.join(event_types_end)}")
print("DataFrames organized with team information.")
Team information mapped for 27 teams
Start events: bad_touch, shots_and_goals, crosses, regains, box, transition, defensive_actions, pass_buildup_def, pass_buildup_wb, pass_buildup_cm, pass_buildup_am, pass_buildup_st, pass_cons_def, pass_cons_wb, pass_cons_cm, pass_cons_am, pass_cons_st, drb_buildup_def, drb_buildup_wb, drb_buildup_cm, drb_buildup_am, drb_buildup_st, drb_cons_def, drb_cons_wb, drb_cons_cm, drb_cons_am, drb_cons_st, longball_chain
End events: crosses, goalkicks, regains, box, transition, pass_buildup_def, pass_buildup_wb, pass_buildup_cm, pass_buildup_am, pass_buildup_st, pass_cons_def, pass_cons_wb, pass_cons_cm, pass_cons_am, pass_cons_st, drb_buildup_def, drb_buildup_wb, drb_buildup_cm, drb_buildup_am, drb_buildup_st, drb_cons_def, drb_cons_wb, drb_cons_cm, drb_cons_am, drb_cons_st, longball_chain, longball_chain_second
DataFrames organized with team information.
In [21]:
# Final combined similarity matrices: one for in-possession style (dfPoss)
# and one for out-of-possession style (dfOutPoss).

def _average_similarity(frames):
    """Average the given similarity DataFrames, force a unit diagonal, clip to [0, 1]."""
    combined = sum(frames) / len(frames)
    for team in combined.index:
        combined.loc[team, team] = 1.0
    return combined.clip(0, 1)


def _identity_similarity():
    """Fallback used when there is nothing to average: identity matrix labelled by team_ids."""
    fallback = pd.DataFrame(np.eye(len(team_ids)), index=team_ids, columns=team_ids)
    fallback.index.name = 'team_identifier'
    return fallback


print("Creating final possession style matrices...")

# Every per-event table, start events first and end events after
poss_dfs = list(similarity_scores_start.values()) + list(similarity_scores_end.values())
out_poss_dfs = list(similarity_scores_out_start.values()) + list(similarity_scores_out_end.values())

# In possession
if not poss_dfs:
    print("Warning: No possession data available.")
    dfPoss = _identity_similarity()
else:
    dfPoss = _average_similarity(poss_dfs)
    print(f"Created in-possession similarity matrix (dfPoss) with {len(dfPoss)} teams.")

# Out of possession
if not out_poss_dfs:
    print("Warning: No out-of-possession data available.")
    dfOutPoss = _identity_similarity()
else:
    dfOutPoss = _average_similarity(out_poss_dfs)
    print(f"Created out-of-possession similarity matrix (dfOutPoss) with {len(dfOutPoss)} teams.")
Creating final possession style matrices...
Created in-possession similarity matrix (dfPoss) with 27 teams.
Created out-of-possession similarity matrix (dfOutPoss) with 27 teams.
In [22]:
def plot_team_similarity_matrix(team_matrix, title="Team Style Similarity Matrix", 
                               possession_type="in", max_teams=None):
    """
    Plot a heatmap of team style similarities with dynamic sizing.

    Only the strict lower triangle is drawn; the diagonal and the upper
    triangle are masked. The colour scale runs from the smallest value in the
    matrix to the largest off-diagonal value, so the (masked) diagonal does
    not compress the scale.

    Args:
        team_matrix: DataFrame containing team similarity scores
        title: Title for the plot
        possession_type: 'in' for possession; any other value is labelled
            as out-of-possession
        max_teams: Maximum number of teams to show (None for all); when set,
            the first ``max_teams`` rows/columns are kept

    Returns:
        None. Problems are reported with ``print`` instead of being raised.
    """
    # Handle empty matrix
    if team_matrix is None or team_matrix.empty:
        print("Error: Empty similarity matrix")
        return

    # Create a subset matrix if needed (simply the first max_teams entries)
    if max_teams and len(team_matrix) > max_teams:
        subset_teams = list(team_matrix.index[:max_teams])
        matrix = team_matrix.loc[subset_teams, subset_teams].copy()
        subset_note = f" (showing {len(matrix)} of {len(team_matrix)} teams)"
    else:
        matrix = team_matrix.copy()
        subset_note = ""

    n_teams = len(matrix)

    # Base figure size that works well for ~15 teams
    base_width, base_height = 12, 10

    # Figure scale and annotation font size both depend on the team count
    if n_teams <= 10:
        scale, annot_fontsize = 0.8, 10
    elif n_teams <= 20:
        scale, annot_fontsize = 1, 8
    elif n_teams <= 30:
        scale, annot_fontsize = 1.3, 6
    else:
        scale, annot_fontsize = 1.5, 5
    figsize = (base_width * scale, base_height * scale)

    try:
        plt.figure(figsize=figsize)

        # Boolean mask hiding the upper triangle and the diagonal
        mask = np.zeros(matrix.shape, dtype=bool)
        mask[np.triu_indices_from(mask)] = True

        # Add possession type to title
        poss_label = " (In Possession)" if possession_type == "in" else " (Out of Possession)"
        full_title = f"{title}{poss_label}{subset_note}"

        # Lower colour limit: smallest value anywhere in the matrix
        vmin_value = matrix.min().min()

        # Upper colour limit: largest off-diagonal value. Work on an explicit
        # writable float copy: `DataFrame.values` may be read-only (pandas
        # Copy-on-Write) or a detached copy, so writing NaN through it is unsafe.
        off_diag = matrix.to_numpy(dtype=float, copy=True)
        np.fill_diagonal(off_diag, np.nan)
        usable = off_diag[~np.isnan(off_diag)]
        # With no off-diagonal values at all (e.g. a single team) fall back to 1
        vmax_value = usable.max() if usable.size else 1

        # Annotations are shown with three decimal places
        sns.heatmap(matrix, cmap="YlGnBu", annot=True, fmt=".3f", 
                    square=True, mask=mask, vmin=vmin_value, vmax=vmax_value, annot_kws={"size": annot_fontsize})

        plt.title(full_title, fontsize=16)
        plt.tight_layout()
        plt.show()

    except Exception as e:
        print(f"Error creating similarity matrix visualization: {e}")
In [23]:
def plot_team_heatmap(team_id, event_type, arrays_dict, possession_type="in", team_name_map=None):
    """
    Plot heatmap for a specific team and event type.

    Args:
        team_id: Team identifier; must be present in the module-level ``team_ids``
        event_type: Key of ``arrays_dict`` selecting the event to draw
        arrays_dict: Mapping of event name to a per-team sequence of 2-D arrays,
            indexed in ``team_ids`` order. Pass one of the module-level
            dictionaries so start/end locations can be told apart.
        possession_type: 'in' for possession; any other value is treated as
            out-of-possession
        team_name_map: Optional dictionary mapping team IDs to display names

    Returns:
        None. Problems are reported with ``print`` instead of being raised.
    """
    # Check if the event is in the provided arrays_dict
    if event_type not in arrays_dict:
        print(f"Event type {event_type} not found in the provided arrays dictionary.")
        return

    # Debug info
    print(f"Plotting heatmap for {team_id}, event {event_type}, using {'in-possession' if possession_type=='in' else 'out-of-possession'} data")

    # Get the team index
    try:
        team_idx = list(team_ids).index(team_id)
    except ValueError:
        print(f"Team ID {team_id} not found.")
        return

    # Safely access team array
    try:
        team_array = arrays_dict[event_type][team_idx]
        print(f"Team array shape: {team_array.shape}")
    except IndexError:
        print(f"Error: Team index {team_idx} out of bounds for event {event_type}")
        return
    except Exception as e:
        print(f"Error accessing team array: {e}")
        return

    # Get the team name if mapping is provided
    team_name = team_name_map.get(team_id, str(team_id)) if team_name_map else str(team_id)

    # Is this the end-location dictionary? Use identity, not ==: comparing two
    # different dicts of ndarray lists by value can raise "truth value of an
    # array is ambiguous", and identity is what is actually meant here.
    end_arrays = arrays_end if possession_type == "in" else arrays_out_end
    is_end_event = arrays_dict is end_arrays

    # Create pitch and plot heatmap
    try:
        # Define custom colormap
        cmap = LinearSegmentedColormap.from_list('custom_cmap', 
                                                ['#D7D1CF', '#FFFFFF', '#FFFF80', '#FF8000', '#800000', '#000000'], 
                                                N=256)

        # Set the pitch color to #D7D1CF
        pitch = Pitch(pitch_type='custom', pitch_width=68, pitch_length=105, goal_type='box', 
                     linewidth=2, pitch_color='#D7D1CF', line_color='#000000')

        # Create figure with specified facecolor
        fig = plt.figure(figsize=(10, 7), facecolor='#D7D1CF')
        ax = fig.add_subplot(111)
        ax.set_facecolor('#D7D1CF')

        # Draw the pitch
        pitch.draw(ax=ax)

        # Create meshgrid for plotting (105 columns along x, 68 rows along y)
        x = np.linspace(0, 105, 105)
        y = np.linspace(0, 68, 68)
        X, Y = np.meshgrid(x, y)

        # pcolormesh needs (rows, cols) == (68, 105); flip arrays stored the other way round
        if team_array.shape == (105, 68):
            print(f"Transposing array from {team_array.shape} to match expected (68, 105)")
            team_array = team_array.T

        # Plot using pcolormesh directly with nearest shading and custom colormap
        hm = ax.pcolormesh(X, Y, team_array, cmap=cmap, alpha=0.9, shading='nearest')

        # Add a colorbar
        cbar = fig.colorbar(hm, ax=ax)
        cbar.set_label('Event Density')

        # Title carries both the possession type and whether these are start or end locations
        poss_label = "In Possession" if possession_type == "in" else "Out of Possession"
        location_label = "End Locations" if is_end_event else "Start Locations"
        ax.set_title(f"{team_name} - {event_type.replace('_', ' ').title()} ({poss_label}, {location_label})", fontsize=14)

        plt.tight_layout()
        plt.show()

    except Exception as e:
        print(f"Error creating heatmap visualization: {e}")
        import traceback
        traceback.print_exc()
In [24]:
def compare_team_heatmaps(team_id1, team_id2, event_type, arrays_dict, possession_type="in", team_name_map=None):
    """
    Plot heatmaps for two teams side by side for direct comparison.

    When a similarity score for the pair is available in the matching
    module-level similarity dictionary, it is shown in the figure title.

    Args:
        team_id1: Identifier of the team drawn on the left
        team_id2: Identifier of the team drawn on the right
        event_type: Key of ``arrays_dict`` selecting the event to draw
        arrays_dict: Mapping of event name to a per-team sequence of 2-D arrays,
            indexed in ``team_ids`` order. Pass one of the module-level
            dictionaries so start/end locations can be told apart.
        possession_type: 'in' for possession; any other value is treated as
            out-of-possession
        team_name_map: Optional dictionary mapping team IDs to display names

    Returns:
        None. Problems are reported with ``print`` instead of being raised.
    """
    # Check if the event is in the provided arrays_dict
    if event_type not in arrays_dict:
        print(f"Event type {event_type} not found in the provided arrays dictionary.")
        return

    # Get the team indices
    try:
        known_ids = list(team_ids)
        team_idx1 = known_ids.index(team_id1)
        team_idx2 = known_ids.index(team_id2)
    except ValueError:
        print("Team ID not found.")
        return

    # Safely access team arrays
    try:
        team_array1 = arrays_dict[event_type][team_idx1]
        team_array2 = arrays_dict[event_type][team_idx2]
        print(f"Team 1 array shape: {team_array1.shape}")
        print(f"Team 2 array shape: {team_array2.shape}")
    except IndexError:
        print(f"Error: Team index out of bounds for event {event_type}")
        return
    except Exception as e:
        print(f"Error accessing team array: {e}")
        return

    # Get the team names if mapping is provided
    team_name1 = team_name_map.get(team_id1, str(team_id1)) if team_name_map else str(team_id1)
    team_name2 = team_name_map.get(team_id2, str(team_id2)) if team_name_map else str(team_id2)

    # Is this the end-location dictionary? Use identity, not ==: comparing two
    # different dicts of ndarray lists by value can raise "truth value of an
    # array is ambiguous", and identity is what is actually meant here.
    end_arrays = arrays_end if possession_type == "in" else arrays_out_end
    is_end_event = arrays_dict is end_arrays

    # Labels shared by both panels
    poss_label = "In Possession" if possession_type == "in" else "Out of Possession"
    location_label = "End Locations" if is_end_event else "Start Locations"

    # Define custom colormap
    cmap = LinearSegmentedColormap.from_list('custom_cmap', 
                                            ['#D7D1CF', '#FFFFFF', '#FFFF80', '#FF8000', '#800000', '#000000'], 
                                            N=256)

    # Create figure with two subplots and specified facecolor
    fig = plt.figure(figsize=(20, 8), facecolor='#D7D1CF')
    axs = [fig.add_subplot(1, 2, i+1) for i in range(2)]
    for ax in axs:
        ax.set_facecolor('#D7D1CF')

    try:
        # One pitch object is reused for both panels
        pitch = Pitch(pitch_type='custom', pitch_width=68, pitch_length=105, goal_type='box', 
                     linewidth=2, pitch_color='#D7D1CF', line_color='#000000')

        # Create meshgrid for plotting (105 columns along x, 68 rows along y)
        x = np.linspace(0, 105, 105)
        y = np.linspace(0, 68, 68)
        X, Y = np.meshgrid(x, y)

        event_label = event_type.replace('_', ' ').title()

        # Draw the left panel (team 1) and then the right panel (team 2)
        panels = zip(axs, (team_array1, team_array2), (team_name1, team_name2))
        for number, (ax, team_array, team_name) in enumerate(panels, start=1):
            pitch.draw(ax=ax)

            # pcolormesh needs (rows, cols) == (68, 105); flip arrays stored the other way round
            if team_array.shape == (105, 68):
                print(f"Transposing team {number} array from {team_array.shape} to match expected (68, 105)")
                team_array = team_array.T

            # Plot using pcolormesh directly with nearest shading and custom colormap
            hm = ax.pcolormesh(X, Y, team_array, cmap=cmap, alpha=0.9, shading='nearest')

            ax.set_title(f"{team_name} - {event_label} ({poss_label}, {location_label})", fontsize=14)

            # Each panel gets its own colorbar
            cbar = fig.colorbar(hm, ax=ax)
            cbar.set_label('Event Density')

        # Figure title; the similarity score is appended when it can be looked up
        plain_headline = f"Comparison: {team_name1} vs {team_name2}"
        headline = plain_headline
        try:
            # Pick the similarity dictionary that matches the arrays being shown
            if possession_type == "in":
                similarity_dict = similarity_scores_end if is_end_event else similarity_scores_start
            else:
                similarity_dict = similarity_scores_out_end if is_end_event else similarity_scores_out_start

            if event_type in similarity_dict:
                similarity_matrix = similarity_dict[event_type]

                if (team_id1 in similarity_matrix.index and 
                    team_id2 in similarity_matrix.index):
                    similarity_score = similarity_matrix.loc[team_id1, team_id2]
                    headline = f"{plain_headline} - Similarity: {similarity_score:.3f}"
        except Exception as e:
            print(f"Error calculating similarity: {e}")
            headline = plain_headline

        fig.suptitle(headline, fontsize=16)

        plt.tight_layout()
        plt.show()

    except Exception as e:
        print(f"Error creating comparison visualization: {e}")
        import traceback
        traceback.print_exc()
In [25]:
def analyze_team_style_signature(team_id, possession_type="in", team_name_map=None):
    """
    Create a visual signature of a team's playing style across all event types.

    Draws one pitch heatmap per (event type, start/end location) pair found in
    the global array dictionaries, then prints the five teams with the highest
    overall similarity to the selected team.

    Args:
        team_id: Team identifier; must be present in the global ``team_ids``.
        possession_type: 'in' for possession; any other value selects the
            out-of-possession arrays and similarity matrix.
        team_name_map: Optional dictionary mapping team IDs to team names.
            Used for the figure title and for the printed similarity ranking.

    Returns:
        The selected team's row of the overall similarity matrix, sorted in
        descending order with the team itself removed. Returns None when the
        team is unknown, no event types exist, the team is missing from the
        similarity matrix, or plotting raised an exception.
    """
    # Select the start/end array dictionaries and the overall similarity
    # matrix that belong to the requested possession type
    if possession_type == "in":
        arrays_dict_start = arrays_start
        arrays_dict_end = arrays_end
        similarity_matrix = dfPoss
        poss_label = "In Possession"
    else:
        arrays_dict_start = arrays_out_start
        arrays_dict_end = arrays_out_end
        similarity_matrix = dfOutPoss
        poss_label = "Out of Possession"

    # Position of the team along the first axis of every per-event array
    try:
        team_idx = list(team_ids).index(team_id)
    except ValueError:
        print(f"Team ID {team_id} not found.")
        return

    # Get team name if mapping is provided
    team_name = team_name_map.get(team_id, str(team_id)) if team_name_map else str(team_id)

    # Every available event, tagged with the location type it comes from
    # (all start events first, then all end events)
    all_events = [(event, "start") for event in arrays_dict_start.keys()]
    all_events += [(event, "end") for event in arrays_dict_end.keys()]

    if not all_events:
        print(f"No event types found for {poss_label.lower()} analysis")
        return

    # Define custom colormap
    cmap = LinearSegmentedColormap.from_list('custom_cmap',
                                            ['#D7D1CF', '#FFFFFF', '#FFFF80', '#FF8000', '#800000', '#000000'],
                                            N=256)

    try:
        # Grid layout: between 2 and 4 columns, as many rows as needed
        n_events = len(all_events)
        n_cols = min(4, max(2, int(np.ceil(np.sqrt(n_events)))))
        n_rows = int(np.ceil(n_events / n_cols))

        # Create figure with specified facecolor
        fig = plt.figure(figsize=(n_cols * 5, n_rows * 4), facecolor='#D7D1CF')

        # Add a title to the figure with possession type
        fig.suptitle(f"Style Signature: {team_name} ({poss_label})", fontsize=20)

        # Create a pitch for drawing with updated color
        pitch = Pitch(pitch_type='custom', pitch_width=68, pitch_length=105, goal_type='box',
                     linewidth=2, pitch_color='#D7D1CF', line_color='#000000')

        # Create meshgrid for plotting (68 rows x 105 columns)
        x = np.linspace(0, 105, 105)
        y = np.linspace(0, 68, 68)
        X, Y = np.meshgrid(x, y)

        # Plot each available event
        for i, (event, location_type) in enumerate(all_events):
            ax = fig.add_subplot(n_rows, n_cols, i + 1)
            ax.set_facecolor('#D7D1CF')

            # Draw the pitch
            pitch.draw(ax=ax)

            try:
                # Get the right array dictionary based on location type
                arrays_dict = arrays_dict_start if location_type == "start" else arrays_dict_end

                # Get the event array for this team
                team_array = arrays_dict[event][team_idx]

                # The meshgrid is (68, 105); flip arrays stored the other way round
                if team_array.shape == (105, 68):
                    team_array = team_array.T

                # Plot using pcolormesh directly with nearest shading and custom colormap
                ax.pcolormesh(X, Y, team_array, cmap=cmap, alpha=0.9, shading='nearest')

                # Set title
                location_label = "End" if location_type == "end" else "Start"
                ax.set_title(f"{event.replace('_', ' ').title()} ({location_label})", fontsize=12)

            except Exception as e:
                # Keep going: one broken event must not prevent the other panels
                print(f"Error plotting event {event}: {e}")
                ax.set_title(f"{event.replace('_', ' ').title()} - Error", fontsize=12, color='red')

        plt.tight_layout(rect=[0, 0, 1, 0.96])  # Adjust layout to make room for figure title
        plt.show()

        # Calculate and display similarity to other teams
        if similarity_matrix is not None and team_id in similarity_matrix.index:
            similarity_to_others = similarity_matrix.loc[team_id].sort_values(ascending=False)

            # Remove self (should be 1.0)
            if team_id in similarity_to_others.index:
                similarity_to_others = similarity_to_others.drop(team_id)

            # Display top 5 most similar teams, using the readable name when a
            # mapping is available (previously the raw identifier was printed
            # even though the mapped name had been looked up)
            print(f"Teams most similar to {team_name} ({poss_label}):")
            for rank, (other_team, similarity) in enumerate(similarity_to_others.head(5).items(), start=1):
                other_name = team_name_map.get(other_team, other_team) if team_name_map else other_team
                print(f"{rank}. {other_name}: {similarity:.3f}")

            # Return the sorted similarity scores for further analysis
            return similarity_to_others

    except Exception as e:
        print(f"Error creating team style signature: {e}")
        import traceback
        traceback.print_exc()
        return None
In [26]:
def create_improved_team_style_explorer():
    """
    Create an enhanced interactive widget to explore team styles with both possession types.

    The widget lets the user pick one or two teams, an event type, a location
    (start/end), a possession type and a view, and renders the matching
    visualization into an output area when the button is clicked. When the
    chosen event does not exist for the requested location, the other location
    of the same possession type is used instead and a note is printed.

    Relies on these notebook globals: ``arrays_start``, ``arrays_end``,
    ``arrays_out_start``, ``arrays_out_end``, ``team_ids``, ``dfPoss``,
    ``dfOutPoss``, ``team_name_map`` and the plotting helpers
    ``plot_team_heatmap``, ``compare_team_heatmaps``,
    ``plot_team_similarity_matrix`` and ``analyze_team_style_signature``.

    Returns:
        An ``ipywidgets.VBox`` containing the whole user interface.
    """
    # Local imports keep the factory usable even when the cell that imports
    # ipywidgets at notebook level has not been executed yet
    import html
    import ipywidgets as widgets

    # Combine and sort all event types from the four array dictionaries
    all_event_types = sorted(set(list(arrays_start.keys()) + list(arrays_end.keys()) +
                                 list(arrays_out_start.keys()) + list(arrays_out_end.keys())))

    # Create dropdown options that use and show the full team_identifier
    team_options = [(team_id, team_id) for team_id in team_ids]

    # Shared style so every description is shown in full
    wide_label = {'description_width': 'initial'}

    # Create widgets
    team1_dropdown = widgets.Dropdown(
        options=team_options,
        description='Team 1:',
        style=wide_label
    )

    team2_dropdown = widgets.Dropdown(
        options=team_options,
        description='Team 2:',
        style=wide_label
    )

    event_dropdown = widgets.Dropdown(
        options=all_event_types,
        description='Event Type:',
        style=wide_label
    )

    view_type = widgets.RadioButtons(
        options=['Single Team', 'Compare Teams', 'Team Similarity', 'Team Style Signature'],
        description='View Type:',
        style=wide_label
    )

    location_type = widgets.RadioButtons(
        options=['Start Location', 'End Location'],
        description='Location:',
        style=wide_label
    )

    # Add possession type selector
    possession_type = widgets.RadioButtons(
        options=['In Possession', 'Out of Possession'],
        description='Possession:',
        style=wide_label
    )

    # Max teams slider for similarity view. The lower bound is capped at the
    # upper bound so that fewer than 5 teams does not produce an invalid
    # (min > max) slider range; with 5 or more teams the bounds are unchanged.
    slider_max = min(50, len(team_ids))
    slider_min = min(5, slider_max)
    max_teams = widgets.IntSlider(
        value=min(20, slider_max),
        min=slider_min,
        max=slider_max,
        step=5,
        description='Max Teams:',
        disabled=True,  # Only enabled for the 'Team Similarity' view
        style=wide_label
    )

    # Progress indicator
    progress = widgets.HTML(
        value="",
        description=""
    )

    # Output widget
    output = widgets.Output()

    # Create button to update the visualization
    button = widgets.Button(
        description='Update Visualization',
        button_style='primary',
        tooltip='Click to update the visualization'
    )

    def on_view_change(change):
        """Enable the team-count slider only while 'Team Similarity' is selected."""
        max_teams.disabled = change['new'] != 'Team Similarity'

    # Register the view change handler
    view_type.observe(on_view_change, names='value')

    def select_arrays(poss_type, location, event):
        """Return (arrays_dict, is_end_event) for the selection, falling back to
        the other location of the same possession type when the event is missing."""
        if poss_type == "in":
            by_location = {"start": arrays_start, "end": arrays_end}
            poss_tag, note_suffix = "in-possession", ""
        else:
            by_location = {"start": arrays_out_start, "end": arrays_out_end}
            poss_tag, note_suffix = "out-of-possession", " for out-of-possession"

        wanted = "end" if location == "End Location" else "start"
        other = "start" if wanted == "end" else "end"

        if event in by_location[wanted]:
            print(f"Using {wanted} locations for {poss_tag} event {event}")
            return by_location[wanted], wanted == "end"

        print(f"Note: Event {event} not found in {wanted} locations{note_suffix}, using {other} locations instead.")
        return by_location[other], other == "end"

    def update_visualization(_):
        """Button callback: read the widget state and draw the requested view."""
        with output:
            output.clear_output()

            # Get values from widgets
            team1 = team1_dropdown.value
            team2 = team2_dropdown.value
            event = event_dropdown.value
            view = view_type.value
            location = location_type.value
            poss_type = "in" if possession_type.value == "In Possession" else "out"
            num_teams = max_teams.value

            # Show progress message
            progress.value = f"<b>Processing {view} visualization...</b>"

            try:
                # Print selection information
                print(f"Selected: Team1={team1}, Team2={team2}, Event={event}")
                print(f"View={view}, Location={location}, Possession={possession_type.value}")

                # Select the data arrays based on both possession type AND location
                arrays_dict, is_end_event = select_arrays(poss_type, location, event)
                print(f"Selected array dictionary contains these events: {list(arrays_dict.keys())}")

                # Use an appropriate overall similarity matrix
                similarity_matrix = dfPoss if poss_type == "in" else dfOutPoss

                # Generate the visualization based on the view type
                if view in ('Single Team', 'Compare Teams'):
                    # Even after the fallback the event may be absent, e.g. when it
                    # only exists for the other possession type
                    if event not in arrays_dict:
                        print(f"Event '{event}' is not available for {location.lower()} in {possession_type.value.lower()}. Please try another combination.")
                        progress.value = "<b style='color:red'>Error: Event not available</b>"
                        return

                    if view == 'Single Team':
                        # Print debug information about the event
                        print(f"Plotting {event} from {'end' if is_end_event else 'start'} location")
                        print(f"Event data shape: {np.shape(arrays_dict[event])}")

                        # This passes the actual data arrays that contain the heatmap data, not similarity scores
                        plot_team_heatmap(team1, event, arrays_dict, poss_type, team_name_map)
                    else:
                        # This passes the actual data arrays, not similarity scores
                        compare_team_heatmaps(team1, team2, event, arrays_dict, poss_type, team_name_map)

                elif view == 'Team Similarity':
                    # Create subset if needed
                    if len(team_ids) > num_teams:
                        # Find the nearest teams to the selected team
                        similar_teams = similarity_matrix.loc[team1].sort_values(ascending=False).head(num_teams).index.tolist()
                        if team1 not in similar_teams:
                            similar_teams.append(team1)
                        matrix_subset = similarity_matrix.loc[similar_teams, similar_teams]
                        plot_team_similarity_matrix(matrix_subset, f"Teams Similar to {team1}", poss_type)
                    else:
                        plot_team_similarity_matrix(similarity_matrix, "Team Playing Style Similarity", poss_type)

                elif view == 'Team Style Signature':
                    analyze_team_style_signature(team1, poss_type, team_name_map)

                # Report completion
                progress.value = "<b style='color:green'>Visualization complete</b>"

            except Exception as e:
                print(f"Error generating visualization: {e}")
                # Escape the message: it is rendered as HTML and may contain '<' or '&'
                progress.value = f"<b style='color:red'>Error: {html.escape(str(e))}</b>"
                import traceback
                traceback.print_exc()

    # Connect the button click to the update function
    button.on_click(update_visualization)

    # Create the layout
    ui = widgets.VBox([
        widgets.HBox([team1_dropdown, team2_dropdown]),
        widgets.HBox([event_dropdown, location_type]),
        widgets.HBox([view_type, possession_type]),
        max_teams,
        button,
        progress,
        output
    ])

    # Outer container kept so the returned widget tree has the same shape as before
    full_ui = widgets.VBox([ui])

    return full_ui
In [27]:
# `widgets` is the name create_improved_team_style_explorer() uses to build its
# controls, so ipywidgets has to be imported before the call below.
import ipywidgets as widgets
from IPython.display import display

# Create and display the improved interactive explorer
explorer = create_improved_team_style_explorer()
display(explorer)
VBox(children=(VBox(children=(HBox(children=(Dropdown(description='Team 1:', options=(('AC Milan-Paulo Alexand…
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [28]:
# Distinct values of dfx['team_identifier'], with missing entries removed,
# in ascending order
playerlist = dfx['team_identifier'].unique().tolist()
cleaned_playerlist = sorted(entry for entry in playerlist if pd.notna(entry))
In [29]:
import html

from IPython.display import display, HTML

# Build one <option> per entry of cleaned_playerlist. The text is HTML-escaped
# so that an identifier containing '"', '&' or '<' cannot break the markup.
options_html = ''.join(
    f'<option value="{html.escape(str(name), quote=True)}">{html.escape(str(name))}</option>'
    for name in cleaned_playerlist
)

# Searchable input backed by a <datalist>; the script echoes the current value.
# textContent (rather than innerHTML) shows the typed text literally instead of
# interpreting it as markup.
dropdown_html = f"""
<input list="players" id="dropdown" oninput="handleInput()" placeholder="Choose Someone">
<datalist id="players">
    {options_html}
</datalist>
<p id="output"></p>
<script>
function handleInput() {{
    var input = document.getElementById("dropdown").value;
    var output = document.getElementById("output");
    output.textContent = "Selected: " + input;
}}
</script>
"""

# Display the dropdown
display(HTML(dropdown_html))

In [30]:
# Identifier of the team whose similarity rankings the following cells build;
# it is used as a column label when sorting dfPoss.T / dfOutPoss.T / dfMain.T.
team = "Como-Cesc Fàbregas-2425"
In [31]:
# Order the rows of the transposed in-possession similarity matrix by their
# similarity to `team`, highest first
dfPoss_sorted = dfPoss.transpose().sort_values(by=team, ascending=False)
# Keep only the `team` column, rounded to 4 decimals
dfPoss_filtered = dfPoss_sorted.loc[:, [team]].round(4)
In [32]:
# Order the rows of the transposed out-of-possession similarity matrix by
# their similarity to `team`, highest first
dfOutPoss_sorted = dfOutPoss.transpose().sort_values(by=team, ascending=False)
# Keep only the `team` column, rounded to 4 decimals
dfOutPoss_filtered = dfOutPoss_sorted.loc[:, [team]].round(4)
In [33]:
# Overall similarity: element-wise mean of the in-possession and
# out-of-possession matrices
dfMain = dfPoss.add(dfOutPoss).div(2)
# Order by similarity to `team` (highest first) and keep that single column,
# rounded to 4 decimals
dfMain_sorted = dfMain.transpose().sort_values(by=team, ascending=False)
dfMain_filtered = dfMain_sorted.loc[:, [team]].round(4)
In [34]:
# Put the overall, in-possession and out-of-possession columns side by side.
# The selected team's own row is removed by label rather than by position, so
# the result does not depend on the self-similarity sorting to the very top
# (same approach as the self-removal in analyze_team_style_signature).
merged_df = pd.concat(
    [dfMain_filtered, dfPoss_filtered, dfOutPoss_filtered], axis=1
).drop(index=team, errors='ignore')
merged_df.columns = ['General', 'In_Possession', 'Out_Possession']

# Keep the first 10 rows. dfMain_filtered was sorted in descending order, so
# these should be the ten highest overall similarities
# (assumes concat keeps the row order of the first frame - TODO confirm).
filtered_df = merged_df.head(10).reset_index()
# Make the identifiers readable in the table by replacing hyphens with spaces
filtered_df['index'] = filtered_df['index'].str.replace('-', ' ')
# Ascending order, so the highest value is the last row
filtered_df = filtered_df.sort_values(by='General', ascending=True)
In [35]:
# Render filtered_df as a text table on a blank axes: one row per team, with
# the 'General' column highlighted on a red background.
fig, ax = plt.subplots(figsize=(1800/500, 1800/500), dpi=500)
ncols = filtered_df.shape[1]
nrows = filtered_df.shape[0]
ax.set_xlim(0, ncols + 1)
ax.set_ylim(0, nrows + 1)
fig.patch.set_facecolor('#D7D1CF')

# x coordinate of each table column. Named column_x rather than `positions`
# so it does not overwrite the `positions` DataFrame loaded at the top of the
# notebook, which would break a re-run of the cells that merge it.
column_x = [0.1, 3.3, 3.8, 4.5]
columns = ['index', 'In_Possession', 'Out_Possession', 'General']

# Per-column text styling: (fontsize, color, font name).
# Only the General column uses the semibold font.
column_styles = {
    'General': (10, '#FFFFFF', fe_semibold.name),
    'In_Possession': (6, '#000000', fe_regular.name),
    'Out_Possession': (6, '#000000', fe_regular.name),
}
default_style = (8, '#000000', fe_regular.name)

for i in range(nrows):
    for j, column in enumerate(columns):
        fontsize, color, fontname = column_styles.get(column, default_style)
        ax.annotate(
            xy=(column_x[j], i + .5),
            text=str(filtered_df[column].iloc[i]),
            ha='left' if j == 0 else 'center',  # first column is left-aligned
            va='center',
            fontsize=fontsize,
            color=color,
            fontname=fontname
        )

# Add dividing lines: thick rules at the top and bottom, thin ones between rows
x_min, x_max = ax.get_xlim()
ax.plot([x_min, x_max], [nrows, nrows], lw=1.5, color='#000000', marker='', zorder=4)
ax.plot([x_min, x_max], [0, 0], lw=1.5, color='#000000', marker='', zorder=4)
for row in range(1, nrows):
    ax.plot([x_min, x_max], [row, row], lw=0.5, color='#000000', ls='-', zorder=3, marker='')

# Add colored background for the General column
ax.fill_between(x=[4.1, 4.9], y1=nrows, y2=0, color='red', alpha=1, ec='None')

plt.text(0.5, 0.88, f"Top 10 {team}'s\nStyle Similarity Ratings", transform=fig.transFigure, horizontalalignment='center', fontsize=10,
         fontname=fe_semibold.name, color='#000000')
plt.text(0.5, 0.84, 'In Possession, Out of Possession, and Overall Rating', transform=fig.transFigure, horizontalalignment='center',
         fontsize=7, color='#4E616C', fontname=fe_regular.name)
plt.text(0, -0.5, 'X: @gualanodavide | Bluesky: @gualanodavide.bsky.social | Linkedin: www.linkedin.com/in/davide-gualano-a2454b187 | Newsletter: the-cutback.beehiiv.com',
         horizontalalignment='left', fontsize=3)
ax.set_axis_off()
No description has been provided for this image
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [63]:
# Write the merged similarity table (General / In_Possession / Out_Possession) to CSV.
# NOTE(review): the file name refers to Bologna 23/24, while `team` in this
# notebook is set to "Como-Cesc Fàbregas-2425" - confirm the intended output
# name before re-running so the file is not mislabeled.
merged_df.to_csv("Bologna2324similarity.csv")