# Import necessary libraries
import urllib
import urllib.request

import matplotlib
import matplotlib.colors as mcolors
import matplotlib.font_manager as fm
import matplotlib.patheffects as path_effects
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import socceraction
import socceraction.atomic.spadl as atomicspadl
from mplsoccer import Pitch, VerticalPitch, lines
from PIL import Image

# Register the regular Source Sans Pro face with matplotlib's font manager so
# it can be selected by name
fe_regular = fm.FontEntry(
    name='SourceSansPro-Regular',
    fname='/Users/davidegualano/Documents/Python FTBLData/SourceSansPro-Regular.ttf',
)
fm.fontManager.ttflist.insert(0, fe_regular)

# Use the regular face as the default family for all text
matplotlib.rcParams['font.family'] = fe_regular.name

# The semibold face is NOT registered in the font manager; it is kept as a
# FontProperties object and handed to individual text calls via `fontproperties=`
semibold_font = fm.FontProperties(
    fname='/Users/davidegualano/Documents/Python FTBLData/SourceSansPro-SemiBold.ttf'
)

# Season identifier, used as a suffix in the per-season file names
season = 2425


def _read_indexed_csv(path):
    """Read a CSV file, using its first column as the row index."""
    return pd.read_csv(path, index_col=0)


# Season-independent tables
VAEP = _read_indexed_csv("aVAEPactions.csv")
fb = _read_indexed_csv("teamsFOTMOB.csv")

# Season-specific tables
players = _read_indexed_csv(f"players{season}.csv")
games = _read_indexed_csv(f"games{season}.csv")
aactions = _read_indexed_csv(f"atomic_actions{season}.csv")
chains = _read_indexed_csv(f"possession_chains_info{season}.csv")

# Recoveries identify events by 'event_id'; rename it to 'original_event_id'
# (presumably to line up with the identifier used in the actions table - verify)
recoveries = _read_indexed_csv(f"recoveries_id{season}.csv").rename(
    columns={'event_id': 'original_event_id'}
)

# Let socceraction attach its descriptive name columns to the atomic actions
aactions = atomicspadl.add_names(aactions)

# Build the unified actions table by left-joining players, teams and aVAEP
# values onto the atomic actions, in that order. No `on=` is passed, so every
# join uses whichever columns the two frames have in common.
dfa = aactions
for lookup in (players, fb, VAEP):
    dfa = dfa.merge(lookup, how="left")

# Features needed to define progressive actions.
# Distances are measured to the point (105, 34), i.e. the middle of the far
# goal line on the 105x68 pitch drawn further down.
goal_x, goal_y = 105, 34

start_dx, start_dy = goal_x - dfa['x_a0'], goal_y - dfa['y_a0']
end_dx, end_dy = goal_x - dfa['end_x'], goal_y - dfa['end_y']

dfa["beginning_distance"] = np.sqrt(np.square(start_dx) + np.square(start_dy)).round(2)
dfa["end_distance"] = np.sqrt(np.square(end_dx) + np.square(end_dy)).round(2)

# NOTE: despite its name, "length" is the absolute change in distance to the
# goal point, not the geometric length of the action
dfa["length"] = (dfa["end_distance"] - dfa["beginning_distance"]).abs()

# Direction of travel: radians from arctan2, then degrees wrapped into [0, 360)
dfa["angle"] = np.arctan2(dfa["end_y"] - dfa["y_a0"], dfa["end_x"] - dfa["x_a0"])
dfa['angle_degrees'] = np.degrees(dfa['angle']) % 360

# Attach selected attributes of the following row (the "next action") to every
# action; they are used for calculations and plotting later on.
#
# Only the source columns that are actually needed are shifted. Shifting the
# whole DataFrame once per feature copies every column each time just to read
# a single one.
#
# The last row has no successor and receives the fill value 0.
# NOTE(review): the shift is positional over the whole table, so the last
# action of one game is paired with the first action of the next game -
# confirm whether this is intended.
_next_action_sources = {
    "next_type_name": "type_name",
    "next_team_name": "team_name",
    "next_starting_position": "starting_position",
    "next_player_name": "player_name",
    "next_original_event_id": "original_event_id",
    "next_start_x": "x_a0",
    "next_start_y": "y_a0",
    "next_end_x": "end_x",
    "next_end_y": "end_y",
    "next_vaep_value": "vaep_value",
}
for _next_col, _source_col in _next_action_sources.items():
    dfa[_next_col] = dfa[_source_col].shift(-1, fill_value=0)

# Flag progressive actions with a custom definition. An action is progressive
# when ALL of the following hold:
#   1. it closes at least 17.5% of the starting distance to the goal point
#   2. its "length" feature is greater than 5
#   3. its direction lies in [0, 60] or [260, 360] degrees
#      NOTE(review): the two angle windows are not symmetric around 0 - confirm
#      that 260 (rather than 300) is intended
#   4. it does not start inside the rectangle x >= 88.5, 13.885 <= y <= 54.115
#      (the penalty box, per the original definition)
distance_gain = (dfa['beginning_distance'] - dfa['end_distance']) / dfa['beginning_distance']
closes_enough = distance_gain >= 0.175
long_enough = dfa['length'] > 5
heads_forward = dfa['angle_degrees'].between(0, 60) | dfa['angle_degrees'].between(260, 360)
starts_in_box = (dfa['x_a0'] >= 88.5) & dfa['y_a0'].between(13.885, 54.115)

dfa['progressive'] = np.where(
    closes_enough & long_enough & heads_forward & ~starts_in_box,
    True, False)

# Function to format season ID into a readable format
def format_season_id(season_id):
    """Return a season label of the form ``"SS/EE"``.

    ``season_id`` is converted with ``int()`` (so floats are accepted). ``EE``
    is the last two characters of that integer and ``SS`` the last two
    characters of the integer minus one, e.g. ``2425`` -> ``"24/25"`` and
    ``2021`` -> ``"20/21"``.
    """
    closing = int(season_id)
    opening = closing - 1
    return f"{str(opening)[-2:]}/{str(closing)[-2:]}"

# Convert 'minutes_played' to total minutes with error handling
def convert_to_minutes(time_str):
    """Convert a ``"MM:SS"`` value into minutes as a float.

    The input is stringified first, so non-string values such as NaN are
    accepted. Anything that does not split into exactly two integer parts
    around ``':'`` yields ``0``.
    """
    try:
        pieces = str(time_str).split(':')
        # Unpacking fails with ValueError unless there are exactly two
        # integer-convertible parts
        whole_minutes, extra_seconds = (int(piece) for piece in pieces)
    except (ValueError, AttributeError):
        # Unparseable input (e.g. NaN or a malformed string) counts as zero
        return 0
    return whole_minutes + extra_seconds / 60

# Minutes played per player row, converted from "MM:SS" to float minutes
players['minutes_played_converted'] = players['minutes_played'].map(convert_to_minutes)

# One row per (game_id, game_duration) pair; the count of 'is_starter' is only
# a by-product of collapsing the player rows down to games
game_lengths = (
    players
    .groupby(["game_id", "game_duration"], observed=True)['is_starter']
    .count()
    .reset_index(name='is_starter')
)

# Game duration converted from "MM:SS" to float minutes
game_lengths['game_duration_converted'] = game_lengths['game_duration'].map(convert_to_minutes)

# Median game duration in the dataset: rates are normalised to this instead of 90'
minutesadj = game_lengths['game_duration_converted'].median()
minutesadj

# Notebook output: 98.46666666666667

# Season total of minutes per (player, team) ...
season_minutes = (
    players
    .groupby(["player_id", "player_name", "team_id"])["minutes_played_converted"]
    .sum()
)
# ... with the team name attached through the columns shared with the teams table
mp = season_minutes.reset_index(name='minutes_played').merge(fb[['team_id', 'team_name']])

# Carries are the actions whose type name is 'dribble'
df1a = dfa.loc[dfa["type_name"] == 'dribble']

# Alphabetical list of every player with at least one carry, missing names removed
playerlist = df1a['player_name'].unique().tolist()
cleaned_playerlist = sorted(name for name in playerlist if pd.notna(name))

import html

from IPython.display import display, HTML

# Build a searchable HTML dropdown (an <input> bound to a <datalist>) to look
# up player names easily.
# Names are HTML-escaped before interpolation so that characters such as
# '"', '<' or '&' in a name cannot break the attribute or the markup; the
# browser decodes the entities back to the original name.
options_html = ''.join(
    f'<option value="{html.escape(str(name))}">{html.escape(str(name))}</option>'
    for name in cleaned_playerlist
)

# Braces belonging to the JavaScript are doubled because this is an f-string
dropdown_html = f"""
<input list="players" id="dropdown" oninput="handleInput()" placeholder="Choose Someone">
<datalist id="players">
    {options_html}
</datalist>
<p id="output"></p>
<script>
function handleInput() {{
    var input = document.getElementById("dropdown").value;
    var output = document.getElementById("output");
    output.innerHTML = "Selected: " + input;
}}
</script>
"""

# Display the dropdown
display(HTML(dropdown_html))

#Selecting player and filtering
player = 'Tino Anjorin'
# Take an explicit copy: a column is added to df2a below, and assigning to a
# filtered slice of df1a raises pandas' SettingWithCopyWarning
df2a = df1a[df1a["player_name"] == player].copy()

#Check the teams he played for in the season/dataset to filter down eventually
df2a.team_name.unique()
# Notebook output: array(['Empoli'], dtype=object)

# Add a readable season label (e.g. "24/25") derived from 'season_id'
df2a['formatted_season'] = df2a['season_id'].apply(format_season_id)

# Notebook output (warning emitted by the assignment above):
# /var/folders/ns/3wxdg4g57h77vxwmr4wzmvt40000gn/T/ipykernel_62848/2406992386.py:2: SettingWithCopyWarning:
# A value is trying to be set on a copy of a slice from a DataFrame.
# Try using .loc[row_indexer,col_indexer] = value instead
#
# See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
#   df2a['formatted_season'] = df2a['season_id'].apply(format_season_id)

# Restrict the player's carries to one team (useful when he played for several
# teams in the dataset)
df3a = df2a.loc[df2a["team_name"].eq("Empoli")]

# Of those, keep only the carries flagged as progressive
df4a = df3a.loc[df3a["progressive"].eq(True)]

# Total aVAEP of the progressive carries and of the actions that follow them,
# per player / team / season
summary_keys = ["player_id", "player_name", "team_name", "season_id"]
metrics = (
    df4a
    .groupby(summary_keys, observed=True)
    .agg(
        aVAEP=pd.NamedAgg(column="vaep_value", aggfunc="sum"),
        aVAEP_next=pd.NamedAgg(column="next_vaep_value", aggfunc="sum"),
    )
    .reset_index()
)

# Bring in the season minutes (joined on the columns shared with `mp`)
metrics = metrics.merge(mp)

# Turn the totals into rates per median game duration (`minutesadj` minutes)
for rate_col in ("aVAEP", "aVAEP_next"):
    metrics[rate_col] = metrics[rate_col] * minutesadj / metrics["minutes_played"]

# Figure 1: progressive carries only (df4a).

# Full 105x68 pitch with a 9x6 grid of zones
pitch = Pitch(pitch_type='custom', pitch_length=105, pitch_width=68, goal_type='box',
              line_color='black', linewidth=2, half=False)
bins = (9, 6)
fig, ax = pitch.draw(figsize=(16, 11), constrained_layout=True, tight_layout=False)

# Heatmap of carry start locations: the redder the zone, the more carries start there
cmap = mcolors.LinearSegmentedColormap.from_list("custom_red", ["#D7D1CF", "#FF0000"])
bs_heatmap = pitch.bin_statistic(df4a.x_a0, df4a.y_a0, statistic='count', bins=bins)
hm = pitch.heatmap(bs_heatmap, ax=ax, cmap=cmap, zorder=3, alpha=0.8)

# Individual carries as arrows, drawn below the heatmap (lower zorder)
# NOTE: the legend label still says 'successful passes' although these are carries
am = pitch.arrows(df4a.x_a0, df4a.y_a0, df4a.end_x, df4a.end_y, ax=ax,
                  color='#000000', width=0.8, headwidth=10, headlength=8,
                  alpha=0.5, zorder=1, label='successful passes')

# Summed aVAEP of the carries starting in each zone
bs_vaep = pitch.bin_statistic(df4a.x_a0, df4a.y_a0, values=df4a.vaep_value, statistic='sum', bins=bins)

# Write the zone total at the zone centre, skipping totals within +/-0.01 of zero
for cell in np.ndindex(bs_vaep['statistic'].shape):
    sum_vaep = bs_vaep['statistic'][cell]
    if abs(sum_vaep) > 0.01:
        text = ax.text(bs_vaep['cx'][cell], bs_vaep['cy'][cell], f"{sum_vaep:.3f}",
                       color="#FFFFFF", fontsize=20, ha="center", va="center", zorder=4)
        # Black outline so the white digits stay readable on any zone colour
        text.set_path_effects([path_effects.Stroke(linewidth=4, foreground='#000000'), path_effects.Normal()])

# Title
ax.text(0.5, 1.06, f"{player}'s progressive carries for {df3a.team_name.unique()[0]}",
        fontsize=25, va='center', ha='center', transform=ax.transAxes, fontproperties=semibold_font)

# Subtitle: carry count, the two normalised aVAEP rates, competition(s) and season(s)
n_carries = df4a['type_name'].count()
carry_rate = metrics['aVAEP'].unique()[0].round(3)
next_action_rate = metrics['aVAEP_next'].unique()[0].round(3)
competitions = ', '.join(df3a['competition_id'].unique())
seasons = ', '.join(df3a['formatted_season'].unique())
subtitle = (
    f"Carries : {n_carries}  |  Carries aVAEP per 98 : {carry_rate}  |  "
    f"aVAEP following action per 98 : {next_action_rate} |  {competitions} {seasons}\n"
    "Heatmap: amount of carries from that zone  |  Annotations: aVAEP from zone"
)
ax.text(0.5, 1.01, subtitle, fontsize=12, va='center', ha='center', transform=ax.transAxes)

# Credits line
ax.text(0.5, 0.02, 'X: @gualanodavide | Bluesky: @gualanodavide.bsky.social | Linkedin: www.linkedin.com/in/davide-gualano-a2454b187 | Newsletter: the-cutback.beehiiv.com',
        fontsize=10, va='center', ha='center', transform=ax.transAxes)

# Text-less annotation: only its arrow is drawn, ending at data point (65, 69.5)
# and starting 200 points to the left of it
ax.annotate(text="", xy=(65, 69.5), xytext=(-200, 0), textcoords="offset points", size=27, color="#000000",
            arrowprops=dict(arrowstyle="-|>", shrinkA=0, color="black", linewidth=2))

# Team logo, downloaded from FotMob and shown in a small inset axes
fotmob_url = "https://images.fotmob.com/image_resources/logo/teamlogo/"
logo_ax = fig.add_axes([.135, 0.85, 0.09, 0.09], zorder=1)
logo_address = f"{fotmob_url}{df4a['fotmob_id'].iloc[0]}.png"
club_icon = Image.open(urllib.request.urlopen(logo_address))
logo_ax.imshow(club_icon)
logo_ax.axis("off")

# Write the figure to disk
plt.savefig(f'{player}-progressive-{season}.png', dpi=500, facecolor="#D7D1CF", bbox_inches="tight", transparent=True)

# Same summary as for the progressive carries, this time over ALL of the
# player's carries for the team (df3a)
overall_keys = ["player_id", "player_name", "team_name", "season_id"]
metrics = (
    df3a
    .groupby(overall_keys, observed=True)
    .agg(
        aVAEP=pd.NamedAgg(column="vaep_value", aggfunc="sum"),
        aVAEP_next=pd.NamedAgg(column="next_vaep_value", aggfunc="sum"),
    )
    .reset_index()
)

# Bring in the season minutes (joined on the columns shared with `mp`)
metrics = metrics.merge(mp)

# Turn the totals into rates per median game duration (`minutesadj` minutes)
for rate_col in ("aVAEP", "aVAEP_next"):
    metrics[rate_col] = metrics[rate_col] * minutesadj / metrics["minutes_played"]

# Figure 2: all of the player's carries for the team (df3a).

# Setting up the pitch (105x68), the 9x6 zone grid and the figure
pitch = Pitch(pitch_type='custom', pitch_width=68, pitch_length=105, goal_type='box', linewidth=2, line_color='black', half=False)
bins = (9, 6)
fig, ax = pitch.draw(figsize=(16, 11), constrained_layout=True, tight_layout=False)

# Plot the heatmap - more intense red = more carries originating from that zone
cmap = mcolors.LinearSegmentedColormap.from_list("custom_red", ["#D7D1CF", "#FF0000"])
bs_heatmap = pitch.bin_statistic(df3a.x_a0, df3a.y_a0, statistic='count', bins=bins)
hm = pitch.heatmap(bs_heatmap, ax=ax, cmap=cmap, zorder = 3, alpha = 0.8)

# Plot the underlying carries as arrows, below the heatmap (lower zorder)
# NOTE: the legend label still says 'successful passes' although these are carries
am = pitch.arrows(df3a.x_a0, df3a.y_a0, df3a.end_x, df3a.end_y, width=0.8, alpha = 0.5, zorder = 1,
             headwidth = 10, headlength = 8, color = '#000000', label = 'successful passes', ax=ax)

# Sum of the aVAEP values of the carries starting in each zone
bs_vaep = pitch.bin_statistic(df3a.x_a0, df3a.y_a0, values=df3a.vaep_value, statistic='sum', bins=bins)

# Add a text annotation with the summed aVAEP to each zone
for i in range(bs_vaep['statistic'].shape[0]):
    for j in range(bs_vaep['statistic'].shape[1]):
        # Zone centre coordinates
        bin_center_x, bin_center_y = bs_vaep['cx'][i, j], bs_vaep['cy'][i, j]
        # Summed aVAEP for the zone
        sum_vaep = bs_vaep['statistic'][i, j]
        # Annotate only when the sum is not within +/-0.01 of zero
        if (sum_vaep < -0.01) | (sum_vaep > 0.01):
            text = ax.text(bin_center_x, bin_center_y, f"{sum_vaep:.3f}", color="#FFFFFF", ha="center", va="center",
                               fontsize=20, zorder=4)
            # Black outline so the white digits stay readable on any zone colour
            text.set_path_effects([path_effects.Stroke(linewidth=4, foreground='#000000'), path_effects.Normal()])

# Title, subtitle (carry count, normalised aVAEP rates, competition and season) and credits
ax.text(0.5, 1.06, f"{player}'s carries for {df3a.team_name.unique()[0]}", fontsize=25, va='center', ha='center', transform=ax.transAxes, fontproperties=semibold_font)
ax.text(0.5, 1.01, f"Carries : {(df3a['type_name'].count())}  |  Carries aVAEP per 98 : {metrics['aVAEP'].unique()[0].round(3)}  |  aVAEP following action per 98 : {metrics['aVAEP_next'].unique()[0].round(3)} |  {', '.join(df3a['competition_id'].unique())} {', '.join(df3a['formatted_season'].unique())}\nHeatmap: amount of carries from that zone  |  Annotations: aVAEP from zone",
        fontsize=12, va='center', ha='center', transform=ax.transAxes)
ax.text(0.5, 0.02, f'X: @gualanodavide | Bluesky: @gualanodavide.bsky.social | Linkedin: www.linkedin.com/in/davide-gualano-a2454b187 | Newsletter: the-cutback.beehiiv.com',
        fontsize=10, va='center', ha='center', transform=ax.transAxes)
# Text-less annotation: only its arrow is drawn, ending at data point (65, 69.5)
ax.annotate(text="", xy=(65, 69.5), xytext=(-200, 0), textcoords="offset points", size=27, color="#000000", 
        arrowprops=dict(arrowstyle="-|>", shrinkA=0, color="black", linewidth=2))

# Adding the team logo.
# The logo id is read from df3a, the frame this figure is built from. The
# progressive-only subset (df4a) is empty for a player without progressive
# carries, in which case `.iloc[0]` on it raises IndexError.
fotmob_url = "https://images.fotmob.com/image_resources/logo/teamlogo/"
logo_ax = fig.add_axes([.135, 0.85, 0.09, 0.09], zorder=1)
club_icon = Image.open(urllib.request.urlopen(f"{fotmob_url}{df3a['fotmob_id'].iloc[0]}.png"))
logo_ax.imshow(club_icon)
logo_ax.axis("off")

# Save the figure with the chosen face colour
plt.savefig(f'{player}-overall-{season}.png', dpi=500, facecolor="#D7D1CF", bbox_inches="tight", transparent=True)