import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import scikits.bootstrap as bootstrap
import scipy.stats  
from typing import List
# Load the three input tables (read in this order): regular season game totals,
# play-off game totals, and the regular season per-team summary statistics.
regular_season_totals, play_off_totals, regular_season_team_summary_stats = (
    pd.read_csv(csv_path)
    for csv_path in (
        "regular_season_totals.csv",
        "play_off_totals.csv",
        "regular_season_team_summary_stats.csv",
    )
)

#Team names to create boxplots of variables for (three groups of ten teams each)
teamNames1 = [
    "Atlanta Hawks",
    "Boston Celtics",
    "Charlotte Hornets",
    "Chicago Bulls",
    "Cleveland Cavaliers",
    "Dallas Mavericks",
    "Denver Nuggets",
    "Detroit Pistons",
    "Golden State Warriors",
    "Houston Rockets",
]

teamNames2 = [
    "Indiana Pacers",
    "Los Angeles Clippers",
    "Los Angeles Lakers",
    "Memphis Grizzlies",
    "Miami Heat",
    "Milwaukee Bucks",
    "Minnesota Timberwolves",
    "Brooklyn Nets",
    "New Orleans Pelicans",
    "New York Knicks",
]

teamNames3 = [
    "Oklahoma City Thunder",
    "Orlando Magic",
    "Phoenix Suns",
    "Portland Trail Blazers",
    "Sacramento Kings",
    "San Antonio Spurs",
    "Toronto Raptors",
    "Washington Wizards",
    "Philadelphia 76ers",
    "Utah Jazz",
]

# One summary-stats subset per team group, so each boxplot figure shows ten teams
(
    regular_season_team_summary_stats_1,
    regular_season_team_summary_stats_2,
    regular_season_team_summary_stats_3,
) = (
    regular_season_team_summary_stats.loc[regular_season_team_summary_stats["teamName"].isin(team_group)]
    for team_group in (teamNames1, teamNames2, teamNames3)
)

#Regular season totals data split between winning games and losing games
winning_games_regular_season, losing_games_regular_season = (
    regular_season_totals.loc[regular_season_totals["WL"] == outcome]
    for outcome in ("W", "L")
)

#Play-offs totals data split between winning games and losing games
winning_games_play_offs, losing_games_play_offs = (
    play_off_totals.loc[play_off_totals["WL"] == outcome]
    for outcome in ("W", "L")
)

#Function to create boxplots
def plot_boxplot(variable: str, df: pd.DataFrame) -> None:
    """
    Plots boxplots of the given variable for the group of teams in the given data frame df,
    one box per value of the "teamName" column.

    variable: name of the column of df drawn on the y-axis (assumed to exist in df)
    df: data frame that must also contain a "teamName" column, used for the x-axis

    Opens a new 20 x 4.8 inch figure, draws the boxes in grey and shows the figure.
    Returns nothing.
    """
    plt.figure(figsize=(20, 4.8))
    sns.boxplot(x="teamName", y=variable, data=df, color='grey')
    plt.show()

#Function to create histogram
def plot_histogram(variable: str, df: pd.DataFrame) -> None:
    """
    Plots a histogram of the given variable for the given data frame df (e.g. the games of one team).

    variable: name of the column of df whose values are binned (assumed to exist in df)
    df: data frame holding the observations

    Opens a new 20 x 4.8 inch figure, draws the histogram in grey with automatically
    chosen bins (bins='auto') and shows the figure. Returns nothing.
    """
    plt.figure(figsize=(20, 4.8))
    sns.histplot(x=variable, data=df, bins='auto', color='grey')
    plt.show()

#Function to create bar chart
def plot_barchart(variable: str, df: pd.DataFrame) -> None:
    """
    Plots a bar chart counting how often each value of the given variable occurs
    in the given data frame df (e.g. the games of one team).

    variable: name of the column of df whose values are counted (assumed to exist in df)
    df: data frame holding the observations

    Opens a new 20 x 4.8 inch figure, draws the count bars in grey and shows the figure.
    Returns nothing.
    """
    plt.figure(figsize=(20, 4.8))
    sns.countplot(x=variable, data=df, color='grey')
    plt.show()

def bootstrap_distribution(variable: str, original_sample: pd.DataFrame, n_resamples: int = 1000) -> List[float]:
    """
    Performs bootstrap resampling to explore the true mean of the given variable of the
    given original sample, assuming the variable exists in the given data.

    variable: name of the column of original_sample to resample
    original_sample: data frame holding the observed sample
    n_resamples: number of bootstrap resamples to draw (default 1000)

    Each resample draws len(original_sample) values with replacement and contributes its mean.
    No random_state is passed to pandas, so the draws follow NumPy's global random state
    (callers seed it with np.random.seed for reproducible results).

    Returns the list of resample means, one per resample, in the order they were drawn.
    """
    # Looked up once: neither the column nor the sample size changes between resamples
    values = original_sample[variable]
    sample_size = len(original_sample)

    return [
        values.sample(n=sample_size, replace=True).mean()
        for _ in range(n_resamples)
    ]

def confidence_interval(data:List[int]):
    """
    Returns [lower_bound, upper_bound] of the 95% percentile confidence interval for the
    given numeric data: its 0.025 and 0.975 quantiles, each converted to a plain float
    """
    series = pd.Series(data)
    return [float(series.quantile(level)) for level in (0.025, 0.975)]

def visualize_confidence_interval(distribution: List[float], bounds: List[float]) -> None:
    """
    Plots a 30-bin histogram of the given distribution and shades in the given
    95% confidence interval.

    distribution: numeric values to plot (e.g. bootstrap means)
    bounds: [lower_bound, upper_bound] of the interval to shade

    No new figure is created here, so the histogram goes onto the current axes; callers
    in this file set the axis label and title with plt.xlabel / plt.title right before
    calling this function. The figure is shown at the end. Returns nothing.
    """
    sns.histplot(x=distribution, bins=30, color='gray')
    # Shaded vertical band from the lower to the upper bound
    plt.axvspan(bounds[0], bounds[1], alpha=0.3, facecolor="turquoise", edgecolor="red", linewidth=4)
    plt.show()

def score(ci1: List[float],ci2:List[float]):
    """
    Computes score for variable given the method described under "Final Selections".

    For each interval [lower, upper] the midpoint is divided by the width; the result is
    0.93 times that ratio for ci1 plus 0.07 times that ratio for ci2.
    """
    def weighted_ratio(weight, ci):
        # weight * midpoint / width, evaluated left to right
        midpoint = (ci[0] + ci[1]) / 2
        width = ci[1] - ci[0]
        return weight * midpoint / width

    return weighted_ratio(0.93, ci1) + weighted_ratio(0.07, ci2)

# ---- Field goals made ----
# Boxplots of total field goals made, one figure per group of ten teams
plot_boxplot("totalFieldGoalsMade",regular_season_team_summary_stats_1)
plot_boxplot("totalFieldGoalsMade",regular_season_team_summary_stats_2)
plot_boxplot("totalFieldGoalsMade",regular_season_team_summary_stats_3)

# Regular season games of the two teams looked at in detail
sas_regular_season_totals = regular_season_totals[regular_season_totals["teamName"]=="San Antonio Spurs"]
uta_regular_season_totals = regular_season_totals[regular_season_totals["teamName"]=="Utah Jazz"]

plot_histogram("fieldGoalsMade",sas_regular_season_totals)
plot_histogram("fieldGoalsMade",uta_regular_season_totals)
# plot_barchart only draws and returns None; the two names are kept so existing references keep working
sas_wl_bar_chart = plot_barchart("WL",sas_regular_season_totals)
uta_wl_bar_chart = plot_barchart("WL",uta_regular_season_totals)

# Seed NumPy's global random state so the bootstrap resamples below are reproducible
np.random.seed(100)

# One row per game: the winning team's columns get the "_x" suffix (left frame of the merge),
# the losing team's columns get "_y". Every diff* column is therefore winner minus loser.
combined_games_regular_season = pd.merge(winning_games_regular_season,losing_games_regular_season,on=["gameID"])
combined_games_regular_season["diffFieldGoals"] = combined_games_regular_season["fieldGoalsMade_x"] -  combined_games_regular_season["fieldGoalsMade_y"]

combined_games_play_offs = pd.merge(winning_games_play_offs,losing_games_play_offs,on=["gameID"])
combined_games_play_offs["diffFieldGoals"] = combined_games_play_offs["fieldGoalsMade_x"] -  combined_games_play_offs["fieldGoalsMade_y"]

# Regular season games San Antonio won against Utah
sas_uta_games = combined_games_regular_season[(combined_games_regular_season["teamName_x"]=="San Antonio Spurs") & (combined_games_regular_season["teamName_y"]=="Utah Jazz")]

sas_uta_field_goals_bootstrap_distribution = bootstrap_distribution("diffFieldGoals",sas_uta_games)

sas_uta_percentile_confidence_interval = confidence_interval(sas_uta_field_goals_bootstrap_distribution)

# Label the current axes first; visualize_confidence_interval then draws on them and shows the figure
plt.xlabel("Mean Difference in Field Goals")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Field Goals for \nSan Antonio Spurs Winning Utah Jazz (n=1000)")

# visualize_confidence_interval returns None; the name is kept so existing references keep working
sas_uta_field_goals_confidence_interval = visualize_confidence_interval(sas_uta_field_goals_bootstrap_distribution,sas_uta_percentile_confidence_interval)

# A bare expression shows nothing when the file runs as a script, so print it like the other intervals
print(sas_uta_percentile_confidence_interval)
# Output recorded from an earlier run: [1.9301724137931036, 5.414655172413792]

np.random.seed(100)

field_goals_regular_season_bootstrap_distribution = bootstrap_distribution("diffFieldGoals",combined_games_regular_season)

field_goals_regular_season_percentile_confidence_interval = confidence_interval(field_goals_regular_season_bootstrap_distribution)

plt.xlabel("Mean Difference in Field Goals")
plt.title("Bootstrap Sampling Distribution for Difference in Mean Field Goals \n(Regular Season, n=1000)")

field_goals_regular_season_confidence_interval = visualize_confidence_interval(field_goals_regular_season_bootstrap_distribution,field_goals_regular_season_percentile_confidence_interval)
print(field_goals_regular_season_percentile_confidence_interval)
# Output recorded from an earlier run (first of the two printed intervals): [3.883704526353704, 4.0328415776203625]

# No reseed here: the play-off resamples continue the random stream of the regular season run above
field_goals_play_offs_bootstrap_distribution = bootstrap_distribution("diffFieldGoals",combined_games_play_offs)

field_goals_play_offs_percentile_confidence_interval = confidence_interval(field_goals_play_offs_bootstrap_distribution)

plt.xlabel("Mean Difference in Field Goals")
plt.title("Bootstrap Sampling Distribution for Difference in Mean Field Goals \n(Play-offs, n=1000)")

field_goals_play_offs_confidence_interval = visualize_confidence_interval(field_goals_play_offs_bootstrap_distribution,field_goals_play_offs_percentile_confidence_interval)
print(field_goals_play_offs_percentile_confidence_interval)
# Output recorded from an earlier run (second of the two printed intervals): [3.925124688279302, 4.479093931837074]

# ---- Three pointers made ----
# Boxplots of total three pointers made, one figure per group of ten teams
plot_boxplot("totalThreePointersMade",regular_season_team_summary_stats_1)
plot_boxplot("totalThreePointersMade",regular_season_team_summary_stats_2)
plot_boxplot("totalThreePointersMade",regular_season_team_summary_stats_3)

hou_regular_season_totals = regular_season_totals[regular_season_totals["teamName"]=="Houston Rockets"]

plot_histogram("threePointersMade",hou_regular_season_totals)
plot_histogram("threePointersMade",sas_regular_season_totals)
plot_barchart("WL",hou_regular_season_totals)
plot_barchart("WL",sas_regular_season_totals)

# Seed NumPy's global random state so the bootstrap resamples below are reproducible
np.random.seed(100)

# "_x" columns belong to the winning team and "_y" to the losing team, so the difference is winner minus loser
combined_games_regular_season["diffThreePointers"] = combined_games_regular_season["threePointersMade_x"] - combined_games_regular_season["threePointersMade_y"]
combined_games_play_offs["diffThreePointers"] = combined_games_play_offs["threePointersMade_x"] - combined_games_play_offs["threePointersMade_y"]


# Regular season games Houston (winner, "_x") won against San Antonio (loser, "_y")
sas_hou_games = combined_games_regular_season[(combined_games_regular_season["teamName_x"] == "Houston Rockets") & (combined_games_regular_season["teamName_y"] == "San Antonio Spurs")]

# NOTE: despite "field_goals" in the next names they hold the three-pointer results;
# the names are kept unchanged so existing references keep working
sas_hou_field_goals_bootstrap_distribution = bootstrap_distribution("diffThreePointers",sas_hou_games)

sas_hou_percentile_confidence_interval = confidence_interval(sas_hou_field_goals_bootstrap_distribution)


# Label the current axes first; visualize_confidence_interval then draws on them and shows the figure.
# The title names Houston as the winner because that is what the filter above selects.
plt.xlabel("Mean Difference in Three Pointers")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Three Pointers for \nHouston Rockets Winning San Antonio Spurs (n=1000)")
sas_hou_field_goals_confidence_interval = visualize_confidence_interval(sas_hou_field_goals_bootstrap_distribution,sas_hou_percentile_confidence_interval)

# A bare expression shows nothing when the file runs as a script, so print it like the other intervals
print(sas_hou_percentile_confidence_interval)
# Output recorded from an earlier run: [1.32, 4.88]

np.random.seed(100)

three_pointers_regular_season_bootstrap_distribution = bootstrap_distribution("diffThreePointers",combined_games_regular_season)

three_pointers_regular_season_percentile_confidence_interval = confidence_interval(three_pointers_regular_season_bootstrap_distribution)

plt.xlabel("Mean Difference in Three Pointers")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Three Pointers \n(Regular season, n=1000)")
three_pointers_regular_season_confidence_interval = visualize_confidence_interval(three_pointers_regular_season_bootstrap_distribution,three_pointers_regular_season_percentile_confidence_interval)
print(three_pointers_regular_season_percentile_confidence_interval)
# Output recorded from an earlier run (first of the two printed intervals): [1.7015848241085365, 1.8355249729859529]

# No reseed here: the play-off resamples continue the random stream of the regular season run above
three_pointers_play_offs_bootstrap_distribution = bootstrap_distribution("diffThreePointers",combined_games_play_offs)

three_pointers_play_offs_percentile_confidence_interval = confidence_interval(three_pointers_play_offs_bootstrap_distribution)

plt.xlabel("Mean Difference in Three Pointers")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Three Pointers \n(Play-offs, n=1000)")
three_pointers_play_offs_confidence_interval = visualize_confidence_interval(three_pointers_play_offs_bootstrap_distribution,three_pointers_play_offs_percentile_confidence_interval)
print(three_pointers_play_offs_percentile_confidence_interval)
# Output recorded from an earlier run (second of the two printed intervals): [1.693246051537822, 2.2103075644222776]

# ---- Free throws made ----
# Boxplots of total free throws made, one figure per group of ten teams
plot_boxplot("totalFreeThrowsMade",regular_season_team_summary_stats_1)
plot_boxplot("totalFreeThrowsMade",regular_season_team_summary_stats_2)
plot_boxplot("totalFreeThrowsMade",regular_season_team_summary_stats_3)

orl_regular_season_totals = regular_season_totals[regular_season_totals["teamName"]=="Orlando Magic"]

plot_histogram("freeThrowsMade",hou_regular_season_totals)
plot_histogram("freeThrowsMade",orl_regular_season_totals)
plot_barchart("WL",hou_regular_season_totals)
plot_barchart("WL",orl_regular_season_totals)

# Seed NumPy's global random state so the bootstrap resamples below are reproducible
np.random.seed(100)

# "_x" columns belong to the winning team and "_y" to the losing team, so the difference is winner minus loser
combined_games_regular_season["diffFreeThrows"] = combined_games_regular_season["freeThrowsMade_x"] - combined_games_regular_season["freeThrowsMade_y"]

combined_games_play_offs["diffFreeThrows"] = combined_games_play_offs["freeThrowsMade_x"] - combined_games_play_offs["freeThrowsMade_y"]


# Regular season games Houston (winner, "_x") won against Orlando (loser, "_y")
hou_orl_games = combined_games_regular_season[(combined_games_regular_season["teamName_x"]=="Houston Rockets") & (combined_games_regular_season["teamName_y"]=="Orlando Magic")]

hou_orl_free_throws_bootstrap_distribution = bootstrap_distribution("diffFreeThrows",hou_orl_games)

hou_orl_free_throws_percentile_confidence_interval = confidence_interval(hou_orl_free_throws_bootstrap_distribution)

# Label the current axes first; visualize_confidence_interval then draws on them and shows the figure
plt.xlabel("Mean Difference in Free Throws")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Free Throws for \nHouston Rockets Winning Orlando Magic (n=1000)")
hou_orl_free_throws_confidence_interval = visualize_confidence_interval(hou_orl_free_throws_bootstrap_distribution,hou_orl_free_throws_percentile_confidence_interval)

# A bare expression shows nothing when the file runs as a script, so print it like the other intervals
print(hou_orl_free_throws_percentile_confidence_interval)
# Output recorded from an earlier run: [-2.0588235294117645, 4.529411764705882]

np.random.seed(100)

free_throws_regular_season_bootstrap_distribution = bootstrap_distribution("diffFreeThrows",combined_games_regular_season)

free_throws_regular_season_percentile_confidence_interval = confidence_interval(free_throws_regular_season_bootstrap_distribution)

plt.xlabel("Mean Difference in Free Throws")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Free Throws \n(Regular season, n=1000)")
free_throws_regular_season_confidence_interval = visualize_confidence_interval(free_throws_regular_season_bootstrap_distribution,free_throws_regular_season_percentile_confidence_interval)
print(free_throws_regular_season_percentile_confidence_interval)
# Output recorded from an earlier run (first of the two printed intervals): [1.6124894945371593, 1.8389317445071438]

# No reseed here: the play-off resamples continue the random stream of the regular season run above
free_throws_play_offs_bootstrap_distribution = bootstrap_distribution("diffFreeThrows",combined_games_play_offs)

free_throws_play_offs_percentile_confidence_interval = confidence_interval(free_throws_play_offs_bootstrap_distribution)

plt.xlabel("Mean Difference in Free Throws")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Free Throws \n(Play-offs, n=1000)")
free_throws_play_offs_confidence_interval = visualize_confidence_interval(free_throws_play_offs_bootstrap_distribution,free_throws_play_offs_percentile_confidence_interval)
print(free_throws_play_offs_percentile_confidence_interval)
# Output recorded from an earlier run (second of the two printed intervals): [1.1953241895261846, 2.0034081463009143]

# ---- Rebounds ----
# Boxplots of total rebounds, one figure per group of ten teams
plot_boxplot("totalRebounds",regular_season_team_summary_stats_1)
plot_boxplot("totalRebounds",regular_season_team_summary_stats_2)
plot_boxplot("totalRebounds",regular_season_team_summary_stats_3)

den_regular_season_totals = regular_season_totals[regular_season_totals["teamName"] == "Denver Nuggets"]
mia_regular_season_totals = regular_season_totals[regular_season_totals["teamName"] == "Miami Heat"]

plot_histogram("reboundsTotal",den_regular_season_totals)
plot_histogram("reboundsTotal",mia_regular_season_totals)
plot_barchart("WL",den_regular_season_totals)
plot_barchart("WL",mia_regular_season_totals)

# Seed NumPy's global random state so the bootstrap resamples below are reproducible
np.random.seed(100)

# "_x" columns belong to the winning team and "_y" to the losing team, so the difference is winner minus loser
combined_games_regular_season["diffRebounds"] = combined_games_regular_season["reboundsTotal_x"] - combined_games_regular_season["reboundsTotal_y"]

combined_games_play_offs["diffRebounds"] = combined_games_play_offs["reboundsTotal_x"] - combined_games_play_offs["reboundsTotal_y"]


# Regular season games Denver (winner, "_x") won against Miami (loser, "_y")
den_mia_games = combined_games_regular_season[(combined_games_regular_season["teamName_x"] == "Denver Nuggets") & (combined_games_regular_season["teamName_y"] == "Miami Heat")]

den_mia_rebounds_bootstrap_distribution = bootstrap_distribution("diffRebounds",den_mia_games)

den_mia_rebounds_percentile_confidence_interval = confidence_interval(den_mia_rebounds_bootstrap_distribution)

# Label the current axes first; visualize_confidence_interval then draws on them and shows the figure
plt.xlabel("Mean Difference in Rebounds")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Rebounds for \nDenver Nuggets Winning Miami Heat (n=1000)")
den_mia_rebounds_confidence_interval = visualize_confidence_interval(den_mia_rebounds_bootstrap_distribution,den_mia_rebounds_percentile_confidence_interval)

# A bare expression shows nothing when the file runs as a script, so print it like the other intervals
print(den_mia_rebounds_percentile_confidence_interval)
# Output recorded from an earlier run: [0.9970588235294119, 6.354411764705881]

np.random.seed(100)

rebounds_regular_season_bootstrap_distribution = bootstrap_distribution("diffRebounds",combined_games_regular_season)

rebounds_regular_season_percentile_confidence_interval = confidence_interval(rebounds_regular_season_bootstrap_distribution)

plt.xlabel("Mean Difference in Rebounds")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Rebounds \n(Regular season, n=1000)")
rebounds_regular_season_confidence_interval = visualize_confidence_interval(rebounds_regular_season_bootstrap_distribution,rebounds_regular_season_percentile_confidence_interval)
print(rebounds_regular_season_percentile_confidence_interval)
# Output recorded from an earlier run (first of the two printed intervals): [3.480279745467643, 3.7330246728298713]

# No reseed here: the play-off resamples continue the random stream of the regular season run above
rebounds_play_offs_bootstrap_distribution = bootstrap_distribution("diffRebounds",combined_games_play_offs)

rebounds_play_offs_percentile_confidence_interval = confidence_interval(rebounds_play_offs_bootstrap_distribution)

plt.xlabel("Mean Difference in Rebounds")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Rebounds \n(Play-offs, n=1000)")
rebounds_play_offs_confidence_interval = visualize_confidence_interval(rebounds_play_offs_bootstrap_distribution,rebounds_play_offs_percentile_confidence_interval)
print(rebounds_play_offs_percentile_confidence_interval)
# Output recorded from an earlier run (second of the two printed intervals): [3.329093931837074, 4.289318370739817]

# ---- Assists ----
# Boxplots of total assists, one figure per group of ten teams
plot_boxplot("totalAssists",regular_season_team_summary_stats_1)
plot_boxplot("totalAssists",regular_season_team_summary_stats_2)
plot_boxplot("totalAssists",regular_season_team_summary_stats_3)

gsw_regular_season_totals = regular_season_totals[regular_season_totals["teamName"] == "Golden State Warriors"]
nyk_regular_season_totals = regular_season_totals[regular_season_totals["teamName"] == "New York Knicks"]

plot_histogram("assists",gsw_regular_season_totals)
plot_histogram("assists",nyk_regular_season_totals)
plot_barchart("WL",gsw_regular_season_totals)
plot_barchart("WL",nyk_regular_season_totals)

# Seed NumPy's global random state so the bootstrap resamples below are reproducible
np.random.seed(100)

# "_x" columns belong to the winning team and "_y" to the losing team, so the difference is winner minus loser
combined_games_regular_season["diffAssists"] = combined_games_regular_season["assists_x"] - combined_games_regular_season["assists_y"]
combined_games_play_offs["diffAssists"] = combined_games_play_offs["assists_x"] - combined_games_play_offs["assists_y"]

# Regular season games Golden State (winner, "_x") won against New York (loser, "_y")
gsw_nyk_games = combined_games_regular_season[(combined_games_regular_season["teamName_x"]=="Golden State Warriors") & (combined_games_regular_season["teamName_y"]=="New York Knicks")]

gsw_nyk_assists_bootstrap_distribution = bootstrap_distribution("diffAssists",gsw_nyk_games)

gsw_nyk_assists_percentile_confidence_interval = confidence_interval(gsw_nyk_assists_bootstrap_distribution)

# Label the current axes first; visualize_confidence_interval then draws on them and shows the figure.
# The x-axis holds bootstrap means, so it is labelled "Mean Difference" like the other plots.
plt.xlabel("Mean Difference in Assists")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Assists for \n Golden State Warriors Winning New York Knicks (n=1000)")
gsw_nyk_assists_confidence_interval = visualize_confidence_interval(gsw_nyk_assists_bootstrap_distribution,gsw_nyk_assists_percentile_confidence_interval)
print(gsw_nyk_assists_percentile_confidence_interval)
# Output recorded from an earlier run: [5.888888888888889, 12.001388888888888]

np.random.seed(100)

assists_regular_season_bootstrap_distribution = bootstrap_distribution("diffAssists",combined_games_regular_season)

assists_regular_season_percentile_confidence_interval = confidence_interval(assists_regular_season_bootstrap_distribution)

plt.xlabel("Mean Difference in Assists")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Assists \n(Regular season, n=1000)")
assists_regular_season_confidence_interval = visualize_confidence_interval(assists_regular_season_bootstrap_distribution,assists_regular_season_percentile_confidence_interval)
print(assists_regular_season_percentile_confidence_interval)
# Output recorded from an earlier run (first of the two printed intervals): [3.120283047184536, 3.2888867210949693]

# No reseed here: the play-off resamples continue the random stream of the regular season run above
assists_play_offs_bootstrap_distribution = bootstrap_distribution("diffAssists",combined_games_play_offs)

assists_play_offs_percentile_confidence_interval = confidence_interval(assists_play_offs_bootstrap_distribution)

plt.xlabel("Mean Difference in Assists")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Assists \n(Play-offs, n=1000)")
assists_play_offs_confidence_interval = visualize_confidence_interval(assists_play_offs_bootstrap_distribution,assists_play_offs_percentile_confidence_interval)
print(assists_play_offs_percentile_confidence_interval)
# Output recorded from an earlier run (second of the two printed intervals): [2.599334995843724, 3.2319825436408975]

# ---- Steals ----
# Boxplots of total steals, one figure per group of ten teams
plot_boxplot("totalSteals",regular_season_team_summary_stats_1)
plot_boxplot("totalSteals",regular_season_team_summary_stats_2)
plot_boxplot("totalSteals",regular_season_team_summary_stats_3)

# Seed NumPy's global random state so the bootstrap resamples below are reproducible
np.random.seed(100)

# "_x" columns belong to the winning team and "_y" to the losing team, so the difference is winner minus loser
combined_games_regular_season["diffSteals"] = combined_games_regular_season["steals_x"] - combined_games_regular_season["steals_y"]
combined_games_play_offs["diffSteals"] = combined_games_play_offs["steals_x"] - combined_games_play_offs["steals_y"]

# Regular season games Memphis (winner, "_x") won against Portland (loser, "_y")
mem_por_games = combined_games_regular_season[(combined_games_regular_season["teamName_x"]=="Memphis Grizzlies") & (combined_games_regular_season["teamName_y"]=="Portland Trail Blazers")]

mem_por_steals_bootstrap_distribution = bootstrap_distribution("diffSteals",mem_por_games)

mem_por_steals_percentile_confidence_interval = confidence_interval(mem_por_steals_bootstrap_distribution)

# Label the current axes first; visualize_confidence_interval then draws on them and shows the figure
plt.xlabel("Mean Difference in Steals")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Steals for \nMemphis Grizzlies Winning Portland Trail Blazers (n=1000)")
mem_por_steals_confidence_interval = visualize_confidence_interval(mem_por_steals_bootstrap_distribution,mem_por_steals_percentile_confidence_interval)
print(mem_por_steals_percentile_confidence_interval)
# Output recorded from an earlier run: [0.2790000000000001, 2.720999999999999]

np.random.seed(100)

steals_regular_season_bootstrap_distribution = bootstrap_distribution("diffSteals",combined_games_regular_season)

steals_regular_season_percentile_confidence_interval = confidence_interval(steals_regular_season_bootstrap_distribution)

plt.xlabel("Mean Difference in Steals")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Steals \n(Regular season, n=1000)")
steals_regular_season_confidence_interval = visualize_confidence_interval(steals_regular_season_bootstrap_distribution,steals_regular_season_percentile_confidence_interval)
print(steals_regular_season_percentile_confidence_interval)
# Output recorded from an earlier run (first of the two printed intervals): [0.7036844158962661, 0.8254397286589026]

# No reseed here: the play-off resamples continue the random stream of the regular season run above
steals_play_offs_bootstrap_distribution = bootstrap_distribution("diffSteals",combined_games_play_offs)

steals_play_offs_percentile_confidence_interval = confidence_interval(steals_play_offs_bootstrap_distribution)

plt.xlabel("Mean Difference in Steals")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Steals \n(Play-offs, n=1000)")
steals_play_offs_confidence_interval = visualize_confidence_interval(steals_play_offs_bootstrap_distribution,steals_play_offs_percentile_confidence_interval)
print(steals_play_offs_percentile_confidence_interval)
# Output recorded from an earlier run (second of the two printed intervals): [0.7024106400665004, 1.1355153782211138]

# ---- Blocks ----
# Boxplots of total blocks, one figure per group of ten teams
plot_boxplot("totalBlocks",regular_season_team_summary_stats_1)
plot_boxplot("totalBlocks",regular_season_team_summary_stats_2)
plot_boxplot("totalBlocks",regular_season_team_summary_stats_3)

okc_regular_season_totals = regular_season_totals[regular_season_totals["teamName"]=="Oklahoma City Thunder"]
cle_regular_season_totals = regular_season_totals[regular_season_totals["teamName"]=="Cleveland Cavaliers"]

plot_histogram("blocks",okc_regular_season_totals)
plot_histogram("blocks",cle_regular_season_totals)
plot_barchart("WL",okc_regular_season_totals)
plot_barchart("WL",cle_regular_season_totals)

# Seed NumPy's global random state so the bootstrap resamples below are reproducible
np.random.seed(100)

# "_x" columns belong to the winning team and "_y" to the losing team, so the difference is winner minus loser
combined_games_regular_season["diffBlocks"] = combined_games_regular_season["blocks_x"] - combined_games_regular_season["blocks_y"]
combined_games_play_offs["diffBlocks"] = combined_games_play_offs["blocks_x"] - combined_games_play_offs["blocks_y"]

# Regular season games Oklahoma City (winner, "_x") won against Cleveland (loser, "_y")
okc_cle_games = combined_games_regular_season[(combined_games_regular_season["teamName_x"] == "Oklahoma City Thunder") & (combined_games_regular_season["teamName_y"] == "Cleveland Cavaliers")]

okc_cle_blocks_bootstrap_distribution = bootstrap_distribution("diffBlocks",okc_cle_games)

okc_cle_blocks_percentile_confidence_interval = confidence_interval(okc_cle_blocks_bootstrap_distribution)


# Label the current axes first; visualize_confidence_interval then draws on them and shows the figure
plt.xlabel("Mean Difference in Blocks")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Blocks for \nOklahoma City Thunder Winning Cleveland Cavaliers (n=1000)")
okc_cle_blocks_confidence_interval = visualize_confidence_interval(okc_cle_blocks_bootstrap_distribution,okc_cle_blocks_percentile_confidence_interval)
print(okc_cle_blocks_percentile_confidence_interval)
# Output recorded from an earlier run: [1.6666666666666667, 5.668333333333332]

np.random.seed(100)

blocks_regular_season_bootstrap_distribution = bootstrap_distribution("diffBlocks",combined_games_regular_season)

blocks_regular_season_percentile_confidence_interval = confidence_interval(blocks_regular_season_bootstrap_distribution)

plt.xlabel("Mean Difference in Blocks")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Blocks \n(Regular season, n=1000)")
blocks_regular_season_confidence_interval = visualize_confidence_interval(blocks_regular_season_bootstrap_distribution,blocks_regular_season_percentile_confidence_interval)
print(blocks_regular_season_percentile_confidence_interval)
# Output recorded from an earlier run (first of the two printed intervals): [0.7778079601392724, 0.8788299915956297]

# No reseed here: the play-off resamples continue the random stream of the regular season run above
blocks_play_offs_bootstrap_distribution = bootstrap_distribution("diffBlocks",combined_games_play_offs)

blocks_play_offs_percentile_confidence_interval = confidence_interval(blocks_play_offs_bootstrap_distribution)

plt.xlabel("Mean Difference in Blocks")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Blocks \n(Play-offs, n=1000)")
blocks_play_offs_confidence_interval = visualize_confidence_interval(blocks_play_offs_bootstrap_distribution,blocks_play_offs_percentile_confidence_interval)
print(blocks_play_offs_percentile_confidence_interval)
# Output recorded from an earlier run (second of the two printed intervals): [0.6707605985037406, 1.0623649210307564]

# ---- Personal fouls ----
# Boxplots of total fouls, one figure per group of ten teams
plot_boxplot("totalFouls",regular_season_team_summary_stats_1)
plot_boxplot("totalFouls",regular_season_team_summary_stats_2)
plot_boxplot("totalFouls",regular_season_team_summary_stats_3)

phx_regular_season_totals = regular_season_totals[regular_season_totals["teamName"]=="Phoenix Suns"]

plot_histogram("foulsPersonal",phx_regular_season_totals)
plot_histogram("foulsPersonal",sas_regular_season_totals)
plot_barchart("WL",phx_regular_season_totals)
plot_barchart("WL",sas_regular_season_totals)

# Seed NumPy's global random state so the bootstrap resamples below are reproducible
np.random.seed(100)

# "_x" columns belong to the winning team and "_y" to the losing team, so the difference is winner minus loser
combined_games_regular_season["diffFouls"] = combined_games_regular_season["foulsPersonal_x"] - combined_games_regular_season["foulsPersonal_y"]
combined_games_play_offs["diffFouls"] = combined_games_play_offs["foulsPersonal_x"] - combined_games_play_offs["foulsPersonal_y"]

# Regular season games San Antonio (winner, "_x") won against Phoenix (loser, "_y")
phx_sas_games = combined_games_regular_season[(combined_games_regular_season["teamName_x"] == "San Antonio Spurs") & (combined_games_regular_season["teamName_y"] == "Phoenix Suns")]

# NOTE: despite "blocks" in the next names they hold the foul results;
# the names are kept unchanged so existing references keep working
phx_sas_blocks_bootstrap_distribution = bootstrap_distribution("diffFouls",phx_sas_games)

phx_sas_blocks_percentile_confidence_interval = confidence_interval(phx_sas_blocks_bootstrap_distribution)

# Label the current axes first; visualize_confidence_interval then draws on them and shows the figure.
# The title names San Antonio as the winner because that is what the filter above selects.
plt.xlabel("Mean Difference in Fouls")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Fouls for \nSan Antonio Spurs Winning Phoenix Suns (n=1000)")
phx_sas_blocks_confidence_interval = visualize_confidence_interval(phx_sas_blocks_bootstrap_distribution,phx_sas_blocks_percentile_confidence_interval)
print(phx_sas_blocks_percentile_confidence_interval)
# Output recorded from an earlier run: [-3.6470588235294117, -0.8235294117647058]

np.random.seed(100)

fouls_regular_season_bootstrap_distribution = bootstrap_distribution("diffFouls",combined_games_regular_season)

fouls_regular_season_percentile_confidence_interval = confidence_interval(fouls_regular_season_bootstrap_distribution)

plt.xlabel("Mean Difference in Fouls")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Fouls \n(Regular season, n=1000)")
fouls_regular_season_confidence_interval = visualize_confidence_interval(fouls_regular_season_bootstrap_distribution,fouls_regular_season_percentile_confidence_interval)
print(fouls_regular_season_percentile_confidence_interval)
# Output recorded from an earlier run (first of the two printed intervals): [-0.9539620602713411, -0.7976197622763838]

# No reseed here: the play-off resamples continue the random stream of the regular season run above
fouls_play_offs_bootstrap_distribution = bootstrap_distribution("diffFouls",combined_games_play_offs)

fouls_play_offs_percentile_confidence_interval = confidence_interval(fouls_play_offs_bootstrap_distribution)

plt.xlabel("Mean Difference in Fouls")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Fouls \n(Play-offs, n=1000)")
fouls_play_offs_confidence_interval = visualize_confidence_interval(fouls_play_offs_bootstrap_distribution,fouls_play_offs_percentile_confidence_interval)
print(fouls_play_offs_percentile_confidence_interval)
# Output recorded from an earlier run (second of the two printed intervals): [-1.1264546965918538, -0.5860141313383209]

# ---- Turnovers ----
# Boxplots of total turnovers, one figure per group of ten teams
plot_boxplot("totalTurnovers",regular_season_team_summary_stats_1)
plot_boxplot("totalTurnovers",regular_season_team_summary_stats_2)
plot_boxplot("totalTurnovers",regular_season_team_summary_stats_3)

dal_regular_season_totals = regular_season_totals[regular_season_totals["teamName"]=="Dallas Mavericks"]

plot_histogram("turnovers",gsw_regular_season_totals)
plot_histogram("turnovers",dal_regular_season_totals)
plot_barchart("WL",gsw_regular_season_totals)
plot_barchart("WL",dal_regular_season_totals)

# Seed NumPy's global random state so the bootstrap resamples below are reproducible
np.random.seed(100)

# "_x" columns belong to the winning team and "_y" to the losing team, so the difference is winner minus loser
combined_games_regular_season["diffTurnovers"] = combined_games_regular_season["turnovers_x"] - combined_games_regular_season["turnovers_y"]
combined_games_play_offs["diffTurnovers"] = combined_games_play_offs["turnovers_x"] - combined_games_play_offs["turnovers_y"]

# Regular season games Dallas (winner, "_x") won against Golden State (loser, "_y")
gsw_dal_games = combined_games_regular_season[(combined_games_regular_season["teamName_x"]=="Dallas Mavericks") & (combined_games_regular_season["teamName_y"]=="Golden State Warriors")]

# NOTE: despite "blocks" in the next names they hold the turnover results;
# the names are kept unchanged so existing references keep working
gsw_dal_blocks_bootstrap_distribution = bootstrap_distribution("diffTurnovers",gsw_dal_games)

gsw_dal_blocks_percentile_confidence_interval = confidence_interval(gsw_dal_blocks_bootstrap_distribution)

# Label the current axes first; visualize_confidence_interval then draws on them and shows the figure
plt.xlabel("Mean Difference in Turnovers")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Turnovers for \nDallas Mavericks Winning Golden State Warriors (n=1000)")
gsw_dal_blocks_confidence_interval = visualize_confidence_interval(gsw_dal_blocks_bootstrap_distribution,gsw_dal_blocks_percentile_confidence_interval)
print(gsw_dal_blocks_percentile_confidence_interval)
# Output recorded from an earlier run: [-3.5920454545454543, 0.2727272727272727]

np.random.seed(100)

turnovers_regular_season_bootstrap_distribution = bootstrap_distribution("diffTurnovers",combined_games_regular_season)

turnovers_regular_season_percentile_confidence_interval = confidence_interval(turnovers_regular_season_bootstrap_distribution)

plt.xlabel("Mean Difference in Turnovers")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Turnovers \n(Regular Season, n=1000)")
turnovers_regular_season_confidence_interval = visualize_confidence_interval(turnovers_regular_season_bootstrap_distribution,turnovers_regular_season_percentile_confidence_interval)
print(turnovers_regular_season_percentile_confidence_interval)
# Output recorded from an earlier run (first of the two printed intervals): [-0.8297679793492616, -0.6694636210829632]

# No reseed here: the play-off resamples continue the random stream of the regular season run above
turnovers_play_offs_bootstrap_distribution = bootstrap_distribution("diffTurnovers",combined_games_play_offs)

turnovers_play_offs_percentile_confidence_interval = confidence_interval(turnovers_play_offs_bootstrap_distribution)

plt.xlabel("Mean Difference in Turnovers")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Turnovers \n(Play-offs, n=1000)")
turnovers_play_offs_confidence_interval = visualize_confidence_interval(turnovers_play_offs_bootstrap_distribution,turnovers_play_offs_percentile_confidence_interval)
print(turnovers_play_offs_percentile_confidence_interval)
# Output recorded from an earlier run (second of the two printed intervals): [-1.2718412302576891, -0.7115544472152951]

plot_boxplot("totalFieldGoalsAttempted",regular_season_team_summary_stats_1)
plot_boxplot("totalFieldGoalsAttempted",regular_season_team_summary_stats_2)
plot_boxplot("totalFieldGoalsAttempted",regular_season_team_summary_stats_3)

# Field goal attempts: add the per-game difference column to both combined
# frames, then bootstrap it for the Golden State Warriors / Miami Heat match-up.
np.random.seed(100)

# Difference is the "_x" side minus the "_y" side of each combined game row.
# NOTE(review): the plot title below suggests "_x" is the winning team — confirm.
combined_games_regular_season["diffFieldGoalAttempts"] = (
    combined_games_regular_season["fieldGoalsAttempted_x"]
    .sub(combined_games_regular_season["fieldGoalsAttempted_y"])
)

combined_games_play_offs["diffFieldGoalAttempts"] = (
    combined_games_play_offs["fieldGoalsAttempted_x"]
    .sub(combined_games_play_offs["fieldGoalsAttempted_y"])
)

# Regular-season rows with Golden State on the "_x" side and Miami on the "_y" side.
gsw_mia_games = combined_games_regular_season.loc[
    (combined_games_regular_season["teamName_x"] == "Golden State Warriors")
    & (combined_games_regular_season["teamName_y"] == "Miami Heat")
]

gsw_mia_field_goal_attempts_bootstrap_distribution = bootstrap_distribution(
    "diffFieldGoalAttempts",
    gsw_mia_games,
)
gsw_mia_field_goal_attempts_percentile_confidence_interval = confidence_interval(
    gsw_mia_field_goal_attempts_bootstrap_distribution
)

plt.xlabel("Mean Difference in Field Goal Attempts")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Field Goal Attempts for \nGolden State Warriors Winning Miami Heat (n=1000)")
gsw_mia_field_goal_attempts_confidence_interval = visualize_confidence_interval(
    gsw_mia_field_goal_attempts_bootstrap_distribution,
    gsw_mia_field_goal_attempts_percentile_confidence_interval,
)
print(gsw_mia_field_goal_attempts_percentile_confidence_interval)

[-5.3140625, 2.8125]

# Field goal attempts, all games: percentile confidence intervals for the mean
# of "diffFieldGoalAttempts", regular season first and play-offs second.

# Seed NumPy's global RNG once for the whole cell; the order of the two
# analyses is kept so the sequence of draws is unchanged.
np.random.seed(100)

# --- Regular season ---
field_goal_attempts_regular_season_bootstrap_distribution = bootstrap_distribution(
    "diffFieldGoalAttempts",
    combined_games_regular_season,
)
field_goal_attempts_regular_season_percentile_confidence_interval = confidence_interval(
    field_goal_attempts_regular_season_bootstrap_distribution
)

plt.xlabel("Mean Difference in Field Goal Attempts")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Field Goal Attempts \n(Regular Season, n=1000)")
field_goal_attempts_regular_season_confidence_interval = visualize_confidence_interval(
    field_goal_attempts_regular_season_bootstrap_distribution,
    field_goal_attempts_regular_season_percentile_confidence_interval,
)
print(field_goal_attempts_regular_season_percentile_confidence_interval)

# --- Play-offs ---
field_goal_attempts_play_offs_bootstrap_distribution = bootstrap_distribution(
    "diffFieldGoalAttempts",
    combined_games_play_offs,
)
field_goal_attempts_play_offs_percentile_confidence_interval = confidence_interval(
    field_goal_attempts_play_offs_bootstrap_distribution
)

plt.xlabel("Mean Difference in Field Goal Attempts")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Field Goal Attempts \n(Play-offs, n=1000)")
field_goal_attempts_play_offs_confidence_interval = visualize_confidence_interval(
    field_goal_attempts_play_offs_bootstrap_distribution,
    field_goal_attempts_play_offs_percentile_confidence_interval,
)
print(field_goal_attempts_play_offs_percentile_confidence_interval)

[-0.7874414695641734, -0.5167156921599232]

[-0.37578969243557775, 0.6169576059850372]

# Box plots of total three-pointer attempts: one plot_boxplot call per group of
# ten teams (the three team-name groups defined at the top of the file).
for _team_group_stats in (
    regular_season_team_summary_stats_1,
    regular_season_team_summary_stats_2,
    regular_season_team_summary_stats_3,
):
    plot_boxplot("totalThreePointersAttempted", _team_group_stats)

# Per-team plots for the two frames compared in the three-pointer-attempts
# analysis (presumably Houston and San Antonio, judging by the variable names).
# Both histograms are drawn before both bar charts, as in the original cell.
for _team_totals in (hou_regular_season_totals, sas_regular_season_totals):
    plot_histogram("threePointersAttempted", _team_totals)

for _team_totals in (hou_regular_season_totals, sas_regular_season_totals):
    plot_barchart("WL", _team_totals)

# Three-pointer attempts: add the per-game difference column to both combined
# frames, then bootstrap it for the Houston Rockets / San Antonio Spurs match-up.
np.random.seed(100)

# Difference is the "_x" side minus the "_y" side of each combined game row.
# NOTE(review): the plot title below suggests "_x" is the winning team — confirm.
combined_games_regular_season["diffThreePointerAttempts"] = (
    combined_games_regular_season["threePointersAttempted_x"]
    .sub(combined_games_regular_season["threePointersAttempted_y"])
)

combined_games_play_offs["diffThreePointerAttempts"] = (
    combined_games_play_offs["threePointersAttempted_x"]
    .sub(combined_games_play_offs["threePointersAttempted_y"])
)

# Regular-season rows with Houston on the "_x" side and San Antonio on the "_y" side.
hou_sas_games = combined_games_regular_season.loc[
    (combined_games_regular_season["teamName_x"] == "Houston Rockets")
    & (combined_games_regular_season["teamName_y"] == "San Antonio Spurs")
]

hou_sas_three_pointer_attempts_bootstrap_distribution = bootstrap_distribution(
    "diffThreePointerAttempts",
    hou_sas_games,
)
hou_sas_three_pointer_attempts_percentile_confidence_interval = confidence_interval(
    hou_sas_three_pointer_attempts_bootstrap_distribution
)

plt.xlabel("Mean Difference in Three Pointer Attempts")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Three Pointer Attempts for \nHouston Rockets Winning San Antonio Spurs (n=1000)")
hou_sas_three_pointer_attempts_confidence_interval = visualize_confidence_interval(
    hou_sas_three_pointer_attempts_bootstrap_distribution,
    hou_sas_three_pointer_attempts_percentile_confidence_interval,
)
print(hou_sas_three_pointer_attempts_percentile_confidence_interval)

[1.197, 9.682999999999996]

# Three-pointer attempts, all games: percentile confidence intervals for the
# mean of "diffThreePointerAttempts", regular season first and play-offs second.

# Seed NumPy's global RNG once for the whole cell; the order of the two
# analyses is kept so the sequence of draws is unchanged.
np.random.seed(100)

# --- Regular season ---
three_pointer_attempts_regular_season_bootstrap_distribution = bootstrap_distribution(
    "diffThreePointerAttempts",
    combined_games_regular_season,
)
three_pointer_attempts_regular_season_percentile_confidence_interval = confidence_interval(
    three_pointer_attempts_regular_season_bootstrap_distribution
)

plt.xlabel("Mean Difference in Three Pointer Attempts")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Three Pointer Attempts \n(Regular Season, n=1000)")
three_pointer_attempts_regular_season_confidence_interval = visualize_confidence_interval(
    three_pointer_attempts_regular_season_bootstrap_distribution,
    three_pointer_attempts_regular_season_percentile_confidence_interval,
)
print(three_pointer_attempts_regular_season_percentile_confidence_interval)

# --- Play-offs ---
three_pointer_attempts_play_offs_bootstrap_distribution = bootstrap_distribution(
    "diffThreePointerAttempts",
    combined_games_play_offs,
)
three_pointer_attempts_play_offs_percentile_confidence_interval = confidence_interval(
    three_pointer_attempts_play_offs_bootstrap_distribution
)

plt.xlabel("Mean Difference in Three Pointer Attempts")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Three Pointer Attempts \n(Play-offs, n=1000)")
three_pointer_attempts_play_offs_confidence_interval = visualize_confidence_interval(
    three_pointer_attempts_play_offs_bootstrap_distribution,
    three_pointer_attempts_play_offs_percentile_confidence_interval,
)
print(three_pointer_attempts_play_offs_percentile_confidence_interval)

[0.017872793852803468, 0.3045053427782447]

[-0.09981296758104738, 0.8305070656691603]

# Box plots of total free throw attempts: one plot_boxplot call per group of
# ten teams (the three team-name groups defined at the top of the file).
for _team_group_stats in (
    regular_season_team_summary_stats_1,
    regular_season_team_summary_stats_2,
    regular_season_team_summary_stats_3,
):
    plot_boxplot("totalFreeThrowsAttempted", _team_group_stats)

# Per-team plots for the two frames compared in the free-throw-attempts
# analysis (presumably Houston and Orlando, judging by the variable names).
# Both histograms are drawn before both bar charts, as in the original cell.
for _team_totals in (hou_regular_season_totals, orl_regular_season_totals):
    plot_histogram("freeThrowsAttempted", _team_totals)

for _team_totals in (hou_regular_season_totals, orl_regular_season_totals):
    plot_barchart("WL", _team_totals)

# Free throw attempts: add the per-game difference column to both combined
# frames and to the Houston/Orlando subset, then bootstrap the subset.
np.random.seed(100)

# Difference is the "_x" side minus the "_y" side of each combined game row.
# NOTE(review): the plot title below suggests "_x" is the winning team — confirm.
combined_games_regular_season["diffFreeThrowAttempts"] = combined_games_regular_season["freeThrowsAttempted_x"] - combined_games_regular_season["freeThrowsAttempted_y"]

combined_games_play_offs["diffFreeThrowAttempts"] = combined_games_play_offs["freeThrowsAttempted_x"] - combined_games_play_offs["freeThrowsAttempted_y"]

# hou_orl_games was created earlier as a filtered slice of another frame, so
# writing a column into it in place raised pandas' SettingWithCopyWarning.
# assign() returns a new DataFrame carrying the extra column; rebinding the
# name gives the same values without writing into a slice.
hou_orl_games = hou_orl_games.assign(
    diffFreeThrowAttempts=hou_orl_games["freeThrowsAttempted_x"] - hou_orl_games["freeThrowsAttempted_y"]
)

hou_orl_free_throw_attempts_bootstrap_distribution = bootstrap_distribution("diffFreeThrowAttempts",hou_orl_games)

hou_orl_free_throw_attempts_percentile_confidence_interval = confidence_interval(hou_orl_free_throw_attempts_bootstrap_distribution)

plt.xlabel("Mean Difference in Free Throw Attempts")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Free Throw Attempts for \nHouston Rockets Winning Orlando Magic (n=1000)")
hou_orl_free_throw_attempts_confidence_interval = visualize_confidence_interval(hou_orl_free_throw_attempts_bootstrap_distribution,hou_orl_free_throw_attempts_percentile_confidence_interval)
print(hou_orl_free_throw_attempts_percentile_confidence_interval)

/var/folders/5y/f2gxs3rd1px9742dhtwbjbc40000gn/T/ipykernel_72339/2085374337.py:7: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hou_orl_games["diffFreeThrowAttempts"] = hou_orl_games["freeThrowsAttempted_x"] - hou_orl_games["freeThrowsAttempted_y"]

[0.2338235294117648, 8.294117647058824]

# Free throw attempts, all games: percentile confidence intervals for the mean
# of "diffFreeThrowAttempts", regular season first and play-offs second.

# Seed NumPy's global RNG once for the whole cell; the order of the two
# analyses is kept so the sequence of draws is unchanged.
np.random.seed(100)

# --- Regular season ---
free_throw_attempts_regular_season_bootstrap_distribution = bootstrap_distribution(
    "diffFreeThrowAttempts",
    combined_games_regular_season,
)
free_throw_attempts_regular_season_percentile_confidence_interval = confidence_interval(
    free_throw_attempts_regular_season_bootstrap_distribution
)

plt.xlabel("Mean Difference in Free Throw Attempts")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Free Throw Attempts \n(Regular Season, n=1000)")
free_throw_attempts_regular_season_confidence_interval = visualize_confidence_interval(
    free_throw_attempts_regular_season_bootstrap_distribution,
    free_throw_attempts_regular_season_percentile_confidence_interval,
)
print(free_throw_attempts_regular_season_percentile_confidence_interval)

# --- Play-offs ---
free_throw_attempts_play_offs_bootstrap_distribution = bootstrap_distribution(
    "diffFreeThrowAttempts",
    combined_games_play_offs,
)
free_throw_attempts_play_offs_percentile_confidence_interval = confidence_interval(
    free_throw_attempts_play_offs_bootstrap_distribution
)

plt.xlabel("Mean Difference in Free Throw Attempts")
plt.title("Bootstrap Sampling Distribution for Mean Difference in Free Throw Attempts \n(Play-offs, n=1000)")
free_throw_attempts_play_offs_confidence_interval = visualize_confidence_interval(
    free_throw_attempts_play_offs_bootstrap_distribution,
    free_throw_attempts_play_offs_percentile_confidence_interval,
)
print(free_throw_attempts_play_offs_percentile_confidence_interval)

[1.5190884259815105, 1.792971845359587]

[0.857024106400665, 1.8146716541978387]

# Final selections: one score per statistic, each computed by score() (defined
# elsewhere in this file) from that statistic's regular-season and play-off
# percentile confidence intervals. Each score is printed right after it is
# computed, in the same order as the analysis sections.

# --- Made shots ---
field_goals_score = score(
    field_goals_regular_season_percentile_confidence_interval,
    field_goals_play_offs_percentile_confidence_interval,
)
print(field_goals_score)

three_pointers_score = score(
    three_pointers_regular_season_percentile_confidence_interval,
    three_pointers_play_offs_percentile_confidence_interval,
)
print(three_pointers_score)

free_throws_score = score(
    free_throws_regular_season_percentile_confidence_interval,
    free_throws_play_offs_percentile_confidence_interval,
)
print(free_throws_score)

# --- Other box-score statistics ---
rebounds_score = score(
    rebounds_regular_season_percentile_confidence_interval,
    rebounds_play_offs_percentile_confidence_interval,
)
print(rebounds_score)

assists_score = score(
    assists_regular_season_percentile_confidence_interval,
    assists_play_offs_percentile_confidence_interval,
)
print(assists_score)

steals_score = score(
    steals_regular_season_percentile_confidence_interval,
    steals_play_offs_percentile_confidence_interval,
)
print(steals_score)

blocks_score = score(
    blocks_regular_season_percentile_confidence_interval,
    blocks_play_offs_percentile_confidence_interval,
)
print(blocks_score)

fouls_score = score(
    fouls_regular_season_percentile_confidence_interval,
    fouls_play_offs_percentile_confidence_interval,
)
print(fouls_score)

turnovers_score = score(
    turnovers_regular_season_percentile_confidence_interval,
    turnovers_play_offs_percentile_confidence_interval,
)
print(turnovers_score)

# --- Shot attempts ---
field_goal_attempts_score = score(
    field_goal_attempts_regular_season_percentile_confidence_interval,
    field_goal_attempts_play_offs_percentile_confidence_interval,
)
print(field_goal_attempts_score)

three_pointer_attempts_score = score(
    three_pointer_attempts_regular_season_percentile_confidence_interval,
    three_pointer_attempts_play_offs_percentile_confidence_interval,
)
print(three_pointer_attempts_score)

free_throw_attempts_score = score(
    free_throw_attempts_regular_season_percentile_confidence_interval,
    free_throw_attempts_play_offs_percentile_confidence_interval,
)
print(free_throw_attempts_score)

25.21427763257304
12.544016036174868
7.226050867293867
13.548723813295808
17.99875700853817
5.988458058461015
7.780331863029979
-5.320533006607075
-4.472768029068193
-2.231524564548751
0.5504793504985643
5.720869614037792

Initial Loading¶

Visualization Analysis (Team-Level)¶

Boxplot, Histogram, Bar Chart, and Confidence Interval Analysis¶

Field Goals¶

Three Pointers¶

Free Throws¶

Rebounds¶

Assists¶

Steals¶

Blocks¶

Fouls¶

Turnovers¶

Field Goal Attempts¶

Three Pointer Attempts¶

Free Throw Attempts¶

Final Selections¶