Python Levene's Test
import numpy as np
import scipy.stats as stats
from scipy.stats import f
# Significance level; each example below compares its p-value against it.
alpha = 0.05

# Example 1 - Manual Example
# 100m race times for college and pro athletes
college_times = [10.5, 10.7, 10.8, 10.4, 10.6]
pro_times = [9.8, 9.9, 9.7, 10.0, 9.9]

# Calculate the group means (this walk-through centres each group on its
# mean, i.e. the computation that stats.levene(..., center='mean') performs).
college_mean = np.mean(college_times)
pro_mean = np.mean(pro_times)

# Calculate absolute deviations from the group mean.
# The lists are converted to arrays explicitly so the subtraction is
# element-wise by construction rather than via NumPy's reflected operator.
college_abs_deviation = np.abs(np.asarray(college_times) - college_mean)
pro_abs_deviation = np.abs(np.asarray(pro_times) - pro_mean)

# Calculate mean of absolute deviations (one value per group)
college_mean_deviation = np.mean(college_abs_deviation)
pro_mean_deviation = np.mean(pro_abs_deviation)

# Calculate sum of squared deviations from the mean of deviations
college_squared_deviations = np.sum(
    (college_abs_deviation - college_mean_deviation) ** 2
)
pro_squared_deviations = np.sum((pro_abs_deviation - pro_mean_deviation) ** 2)

# Combine for total (within-group) squared deviations
total_squared_deviations = college_squared_deviations + pro_squared_deviations

# Total number of observations
N = len(college_times) + len(pro_times)
k = 2  # Number of groups (College and Pro)

# Degrees of freedom
df_between = k - 1  # k-1 for between-group degrees of freedom
df_within = N - k  # N-k for within-group degrees of freedom

# Between-group sum of squares (SS_between): each group's mean deviation is
# compared with the grand mean of all absolute deviations, weighted by the
# group size.
Z_grand_mean = np.mean(
    np.concatenate([college_abs_deviation, pro_abs_deviation])
)
SS_between = (
    len(college_times) * (college_mean_deviation - Z_grand_mean) ** 2
    + len(pro_times) * (pro_mean_deviation - Z_grand_mean) ** 2
)

# Step 6: Calculate the F-statistic
# W = [(N - k) / (k - 1)] * SS_between / SS_within
w = (df_within * SS_between) / (df_between * total_squared_deviations)
print(w)

# Step 7: Calculate the p-value using the F-distribution
# f.sf is the survival function (1 - cdf), evaluated without the
# cancellation error that an explicit `1 - f.cdf(...)` can suffer.
p_value_manual_final = f.sf(w, df_between, df_within)
print(p_value_manual_final)

if p_value_manual_final < alpha:
    print("Reject the null hypothesis, different variance")
else:
    print("Fail to reject the null hypothesis, same variance")

# Example 2 - Slayer vs Metallica ticket sales
slayer_sales = [20, 18, 22, 19, 21]
metallica_sales = [28, 30, 27, 29, 31]

# Levene's test with each group centred on its median.
test_statistic, p_value = stats.levene(
    slayer_sales, metallica_sales, center='median'
)
print("Levene's Test Statistic:", test_statistic)

print("P-value:", p_value)

# `alpha` is the significance level defined near the top of the file.
if p_value < alpha:
    print("Reject the null hypothesis, different variance")
else:
    print("Fail to reject the null hypothesis, same variance")

# Example 3 - 3 bands' ticket sales, center='mean'
gojira_sales = [7000, 11000, 2500, 9000, 3000]
mastodon_sales = [3000, 4000, 1500, 4000, 2000]
opeth_sales = [2000, 1000, 2500, 3000, 1500]

# Levene's test across three groups, each centred on its mean.
test_statistic, p_value = stats.levene(
    gojira_sales, mastodon_sales, opeth_sales, center='mean'
)
print("Levene's Test Statistic:", test_statistic)

print("P-value:", p_value)

# `alpha` is the significance level defined near the top of the file.
if p_value < alpha:
    print("Reject the null hypothesis, different variance")
else:
    print("Fail to reject the null hypothesis, same variance")

Ryan is a Data Scientist at a fintech company, where he focuses on fraud prevention in underwriting and risk. Before that, he worked as a Data Analyst at a tax software company. He holds a degree in Electrical Engineering from UCF.