"""Python Levene's test.

Worked examples of Levene's test for equality of variances: a step-by-step
manual calculation (Example 1) followed by ``scipy.stats.levene`` examples.
"""

import numpy as np
import scipy.stats as stats
from scipy.stats import f

# Significance level: every example rejects the null hypothesis of equal
# variances when its p-value falls below this threshold.
alpha = 0.05

# Example 1 - Manual example: 100m race times for college and pro athletes
college_times = [10.5, 10.7, 10.8, 10.4, 10.6]
pro_times = [9.8, 9.9, 9.7, 10.0, 9.9]

# Calculate the group means. Centering on the mean gives the classic Levene
# test, i.e. the same result as stats.levene(..., center='mean').
college_mean = np.mean(college_times)
pro_mean = np.mean(pro_times)

# Calculate absolute deviations from each group's mean. The lists are
# converted to arrays explicitly so the element-wise subtraction does not
# depend on NumPy's reflected-operator handling of plain Python lists.
college_abs_deviation = np.abs(np.asarray(college_times) - college_mean)
pro_abs_deviation = np.abs(np.asarray(pro_times) - pro_mean)

# Calculate mean of absolute deviations (one value per group)
college_mean_deviation = np.mean(college_abs_deviation)
pro_mean_deviation = np.mean(pro_abs_deviation)

# Calculate sum of squared deviations from the mean of deviations
# (the within-group sum of squares of the absolute deviations)
college_squared_deviations = np.sum(
    (college_abs_deviation - college_mean_deviation) ** 2
)
pro_squared_deviations = np.sum((pro_abs_deviation - pro_mean_deviation) ** 2)

# Combine for total squared deviations
total_squared_deviations = college_squared_deviations + pro_squared_deviations

# Total number of observations
N = len(college_times) + len(pro_times)
k = 2  # Number of groups (College and Pro)

# Degrees of freedom
df_between = k - 1  # k-1 for between-group degrees of freedom
df_within = N - k   # N-k for within-group degrees of freedom

# Between-group sum of squares (SS_between): each group's mean deviation is
# compared with the grand mean of all absolute deviations, weighted by the
# group size.
Z_grand_mean = np.mean(np.concatenate([college_abs_deviation, pro_abs_deviation]))
SS_between = (
    len(college_times) * (college_mean_deviation - Z_grand_mean) ** 2
    + len(pro_times) * (pro_mean_deviation - Z_grand_mean) ** 2
)

# Step 6: Calculate the F-statistic (Levene's W):
#   W = (N - k) / (k - 1) * SS_between / SS_within
w = (df_within * SS_between) / (df_between * total_squared_deviations)
print(w)

# Step 7: Calculate the p-value using the F-distribution. The survival
# function is the upper tail, 1 - cdf, without the cancellation error that
# the explicit subtraction suffers for very small p-values.
p_value_manual_final = f.sf(w, df_between, df_within)
print(p_value_manual_final)

if p_value_manual_final < alpha:
    print("Reject the null hypothesis, different variance")
else:
    print("Fail to reject the null hypothesis, same variance")
				
			

# Example 2 - Slayer vs Metallica ticket sales
slayer_sales = [20, 18, 22, 19, 21]
metallica_sales = [28, 30, 27, 29, 31]

# Let SciPy run Levene's test on the two groups. center='median' measures
# each observation's absolute deviation from its group median.
test_statistic, p_value = stats.levene(
    slayer_sales, metallica_sales, center='median'
)

print("Levene's Test Statistic:", test_statistic)
print("P-value:", p_value)

# Null hypothesis: both groups have the same variance.
if p_value < alpha:
    print("Reject the null hypothesis, different variance")
else:
    print("Fail to reject the null hypothesis, same variance")
				
			

# Example 3 - Ticket sales for three bands, centered on the mean
gojira_sales = [7000, 11000, 2500, 9000, 3000]
mastodon_sales = [3000, 4000, 1500, 4000, 2000]
opeth_sales = [2000, 1000, 2500, 3000, 1500]

# stats.levene accepts any number of groups. center='mean' measures each
# observation's absolute deviation from its group mean.
test_statistic, p_value = stats.levene(
    gojira_sales, mastodon_sales, opeth_sales, center='mean'
)

print("Levene's Test Statistic:", test_statistic)
print("P-value:", p_value)

# Null hypothesis: all three groups have the same variance.
if p_value < alpha:
    print("Reject the null hypothesis, different variance")
else:
    print("Fail to reject the null hypothesis, same variance")
				
			

Ryan is a Data Scientist at a fintech company, where he focuses on fraud prevention in underwriting and risk. Before that, he worked as a Data Analyst at a tax software company. He holds a degree in Electrical Engineering from UCF.

Leave a Reply

Your email address will not be published. Required fields are marked *