# Two-sample z-test with SciPy
import numpy as np
from scipy.stats import norm
from statsmodels.stats.weightstats import ztest

#import math
alpha = 0.05  # Significance level used by the hypothesis tests in this script
np.random.seed(10)  # For reproducibility

# Example 1: more manual two-sample z-test (data from the slides).
sample_1 = [
    370, 395, 400, 405, 390, 385, 410, 395, 400, 380,
    390, 400, 410, 415, 395, 405, 390, 400, 420, 375,
    400, 385, 390, 395, 410, 405, 400, 395, 380, 400,
]
sample_2 = [
    360, 375, 385, 390, 370, 380, 395, 390, 385, 375,
    380, 395, 400, 405, 385, 395, 375, 385, 395, 370,
    380, 395, 390, 385, 375, 380, 395, 400, 385, 395,
]

mean_sample_1 = sum(sample_1) / len(sample_1)  # Mean of sample 1
mean_sample_2 = sum(sample_2) / len(sample_2)  # Mean of sample 2
print("Sample 1 Mean:", mean_sample_1)  # ~396.33 for the data above
print("Sample 2 Mean:", mean_sample_2)  # ~385.67 for the data above

# NOTE: the original comments quoted 385.5 for the second mean, z = 2.79 and
# p = 0.0052 (presumably the rounded slide figures). The data listed here
# gives the slightly different values shown in the comments below.

std_dev = 15  # Given: standard deviation for both samples
n1, n2 = len(sample_1), len(sample_2)

# Standard error of the difference in means when both samples share std_dev.
pooled_se = np.sqrt((std_dev**2 / n1) + (std_dev**2 / n2))
print("Pooled Standard Error:", round(pooled_se, 2))  # 3.87

z_statistic = (mean_sample_1 - mean_sample_2) / pooled_se
print("Z-Statistic:", round(z_statistic, 2))  # 2.75

# Two-tailed p-value. norm.sf is the survival function (1 - cdf); it keeps
# precision for large |z| where 1 - cdf would round to zero.
p_value = 2 * norm.sf(abs(z_statistic))
print("P-Value:", round(p_value, 4))  # 0.0059

if p_value < alpha:
    print("Reject the null hypothesis")
else:
    print("Fail to reject the null hypothesis")

# Quicker way to test it – not entirely precise.
# The ztest function in statsmodels.stats.weightstats does not allow passing
# the population standard deviation directly. Instead, it estimates the
# standard error from the sample standard deviations (using a variance pooled
# across both samples by default), so its result can differ slightly from a
# calculation based on the known population standard deviation.
# Let statsmodels run the two-sided, two-sample z-test on the raw samples,
# then report the statistic and the p-value on separate lines.
z_stat, p_value = ztest(x1=sample_1, x2=sample_2, alternative="two-sided")
print(z_stat, p_value, sep="\n")

# Example 2: marathon times of two running clubs
marathon_std = 30  # Standard deviation applied to both clubs in the standard error

# Simulated samples for the two clubs, 50 values each.
# NOTE(review): the draws use scale=25 while the test below uses
# marathon_std = 30 as the standard deviation — confirm which is intended.
sample1 = np.random.normal(loc=272, scale=25, size=50)  # Sample 1: mean 272, std 25
sample2 = np.random.normal(loc=255, scale=25, size=50)  # Sample 2: mean 255, std 25

# Sample means and sizes for both samples.
mean1, size1 = np.mean(sample1), len(sample1)
mean2, size2 = np.mean(sample2), len(sample2)

# Standard error of the difference in means, using marathon_std for both samples.
pooled_se = np.sqrt((marathon_std**2 / size1) + (marathon_std**2 / size2))
z_score = (mean1 - mean2) / pooled_se

# Two-tailed p-value via the survival function (1 - cdf), which keeps
# precision for large |z|.
p_value = 2 * norm.sf(abs(z_score))

# NOTE(review): this compares two sample means, but the messages below speak
# of a "sample mean" versus a "population mean". The wording is kept unchanged
# so the printed output stays the same.
if p_value < alpha:
    print("Reject the null hypothesis: The sample mean is significantly different from the population mean.")
else:
    print("Fail to reject the null hypothesis: No significant difference between the sample mean and population mean.")
Reject the null hypothesis: The sample mean is significantly different from the population mean.
Ryan is a Data Scientist at a fintech company, where he focuses on fraud prevention in underwriting and risk. Before that, he worked as a Data Analyst at a tax software company. He holds a degree in Electrical Engineering from UCF.