Statistics

Python One-Sample T-Test

June 5, 2025 Ryan Nolan No comments yet

One-Sample T-Test: used to compare the mean of a single sample to a known value (usually a population mean).

				
					import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

Example 1 Manual Calculation

				
					# Example 1: one-sample t-test computed by hand.
# Tests H0: the mean hat size equals `null_hypothesis_mean`.

# Step 1: State the null hypothesis, the significance level and the sample
null_hypothesis_mean = 7.5  # H0: Mean hat size is 7.5
confidence_level = 0.95     # 95% confidence level
# 1 - 0.95 evaluates to 0.050000000000000044 in floating point; rounding
# makes alpha exactly 0.05 so the later `p_value < alpha` comparison uses
# the intended threshold.
alpha = round(1 - confidence_level, 10)  # Significance level (α = 0.05)

# Sample data: Hat sizes
sample_hat_sizes = np.array([7.4, 7.6, 7.7, 7.3, 7.5, 7.8, 7.6])

# Step 2: Calculate the sample mean
sample_mean = np.mean(sample_hat_sizes)
print(f"Sample Mean: {sample_mean:.2f}")

# Step 3: Calculate the sample standard deviation
sample_std = np.std(sample_hat_sizes, ddof=1)  # ddof=1 for sample standard deviation
print(f"Sample Standard Deviation: {sample_std:.2f}")

# Step 4: Calculate the t-statistic
n = len(sample_hat_sizes)  # Sample size
# t = (sample mean - hypothesized mean) / (sample std / sqrt(n))
t_statistic = (sample_mean - null_hypothesis_mean) / (sample_std / np.sqrt(n))
print(f"T-Statistic: {t_statistic:.2f}")

Step 5: Degrees of freedom and p-value

				
					# Degrees of freedom, two-tailed p-value and decision for the manual t-test.
degrees_of_freedom = n - 1  # df = n - 1

# Two-tailed p-value: twice the upper-tail probability beyond |t|.
# sf(x) is the survival function 1 - cdf(x); calling it directly avoids the
# precision lost by the subtraction when cdf(x) is very close to 1.
p_value = 2 * stats.t.sf(abs(t_statistic), df=degrees_of_freedom)

print(f"Degrees of Freedom: {degrees_of_freedom}")
print(f"P-Value: {p_value:.4f}")

# Step 6: Compare p-value with significance level (alpha)
# Step 7: Conclusion - printed inside the matching branch so the stated
# conclusion always agrees with the decision that was actually reached.
if p_value < alpha:
    print("Reject the null hypothesis (H0)")
    print("Conclusion: There is enough evidence to reject the null hypothesis.")
    print(f"The mean hat size is likely different from {null_hypothesis_mean}.")
else:
    print("Fail to reject the null hypothesis (H0)")
    print("Conclusion: There is not enough evidence to reject the null hypothesis.")
    print(f"The mean hat size is likely equal to {null_hypothesis_mean}.")

Example 2 - Shoes Two Tail

				
					# Example 2: two-tailed one-sample t-test with scipy.
# H0: the mean of the shoe sample equals the population mean.
alpha = 0.05  # Significance level (corresponds to a 95% confidence level)

sample_data = [380, 410, 395, 405, 390]
population_mean = 400

# ttest_1samp returns the t-statistic together with the p-value;
# with no `alternative` argument the p-value is two-sided.
t_statistic, p_value_two_tailed = stats.ttest_1samp(sample_data, population_mean)

print(f"t-statistic: {t_statistic}")
print(f"Two-tailed p-value: {p_value_two_tailed}")

# Conclusion based on p-value
if p_value_two_tailed < alpha:
    print("Reject the null hypothesis. The Sample Shoes are significantly different from the population average")
else:
    print("Fail to reject the null hypothesis. There's no significant difference between the sample of shoes and the population.")

Example 3 - Rookie Batting Average One Tail

Null hypothesis (H0): The rookie batting average is the same as the population mean (0.250).

Alternative hypothesis (H1): The rookie batting average is lower than the population mean.

				
					# Example 3: one-tailed (left-tailed) one-sample t-test.
# H0: the mean rookie batting average equals the league average (0.250).
# H1: the mean rookie batting average is lower than the league average.

# Step 1: Collect data - batting averages of 12 rookie players
rookie_batting_averages = [0.210, 0.230, 0.160, 0.240, 0.200, 0.235, 0.225, 0.185, 0.275, 0.240, 0.225, 0.215]
mean_rookie_avg = np.mean(rookie_batting_averages)
print(mean_rookie_avg)

# Step 2: Define the league average (population mean)
league_avg = 0.250

# Step 3: Set significance level
alpha = 0.01  # 99% Confidence Level

# Step 4: Perform a one-sample t-test
# Null hypothesis: The mean of rookie batting averages is equal to the league average
# `p_value` is the two-tailed p-value returned by ttest_1samp.
t_statistic, p_value = stats.ttest_1samp(rookie_batting_averages, league_avg)

# Convert to a left-tailed p-value using THIS test's p-value. Halving is
# only correct when t lies in the hypothesized direction (t < 0); otherwise
# the left-tail probability is the complement of the halved value.
if t_statistic < 0:
    p_value_one_tailed = p_value / 2
else:
    p_value_one_tailed = 1 - p_value / 2

# Step 5: Print the results
print(f"T-Statistic: {t_statistic:.4f}")
print(f"Two-Tailed P-Value: {p_value:.4f}")
print(f"One-Tailed P-Value: {p_value_one_tailed:.4f}")

# Step 6: Conclusion based on the one-tailed p-value
if p_value_one_tailed < alpha:
    print("Reject the null hypothesis. The rookies' average is significantly lower than the league average.")
else:
    print("Fail to reject the null hypothesis. There's not enough evidence that the rookies' average is lower than the league average.")

Example 4 Boxplot

				
					# Plot 2: box plot of the rookie sample beside a constant series at the league average.
# The second group repeats `league_avg` once per rookie so both groups have equal length.
league_reference = np.full(len(rookie_batting_averages), league_avg)
boxplot_groups = [rookie_batting_averages, league_reference]

plt.figure(figsize=(6, 5))
sns.boxplot(data=boxplot_groups, palette="Set2")
plt.xticks(ticks=[0, 1], labels=["Rookies", "League Avg"])
plt.ylabel("Batting Average")
plt.title("Rookie Batting Averages vs League Average")
plt.show()

Example 5

				
					# Example 5: 99% confidence interval of the rookie mean, plotted against the league average.
std_error = stats.sem(rookie_batting_averages)  # Standard error of the mean
rookie_df = len(rookie_batting_averages) - 1    # Degrees of freedom (n - 1)
# The confidence level is passed positionally, as in the original call,
# because the keyword name of this parameter differs between SciPy versions.
confidence_interval = stats.t.interval(0.99, rookie_df, loc=mean_rookie_avg, scale=std_error)

# Plot 3: Confidence Interval Plot
# The interval is centred on `loc` (the sample mean), so the distance from
# the mean to the upper bound is used as the error-bar half-width.
margin_of_error = confidence_interval[1] - mean_rookie_avg

plt.figure(figsize=(10, 6))
plt.errorbar(1, mean_rookie_avg, yerr=margin_of_error, fmt='o', label='Rookie Mean (99% CI)', color='blue')
plt.axhline(league_avg, color='red', linestyle='--', label='League Average', linewidth=2)
plt.xlim(0, 2)
plt.ylim(min(rookie_batting_averages) - 0.05, max(rookie_batting_averages) + 0.05)
plt.xticks([])  # The x position is arbitrary, so hide the tick marks
plt.ylabel('Batting Average')
plt.title('Rookie Batting Average vs League Average with 99% Confidence Interval')
plt.legend()
plt.show()

Example 6

				
					# Plot 4: t-distribution curve with the left-tail critical region and the observed t-statistic.
rookie_df = len(rookie_batting_averages) - 1  # Degrees of freedom used by both scipy calls below

x = np.linspace(-4, 4, 500)
t_dist = stats.t.pdf(x, rookie_df)

# Left-tail critical value: the quantile of the t-distribution at probability `alpha`.
t_critical = stats.t.ppf(alpha, rookie_df)
in_critical_region = x < t_critical

plt.figure(figsize=(10, 6))
plt.plot(x, t_dist, label='T-distribution', color='blue')

# Shade the one-tailed critical region (everything left of the critical value).
# Note: the `alpha=0.5` keyword here is matplotlib's opacity, not the significance level.
plt.fill_between(
    x,
    0,
    t_dist,
    where=in_critical_region,
    color='red',
    alpha=0.5,
    label=f'Critical Region (α={alpha})',
)

# Mark the t-statistic
plt.axvline(
    t_statistic,
    color='green',
    linestyle='--',
    label=f'T-statistic = {t_statistic:.2f}',
)

plt.xlabel('T-value')
plt.ylabel('Probability Density')
plt.title('T-distribution with T-statistic and Critical Region (One-tailed, Left)')
plt.legend()
plt.show()

Ryan Nolan

Ryan is a Data Scientist at a fintech company, where he focuses on fraud prevention in underwriting and risk. Before that, he worked as a Data Analyst at a tax software company. He holds a degree in Electrical Engineering from UCF.

Leave a Reply Cancel reply