Time Series Seasonality in Python
from datetime import datetime

import matplotlib.pyplot as plt
import pandas as pd
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import adfuller
# Read the exported observations into a DataFrame.
df = pd.read_csv('/content/86628_2010_1_1_2024.csv')

# Trim whitespace padding from every header so columns can be addressed by
# their bare names.
df.columns = [header.strip() for header in df.columns]
df.columns

# Assemble one datetime per row from the YEAR / MONTH / DAY columns;
# combinations that do not form a real date become NaT instead of raising.
date_parts = df[["YEAR", "MONTH", "DAY"]]
df["Date"] = pd.to_datetime(date_parts, errors="coerce")

# Rows worth inspecting: no usable date, or a mean temperature below -10.
unparseable = df["Date"].isna()
too_cold = df["meanTemp"] < -10
invalid_dates = df.loc[unparseable | too_cold]
invalid_dates

# Keep only rows that have a date and a mean temperature of at least -10.
df = df.loc[df["Date"].notna()]
df = df.loc[df["meanTemp"] >= -10]
# Scatter plot of daily mean temperature against date.
plt.figure(figsize=(10, 5))
# s controls the size of the dots
# NOTE(review): the legend label says °F while the y-axis label says °C;
# confirm the dataset's unit and align the two strings.
plt.scatter(df["Date"], df["meanTemp"], color='b', label='Mean Temp (°F)', s=10)

# Formatting plot
plt.xlabel("Date")
plt.ylabel("Temperature (°C)")
plt.title("Orlando, FL Daily Mean Temperatures (2010-2024)")
plt.xticks(rotation=45)
plt.legend()
plt.grid()
plt.show()

Let's look at a specific period now: 2022 through 2024.
# Make sure "Date" is a datetime column before comparing it with date strings.
df["Date"] = pd.to_datetime(df["Date"])

# Keep the rows dated 2022-01-01 through 2024-12-31. The .copy() yields an
# independent frame, so columns can be added to it without touching df.
df_filtered = df[(df["Date"] >= "2022-01-01") & (df["Date"] <= "2024-12-31")].copy()
# Augmented Dickey-Fuller test on the filtered mean-temperature series.
adf_test = adfuller(df_filtered["meanTemp"].dropna())  # Drop NaNs if necessary
print("p-value:", adf_test[1])

# Interpretation: compare the p-value against the 0.05 significance level.
if adf_test[1] < 0.05:
    print("The time series is stationary (reject H0).")
else:
    print("The time series is not stationary (fail to reject H0).")

Summary Statistics by Period (week, month, quarter, etc.)
def get_season(month: int) -> str:
    """Return the season name for a calendar month number.

    December-February map to "Winter", March-May to "Spring" and
    June-August to "Summer". Every other value, including anything outside
    1-12, falls through to "Fall".
    """
    if month in (12, 1, 2):
        return "Winter"
    elif month in (3, 4, 5):
        return "Spring"
    elif month in (6, 7, 8):
        return "Summer"
    else:
        return "Fall"
# Apply the function to create a new "Season" column
df_filtered["Season"] = df_filtered["Date"].dt.month.apply(get_season)

# Compute summary statistics by season
seasonal_stats = df_filtered.groupby("Season")["meanTemp"].describe()

# Display results
print(seasonal_stats)

# ACF and PACF report
# Set up figure for ACF and PACF plots (side by side)
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# Plot ACF (Autocorrelation Function)
plot_acf(df_filtered["meanTemp"].dropna(), ax=axes[0], lags=30)  # Adjust lags if needed
axes[0].set_title("Autocorrelation Function (ACF)")

# Plot PACF (Partial Autocorrelation Function)
plot_pacf(df_filtered["meanTemp"].dropna(), ax=axes[1], lags=30, method='ywm')
axes[1].set_title("Partial Autocorrelation Function (PACF)")

plt.tight_layout()
plt.show()

# First difference: each row minus the previous row (the first row has no
# predecessor and becomes NaN).
df_filtered["meanTemp_diff"] = df_filtered["meanTemp"].diff()

# Plot original and differenced series
plt.figure(figsize=(12, 5))

# Plot original time series
plt.subplot(2, 1, 1)
plt.scatter(df_filtered["Date"], df_filtered["meanTemp"], color='b', s=10, label="Original")
plt.xlabel("Date")
plt.ylabel("Temperature (°C)")
plt.title("Original Mean Temperature (2022-2024)")
plt.legend()
plt.grid()

# Plot differenced time series
plt.subplot(2, 1, 2)
plt.scatter(df_filtered["Date"], df_filtered["meanTemp_diff"], color='r', s=10, label="Differenced")
plt.xlabel("Date")
plt.ylabel("Temperature Change (°C)")
plt.title("Differenced Mean Temperature (2022-2024)")
plt.legend()
plt.grid()

plt.tight_layout()
plt.show()

Ryan is a Data Scientist at a fintech company, where he focuses on fraud prevention in underwriting and risk. Before that, he worked as a Data Analyst at a tax software company. He holds a degree in Electrical Engineering from UCF.