time series seasonality python

				
					import matplotlib.pyplot as plt
import pandas as pd
from datetime import datetime
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
				
			
				
					# Load the weather CSV into a DataFrame (the path is hard-coded).
					df = pd.read_csv('/content/86628_2010_1_1_2024.csv')
				
			
				
					df.columns = df.columns.str.strip()  # Remove leading/trailing spaces from column names
				
			
				
					# Notebook-style display of the cleaned column names.
					df.columns
				
			
				
					# Assemble one datetime column from the YEAR / MONTH / DAY columns;
					# errors="coerce" turns combinations that cannot be parsed into NaT
					# instead of raising.
					df["Date"] = pd.to_datetime(df[["YEAR", "MONTH", "DAY"]], errors="coerce")

				
			
				
					# Collect the rows with a missing Date or a meanTemp below -10 for inspection.
					# NOTE(review): -10 is presumably a cutoff for missing-value sentinels —
					# confirm against the data source.
					invalid_dates = df[(df["Date"].isna()) | (df['meanTemp'] < -10)]
				
			
				
					# Notebook-style display of the rows flagged above.
					invalid_dates
				
			
				
					# Drop rows whose Date could not be built (NaT).
					df = df.dropna(subset=["Date"])
				
			
				
					# Keep readings at or above -10. Rows with a NaN meanTemp also fail this
					# comparison and are removed, although they are not listed in invalid_dates.
					df = df[df['meanTemp'] >= -10]
				
			
				
					# Scatter plot of every remaining daily mean temperature against its date.
					plt.figure(figsize=(10, 5))
# NOTE(review): the legend label below says °F while the y-axis label says °C —
# confirm the dataset's unit and make the two labels agree.
plt.scatter(df["Date"], df["meanTemp"], color='b', label='Mean Temp (°F)', s=10)  # s controls the size of the dots

# Formatting plot
plt.xlabel("Date")
plt.ylabel("Temperature (°C)")
plt.title("Orlando, FL Daily Mean Temperatures (2010-2024)")
plt.xticks(rotation=45)  # tilt the date tick labels
plt.legend()
plt.grid()
plt.show()
				
			
Let's look at a specific period now (2022–2024).
# Make sure "Date" is a datetime column before filtering by date range.
# Plain ASCII quotes are required: typographic quotes are a SyntaxError in Python.
df["Date"] = pd.to_datetime(df["Date"])
				
					# Restrict the analysis to 2022-01-01 through 2024-12-31 (both ends included).
					# The copy lets later column additions leave df untouched.
					in_window = (df["Date"] >= "2022-01-01") & (df["Date"] <= "2024-12-31")
					df_filtered = df[in_window].copy()

					# Augmented Dickey-Fuller test on the non-missing temperatures;
					# element 1 of the result is the p-value.
					adf_test = adfuller(df_filtered["meanTemp"].dropna())
					p_value = adf_test[1]
					print("p-value:", p_value)

					# Interpretation at the 5% significance level
					verdict = (
					    "The time series is stationary (reject H0)."
					    if p_value < 0.05
					    else "The time series is not stationary (fail to reject H0)."
					)
					print(verdict)
				
			
Summary Statistics by Period (week, month, quarter, etc.)
				
					def get_season(month):
					    """Return the season name for a month number.

					    12, 1, 2 -> "Winter"; 3-5 -> "Spring"; 6-8 -> "Summer";
					    every other value (9-11 included) -> "Fall".
					    """
					    season_months = (
					        ("Winter", (12, 1, 2)),
					        ("Spring", (3, 4, 5)),
					        ("Summer", (6, 7, 8)),
					    )
					    for season, months in season_months:
					        if month in months:
					            return season
					    # Anything not matched above falls through to Fall.
					    return "Fall"
				
			
				
					# Tag every row with its season, derived from the month of "Date"
					month_numbers = df_filtered["Date"].dt.month
					df_filtered["Season"] = month_numbers.apply(get_season)

					# Descriptive statistics of meanTemp for each season
					seasonal_stats = df_filtered.groupby("Season")["meanTemp"].describe()

					# Show the resulting table
					print(seasonal_stats)
				
			
				
					# ACF and PACF report: two side-by-side panels, 30 lags each
					temps = df_filtered["meanTemp"].dropna()
					fig, axes = plt.subplots(1, 2, figsize=(12, 5))
					acf_ax, pacf_ax = axes

					# Left panel: autocorrelation function
					plot_acf(temps, ax=acf_ax, lags=30)  # Adjust lags if needed
					acf_ax.set_title("Autocorrelation Function (ACF)")

					# Right panel: partial autocorrelation function
					plot_pacf(temps, ax=pacf_ax, lags=30, method='ywm')
					pacf_ax.set_title("Partial Autocorrelation Function (PACF)")

					plt.tight_layout()
					plt.show()
				
			
				
					# First difference: each row's meanTemp minus the previous row's (the first row becomes NaN).
					df_filtered["meanTemp_diff"] = df_filtered["meanTemp"].diff()

					# One entry per stacked panel: (column, color, legend label, y-axis label, title)
					panels = (
					    ("meanTemp", 'b', "Original",
					     "Temperature (°C)", "Original Mean Temperature (2022-2024)"),
					    ("meanTemp_diff", 'r', "Differenced",
					     "Temperature Change (°C)", "Differenced Mean Temperature (2022-2024)"),
					)

					# Draw the original series on top and the differenced series below it
					plt.figure(figsize=(12, 5))
					for row, (column, color, legend_label, y_label, title) in enumerate(panels, start=1):
					    plt.subplot(2, 1, row)
					    plt.scatter(df_filtered["Date"], df_filtered[column], color=color, s=10, label=legend_label)
					    plt.xlabel("Date")
					    plt.ylabel(y_label)
					    plt.title(title)
					    plt.legend()
					    plt.grid()

					plt.tight_layout()
					plt.show()
				
			

Ryan is a Data Scientist at a fintech company, where he focuses on fraud prevention in underwriting and risk. Before that, he worked as a Data Analyst at a tax software company. He holds a degree in Electrical Engineering from UCF.

Leave a Reply

Your email address will not be published. Required fields are marked *