Stacking Regressor

				
					import pandas as pd
				
			
				
					import numpy as np
				
			
				
					import seaborn as sns
				
			
				
					sns.get_dataset_names()
				
			
				
					mpg = sns.load_dataset("mpg")
				
			
				
					mpg
				
			
				
					mpg = mpg.drop('name', axis=1)
				
			
				
					mpg = pd.get_dummies(mpg)
				
			
				
					mpg.head(10)
				
			

SEE ALL NULL VLAUES

				
					pd.DataFrame(mpg.isnull().sum().sort_values(ascending=False))
				
			
				
					mpg['horsepower'].fillna(mpg['horsepower'].mean(), inplace=True)
				
			
				
					pd.DataFrame(mpg.isnull().sum().sort_values(ascending=False))
				
			
				
					X = mpg.drop('mpg', axis=1)
				
			
				
					y = mpg['mpg']
				
			
				
					from sklearn.model_selection import train_test_split
				
			
				
					X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=19)
				
			
				
					from sklearn.linear_model import LinearRegression
				
			
				
					lr = LinearRegression()
				
			
				
					lr.fit(X_train, y_train)
				
			
				
					y_pred = lr.predict(X_test)
				
			
				
					from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
				
			
				
					mean_squared_error(y_test, y_pred)
				
			
				
					from sklearn.ensemble import RandomForestRegressor
				
			
				
					rfr = RandomForestRegressor(random_state=13)
				
			
				
					rfr.fit(X_train, y_train)
				
			
				
					y_pred = rfr.predict(X_test)
				
			
				
					mean_squared_error(y_test, y_pred)
				
			
				
					from sklearn.linear_model import Ridge
				
			
				
					ridge = Ridge()
				
			
				
					param_grid = {
        'alpha': [0.05, 0.1, 0.3, 1, 3, 5, 10, 15, 30, 50, 75],
    }
				
			
				
					from sklearn.model_selection import GridSearchCV
				
			
				
					ridge_cv = GridSearchCV(estimator=ridge, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
				
			
				
					ridge_cv.fit(X_train, y_train)
				
			
				
					ridge_cv.best_estimator_
				
			
				
					mean_squared_error(y_test, y_pred)
				
			
				
					from sklearn.ensemble import GradientBoostingRegressor
				
			
				
					gbr = GradientBoostingRegressor()
				
			
				
					gbr.fit(X_train, y_train)
				
			
				
					y_pred = gbr.predict(X_test)
				
			
				
					mean_squared_error(y_test, y_pred)
				
			
				
					from sklearn.ensemble import StackingRegressor
				
			
				
					estimators = [
     ('lr', lr),
     ('ridge', ridge_cv.best_estimator_),
     ('gbr', gbr),
]
				
			
				
					sr = StackingRegressor(
     estimators=estimators,
     final_estimator=rfr
)
				
			
				
					sr.fit(X_train, y_train)
				
			
				
					y_pred = sr.predict(X_test)
				
			
				
					mean_squared_error(y_test, y_pred)
				
			

voting classifier

				
					from sklearn.ensemble import VotingRegressor
				
			
				
					vc = VotingRegressor([('lr', lr),
                    ('rfr', rfr),
                    ], weights=[1,2])

				
			
				
					vc.fit(X_train, y_train)
				
			
				
					y_pred = vc.predict(X_test)
				
			
				
					mean_squared_error(y_test, y_pred)
				
			
				
					sr2 = StackingRegressor(
     estimators=estimators,
     final_estimator=vc
)
				
			
				
					sr2.fit(X_train, y_train)
				
			
				
					y_pred = sr2.predict(X_test)
				
			
				
					mean_squared_error(y_test, y_pred)
				
			

hyperparamater tuning

				
					from sklearn.svm import SVR
				
			
				
					base_regressors = [
    ('ridge', ridge_cv.best_estimator_),
    ('gbr', gbr),
    ('svr', SVR(C=1.0, kernel='linear')),
    ('random_forest', RandomForestRegressor()),
]
				
			
				
					stacking_regressor = StackingRegressor(
    estimators=base_regressors,
    final_estimator=Ridge(alpha=1.0)
)
				
			
				
					param_grid = {
    'random_forest__n_estimators': [50, 100, 250],
    'svr__C': [0.1, 1.0, 10.0],
    'final_estimator__alpha': [0.1, 1.0, 10.0],
}
				
			
				
					from sklearn.model_selection import RandomizedSearchCV
				
			
				
					random_search = RandomizedSearchCV(stacking_regressor, param_grid, n_iter=10, cv=3, scoring='neg_mean_squared_error', n_jobs=-1)
				
			
				
					random_search.fit(X_train, y_train)
				
			
				
					y_pred = random_search.predict(X_test)
				
			
				
					mean_squared_error(y_test, y_pred)
				
			
				
					random_search.best_params_
				
			

Ryan is a Data Scientist at a fintech company, where he focuses on fraud prevention in underwriting and risk. Before that, he worked as a Data Analyst at a tax software company. He holds a degree in Electrical Engineering from UCF.

Leave a Reply

Your email address will not be published. Required fields are marked *