Set and get hyperparameters in scikit-learn
This notebook shows how one can get and set the value of a hyperparameter in a scikit-learn estimator. We recall that hyperparameters refer to the parameters that control the learning process.
They should not be confused with the fitted parameters, resulting from the
training. These fitted parameters are recognizable in scikit-learn because
they are spelled with a final underscore `_`, for instance `model.coef_`.
import pandas as pd
import matplotlib.pyplot as plt
import time
from sklearn.compose import make_column_selector as selector
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_validate
# Load the adult census data; the "education-num" column is discarded right
# after reading.
target_column = "class"
myDataFrame = pd.read_csv(
    "../../scikit-learn-mooc/datasets/adult-census.csv"
).drop(columns="education-num")

# Separate the label column from the feature columns.
target = myDataFrame[target_column]
data = myDataFrame.drop(columns=target_column)

# Keep only the columns whose dtype is not `object`. The selector is run on
# `data`, which no longer contains the target column.
numerical_columns = selector(dtype_exclude=object)(data)
data_numerical = myDataFrame[numerical_columns]

# Preview the first rows (rendered as cell output in a notebook; a bare
# expression has no visible effect when run as a plain script).
data_numerical.head()
# Two-step pipeline: a StandardScaler followed by a LogisticRegression.
# The step names matter: they are the prefixes of the nested hyperparameter
# names used later on (e.g. "classifier__C").
model = Pipeline(
    [
        ("preprocessor", StandardScaler()),
        ("classifier", LogisticRegression()),
    ]
)
# Baseline: cross-validate the pipeline with its default hyperparameters and
# report the mean/std of the test scores plus the mean fit time.
cv_results = cross_validate(model, data_numerical, target)
fit_time = cv_results["fit_time"]
scores = cv_results["test_score"]
print(
    f"The accuracy is {scores.mean():.3f} +/- {scores.std():.3f}, "
    f"for {fit_time.mean():.3f} seconds"
)
# List the name of every hyperparameter exposed by the pipeline. Iterating
# over the dict returned by `get_params()` yields its keys, i.e. the
# parameter names (nested ones are spelled "<step name>__<parameter name>").
for parameter in model.get_params():
    print(parameter)
# Change the `C` hyperparameter of the "classifier" step in place; nested
# parameters are addressed as "<step name>__<parameter name>".
model.set_params(classifier__C=1e-3)

# Re-evaluate the pipeline with the new value and report the same statistics
# as for the baseline.
cv_results = cross_validate(model, data_numerical, target)
fit_time = cv_results["fit_time"]
scores = cv_results["test_score"]
print(
    f"The accuracy is {scores.mean():.3f} +/- {scores.std():.3f}, "
    f"for {fit_time.mean():.3f} seconds"
)

# Read the value back to check that it was stored (rendered as cell output in
# a notebook; a bare expression has no visible effect in a plain script).
model.get_params()["classifier__C"]
# Manual sweep over `C`: for each candidate value, update the classifier step
# in place, cross-validate the whole pipeline and report the mean/std of the
# test scores together with the mean fit time.
# NOTE: `model` is mutated on every iteration, so after the loop it keeps the
# last value tried (C=100).
for C in [1e-4, 1e-3, 1e-2, 1e-1, 1, 10, 100]:
    model.set_params(classifier__C=C)
    cv_results = cross_validate(model, data_numerical, target)
    scores = cv_results["test_score"]
    fit_time = cv_results["fit_time"]
    print(
        f"The accuracy via cross-validation with C={C} is "
        f"{scores.mean():.3f} +/- {scores.std():.3f}, "
        f"for {fit_time.mean():.3f} seconds"
    )