Dummy Classifier - Useful
How to get a first idea of the quality of your model
The Dummy classifier will give us an idea of the "minimum" quality we can achieve.
Depending on the chosen strategy, it predicts either a fixed value, the most frequent class of the training sample, or a class drawn at random.
Its score is used as a floor for the models evaluated afterwards. The objective is to do better, ideally much better, than this dummy baseline!
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.dummy import DummyClassifier
# Names of the label column and of the numeric feature columns used below.
target_column = 'Species'
numerical_columns = ['Culmen Length (mm)', 'Culmen Depth (mm)']

# Load the penguins dataset; the path is relative to the working directory.
myDataFrame = pd.read_csv("../../scikit-learn-mooc/datasets/penguins_classification.csv")

# Label column, with its absolute and relative class frequencies.
# (Bare expressions: they only display a result in an interactive session.)
target = myDataFrame[target_column]
target.value_counts()
target.value_counts(normalize=True)

# Feature table: everything except the label, then only the numeric columns.
data = myDataFrame.drop(columns=[target_column])
data.columns
data_numeric = data.loc[:, numerical_columns]

# Hold out 25% of the rows for testing. No random_state is passed, so the
# split (and every score computed from it) changes from one run to the next;
# pass e.g. random_state=42 to make the results reproducible.
data_train, data_test, target_train, target_test = train_test_split(
    data_numeric,
    target,
    test_size=0.25,
)
# Baseline: DummyClassifier with its default strategy (no arguments given).
model = DummyClassifier()
model.fit(data_train, target_train)

# Fraction of the test predictions that falls on each predicted class.
# (Bare expression: it only displays a result in an interactive session.)
a = model.predict(data_test)
n = a.size
unique, counts = np.unique(a, return_counts=True)
dict(zip(unique, counts / n))

# Score on the held-out test set. The label names the model that was actually
# fitted (the previous message wrongly said "logistic regression").
accuracy = model.score(data_test, target_test)
print(f"Accuracy of dummy classifier (default strategy): {accuracy:.3f}")
# Baseline: DummyClassifier with the 'stratified' strategy, which draws its
# predictions at random following the class distribution seen during fit.
model = DummyClassifier(strategy='stratified')
model.fit(data_train, target_train)

# Fraction of the test predictions that falls on each predicted class.
# (Bare expression: it only displays a result in an interactive session.)
a = model.predict(data_test)
n = a.size
unique, counts = np.unique(a, return_counts=True)
dict(zip(unique, counts / n))

# Score on the held-out test set. The label names the model that was actually
# fitted (the previous message wrongly said "logistic regression").
accuracy = model.score(data_test, target_test)
print(f"Accuracy of dummy classifier (stratified): {accuracy:.3f}")
# Baseline: DummyClassifier with the 'uniform' strategy, which picks each
# prediction uniformly at random among the classes seen during fit.
model = DummyClassifier(strategy='uniform')
model.fit(data_train, target_train)

# Fraction of the test predictions that falls on each predicted class.
# (Bare expression: it only displays a result in an interactive session.)
a = model.predict(data_test)
n = a.size
unique, counts = np.unique(a, return_counts=True)
dict(zip(unique, counts / n))

# Score on the held-out test set. The label names the model that was actually
# fitted (the previous message wrongly said "logistic regression").
accuracy = model.score(data_test, target_test)
print(f"Accuracy of dummy classifier (uniform): {accuracy:.3f}")
# Baseline: DummyClassifier with the 'constant' strategy, which always
# predicts the class given as `constant` ("Chinstrap" here).
model = DummyClassifier(strategy='constant', constant="Chinstrap")
model.fit(data_train, target_train)

# Fraction of the test predictions that falls on each predicted class.
# (Bare expression: it only displays a result in an interactive session.)
a = model.predict(data_test)
n = a.size
unique, counts = np.unique(a, return_counts=True)
dict(zip(unique, counts / n))

# Score on the held-out test set. The label names the model that was actually
# fitted (the previous message wrongly said "logistic regression").
accuracy = model.score(data_test, target_test)
print(f"Accuracy of dummy classifier (constant='Chinstrap'): {accuracy:.3f}")