Data set analysis
First analysis of an unknown dataset, with data visualization
- Auto-scrolling
- Imports
- First analysis
Auto-scrolling
To disable auto-scrolling, execute this JavaScript in a notebook cell before any other cell is executed (source: Stack Overflow).
%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines) {
return false;
}
import pandas as pd
import seaborn as sns
# Load the penguins classification CSV (path is relative to this notebook).
myDataFrame = pd.read_csv(
    filepath_or_buffer="../../scikit-learn-mooc/datasets/penguins_classification.csv"
)

# Report the table size: number of rows (samples) and number of columns.
print(
    f"The dataset contains {len(myDataFrame)} samples and "
    f"{len(myDataFrame.columns)} columns"
)

# Column labels, then a preview of the first five rows.
myDataFrame.columns
myDataFrame.head(n=5)

# Name of the column used as the classification target; count rows per class.
target_column = "Species"
myDataFrame[target_column].value_counts()

# Data type pandas inferred for each column.
myDataFrame.dtypes
# Columns kept for the analysis, grouped by kind.
numerical_columns = [
    "Culmen Length (mm)",
    "Culmen Depth (mm)",
]
categorical_columns = []  # no categorical feature is selected

# Features first, target last.
all_columns = [*numerical_columns, *categorical_columns, target_column]

# Restrict the frame to the selected columns and confirm the result.
myDataFrame = myDataFrame[all_columns]
myDataFrame.columns

# Summary statistics for the numerical features only.
myDataFrame[numerical_columns].describe()
# Histograms drawn by pandas on a 10x5 inch figure; the returned axes are
# discarded so the notebook shows only the plot.
_ = myDataFrame.hist(figsize=(10, 5))

# Pairwise relationships with seaborn's default settings.
_ = sns.pairplot(data=myDataFrame)

# Same pair plot, coloured by the target column, showing only the lower
# triangle (corner=True) with larger facets.
_ = sns.pairplot(
    data=myDataFrame,
    hue=target_column,
    corner=True,
    height=4,
)

# Rebuild the coloured pair plot, this time keeping the grid so that KDE
# contour lines can be overlaid on the lower-triangle panels.
g = sns.pairplot(
    data=myDataFrame,
    hue=target_column,
    corner=True,
    height=4,
)
# The trailing semicolon suppresses the notebook's display of the return value.
g.map_lower(sns.kdeplot, color=".2", levels=3);

# Cross-tabulation of the first numerical column (rows) against the second
# (columns).
# NOTE(review): both columns look like continuous measurements, so this table
# is presumably large and sparse — consider binning the values first.
pd.crosstab(
    columns=myDataFrame[numerical_columns[1]],
    index=myDataFrame[numerical_columns[0]],
)