Note
Click here to download the full example code or to run this example in your browser via Binder
Boston Housing Regression with Meta Optimization
This is an automatic machine learning example. It is more sophisticated than the other simple regression example: not only is a pipeline defined, but a hyperparameter space is also defined for that pipeline. A random search is then performed to find the best possible combination of hyperparameters by sampling randomly from the hyperparameter space.
Out:
/home/gui/Documents/GIT/www.neuraxle.org-builder/venv/lib/python3.8/site-packages/sklearn/utils/deprecation.py:87: FutureWarning: Function load_boston is deprecated; `load_boston` is deprecated in 1.0 and will be removed in 1.2.
The Boston housing prices dataset has an ethical problem. You can refer to
the documentation of this function for further details.
The scikit-learn maintainers therefore strongly discourage the use of this
dataset unless the purpose of the code is to study and educate about
ethical issues in data science and machine learning.
In this special case, you can fetch the dataset from the original
source::
import pandas as pd
import numpy as np
data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
target = raw_df.values[1::2, 2]
Alternative datasets include the California housing dataset (i.e.
:func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing
dataset. You can load the datasets as follows::
from sklearn.datasets import fetch_california_housing
housing = fetch_california_housing()
for the California housing dataset and::
from sklearn.datasets import fetch_openml
housing = fetch_openml(name="house_prices", as_frame=True)
for the Ames housing dataset.
warnings.warn(msg, category=FutureWarning)
Meta-fitting on train:
/home/gui/Documents/GIT/www.neuraxle.org-builder/docs/Neuraxle/neuraxle/metaopt/optimizer.py:247: UserWarning: Warning: changed GridExplorationSampler.expected_n_trials from 0 to 270. RandomSearch will be used as a fallback past this point if needed.
warnings.warn(
/home/gui/Documents/GIT/www.neuraxle.org-builder/venv/lib/python3.8/site-packages/sklearn/decomposition/_fastica.py:488: FutureWarning: From version 1.3 whiten='unit-variance' will be used by default.
warnings.warn(
/home/gui/Documents/GIT/www.neuraxle.org-builder/docs/Neuraxle/neuraxle/metaopt/optimizer.py:247: UserWarning: Warning: changed GridExplorationSampler.expected_n_trials from 270 to 277. RandomSearch will be used as a fallback past this point if needed.
warnings.warn(
/home/gui/Documents/GIT/www.neuraxle.org-builder/venv/lib/python3.8/site-packages/sklearn/decomposition/_fastica.py:488: FutureWarning: From version 1.3 whiten='unit-variance' will be used by default.
warnings.warn(
/home/gui/Documents/GIT/www.neuraxle.org-builder/venv/lib/python3.8/site-packages/sklearn/decomposition/_fastica.py:488: FutureWarning: From version 1.3 whiten='unit-variance' will be used by default.
warnings.warn(
/home/gui/Documents/GIT/www.neuraxle.org-builder/venv/lib/python3.8/site-packages/sklearn/decomposition/_fastica.py:488: FutureWarning: From version 1.3 whiten='unit-variance' will be used by default.
warnings.warn(
/home/gui/Documents/GIT/www.neuraxle.org-builder/venv/lib/python3.8/site-packages/sklearn/decomposition/_fastica.py:488: FutureWarning: From version 1.3 whiten='unit-variance' will be used by default.
warnings.warn(
/home/gui/Documents/GIT/www.neuraxle.org-builder/venv/lib/python3.8/site-packages/sklearn/decomposition/_fastica.py:488: FutureWarning: From version 1.3 whiten='unit-variance' will be used by default.
warnings.warn(
/home/gui/Documents/GIT/www.neuraxle.org-builder/venv/lib/python3.8/site-packages/sklearn/decomposition/_fastica.py:488: FutureWarning: From version 1.3 whiten='unit-variance' will be used by default.
warnings.warn(
/home/gui/Documents/GIT/www.neuraxle.org-builder/venv/lib/python3.8/site-packages/sklearn/decomposition/_fastica.py:488: FutureWarning: From version 1.3 whiten='unit-variance' will be used by default.
warnings.warn(
/home/gui/Documents/GIT/www.neuraxle.org-builder/venv/lib/python3.8/site-packages/sklearn/decomposition/_fastica.py:488: FutureWarning: From version 1.3 whiten='unit-variance' will be used by default.
warnings.warn(
/home/gui/Documents/GIT/www.neuraxle.org-builder/venv/lib/python3.8/site-packages/sklearn/decomposition/_fastica.py:488: FutureWarning: From version 1.3 whiten='unit-variance' will be used by default.
warnings.warn(
/home/gui/Documents/GIT/www.neuraxle.org-builder/venv/lib/python3.8/site-packages/sklearn/decomposition/_fastica.py:488: FutureWarning: From version 1.3 whiten='unit-variance' will be used by default.
warnings.warn(
Transforming train and test:
Evaluating transformed train:
R2 regression score: 0.9214005594677775
Evaluating transformed test:
R2 regression score: 0.791241101993557
import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import load_boston
from sklearn.decomposition import PCA, FastICA
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from neuraxle.hyperparams.distributions import RandInt, LogUniform, Boolean
from neuraxle.hyperparams.space import HyperparameterSpace
from neuraxle.metaopt.auto_ml import AutoML, ValidationSplitter
from neuraxle.metaopt.callbacks import MetricCallback
from neuraxle.pipeline import Pipeline
from neuraxle.steps.numpy import NumpyTranspose
from neuraxle.steps.sklearn import SKLearnWrapper
from neuraxle.union import AddFeatures, ModelStacking
def main(tmpdir):
    """
    Meta-fit a stacked regression pipeline on the Boston housing data and print its R2 scores.

    The data is shuffled with a fixed seed, split 75% / 25% into train and test sets
    (in order, without reshuffling), and a pipeline with per-step hyperparameter
    spaces is handed to ``AutoML`` for 10 trials. The fitted result is then used to
    predict on both sets, and the R2 score of each prediction is printed.

    :param tmpdir: kept for interface compatibility with callers; it is not read by this function.
    :return: None. Results are reported with ``print``.
    """
    # NOTE: `load_boston` is deprecated since scikit-learn 1.0 and removed in 1.2,
    # so this example needs scikit-learn < 1.2 to run as written.
    boston = load_boston()
    X, y = shuffle(boston.data, boston.target, random_state=13)
    X = X.astype(np.float32)
    # The data was already shuffled above with a fixed seed, hence shuffle=False here.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

    # Note that the hyperparameter spaces are defined here during the pipeline definition, but it could be already set
    # within the classes at their definition if using custom classes, or also it could be defined after declaring the
    # pipeline using a flat dict or a nested dict.
    p = Pipeline([
        AddFeatures([
            SKLearnWrapper(
                PCA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})
            ),
            SKLearnWrapper(
                FastICA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})
            ),
        ]),
        ModelStacking([
            SKLearnWrapper(
                GradientBoostingRegressor(),
                HyperparameterSpace({
                    "n_estimators": RandInt(50, 300), "max_depth": RandInt(1, 4),
                    "learning_rate": LogUniform(0.07, 0.7)
                })
            ),
            SKLearnWrapper(
                KMeans(),
                HyperparameterSpace({"n_clusters": RandInt(5, 10)})
            ),
        ],
            joiner=NumpyTranspose(),
            judge=SKLearnWrapper(
                Ridge(),
                HyperparameterSpace({"alpha": LogUniform(0.7, 1.4), "fit_intercept": Boolean()})),
        )
    ])

    print("Meta-fitting on train:")
    auto_ml = AutoML(
        p,
        validation_splitter=ValidationSplitter(0.20),
        n_trials=10,
        epochs=1,  # 1 epoch here due to using sklearn models that just fit once.
        callbacks=[MetricCallback('mse', metric_function=mean_squared_error, higher_score_is_better=False)],
    )
    fitted_random_search = auto_ml.fit(X_train, y_train)
    print("")

    print("Transforming train and test:")
    y_train_predicted = fitted_random_search.predict(X_train)
    y_test_predicted = fitted_random_search.predict(X_test)
    print("")

    # r2_score's signature is (y_true, y_pred) and the metric is not symmetric:
    # the ground truth must come first, otherwise the reported score is wrong.
    print("Evaluating transformed train:")
    score_transform = r2_score(y_train, y_train_predicted)
    print('R2 regression score:', score_transform)
    print("")

    print("Evaluating transformed test:")
    score_test = r2_score(y_test, y_test_predicted)
    print('R2 regression score:', score_test)
if __name__ == "__main__":
    # Script entry point: run the example only when executed directly, not on import.
    main(tmpdir='cache')
Total running time of the script: ( 0 minutes 3.013 seconds)