From 647341c5155500a5c773f68fc184d744be644849 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Mon, 17 Oct 2022 15:30:57 +0200
Subject: [PATCH 1/4] n_iter is now keyword-only
---
.../test_sklearn_extension/test_sklearn_extension.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
index 8de75c1b4..d127b3255 100644
--- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
+++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
@@ -2059,7 +2059,7 @@ def test__extract_trace_data(self):
clf = sklearn.model_selection.RandomizedSearchCV(
sklearn.neural_network.MLPClassifier(),
param_grid,
- num_iters,
+ n_iter=num_iters,
)
# just run the task on the model (without invoking any fancy extension & openml code)
train, _ = task.get_train_test_split_indices(0, 0)
From 0389aa6a1e6204475c099705b1926ff134d5e232 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Mon, 17 Oct 2022 15:59:21 +0200
Subject: [PATCH 2/4] Standardize sklearn pipeline description lookups
---
.../test_sklearn_extension.py | 139 ++++++------------
1 file changed, 43 insertions(+), 96 deletions(-)
diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
index d127b3255..5b65cad3d 100644
--- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
+++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
@@ -5,6 +5,7 @@
import re
import os
import sys
+from typing import Any
import unittest
from distutils.version import LooseVersion
from collections import OrderedDict
@@ -73,6 +74,45 @@ def setUp(self):
self.extension = SklearnExtension()
+ def _get_expected_pipeline_description(self, model: Any) -> str:
+ if version.parse(sklearn.__version__) >= version.parse("1.0"):
+ expected_fixture = (
+ "Pipeline of transforms with a final estimator.\n\nSequentially"
+ " apply a list of transforms and a final estimator.\n"
+ "Intermediate steps of the pipeline must be 'transforms', that "
+ "is, they\nmust implement `fit` and `transform` methods.\nThe final "
+ "estimator only needs to implement `fit`.\nThe transformers in "
+ "the pipeline can be cached using ``memory`` argument.\n\nThe "
+ "purpose of the pipeline is to assemble several steps that can "
+ "be\ncross-validated together while setting different parameters"
+ ". For this, it\nenables setting parameters of the various steps"
+ " using their names and the\nparameter name separated by a `'__'`,"
+ " as in the example below. A step's\nestimator may be replaced "
+ "entirely by setting the parameter with its name\nto another "
+ "estimator, or a transformer removed by setting it to\n"
+ "`'passthrough'` or `None`."
+ )
+ elif version.parse(sklearn.__version__) >= version.parse("0.21.0"):
+ expected_fixture = (
+ "Pipeline of transforms with a final estimator.\n\nSequentially"
+ " apply a list of transforms and a final estimator.\n"
+ "Intermediate steps of the pipeline must be 'transforms', that "
+ "is, they\nmust implement fit and transform methods.\nThe final "
+ "estimator only needs to implement fit.\nThe transformers in "
+ "the pipeline can be cached using ``memory`` argument.\n\nThe "
+ "purpose of the pipeline is to assemble several steps that can "
+ "be\ncross-validated together while setting different parameters"
+ ".\nFor this, it enables setting parameters of the various steps"
+ " using their\nnames and the parameter name separated by a '__',"
+ " as in the example below.\nA step's estimator may be replaced "
+ "entirely by setting the parameter\nwith its name to another "
+ "estimator, or a transformer removed by setting\nit to "
+ "'passthrough' or ``None``."
+ )
+ else:
+ expected_fixture = self.extension._get_sklearn_description(model)
+ return expected_fixture
+
def _serialization_test_helper(
self, model, X, y, subcomponent_parameters, dependencies_mock_call_count=(1, 2)
):
@@ -398,44 +438,7 @@ def test_serialize_pipeline(self):
"dummy=sklearn.dummy.DummyClassifier)".format(scaler_name)
)
fixture_short_name = "sklearn.Pipeline(StandardScaler,DummyClassifier)"
-
- if version.parse(sklearn.__version__) >= version.parse("1.0"):
- fixture_description = (
- "Pipeline of transforms with a final estimator.\n\nSequentially"
- " apply a list of transforms and a final estimator.\n"
- "Intermediate steps of the pipeline must be 'transforms', that "
- "is, they\nmust implement `fit` and `transform` methods.\nThe final "
- "estimator only needs to implement `fit`.\nThe transformers in "
- "the pipeline can be cached using ``memory`` argument.\n\nThe "
- "purpose of the pipeline is to assemble several steps that can "
- "be\ncross-validated together while setting different parameters"
- ". For this, it\nenables setting parameters of the various steps"
- " using their names and the\nparameter name separated by a `'__'`,"
- " as in the example below. A step's\nestimator may be replaced "
- "entirely by setting the parameter with its name\nto another "
- "estimator, or a transformer removed by setting it to\n"
- "`'passthrough'` or `None`."
- )
- elif version.parse(sklearn.__version__) >= version.parse("0.21.0"):
- fixture_description = (
- "Pipeline of transforms with a final estimator.\n\nSequentially"
- " apply a list of transforms and a final estimator.\n"
- "Intermediate steps of the pipeline must be 'transforms', that "
- "is, they\nmust implement fit and transform methods.\nThe final "
- "estimator only needs to implement fit.\nThe transformers in "
- "the pipeline can be cached using ``memory`` argument.\n\nThe "
- "purpose of the pipeline is to assemble several steps that can "
- "be\ncross-validated together while setting different parameters"
- ".\nFor this, it enables setting parameters of the various steps"
- " using their\nnames and the parameter name separated by a '__',"
- " as in the example below.\nA step's estimator may be replaced "
- "entirely by setting the parameter\nwith its name to another "
- "estimator, or a transformer removed by setting\nit to "
- "'passthrough' or ``None``."
- )
- else:
- fixture_description = self.extension._get_sklearn_description(model)
-
+ fixture_description = self._get_expected_pipeline_description(model)
fixture_structure = {
fixture_name: [],
"sklearn.preprocessing.{}.StandardScaler".format(scaler_name): ["scaler"],
@@ -505,43 +508,7 @@ def test_serialize_pipeline_clustering(self):
"clusterer=sklearn.cluster.{}.KMeans)".format(scaler_name, cluster_name)
)
fixture_short_name = "sklearn.Pipeline(StandardScaler,KMeans)"
-
- if version.parse(sklearn.__version__) >= version.parse("1.0"):
- fixture_description = (
- "Pipeline of transforms with a final estimator.\n\nSequentially"
- " apply a list of transforms and a final estimator.\n"
- "Intermediate steps of the pipeline must be 'transforms', that "
- "is, they\nmust implement `fit` and `transform` methods.\nThe final "
- "estimator only needs to implement `fit`.\nThe transformers in "
- "the pipeline can be cached using ``memory`` argument.\n\nThe "
- "purpose of the pipeline is to assemble several steps that can "
- "be\ncross-validated together while setting different parameters"
- ". For this, it\nenables setting parameters of the various steps"
- " using their names and the\nparameter name separated by a `'__'`,"
- " as in the example below. A step's\nestimator may be replaced "
- "entirely by setting the parameter with its name\nto another "
- "estimator, or a transformer removed by setting it to\n"
- "`'passthrough'` or `None`."
- )
- elif version.parse(sklearn.__version__) >= version.parse("0.21.0"):
- fixture_description = (
- "Pipeline of transforms with a final estimator.\n\nSequentially"
- " apply a list of transforms and a final estimator.\n"
- "Intermediate steps of the pipeline must be 'transforms', that "
- "is, they\nmust implement fit and transform methods.\nThe final "
- "estimator only needs to implement fit.\nThe transformers in "
- "the pipeline can be cached using ``memory`` argument.\n\nThe "
- "purpose of the pipeline is to assemble several steps that can "
- "be\ncross-validated together while setting different parameters"
- ".\nFor this, it enables setting parameters of the various steps"
- " using their\nnames and the parameter name separated by a '__',"
- " as in the example below.\nA step's estimator may be replaced "
- "entirely by setting the parameter\nwith its name to another "
- "estimator, or a transformer removed by setting\nit to "
- "'passthrough' or ``None``."
- )
- else:
- fixture_description = self.extension._get_sklearn_description(model)
+ fixture_description = self._get_expected_pipeline_description(model)
fixture_structure = {
fixture_name: [],
"sklearn.preprocessing.{}.StandardScaler".format(scaler_name): ["scaler"],
@@ -699,27 +666,7 @@ def test_serialize_column_transformer_pipeline(self):
fixture_name: [],
}
- if version.parse(sklearn.__version__) >= version.parse("0.21.0"):
- # str obtained from self.extension._get_sklearn_description(model)
- fixture_description = (
- "Pipeline of transforms with a final estimator.\n\nSequentially"
- " apply a list of transforms and a final estimator.\n"
- "Intermediate steps of the pipeline must be 'transforms', that "
- "is, they\nmust implement fit and transform methods.\nThe final"
- " estimator only needs to implement fit.\nThe transformers in "
- "the pipeline can be cached using ``memory`` argument.\n\nThe "
- "purpose of the pipeline is to assemble several steps that can "
- "be\ncross-validated together while setting different "
- "parameters.\nFor this, it enables setting parameters of the "
- "various steps using their\nnames and the parameter name "
- "separated by a '__', as in the example below.\nA step's "
- "estimator may be replaced entirely by setting the parameter\n"
- "with its name to another estimator, or a transformer removed by"
- " setting\nit to 'passthrough' or ``None``."
- )
- else:
- fixture_description = self.extension._get_sklearn_description(model)
-
+ fixture_description = self._get_expected_pipeline_description(model)
serialization, new_model = self._serialization_test_helper(
model,
X=None,
From f80d64508a3afe847a07079df6d498288568673f Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Mon, 17 Oct 2022 15:59:47 +0200
Subject: [PATCH 3/4] `priors` is no longer positional, and wasn't used in the
first place
---
.../test_sklearn_extension/test_sklearn_extension.py | 3 ---
1 file changed, 3 deletions(-)
diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
index 5b65cad3d..a07401184 100644
--- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
+++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
@@ -1833,9 +1833,6 @@ def test_run_model_on_fold_classification_3(self):
class HardNaiveBayes(sklearn.naive_bayes.GaussianNB):
# class for testing a naive bayes classifier that does not allow soft
# predictions
- def __init__(self, priors=None):
- super(HardNaiveBayes, self).__init__(priors)
-
def predict_proba(*args, **kwargs):
raise AttributeError("predict_proba is not available when " "probability=False")
From 85b7312a4965bb5ca445df10a606a9d55ca05608 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Mon, 17 Oct 2022 16:21:36 +0200
Subject: [PATCH 4/4] Remove loss=kneighbours from the complex pipeline
---
.../test_sklearn_extension/test_sklearn_extension.py | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
index a07401184..709d123f0 100644
--- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
+++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
@@ -1441,9 +1441,7 @@ def test_deserialize_complex_with_defaults(self):
"Estimator",
sklearn.ensemble.AdaBoostClassifier(
sklearn.ensemble.BaggingClassifier(
- sklearn.ensemble.GradientBoostingClassifier(
- sklearn.neighbors.KNeighborsClassifier()
- )
+ sklearn.ensemble.GradientBoostingClassifier()
)
),
),
@@ -1458,7 +1456,6 @@ def test_deserialize_complex_with_defaults(self):
"Estimator__n_estimators": 10,
"Estimator__base_estimator__n_estimators": 10,
"Estimator__base_estimator__base_estimator__learning_rate": 0.1,
- "Estimator__base_estimator__base_estimator__loss__n_neighbors": 13,
}
else:
params = {
@@ -1467,7 +1464,6 @@ def test_deserialize_complex_with_defaults(self):
"Estimator__n_estimators": 50,
"Estimator__base_estimator__n_estimators": 10,
"Estimator__base_estimator__base_estimator__learning_rate": 0.1,
- "Estimator__base_estimator__base_estimator__loss__n_neighbors": 5,
}
pipe_adjusted.set_params(**params)
flow = self.extension.model_to_flow(pipe_adjusted)