From 72491d51b5e98547bac8b42c96b8839e02c1588d Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Wed, 7 Apr 2021 09:40:16 +0200 Subject: [PATCH 01/14] fix warnings, make sphinx fail on warnings --- doc/conf.py | 1 + doc/usage.rst | 6 ++++-- examples/30_extended/configure_logging.py | 2 -- examples/30_extended/custom_flow_.py | 1 + examples/30_extended/study_tutorial.py | 1 + examples/30_extended/suites_tutorial.py | 1 + 6 files changed, 8 insertions(+), 4 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index f0f26318c..921ceded3 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -344,3 +344,4 @@ def setup(app): app.add_css_file("codehighlightstyle.css") + app.warningiserror = True diff --git a/doc/usage.rst b/doc/usage.rst index 23ef4ec84..77c1b358d 100644 --- a/doc/usage.rst +++ b/doc/usage.rst @@ -16,9 +16,11 @@ This document will guide you through the most important use cases, functions and classes in the OpenML Python API. Throughout this document, we will use `pandas `_ to format and filter tables. -~~~~~~~~~~~~~~~~~~~~~~ +.. _installation: + +~~~~~~~~~~~~~~~~~~~~~ Installation & Set up -~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~ The OpenML Python package is a connector to `OpenML `_. It allows you to use and share datasets and tasks, run diff --git a/examples/30_extended/configure_logging.py b/examples/30_extended/configure_logging.py index a600b0632..185127f42 100644 --- a/examples/30_extended/configure_logging.py +++ b/examples/30_extended/configure_logging.py @@ -6,8 +6,6 @@ Explains openml-python logging, and shows how to configure it. """ ################################################################################## -# Logging -# ^^^^^^^ # Openml-python uses the `Python logging module `_ # to provide users with log messages. 
Each log message is assigned a level of importance, see # the table in Python's logging tutorial diff --git a/examples/30_extended/custom_flow_.py b/examples/30_extended/custom_flow_.py index 02aef9c5c..1dde40233 100644 --- a/examples/30_extended/custom_flow_.py +++ b/examples/30_extended/custom_flow_.py @@ -130,6 +130,7 @@ # The exact format of the predictions will depend on the task. # # The predictions should always be a list of lists, each list should contain: +# # - the repeat number: for repeated evaluation strategies. (e.g. repeated cross-validation) # - the fold number: for cross-validation. (what should this be for holdout?) # - 0: this field is for backward compatibility. diff --git a/examples/30_extended/study_tutorial.py b/examples/30_extended/study_tutorial.py index 3c93a7e81..76cca4840 100644 --- a/examples/30_extended/study_tutorial.py +++ b/examples/30_extended/study_tutorial.py @@ -25,6 +25,7 @@ # connects to the test server at test.openml.org before doing so. # This prevents the crowding of the main server with example datasets, # tasks, runs, and so on. +# ############################################################################ diff --git a/examples/30_extended/suites_tutorial.py b/examples/30_extended/suites_tutorial.py index f583b6957..81447764c 100644 --- a/examples/30_extended/suites_tutorial.py +++ b/examples/30_extended/suites_tutorial.py @@ -24,6 +24,7 @@ # connects to the test server at test.openml.org before doing so. # This prevents the main server from crowding with example datasets, # tasks, runs, and so on. 
+# ############################################################################ From 95fbb0536e80784af9c2b534d3276f67afe51b77 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Wed, 7 Apr 2021 10:13:56 +0200 Subject: [PATCH 02/14] fix a few links --- doc/api.rst | 24 +++++++++++++++++++ doc/conf.py | 5 ++++ .../30_extended/create_upload_tutorial.py | 8 +++---- examples/30_extended/flow_id_tutorial.py | 2 +- openml/study/study.py | 4 ---- openml/tasks/task.py | 10 -------- 6 files changed, 34 insertions(+), 19 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 8a72e6b69..9343cb3e6 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -7,6 +7,8 @@ APIs Top-level Classes ----------------- +.. automodule:: openml + .. currentmodule:: openml .. autosummary:: @@ -60,6 +62,8 @@ Modules :mod:`openml.datasets`: Dataset Functions ----------------------------------------- +.. automodule:: openml.datasets + .. currentmodule:: openml.datasets .. autosummary:: @@ -79,6 +83,8 @@ Modules :mod:`openml.evaluations`: Evaluation Functions ----------------------------------------------- +.. automodule:: openml.evaluations + .. currentmodule:: openml.evaluations .. autosummary:: @@ -91,6 +97,8 @@ Modules :mod:`openml.flows`: Flow Functions ----------------------------------- +.. automodule:: openml.flows + .. currentmodule:: openml.flows .. autosummary:: @@ -104,6 +112,8 @@ Modules :mod:`openml.runs`: Run Functions ---------------------------------- +.. automodule:: openml.runs + .. currentmodule:: openml.runs .. autosummary:: @@ -122,6 +132,8 @@ Modules :mod:`openml.setups`: Setup Functions ------------------------------------- +.. automodule:: openml.setups + .. currentmodule:: openml.setups .. autosummary:: @@ -135,6 +147,8 @@ Modules :mod:`openml.study`: Study Functions ------------------------------------ +.. automodule:: openml.study + .. currentmodule:: openml.study .. 
autosummary:: @@ -158,6 +172,16 @@ Modules :mod:`openml.tasks`: Task Functions ----------------------------------- +.. automodule:: openml.tasks + +.. currentmodule:: openml.tasks + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + TaskType + .. currentmodule:: openml.tasks .. autosummary:: diff --git a/doc/conf.py b/doc/conf.py index 921ceded3..1f016561b 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -114,6 +114,11 @@ # If true, keep warnings as "system message" paragraphs in the built documents. # keep_warnings = False +# Complain about all broken internal links - broken external links can be +# found with `make linkcheck` +# +# currently disabled because without intersphinx we cannot link to numpy.ndarray +# nitpicky = True # -- Options for HTML output ---------------------------------------------- diff --git a/examples/30_extended/create_upload_tutorial.py b/examples/30_extended/create_upload_tutorial.py index a4e1d9655..96ffb621d 100644 --- a/examples/30_extended/create_upload_tutorial.py +++ b/examples/30_extended/create_upload_tutorial.py @@ -200,8 +200,8 @@ # storing the type of data for each column as well as the attribute names. # Therefore, when providing a Pandas DataFrame, OpenML can infer this # information without needing to explicitly provide it when calling the -# function :func:`create_dataset`. In this regard, you only need to pass -# ``'auto'`` to the ``attributes`` parameter. +# function :func:`openml.datasets.create_dataset`. In this regard, you only +# need to pass ``'auto'`` to the ``attributes`` parameter. df = pd.DataFrame(data, columns=[col_name for col_name, _ in attribute_names]) # enforce the categorical column to have a categorical dtype @@ -214,8 +214,8 @@ # We enforce the column 'outlook' and 'play' to be a categorical # dtype while the column 'windy' is kept as a boolean column. 'temperature' # and 'humidity' are kept as numeric columns. 
Then, we can -# call :func:`create_dataset` by passing the dataframe and fixing the parameter -# ``attributes`` to ``'auto'``. +# call :func:`openml.datasets.create_dataset` by passing the dataframe and +# fixing the parameter ``attributes`` to ``'auto'``. weather_dataset = create_dataset( name="Weather", diff --git a/examples/30_extended/flow_id_tutorial.py b/examples/30_extended/flow_id_tutorial.py index e77df8d1a..d9465575e 100644 --- a/examples/30_extended/flow_id_tutorial.py +++ b/examples/30_extended/flow_id_tutorial.py @@ -35,7 +35,7 @@ # This piece of code is rather involved. First, it retrieves a # :class:`~openml.extensions.Extension` which is registered and can handle the given model, # in our case it is :class:`openml.extensions.sklearn.SklearnExtension`. Second, the extension -# converts the classifier into an instance of :class:`openml.flow.OpenMLFlow`. Third and finally, +# converts the classifier into an instance of :class:`openml.OpenMLFlow`. Third and finally, # the publish method checks whether the current flow is already present on OpenML. If not, # it uploads the flow, otherwise, it updates the current instance with all information computed # by the server (which is obviously also done when uploading/publishing a flow). diff --git a/openml/study/study.py b/openml/study/study.py index 2b00bb05c..dbbef6e89 100644 --- a/openml/study/study.py +++ b/openml/study/study.py @@ -186,8 +186,6 @@ class OpenMLStudy(BaseStudy): According to this list of run ids, the study object receives a list of OpenML object ids (datasets, flows, tasks and setups). - Inherits from :class:`openml.BaseStudy` - Parameters ---------- study_id : int @@ -268,8 +266,6 @@ class OpenMLBenchmarkSuite(BaseStudy): According to this list of task ids, the suite object receives a list of OpenML object ids (datasets). 
- Inherits from :class:`openml.BaseStudy` - Parameters ---------- suite_id : int diff --git a/openml/tasks/task.py b/openml/tasks/task.py index ab54db780..6a1f2a4c5 100644 --- a/openml/tasks/task.py +++ b/openml/tasks/task.py @@ -199,8 +199,6 @@ def _parse_publish_response(self, xml_response: Dict): class OpenMLSupervisedTask(OpenMLTask, ABC): """OpenML Supervised Classification object. - Inherited from :class:`openml.OpenMLTask` - Parameters ---------- target_name : str @@ -293,8 +291,6 @@ def estimation_parameters(self, est_parameters): class OpenMLClassificationTask(OpenMLSupervisedTask): """OpenML Classification object. - Inherited from :class:`openml.OpenMLSupervisedTask` - Parameters ---------- class_labels : List of str (optional) @@ -338,8 +334,6 @@ def __init__( class OpenMLRegressionTask(OpenMLSupervisedTask): """OpenML Regression object. - - Inherited from :class:`openml.OpenMLSupervisedTask` """ def __init__( @@ -372,8 +366,6 @@ def __init__( class OpenMLClusteringTask(OpenMLTask): """OpenML Clustering object. - Inherited from :class:`openml.OpenMLTask` - Parameters ---------- target_name : str (optional) @@ -451,8 +443,6 @@ def _to_dict(self) -> "OrderedDict[str, OrderedDict]": class OpenMLLearningCurveTask(OpenMLClassificationTask): """OpenML Learning Curve object. 
- - Inherited from :class:`openml.OpenMLClassificationTask` """ def __init__( From 3101b4b144beeae479d99bc9cec98f5d2f793ad0 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Wed, 7 Apr 2021 11:12:37 +0200 Subject: [PATCH 03/14] fix a bunch of links --- doc/contributing.rst | 10 +++++----- doc/index.rst | 2 +- doc/usage.rst | 10 +++++----- examples/20_basic/introduction_tutorial.py | 2 +- examples/30_extended/create_upload_tutorial.py | 2 +- examples/30_extended/task_manual_iteration_tutorial.py | 2 +- examples/30_extended/tasks_tutorial.py | 6 +++--- openml/flows/functions.py | 2 +- openml/runs/functions.py | 8 ++++---- 9 files changed, 22 insertions(+), 22 deletions(-) diff --git a/doc/contributing.rst b/doc/contributing.rst index 354a91d1c..63c607365 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -19,7 +19,7 @@ In particular, a few ways to contribute to openml-python are: For more information, see the :ref:`extensions` below. * Bug reports. If something doesn't work for you or is cumbersome, please open a new issue to let - us know about the problem. See `this section `_. + us know about the problem. See `this section `_. * `Cite OpenML `_ if you use it in a scientific publication. @@ -38,7 +38,7 @@ Content of the Library To leverage support from the community and to tap in the potential of OpenML, interfacing with popular machine learning libraries is essential. However, the OpenML-Python team does not have the capacity to develop and maintain such interfaces on its own. For this, we -have built an extension interface to allows others to contribute back. Building a suitable +have built an extension interface to allows others to contribute back. Building a suitable extension for therefore requires an understanding of the current OpenML-Python support. 
`This example `_ @@ -61,7 +61,7 @@ API Interfacing with OpenML-Python ++++++++++++++++++++++++++++++ -Once the new extension class has been defined, the openml-python module to +Once the new extension class has been defined, the openml-python module to :meth:`openml.extensions.register_extension` must be called to allow OpenML-Python to interface the new extension. @@ -73,8 +73,8 @@ Each extension created should be a stand-alone repository, compatible with the `OpenML-Python repository `_. The extension repository should work off-the-shelf with *OpenML-Python* installed. -Create a `public Github repo `_ with -the following directory structure: +Create a `public Github repo `_ +with the following directory structure: :: diff --git a/doc/index.rst b/doc/index.rst index b78b7c009..4672598e4 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -70,7 +70,7 @@ Further information * `OpenML documentation `_ * `OpenML client APIs `_ -* `OpenML developer guide `_ +* `OpenML developer guide `_ * `Contact information `_ * `Citation request `_ * `OpenML blog `_ diff --git a/doc/usage.rst b/doc/usage.rst index 77c1b358d..3a74f8989 100644 --- a/doc/usage.rst +++ b/doc/usage.rst @@ -14,7 +14,7 @@ User Guide This document will guide you through the most important use cases, functions and classes in the OpenML Python API. Throughout this document, we will use -`pandas `_ to format and filter tables. +`pandas `_ to format and filter tables. .. _installation: @@ -29,7 +29,7 @@ machine learning algorithms on them and then share the results online. The following tutorial gives a short introduction on how to install and set up the OpenML Python connector, followed up by a simple example. -* `Introduction `_ +* `Introduction `_ ~~~~~~~~~~~~~ Configuration @@ -95,7 +95,7 @@ for which a flow should be optimized. 
Below you can find our tutorial regarding tasks and if you want to know more you can read the `OpenML guide `_: -* `Tasks `_ +* `Tasks `_ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Running machine learning algorithms and uploading results @@ -118,14 +118,14 @@ automatically calculates several metrics which can be used to compare the performance of different flows to each other. So far, the OpenML Python connector works only with estimator objects following -the `scikit-learn estimator API `_. +the `scikit-learn estimator API `_. Those can be directly run on a task, and a flow will automatically be created or downloaded from the server if it already exists. The next tutorial covers how to train different machine learning models, how to run machine learning models on OpenML data and how to share the results: -* `Flows and Runs `_ +* `Flows and Runs `_ ~~~~~~~~ Datasets diff --git a/examples/20_basic/introduction_tutorial.py b/examples/20_basic/introduction_tutorial.py index 151692fdc..6bdc2edcd 100644 --- a/examples/20_basic/introduction_tutorial.py +++ b/examples/20_basic/introduction_tutorial.py @@ -26,7 +26,7 @@ # pip install openml # # For further information, please check out the installation guide at -# https://openml.github.io/openml-python/master/contributing.html#installation +# :ref:`installation`. # ############################################################################ diff --git a/examples/30_extended/create_upload_tutorial.py b/examples/30_extended/create_upload_tutorial.py index 96ffb621d..a5803f49d 100644 --- a/examples/30_extended/create_upload_tutorial.py +++ b/examples/30_extended/create_upload_tutorial.py @@ -126,7 +126,7 @@ # OrderedDicts in the case of sparse data. 
# # Weather dataset: -# http://storm.cis.fordham.edu/~gweiss/data-mining/datasets.html +# https://storm.cis.fordham.edu/~gweiss/data-mining/datasets.html data = [ ["sunny", 85, 85, "FALSE", "no"], diff --git a/examples/30_extended/task_manual_iteration_tutorial.py b/examples/30_extended/task_manual_iteration_tutorial.py index 533f645b2..c30ff66a3 100644 --- a/examples/30_extended/task_manual_iteration_tutorial.py +++ b/examples/30_extended/task_manual_iteration_tutorial.py @@ -6,7 +6,7 @@ ``openml.runs.run_model_on_task`` which automatically runs the model on all splits of the task. However, sometimes it is necessary to manually split a dataset to perform experiments outside of the functions provided by OpenML. One such example is in the benchmark library -`HPOlib2 `_ which extensively uses data from OpenML, +`HPOBench `_ which extensively uses data from OpenML, but not OpenML's functionality to conduct runs. """ diff --git a/examples/30_extended/tasks_tutorial.py b/examples/30_extended/tasks_tutorial.py index c755d265e..2166d5a03 100644 --- a/examples/30_extended/tasks_tutorial.py +++ b/examples/30_extended/tasks_tutorial.py @@ -36,7 +36,7 @@ ############################################################################ # **openml.tasks.list_tasks()** returns a dictionary of dictionaries by default, which we convert # into a -# `pandas dataframe `_ +# `pandas dataframe `_ # to have better visualization capabilities and easier access: tasks = pd.DataFrame.from_dict(tasks, orient="index") @@ -76,7 +76,7 @@ ############################################################################ # Resampling strategies can be found on the -# `OpenML Website `_. +# `OpenML Website `_. # # Similar to listing tasks by task type, we can list tasks by tags: @@ -105,7 +105,7 @@ # instances per task. To make things easier, the tasks do not contain highly # unbalanced data and sparse data. However, the tasks include missing values and # categorical features. 
You can find out more about the *OpenML 100* on -# `the OpenML benchmarking page `_. +# `the OpenML benchmarking page `_. # # Finally, it is also possible to list all tasks on OpenML with: diff --git a/openml/flows/functions.py b/openml/flows/functions.py index 5e8e9dc93..7211cfe40 100644 --- a/openml/flows/functions.py +++ b/openml/flows/functions.py @@ -245,7 +245,7 @@ def flow_exists(name: str, external_version: str) -> Union[int, bool]: Notes ----- - see http://www.openml.org/api_docs/#!/flow/get_flow_exists_name_version + see https://www.openml.org/api_docs/#!/flow/get_flow_exists_name_version """ if not (isinstance(name, str) and len(name) > 0): raise ValueError("Argument 'name' should be a non-empty string") diff --git a/openml/runs/functions.py b/openml/runs/functions.py index 92044a1b4..8bbe3b956 100644 --- a/openml/runs/functions.py +++ b/openml/runs/functions.py @@ -63,8 +63,8 @@ def run_model_on_task( ---------- model : sklearn model A model which has a function fit(X,Y) and predict(X), - all supervised estimators of scikit learn follow this definition of a model [1] - [1](http://scikit-learn.org/stable/tutorial/statistical_inference/supervised_learning.html) + all supervised estimators of scikit learn follow this definition of a model + (https://scikit-learn.org/stable/tutorial/statistical_inference/supervised_learning.html) task : OpenMLTask or int or str Task to perform or Task id. This may be a model instead if the first argument is an OpenMLTask. @@ -166,8 +166,8 @@ def run_flow_on_task( flow : OpenMLFlow A flow wraps a machine learning model together with relevant information. 
The model has a function fit(X,Y) and predict(X), - all supervised estimators of scikit learn follow this definition of a model [1] - [1](http://scikit-learn.org/stable/tutorial/statistical_inference/supervised_learning.html) + all supervised estimators of scikit learn follow this definition of a model + (https://scikit-learn.org/stable/tutorial/statistical_inference/supervised_learning.html) task : OpenMLTask Task to perform. This may be an OpenMLFlow instead if the first argument is an OpenMLTask. avoid_duplicate_runs : bool, optional (default=True) From 733c681648aae299ca52fe70774e77602c828274 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Wed, 7 Apr 2021 13:45:20 +0200 Subject: [PATCH 04/14] fix more links --- doc/api.rst | 4 ++-- doc/contributing.rst | 4 ++-- doc/index.rst | 4 ++-- doc/usage.rst | 10 +++++----- examples/20_basic/introduction_tutorial.py | 4 ++-- examples/20_basic/simple_suites_tutorial.py | 7 +++---- examples/30_extended/configure_logging.py | 2 +- examples/30_extended/suites_tutorial.py | 2 +- examples/README.txt | 6 +++--- 9 files changed, 21 insertions(+), 22 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 9343cb3e6..96a52e3ee 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -2,8 +2,8 @@ .. _api: -APIs -**** +API +*** Top-level Classes ----------------- diff --git a/doc/contributing.rst b/doc/contributing.rst index 63c607365..82ce0163d 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -41,7 +41,7 @@ not have the capacity to develop and maintain such interfaces on its own. For th have built an extension interface to allows others to contribute back. Building a suitable extension for therefore requires an understanding of the current OpenML-Python support. -`This example `_ +The :ref:`sphx_glr_examples_20_basic_simple_flows_and_runs_tutorial.py` tutorial shows how scikit-learn currently works with OpenML-Python as an extension. 
The *sklearn* extension packaged with the `openml-python `_ repository can be used as a template/benchmark to build the new extension. @@ -50,7 +50,7 @@ repository can be used as a template/benchmark to build the new extension. API +++ * The extension scripts must import the `openml` package and be able to interface with - any function from the OpenML-Python `API `_. + any function from the OpenML-Python :ref:`api`. * The extension has to be defined as a Python class and must inherit from :class:`openml.extensions.Extension`. * This class needs to have all the functions from `class Extension` overloaded as required. diff --git a/doc/index.rst b/doc/index.rst index 4672598e4..c4164dc82 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -40,7 +40,7 @@ Example run.publish() print(f'View the run online: {run.openml_url}') -You can find more examples in our `examples gallery `_. +You can find more examples in our :ref:`sphx_glr_examples`. ---------------------------- How to get OpenML for python @@ -60,7 +60,7 @@ Content * :ref:`usage` * :ref:`api` -* `Examples `_ +* :ref:`sphx_glr_examples` * :ref:`contributing` * :ref:`progress` diff --git a/doc/usage.rst b/doc/usage.rst index 3a74f8989..2e71e9476 100644 --- a/doc/usage.rst +++ b/doc/usage.rst @@ -29,7 +29,7 @@ machine learning algorithms on them and then share the results online. The following tutorial gives a short introduction on how to install and set up the OpenML Python connector, followed up by a simple example. -* `Introduction `_ +* :ref:`sphx_glr_examples_20_basic_introduction_tutorial.py` ~~~~~~~~~~~~~ Configuration @@ -95,7 +95,7 @@ for which a flow should be optimized. 
Below you can find our tutorial regarding tasks and if you want to know more you can read the `OpenML guide `_: -* `Tasks `_ +* :ref:`sphx_glr_examples_30_extended_tasks_tutorial.py` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Running machine learning algorithms and uploading results @@ -125,7 +125,7 @@ downloaded from the server if it already exists. The next tutorial covers how to train different machine learning models, how to run machine learning models on OpenML data and how to share the results: -* `Flows and Runs `_ +* :ref:`sphx_glr_examples_20_basic_simple_flows_and_runs_tutorial.py` ~~~~~~~~ Datasets @@ -140,12 +140,12 @@ available metadata. The tutorial which follows explains how to get a list of datasets, how to filter the list to find the dataset that suits your requirements and how to download a dataset: -* `Filter and explore datasets `_ +* :ref:`sphx_glr_examples_30_extended_datasets_tutorial.py` OpenML is about sharing machine learning results and the datasets they were obtained on. Learn how to share your datasets in the following tutorial: -* `Upload a dataset `_ +* :ref:`sphx_glr_examples_30_extended_create_upload_tutorial.py` *********************** Extending OpenML-Python diff --git a/examples/20_basic/introduction_tutorial.py b/examples/20_basic/introduction_tutorial.py index 6bdc2edcd..b132469a9 100644 --- a/examples/20_basic/introduction_tutorial.py +++ b/examples/20_basic/introduction_tutorial.py @@ -1,6 +1,6 @@ """ -Setup -===== +Introduction tutorial & Setup +============================= An example how to set up OpenML-Python followed up by a simple example. 
""" diff --git a/examples/20_basic/simple_suites_tutorial.py b/examples/20_basic/simple_suites_tutorial.py index 37f1eeffb..92dfb3c04 100644 --- a/examples/20_basic/simple_suites_tutorial.py +++ b/examples/20_basic/simple_suites_tutorial.py @@ -62,7 +62,6 @@ # Further examples # ================ # -# * `Advanced benchmarking suites tutorial <../30_extended/suites_tutorial.html>`_ -# * `Benchmarking studies tutorial <../30_extended/study_tutorial.html>`_ -# * `Using studies to compare linear and non-linear classifiers -# <../40_paper/2018_ida_strang_example.html>`_ +# * :ref:`sphx_glr_examples_30_extended_suites_tutorial.py` +# * :ref:`sphx_glr_examples_30_extended_study_tutorial.py` +# * :ref:`sphx_glr_examples_40_paper_2018_ida_strang_example.py` diff --git a/examples/30_extended/configure_logging.py b/examples/30_extended/configure_logging.py index 185127f42..2dae4047f 100644 --- a/examples/30_extended/configure_logging.py +++ b/examples/30_extended/configure_logging.py @@ -14,7 +14,7 @@ # By default, openml-python will print log messages of level `WARNING` and above to console. # All log messages (including `DEBUG` and `INFO`) are also saved in a file, which can be # found in your cache directory (see also the -# `introduction tutorial <../20_basic/introduction_tutorial.html>`_). +# :ref:`sphx_glr_examples_20_basic_introduction_tutorial.py`). # These file logs are automatically deleted if needed, and use at most 2MB of space. # # It is possible to configure what log levels to send to console and file. diff --git a/examples/30_extended/suites_tutorial.py b/examples/30_extended/suites_tutorial.py index 81447764c..cc26b78db 100644 --- a/examples/30_extended/suites_tutorial.py +++ b/examples/30_extended/suites_tutorial.py @@ -6,7 +6,7 @@ How to list, download and upload benchmark suites. 
If you want to learn more about benchmark suites, check out our -`brief introductory tutorial <../20_basic/simple_suites_tutorial.html>`_ or the +brief introductory tutorial :ref:`sphx_glr_examples_20_basic_simple_suites_tutorial.py` or the `OpenML benchmark docs `_. """ ############################################################################ diff --git a/examples/README.txt b/examples/README.txt index b90c0e1cb..332a5b990 100644 --- a/examples/README.txt +++ b/examples/README.txt @@ -1,3 +1,3 @@ -======== -Examples -======== +================ +Examples Gallery +================ From b591045122592da68b8c3ed386973561be669dbc Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Wed, 7 Apr 2021 16:27:50 +0200 Subject: [PATCH 05/14] fix all remaining links --- doc/contributing.rst | 2 +- doc/usage.rst | 2 +- examples/20_basic/introduction_tutorial.py | 4 +-- .../simple_flows_and_runs_tutorial.py | 4 +-- .../30_extended/create_upload_tutorial.py | 4 +-- .../30_extended/flows_and_runs_tutorial.py | 28 +++++++++---------- .../40_paper/2015_neurips_feurer_example.py | 2 +- examples/40_paper/2018_kdd_rijn_example.py | 2 +- .../40_paper/2018_neurips_perrone_example.py | 2 +- openml/__init__.py | 2 +- openml/flows/flow.py | 5 ++-- setup.py | 2 +- 12 files changed, 30 insertions(+), 29 deletions(-) diff --git a/doc/contributing.rst b/doc/contributing.rst index 82ce0163d..927c21034 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -19,7 +19,7 @@ In particular, a few ways to contribute to openml-python are: For more information, see the :ref:`extensions` below. * Bug reports. If something doesn't work for you or is cumbersome, please open a new issue to let - us know about the problem. See `this section `_. + us know about the problem. See `this section `_. * `Cite OpenML `_ if you use it in a scientific publication. 
diff --git a/doc/usage.rst b/doc/usage.rst index 2e71e9476..72d22647d 100644 --- a/doc/usage.rst +++ b/doc/usage.rst @@ -118,7 +118,7 @@ automatically calculates several metrics which can be used to compare the performance of different flows to each other. So far, the OpenML Python connector works only with estimator objects following -the `scikit-learn estimator API `_. +the `scikit-learn estimator API `_. Those can be directly run on a task, and a flow will automatically be created or downloaded from the server if it already exists. diff --git a/examples/20_basic/introduction_tutorial.py b/examples/20_basic/introduction_tutorial.py index b132469a9..4382a6777 100644 --- a/examples/20_basic/introduction_tutorial.py +++ b/examples/20_basic/introduction_tutorial.py @@ -38,7 +38,7 @@ # You will receive an API key, which will authenticate you to the server # and allow you to download and upload datasets, tasks, runs and flows. # -# * Create an OpenML account (free) on http://www.openml.org. +# * Create an OpenML account (free) on https://www.openml.org. # * After logging in, open your account page (avatar on the top right) # * Open 'Account Settings', then 'API authentication' to find your API key. # @@ -99,7 +99,7 @@ # For this tutorial, our configuration publishes to the test server # as to not crowd the main server with runs created by examples. 
myrun = run.publish() -print(f"kNN on {data.name}: http://test.openml.org/r/{myrun.run_id}") +print(f"kNN on {data.name}: https://test.openml.org/r/{myrun.run_id}") ############################################################################ openml.config.stop_using_configuration_for_example() diff --git a/examples/20_basic/simple_flows_and_runs_tutorial.py b/examples/20_basic/simple_flows_and_runs_tutorial.py index e88add911..db1ce5438 100644 --- a/examples/20_basic/simple_flows_and_runs_tutorial.py +++ b/examples/20_basic/simple_flows_and_runs_tutorial.py @@ -42,8 +42,8 @@ # ================== myrun = run.publish() -print("Run was uploaded to http://test.openml.org/r/" + str(myrun.run_id)) -print("The flow can be found at http://test.openml.org/f/" + str(myrun.flow_id)) +print("Run was uploaded to https://test.openml.org/r/" + str(myrun.run_id)) +print("The flow can be found at https://test.openml.org/f/" + str(myrun.flow_id)) ############################################################################ openml.config.stop_using_configuration_for_example() diff --git a/examples/30_extended/create_upload_tutorial.py b/examples/30_extended/create_upload_tutorial.py index a5803f49d..f80726396 100644 --- a/examples/30_extended/create_upload_tutorial.py +++ b/examples/30_extended/create_upload_tutorial.py @@ -67,7 +67,7 @@ "Robert Tibshirani (2004) (Least Angle Regression) " "Annals of Statistics (with discussion), 407-499" ) -paper_url = "http://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf" +paper_url = "https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf" ############################################################################ # Create the dataset object @@ -110,7 +110,7 @@ data=data, # A version label which is provided by the user. 
version_label="test", - original_data_url="http://www4.stat.ncsu.edu/~boos/var.select/diabetes.html", + original_data_url="https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html", paper_url=paper_url, ) diff --git a/examples/30_extended/flows_and_runs_tutorial.py b/examples/30_extended/flows_and_runs_tutorial.py index 9f8c89375..4fdaede7a 100644 --- a/examples/30_extended/flows_and_runs_tutorial.py +++ b/examples/30_extended/flows_and_runs_tutorial.py @@ -69,7 +69,7 @@ myrun = run.publish() # For this tutorial, our configuration publishes to the test server # as to not pollute the main server. -print("Uploaded to http://test.openml.org/r/" + str(myrun.run_id)) +print("Uploaded to https://test.openml.org/r/" + str(myrun.run_id)) ############################################################################ # We can now also inspect the flow object which was automatically created: @@ -115,7 +115,7 @@ run = openml.runs.run_model_on_task(pipe, task, avoid_duplicate_runs=False) myrun = run.publish() -print("Uploaded to http://test.openml.org/r/" + str(myrun.run_id)) +print("Uploaded to https://test.openml.org/r/" + str(myrun.run_id)) # The above pipeline works with the helper functions that internally deal with pandas DataFrame. @@ -159,7 +159,7 @@ run = openml.runs.run_model_on_task(pipe, task, avoid_duplicate_runs=False, dataset_format="array") myrun = run.publish() -print("Uploaded to http://test.openml.org/r/" + str(myrun.run_id)) +print("Uploaded to https://test.openml.org/r/" + str(myrun.run_id)) ############################################################################### # Running flows on tasks offline for later upload @@ -210,16 +210,16 @@ # compare your results with the rest of the class and learn from # them. 
Some tasks you could try (or browse openml.org): # -# * EEG eye state: data_id:`1471 `_, -# task_id:`14951 `_ -# * Volcanoes on Venus: data_id:`1527 `_, -# task_id:`10103 `_ -# * Walking activity: data_id:`1509 `_, -# task_id:`9945 `_, 150k instances. -# * Covertype (Satellite): data_id:`150 `_, -# task_id:`218 `_, 500k instances. -# * Higgs (Physics): data_id:`23512 `_, -# task_id:`52950 `_, 100k instances, missing values. +# * EEG eye state: data_id:`1471 `_, +# task_id:`14951 `_ +# * Volcanoes on Venus: data_id:`1527 `_, +# task_id:`10103 `_ +# * Walking activity: data_id:`1509 `_, +# task_id:`9945 `_, 150k instances. +# * Covertype (Satellite): data_id:`150 `_, +# task_id:`218 `_, 500k instances. +# * Higgs (Physics): data_id:`23512 `_, +# task_id:`52950 `_, 100k instances, missing values. # Easy benchmarking: for task_id in [115]: # Add further tasks. Disclaimer: they might take some time @@ -229,7 +229,7 @@ run = openml.runs.run_model_on_task(clf, task, avoid_duplicate_runs=False) myrun = run.publish() - print(f"kNN on {data.name}: http://test.openml.org/r/{myrun.run_id}") + print(f"kNN on {data.name}: https://test.openml.org/r/{myrun.run_id}") ############################################################################ diff --git a/examples/40_paper/2015_neurips_feurer_example.py b/examples/40_paper/2015_neurips_feurer_example.py index 733a436ad..721186016 100644 --- a/examples/40_paper/2015_neurips_feurer_example.py +++ b/examples/40_paper/2015_neurips_feurer_example.py @@ -12,7 +12,7 @@ | Efficient and Robust Automated Machine Learning | Matthias Feurer, Aaron Klein, Katharina Eggensperger, Jost Springenberg, Manuel Blum and Frank Hutter | In *Advances in Neural Information Processing Systems 28*, 2015 -| Available at http://papers.nips.cc/paper/5872-efficient-and-robust-automated-machine-learning.pdf +| Available at https://papers.nips.cc/paper/5872-efficient-and-robust-automated-machine-learning.pdf """ # noqa F401 # License: BSD 3-Clause diff --git 
a/examples/40_paper/2018_kdd_rijn_example.py b/examples/40_paper/2018_kdd_rijn_example.py index 752419ea3..d3ce59f35 100644 --- a/examples/40_paper/2018_kdd_rijn_example.py +++ b/examples/40_paper/2018_kdd_rijn_example.py @@ -13,7 +13,7 @@ | Hyperparameter importance across datasets | Jan N. van Rijn and Frank Hutter | In *Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining*, 2018 -| Available at https://dl.acm.org/citation.cfm?id=3220058 +| Available at https://dl.acm.org/doi/10.1145/3219819.3220058 """ # License: BSD 3-Clause diff --git a/examples/40_paper/2018_neurips_perrone_example.py b/examples/40_paper/2018_neurips_perrone_example.py index 5ae339ae2..0d72846ac 100644 --- a/examples/40_paper/2018_neurips_perrone_example.py +++ b/examples/40_paper/2018_neurips_perrone_example.py @@ -11,7 +11,7 @@ | Scalable Hyperparameter Transfer Learning | Valerio Perrone and Rodolphe Jenatton and Matthias Seeger and Cedric Archambeau | In *Advances in Neural Information Processing Systems 31*, 2018 -| Available at http://papers.nips.cc/paper/7917-scalable-hyperparameter-transfer-learning.pdf +| Available at https://papers.nips.cc/paper/7917-scalable-hyperparameter-transfer-learning.pdf This example demonstrates how OpenML runs can be used to construct a surrogate model. diff --git a/openml/__init__.py b/openml/__init__.py index 0bab3b1d5..abb83ac0c 100644 --- a/openml/__init__.py +++ b/openml/__init__.py @@ -12,7 +12,7 @@ In particular, this module implements a python interface for the `OpenML REST API `_ (`REST on wikipedia -`_). +`_). """ # License: BSD 3-Clause diff --git a/openml/flows/flow.py b/openml/flows/flow.py index 2acbcb0d1..2a340e625 100644 --- a/openml/flows/flow.py +++ b/openml/flows/flow.py @@ -19,8 +19,9 @@ class OpenMLFlow(OpenMLBase): :meth:`openml.flows.create_flow_from_model`. Using this helper function ensures that all relevant fields are filled in. 
- Implements https://github.com/openml/website/blob/master/openml_OS/ \ - views/pages/api_new/v1/xsd/openml.implementation.upload.xsd. + Implements `openml.implementation.upload.xsd + `_. Parameters ---------- diff --git a/setup.py b/setup.py index 2d2a638b5..14a59e241 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ long_description=README, long_description_content_type="text/markdown", license="BSD 3-clause", - url="http://openml.org/", + url="https://openml.org/", project_urls={ "Documentation": "https://openml.github.io/openml-python/", "Source Code": "https://github.com/openml/openml-python", From 0c79294b33a64e1fa544e8c2274527697cae9e5e Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Wed, 7 Apr 2021 16:44:30 +0200 Subject: [PATCH 06/14] and finally add the link checker --- .github/workflows/docs.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 2219c7fac..9749f4f04 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -17,6 +17,9 @@ jobs: run: | cd doc make html + - name: Check links + run: | + make linkcheck - name: Pull latest gh-pages if: (contains(github.ref, 'develop') || contains(github.ref, 'master')) && github.event_name == 'push' run: | From 35d0ac38b427b2cec5da3cfbbac994f0abeccc90 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Wed, 7 Apr 2021 17:00:03 +0200 Subject: [PATCH 07/14] debug workflow --- .github/workflows/docs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 9749f4f04..e52323de9 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -16,9 +16,9 @@ jobs: - name: Make docs run: | cd doc - make html - name: Check links run: | + pwd make linkcheck - name: Pull latest gh-pages if: (contains(github.ref, 'develop') || contains(github.ref, 'master')) && github.event_name == 'push' From 9e5bb44b28b2f7d806303e4769dd23a7130c035d 
Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Wed, 7 Apr 2021 17:08:21 +0200 Subject: [PATCH 08/14] more debug --- .github/workflows/docs.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index e52323de9..2fd5fff7a 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -18,6 +18,7 @@ jobs: cd doc - name: Check links run: | + cd doc pwd make linkcheck - name: Pull latest gh-pages From f81d61cc1931d32632b5833cb171f4dc480ef7d8 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Wed, 7 Apr 2021 17:18:34 +0200 Subject: [PATCH 09/14] undo debug --- .github/workflows/docs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 2fd5fff7a..ab83aef5c 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -16,10 +16,10 @@ jobs: - name: Make docs run: | cd doc + make html - name: Check links run: | cd doc - pwd make linkcheck - name: Pull latest gh-pages if: (contains(github.ref, 'develop') || contains(github.ref, 'master')) && github.event_name == 'push' From 61389b7d12f37e30a28c88c3215653696e9b6873 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Fri, 9 Apr 2021 10:56:08 +0200 Subject: [PATCH 10/14] Add to changelog --- doc/progress.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/doc/progress.rst b/doc/progress.rst index f27dd1137..1e80abb14 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -6,6 +6,14 @@ Changelog ========= +0.12.2 +~~~~~~ + +* DOC: Fixes a few broken links in the documentation. +* MAINT/DOC: Automatically check for broken external links when building the documentation. +* MAINT/DOC: Fail documentation building on warnings. This will make the documentation building + fail if a reference cannot be found (i.e. an internal link is broken). 
+ 0.12.1 ~~~~~~ From 2b180efb9d83877b9fdf08a530888cca8ac90a3d Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Fri, 9 Apr 2021 14:22:27 +0200 Subject: [PATCH 11/14] fix new warning --- openml/flows/functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openml/flows/functions.py b/openml/flows/functions.py index 7211cfe40..73a67ff39 100644 --- a/openml/flows/functions.py +++ b/openml/flows/functions.py @@ -295,7 +295,7 @@ def get_flow_id( Returns ------- int or bool, List - flow id iff exists, ``False`` otherwise, List if exact_version is ``False`` + flow id iff exists, ``False`` otherwise, List if ``exact_version is False`` """ if model is None and name is None: raise ValueError( From 1f189818cc7f3e190fdbac23a157ab2c90c21be5 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Sat, 10 Apr 2021 11:29:24 +0200 Subject: [PATCH 12/14] clean up more errors --- doc/_templates/class.rst | 2 + doc/api.rst | 205 ++++++++++++++++++------- openml/extensions/sklearn/extension.py | 32 ++-- openml/flows/functions.py | 2 +- 4 files changed, 172 insertions(+), 69 deletions(-) diff --git a/doc/_templates/class.rst b/doc/_templates/class.rst index 307b0199c..72405badb 100644 --- a/doc/_templates/class.rst +++ b/doc/_templates/class.rst @@ -1,3 +1,5 @@ +:orphan: + :mod:`{{module}}`.{{objname}} {{ underline }}============== diff --git a/doc/api.rst b/doc/api.rst index 96a52e3ee..86bfd121e 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -5,64 +5,29 @@ API *** -Top-level Classes ------------------ -.. automodule:: openml - -.. currentmodule:: openml - -.. autosummary:: - :toctree: generated/ - :template: class.rst - - OpenMLBenchmarkSuite - OpenMLClassificationTask - OpenMLClusteringTask - OpenMLDataFeature - OpenMLDataset - OpenMLEvaluation - OpenMLFlow - OpenMLLearningCurveTask - OpenMLParameter - OpenMLRegressionTask - OpenMLRun - OpenMLSetup - OpenMLSplit - OpenMLStudy - OpenMLSupervisedTask - OpenMLTask +Modules +======= -.. 
_api_extensions: +:mod:`openml.datasets` +---------------------- +.. automodule:: openml.datasets + :no-members: + :no-inherited-members: -Extensions ----------- +Dataset Classes +~~~~~~~~~~~~~~~ -.. currentmodule:: openml.extensions +.. currentmodule:: openml.datasets .. autosummary:: :toctree: generated/ :template: class.rst - Extension - sklearn.SklearnExtension - -.. currentmodule:: openml.extensions - -.. autosummary:: - :toctree: generated/ - :template: function.rst - - get_extension_by_flow - get_extension_by_model - register_extension - - -Modules -------- + OpenMLDataFeature + OpenMLDataset -:mod:`openml.datasets`: Dataset Functions ------------------------------------------ -.. automodule:: openml.datasets +Dataset Functions +~~~~~~~~~~~~~~~~~ .. currentmodule:: openml.datasets @@ -81,9 +46,25 @@ Modules edit_dataset fork_dataset -:mod:`openml.evaluations`: Evaluation Functions ------------------------------------------------ +:mod:`openml.evaluations` +------------------------- .. automodule:: openml.evaluations + :no-members: + :no-inherited-members: + +Evaluations Classes +~~~~~~~~~~~~~~~~~~~ + +.. currentmodule:: openml.evaluations + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + OpenMLEvaluation + +Evaluations Functions +~~~~~~~~~~~~~~~~~~~~~ .. currentmodule:: openml.evaluations @@ -91,13 +72,29 @@ Modules :toctree: generated/ :template: function.rst - list_evaluations - list_evaluation_measures - list_evaluations_setups + list_evaluations + list_evaluation_measures + list_evaluations_setups :mod:`openml.flows`: Flow Functions ----------------------------------- .. automodule:: openml.flows + :no-members: + :no-inherited-members: + +Flow Classes +~~~~~~~~~~~~ + +.. currentmodule:: openml.flows + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + OpenMLFlow + +Flow Functions +~~~~~~~~~~~~~~ .. 
currentmodule:: openml.flows @@ -113,6 +110,22 @@ Modules :mod:`openml.runs`: Run Functions ---------------------------------- .. automodule:: openml.runs + :no-members: + :no-inherited-members: + +Run Classes +~~~~~~~~~~~ + +.. currentmodule:: openml.runs + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + OpenMLRun + +Run Functions +~~~~~~~~~~~~~ .. currentmodule:: openml.runs @@ -133,6 +146,23 @@ Modules :mod:`openml.setups`: Setup Functions ------------------------------------- .. automodule:: openml.setups + :no-members: + :no-inherited-members: + +Setup Classes +~~~~~~~~~~~~~ + +.. currentmodule:: openml.setups + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + OpenMLParameter + OpenMLSetup + +Setup Functions +~~~~~~~~~~~~~~~ .. currentmodule:: openml.setups @@ -148,6 +178,23 @@ Modules :mod:`openml.study`: Study Functions ------------------------------------ .. automodule:: openml.study + :no-members: + :no-inherited-members: + +Study Classes +~~~~~~~~~~~~~ + +.. currentmodule:: openml.study + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + OpenMLBenchmarkSuite + OpenMLStudy + +Study Functions +~~~~~~~~~~~~~~~ .. currentmodule:: openml.study @@ -173,6 +220,11 @@ Modules :mod:`openml.tasks`: Task Functions ----------------------------------- .. automodule:: openml.tasks + :no-members: + :no-inherited-members: + +Task Classes +~~~~~~~~~~~~ .. currentmodule:: openml.tasks @@ -180,8 +232,18 @@ Modules :toctree: generated/ :template: class.rst + OpenMLClassificationTask + OpenMLClusteringTask + OpenMLLearningCurveTask + OpenMLRegressionTask + OpenMLSplit + OpenMLSupervisedTask + OpenMLTask TaskType +Task Functions +~~~~~~~~~~~~~~ + .. currentmodule:: openml.tasks .. autosummary:: @@ -192,3 +254,38 @@ Modules get_task get_tasks list_tasks + +.. _api_extensions: + +Extensions +========== + +.. automodule:: openml.extensions + :no-members: + :no-inherited-members: + +Extension Classes +----------------- + +.. 
currentmodule:: openml.extensions + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + Extension + sklearn.SklearnExtension + +Extension Functions +------------------- + +.. currentmodule:: openml.extensions + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + get_extension_by_flow + get_extension_by_model + register_extension + diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py index a0c551e83..5991a7044 100644 --- a/openml/extensions/sklearn/extension.py +++ b/openml/extensions/sklearn/extension.py @@ -104,25 +104,29 @@ def can_handle_model(cls, model: Any) -> bool: def trim_flow_name( cls, long_name: str, extra_trim_length: int = 100, _outer: bool = True ) -> str: - """ Shorten generated sklearn flow name to at most `max_length` characters. + """ Shorten generated sklearn flow name to at most ``max_length`` characters. Flows are assumed to have the following naming structure: - (model_selection)? (pipeline)? (steps)+ + ``(model_selection)? (pipeline)? (steps)+`` and will be shortened to: - sklearn.(selection.)?(pipeline.)?(steps)+ + ``sklearn.(selection.)?(pipeline.)?(steps)+`` e.g. (white spaces and newlines added for readability) - sklearn.pipeline.Pipeline( - columntransformer=sklearn.compose._column_transformer.ColumnTransformer( - numeric=sklearn.pipeline.Pipeline( - imputer=sklearn.preprocessing.imputation.Imputer, - standardscaler=sklearn.preprocessing.data.StandardScaler), - nominal=sklearn.pipeline.Pipeline( - simpleimputer=sklearn.impute.SimpleImputer, - onehotencoder=sklearn.preprocessing._encoders.OneHotEncoder)), - variancethreshold=sklearn.feature_selection.variance_threshold.VarianceThreshold, - svc=sklearn.svm.classes.SVC) + + .. 
code :: + + sklearn.pipeline.Pipeline( + columntransformer=sklearn.compose._column_transformer.ColumnTransformer( + numeric=sklearn.pipeline.Pipeline( + imputer=sklearn.preprocessing.imputation.Imputer, + standardscaler=sklearn.preprocessing.data.StandardScaler), + nominal=sklearn.pipeline.Pipeline( + simpleimputer=sklearn.impute.SimpleImputer, + onehotencoder=sklearn.preprocessing._encoders.OneHotEncoder)), + variancethreshold=sklearn.feature_selection.variance_threshold.VarianceThreshold, + svc=sklearn.svm.classes.SVC) + -> - sklearn.Pipeline(ColumnTransformer,VarianceThreshold,SVC) + ``sklearn.Pipeline(ColumnTransformer,VarianceThreshold,SVC)`` Parameters ---------- diff --git a/openml/flows/functions.py b/openml/flows/functions.py index 73a67ff39..048fa92a4 100644 --- a/openml/flows/functions.py +++ b/openml/flows/functions.py @@ -288,7 +288,7 @@ def get_flow_id( name : str Name of the flow. Must provide either ``model`` or ``name``. exact_version : bool - Whether to return the ``flow_id`` of the exact version or all ``flow_id``s where the name + Whether to return the flow id of the exact version or all flow ids where the name of the flow matches. This is only taken into account for a model where a version number is available. From 7a00940507480894146ed7281d16763e2a34bff0 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Fri, 16 Apr 2021 09:27:43 +0200 Subject: [PATCH 13/14] Fix link after rebase --- doc/usage.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/usage.rst b/doc/usage.rst index 72d22647d..f65e8b86e 100644 --- a/doc/usage.rst +++ b/doc/usage.rst @@ -157,7 +157,8 @@ scikit-learn extension in :class:`openml.extensions.sklearn.SklearnExtension` as Runtime measurement is incorporated in the OpenML sklearn-extension. 
Example usage and potential usage for Hyperparameter Optimisation can be found in the example tutorial: -`HPO using OpenML `_ + +* :ref:`sphx_glr_examples_30_extended_fetch_runtimes_tutorial.py` Here is a list of currently maintained OpenML extensions: From 4c81ae0851b8f8a979310c4de7689f9b2e874c2c Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Tue, 20 Apr 2021 20:03:33 +0200 Subject: [PATCH 14/14] Apply suggestions from code review Co-authored-by: PGijsbers --- examples/20_basic/introduction_tutorial.py | 2 +- examples/20_basic/simple_flows_and_runs_tutorial.py | 4 ++-- examples/30_extended/flows_and_runs_tutorial.py | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/examples/20_basic/introduction_tutorial.py b/examples/20_basic/introduction_tutorial.py index 4382a6777..cc4e3aea0 100644 --- a/examples/20_basic/introduction_tutorial.py +++ b/examples/20_basic/introduction_tutorial.py @@ -99,7 +99,7 @@ # For this tutorial, our configuration publishes to the test server # as to not crowd the main server with runs created by examples. 
myrun = run.publish() -print(f"kNN on {data.name}: https://test.openml.org/r/{myrun.run_id}") +print(f"kNN on {data.name}: {myrun.openml_url}") ############################################################################ openml.config.stop_using_configuration_for_example() diff --git a/examples/20_basic/simple_flows_and_runs_tutorial.py b/examples/20_basic/simple_flows_and_runs_tutorial.py index db1ce5438..48740e800 100644 --- a/examples/20_basic/simple_flows_and_runs_tutorial.py +++ b/examples/20_basic/simple_flows_and_runs_tutorial.py @@ -42,8 +42,8 @@ # ================== myrun = run.publish() -print("Run was uploaded to https://test.openml.org/r/" + str(myrun.run_id)) -print("The flow can be found at https://test.openml.org/f/" + str(myrun.flow_id)) +print(f"Run was uploaded to {myrun.openml_url}") +print(f"The flow can be found at {myrun.flow.openml_url}") ############################################################################ openml.config.stop_using_configuration_for_example() diff --git a/examples/30_extended/flows_and_runs_tutorial.py b/examples/30_extended/flows_and_runs_tutorial.py index 4fdaede7a..bbf255e17 100644 --- a/examples/30_extended/flows_and_runs_tutorial.py +++ b/examples/30_extended/flows_and_runs_tutorial.py @@ -69,7 +69,7 @@ myrun = run.publish() # For this tutorial, our configuration publishes to the test server # as to not pollute the main server. -print("Uploaded to https://test.openml.org/r/" + str(myrun.run_id)) +print(f"Uploaded to {myrun.openml_url}") ############################################################################ # We can now also inspect the flow object which was automatically created: @@ -115,7 +115,7 @@ run = openml.runs.run_model_on_task(pipe, task, avoid_duplicate_runs=False) myrun = run.publish() -print("Uploaded to https://test.openml.org/r/" + str(myrun.run_id)) +print(f"Uploaded to {myrun.openml_url}") # The above pipeline works with the helper functions that internally deal with pandas DataFrame. 
@@ -159,7 +159,7 @@ run = openml.runs.run_model_on_task(pipe, task, avoid_duplicate_runs=False, dataset_format="array") myrun = run.publish() -print("Uploaded to https://test.openml.org/r/" + str(myrun.run_id)) +print(f"Uploaded to {myrun.openml_url}") ############################################################################### # Running flows on tasks offline for later upload @@ -229,7 +229,7 @@ run = openml.runs.run_model_on_task(clf, task, avoid_duplicate_runs=False) myrun = run.publish() - print(f"kNN on {data.name}: https://test.openml.org/r/{myrun.run_id}") + print(f"kNN on {data.name}: {myrun.openml_url}") ############################################################################