Skip to content

Fix MaxRetryError when loading MNIST & Fashion-MNIST #1116

@PSSF23

Description

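Calling `dataset.get_data()` on the MNIST dataset (ID 554) raises a `MaxRetryError`: the download of the dataset's Parquet file (`dataset_554.pq`) fails after repeated HTTP 503 responses from the server.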

Steps/Code to Reproduce

dataset = openml.datasets.get_dataset(554, download_data=False)
X, y, is_categorical, _ = dataset.get_data(
    dataset_format="array", target=dataset.default_target_attribute
)

Expected Results

No error is thrown. MNIST data is loaded.

Actual Results

---------------------------------------------------------------------------
MaxRetryError                             Traceback (most recent call last)
<ipython-input-7-1f1613061a94> in <module>
      1 dataset = openml.datasets.get_dataset(554, download_data=False)
----> 2 X, y, is_categorical, _ = dataset.get_data(
      3     dataset_format="array", target=dataset.default_target_attribute
      4 )

~/miniconda3/envs/ndd/lib/python3.8/site-packages/openml/datasets/dataset.py in get_data(self, target, include_row_id, include_ignore_attribute, dataset_format)
    696             List of attribute names.
    697         """
--> 698         data, categorical, attribute_names = self._load_data()
    699 
    700         to_exclude = []

~/miniconda3/envs/ndd/lib/python3.8/site-packages/openml/datasets/dataset.py in _load_data(self)
    526         if need_to_create_pickle or need_to_create_feather:
    527             if self.data_file is None:
--> 528                 self._download_data()
    529 
    530             file_to_load = self.data_file if self.parquet_file is None else self.parquet_file

~/miniconda3/envs/ndd/lib/python3.8/site-packages/openml/datasets/dataset.py in _download_data(self)
    304         self.data_file = _get_dataset_arff(self)
    305         if self._minio_url is not None:
--> 306             self.parquet_file = _get_dataset_parquet(self)
    307 
    308     def _get_arff(self, format: str) -> Dict:

~/miniconda3/envs/ndd/lib/python3.8/site-packages/openml/datasets/functions.py in _get_dataset_parquet(description, cache_directory)
   1001     if not os.path.isfile(output_file_path):
   1002         try:
-> 1003             openml._api_calls._download_minio_file(
   1004                 source=cast(str, url), destination=output_file_path
   1005             )

~/miniconda3/envs/ndd/lib/python3.8/site-packages/openml/_api_calls.py in _download_minio_file(source, destination, exists_ok)
    103 
    104     try:
--> 105         client.fget_object(
    106             bucket_name=bucket, object_name=object_name, file_path=str(destination),
    107         )

~/miniconda3/envs/ndd/lib/python3.8/site-packages/minio/api.py in fget_object(self, bucket_name, object_name, file_path, request_headers, ssec, version_id, extra_query_params, tmp_file_path)
   1067         makedirs(os.path.dirname(file_path))
   1068 
-> 1069         stat = self.stat_object(
   1070             bucket_name,
   1071             object_name,

~/miniconda3/envs/ndd/lib/python3.8/site-packages/minio/api.py in stat_object(self, bucket_name, object_name, ssec, version_id, extra_query_params)
   1904         query_params = extra_query_params or {}
   1905         query_params.update({"versionId": version_id} if version_id else {})
-> 1906         response = self._execute(
   1907             "HEAD",
   1908             bucket_name,

~/miniconda3/envs/ndd/lib/python3.8/site-packages/minio/api.py in _execute(self, method, bucket_name, object_name, body, headers, query_params, preload_content, no_body_trace)
    411 
    412         try:
--> 413             return self._url_open(
    414                 method,
    415                 region,

~/miniconda3/envs/ndd/lib/python3.8/site-packages/minio/api.py in _url_open(self, method, region, bucket_name, object_name, body, headers, query_params, preload_content, no_body_trace)
    278                 http_headers.add(key, value)
    279 
--> 280         response = self._http.urlopen(
    281             method,
    282             urlunsplit(url),

~/miniconda3/envs/ndd/lib/python3.8/site-packages/urllib3/poolmanager.py in urlopen(self, method, url, redirect, **kw)
    373             response = conn.urlopen(method, url, **kw)
    374         else:
--> 375             response = conn.urlopen(method, u.request_uri, **kw)
    376 
    377         redirect_location = redirect and response.get_redirect_location()

~/miniconda3/envs/ndd/lib/python3.8/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    844             retries.sleep(response)
    845             log.debug("Retry: %s", url)
--> 846             return self.urlopen(
    847                 method,
    848                 url,

~/miniconda3/envs/ndd/lib/python3.8/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    844             retries.sleep(response)
    845             log.debug("Retry: %s", url)
--> 846             return self.urlopen(
    847                 method,
    848                 url,

~/miniconda3/envs/ndd/lib/python3.8/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    844             retries.sleep(response)
    845             log.debug("Retry: %s", url)
--> 846             return self.urlopen(
    847                 method,
    848                 url,

~/miniconda3/envs/ndd/lib/python3.8/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    844             retries.sleep(response)
    845             log.debug("Retry: %s", url)
--> 846             return self.urlopen(
    847                 method,
    848                 url,

~/miniconda3/envs/ndd/lib/python3.8/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    844             retries.sleep(response)
    845             log.debug("Retry: %s", url)
--> 846             return self.urlopen(
    847                 method,
    848                 url,

~/miniconda3/envs/ndd/lib/python3.8/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    834         if retries.is_retry(method, response.status, has_retry_after):
    835             try:
--> 836                 retries = retries.increment(method, url, response=response, _pool=self)
    837             except MaxRetryError:
    838                 if retries.raise_on_status:

~/miniconda3/envs/ndd/lib/python3.8/site-packages/urllib3/util/retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
    571 
    572         if new_retry.is_exhausted():
--> 573             raise MaxRetryError(_pool, url, error or ResponseError(cause))
    574 
    575         log.debug("Incremented Retry for (url='%s'): %r", url, new_retry)

MaxRetryError: HTTPConnectionPool(host='openml1.win.tue.nl', port=80): Max retries exceeded with url: /dataset554/dataset_554.pq (Caused by ResponseError('too many 503 error responses'))

Versions

macOS-10.16-x86_64-i386-64bit
Python 3.8.5 (default, Sep  4 2020, 02:22:02) 
[Clang 10.0.0 ]
NumPy 1.19.5
SciPy 1.7.1
Scikit-Learn 1.0
OpenML 0.12.2

Metadata

Assignees

No one assigned

    Labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions