@@ -473,8 +473,6 @@ def get_dataset(
473473 dataset = _create_dataset_from_description (
474474 description , features_file , qualities_file , arff_file , parquet_file , cache_format
475475 )
476- else :
477- dataset = None
478476 return dataset
479477
480478
@@ -994,7 +992,7 @@ def _get_dataset_description(did_cache_dir: str, dataset_id: int) -> Dict[str, s
994992
995993
996994def _get_dataset_parquet (
997- description : Union [Dict [str , Union [ str , int ] ], OpenMLDataset ],
995+ description : Union [Dict [str , str ], OpenMLDataset ],
998996 cache_directory : Optional [str ] = None ,
999997 download_all_files : bool = False ,
1000998) -> Optional [str ]:
@@ -1025,12 +1023,12 @@ def _get_dataset_parquet(
10251023 output_filename : string, optional
10261024 Location of the Parquet file if successfully downloaded, None otherwise.
10271025 """
1028- if isinstance (description , dict ):
1029- url = cast (str , description .get ("oml:minio_url" ))
1030- did = description .get ("oml:id" )
1031- elif isinstance (description , OpenMLDataset ):
1026+ if isinstance (description , OpenMLDataset ):
10321027 url = cast (str , description ._minio_url )
10331028 did = description .dataset_id
1029+ elif isinstance (description , dict ):
1030+ url = cast (str , description .get ("oml:minio_url" ))
1031+ did = int (description .get ("oml:id" , "" ))
10341032 else :
10351033 raise TypeError ("`description` should be either OpenMLDataset or Dict." )
10361034
@@ -1063,7 +1061,7 @@ def _get_dataset_parquet(
10631061
10641062
10651063def _get_dataset_arff (
1066- description : Union [Dict [str , Union [ str , int ] ], OpenMLDataset ],
1064+ description : Union [Dict [str , str ], OpenMLDataset ],
10671065 cache_directory : Optional [str ] = None ,
10681066) -> str :
10691067 """Return the path to the local arff file of the dataset. If it is not cached, it is downloaded.
@@ -1088,14 +1086,14 @@ def _get_dataset_arff(
10881086 output_filename : string
10891087 Location of ARFF file.
10901088 """
1091- if isinstance (description , dict ):
1092- md5_checksum_fixture = description .get ("oml:md5_checksum" )
1093- url = description ["oml:url" ]
1094- did = description .get ("oml:id" )
1095- elif isinstance (description , OpenMLDataset ):
1089+ if isinstance (description , OpenMLDataset ):
10961090 md5_checksum_fixture = description .md5_checksum
1097- url = description .url
1091+ url = cast ( str , description .url )
10981092 did = description .dataset_id
1093+ elif isinstance (description , dict ):
1094+ md5_checksum_fixture = description .get ("oml:md5_checksum" )
1095+ url = cast (str , description ["oml:url" ])
1096+ did = int (description .get ("oml:id" , "" ))
10991097 else :
11001098 raise TypeError ("`description` should be either OpenMLDataset or Dict." )
11011099
@@ -1214,8 +1212,8 @@ def _create_dataset_from_description(
12141212 Dataset object from dict and ARFF.
12151213 """
12161214 return OpenMLDataset (
1217- description ["oml:name" ],
1218- description .get ("oml:description" ),
1215+ name = description ["oml:name" ],
1216+ description = description .get ("oml:description" , " " ),
12191217 data_format = description ["oml:format" ],
12201218 dataset_id = description ["oml:id" ],
12211219 version = description ["oml:version" ],
@@ -1246,7 +1244,7 @@ def _create_dataset_from_description(
12461244 )
12471245
12481246
1249- def _get_online_dataset_arff (dataset_id : int ) -> str :
1247+ def _get_online_dataset_arff (dataset_id : int ) -> Optional [ str ] :
12501248 """Download the ARFF file for a given dataset id
12511249 from the OpenML website.
12521250
0 commit comments