tfds-nightly 4.9.9.dev202508240045__py3-none-any.whl → 4.9.9.dev202508260044__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -63,16 +63,49 @@ def get_croissant_version(version: str | None) -> str | None:
63
63
  return version
64
64
 
65
65
 
66
- def get_dataset_name(dataset: mlc.Dataset) -> str:
67
- """Returns dataset name of the given MLcroissant dataset."""
66
+ def get_dataset_name(dataset: mlc.Dataset, language: str | None = None) -> str:
67
+ """Returns dataset name of the given MLcroissant dataset.
68
+
69
+ Args:
70
+ dataset: The MLcroissant dataset.
71
+ language: For datasets with multiple names in different languages, this
72
+ argument specifies the language to use.
73
+ """
68
74
  if (url := dataset.metadata.url) and url.startswith(_HUGGINGFACE_URL_PREFIX):
69
75
  return url.removeprefix(_HUGGINGFACE_URL_PREFIX)
70
- return dataset.metadata.name
76
+ name = dataset.metadata.name
77
+ if isinstance(name, dict):
78
+ if language is None:
79
+ # Try a heuristic language, e.g., 'en'.
80
+ if "en" in name:
81
+ return name["en"]
82
+ # Otherwise, take the first language in the dict.
83
+ try:
84
+ first_lang = next(iter(name))
85
+ return name[first_lang]
86
+ except StopIteration as exc:
87
+ raise ValueError("Dataset name dictionary is empty.") from exc
88
+ elif language not in dataset.metadata.name:
89
+ raise ValueError(
90
+ f"Language {language} not found in dataset names {name}."
91
+ )
92
+ else:
93
+ return name[language]
94
+ # At this point, name is not a dict anymore.
95
+ return typing.cast(str, name)
96
+
97
+
98
+ def get_tfds_dataset_name(
99
+ dataset: mlc.Dataset, language: str | None = None
100
+ ) -> str:
101
+ """Returns TFDS compatible dataset name of the given MLcroissant dataset.
71
102
 
72
-
73
- def get_tfds_dataset_name(dataset: mlc.Dataset) -> str:
74
- """Returns TFDS compatible dataset name of the given MLcroissant dataset."""
75
- dataset_name = get_dataset_name(dataset)
103
+ Args:
104
+ dataset: The MLcroissant dataset.
105
+ language: For datasets with multiple names in different languages, this
106
+ argument specifies the language to use.
107
+ """
108
+ dataset_name = get_dataset_name(dataset, language=language)
76
109
  return conversion_utils.to_tfds_name(dataset_name)
77
110
 
78
111
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tfds-nightly
3
- Version: 4.9.9.dev202508240045
3
+ Version: 4.9.9.dev202508260044
4
4
  Summary: tensorflow/datasets is a library of datasets ready to use with TensorFlow.
5
5
  Home-page: https://github.com/tensorflow/datasets
6
6
  Download-URL: https://github.com/tensorflow/datasets/tags
@@ -245,7 +245,7 @@ tensorflow_datasets/core/utils/bool_utils_test.py,sha256=rwFRcYV0wBknvYODjeTgRDq
245
245
  tensorflow_datasets/core/utils/colormap.csv,sha256=DDayUU9R19cxhcG3fj4cFwhI46W20U7ofBG0kToUHOw,2732
246
246
  tensorflow_datasets/core/utils/conversion_utils.py,sha256=V8kFmJu38op7-8ufZvEn0fLOH8FMkjQebQ1NstIMRYo,6747
247
247
  tensorflow_datasets/core/utils/conversion_utils_test.py,sha256=rP_nbzQWzmZc_GXp3Y6TirwIGJqiQbF-JtY3B1tOuN0,5346
248
- tensorflow_datasets/core/utils/croissant_utils.py,sha256=9C8sScaEqSRsThqpQQc48GDNR1KFmDkS8hmKIvfZCB0,5181
248
+ tensorflow_datasets/core/utils/croissant_utils.py,sha256=9-_j86KKKkfxgg0aAM1zxlqCdkaC-0p9XzdWjSLmOwk,6265
249
249
  tensorflow_datasets/core/utils/croissant_utils_test.py,sha256=UdkAVYDTPm1L0zmMESScurV_IMA5K3qAKmL_umeMJZI,4497
250
250
  tensorflow_datasets/core/utils/docs.py,sha256=nRE4d8wxYZav8AcT3dkiY0yplAJBx1hygWxkeKj_V7I,1412
251
251
  tensorflow_datasets/core/utils/dtype_utils.py,sha256=LvDe1hbgQem57RiqXjG9U5Roj8-1KkBMmSYTtgctx2U,3246
@@ -2471,10 +2471,10 @@ tensorflow_datasets/vision_language/wit/wit_test.py,sha256=PXS8DMNW-MDrT2p5oy4Ic
2471
2471
  tensorflow_datasets/vision_language/wit_kaggle/__init__.py,sha256=vGwSGeM8WE4Q-l0-eEE1sBojmk6YT0l1OO60AWa4Q40,719
2472
2472
  tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle.py,sha256=q-vX_FBzIwsFxL4sY9vuyQ3UQD2PLM4yhUR4U6l-qao,16903
2473
2473
  tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle_test.py,sha256=ZymHT1NkmD-pUnh3BmM3_g30c5afsWYnmqDD9dVyDSA,1778
2474
- tfds_nightly-4.9.9.dev202508240045.dist-info/licenses/AUTHORS,sha256=nvBG4WwfgjuOu1oZkuQKw9kg7X6rve679ObS-YDDmXg,309
2475
- tfds_nightly-4.9.9.dev202508240045.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
2476
- tfds_nightly-4.9.9.dev202508240045.dist-info/METADATA,sha256=qRe_2vex_eeA3DBe_tMZZPn9-0gsjNd6TNBD0U_ihsA,11291
2477
- tfds_nightly-4.9.9.dev202508240045.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2478
- tfds_nightly-4.9.9.dev202508240045.dist-info/entry_points.txt,sha256=eHEL7nF5y1uCY2FgkuYIdE062epJXlAQTSdq89px4p4,73
2479
- tfds_nightly-4.9.9.dev202508240045.dist-info/top_level.txt,sha256=bAevmk9209s_oxVZVlN6hSDIVS423qrMQvmcWSvW4do,20
2480
- tfds_nightly-4.9.9.dev202508240045.dist-info/RECORD,,
2474
+ tfds_nightly-4.9.9.dev202508260044.dist-info/licenses/AUTHORS,sha256=nvBG4WwfgjuOu1oZkuQKw9kg7X6rve679ObS-YDDmXg,309
2475
+ tfds_nightly-4.9.9.dev202508260044.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
2476
+ tfds_nightly-4.9.9.dev202508260044.dist-info/METADATA,sha256=OlIMhl94mKrf1q3B2umMYXvVuSZqZtNXZfwqnqGm3-0,11291
2477
+ tfds_nightly-4.9.9.dev202508260044.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2478
+ tfds_nightly-4.9.9.dev202508260044.dist-info/entry_points.txt,sha256=eHEL7nF5y1uCY2FgkuYIdE062epJXlAQTSdq89px4p4,73
2479
+ tfds_nightly-4.9.9.dev202508260044.dist-info/top_level.txt,sha256=bAevmk9209s_oxVZVlN6hSDIVS423qrMQvmcWSvW4do,20
2480
+ tfds_nightly-4.9.9.dev202508260044.dist-info/RECORD,,