tfds-nightly 4.9.9.dev202507220045__py3-none-any.whl → 4.9.9.dev202507230045__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -134,10 +134,11 @@ def datatype_converter(
134
134
  np.float32.
135
135
 
136
136
  Returns:
137
- Converted datatype for TFDS.
137
+ Converted datatype for TFDS, or None when a Field does not specify a type.
138
138
 
139
139
  Raises:
140
- NotImplementedError
140
+ NotImplementedError when the feature is not supported yet, or ValueError
141
+ when a Field is malformed.
141
142
  """
142
143
  if field.is_enumeration:
143
144
  raise NotImplementedError('Not implemented yet.')
@@ -151,7 +152,7 @@ def datatype_converter(
151
152
  field_data_type = field.data_type
152
153
 
153
154
  if not field_data_type:
154
- # Fields with sub fields are of type None
155
+ # Fields with sub fields are of type None.
155
156
  if field.sub_fields:
156
157
  feature = features_dict.FeaturesDict(
157
158
  {
@@ -170,8 +171,8 @@ def datatype_converter(
170
171
  feature = dtype_mapping[field_data_type]
171
172
  elif enp.lazy.is_np_dtype(field_data_type):
172
173
  feature = field_data_type
173
- # We return a text feature for mlc.DataType.DATE and mlc.DataType.TIME
174
- # features.
174
+ # We return a text feature for date-time features (mlc.DataType.DATE,
175
+ # mlc.DataType.DATETIME, and mlc.DataType.TIME).
175
176
  elif field_data_type == pd.Timestamp or field_data_type == datetime.time:
176
177
  feature = text_feature.Text(doc=field.description)
177
178
  elif field_data_type == mlc.DataType.IMAGE_OBJECT:
@@ -195,7 +196,9 @@ def datatype_converter(
195
196
  doc=field.description, sample_rate=field.source.sampling_rate
196
197
  )
197
198
  else:
198
- raise ValueError(f'Unknown data type: {field_data_type}.')
199
+ raise ValueError(
200
+ f'Unknown data type: {field_data_type} for field {field.id}.'
201
+ )
199
202
 
200
203
  if feature and field.is_array:
201
204
  feature = array_datatype_converter(
@@ -165,6 +165,13 @@ def test_bbox_datatype_converter_with_invalid_format():
165
165
  text_feature.Text,
166
166
  None,
167
167
  ),
168
+ (
169
+ mlc.Field(
170
+ data_types=mlc.DataType.DATETIME, description="DateTime feature"
171
+ ),
172
+ text_feature.Text,
173
+ None,
174
+ ),
168
175
  (
169
176
  mlc.Field(data_types=mlc.DataType.TIME, description="Time feature"),
170
177
  text_feature.Text,
@@ -223,6 +230,13 @@ def test_complex_datatype_converter(field, feature_type, subfield_types):
223
230
  )
224
231
 
225
232
 
233
+ def test_datatype_converter_none():
234
+ field = mlc.Field(
235
+ name="my_field", id="my_field", description="Field with empty data type."
236
+ )
237
+ assert croissant_builder.datatype_converter(field) is None
238
+
239
+
226
240
  def test_multidimensional_datatype_converter():
227
241
  field = mlc.Field(
228
242
  data_types=mlc.DataType.TEXT,
@@ -119,7 +119,7 @@ def even_splits(
119
119
  not evenly divisible by `n`. If `False`, examples are distributed evenly
120
120
  across subsplits, starting by the first. For example, if there is 11
121
121
  examples with `n=3`, splits will contain `[4, 4, 3]` examples
122
- respectivelly.
122
+ respectively.
123
123
 
124
124
  Returns:
125
125
  The list of subsplits. Those splits can be combined together (with
@@ -169,7 +169,7 @@ def split_for_jax_process(
169
169
  not evenly divisible by `n`. If `False`, examples are distributed evenly
170
170
  across subsplits, starting by the first. For example, if there is 11
171
171
  examples with `n=3`, splits will contain `[4, 4, 3]` examples
172
- respectivelly.
172
+ respectively.
173
173
 
174
174
  Returns:
175
175
  subsplit: The sub-split of the given `split` for the current
@@ -191,7 +191,7 @@ class DatasetBuilderTestCase(
191
191
  # The `dl_manager.download` and `dl_manager.download_and_extract` are
192
192
  # patched to record the urls in `_download_urls`.
193
193
  # Calling `dl_manager.download_checksums` stop the url
194
- # registration (as checksums are stored remotelly)
194
+ # registration (as checksums are stored remotely)
195
195
  # `_test_checksums` validates the recorded urls.
196
196
  self._download_urls = set()
197
197
  self._stop_record_download = False
@@ -291,7 +291,7 @@ class DatasetBuilderTestCase(
291
291
  def _add_url(self, url_or_urls):
292
292
  if self._stop_record_download:
293
293
  # Stop record the checksums if dl_manager.download_checksums has been
294
- # called (as checksums may be stored remotelly)
294
+ # called (as checksums may be stored remotely).
295
295
  return
296
296
  if isinstance(url_or_urls, download.resource.Resource):
297
297
  self._download_urls.add(url_or_urls.url)
@@ -147,7 +147,7 @@ class MockFs(object):
147
147
  with self._mock() as m:
148
148
  yield m
149
149
  self._tmp_dir = None
150
- # TODO(epot): recursivelly record all
150
+ # TODO(epot): recursively record all.
151
151
 
152
152
  def _to_tmp(self, p, *, with_state: bool = False):
153
153
  """Normalize the path by returning `tmp_path / p`."""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tfds-nightly
3
- Version: 4.9.9.dev202507220045
3
+ Version: 4.9.9.dev202507230045
4
4
  Summary: tensorflow/datasets is a library of datasets ready to use with TensorFlow.
5
5
  Home-page: https://github.com/tensorflow/datasets
6
6
  Download-URL: https://github.com/tensorflow/datasets/tags
@@ -104,7 +104,7 @@ tensorflow_datasets/core/split_builder.py,sha256=cpz-YowMhmiZZVp7eQPNrh23KvE0-Ef
104
104
  tensorflow_datasets/core/split_builder_test.py,sha256=kBUVUnQQB_c82AhgjhK3hoYfiAqLt7tDFTzsvZRGQCw,3223
105
105
  tensorflow_datasets/core/splits.py,sha256=O3jK4Dalp4tEPeZ9AHbkpW1UkJ6uv5m4YRu2x_ZZTJ4,29418
106
106
  tensorflow_datasets/core/splits_test.py,sha256=KrM82r0YsJRTGfpYUCkBxiGDC7BjZFcTvJ-Hbo6HwF0,24987
107
- tensorflow_datasets/core/subsplits_utils.py,sha256=BPHVPAvHlqt4d3HUr4J2Znn8G63pXLPQ29TBi484MOE,6127
107
+ tensorflow_datasets/core/subsplits_utils.py,sha256=6mVCr-QNZfNgX0Ka_htsqmr-JgFXJXJ7IFfl1ytCQio,6125
108
108
  tensorflow_datasets/core/subsplits_utils_test.py,sha256=TIRLtfaf2n38pByhpqYTXEEvs8hrWe2eXk9RFdBMrFQ,5159
109
109
  tensorflow_datasets/core/tf_compat.py,sha256=qdZUtaO9FsZUds7Wf0w0MoRydPPRsuZ0_8ebRJg19gg,1820
110
110
  tensorflow_datasets/core/units.py,sha256=m3ht8oM8wr6oTU3tCbKOj1yaPyXn1MCu7dUjzw0LrPY,1975
@@ -141,8 +141,8 @@ tensorflow_datasets/core/data_sources/python_test.py,sha256=O3yqMPx40JlHN0uFfZPN
141
141
  tensorflow_datasets/core/dataset_builders/__init__.py,sha256=StTA3euephqDZdpTzJQgfWNqB5inZosrAhaWg2BOeio,1945
142
142
  tensorflow_datasets/core/dataset_builders/adhoc_builder.py,sha256=QVE8wWGPOgILPTC27Q28QZ3KIi5N64OGOfKpTq4W4_0,9216
143
143
  tensorflow_datasets/core/dataset_builders/adhoc_builder_test.py,sha256=yhRwrznK78MvHeWGRggnMTiyx_SlR1z30iD5VU3Gweo,13096
144
- tensorflow_datasets/core/dataset_builders/croissant_builder.py,sha256=0lVl7ZP8tc1zUNZAVoUCw9jV_RAo1O9Mc2iFM21WVSM,16674
145
- tensorflow_datasets/core/dataset_builders/croissant_builder_test.py,sha256=4jFx88qcAi6mTU1fk_Kj9PpEPdhFEAYvZQFDD-AK8gw,11758
144
+ tensorflow_datasets/core/dataset_builders/croissant_builder.py,sha256=CCiXOgcr5VJYaQlSf_ss_712BtrDuP6QCyP0K4UgKFs,16876
145
+ tensorflow_datasets/core/dataset_builders/croissant_builder_test.py,sha256=O5j9pUKpgtZKpFQYPAYKQ7DMHXVtuaF_7lwjZZxFRzc,12151
146
146
  tensorflow_datasets/core/dataset_builders/huggingface_dataset_builder.py,sha256=Loq3qeGk1Ias-d2oT_dK47BRNgTA4LKJchNGh7aA4a0,18313
147
147
  tensorflow_datasets/core/dataset_builders/huggingface_dataset_builder_test.py,sha256=6N3DLsry9LhDqhpleaoXrrhaGiLJMBgUlwDnAji-1fI,4389
148
148
  tensorflow_datasets/core/dataset_builders/view_builder.py,sha256=eaCtjN5Vg4rK8JD3auA4PhF9mjH5HvQ9dslDX8LbwyM,11907
@@ -2122,7 +2122,7 @@ tensorflow_datasets/summarization/media_sum/media_sum.py,sha256=CIhR_cfQb1aEfu9B
2122
2122
  tensorflow_datasets/summarization/summscreen/__init__.py,sha256=ADxohrpUPJjug4r2kGCCJEWZzVD4s2S0smqLfjkc8YY,718
2123
2123
  tensorflow_datasets/summarization/summscreen/summscreen.py,sha256=DfwGr3vsRhOC62ODJ1Sp7-v219bPjJ93KK043YReV7I,884
2124
2124
  tensorflow_datasets/testing/__init__.py,sha256=aSwY_kciK-EZXp1D_JRkuuCJwtbFljGZ72c9YNB6yfE,6049
2125
- tensorflow_datasets/testing/dataset_builder_testing.py,sha256=Ers73TcGgVjjLWvpfuKmr34QSBl6QB3Z9qvFPzSHjSE,25094
2125
+ tensorflow_datasets/testing/dataset_builder_testing.py,sha256=ziE2twrc1-LQExGp4g5Nbq9hlbFow3VdX8RTC83R6bM,25093
2126
2126
  tensorflow_datasets/testing/dataset_builder_testing_test.py,sha256=Nf7Ykg5bY5o9ZatQKrRJhr-qGTtNKle4aZph4rt72i4,1283
2127
2127
  tensorflow_datasets/testing/dataset_collection_builder_testing.py,sha256=tUv2l53rc9GEo4sWvM9OP9r-Ze54dcDakeLQBMS7yos,4825
2128
2128
  tensorflow_datasets/testing/dataset_collection_builder_testing_test.py,sha256=Dw5tACaDjVt9CZi0V84tMAh2JJexrRwWF1N3DID1Mbs,1155
@@ -2132,7 +2132,7 @@ tensorflow_datasets/testing/mocking.py,sha256=4mIq0ngxfs3w0hFlosGOSTp-mAQVfBfoFw
2132
2132
  tensorflow_datasets/testing/mocking_test.py,sha256=9DMkxcQw_dZTKULNHiKv91e0VcBsUTa6FIhUOLvJKls,13796
2133
2133
  tensorflow_datasets/testing/test_case.py,sha256=_H_M3pp6Vp3dbtPyVy5Um7X8S4V4EKPLrao1mbS2IdU,2554
2134
2134
  tensorflow_datasets/testing/test_case_in_context.py,sha256=7YrdTI_rqR01Q-ToVqewIm1OKDwvxIidPhaffYmjP1E,1872
2135
- tensorflow_datasets/testing/test_utils.py,sha256=wFWG9jryWFA3NnNBrEva4u6HWRu9yfQbXQdYpHkDNKw,26736
2135
+ tensorflow_datasets/testing/test_utils.py,sha256=sQTTXa8YHPXml514vayxiu_E6qHFQ_1Maizy3OR0J8Y,26736
2136
2136
  tensorflow_datasets/testing/test_utils_test.py,sha256=nL2niozCO5Gh4cWPWbDW5_w3w-mHRYZEQmmfej2fpjY,9576
2137
2137
  tensorflow_datasets/testing/version_test.py,sha256=fNMSX1FSNs_66MHcRGAWzoPZWJ-sAvmc-rceKXGK-uM,2791
2138
2138
  tensorflow_datasets/text/__init__.py,sha256=_PtJTw2LQqgxFNVeBCEXrLGF2qg5NNOiXTW9oKZR_ZA,5319
@@ -2468,10 +2468,10 @@ tensorflow_datasets/vision_language/wit/wit_test.py,sha256=PXS8DMNW-MDrT2p5oy4Ic
2468
2468
  tensorflow_datasets/vision_language/wit_kaggle/__init__.py,sha256=vGwSGeM8WE4Q-l0-eEE1sBojmk6YT0l1OO60AWa4Q40,719
2469
2469
  tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle.py,sha256=q-vX_FBzIwsFxL4sY9vuyQ3UQD2PLM4yhUR4U6l-qao,16903
2470
2470
  tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle_test.py,sha256=ZymHT1NkmD-pUnh3BmM3_g30c5afsWYnmqDD9dVyDSA,1778
2471
- tfds_nightly-4.9.9.dev202507220045.dist-info/licenses/AUTHORS,sha256=nvBG4WwfgjuOu1oZkuQKw9kg7X6rve679ObS-YDDmXg,309
2472
- tfds_nightly-4.9.9.dev202507220045.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
2473
- tfds_nightly-4.9.9.dev202507220045.dist-info/METADATA,sha256=fjPYKkMek2RLp_EgZpG6zzwuTw74Pz-VNaNeZuCGScc,11694
2474
- tfds_nightly-4.9.9.dev202507220045.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2475
- tfds_nightly-4.9.9.dev202507220045.dist-info/entry_points.txt,sha256=eHEL7nF5y1uCY2FgkuYIdE062epJXlAQTSdq89px4p4,73
2476
- tfds_nightly-4.9.9.dev202507220045.dist-info/top_level.txt,sha256=bAevmk9209s_oxVZVlN6hSDIVS423qrMQvmcWSvW4do,20
2477
- tfds_nightly-4.9.9.dev202507220045.dist-info/RECORD,,
2471
+ tfds_nightly-4.9.9.dev202507230045.dist-info/licenses/AUTHORS,sha256=nvBG4WwfgjuOu1oZkuQKw9kg7X6rve679ObS-YDDmXg,309
2472
+ tfds_nightly-4.9.9.dev202507230045.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
2473
+ tfds_nightly-4.9.9.dev202507230045.dist-info/METADATA,sha256=fha1BBcJdcuOuYN_oHawuzQx_EsXdW6fWiqn1eDa3OI,11694
2474
+ tfds_nightly-4.9.9.dev202507230045.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2475
+ tfds_nightly-4.9.9.dev202507230045.dist-info/entry_points.txt,sha256=eHEL7nF5y1uCY2FgkuYIdE062epJXlAQTSdq89px4p4,73
2476
+ tfds_nightly-4.9.9.dev202507230045.dist-info/top_level.txt,sha256=bAevmk9209s_oxVZVlN6hSDIVS423qrMQvmcWSvW4do,20
2477
+ tfds_nightly-4.9.9.dev202507230045.dist-info/RECORD,,