tfds-nightly 4.9.9.dev202507210045__py3-none-any.whl → 4.9.9.dev202507230045__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -37,6 +37,7 @@ print(ds['default'][0])
37
37
  from __future__ import annotations
38
38
 
39
39
  from collections.abc import Mapping, Sequence
40
+ import datetime
40
41
  import json
41
42
  from typing import Any
42
43
 
@@ -133,10 +134,11 @@ def datatype_converter(
133
134
  np.float32.
134
135
 
135
136
  Returns:
136
- Converted datatype for TFDS.
137
+ Converted datatype for TFDS, or None when a Field does not specify a type.
137
138
 
138
139
  Raises:
139
- NotImplementedError
140
+ NotImplementedError when the feature is not supported yet, or ValueError
141
+ when a Field is malformed.
140
142
  """
141
143
  if field.is_enumeration:
142
144
  raise NotImplementedError('Not implemented yet.')
@@ -150,7 +152,7 @@ def datatype_converter(
150
152
  field_data_type = field.data_type
151
153
 
152
154
  if not field_data_type:
153
- # Fields with sub fields are of type None
155
+ # Fields with sub fields are of type None.
154
156
  if field.sub_fields:
155
157
  feature = features_dict.FeaturesDict(
156
158
  {
@@ -169,8 +171,9 @@ def datatype_converter(
169
171
  feature = dtype_mapping[field_data_type]
170
172
  elif enp.lazy.is_np_dtype(field_data_type):
171
173
  feature = field_data_type
172
- # We return a text feature for mlc.DataType.DATE features.
173
- elif field_data_type == pd.Timestamp:
174
+ # We return a text feature for date-time features (mlc.DataType.DATE,
175
+ # mlc.DataType.DATETIME, and mlc.DataType.TIME).
176
+ elif field_data_type == pd.Timestamp or field_data_type == datetime.time:
174
177
  feature = text_feature.Text(doc=field.description)
175
178
  elif field_data_type == mlc.DataType.IMAGE_OBJECT:
176
179
  feature = image_feature.Image(doc=field.description)
@@ -193,7 +196,9 @@ def datatype_converter(
193
196
  doc=field.description, sample_rate=field.source.sampling_rate
194
197
  )
195
198
  else:
196
- raise ValueError(f'Unknown data type: {field_data_type}.')
199
+ raise ValueError(
200
+ f'Unknown data type: {field_data_type} for field {field.id}.'
201
+ )
197
202
 
198
203
  if feature and field.is_array:
199
204
  feature = array_datatype_converter(
@@ -165,6 +165,18 @@ def test_bbox_datatype_converter_with_invalid_format():
165
165
  text_feature.Text,
166
166
  None,
167
167
  ),
168
+ (
169
+ mlc.Field(
170
+ data_types=mlc.DataType.DATETIME, description="DateTime feature"
171
+ ),
172
+ text_feature.Text,
173
+ None,
174
+ ),
175
+ (
176
+ mlc.Field(data_types=mlc.DataType.TIME, description="Time feature"),
177
+ text_feature.Text,
178
+ None,
179
+ ),
168
180
  (
169
181
  mlc.Field(
170
182
  data_types=mlc.DataType.IMAGE_OBJECT,
@@ -218,6 +230,13 @@ def test_complex_datatype_converter(field, feature_type, subfield_types):
218
230
  )
219
231
 
220
232
 
233
+ def test_datatype_converter_none():
234
+ field = mlc.Field(
235
+ name="my_field", id="my_field", description="Field with empty data type."
236
+ )
237
+ assert croissant_builder.datatype_converter(field) is None
238
+
239
+
221
240
  def test_multidimensional_datatype_converter():
222
241
  field = mlc.Field(
223
242
  data_types=mlc.DataType.TEXT,
@@ -93,7 +93,7 @@ class Video(sequence_feature.Sequence):
93
93
 
94
94
  def __init__(
95
95
  self,
96
- shape: Sequence[Optional[int]],
96
+ shape: Sequence[Optional[int]] | None = None,
97
97
  encoding_format: str = 'png',
98
98
  ffmpeg_extra_args: Sequence[str] = (),
99
99
  use_colormap: bool = False,
@@ -103,8 +103,8 @@ class Video(sequence_feature.Sequence):
103
103
  """Initializes the connector.
104
104
 
105
105
  Args:
106
- shape: tuple of ints, the shape of the video (num_frames, height, width,
107
- channels), where channels is 1 or 3.
106
+ shape: The shape of the video (num_frames, height, width, channels), where
107
+ channels is 1 or 3.
108
108
  encoding_format: The video is stored as a sequence of encoded images. You
109
109
  can use any encoding format supported by image_feature.Feature.
110
110
  ffmpeg_extra_args: A sequence of additional args to be passed to the
@@ -121,19 +121,22 @@ class Video(sequence_feature.Sequence):
121
121
  ValueError: If the shape is invalid
122
122
  """
123
123
  dtype = tf.dtypes.as_dtype(dtype)
124
- shape = tuple(shape)
125
- if len(shape) != 4:
126
- raise ValueError('Video shape should be of rank 4')
124
+ frame_shape = None
125
+ if shape:
126
+ shape = tuple(shape)
127
+ if len(shape) != 4:
128
+ raise ValueError('Video shape should be of rank 4')
129
+ frame_shape = shape[1:]
127
130
  self._encoding_format = encoding_format
128
131
  self._extra_ffmpeg_args = list(ffmpeg_extra_args or [])
129
132
  super(Video, self).__init__(
130
133
  image_feature.Image(
131
- shape=shape[1:],
134
+ shape=frame_shape,
132
135
  dtype=dtype,
133
136
  encoding_format=encoding_format,
134
137
  use_colormap=use_colormap,
135
138
  ),
136
- length=shape[0],
139
+ length=shape[0] if shape else None,
137
140
  )
138
141
 
139
142
  def _ffmpeg_decode(self, path_or_fobj):
@@ -48,6 +48,22 @@ class VideoFeatureTest(testing.FeatureExpectationsTestCase):
48
48
  test_attributes=dict(_encoding_format='png', _extra_ffmpeg_args=[]),
49
49
  )
50
50
 
51
+ def test_video_with_none_shape(self):
52
+ np_video = np.random.randint(256, size=(128, 64, 64, 3), dtype=np.uint8)
53
+
54
+ self.assertFeature(
55
+ feature=features.Video(shape=None),
56
+ shape=(None, None, None, 3),
57
+ dtype=tf.uint8,
58
+ tests=[
59
+ testing.FeatureExpectationItem(
60
+ value=np_video,
61
+ expected=np_video,
62
+ ),
63
+ ],
64
+ test_attributes=dict(_encoding_format='png', _extra_ffmpeg_args=[]),
65
+ )
66
+
51
67
  def test_video_concatenated_frames(self):
52
68
  video_shape = (None, 400, 640, 3)
53
69
  lsun_examples_path = os.path.join(self._test_data_path, 'lsun_examples')
@@ -119,6 +135,5 @@ class VideoFeatureTest(testing.FeatureExpectationsTestCase):
119
135
  ],
120
136
  )
121
137
 
122
-
123
138
  if __name__ == '__main__':
124
139
  testing.test_main()
@@ -119,7 +119,7 @@ def even_splits(
119
119
  not evenly divisible by `n`. If `False`, examples are distributed evenly
120
120
  across subsplits, starting by the first. For example, if there is 11
121
121
  examples with `n=3`, splits will contain `[4, 4, 3]` examples
122
- respectivelly.
122
+ respectively.
123
123
 
124
124
  Returns:
125
125
  The list of subsplits. Those splits can be combined together (with
@@ -169,7 +169,7 @@ def split_for_jax_process(
169
169
  not evenly divisible by `n`. If `False`, examples are distributed evenly
170
170
  across subsplits, starting by the first. For example, if there is 11
171
171
  examples with `n=3`, splits will contain `[4, 4, 3]` examples
172
- respectivelly.
172
+ respectively.
173
173
 
174
174
  Returns:
175
175
  subsplit: The sub-split of the given `split` for the current
@@ -119,6 +119,8 @@ def convert_hf_features(hf_features) -> feature_lib.FeatureConnector:
119
119
  sample_rate=hf_features.sampling_rate,
120
120
  dtype=np.int32,
121
121
  )
122
+ case hf_datasets.Video():
123
+ return feature_lib.Video()
122
124
 
123
125
  raise TypeError(f'Type {type(hf_features)} is not supported.')
124
126
 
@@ -191,7 +191,7 @@ class DatasetBuilderTestCase(
191
191
  # The `dl_manager.download` and `dl_manager.download_and_extract` are
192
192
  # patched to record the urls in `_download_urls`.
193
193
  # Calling `dl_manager.download_checksums` stop the url
194
- # registration (as checksums are stored remotelly)
194
+ # registration (as checksums are stored remotely)
195
195
  # `_test_checksums` validates the recorded urls.
196
196
  self._download_urls = set()
197
197
  self._stop_record_download = False
@@ -291,7 +291,7 @@ class DatasetBuilderTestCase(
291
291
  def _add_url(self, url_or_urls):
292
292
  if self._stop_record_download:
293
293
  # Stop record the checksums if dl_manager.download_checksums has been
294
- # called (as checksums may be stored remotelly)
294
+ # called (as checksums may be stored remotely).
295
295
  return
296
296
  if isinstance(url_or_urls, download.resource.Resource):
297
297
  self._download_urls.add(url_or_urls.url)
@@ -147,7 +147,7 @@ class MockFs(object):
147
147
  with self._mock() as m:
148
148
  yield m
149
149
  self._tmp_dir = None
150
- # TODO(epot): recursivelly record all
150
+ # TODO(epot): recursively record all.
151
151
 
152
152
  def _to_tmp(self, p, *, with_state: bool = False):
153
153
  """Normalize the path by returning `tmp_path / p`."""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tfds-nightly
3
- Version: 4.9.9.dev202507210045
3
+ Version: 4.9.9.dev202507230045
4
4
  Summary: tensorflow/datasets is a library of datasets ready to use with TensorFlow.
5
5
  Home-page: https://github.com/tensorflow/datasets
6
6
  Download-URL: https://github.com/tensorflow/datasets/tags
@@ -104,7 +104,7 @@ tensorflow_datasets/core/split_builder.py,sha256=cpz-YowMhmiZZVp7eQPNrh23KvE0-Ef
104
104
  tensorflow_datasets/core/split_builder_test.py,sha256=kBUVUnQQB_c82AhgjhK3hoYfiAqLt7tDFTzsvZRGQCw,3223
105
105
  tensorflow_datasets/core/splits.py,sha256=O3jK4Dalp4tEPeZ9AHbkpW1UkJ6uv5m4YRu2x_ZZTJ4,29418
106
106
  tensorflow_datasets/core/splits_test.py,sha256=KrM82r0YsJRTGfpYUCkBxiGDC7BjZFcTvJ-Hbo6HwF0,24987
107
- tensorflow_datasets/core/subsplits_utils.py,sha256=BPHVPAvHlqt4d3HUr4J2Znn8G63pXLPQ29TBi484MOE,6127
107
+ tensorflow_datasets/core/subsplits_utils.py,sha256=6mVCr-QNZfNgX0Ka_htsqmr-JgFXJXJ7IFfl1ytCQio,6125
108
108
  tensorflow_datasets/core/subsplits_utils_test.py,sha256=TIRLtfaf2n38pByhpqYTXEEvs8hrWe2eXk9RFdBMrFQ,5159
109
109
  tensorflow_datasets/core/tf_compat.py,sha256=qdZUtaO9FsZUds7Wf0w0MoRydPPRsuZ0_8ebRJg19gg,1820
110
110
  tensorflow_datasets/core/units.py,sha256=m3ht8oM8wr6oTU3tCbKOj1yaPyXn1MCu7dUjzw0LrPY,1975
@@ -141,8 +141,8 @@ tensorflow_datasets/core/data_sources/python_test.py,sha256=O3yqMPx40JlHN0uFfZPN
141
141
  tensorflow_datasets/core/dataset_builders/__init__.py,sha256=StTA3euephqDZdpTzJQgfWNqB5inZosrAhaWg2BOeio,1945
142
142
  tensorflow_datasets/core/dataset_builders/adhoc_builder.py,sha256=QVE8wWGPOgILPTC27Q28QZ3KIi5N64OGOfKpTq4W4_0,9216
143
143
  tensorflow_datasets/core/dataset_builders/adhoc_builder_test.py,sha256=yhRwrznK78MvHeWGRggnMTiyx_SlR1z30iD5VU3Gweo,13096
144
- tensorflow_datasets/core/dataset_builders/croissant_builder.py,sha256=Ef9fSTxFIvUKf_zfCqL3JyUje6f9buIQXvT1iuHgB20,16596
145
- tensorflow_datasets/core/dataset_builders/croissant_builder_test.py,sha256=1KPrGPFYHQvo33TwnG5LmPpdlyRTudPfni4ipFsj0ao,11607
144
+ tensorflow_datasets/core/dataset_builders/croissant_builder.py,sha256=CCiXOgcr5VJYaQlSf_ss_712BtrDuP6QCyP0K4UgKFs,16876
145
+ tensorflow_datasets/core/dataset_builders/croissant_builder_test.py,sha256=O5j9pUKpgtZKpFQYPAYKQ7DMHXVtuaF_7lwjZZxFRzc,12151
146
146
  tensorflow_datasets/core/dataset_builders/huggingface_dataset_builder.py,sha256=Loq3qeGk1Ias-d2oT_dK47BRNgTA4LKJchNGh7aA4a0,18313
147
147
  tensorflow_datasets/core/dataset_builders/huggingface_dataset_builder_test.py,sha256=6N3DLsry9LhDqhpleaoXrrhaGiLJMBgUlwDnAji-1fI,4389
148
148
  tensorflow_datasets/core/dataset_builders/view_builder.py,sha256=eaCtjN5Vg4rK8JD3auA4PhF9mjH5HvQ9dslDX8LbwyM,11907
@@ -213,8 +213,8 @@ tensorflow_datasets/core/features/top_level_feature.py,sha256=JeOnaBUqp-xFLuPxUt
213
213
  tensorflow_datasets/core/features/top_level_feature_test.py,sha256=JutGHU-08tg5KWiB3mIB6Q3a80CvS5_F6jG0bfAYXWM,3628
214
214
  tensorflow_datasets/core/features/translation_feature.py,sha256=Qmx39XwMJy18u9eoZlT3Spc0VT0qtqsTHahWoETLZZo,8284
215
215
  tensorflow_datasets/core/features/translation_feature_test.py,sha256=iK8ckwApuMu13BS1-vkny-m_NV6uNTz6ky5gbZEfxoc,6060
216
- tensorflow_datasets/core/features/video_feature.py,sha256=K4lld2N-a9TWrwYss8gEBGnkiraKqKyM81Z4chUj8qU,7720
217
- tensorflow_datasets/core/features/video_feature_test.py,sha256=qUFu8quELWr15r_VaMZGKoFPb2ueZkNmVJHBcgR5fkc,3772
216
+ tensorflow_datasets/core/features/video_feature.py,sha256=o4tuH4HxkG_sYCYUx8R-LVHcuIgFcfCNe8n0cE5MISk,7813
217
+ tensorflow_datasets/core/features/video_feature_test.py,sha256=k0qpYvZIC_-xXTG9EN1Mo2d9RA77CcvJYl9c0U3oXcQ,4255
218
218
  tensorflow_datasets/core/folder_dataset/__init__.py,sha256=Pn2mSU-CPxC89lvywHAD-XrhQj0mvAaqZogpekjr-bs,1515
219
219
  tensorflow_datasets/core/folder_dataset/compute_split_utils.py,sha256=Ob_ZaqfS00zViAtRhHK_ff7R8eJAtYDDh6XjQGXdcP4,13515
220
220
  tensorflow_datasets/core/folder_dataset/compute_split_utils_test.py,sha256=XBo4UC1IydAPuIP1SY2psrEVeEr3y0KPmCEje6yQWhs,3784
@@ -256,7 +256,7 @@ tensorflow_datasets/core/utils/file_utils.py,sha256=vL-ulAVClrvkA71DvEvdGR2EdNmO
256
256
  tensorflow_datasets/core/utils/file_utils_test.py,sha256=SCw_XFRhyxGCFEVjt9pOdupsoULPdi8iT38JBrnUuDM,13708
257
257
  tensorflow_datasets/core/utils/gcs_utils.py,sha256=8mBOgEepkah1Rw36F6DNIVhLzfXbR8iS8KMLQUM5sPk,5154
258
258
  tensorflow_datasets/core/utils/gcs_utils_test.py,sha256=Ig8S37AvFG2g7kNjYxqgmqNKlLPeXt31XD7RY4UzsDg,2578
259
- tensorflow_datasets/core/utils/huggingface_utils.py,sha256=J-TZyisMkjqsFWw6MAA0NWot4KPDmfWe_7t1R9wjQv4,5262
259
+ tensorflow_datasets/core/utils/huggingface_utils.py,sha256=NeYaUoO3vIFH8M0hZ8k4w7AchFZJIGsuV1XwKJVttfw,5325
260
260
  tensorflow_datasets/core/utils/huggingface_utils_test.py,sha256=wYKY5vh5q4ImpkvDjZWwcTbH1s2YORKpsklA-9Qwfxs,4792
261
261
  tensorflow_datasets/core/utils/image_utils.py,sha256=5xHKJO8wsPGZpuFoBsvwaXp_-pnrtwXvyLBSK7itAm4,5939
262
262
  tensorflow_datasets/core/utils/image_utils_test.py,sha256=6QLpWwveq4Jtw0nLxG4S-VGpVsI9qwr6bJm0Vgunbu0,3127
@@ -2122,7 +2122,7 @@ tensorflow_datasets/summarization/media_sum/media_sum.py,sha256=CIhR_cfQb1aEfu9B
2122
2122
  tensorflow_datasets/summarization/summscreen/__init__.py,sha256=ADxohrpUPJjug4r2kGCCJEWZzVD4s2S0smqLfjkc8YY,718
2123
2123
  tensorflow_datasets/summarization/summscreen/summscreen.py,sha256=DfwGr3vsRhOC62ODJ1Sp7-v219bPjJ93KK043YReV7I,884
2124
2124
  tensorflow_datasets/testing/__init__.py,sha256=aSwY_kciK-EZXp1D_JRkuuCJwtbFljGZ72c9YNB6yfE,6049
2125
- tensorflow_datasets/testing/dataset_builder_testing.py,sha256=Ers73TcGgVjjLWvpfuKmr34QSBl6QB3Z9qvFPzSHjSE,25094
2125
+ tensorflow_datasets/testing/dataset_builder_testing.py,sha256=ziE2twrc1-LQExGp4g5Nbq9hlbFow3VdX8RTC83R6bM,25093
2126
2126
  tensorflow_datasets/testing/dataset_builder_testing_test.py,sha256=Nf7Ykg5bY5o9ZatQKrRJhr-qGTtNKle4aZph4rt72i4,1283
2127
2127
  tensorflow_datasets/testing/dataset_collection_builder_testing.py,sha256=tUv2l53rc9GEo4sWvM9OP9r-Ze54dcDakeLQBMS7yos,4825
2128
2128
  tensorflow_datasets/testing/dataset_collection_builder_testing_test.py,sha256=Dw5tACaDjVt9CZi0V84tMAh2JJexrRwWF1N3DID1Mbs,1155
@@ -2132,7 +2132,7 @@ tensorflow_datasets/testing/mocking.py,sha256=4mIq0ngxfs3w0hFlosGOSTp-mAQVfBfoFw
2132
2132
  tensorflow_datasets/testing/mocking_test.py,sha256=9DMkxcQw_dZTKULNHiKv91e0VcBsUTa6FIhUOLvJKls,13796
2133
2133
  tensorflow_datasets/testing/test_case.py,sha256=_H_M3pp6Vp3dbtPyVy5Um7X8S4V4EKPLrao1mbS2IdU,2554
2134
2134
  tensorflow_datasets/testing/test_case_in_context.py,sha256=7YrdTI_rqR01Q-ToVqewIm1OKDwvxIidPhaffYmjP1E,1872
2135
- tensorflow_datasets/testing/test_utils.py,sha256=wFWG9jryWFA3NnNBrEva4u6HWRu9yfQbXQdYpHkDNKw,26736
2135
+ tensorflow_datasets/testing/test_utils.py,sha256=sQTTXa8YHPXml514vayxiu_E6qHFQ_1Maizy3OR0J8Y,26736
2136
2136
  tensorflow_datasets/testing/test_utils_test.py,sha256=nL2niozCO5Gh4cWPWbDW5_w3w-mHRYZEQmmfej2fpjY,9576
2137
2137
  tensorflow_datasets/testing/version_test.py,sha256=fNMSX1FSNs_66MHcRGAWzoPZWJ-sAvmc-rceKXGK-uM,2791
2138
2138
  tensorflow_datasets/text/__init__.py,sha256=_PtJTw2LQqgxFNVeBCEXrLGF2qg5NNOiXTW9oKZR_ZA,5319
@@ -2468,10 +2468,10 @@ tensorflow_datasets/vision_language/wit/wit_test.py,sha256=PXS8DMNW-MDrT2p5oy4Ic
2468
2468
  tensorflow_datasets/vision_language/wit_kaggle/__init__.py,sha256=vGwSGeM8WE4Q-l0-eEE1sBojmk6YT0l1OO60AWa4Q40,719
2469
2469
  tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle.py,sha256=q-vX_FBzIwsFxL4sY9vuyQ3UQD2PLM4yhUR4U6l-qao,16903
2470
2470
  tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle_test.py,sha256=ZymHT1NkmD-pUnh3BmM3_g30c5afsWYnmqDD9dVyDSA,1778
2471
- tfds_nightly-4.9.9.dev202507210045.dist-info/licenses/AUTHORS,sha256=nvBG4WwfgjuOu1oZkuQKw9kg7X6rve679ObS-YDDmXg,309
2472
- tfds_nightly-4.9.9.dev202507210045.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
2473
- tfds_nightly-4.9.9.dev202507210045.dist-info/METADATA,sha256=DYsSCdhyzArKSDLHHfJR1cUcV7MJpg9I231kc68ETak,11694
2474
- tfds_nightly-4.9.9.dev202507210045.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2475
- tfds_nightly-4.9.9.dev202507210045.dist-info/entry_points.txt,sha256=eHEL7nF5y1uCY2FgkuYIdE062epJXlAQTSdq89px4p4,73
2476
- tfds_nightly-4.9.9.dev202507210045.dist-info/top_level.txt,sha256=bAevmk9209s_oxVZVlN6hSDIVS423qrMQvmcWSvW4do,20
2477
- tfds_nightly-4.9.9.dev202507210045.dist-info/RECORD,,
2471
+ tfds_nightly-4.9.9.dev202507230045.dist-info/licenses/AUTHORS,sha256=nvBG4WwfgjuOu1oZkuQKw9kg7X6rve679ObS-YDDmXg,309
2472
+ tfds_nightly-4.9.9.dev202507230045.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
2473
+ tfds_nightly-4.9.9.dev202507230045.dist-info/METADATA,sha256=fha1BBcJdcuOuYN_oHawuzQx_EsXdW6fWiqn1eDa3OI,11694
2474
+ tfds_nightly-4.9.9.dev202507230045.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2475
+ tfds_nightly-4.9.9.dev202507230045.dist-info/entry_points.txt,sha256=eHEL7nF5y1uCY2FgkuYIdE062epJXlAQTSdq89px4p4,73
2476
+ tfds_nightly-4.9.9.dev202507230045.dist-info/top_level.txt,sha256=bAevmk9209s_oxVZVlN6hSDIVS423qrMQvmcWSvW4do,20
2477
+ tfds_nightly-4.9.9.dev202507230045.dist-info/RECORD,,