tfds-nightly 4.9.9.dev202508280044__py3-none-any.whl → 4.9.9.dev202508300044__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -85,6 +85,7 @@ def array_datatype_converter(
85
85
  feature: type_utils.TfdsDType | feature_lib.FeatureConnector | None,
86
86
  field: mlc.Field,
87
87
  dtype_mapping: Mapping[type_utils.TfdsDType, type_utils.TfdsDType],
88
+ language: str | None = None,
88
89
  ):
89
90
  """Includes the given feature in a sequence or tensor feature.
90
91
 
@@ -97,6 +98,10 @@ def array_datatype_converter(
97
98
  field: The mlc.Field object.
98
99
  dtype_mapping: A mapping of dtypes to the corresponding dtypes that will be
99
100
  used in TFDS.
101
+ language: For Croissant jsonld which include multi-lingual descriptions, the
102
+ language code to use to extract the description to be used in TFDS. If
103
+ None, it will extract the description in English or the first available
104
+ language in the dictionary.
100
105
 
101
106
  Returns:
102
107
  A sequence or tensor feature including the inner feature.
@@ -108,7 +113,7 @@ def array_datatype_converter(
108
113
  field_dtype = field.data_type
109
114
 
110
115
  description = croissant_utils.extract_localized_string(
111
- field.description, field_name='description'
116
+ field.description, language=language, field_name='description'
112
117
  )
113
118
 
114
119
  if len(field.array_shape_tuple) == 1:
@@ -129,6 +134,7 @@ def datatype_converter(
129
134
  field: mlc.Field,
130
135
  int_dtype: type_utils.TfdsDType = np.int64,
131
136
  float_dtype: type_utils.TfdsDType = np.float32,
137
+ language: str | None = None,
132
138
  ):
133
139
  """Converts a Croissant field to a TFDS-compatible feature.
134
140
 
@@ -137,6 +143,10 @@ def datatype_converter(
137
143
  int_dtype: The dtype to use for TFDS integer features. Defaults to np.int64.
138
144
  float_dtype: The dtype to use for TFDS float features. Defaults to
139
145
  np.float32.
146
+ language: For Croissant jsonld which include multi-lingual descriptions, the
147
+ language code to use to extract the description to be used in TFDS. If
148
+ None, it will extract the description in English or the first available
149
+ language in the dictionary.
140
150
 
141
151
  Returns:
142
152
  Converted datatype for TFDS, or None when a Field does not specify a type.
@@ -156,7 +166,7 @@ def datatype_converter(
156
166
 
157
167
  field_data_type = field.data_type
158
168
  description = croissant_utils.extract_localized_string(
159
- field.description, field_name='description'
169
+ field.description, language=language, field_name='description'
160
170
  )
161
171
 
162
172
  if not field_data_type:
@@ -165,7 +175,10 @@ def datatype_converter(
165
175
  feature = features_dict.FeaturesDict(
166
176
  {
167
177
  subfield.id: datatype_converter(
168
- subfield, int_dtype=int_dtype, float_dtype=float_dtype
178
+ subfield,
179
+ int_dtype=int_dtype,
180
+ float_dtype=float_dtype,
181
+ language=language,
169
182
  )
170
183
  for subfield in field.sub_fields
171
184
  },
@@ -215,6 +228,7 @@ def datatype_converter(
215
228
  feature=feature,
216
229
  field=field,
217
230
  dtype_mapping=dtype_mapping,
231
+ language=language,
218
232
  )
219
233
  # If the field is repeated, we return a sequence feature. `field.repeated` is
220
234
  # deprecated starting from Croissant 1.1, but we still support it for
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tfds-nightly
3
- Version: 4.9.9.dev202508280044
3
+ Version: 4.9.9.dev202508300044
4
4
  Summary: tensorflow/datasets is a library of datasets ready to use with TensorFlow.
5
5
  Home-page: https://github.com/tensorflow/datasets
6
6
  Download-URL: https://github.com/tensorflow/datasets/tags
@@ -141,7 +141,7 @@ tensorflow_datasets/core/data_sources/python_test.py,sha256=O3yqMPx40JlHN0uFfZPN
141
141
  tensorflow_datasets/core/dataset_builders/__init__.py,sha256=StTA3euephqDZdpTzJQgfWNqB5inZosrAhaWg2BOeio,1945
142
142
  tensorflow_datasets/core/dataset_builders/adhoc_builder.py,sha256=1a-5hVjf9t24SD9fWzDDuKoOrA-Vmydf5QxvU7ap-sI,9263
143
143
  tensorflow_datasets/core/dataset_builders/adhoc_builder_test.py,sha256=yhRwrznK78MvHeWGRggnMTiyx_SlR1z30iD5VU3Gweo,13096
144
- tensorflow_datasets/core/dataset_builders/croissant_builder.py,sha256=nmRIRZZGJjXtJgcvlTOsNKbqsAjosjn_M_zOu86uc04,17253
144
+ tensorflow_datasets/core/dataset_builders/croissant_builder.py,sha256=EoK93nCth72yF_Qxjecahu_gLIo58ci2GEz3XkKjanc,17998
145
145
  tensorflow_datasets/core/dataset_builders/croissant_builder_test.py,sha256=ordMGrhNh-S1MjfY0QO8HUnsangqJCQCo3wCVBvMToA,16220
146
146
  tensorflow_datasets/core/dataset_builders/huggingface_dataset_builder.py,sha256=Loq3qeGk1Ias-d2oT_dK47BRNgTA4LKJchNGh7aA4a0,18313
147
147
  tensorflow_datasets/core/dataset_builders/huggingface_dataset_builder_test.py,sha256=6N3DLsry9LhDqhpleaoXrrhaGiLJMBgUlwDnAji-1fI,4389
@@ -2471,10 +2471,10 @@ tensorflow_datasets/vision_language/wit/wit_test.py,sha256=PXS8DMNW-MDrT2p5oy4Ic
2471
2471
  tensorflow_datasets/vision_language/wit_kaggle/__init__.py,sha256=vGwSGeM8WE4Q-l0-eEE1sBojmk6YT0l1OO60AWa4Q40,719
2472
2472
  tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle.py,sha256=q-vX_FBzIwsFxL4sY9vuyQ3UQD2PLM4yhUR4U6l-qao,16903
2473
2473
  tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle_test.py,sha256=ZymHT1NkmD-pUnh3BmM3_g30c5afsWYnmqDD9dVyDSA,1778
2474
- tfds_nightly-4.9.9.dev202508280044.dist-info/licenses/AUTHORS,sha256=nvBG4WwfgjuOu1oZkuQKw9kg7X6rve679ObS-YDDmXg,309
2475
- tfds_nightly-4.9.9.dev202508280044.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
2476
- tfds_nightly-4.9.9.dev202508280044.dist-info/METADATA,sha256=NYDGgvfbautnlGWNWg2_8pOq9eblXgaPjGU-2__DPco,11291
2477
- tfds_nightly-4.9.9.dev202508280044.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2478
- tfds_nightly-4.9.9.dev202508280044.dist-info/entry_points.txt,sha256=eHEL7nF5y1uCY2FgkuYIdE062epJXlAQTSdq89px4p4,73
2479
- tfds_nightly-4.9.9.dev202508280044.dist-info/top_level.txt,sha256=bAevmk9209s_oxVZVlN6hSDIVS423qrMQvmcWSvW4do,20
2480
- tfds_nightly-4.9.9.dev202508280044.dist-info/RECORD,,
2474
+ tfds_nightly-4.9.9.dev202508300044.dist-info/licenses/AUTHORS,sha256=nvBG4WwfgjuOu1oZkuQKw9kg7X6rve679ObS-YDDmXg,309
2475
+ tfds_nightly-4.9.9.dev202508300044.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
2476
+ tfds_nightly-4.9.9.dev202508300044.dist-info/METADATA,sha256=wrFDg9J64IamXsXB0CogdebsLOwFw06ghBx2ZVA8x9I,11291
2477
+ tfds_nightly-4.9.9.dev202508300044.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2478
+ tfds_nightly-4.9.9.dev202508300044.dist-info/entry_points.txt,sha256=eHEL7nF5y1uCY2FgkuYIdE062epJXlAQTSdq89px4p4,73
2479
+ tfds_nightly-4.9.9.dev202508300044.dist-info/top_level.txt,sha256=bAevmk9209s_oxVZVlN6hSDIVS423qrMQvmcWSvW4do,20
2480
+ tfds_nightly-4.9.9.dev202508300044.dist-info/RECORD,,