tfds-nightly 4.9.9.dev202508280044__py3-none-any.whl → 4.9.9.dev202508290044__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tensorflow_datasets/core/dataset_builders/croissant_builder.py +17 -3
- {tfds_nightly-4.9.9.dev202508280044.dist-info → tfds_nightly-4.9.9.dev202508290044.dist-info}/METADATA +1 -1
- {tfds_nightly-4.9.9.dev202508280044.dist-info → tfds_nightly-4.9.9.dev202508290044.dist-info}/RECORD +8 -8
- {tfds_nightly-4.9.9.dev202508280044.dist-info → tfds_nightly-4.9.9.dev202508290044.dist-info}/WHEEL +0 -0
- {tfds_nightly-4.9.9.dev202508280044.dist-info → tfds_nightly-4.9.9.dev202508290044.dist-info}/entry_points.txt +0 -0
- {tfds_nightly-4.9.9.dev202508280044.dist-info → tfds_nightly-4.9.9.dev202508290044.dist-info}/licenses/AUTHORS +0 -0
- {tfds_nightly-4.9.9.dev202508280044.dist-info → tfds_nightly-4.9.9.dev202508290044.dist-info}/licenses/LICENSE +0 -0
- {tfds_nightly-4.9.9.dev202508280044.dist-info → tfds_nightly-4.9.9.dev202508290044.dist-info}/top_level.txt +0 -0
@@ -85,6 +85,7 @@ def array_datatype_converter(
|
|
85
85
|
feature: type_utils.TfdsDType | feature_lib.FeatureConnector | None,
|
86
86
|
field: mlc.Field,
|
87
87
|
dtype_mapping: Mapping[type_utils.TfdsDType, type_utils.TfdsDType],
|
88
|
+
language: str | None = None,
|
88
89
|
):
|
89
90
|
"""Includes the given feature in a sequence or tensor feature.
|
90
91
|
|
@@ -97,6 +98,10 @@ def array_datatype_converter(
|
|
97
98
|
field: The mlc.Field object.
|
98
99
|
dtype_mapping: A mapping of dtypes to the corresponding dtypes that will be
|
99
100
|
used in TFDS.
|
101
|
+
language: For Croissant jsonld which include multi-lingual descriptions, the
|
102
|
+
language code to use to extract the description to be used in TFDS. If
|
103
|
+
None, it will extract the description in English or the first available
|
104
|
+
language in the dictionary.
|
100
105
|
|
101
106
|
Returns:
|
102
107
|
A sequence or tensor feature including the inner feature.
|
@@ -108,7 +113,7 @@ def array_datatype_converter(
|
|
108
113
|
field_dtype = field.data_type
|
109
114
|
|
110
115
|
description = croissant_utils.extract_localized_string(
|
111
|
-
field.description, field_name='description'
|
116
|
+
field.description, language=language, field_name='description'
|
112
117
|
)
|
113
118
|
|
114
119
|
if len(field.array_shape_tuple) == 1:
|
@@ -129,6 +134,7 @@ def datatype_converter(
|
|
129
134
|
field: mlc.Field,
|
130
135
|
int_dtype: type_utils.TfdsDType = np.int64,
|
131
136
|
float_dtype: type_utils.TfdsDType = np.float32,
|
137
|
+
language: str | None = None,
|
132
138
|
):
|
133
139
|
"""Converts a Croissant field to a TFDS-compatible feature.
|
134
140
|
|
@@ -137,6 +143,10 @@ def datatype_converter(
|
|
137
143
|
int_dtype: The dtype to use for TFDS integer features. Defaults to np.int64.
|
138
144
|
float_dtype: The dtype to use for TFDS float features. Defaults to
|
139
145
|
np.float32.
|
146
|
+
language: For Croissant jsonld which include multi-lingual descriptions, the
|
147
|
+
language code to use to extract the description to be used in TFDS. If
|
148
|
+
None, it will extract the description in English or the first available
|
149
|
+
language in the dictionary.
|
140
150
|
|
141
151
|
Returns:
|
142
152
|
Converted datatype for TFDS, or None when a Field does not specify a type.
|
@@ -156,7 +166,7 @@ def datatype_converter(
|
|
156
166
|
|
157
167
|
field_data_type = field.data_type
|
158
168
|
description = croissant_utils.extract_localized_string(
|
159
|
-
field.description, field_name='description'
|
169
|
+
field.description, language=language, field_name='description'
|
160
170
|
)
|
161
171
|
|
162
172
|
if not field_data_type:
|
@@ -165,7 +175,10 @@ def datatype_converter(
|
|
165
175
|
feature = features_dict.FeaturesDict(
|
166
176
|
{
|
167
177
|
subfield.id: datatype_converter(
|
168
|
-
subfield,
|
178
|
+
subfield,
|
179
|
+
int_dtype=int_dtype,
|
180
|
+
float_dtype=float_dtype,
|
181
|
+
language=language,
|
169
182
|
)
|
170
183
|
for subfield in field.sub_fields
|
171
184
|
},
|
@@ -215,6 +228,7 @@ def datatype_converter(
|
|
215
228
|
feature=feature,
|
216
229
|
field=field,
|
217
230
|
dtype_mapping=dtype_mapping,
|
231
|
+
language=language,
|
218
232
|
)
|
219
233
|
# If the field is repeated, we return a sequence feature. `field.repeated` is
|
220
234
|
# deprecated starting from Croissant 1.1, but we still support it for
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: tfds-nightly
|
3
|
-
Version: 4.9.9.dev202508280044
|
3
|
+
Version: 4.9.9.dev202508290044
|
4
4
|
Summary: tensorflow/datasets is a library of datasets ready to use with TensorFlow.
|
5
5
|
Home-page: https://github.com/tensorflow/datasets
|
6
6
|
Download-URL: https://github.com/tensorflow/datasets/tags
|
{tfds_nightly-4.9.9.dev202508280044.dist-info → tfds_nightly-4.9.9.dev202508290044.dist-info}/RECORD
RENAMED
@@ -141,7 +141,7 @@ tensorflow_datasets/core/data_sources/python_test.py,sha256=O3yqMPx40JlHN0uFfZPN
|
|
141
141
|
tensorflow_datasets/core/dataset_builders/__init__.py,sha256=StTA3euephqDZdpTzJQgfWNqB5inZosrAhaWg2BOeio,1945
|
142
142
|
tensorflow_datasets/core/dataset_builders/adhoc_builder.py,sha256=1a-5hVjf9t24SD9fWzDDuKoOrA-Vmydf5QxvU7ap-sI,9263
|
143
143
|
tensorflow_datasets/core/dataset_builders/adhoc_builder_test.py,sha256=yhRwrznK78MvHeWGRggnMTiyx_SlR1z30iD5VU3Gweo,13096
|
144
|
-
tensorflow_datasets/core/dataset_builders/croissant_builder.py,sha256=
|
144
|
+
tensorflow_datasets/core/dataset_builders/croissant_builder.py,sha256=EoK93nCth72yF_Qxjecahu_gLIo58ci2GEz3XkKjanc,17998
|
145
145
|
tensorflow_datasets/core/dataset_builders/croissant_builder_test.py,sha256=ordMGrhNh-S1MjfY0QO8HUnsangqJCQCo3wCVBvMToA,16220
|
146
146
|
tensorflow_datasets/core/dataset_builders/huggingface_dataset_builder.py,sha256=Loq3qeGk1Ias-d2oT_dK47BRNgTA4LKJchNGh7aA4a0,18313
|
147
147
|
tensorflow_datasets/core/dataset_builders/huggingface_dataset_builder_test.py,sha256=6N3DLsry9LhDqhpleaoXrrhaGiLJMBgUlwDnAji-1fI,4389
|
@@ -2471,10 +2471,10 @@ tensorflow_datasets/vision_language/wit/wit_test.py,sha256=PXS8DMNW-MDrT2p5oy4Ic
|
|
2471
2471
|
tensorflow_datasets/vision_language/wit_kaggle/__init__.py,sha256=vGwSGeM8WE4Q-l0-eEE1sBojmk6YT0l1OO60AWa4Q40,719
|
2472
2472
|
tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle.py,sha256=q-vX_FBzIwsFxL4sY9vuyQ3UQD2PLM4yhUR4U6l-qao,16903
|
2473
2473
|
tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle_test.py,sha256=ZymHT1NkmD-pUnh3BmM3_g30c5afsWYnmqDD9dVyDSA,1778
|
2474
|
-
tfds_nightly-4.9.9.
|
2475
|
-
tfds_nightly-4.9.9.
|
2476
|
-
tfds_nightly-4.9.9.
|
2477
|
-
tfds_nightly-4.9.9.
|
2478
|
-
tfds_nightly-4.9.9.
|
2479
|
-
tfds_nightly-4.9.9.
|
2480
|
-
tfds_nightly-4.9.9.
|
2474
|
+
tfds_nightly-4.9.9.dev202508290044.dist-info/licenses/AUTHORS,sha256=nvBG4WwfgjuOu1oZkuQKw9kg7X6rve679ObS-YDDmXg,309
|
2475
|
+
tfds_nightly-4.9.9.dev202508290044.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
|
2476
|
+
tfds_nightly-4.9.9.dev202508290044.dist-info/METADATA,sha256=nkC_JlCaoq6KaOyqYSu_lwR9AZuWyUJPGTXRfs07lD4,11291
|
2477
|
+
tfds_nightly-4.9.9.dev202508290044.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
2478
|
+
tfds_nightly-4.9.9.dev202508290044.dist-info/entry_points.txt,sha256=eHEL7nF5y1uCY2FgkuYIdE062epJXlAQTSdq89px4p4,73
|
2479
|
+
tfds_nightly-4.9.9.dev202508290044.dist-info/top_level.txt,sha256=bAevmk9209s_oxVZVlN6hSDIVS423qrMQvmcWSvW4do,20
|
2480
|
+
tfds_nightly-4.9.9.dev202508290044.dist-info/RECORD,,
|
{tfds_nightly-4.9.9.dev202508280044.dist-info → tfds_nightly-4.9.9.dev202508290044.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|