tfds-nightly 4.9.9.dev202508220044__py3-none-any.whl → 4.9.9.dev202508240045__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tensorflow_datasets/core/read_only_builder.py +4 -3
- tensorflow_datasets/datasets/multi_news/multi_news_dataset_builder.py +13 -5
- tensorflow_datasets/scripts/documentation/build_community_catalog.py +2 -6
- tensorflow_datasets/scripts/documentation/dataset_markdown_builder.py +6 -6
- {tfds_nightly-4.9.9.dev202508220044.dist-info → tfds_nightly-4.9.9.dev202508240045.dist-info}/METADATA +1 -1
- {tfds_nightly-4.9.9.dev202508220044.dist-info → tfds_nightly-4.9.9.dev202508240045.dist-info}/RECORD +11 -12
- tensorflow_datasets/url_checksums/multi_news.txt +0 -1
- {tfds_nightly-4.9.9.dev202508220044.dist-info → tfds_nightly-4.9.9.dev202508240045.dist-info}/WHEEL +0 -0
- {tfds_nightly-4.9.9.dev202508220044.dist-info → tfds_nightly-4.9.9.dev202508240045.dist-info}/entry_points.txt +0 -0
- {tfds_nightly-4.9.9.dev202508220044.dist-info → tfds_nightly-4.9.9.dev202508240045.dist-info}/licenses/AUTHORS +0 -0
- {tfds_nightly-4.9.9.dev202508220044.dist-info → tfds_nightly-4.9.9.dev202508240045.dist-info}/licenses/LICENSE +0 -0
- {tfds_nightly-4.9.9.dev202508220044.dist-info → tfds_nightly-4.9.9.dev202508240045.dist-info}/top_level.txt +0 -0
@@ -314,10 +314,11 @@ def builder_from_files(
|
|
314
314
|
DatasetNotFoundError: If the dataset cannot be loaded.
|
315
315
|
"""
|
316
316
|
# Find and load dataset builder.
|
317
|
-
|
317
|
+
copy_builder_kwargs = dict(builder_kwargs)
|
318
|
+
builder_dir = _find_builder_dir(name, **copy_builder_kwargs)
|
318
319
|
if builder_dir is None:
|
319
320
|
data_dirs = file_utils.list_data_dirs(
|
320
|
-
given_data_dir=
|
321
|
+
given_data_dir=copy_builder_kwargs.get('data_dir')
|
321
322
|
)
|
322
323
|
raise registered.DatasetNotFoundError(
|
323
324
|
f'Could not find dataset files for: {name}. Make sure you have the'
|
@@ -325,7 +326,7 @@ def builder_from_files(
|
|
325
326
|
f'and that it has been generated in: {data_dirs}. If the dataset has'
|
326
327
|
' configs, you might have to specify the config name.'
|
327
328
|
)
|
328
|
-
file_format =
|
329
|
+
file_format = copy_builder_kwargs.pop('file_format', None)
|
329
330
|
return builder_from_directory(builder_dir, file_format=file_format)
|
330
331
|
|
331
332
|
|
@@ -34,9 +34,16 @@ class Builder(tfds.core.GeneratorBasedBuilder):
|
|
34
34
|
VERSION = tfds.core.Version("2.1.0")
|
35
35
|
RELEASE_NOTES = {
|
36
36
|
"1.0.0": "Initial release.",
|
37
|
-
"2.0.0": "Update the dataset with valid URLs.",
|
38
|
-
"2.1.0":
|
37
|
+
"2.0.0": "[Do not use] Update the dataset with valid URLs.",
|
38
|
+
"2.1.0": (
|
39
|
+
"Update the dataset with the correct URLs. The URLs in this version"
|
40
|
+
" come from HuggingFace's dataset repo, which is curated by the same"
|
41
|
+
" author: https://huggingface.co/datasets/alexfabbri/multi_news."
|
42
|
+
),
|
39
43
|
}
|
44
|
+
BLOCKED_VERSIONS = tfds.core.utils.BlockedVersions(
|
45
|
+
versions={"2.0.0": "The URLs of this version are invalid."}
|
46
|
+
)
|
40
47
|
|
41
48
|
def _info(self) -> tfds.core.DatasetInfo:
|
42
49
|
"""Returns the dataset metadata."""
|
@@ -77,9 +84,10 @@ class Builder(tfds.core.GeneratorBasedBuilder):
|
|
77
84
|
).open() as tgt_f:
|
78
85
|
for i, (src_line, tgt_line) in enumerate(zip(src_f, tgt_f)):
|
79
86
|
yield i, {
|
80
|
-
# In original file, each line has one example and natural
|
81
|
-
# tokens "\n" are being replaced with "NEWLINE_CHAR"
|
82
|
-
# the natural newline token to avoid special
|
87
|
+
# In the original file, each line has one example and natural
|
88
|
+
# newline tokens "\n" are being replaced with "NEWLINE_CHAR"
|
89
|
+
# Here, we restore the natural newline token to avoid the special
|
90
|
+
# vocab token "NEWLINE_CHAR".
|
83
91
|
_DOCUMENT: src_line.strip().replace("NEWLINE_CHAR", "\n"),
|
84
92
|
_SUMMARY: tgt_line.strip().lstrip(),
|
85
93
|
}
|
@@ -168,7 +168,7 @@ class DatasetDocumentation:
|
|
168
168
|
)
|
169
169
|
|
170
170
|
def to_details_markdown(self) -> str:
|
171
|
-
"""
|
171
|
+
"""Markdown to be shown on the details page for the namespace."""
|
172
172
|
extra_links = self.format_extra_links(prefix='* ', infix='\n')
|
173
173
|
details = self.templates.dataset_details_template.format(
|
174
174
|
name=self.name,
|
@@ -194,9 +194,6 @@ class DatasetDocumentation:
|
|
194
194
|
|
195
195
|
def documentation(self, keep_short: bool = False) -> str:
|
196
196
|
"""Returns detailed documentation for all configs of this dataset."""
|
197
|
-
# TODO(weide): if e.g. the description contains markdown chars, then it
|
198
|
-
# messes up the page. Try escaping backticks or using code blocks.
|
199
|
-
# TODO(weide): how to format citation?
|
200
197
|
header_template = '## {config_name}'
|
201
198
|
template = textwrap.dedent("""
|
202
199
|
Use the following command to load this dataset in TFDS:
|
@@ -207,9 +204,7 @@ class DatasetDocumentation:
|
|
207
204
|
|
208
205
|
* **Description**:
|
209
206
|
|
210
|
-
```
|
211
207
|
{description}
|
212
|
-
```
|
213
208
|
|
214
209
|
* **License**: {license}
|
215
210
|
* **Version**: {version}
|
@@ -364,6 +359,7 @@ class HuggingfaceDatasetDocumentation(GithubDatasetDocumentation):
|
|
364
359
|
)
|
365
360
|
)
|
366
361
|
|
362
|
+
version = None
|
367
363
|
if isinstance(config['version'], dict):
|
368
364
|
version = config['version']['version_str']
|
369
365
|
elif isinstance(config['version'], str):
|
@@ -222,9 +222,9 @@ class VersionSection(Section):
|
|
222
222
|
all_versions = set(tfds.core.Version(v) for v in all_versions)
|
223
223
|
for v in sorted(all_versions): # List all available versions
|
224
224
|
if v == builder.version: # Highlight the default version
|
225
|
-
version_name = '**`{}`** (default)'
|
225
|
+
version_name = f'**`{v}`** (default)'
|
226
226
|
else:
|
227
|
-
version_name = '`{}`'
|
227
|
+
version_name = f'`{v}`'
|
228
228
|
if (
|
229
229
|
v in curr_versions # Filter versions only present in RELEASE_NOTES
|
230
230
|
and self._nightly_doc_util
|
@@ -322,14 +322,14 @@ class AutocacheSection(Section):
|
|
322
322
|
autocached_info_parts = []
|
323
323
|
if always_cached:
|
324
324
|
split_names_str = ', '.join(always_cached)
|
325
|
-
autocached_info_parts.append('Yes ({})'
|
325
|
+
autocached_info_parts.append(f'Yes ({split_names_str})')
|
326
326
|
if never_cached:
|
327
327
|
split_names_str = ', '.join(never_cached)
|
328
|
-
autocached_info_parts.append('No ({})'
|
328
|
+
autocached_info_parts.append(f'No ({split_names_str})')
|
329
329
|
if unshuffle_cached:
|
330
330
|
split_names_str = ', '.join(unshuffle_cached)
|
331
331
|
autocached_info_parts.append(
|
332
|
-
'Only when `shuffle_files=False` ({})'
|
332
|
+
f'Only when `shuffle_files=False` ({split_names_str})'
|
333
333
|
)
|
334
334
|
autocached_info = ', '.join(autocached_info_parts)
|
335
335
|
return autocached_info
|
@@ -346,7 +346,7 @@ class SplitInfoSection(Section):
|
|
346
346
|
|
347
347
|
def _get_num_examples(self, split_info):
|
348
348
|
if split_info.num_examples:
|
349
|
-
return '{:,}'
|
349
|
+
return f'{split_info.num_examples:,}'
|
350
350
|
return 'Not computed'
|
351
351
|
|
352
352
|
def get_key(self, builder: tfds.core.DatasetBuilder):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: tfds-nightly
|
3
|
-
Version: 4.9.9.
|
3
|
+
Version: 4.9.9.dev202508240045
|
4
4
|
Summary: tensorflow/datasets is a library of datasets ready to use with TensorFlow.
|
5
5
|
Home-page: https://github.com/tensorflow/datasets
|
6
6
|
Download-URL: https://github.com/tensorflow/datasets/tags
|
{tfds_nightly-4.9.9.dev202508220044.dist-info → tfds_nightly-4.9.9.dev202508240045.dist-info}/RECORD
RENAMED
@@ -90,7 +90,7 @@ tensorflow_datasets/core/load.py,sha256=1FQVnKwn8OVS_IgDbs9XN7aIVxQnyfrS0pI2X9dh
|
|
90
90
|
tensorflow_datasets/core/load_test.py,sha256=EEa8GuSIrEbn0RcGrWS3hmmatKBqBA3QOQWpQ1WjVgA,6490
|
91
91
|
tensorflow_datasets/core/naming.py,sha256=B_P77QDA4lkG2FUl4PrzZR0U6qqae_fLxruGBw3ZSVc,25614
|
92
92
|
tensorflow_datasets/core/naming_test.py,sha256=SwydgLjf2Mouow1yVZlc73sb8rp4522NhkTSEmg31vo,30112
|
93
|
-
tensorflow_datasets/core/read_only_builder.py,sha256=
|
93
|
+
tensorflow_datasets/core/read_only_builder.py,sha256=08BmsgEBXhX0ydGo9-9qHLTjBE6pIvAC6VMmc3b9S8U,22206
|
94
94
|
tensorflow_datasets/core/read_only_builder_test.py,sha256=Nw2KQCHBdTW7210Um2K3SzfqAOJB1v1r2yJkzdFehWA,24174
|
95
95
|
tensorflow_datasets/core/reader.py,sha256=s65FNOUDyAhd4OgHOSvE5lr4rnlUnOILjlVcRS6Qbhw,17345
|
96
96
|
tensorflow_datasets/core/reader_test.py,sha256=VcbUIDtvwjTRZs-0beQIiz26TALqLM5FgBsB-Gtw4kw,17882
|
@@ -935,7 +935,7 @@ tensorflow_datasets/datasets/multi_news/README.md,sha256=s0XL9ddJL7oNJ9r7mSG8_Hd
|
|
935
935
|
tensorflow_datasets/datasets/multi_news/TAGS.txt,sha256=OPDe1XqRiLYpvmXuPX2_aMaOKIXYsl562usmTEEqkwg,449
|
936
936
|
tensorflow_datasets/datasets/multi_news/__init__.py,sha256=eFqnTjU7s5iubj6XcKoU8lZUSHecOdnebZFm1vTkjbA,612
|
937
937
|
tensorflow_datasets/datasets/multi_news/checksums.tsv,sha256=S-8k82snl0zj1rjjO5LW7svXRNnDuWRc72qpIcBu6WA,1031
|
938
|
-
tensorflow_datasets/datasets/multi_news/multi_news_dataset_builder.py,sha256
|
938
|
+
tensorflow_datasets/datasets/multi_news/multi_news_dataset_builder.py,sha256=6ZeVdbrtXKIu1sjxURsoGdPLRW0SXmK0BhOnmyrwpk4,3419
|
939
939
|
tensorflow_datasets/datasets/multi_news/multi_news_dataset_builder_test.py,sha256=5amBMQ7PKbPLeZ2kiT18tEb_Z-CMS0DasTRT6goTjXQ,1259
|
940
940
|
tensorflow_datasets/datasets/natural_instructions/CITATIONS.bib,sha256=tcQG5eEGL_wr_5MEnZ6Q_ce2oZm6InbbRKiFqee9g7I,412
|
941
941
|
tensorflow_datasets/datasets/natural_instructions/README.md,sha256=mceGvviI62PO5mh59sYPP_9vuuwKo0g-m7LQilP1mBI,370
|
@@ -2007,11 +2007,11 @@ tensorflow_datasets/scripts/deployment/export_community_datasets_test.py,sha256=
|
|
2007
2007
|
tensorflow_datasets/scripts/documentation/__init__.py,sha256=Z8UWkv0wbzS4AzaLgSpYVGApYv5j57RWY0vN5Z553BQ,613
|
2008
2008
|
tensorflow_datasets/scripts/documentation/build_catalog.py,sha256=SYJoNW-VxvL8xx85uYlFBwbr1k64HcmRBfxsj9-sdYA,8680
|
2009
2009
|
tensorflow_datasets/scripts/documentation/build_catalog_test.py,sha256=qjnqK6lhBh-uNrjLQkEs3AbKFBo5uz_sxhhdT4ibOyA,2532
|
2010
|
-
tensorflow_datasets/scripts/documentation/build_community_catalog.py,sha256=
|
2010
|
+
tensorflow_datasets/scripts/documentation/build_community_catalog.py,sha256=58CT0UaHxw0-mZX1a1aoW96NkszgsntDXuxS_OOZtc8,19709
|
2011
2011
|
tensorflow_datasets/scripts/documentation/build_community_catalog_test.py,sha256=KvCmBzIePyztWPSrCqTJ_j_3puNWXxgSWSfvcMgQPgk,6352
|
2012
2012
|
tensorflow_datasets/scripts/documentation/collection_markdown_builder.py,sha256=4Oofl2dQjlvHTir46x2K6Vpa3amwPaB-3dm43f_GcS8,7287
|
2013
2013
|
tensorflow_datasets/scripts/documentation/collection_markdown_builder_test.py,sha256=t8KEbotAk6zH09HFvJhsrHW23uE8H3_UGLOHOFcvFeQ,3909
|
2014
|
-
tensorflow_datasets/scripts/documentation/dataset_markdown_builder.py,sha256=
|
2014
|
+
tensorflow_datasets/scripts/documentation/dataset_markdown_builder.py,sha256=4W-L77aM2B4xGNQC7i1p5yHvOlIJfC0dONTM-86yeoY,25498
|
2015
2015
|
tensorflow_datasets/scripts/documentation/dataset_markdown_builder_test.py,sha256=WsDbmAO6TYGFpn4VxF49FYvRy3ujNiysk38cWKlFC10,4219
|
2016
2016
|
tensorflow_datasets/scripts/documentation/doc_utils.py,sha256=DKHGhF7I4ZkKcDAJWYC8mxWBBtbWD211Yv6CTqOBSTw,10325
|
2017
2017
|
tensorflow_datasets/scripts/documentation/doc_utils_test.py,sha256=FSncjt0UCgvdN9WcvqzswwEuf7ZGmdUIRL480PzHxNw,5805
|
@@ -2399,7 +2399,6 @@ tensorflow_datasets/url_checksums/movie_lens.txt,sha256=DmPIlh1aM7PxNzI5sVmOGwC4
|
|
2399
2399
|
tensorflow_datasets/url_checksums/movie_rationales.txt,sha256=1GweBeFRzD61ISAkTR5MNiWuujW6PQymgp7ISGBgsAU,139
|
2400
2400
|
tensorflow_datasets/url_checksums/movielens.txt,sha256=i6St5kA_ZV6y8_mk_b47eE9RIf9Pc1VH6asv58kNPlo,731
|
2401
2401
|
tensorflow_datasets/url_checksums/moving_mnist.txt,sha256=OtC5WoEUStRKL2I7jAwIEFF6WvZ-z_1vDGPzxpnGxXA,166
|
2402
|
-
tensorflow_datasets/url_checksums/multi_news.txt,sha256=noajcrnQ_UK7sh-uRR9CJYaeBFenCmj_ZXr_5ih3Gu0,201
|
2403
2402
|
tensorflow_datasets/url_checksums/multi_nli.txt,sha256=LXDz04hlq0b9au9DDHaX_P-KGVi4ZHWV5wEGJcvD8bA,148
|
2404
2403
|
tensorflow_datasets/url_checksums/multi_nli_mismatch.txt,sha256=LXDz04hlq0b9au9DDHaX_P-KGVi4ZHWV5wEGJcvD8bA,148
|
2405
2404
|
tensorflow_datasets/url_checksums/omniglot.txt,sha256=4KFU4nJ5H772d1JmRBmQ2bzoL0rJqCzMdf6XCx1Xa_c,728
|
@@ -2472,10 +2471,10 @@ tensorflow_datasets/vision_language/wit/wit_test.py,sha256=PXS8DMNW-MDrT2p5oy4Ic
|
|
2472
2471
|
tensorflow_datasets/vision_language/wit_kaggle/__init__.py,sha256=vGwSGeM8WE4Q-l0-eEE1sBojmk6YT0l1OO60AWa4Q40,719
|
2473
2472
|
tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle.py,sha256=q-vX_FBzIwsFxL4sY9vuyQ3UQD2PLM4yhUR4U6l-qao,16903
|
2474
2473
|
tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle_test.py,sha256=ZymHT1NkmD-pUnh3BmM3_g30c5afsWYnmqDD9dVyDSA,1778
|
2475
|
-
tfds_nightly-4.9.9.
|
2476
|
-
tfds_nightly-4.9.9.
|
2477
|
-
tfds_nightly-4.9.9.
|
2478
|
-
tfds_nightly-4.9.9.
|
2479
|
-
tfds_nightly-4.9.9.
|
2480
|
-
tfds_nightly-4.9.9.
|
2481
|
-
tfds_nightly-4.9.9.
|
2474
|
+
tfds_nightly-4.9.9.dev202508240045.dist-info/licenses/AUTHORS,sha256=nvBG4WwfgjuOu1oZkuQKw9kg7X6rve679ObS-YDDmXg,309
|
2475
|
+
tfds_nightly-4.9.9.dev202508240045.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
|
2476
|
+
tfds_nightly-4.9.9.dev202508240045.dist-info/METADATA,sha256=qRe_2vex_eeA3DBe_tMZZPn9-0gsjNd6TNBD0U_ihsA,11291
|
2477
|
+
tfds_nightly-4.9.9.dev202508240045.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
2478
|
+
tfds_nightly-4.9.9.dev202508240045.dist-info/entry_points.txt,sha256=eHEL7nF5y1uCY2FgkuYIdE062epJXlAQTSdq89px4p4,73
|
2479
|
+
tfds_nightly-4.9.9.dev202508240045.dist-info/top_level.txt,sha256=bAevmk9209s_oxVZVlN6hSDIVS423qrMQvmcWSvW4do,20
|
2480
|
+
tfds_nightly-4.9.9.dev202508240045.dist-info/RECORD,,
|
@@ -1 +0,0 @@
|
|
1
|
-
https://drive.google.com/uc?export=download&id=1vRY2wM6rlOZrf9exGTm5pXj5ExlVwJ0C 256966232 64ae4d2483b248c9664b50bacfab6821f8a3e93f382c7587686fa4a127f77626 multi-news-original-20190725T164630Z-001.zip
|
{tfds_nightly-4.9.9.dev202508220044.dist-info → tfds_nightly-4.9.9.dev202508240045.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|