tfds-nightly 4.9.8.dev202504110044__py3-none-any.whl → 4.9.8.dev202504130103__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,24 @@
1
+ @inproceedings{matton-etal-2024-leakage,
2
+ title = "On Leakage of Code Generation Evaluation Datasets",
3
+ author = "Matton, Alexandre and
4
+ Sherborne, Tom and
5
+ Aumiller, Dennis and
6
+ Tommasone, Elena and
7
+ Alizadeh, Milad and
8
+ He, Jingyi and
9
+ Ma, Raymond and
10
+ Voisin, Maxime and
11
+ Gilsenan-McMahon, Ellen and
12
+ Gall{\'e}, Matthias",
13
+ editor = "Al-Onaizan, Yaser and
14
+ Bansal, Mohit and
15
+ Chen, Yun-Nung",
16
+ booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
17
+ month = nov,
18
+ year = "2024",
19
+ address = "Miami, Florida, USA",
20
+ publisher = "Association for Computational Linguistics",
21
+ url = "https://aclanthology.org/2024.findings-emnlp.772/",
22
+ doi = "10.18653/v1/2024.findings-emnlp.772",
23
+ pages = "13215--13223",
24
+ }
@@ -0,0 +1,6 @@
1
+ *Less Basic Python Programming* is a collection of 161 programming problems
2
+ with accompanying unit tests.
3
+ They were created with the aim of being fresh (not leaked at the time of
4
+ creation) and more difficult than similar datasets (e.g., HumanEval and MBPP).
5
+ It can serve as a drop-in replacement or enrichment of those datasets as they
6
+ are structured in an equivalent way.
@@ -0,0 +1,15 @@
1
+ # coding=utf-8
2
+ # Copyright 2024 The TensorFlow Datasets Authors.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
@@ -0,0 +1,174 @@
1
+ # coding=utf-8
2
+ # Copyright 2024 The TensorFlow Datasets Authors.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ # Copyright 2024 Cohere and the current dataset script contributor.
17
+ #
18
+ # Licensed under the Apache License, Version 2.0 (the "License");
19
+ # you may not use this file except in compliance with the License.
20
+ # You may obtain a copy of the License at
21
+ #
22
+ # http://www.apache.org/licenses/LICENSE-2.0
23
+ #
24
+ # Unless required by applicable law or agreed to in writing, software
25
+ # distributed under the License is distributed on an "AS IS" BASIS,
26
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
27
+ # See the License for the specific language governing permissions and
28
+ # limitations under the License.
29
+ # Author Note: Data loader is heavily inspired by the builder in
30
+ # https://github.com/google-research/google-research/tree/main/lbpp_dataset
31
+ """Cohere Less Basic Python Problems. All columns decoded."""
32
+
33
+ import base64
34
+ import json
35
+ import pickle
36
+ import zlib
37
+
38
+ from tensorflow_datasets.core.utils.lazy_imports_utils import pandas as pd
39
+ import tensorflow_datasets.public_api as tfds
40
+
41
+
42
+ _HOMEPAGE = "https://aclanthology.org/2024.findings-emnlp.772/"
43
+
44
+ _VERSION = tfds.core.Version("2.0.0")
45
+
46
+ _COLUMNS = [
47
+ "task_id",
48
+ "language",
49
+ "title",
50
+ "instruction",
51
+ "completion",
52
+ "test_file",
53
+ "test_list",
54
+ "signature",
55
+ "categories",
56
+ "test_setup",
57
+ ]
58
+
59
+ _LANGUAGES = ["python", "cpp", "go", "java", "js", "rust"]
60
+ _ALL_LANGUAGE_ALIASES = ["all", "multilingual"]
61
+ _LANGUAGE_ALIAS_MAP = {
62
+ "default": "python",
63
+ "javascript": "js",
64
+ }
65
+
66
+
67
+ def decode_str(str_to_decode: str):
68
+ return json.loads(
69
+ pickle.loads(
70
+ zlib.decompress(base64.b64decode(str_to_decode.encode("utf-8")))
71
+ )
72
+ )
73
+
74
+
75
+ class LBPPConfig(tfds.core.BuilderConfig):
76
+ """BuilderConfig."""
77
+
78
+ def __init__(self, name, description, features, **kwargs):
79
+ super(LBPPConfig, self).__init__(name=name, version=_VERSION, **kwargs)
80
+ self.name = name
81
+ self.description = description
82
+ self.features = features
83
+
84
+
85
+ class Builder(tfds.core.GeneratorBasedBuilder):
86
+ """Builder for LBPP dataset."""
87
+
88
+ VERSION = _VERSION
89
+ LICENSE = "apache-2.0"
90
+ BUILDER_CONFIGS = [
91
+ LBPPConfig(
92
+ name="all", description="Multilingual LBPP", features=_COLUMNS
93
+ ),
94
+ LBPPConfig(
95
+ name="multilingual",
96
+ description="Multilingual LBPP",
97
+ features=_COLUMNS,
98
+ ),
99
+ LBPPConfig(name="default", description="Python LBPP", features=_COLUMNS),
100
+ LBPPConfig(name="python", description="Python LBPP", features=_COLUMNS),
101
+ LBPPConfig(name="cpp", description="C++ LBPP", features=_COLUMNS),
102
+ LBPPConfig(name="go", description="Go LBPP", features=_COLUMNS),
103
+ LBPPConfig(name="java", description="Java LBPP", features=_COLUMNS),
104
+ LBPPConfig(name="js", description="JavaScript LBPP", features=_COLUMNS),
105
+ LBPPConfig(
106
+ name="javascript", description="JavaScript LBPP", features=_COLUMNS
107
+ ),
108
+ LBPPConfig(name="rust", description="Rust LBPP", features=_COLUMNS),
109
+ ]
110
+ DEFAULT_CONFIG_NAME = "python"
111
+
112
+ def _info(self):
113
+ return self.dataset_info_from_configs(
114
+ features=tfds.features.FeaturesDict({
115
+ "task_id": tfds.features.Text(),
116
+ "language": tfds.features.Text(),
117
+ "title": tfds.features.Text(),
118
+ "instruction": tfds.features.Text(),
119
+ "completion": tfds.features.Text(),
120
+ "test_file": tfds.features.Text(),
121
+ "test_list": tfds.features.Sequence(tfds.features.Text()),
122
+ "signature": tfds.features.Text(),
123
+ "categories": tfds.features.Sequence(tfds.features.Text()),
124
+ "test_setup": tfds.features.Text(),
125
+ }),
126
+ homepage=_HOMEPAGE,
127
+ supervised_keys=None,
128
+ )
129
+
130
+ def _split_generators(self, dl_manager):
131
+ # Map alias to actual language
132
+ data_loading_name = _LANGUAGE_ALIAS_MAP.get(
133
+ self.builder_config.name, self.builder_config.name
134
+ )
135
+ hf_url_prefix = (
136
+ "https://huggingface.co/datasets/CohereForAI/lbpp/resolve/main/"
137
+ )
138
+ if data_loading_name in _ALL_LANGUAGE_ALIASES:
139
+ # Download all languages
140
+ download_targets = [
141
+ f"{hf_url_prefix}{lang}/test.parquet" for lang in _LANGUAGES
142
+ ]
143
+ else:
144
+ download_targets = [f"{hf_url_prefix}{data_loading_name}/test.parquet"]
145
+
146
+ downloaded_files = dl_manager.download(download_targets)
147
+
148
+ return [
149
+ tfds.core.SplitGenerator(
150
+ name=tfds.Split.TEST,
151
+ gen_kwargs={
152
+ "filepaths": downloaded_files,
153
+ },
154
+ )
155
+ ]
156
+
157
+ def _generate_examples(self, filepaths: list[str]):
158
+ key = 0
159
+ for filepath in filepaths:
160
+ df = pd.read_parquet(filepath)
161
+ for line in df.to_dict(orient="records"):
162
+ yield key, {
163
+ "task_id": line["task_id"],
164
+ "language": line["language"],
165
+ "title": line["title"],
166
+ "instruction": line["instruction"],
167
+ "completion": decode_str(line["completion"]),
168
+ "test_file": decode_str(line["test_file"]),
169
+ "test_list": decode_str(line["test_list"]),
170
+ "signature": line["signature"] or "",
171
+ "categories": line["categories"],
172
+ "test_setup": decode_str(line["test_setup"]),
173
+ }
174
+ key += 1
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tfds-nightly
3
- Version: 4.9.8.dev202504110044
3
+ Version: 4.9.8.dev202504130103
4
4
  Summary: tensorflow/datasets is a library of datasets ready to use with TensorFlow.
5
5
  Home-page: https://github.com/tensorflow/datasets
6
6
  Download-URL: https://github.com/tensorflow/datasets/tags
@@ -818,6 +818,10 @@ tensorflow_datasets/datasets/lambada/__init__.py,sha256=UCI6C5ryp7VdOk-mbtcPxp6J
818
818
  tensorflow_datasets/datasets/lambada/checksums.tsv,sha256=EgETmQzHlYMzJjZ5u71Pho16UdrbagX34a5jcpgmSmU,172
819
819
  tensorflow_datasets/datasets/lambada/lambada_dataset_builder.py,sha256=BNYLj9XI0pdwm1c7wO5ZqJ7BMbMdnCmUr44nX2F8nxE,2269
820
820
  tensorflow_datasets/datasets/lambada/lambada_dataset_builder_test.py,sha256=gJwJQ6CJYOt6QlO5c5-VALf804ByrDMwf4dGjOTjE5Y,1070
821
+ tensorflow_datasets/datasets/lbpp/CITATIONS.bib,sha256=-GCp4MjOuqbVCM6scAoXKhNVuRblq-9W1gJQ304FvPU,835
822
+ tensorflow_datasets/datasets/lbpp/README.md,sha256=g1DwOdrH-6todsxwhdnRoJGkB2NzXw5Mq-CgJ-Lsnjo,372
823
+ tensorflow_datasets/datasets/lbpp/__init__.py,sha256=UCI6C5ryp7VdOk-mbtcPxp6JjEup1pI68YhAiayrHjc,612
824
+ tensorflow_datasets/datasets/lbpp/lbpp_dataset_builder.py,sha256=OAG0Pm-jAdb37QB4Al70uXzu7KLqq5mXtT2EirzFWAE,5835
821
825
  tensorflow_datasets/datasets/lfw/CITATIONS.bib,sha256=hYwRfGh52dQKV9Z4EX0ec-esXVWaOY3d3g-ulDGYyKQ,340
822
826
  tensorflow_datasets/datasets/lfw/README.md,sha256=SKbgr4y6FxNy2Abei2kk2VQ6N1a7MHKcLOhnTGhLI3I,98
823
827
  tensorflow_datasets/datasets/lfw/TAGS.txt,sha256=3k_aYCPHQfbOnta3Z41F8BFfpHbzanXx2ZLL7h59TpU,46
@@ -2455,10 +2459,10 @@ tensorflow_datasets/vision_language/wit/wit_test.py,sha256=DJ687VN9hAp6SLXnr_P12
2455
2459
  tensorflow_datasets/vision_language/wit_kaggle/__init__.py,sha256=38tJQ73VHz8vOJn-AyZh2we2YJucbSRIgmgcrsC6bQM,719
2456
2460
  tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle.py,sha256=k-7YD1SGr5bASfdR2_09rrqz-8cpWdIcBWWEXhCvzuk,16903
2457
2461
  tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle_test.py,sha256=vLlluBW77ASNVC4ix7t8idkSUBI6q1-B7zmRV_ICCQM,1778
2458
- tfds_nightly-4.9.8.dev202504110044.dist-info/licenses/AUTHORS,sha256=nvBG4WwfgjuOu1oZkuQKw9kg7X6rve679ObS-YDDmXg,309
2459
- tfds_nightly-4.9.8.dev202504110044.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
2460
- tfds_nightly-4.9.8.dev202504110044.dist-info/METADATA,sha256=H17CedG4r0iROeeuCajllqeuV30nomA-6fC7rTODtjQ,11879
2461
- tfds_nightly-4.9.8.dev202504110044.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
2462
- tfds_nightly-4.9.8.dev202504110044.dist-info/entry_points.txt,sha256=eHEL7nF5y1uCY2FgkuYIdE062epJXlAQTSdq89px4p4,73
2463
- tfds_nightly-4.9.8.dev202504110044.dist-info/top_level.txt,sha256=bAevmk9209s_oxVZVlN6hSDIVS423qrMQvmcWSvW4do,20
2464
- tfds_nightly-4.9.8.dev202504110044.dist-info/RECORD,,
2462
+ tfds_nightly-4.9.8.dev202504130103.dist-info/licenses/AUTHORS,sha256=nvBG4WwfgjuOu1oZkuQKw9kg7X6rve679ObS-YDDmXg,309
2463
+ tfds_nightly-4.9.8.dev202504130103.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
2464
+ tfds_nightly-4.9.8.dev202504130103.dist-info/METADATA,sha256=Xb2ci-GJ8aIVhADL-FVI0VIfcvUCVEUiPDRUNdZTGhI,11879
2465
+ tfds_nightly-4.9.8.dev202504130103.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
2466
+ tfds_nightly-4.9.8.dev202504130103.dist-info/entry_points.txt,sha256=eHEL7nF5y1uCY2FgkuYIdE062epJXlAQTSdq89px4p4,73
2467
+ tfds_nightly-4.9.8.dev202504130103.dist-info/top_level.txt,sha256=bAevmk9209s_oxVZVlN6hSDIVS423qrMQvmcWSvW4do,20
2468
+ tfds_nightly-4.9.8.dev202504130103.dist-info/RECORD,,