PyPI - wandb - Versions diffs - 0.22.0__py3-none-macosx_12_0_arm64.whl → 0.22.2__py3-none-macosx_12_0_arm64.whl - Mend

wandb 0.22.0__py3-none-macosx_12_0_arm64.whl → 0.22.2__py3-none-macosx_12_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (114)

wandb/__init__.py +1 -1
wandb/__init__.pyi +8 -5
wandb/_pydantic/__init__.py +12 -11
wandb/_pydantic/base.py +49 -19
wandb/apis/__init__.py +2 -0
wandb/apis/attrs.py +2 -0
wandb/apis/importers/internals/internal.py +16 -23
wandb/apis/internal.py +2 -0
wandb/apis/normalize.py +2 -0
wandb/apis/public/__init__.py +3 -2
wandb/apis/public/api.py +215 -164
wandb/apis/public/artifacts.py +23 -20
wandb/apis/public/const.py +2 -0
wandb/apis/public/files.py +33 -24
wandb/apis/public/history.py +2 -0
wandb/apis/public/jobs.py +20 -18
wandb/apis/public/projects.py +4 -2
wandb/apis/public/query_generator.py +3 -0
wandb/apis/public/registries/__init__.py +7 -0
wandb/apis/public/registries/_freezable_list.py +9 -12
wandb/apis/public/registries/registries_search.py +8 -6
wandb/apis/public/registries/registry.py +22 -17
wandb/apis/public/reports.py +2 -0
wandb/apis/public/runs.py +261 -57
wandb/apis/public/sweeps.py +10 -9
wandb/apis/public/teams.py +2 -0
wandb/apis/public/users.py +2 -0
wandb/apis/public/utils.py +16 -15
wandb/automations/_generated/__init__.py +54 -127
wandb/automations/_generated/create_generic_webhook_integration.py +1 -7
wandb/automations/_generated/fragments.py +26 -91
wandb/bin/gpu_stats +0 -0
wandb/bin/wandb-core +0 -0
wandb/cli/beta.py +16 -2
wandb/cli/beta_leet.py +74 -0
wandb/cli/beta_sync.py +9 -11
wandb/cli/cli.py +34 -7
wandb/errors/errors.py +3 -3
wandb/proto/v3/wandb_api_pb2.py +86 -0
wandb/proto/v3/wandb_internal_pb2.py +352 -351
wandb/proto/v3/wandb_settings_pb2.py +2 -2
wandb/proto/v3/wandb_sync_pb2.py +19 -6
wandb/proto/v4/wandb_api_pb2.py +37 -0
wandb/proto/v4/wandb_internal_pb2.py +352 -351
wandb/proto/v4/wandb_settings_pb2.py +2 -2
wandb/proto/v4/wandb_sync_pb2.py +10 -6
wandb/proto/v5/wandb_api_pb2.py +38 -0
wandb/proto/v5/wandb_internal_pb2.py +352 -351
wandb/proto/v5/wandb_settings_pb2.py +2 -2
wandb/proto/v5/wandb_sync_pb2.py +10 -6
wandb/proto/v6/wandb_api_pb2.py +48 -0
wandb/proto/v6/wandb_internal_pb2.py +352 -351
wandb/proto/v6/wandb_settings_pb2.py +2 -2
wandb/proto/v6/wandb_sync_pb2.py +10 -6
wandb/proto/wandb_api_pb2.py +18 -0
wandb/proto/wandb_generate_proto.py +1 -0
wandb/sdk/artifacts/_factories.py +7 -2
wandb/sdk/artifacts/_generated/__init__.py +112 -412
wandb/sdk/artifacts/_generated/fragments.py +65 -0
wandb/sdk/artifacts/_generated/operations.py +52 -22
wandb/sdk/artifacts/_generated/run_input_artifacts.py +3 -23
wandb/sdk/artifacts/_generated/run_output_artifacts.py +3 -23
wandb/sdk/artifacts/_generated/type_info.py +19 -0
wandb/sdk/artifacts/_gqlutils.py +47 -0
wandb/sdk/artifacts/_models/__init__.py +4 -0
wandb/sdk/artifacts/_models/base_model.py +20 -0
wandb/sdk/artifacts/_validators.py +40 -12
wandb/sdk/artifacts/artifact.py +99 -118
wandb/sdk/artifacts/artifact_file_cache.py +6 -1
wandb/sdk/artifacts/artifact_manifest_entry.py +67 -14
wandb/sdk/artifacts/storage_handler.py +18 -12
wandb/sdk/artifacts/storage_handlers/azure_handler.py +11 -6
wandb/sdk/artifacts/storage_handlers/gcs_handler.py +9 -6
wandb/sdk/artifacts/storage_handlers/http_handler.py +9 -4
wandb/sdk/artifacts/storage_handlers/local_file_handler.py +10 -6
wandb/sdk/artifacts/storage_handlers/multi_handler.py +5 -4
wandb/sdk/artifacts/storage_handlers/s3_handler.py +10 -8
wandb/sdk/artifacts/storage_handlers/tracking_handler.py +6 -4
wandb/sdk/artifacts/storage_handlers/wb_artifact_handler.py +24 -21
wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +4 -2
wandb/sdk/artifacts/storage_policies/_multipart.py +187 -0
wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +71 -242
wandb/sdk/artifacts/storage_policy.py +25 -12
wandb/sdk/data_types/bokeh.py +5 -1
wandb/sdk/data_types/image.py +17 -6
wandb/sdk/data_types/object_3d.py +67 -2
wandb/sdk/interface/interface.py +31 -4
wandb/sdk/interface/interface_queue.py +10 -0
wandb/sdk/interface/interface_shared.py +0 -7
wandb/sdk/interface/interface_sock.py +9 -3
wandb/sdk/internal/_generated/__init__.py +2 -12
wandb/sdk/internal/job_builder.py +27 -10
wandb/sdk/internal/sender.py +5 -2
wandb/sdk/internal/settings_static.py +2 -82
wandb/sdk/launch/create_job.py +2 -1
wandb/sdk/launch/runner/kubernetes_runner.py +25 -20
wandb/sdk/launch/utils.py +82 -1
wandb/sdk/lib/progress.py +8 -74
wandb/sdk/lib/service/service_client.py +5 -9
wandb/sdk/lib/service/service_connection.py +39 -23
wandb/sdk/mailbox/mailbox_handle.py +2 -0
wandb/sdk/projects/_generated/__init__.py +12 -33
wandb/sdk/wandb_init.py +23 -3
wandb/sdk/wandb_login.py +53 -27
wandb/sdk/wandb_run.py +10 -5
wandb/sdk/wandb_settings.py +63 -25
wandb/sync/sync.py +7 -2
wandb/util.py +1 -1
{wandb-0.22.0.dist-info → wandb-0.22.2.dist-info}/METADATA +1 -1
{wandb-0.22.0.dist-info → wandb-0.22.2.dist-info}/RECORD +113 -103
wandb/sdk/artifacts/_graphql_fragments.py +0 -19
{wandb-0.22.0.dist-info → wandb-0.22.2.dist-info}/WHEEL +0 -0
{wandb-0.22.0.dist-info → wandb-0.22.2.dist-info}/entry_points.txt +0 -0
{wandb-0.22.0.dist-info → wandb-0.22.2.dist-info}/licenses/LICENSE +0 -0

wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py CHANGED

@@ -3,32 +3,35 @@
 from __future__ import annotations
 import concurrent.futures
-import functools
 import hashlib
 import logging
-import math
 import os
-import queue
 import shutil
-import threading
 from collections import deque
-from typing import IO, TYPE_CHECKING, Any, NamedTuple, Sequence
+from operator import itemgetter
+from typing import TYPE_CHECKING, Any
 from urllib.parse import quote
 import requests
-from wandb import env
 from wandb.errors.term import termwarn
 from wandb.proto.wandb_internal_pb2 import ServerFeature
 from wandb.sdk.artifacts.artifact_file_cache import (
     ArtifactFileCache,
-    Opener,
     get_artifact_file_cache,
 )
 from wandb.sdk.artifacts.staging import get_staging_dir
 from wandb.sdk.artifacts.storage_handlers.multi_handler import MultiHandler
 from wandb.sdk.artifacts.storage_handlers.tracking_handler import TrackingHandler
 from wandb.sdk.artifacts.storage_layout import StorageLayout
+from wandb.sdk.artifacts.storage_policies._multipart import (
+    MAX_MULTI_UPLOAD_SIZE,
+    MIN_MULTI_UPLOAD_SIZE,
+    KiB,
+    calc_part_size,
+    multipart_download,
+    scan_chunks,
+)
 from wandb.sdk.artifacts.storage_policies.register import WANDB_STORAGE_POLICY
 from wandb.sdk.artifacts.storage_policy import StoragePolicy
 from wandb.sdk.internal.internal_api import Api as InternalApi
@@ -44,34 +47,9 @@ if TYPE_CHECKING:
     from wandb.sdk.artifacts.artifact_manifest_entry import ArtifactManifestEntry
     from wandb.sdk.internal import progress
-# AWS S3 max upload parts without having to make additional requests for extra parts
-S3_MAX_PART_NUMBERS = 1000
-S3_MIN_MULTI_UPLOAD_SIZE = 2 * 1024**3
-S3_MAX_MULTI_UPLOAD_SIZE = 5 * 1024**4
-# Minimum size to switch to multipart download, same as upload, 2GB.
-_MULTIPART_DOWNLOAD_SIZE = S3_MIN_MULTI_UPLOAD_SIZE
-# Multipart download part size is same as multpart upload size, which is hard coded to 100MB.
-# https://github.com/wandb/wandb/blob/7b2a13cb8efcd553317167b823c8e52d8c3f7c4e/core/pkg/artifacts/saver.go#L496
-# https://docs.aws.amazon.com/AmazonS3/latest/userguide/optimizing-performance-guidelines.html#optimizing-performance-guidelines-get-range
-_DOWNLOAD_PART_SIZE_BYTES = 100 * 1024 * 1024
-# Chunk size for reading http response and writing to disk. 1MB.
-_HTTP_RES_CHUNK_SIZE_BYTES = 1 * 1024 * 1024
-# Signal end of _ChunkQueue, consumer (file writer) should stop after getting this item.
-# NOTE: it should only be used for multithread executor, it does notwork for multiprocess executor.
-# multipart download is using the executor from artifact.download() which is a multithread executor.
-_CHUNK_QUEUE_SENTINEL = object()
 logger = logging.getLogger(__name__)
-class _ChunkContent(NamedTuple):
-    offset: int
-    data: bytes
 class WandbStoragePolicy(StoragePolicy):
     @classmethod
     def name(cls) -> str:
@@ -91,6 +69,8 @@ class WandbStoragePolicy(StoragePolicy):
         session: requests.Session | None = None,
     ) -> None:
         self._config = config or {}
+        if (storage_region := self._config.get("storageRegion")) is not None:
+            self._validate_storage_region(storage_region)
         self._cache = cache or get_artifact_file_cache()
         self._session = session or make_http_session()
         self._api = api or InternalApi()
@@ -99,7 +79,15 @@ class WandbStoragePolicy(StoragePolicy):
             default_handler=TrackingHandler(),
         )
-    def config(self) -> dict:
+    def _validate_storage_region(self, storage_region: Any) -> None:
+        if not isinstance(storage_region, str):
+            raise TypeError(
+                f"storageRegion must be a string, got {type(storage_region).__name__}: {storage_region!r}"
+            )
+        if not storage_region.strip():
+            raise ValueError("storageRegion must be a non-empty string")
+    def config(self) -> dict[str, Any]:
         return self._config
     def load_file(
@@ -107,8 +95,9 @@ class WandbStoragePolicy(StoragePolicy):
         artifact: Artifact,
         manifest_entry: ArtifactManifestEntry,
         dest_path: str | None = None,
+        # FIXME: We should avoid passing the executor into multiple inner functions,
+        # it leads to confusing code and opaque tracebacks/call stacks.
         executor: concurrent.futures.Executor | None = None,
-        multipart: bool | None = None,
     ) -> FilePathStr:
         """Use cache or download the file using signed url.
@@ -116,10 +105,8 @@ class WandbStoragePolicy(StoragePolicy):
             executor: Passed from caller, artifact has a thread pool for multi file download.
                 Reuse the thread pool for multi part download. The thread pool is closed when
                 artifact download is done.
-            multipart: If set to `None` (default), the artifact will be downloaded
-                in parallel using multipart download if individual file size is greater than
-                2GB. If set to `True` or `False`, the artifact will be downloaded in
-                parallel or serially regardless of the file size.
+                If this is None, download the file serially.
         """
         if dest_path is not None:
             self._cache._override_cache_path = dest_path
@@ -131,14 +118,10 @@ class WandbStoragePolicy(StoragePolicy):
         if hit:
             return path
-        if (url := manifest_entry._download_url) is not None:
+        if url := manifest_entry._download_url:
             # Use multipart parallel download for large file
-            if (
-                executor
-                and (size := manifest_entry.size)
-                and self._should_multipart_download(size, multipart)
-            ):
-                self._multipart_file_download(executor, url, size, cache_open)
+            if executor and (size := manifest_entry.size):
+                multipart_download(executor, self._session, url, size, cache_open)
                 return path
             # Serial download
@@ -161,142 +144,16 @@ class WandbStoragePolicy(StoragePolicy):
             else:
                 auth = ("api", self._api.api_key or "")
-            file_url = self._file_url(
-                self._api,
-                artifact.entity,
-                artifact.project,
-                artifact.name.split(":")[0],
-                manifest_entry,
-            )
+            file_url = self._file_url(self._api, artifact, manifest_entry)
             response = self._session.get(
                 file_url, auth=auth, cookies=cookies, headers=headers, stream=True
             )
         with cache_open(mode="wb") as file:
-            for data in response.iter_content(chunk_size=16 * 1024):
+            for data in response.iter_content(chunk_size=16 * KiB):
                 file.write(data)
         return path
-    def _should_multipart_download(
-        self,
-        file_size: int,
-        multipart: bool | None,
-    ) -> bool:
-        if multipart is not None:
-            return multipart
-        return file_size >= _MULTIPART_DOWNLOAD_SIZE
-    def _write_chunks_to_file(
-        self,
-        f: IO,
-        q: queue.Queue,
-        download_has_error: threading.Event,
-    ):
-        while not download_has_error.is_set():
-            item = q.get()
-            if item is _CHUNK_QUEUE_SENTINEL:
-                # Normal shutdown, all the chunks are written
-                return
-            elif isinstance(item, _ChunkContent):
-                try:
-                    # NOTE: Seek works without pre allocating the file on disk.
-                    # It automatically creates a sparse file, e.g. ls -hl would show
-                    # a bigger size compared to du -sh * because downloading different
-                    # chunks is not a sequential write.
-                    # See https://man7.org/linux/man-pages/man2/lseek.2.html
-                    f.seek(item.offset)
-                    f.write(item.data)
-                except Exception as e:
-                    if env.is_debug():
-                        logger.debug(f"Error writing chunk to file: {e}")
-                    download_has_error.set()
-                    raise
-            else:
-                raise ValueError(f"Unknown queue item type: {type(item)}")
-    def _download_part(
-        self,
-        download_url: str,
-        headers: dict,
-        start: int,
-        q: queue.Queue,
-        download_has_error: threading.Event,
-    ):
-        # Other threads has error, no need to start
-        if download_has_error.is_set():
-            return
-        response = self._session.get(url=download_url, headers=headers, stream=True)
-        file_offset = start
-        for content in response.iter_content(chunk_size=_HTTP_RES_CHUNK_SIZE_BYTES):
-            if download_has_error.is_set():
-                return
-            q.put(_ChunkContent(offset=file_offset, data=content))
-            file_offset += len(content)
-    def _multipart_file_download(
-        self,
-        executor: concurrent.futures.Executor,
-        download_url: str,
-        file_size_bytes: int,
-        cache_open: Opener,
-    ):
-        """Download file as multiple parts in parallel.
-        Only one thread for writing to file. Each part run one http request in one thread.
-        HTTP response chunk of a file part is sent to the writer thread via a queue.
-        """
-        q: queue.Queue[_ChunkContent | object] = queue.Queue(maxsize=500)
-        download_has_error = threading.Event()
-        # Put cache_open at top so we remove the tmp file when there is network error.
-        with cache_open("wb") as f:
-            # Start writer thread first.
-            write_handler = functools.partial(
-                self._write_chunks_to_file, f, q, download_has_error
-            )
-            write_future = executor.submit(write_handler)
-            # Start download threads for each part.
-            download_futures: deque[concurrent.futures.Future] = deque()
-            part_size = _DOWNLOAD_PART_SIZE_BYTES
-            num_parts = int(math.ceil(file_size_bytes / float(part_size)))
-            for i in range(num_parts):
-                # https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Range
-                # Start and end are both inclusive, empty end means use the actual end of the file.
-                start = i * part_size
-                bytes_range = f"bytes={start}-"
-                if i != (num_parts - 1):
-                    # bytes=0-499
-                    bytes_range += f"{start + part_size - 1}"
-                headers = {"Range": bytes_range}
-                download_handler = functools.partial(
-                    self._download_part,
-                    download_url,
-                    headers,
-                    start,
-                    q,
-                    download_has_error,
-                )
-                download_futures.append(executor.submit(download_handler))
-            # Wait for download
-            done, not_done = concurrent.futures.wait(
-                download_futures, return_when=concurrent.futures.FIRST_EXCEPTION
-            )
-            try:
-                for fut in done:
-                    fut.result()
-            except Exception as e:
-                if env.is_debug():
-                    logger.debug(f"Error downloading file: {e}")
-                download_has_error.set()
-                raise
-            finally:
-                # Always signal the writer to stop
-                q.put(_CHUNK_QUEUE_SENTINEL)
-                write_future.result()
     def store_reference(
         self,
         artifact: Artifact,
@@ -304,7 +161,7 @@ class WandbStoragePolicy(StoragePolicy):
         name: str | None = None,
         checksum: bool = True,
         max_objects: int | None = None,
-    ) -> Sequence[ArtifactManifestEntry]:
+    ) -> list[ArtifactManifestEntry]:
         return self._handler.store_path(
             artifact, path, name=name, checksum=checksum, max_objects=max_objects
         )
@@ -324,13 +181,16 @@ class WandbStoragePolicy(StoragePolicy):
     def _file_url(
         self,
         api: InternalApi,
-        entity_name: str,
-        project_name: str,
-        artifact_name: str,
+        artifact: Artifact,
         entry: ArtifactManifestEntry,
     ) -> str:
         layout = self._config.get("storageLayout", StorageLayout.V1)
         region = self._config.get("storageRegion", "default")
+        entity_name = artifact.entity
+        project_name = artifact.project
+        artifact_name = artifact.name.split(":")[0]
         md5_hex = b64_to_hex_id(entry.digest)
         base_url: str = api.settings("base_url")
@@ -357,30 +217,21 @@ class WandbStoragePolicy(StoragePolicy):
         multipart_urls: dict[int, str],
         extra_headers: dict[str, str],
     ) -> list[dict[str, Any]]:
-        etags = []
-        part_number = 1
-        with open(file_path, "rb") as f:
-            while True:
-                data = f.read(chunk_size)
-                if not data:
-                    break
-                md5_b64_str = str(hex_to_b64_id(hex_digests[part_number]))
-                upload_resp = self._api.upload_multipart_file_chunk_retry(
-                    multipart_urls[part_number],
-                    data,
-                    extra_headers={
-                        "content-md5": md5_b64_str,
-                        "content-length": str(len(data)),
-                        "content-type": extra_headers.get("Content-Type", ""),
-                    },
-                )
-                assert upload_resp is not None
-                etags.append(
-                    {"partNumber": part_number, "hexMD5": upload_resp.headers["ETag"]}
-                )
-                part_number += 1
-        return etags
+        etags: deque[dict[str, Any]] = deque()
+        file_chunks = scan_chunks(file_path, chunk_size)
+        for num, data in enumerate(file_chunks, start=1):
+            rsp = self._api.upload_multipart_file_chunk_retry(
+                multipart_urls[num],
+                data,
+                extra_headers={
+                    "content-md5": hex_to_b64_id(hex_digests[num]),
+                    "content-length": str(len(data)),
+                    "content-type": extra_headers.get("Content-Type") or "",
+                },
+            )
+            assert rsp is not None
+            etags.append({"partNumber": num, "hexMD5": rsp.headers["ETag"]})
+        return list(etags)
     def default_file_upload(
         self,
@@ -393,20 +244,9 @@ class WandbStoragePolicy(StoragePolicy):
         with open(file_path, "rb") as file:
             # This fails if we don't send the first byte before the signed URL expires.
             self._api.upload_file_retry(
-                upload_url,
-                file,
-                progress_callback,
-                extra_headers=extra_headers,
+                upload_url, file, progress_callback, extra_headers=extra_headers
             )
-    def calc_chunk_size(self, file_size: int) -> int:
-        # Default to chunk size of 100MiB. S3 has cap of 10,000 upload parts.
-        # If file size exceeds the default chunk size, recalculate chunk size.
-        default_chunk_size = 100 * 1024**2
-        if default_chunk_size * S3_MAX_PART_NUMBERS < file_size:
-            return math.ceil(file_size / S3_MAX_PART_NUMBERS)
-        return default_chunk_size
     def store_file(
         self,
         artifact_id: str,
@@ -422,28 +262,20 @@ class WandbStoragePolicy(StoragePolicy):
             False if it needed to be uploaded or was a reference (nothing to dedupe).
         """
         file_size = entry.size or 0
-        chunk_size = self.calc_chunk_size(file_size)
-        upload_parts = []
-        hex_digests = {}
-        file_path = entry.local_path if entry.local_path is not None else ""
+        chunk_size = calc_part_size(file_size)
+        file_path = entry.local_path or ""
         # Logic for AWS s3 multipart upload.
         # Only chunk files if larger than 2 GiB. Currently can only support up to 5TiB.
-        if (
-            file_size >= S3_MIN_MULTI_UPLOAD_SIZE
-            and file_size <= S3_MAX_MULTI_UPLOAD_SIZE
-        ):
-            part_number = 1
-            with open(file_path, "rb") as f:
-                while True:
-                    data = f.read(chunk_size)
-                    if not data:
-                        break
-                    hex_digest = hashlib.md5(data).hexdigest()
-                    upload_parts.append(
-                        {"hexMD5": hex_digest, "partNumber": part_number}
-                    )
-                    hex_digests[part_number] = hex_digest
-                    part_number += 1
+        if MIN_MULTI_UPLOAD_SIZE <= file_size <= MAX_MULTI_UPLOAD_SIZE:
+            file_chunks = scan_chunks(file_path, chunk_size)
+            upload_parts = [
+                {"partNumber": num, "hexMD5": hashlib.md5(data).hexdigest()}
+                for num, data in enumerate(file_chunks, start=1)
+            ]
+            hex_digests = dict(map(itemgetter("partNumber", "hexMD5"), upload_parts))
+        else:
+            upload_parts = []
+            hex_digests = {}
         resp = preparer.prepare(
             {
@@ -457,24 +289,21 @@ class WandbStoragePolicy(StoragePolicy):
         entry.birth_artifact_id = resp.birth_artifact_id
-        multipart_urls = resp.multipart_upload_urls
         if resp.upload_url is None:
             return True
         if entry.local_path is None:
             return False
-        extra_headers = {
-            header.split(":", 1)[0]: header.split(":", 1)[1]
-            for header in (resp.upload_headers or {})
-        }
+        extra_headers = dict(hdr.split(":", 1) for hdr in (resp.upload_headers or []))
         # This multipart upload isn't available, do a regular single url upload
-        if multipart_urls is None and resp.upload_url:
+        if (multipart_urls := resp.multipart_upload_urls) is None and resp.upload_url:
             self.default_file_upload(
                 resp.upload_url, file_path, extra_headers, progress_callback
             )
+        elif multipart_urls is None:
+            raise ValueError(f"No multipart urls to upload for file: {file_path}")
         else:
-            if multipart_urls is None:
-                raise ValueError(f"No multipart urls to upload for file: {file_path}")
             # Upload files using s3 multipart upload urls
             etags = self.s3_multipart_file_upload(
                 file_path,
@@ -503,7 +332,7 @@ class WandbStoragePolicy(StoragePolicy):
         staging_dir = get_staging_dir()
         try:
-            if not entry.skip_cache and not hit:
+            if not (entry.skip_cache or hit):
                 with cache_open("wb") as f, open(entry.local_path, "rb") as src:
                     shutil.copyfileobj(src, f)
             if entry.local_path.startswith(staging_dir):

wandb/sdk/artifacts/storage_policy.py CHANGED

@@ -3,7 +3,8 @@
 from __future__ import annotations
 import concurrent.futures
-from typing import TYPE_CHECKING, Sequence
+from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING, Any
 from wandb.sdk.internal.internal_api import Api as InternalApi
 from wandb.sdk.lib.paths import FilePathStr, URIStr
@@ -15,37 +16,47 @@ if TYPE_CHECKING:
     from wandb.sdk.internal.progress import ProgressFn
-class StoragePolicy:
+_POLICY_REGISTRY: dict[str, type[StoragePolicy]] = {}
+class StoragePolicy(ABC):
+    def __init_subclass__(cls, **kwargs: Any) -> None:
+        super().__init_subclass__(**kwargs)
+        _POLICY_REGISTRY[cls.name()] = cls
     @classmethod
     def lookup_by_name(cls, name: str) -> type[StoragePolicy]:
-        import wandb.sdk.artifacts.storage_policies  # noqa: F401
-        for sub in cls.__subclasses__():
-            if sub.name() == name:
-                return sub
-        raise NotImplementedError(f"Failed to find storage policy '{name}'")
+        if policy := _POLICY_REGISTRY.get(name):
+            return policy
+        raise ValueError(f"Failed to find storage policy {name!r}")
     @classmethod
+    @abstractmethod
     def name(cls) -> str:
         raise NotImplementedError
     @classmethod
-    def from_config(cls, config: dict, api: InternalApi | None = None) -> StoragePolicy:
+    @abstractmethod
+    def from_config(
+        cls, config: dict[str, Any], api: InternalApi | None = None
+    ) -> StoragePolicy:
         raise NotImplementedError
-    def config(self) -> dict:
+    @abstractmethod
+    def config(self) -> dict[str, Any]:
         raise NotImplementedError
+    @abstractmethod
     def load_file(
         self,
         artifact: Artifact,
         manifest_entry: ArtifactManifestEntry,
         dest_path: str | None = None,
         executor: concurrent.futures.Executor | None = None,
-        multipart: bool | None = None,
     ) -> FilePathStr:
         raise NotImplementedError
+    @abstractmethod
     def store_file(
         self,
         artifact_id: str,
@@ -56,6 +67,7 @@ class StoragePolicy:
     ) -> bool:
         raise NotImplementedError
+    @abstractmethod
     def store_reference(
         self,
         artifact: Artifact,
@@ -63,9 +75,10 @@ class StoragePolicy:
         name: str | None = None,
         checksum: bool = True,
         max_objects: int | None = None,
-    ) -> Sequence[ArtifactManifestEntry]:
+    ) -> list[ArtifactManifestEntry]:
         raise NotImplementedError
+    @abstractmethod
     def load_reference(
         self,
         manifest_entry: ArtifactManifestEntry,

wandb/sdk/data_types/bokeh.py CHANGED

@@ -5,6 +5,7 @@ import pathlib
 from typing import TYPE_CHECKING, Union
 from wandb import util
+from wandb._strutils import nameof
 from wandb.sdk.lib import runid
 from . import _dtypes
@@ -34,7 +35,10 @@ class Bokeh(Media):
         ],
     ):
         super().__init__()
-        bokeh = util.get_module("bokeh", required=True)
+        bokeh = util.get_module(
+            "bokeh",
+            required=f"{nameof(Bokeh)!r} requires the bokeh package.  Please install it with `pip install bokeh`.",
+        )
         if isinstance(data_or_path, (str, pathlib.Path)) and os.path.exists(
             data_or_path
         ):

wandb/sdk/data_types/image.py CHANGED

@@ -161,6 +161,17 @@ class Image(BatchableMedia):
     ) -> None:
         """Initialize a `wandb.Image` object.
+        This class handles various image data formats and automatically normalizes
+        pixel values to the range [0, 255] when needed, ensuring compatibility
+        with the W&B backend.
+        * Data in range [0, 1] is multiplied by 255 and converted to uint8
+        * Data in range [-1, 1] is rescaled from [-1, 1] to [0, 255] by mapping
+            -1 to 0 and 1 to 255, then converted to uint8
+        * Data outside [-1, 1] but not in [0, 255] is clipped to [0, 255] and
+            converted to uint8 (with a warning if values fall outside [0, 255])
+        * Data already in [0, 255] is converted to uint8 without modification
         Args:
             data_or_path: Accepts NumPy array/pytorch tensor of image data,
                 a PIL image object, or a path to an image file. If a NumPy
@@ -168,7 +179,7 @@ class Image(BatchableMedia):
                 the image data will be saved to the given file type.
                 If the values are not in the range [0, 255] or all values are in the range [0, 1],
                 the image pixel values will be normalized to the range [0, 255]
-                unless `normalize` is set to False.
+                unless `normalize` is set to `False`.
             - pytorch tensor should be in the format (channel, height, width)
             - NumPy array should be in the format (height, width, channel)
             mode: The PIL mode for an image. Most common are "L", "RGB",
@@ -178,13 +189,13 @@ class Image(BatchableMedia):
             classes: A list of class information for the image,
                 used for labeling bounding boxes, and image masks.
             boxes: A dictionary containing bounding box information for the image.
-                see: https://docs.wandb.ai/ref/python/data-types/boundingboxes2d/
+                see https://docs.wandb.ai/ref/python/data-types/boundingboxes2d/
             masks: A dictionary containing mask information for the image.
-                see: https://docs.wandb.ai/ref/python/data-types/imagemask/
+                see https://docs.wandb.ai/ref/python/data-types/imagemask/
             file_type: The file type to save the image as.
-                This parameter has no effect if data_or_path is a path to an image file.
-            normalize: If True, normalize the image pixel values to fall within the range of [0, 255].
-                Normalize is only applied if data_or_path is a numpy array or pytorch tensor.
+                This parameter has no effect if `data_or_path` is a path to an image file.
+            normalize: If `True`, normalize the image pixel values to fall within the range of [0, 255].
+                Normalize is only applied if `data_or_path` is a numpy array or pytorch tensor.
         Examples:
         Create a wandb.Image from a numpy array