wandb 0.19.12rc1__py3-none-win32.whl → 0.20.1__py3-none-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wandb/__init__.py +1 -2
- wandb/__init__.pyi +3 -6
- wandb/_iterutils.py +26 -7
- wandb/_pydantic/__init__.py +2 -1
- wandb/_pydantic/utils.py +7 -0
- wandb/agents/pyagent.py +9 -15
- wandb/analytics/sentry.py +1 -2
- wandb/apis/attrs.py +3 -4
- wandb/apis/importers/internals/util.py +1 -1
- wandb/apis/importers/validation.py +2 -2
- wandb/apis/importers/wandb.py +30 -25
- wandb/apis/normalize.py +2 -2
- wandb/apis/public/__init__.py +1 -0
- wandb/apis/public/api.py +37 -33
- wandb/apis/public/artifacts.py +103 -72
- wandb/apis/public/jobs.py +3 -2
- wandb/apis/public/registries/registries_search.py +4 -2
- wandb/apis/public/registries/registry.py +1 -1
- wandb/apis/public/registries/utils.py +9 -9
- wandb/apis/public/runs.py +18 -6
- wandb/automations/_filters/expressions.py +1 -1
- wandb/automations/_filters/operators.py +1 -1
- wandb/automations/_filters/run_metrics.py +1 -1
- wandb/beta/workflows.py +6 -5
- wandb/bin/gpu_stats.exe +0 -0
- wandb/bin/wandb-core +0 -0
- wandb/cli/cli.py +54 -73
- wandb/docker/__init__.py +21 -74
- wandb/docker/names.py +40 -0
- wandb/env.py +0 -1
- wandb/errors/util.py +1 -1
- wandb/filesync/step_checksum.py +1 -1
- wandb/filesync/step_upload.py +1 -1
- wandb/integration/diffusers/resolvers/multimodal.py +1 -2
- wandb/integration/gym/__init__.py +5 -6
- wandb/integration/keras/callbacks/model_checkpoint.py +2 -2
- wandb/integration/keras/keras.py +13 -19
- wandb/integration/kfp/kfp_patch.py +2 -3
- wandb/integration/langchain/wandb_tracer.py +1 -1
- wandb/integration/metaflow/metaflow.py +13 -13
- wandb/integration/openai/fine_tuning.py +3 -2
- wandb/integration/sagemaker/auth.py +2 -1
- wandb/integration/sklearn/utils.py +2 -1
- wandb/integration/tensorboard/__init__.py +1 -1
- wandb/integration/tensorboard/log.py +2 -5
- wandb/integration/tensorflow/__init__.py +2 -2
- wandb/jupyter.py +20 -17
- wandb/plot/confusion_matrix.py +1 -1
- wandb/plot/utils.py +8 -7
- wandb/proto/v3/wandb_internal_pb2.py +355 -335
- wandb/proto/v3/wandb_settings_pb2.py +2 -2
- wandb/proto/v3/wandb_telemetry_pb2.py +12 -12
- wandb/proto/v4/wandb_internal_pb2.py +339 -335
- wandb/proto/v4/wandb_settings_pb2.py +2 -2
- wandb/proto/v4/wandb_telemetry_pb2.py +12 -12
- wandb/proto/v5/wandb_internal_pb2.py +339 -335
- wandb/proto/v5/wandb_settings_pb2.py +2 -2
- wandb/proto/v5/wandb_telemetry_pb2.py +12 -12
- wandb/proto/v6/wandb_internal_pb2.py +339 -335
- wandb/proto/v6/wandb_settings_pb2.py +2 -2
- wandb/proto/v6/wandb_telemetry_pb2.py +12 -12
- wandb/proto/wandb_deprecated.py +6 -8
- wandb/sdk/artifacts/_internal_artifact.py +43 -0
- wandb/sdk/artifacts/_validators.py +55 -35
- wandb/sdk/artifacts/artifact.py +117 -115
- wandb/sdk/artifacts/artifact_download_logger.py +2 -0
- wandb/sdk/artifacts/artifact_saver.py +1 -3
- wandb/sdk/artifacts/artifact_state.py +2 -0
- wandb/sdk/artifacts/artifact_ttl.py +2 -0
- wandb/sdk/artifacts/exceptions.py +14 -0
- wandb/sdk/artifacts/staging.py +2 -0
- wandb/sdk/artifacts/storage_handlers/local_file_handler.py +2 -6
- wandb/sdk/artifacts/storage_handlers/multi_handler.py +1 -1
- wandb/sdk/artifacts/storage_handlers/tracking_handler.py +2 -6
- wandb/sdk/artifacts/storage_handlers/wb_artifact_handler.py +1 -5
- wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +1 -1
- wandb/sdk/artifacts/storage_layout.py +2 -0
- wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +3 -3
- wandb/sdk/backend/backend.py +11 -182
- wandb/sdk/data_types/_dtypes.py +2 -6
- wandb/sdk/data_types/audio.py +20 -3
- wandb/sdk/data_types/base_types/media.py +12 -7
- wandb/sdk/data_types/base_types/wb_value.py +8 -18
- wandb/sdk/data_types/bokeh.py +19 -2
- wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +17 -1
- wandb/sdk/data_types/helper_types/image_mask.py +7 -1
- wandb/sdk/data_types/html.py +4 -4
- wandb/sdk/data_types/image.py +178 -103
- wandb/sdk/data_types/molecule.py +6 -6
- wandb/sdk/data_types/object_3d.py +10 -5
- wandb/sdk/data_types/saved_model.py +11 -6
- wandb/sdk/data_types/table.py +313 -83
- wandb/sdk/data_types/table_decorators.py +108 -0
- wandb/sdk/data_types/utils.py +43 -7
- wandb/sdk/data_types/video.py +21 -3
- wandb/sdk/interface/interface.py +10 -0
- wandb/sdk/internal/datastore.py +2 -6
- wandb/sdk/internal/file_pusher.py +1 -5
- wandb/sdk/internal/file_stream.py +8 -17
- wandb/sdk/internal/handler.py +2 -2
- wandb/sdk/internal/incremental_table_util.py +53 -0
- wandb/sdk/internal/internal.py +3 -5
- wandb/sdk/internal/internal_api.py +66 -89
- wandb/sdk/internal/job_builder.py +2 -7
- wandb/sdk/internal/profiler.py +2 -2
- wandb/sdk/internal/progress.py +1 -3
- wandb/sdk/internal/run.py +1 -6
- wandb/sdk/internal/sender.py +24 -36
- wandb/sdk/internal/system/assets/aggregators.py +1 -7
- wandb/sdk/internal/system/assets/disk.py +3 -3
- wandb/sdk/internal/system/assets/gpu.py +4 -4
- wandb/sdk/internal/system/assets/gpu_amd.py +4 -4
- wandb/sdk/internal/system/assets/interfaces.py +6 -6
- wandb/sdk/internal/system/assets/tpu.py +1 -1
- wandb/sdk/internal/system/assets/trainium.py +6 -6
- wandb/sdk/internal/system/system_info.py +5 -7
- wandb/sdk/internal/system/system_monitor.py +4 -4
- wandb/sdk/internal/tb_watcher.py +5 -7
- wandb/sdk/launch/_launch.py +1 -1
- wandb/sdk/launch/_project_spec.py +19 -20
- wandb/sdk/launch/agent/agent.py +3 -3
- wandb/sdk/launch/agent/config.py +1 -1
- wandb/sdk/launch/agent/job_status_tracker.py +2 -2
- wandb/sdk/launch/builder/build.py +2 -3
- wandb/sdk/launch/builder/kaniko_builder.py +5 -4
- wandb/sdk/launch/environment/gcp_environment.py +1 -2
- wandb/sdk/launch/registry/azure_container_registry.py +2 -2
- wandb/sdk/launch/registry/elastic_container_registry.py +2 -2
- wandb/sdk/launch/registry/google_artifact_registry.py +3 -3
- wandb/sdk/launch/runner/abstract.py +5 -5
- wandb/sdk/launch/runner/kubernetes_monitor.py +2 -2
- wandb/sdk/launch/runner/kubernetes_runner.py +1 -1
- wandb/sdk/launch/runner/sagemaker_runner.py +2 -4
- wandb/sdk/launch/runner/vertex_runner.py +2 -7
- wandb/sdk/launch/sweeps/__init__.py +1 -1
- wandb/sdk/launch/sweeps/scheduler.py +2 -2
- wandb/sdk/launch/sweeps/utils.py +3 -3
- wandb/sdk/launch/utils.py +3 -4
- wandb/sdk/lib/apikey.py +5 -8
- wandb/sdk/lib/config_util.py +3 -3
- wandb/sdk/lib/fsm.py +3 -18
- wandb/sdk/lib/gitlib.py +6 -5
- wandb/sdk/lib/ipython.py +2 -2
- wandb/sdk/lib/json_util.py +9 -14
- wandb/sdk/lib/printer.py +3 -8
- wandb/sdk/lib/redirect.py +1 -1
- wandb/sdk/lib/retry.py +3 -7
- wandb/sdk/lib/run_moment.py +2 -2
- wandb/sdk/lib/service_connection.py +3 -1
- wandb/sdk/lib/service_token.py +1 -2
- wandb/sdk/mailbox/mailbox_handle.py +3 -7
- wandb/sdk/mailbox/response_handle.py +2 -6
- wandb/sdk/service/streams.py +3 -7
- wandb/sdk/verify/verify.py +5 -6
- wandb/sdk/wandb_config.py +1 -1
- wandb/sdk/wandb_init.py +38 -106
- wandb/sdk/wandb_login.py +7 -6
- wandb/sdk/wandb_run.py +52 -240
- wandb/sdk/wandb_settings.py +71 -60
- wandb/sdk/wandb_setup.py +40 -14
- wandb/sdk/wandb_watch.py +5 -7
- wandb/sync/__init__.py +1 -1
- wandb/sync/sync.py +13 -13
- wandb/util.py +17 -35
- wandb/wandb_agent.py +8 -11
- {wandb-0.19.12rc1.dist-info → wandb-0.20.1.dist-info}/METADATA +5 -5
- {wandb-0.19.12rc1.dist-info → wandb-0.20.1.dist-info}/RECORD +170 -168
- wandb/docker/auth.py +0 -435
- wandb/docker/www_authenticate.py +0 -94
- {wandb-0.19.12rc1.dist-info → wandb-0.20.1.dist-info}/WHEEL +0 -0
- {wandb-0.19.12rc1.dist-info → wandb-0.20.1.dist-info}/entry_points.txt +0 -0
- {wandb-0.19.12rc1.dist-info → wandb-0.20.1.dist-info}/licenses/LICENSE +0 -0
wandb/sdk/internal/internal_api.py
CHANGED
@@ -1,4 +1,3 @@
-import ast
 import base64
 import datetime
 import functools
@@ -12,7 +11,6 @@ import sys
 import threading
 from copy import deepcopy
 from pathlib import Path
-from types import MappingProxyType
 from typing import (
     IO,
     TYPE_CHECKING,
@@ -70,42 +68,42 @@ if TYPE_CHECKING:
     class CreateArtifactFileSpecInput(TypedDict, total=False):
         """Corresponds to `type CreateArtifactFileSpecInput` in schema.graphql."""

-        artifactID: str
+        artifactID: str
         name: str
         md5: str
         mimetype: Optional[str]
-        artifactManifestID: Optional[str]
-        uploadPartsInput: Optional[List[Dict[str, object]]]
+        artifactManifestID: Optional[str]
+        uploadPartsInput: Optional[List[Dict[str, object]]]

     class CreateArtifactFilesResponseFile(TypedDict):
         id: str
         name: str
-        displayName: str
-        uploadUrl: Optional[str]
-        uploadHeaders: Sequence[str]
-        uploadMultipartUrls: "UploadPartsResponse"
-        storagePath: str
+        displayName: str
+        uploadUrl: Optional[str]
+        uploadHeaders: Sequence[str]
+        uploadMultipartUrls: "UploadPartsResponse"
+        storagePath: str
         artifact: "CreateArtifactFilesResponseFileNode"

     class CreateArtifactFilesResponseFileNode(TypedDict):
         id: str

     class UploadPartsResponse(TypedDict):
-        uploadUrlParts: List["UploadUrlParts"]
-        uploadID: str
+        uploadUrlParts: List["UploadUrlParts"]
+        uploadID: str

     class UploadUrlParts(TypedDict):
-        partNumber: int
-        uploadUrl: str
+        partNumber: int
+        uploadUrl: str

     class CompleteMultipartUploadArtifactInput(TypedDict):
         """Corresponds to `type CompleteMultipartUploadArtifactInput` in schema.graphql."""

-        completeMultipartAction: str
-        completedParts: Dict[int, str]
-        artifactID: str
-        storagePath: str
-        uploadID: str
+        completeMultipartAction: str
+        completedParts: Dict[int, str]
+        artifactID: str
+        storagePath: str
+        uploadID: str
         md5: str

     class CompleteMultipartUploadArtifactResponse(TypedDict):
@@ -238,7 +236,7 @@ class Api:
             ]
         ] = None,
         load_settings: bool = True,
-        retry_timedelta: datetime.timedelta = datetime.timedelta(  #
+        retry_timedelta: datetime.timedelta = datetime.timedelta(  # okay because it's immutable
            days=7
        ),
         environ: MutableMapping = os.environ,
@@ -364,7 +362,7 @@ class Api:
         self.server_create_run_queue_supports_priority: Optional[bool] = None
         self.server_supports_template_variables: Optional[bool] = None
         self.server_push_to_run_queue_supports_priority: Optional[bool] = None
-        self._server_features_cache: Optional[
+        self._server_features_cache: Optional[Dict[str, bool]] = None

     def gql(self, *args: Any, **kwargs: Any) -> Any:
         ret = self._retry_gql(
@@ -399,8 +397,7 @@ class Api:
         except requests.exceptions.HTTPError as err:
             response = err.response
             assert response is not None
-            logger.
-            logger.error(response.text)
+            logger.exception("Error executing GraphQL.")
             for error in parse_backend_error_messages(response):
                 wandb.termerror(f"Error while calling W&B API: {error} ({response})")
             raise
@@ -869,45 +866,43 @@ class Api:
         _, _, mutations = self.server_info_introspection()
         return "updateRunQueueItemWarning" in mutations

-    def _server_features(self) ->
-
-
-
-
-
-
-
-
-
-
-        raise
+    def _server_features(self) -> Dict[str, bool]:
+        # NOTE: Avoid caching via `@cached_property`, due to undocumented
+        # locking behavior before Python 3.12.
+        # See: https://github.com/python/cpython/issues/87634
+        query = gql(SERVER_FEATURES_QUERY_GQL)
+        try:
+            response = self.gql(query)
+        except Exception as e:
+            # Unfortunately we currently have to match on the text of the error message,
+            # as the `gql` client raises `Exception` rather than a more specific error.
+            if 'Cannot query field "features" on type "ServerInfo".' in str(e):
+                self._server_features_cache = {}
             else:
-
-
-
-
-
-
-
+                raise
+        else:
+            info = ServerFeaturesQuery.model_validate(response).server_info
+            if info and (feats := info.features):
+                self._server_features_cache = {f.name: f.is_enabled for f in feats if f}
+            else:
+                self._server_features_cache = {}
+        return self._server_features_cache

-
+    def _server_supports(self, feature: Union[int, str]) -> bool:
+        """Return whether the current server supports the given feature.

-
-
+        This also caches the underlying lookup of server feature flags,
+        and it maps {feature_name (str) -> is_enabled (bool)}.

         Good to use for features that have a fallback mechanism for older servers.
-
-        Args:
-            feature_value (ServerFeature): The enum value of the feature to check.
-
-        Returns:
-            bool: True if the feature is enabled, False otherwise.
-
-        Exceptions:
-            Exception: If an error other than the server not supporting feature queries occurs.
         """
-
+        # If we're given the protobuf enum value, convert to a string name.
+        # NOTE: We deliberately use names (str) instead of enum values (int)
+        # as the keys here, since:
+        # - the server identifies features by their name, rather than (client-side) enum value
+        # - the defined list of client-side flags may be behind the server-side list of flags
+        key = ServerFeature.Name(feature) if isinstance(feature, int) else feature
+        return self._server_features().get(key) or False

     @normalize_exceptions
     def update_run_queue_item_warning(
@@ -2092,9 +2087,7 @@ class Api:
         )
         if default is None or default.get("queueID") is None:
             raise CommError(
-                "Unable to create default queue for {}/{}. No queues for agent to poll"
-                    entity, project
-                )
+                f"Unable to create default queue for {entity}/{project}. No queues for agent to poll"
             )
         project_queues = [{"id": default["queueID"], "name": "default"}]
         polling_queue_ids = [
@@ -2571,15 +2564,11 @@ class Api:
         res = self.gql(query, variable_values)
         if res.get("project") is None:
             raise CommError(
-                "Error fetching run info for {}/{}/{}. Check that this project exists and you have access to this entity and project"
-                    entity, project, name
-                )
+                f"Error fetching run info for {entity}/{project}/{name}. Check that this project exists and you have access to this entity and project"
             )
         elif res["project"].get("run") is None:
             raise CommError(
-                "Error fetching run info for {}/{}/{}. Check that this run id exists"
-                    entity, project, name
-                )
+                f"Error fetching run info for {entity}/{project}/{name}. Check that this run id exists"
             )
         run_info: dict = res["project"]["run"]["runInfo"]
         return run_info
@@ -2993,11 +2982,8 @@ class Api:
             logger.debug("upload_file: %s complete", url)
             response.raise_for_status()
         except requests.exceptions.RequestException as e:
-            logger.
-            request_headers = e.request.headers if e.request is not None else ""
-            logger.error(f"upload_file request headers: {request_headers!r}")
+            logger.exception(f"upload_file exception for {url=}")
             response_content = e.response.content if e.response is not None else ""
-            logger.error(f"upload_file response body: {response_content!r}")
             status_code = e.response.status_code if e.response is not None else 0
             # S3 reports retryable request timeouts out-of-band
             is_aws_retryable = status_code == 400 and "RequestTimeout" in str(
@@ -3059,11 +3045,8 @@ class Api:
             logger.debug("upload_file: %s complete", url)
             response.raise_for_status()
         except requests.exceptions.RequestException as e:
-            logger.
-            request_headers = e.request.headers if e.request is not None else ""
-            logger.error(f"upload_file request headers: {request_headers}")
+            logger.exception(f"upload_file exception for {url=}")
             response_content = e.response.content if e.response is not None else ""
-            logger.error(f"upload_file response body: {response_content!r}")
             status_code = e.response.status_code if e.response is not None else 0
             # S3 reports retryable request timeouts out-of-band
             is_aws_retryable = (
@@ -3190,10 +3173,8 @@ class Api:
                 },
                 timeout=60,
             )
-        except Exception
-
-            message = ast.literal_eval(e.args[0])["message"]
-            logger.error("Error communicating with W&B: %s", message)
+        except Exception:
+            logger.exception("Error communicating with W&B.")
             return []
         else:
             result: List[Dict[str, Any]] = json.loads(
@@ -3235,10 +3216,8 @@ class Api:
                 parameter["distribution"] = "uniform"
             else:
                 raise ValueError(
-                    "Parameter {} is ambiguous, please specify bounds as both floats (for a float_"
-                    "uniform distribution) or ints (for an int_uniform distribution)."
-                        parameter_name
-                    )
+                    f"Parameter {parameter_name} is ambiguous, please specify bounds as both floats (for a float_"
+                    "uniform distribution) or ints (for an int_uniform distribution)."
                 )
         return config

@@ -3387,8 +3366,8 @@ class Api:
                 variable_values=variables,
                 check_retry_fn=util.no_retry_4xx,
             )
-        except UsageError
-            raise
+        except UsageError:
+            raise
         except Exception as e:
             # graphql schema exception is generic
             err = e
@@ -3783,10 +3762,8 @@ class Api:
             "usedAs": use_as,
         }

-        server_allows_entity_project_information = (
-
-            ServerFeature.USE_ARTIFACT_WITH_ENTITY_AND_PROJECT_INFORMATION  # type: ignore
-        )
+        server_allows_entity_project_information = self._server_supports(
+            ServerFeature.USE_ARTIFACT_WITH_ENTITY_AND_PROJECT_INFORMATION
         )
         if server_allows_entity_project_information:
             query_vars.extend(
@@ -4565,9 +4542,9 @@ class Api:
         s = self.sweep(sweep=sweep, entity=entity, project=project, specs="{}")
         curr_state = s["state"].upper()
         if state == "PAUSED" and curr_state not in ("PAUSED", "RUNNING"):
-            raise Exception("Cannot pause {} sweep."
+            raise Exception(f"Cannot pause {curr_state.lower()} sweep.")
         elif state != "RUNNING" and curr_state not in ("RUNNING", "PAUSED", "PENDING"):
-            raise Exception("Sweep already {
+            raise Exception(f"Sweep already {curr_state.lower()}.")
         sweep_id = s["id"]
         mutation = gql(
             """
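The new `_server_features` / `_server_supports` pair above caches server feature flags in a plain `Dict[str, bool]` keyed by feature name. A minimal, self-contained sketch of that lazy-cache pattern follows; the class, the fake fetch, and the flag name are illustrative stand-ins, not the wandb internals:

from typing import Dict, Optional

class FeatureGate:
    def __init__(self) -> None:
        # Plain attribute instead of @cached_property: before Python 3.12,
        # cached_property took a per-instance lock that could serialize threads.
        self._features_cache: Optional[Dict[str, bool]] = None

    def _fetch_features(self) -> Dict[str, bool]:
        # Stand-in for the GraphQL round trip in the real client.
        return {"USE_ARTIFACT_WITH_ENTITY_AND_PROJECT_INFORMATION": True}

    def _features(self) -> Dict[str, bool]:
        if self._features_cache is None:
            try:
                self._features_cache = self._fetch_features()
            except Exception as e:
                # Older servers don't expose the query at all; treat that
                # as "no features" rather than an error.
                if "Cannot query field" in str(e):
                    self._features_cache = {}
                else:
                    raise
        return self._features_cache

    def supports(self, feature_name: str) -> bool:
        # Keys are server-side feature *names*, so unknown flags are False.
        return self._features().get(feature_name) or False

Keeping the cache in an ordinary attribute also matches the diff's own NOTE about the undocumented locking behavior of `@cached_property` before Python 3.12.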
wandb/sdk/internal/job_builder.py
CHANGED
@@ -19,6 +19,7 @@ from typing import (
 )

 import wandb
+from wandb.sdk.artifacts._internal_artifact import InternalArtifact
 from wandb.sdk.artifacts.artifact import Artifact
 from wandb.sdk.data_types._dtypes import TypeRegistry
 from wandb.sdk.internal.internal_api import Api
@@ -128,12 +129,6 @@ def get_min_supported_for_source_dict(
     return min_seen


-class JobArtifact(Artifact):
-    def __init__(self, name: str, *args: Any, **kwargs: Any):
-        super().__init__(name, "placeholder", *args, **kwargs)
-        self._type = JOB_ARTIFACT_TYPE  # Get around type restriction.
-
-
 class JobBuilder:
     _settings: SettingsStatic
     _metadatafile_path: Optional[str]
@@ -552,7 +547,7 @@ class JobBuilder:
         assert source_info is not None
         assert name is not None

-        artifact =
+        artifact = InternalArtifact(name, JOB_ARTIFACT_TYPE)

         _logger.info("adding wandb-job metadata file")
         with artifact.new_file("wandb-job.json") as f:
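The removed `JobArtifact` class shows the trick that the shared `InternalArtifact` helper presumably centralizes: construct with a placeholder type, then overwrite the private type field so reserved internal artifact types bypass the public validation. A toy sketch of that pattern, with stand-in classes rather than the real wandb `Artifact`:

class Artifact:
    def __init__(self, name: str, type: str) -> None:
        if type in ("job",):  # the public API rejects reserved types
            raise ValueError(f"artifact type {type!r} is reserved")
        self.name = name
        self._type = type

class InternalArtifact(Artifact):
    def __init__(self, name: str, type: str) -> None:
        # Pass a harmless placeholder through validation, then override it.
        super().__init__(name, "placeholder")
        self._type = type

artifact = InternalArtifact("my-job", "job")  # would raise via Artifact directly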
wandb/sdk/internal/profiler.py
CHANGED
@@ -54,12 +54,12 @@ def torch_trace_handler():
        prof.step()
     ```
     """
-    from
+    from packaging.version import parse

     torch = wandb.util.get_module(PYTORCH_MODULE, required=True)
     torch_profiler = wandb.util.get_module(PYTORCH_PROFILER_MODULE, required=True)

-    if
+    if parse(torch.__version__) < parse("1.9.0"):
         raise Error(
             f"torch version must be at least 1.9 in order to use the PyTorch Profiler API.\
             \nVersion of torch currently installed: {torch.__version__}"
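The version gate above now goes through `packaging.version.parse`, which compares release segments numerically rather than lexically. In isolation, with an example string standing in for `torch.__version__`:

from packaging.version import parse

installed = "1.10.2"  # example stand-in for torch.__version__
# Plain string comparison gets this wrong: "1.10.2" sorts before "1.9.0".
assert "1.10.2" < "1.9.0"
# Parsed versions compare segment by segment, so 1.10.2 >= 1.9.0 holds.
assert parse(installed) >= parse("1.9.0")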
wandb/sdk/internal/progress.py
CHANGED
@@ -43,9 +43,7 @@ class Progress:
             # files getting truncated while uploading seems like something
             # that shouldn't really be happening anyway.
             raise CommError(
-                "File {} size shrank from {} to {} while it was being uploaded."
-                    self.file.name, self.len, self.bytes_read
-                )
+                f"File {self.file.name} size shrank from {self.len} to {self.bytes_read} while it was being uploaded."
             )
             # Growing files are also likely to be bad, but our code didn't break
             # on those in the past, so it's riskier to make that an error now.
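This is one of many template-plus-arguments messages converted to f-strings in this release. The two forms produce the same text; the f-string keeps each value next to its placeholder. A small check with example values:

name, before, after = "weights.h5", 1024, 512  # example values
old = "File {} size shrank from {} to {} while it was being uploaded.".format(
    name, before, after
)
new = f"File {name} size shrank from {before} to {after} while it was being uploaded."
assert old == new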
wandb/sdk/internal/run.py
CHANGED
@@ -5,12 +5,7 @@ Semi-stubbed run for internal process use.

 """

-import sys
-
-if sys.version_info >= (3, 12):
-    from typing import override
-else:
-    from typing_extensions import override
+from typing_extensions import override

 from wandb.sdk import wandb_run

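The version branch can go because `typing_extensions` re-exports the stdlib `typing.override` on Python 3.12+, so the single import behaves the same everywhere. A small usage sketch with illustrative classes:

from typing_extensions import override

class Base:
    def shutdown(self) -> None:
        ...

class Child(Base):
    @override  # type checkers flag this if Base.shutdown is renamed or removed
    def shutdown(self) -> None:
        print("shutting down")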
wandb/sdk/internal/sender.py
CHANGED
@@ -749,14 +749,12 @@ class SendManager:
                 self._resume_state.wandb_runtime = new_runtime
             tags = resume_status.get("tags") or []

-        except (IndexError, ValueError)
-            logger.
+        except (IndexError, ValueError):
+            logger.exception("unable to load resume tails")
             if self._settings.resume == "must":
                 error = wandb_internal_pb2.ErrorInfo()
                 error.code = wandb_internal_pb2.ErrorInfo.ErrorCode.USAGE
-                error.message = "resume='must' but could not resume ({}) "
-                    run.run_id
-                )
+                error.message = f"resume='must' but could not resume ({run.run_id}) "
                 return error

         # TODO: Do we need to restore config / summary?
@@ -772,7 +770,7 @@ class SendManager:
         self._resume_state.summary = summary
         self._resume_state.tags = tags
         self._resume_state.resumed = True
-        logger.info("configured resuming with: {
+        logger.info(f"configured resuming with: {self._resume_state}")
         return None

     def _telemetry_get_framework(self) -> str:
@@ -816,9 +814,7 @@ class SendManager:
             self._interface.publish_config(
                 key=("_wandb", "spell_url"), val=env.get("SPELL_RUN_URL")
             )
-            url = "{}/{}/{}/runs/{}"
-                self._api.app_url, self._run.entity, self._run.project, self._run.run_id
-            )
+            url = f"{self._api.app_url}/{self._run.entity}/{self._run.project}/runs/{self._run.run_id}"
             requests.put(
                 env.get("SPELL_API_URL", "https://api.spell.run") + "/wandb_url",
                 json={"access_token": env.get("WANDB_ACCESS_TOKEN"), "url": url},
@@ -829,23 +825,22 @@ class SendManager:
         # TODO: do something if sync spell is not successful?

     def _setup_fork(self, server_run: dict):
-        assert self._settings.fork_from
-        assert self._settings.fork_from.metric == "_step"
         assert self._run
-
+        assert self._run.branch_point
+        first_step = int(self._run.branch_point.value) + 1
         self._resume_state.step = first_step
         self._resume_state.history = server_run.get("historyLineCount", 0)
         self._run.forked = True
         self._run.starting_step = first_step

     def _load_rewind_state(self, run: "RunRecord"):
-        assert
+        assert run.branch_point
         self._rewind_response = self._api.rewind_run(
             run_name=run.run_id,
             entity=run.entity or None,
             project=run.project or None,
-            metric_name=
-            metric_value=
+            metric_name=run.branch_point.metric,
+            metric_value=run.branch_point.value,
             program_path=self._settings.program or None,
         )
         self._resume_state.history = self._rewind_response.get("historyLineCount", 0)
@@ -854,12 +849,11 @@ class SendManager:
         )

     def _install_rewind_state(self):
-        assert self._settings.resume_from
-        assert self._settings.resume_from.metric == "_step"
         assert self._run
+        assert self._run.branch_point
         assert self._rewind_response

-        first_step = int(self.
+        first_step = int(self._run.branch_point.value) + 1
         self._resume_state.step = first_step

         # We set the fork flag here because rewind uses the forking
@@ -903,8 +897,8 @@ class SendManager:
         config_value_dict = self._config_backend_dict()
         self._config_save(config_value_dict)

-
-
+        do_rewind = run.branch_point.run == run.run_id
+        do_fork = not do_rewind and run.branch_point.run != ""
         do_resume = bool(self._settings.resume)

         num_resume_options_set = sum([do_fork, do_rewind, do_resume])
@@ -1188,7 +1182,7 @@ class SendManager:
                 try:
                     d[item.key] = json.loads(item.value_json)
                 except json.JSONDecodeError:
-                    logger.
+                    logger.exception("error decoding stats json: %s", item.value_json)
         row: Dict[str, Any] = dict(system=d)
         self._flatten(row)
         row["_wandb"] = True
@@ -1500,17 +1494,15 @@ class SendManager:
         try:
             res = self._send_artifact(artifact)
             logger.info(f"sent artifact {artifact.name} - {res}")
-        except Exception
-            logger.
-                'send_artifact: failed for artifact "{}/{}"
-                artifact.type, artifact.name, e
-            )
+        except Exception:
+            logger.exception(
+                f'send_artifact: failed for artifact "{artifact.type}/{artifact.name}"'
             )

     def _send_artifact(
         self, artifact: "ArtifactRecord", history_step: Optional[int] = None
     ) -> Optional[Dict]:
-        from
+        from packaging.version import parse

         assert self._pusher
         saver = ArtifactSaver(
@@ -1523,9 +1515,7 @@ class SendManager:

         if artifact.distributed_id:
             max_cli_version = self._max_cli_version()
-            if max_cli_version is None or
-                max_cli_version
-            ) < parse_version("0.10.16"):
+            if max_cli_version is None or parse(max_cli_version) < parse("0.10.16"):
                 logger.warning(
                     "This W&B Server doesn't support distributed artifacts, "
                     "have your administrator install wandb/local >= 0.9.37"
@@ -1561,13 +1551,11 @@ class SendManager:
         return res

     def send_alert(self, record: "Record") -> None:
-        from
+        from packaging.version import parse

         alert = record.alert
         max_cli_version = self._max_cli_version()
-        if max_cli_version is None or
-            "0.10.9"
-        ):
+        if max_cli_version is None or parse(max_cli_version) < parse("0.10.9"):
             logger.warning(
                 "This W&B server doesn't support alerts, "
                 "have your administrator install wandb/local >= 0.9.31"
@@ -1580,8 +1568,8 @@ class SendManager:
                 level=alert.level,
                 wait_duration=alert.wait_duration,
             )
-        except Exception
-            logger.
+        except Exception:
+            logger.exception(f"send_alert: failed for alert {alert.title!r}")

     def finish(self) -> None:
         logger.info("shutting down sender")
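Most of the sender.py edits replace `logger.error(...)` calls inside `except` blocks with `logger.exception(...)`, which logs at ERROR level and appends the active traceback automatically, so the extra lines that hand-formatted error details can be dropped. A self-contained illustration:

import json
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("demo")

try:
    json.loads("not json")
except json.JSONDecodeError:
    # One call records both the message and the full traceback. Note that
    # logger.exception only makes sense inside an exception handler.
    logger.exception("error decoding stats json: %s", "not json")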
wandb/sdk/internal/system/assets/disk.py
CHANGED
@@ -33,7 +33,7 @@ class DiskUsagePercent:
             try:
                 psutil.disk_usage(path)
                 self.paths.append(path)
-            except Exception as e:
+            except Exception as e:
                 termwarn(f"Could not access disk path {path}: {e}", repeat=False)

     def sample(self) -> None:
@@ -74,7 +74,7 @@ class DiskUsage:
             try:
                 psutil.disk_usage(path)
                 self.paths.append(path)
-            except Exception as e:
+            except Exception as e:
                 termwarn(f"Could not access disk path {path}: {e}", repeat=False)

     def sample(self) -> None:
@@ -198,7 +198,7 @@ class Disk:
                 "total": total,
                 "used": used,
             }
-        except Exception as e:
+        except Exception as e:
             termwarn(f"Could not access disk path {disk_path}: {e}", repeat=False)

         return {self.name: disk_metrics}
wandb/sdk/internal/system/assets/gpu.py
CHANGED
@@ -377,8 +377,8 @@ class GPU:
             return True
         except pynvml.NVMLError_LibraryNotFound:  # type: ignore
             return False
-        except Exception
-            logger.
+        except Exception:
+            logger.exception("Error initializing NVML.")
             return False

     def start(self) -> None:
@@ -410,7 +410,7 @@ class GPU:

         except pynvml.NVMLError:
             pass
-        except Exception
-            logger.
+        except Exception:
+            logger.exception("Error Probing GPU.")

         return info
wandb/sdk/internal/system/assets/gpu_amd.py
CHANGED
@@ -104,8 +104,8 @@ class GPUAMDStats:
             if cards:
                 self.samples.append(cards)

-        except (OSError, ValueError, TypeError, subprocess.CalledProcessError)
-            logger.exception(
+        except (OSError, ValueError, TypeError, subprocess.CalledProcessError):
+            logger.exception("GPU stats error")

     def clear(self) -> None:
         self.samples.clear()
@@ -228,6 +228,6 @@ class GPUAMD:
                 for key in stats.keys()
                 if key.startswith("card")
             ]
-        except Exception
-            logger.exception(
+        except Exception:
+            logger.exception("GPUAMD probe error")
         return info
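The AMD sampler above keeps a narrowed exception tuple covering the failure modes of shelling out to a stats binary and parsing its JSON output. A generic sketch of the same shape; the command and return type are stand-ins, not the actual rocm-smi invocation:

import json
import logging
import subprocess
from typing import Any, Dict, List

logger = logging.getLogger(__name__)

def sample_stats(cmd: List[str]) -> Dict[str, Any]:
    try:
        out = subprocess.check_output(cmd, universal_newlines=True)
        return json.loads(out)
    except (OSError, ValueError, TypeError, subprocess.CalledProcessError):
        # OSError: binary missing or not executable; CalledProcessError:
        # nonzero exit; ValueError: covers json.JSONDecodeError, its subclass.
        logger.exception("GPU stats error")
        return {}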
wandb/sdk/internal/system/assets/interfaces.py
CHANGED
@@ -136,8 +136,8 @@ class MetricsMonitor:
                     logger.info(f"Process {metric.name} has exited.")
                     self._shutdown_event.set()
                     break
-            except Exception
-                logger.
+            except Exception:
+                logger.exception("Failed to sample metric.")
             self._shutdown_event.wait(self.sampling_interval)
             if self._shutdown_event.is_set():
                 break
@@ -153,8 +153,8 @@ class MetricsMonitor:
                 # aggregated_metrics = wandb.util.merge_dicts(
                 #     aggregated_metrics, metric.serialize()
                 # )
-            except Exception
-                logger.
+            except Exception:
+                logger.exception("Failed to serialize metric.")
         return aggregated_metrics

     def publish(self) -> None:
@@ -165,8 +165,8 @@ class MetricsMonitor:
             self._interface.publish_stats(aggregated_metrics)
             for metric in self.metrics:
                 metric.clear()
-        except Exception
-            logger.
+        except Exception:
+            logger.exception("Failed to publish metrics.")

     def start(self) -> None:
         if (self._process is not None) or self._shutdown_event.is_set():
|