PyPI - wandb - Versions diffs - 0.19.4rc1__py3-none-any.whl → 0.19.6rc4__py3-none-any.whl - Mend

wandb 0.19.4rc1__py3-none-any.whl → 0.19.6rc4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46)

wandb/__init__.py +1 -1
wandb/__init__.pyi +1 -8
wandb/_iterutils.py +46 -0
wandb/apis/internal.py +4 -0
wandb/apis/normalize.py +13 -5
wandb/bin/gpu_stats +0 -0
wandb/cli/cli.py +9 -2
wandb/proto/v3/wandb_internal_pb2.py +36 -36
wandb/proto/v3/wandb_settings_pb2.py +2 -2
wandb/proto/v4/wandb_internal_pb2.py +36 -36
wandb/proto/v4/wandb_settings_pb2.py +2 -2
wandb/proto/v5/wandb_internal_pb2.py +36 -36
wandb/proto/v5/wandb_settings_pb2.py +2 -2
wandb/sdk/artifacts/artifact.py +120 -8
wandb/sdk/artifacts/storage_handlers/local_file_handler.py +12 -5
wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +1 -1
wandb/sdk/backend/backend.py +7 -11
wandb/sdk/data_types/base_types/wb_value.py +10 -10
wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +2 -2
wandb/sdk/data_types/helper_types/image_mask.py +2 -2
wandb/sdk/data_types/image.py +0 -3
wandb/sdk/data_types/saved_model.py +1 -1
wandb/sdk/data_types/utils.py +2 -6
wandb/sdk/interface/interface.py +26 -12
wandb/sdk/interface/interface_sock.py +7 -11
wandb/sdk/internal/internal_api.py +9 -1
wandb/sdk/internal/sender.py +2 -2
wandb/sdk/internal/system/assets/cpu.py +1 -1
wandb/sdk/lib/apikey.py +7 -19
wandb/sdk/lib/mailbox.py +0 -14
wandb/sdk/lib/retry.py +6 -3
wandb/sdk/lib/run_moment.py +19 -7
wandb/sdk/lib/server.py +20 -0
wandb/sdk/lib/service_connection.py +2 -2
wandb/sdk/wandb_init.py +71 -46
wandb/sdk/wandb_login.py +86 -110
wandb/sdk/wandb_metadata.py +60 -31
wandb/sdk/wandb_run.py +32 -45
wandb/sdk/wandb_settings.py +465 -143
wandb/sdk/wandb_setup.py +10 -22
wandb/util.py +44 -12
{wandb-0.19.4rc1.dist-info → wandb-0.19.6rc4.dist-info}/METADATA +1 -1
{wandb-0.19.4rc1.dist-info → wandb-0.19.6rc4.dist-info}/RECORD +46 -45
{wandb-0.19.4rc1.dist-info → wandb-0.19.6rc4.dist-info}/WHEEL +0 -0
{wandb-0.19.4rc1.dist-info → wandb-0.19.6rc4.dist-info}/entry_points.txt +0 -0
{wandb-0.19.4rc1.dist-info → wandb-0.19.6rc4.dist-info}/licenses/LICENSE +0 -0

wandb/sdk/wandb_init.py CHANGED Viewed

@@ -20,7 +20,7 @@ import platform
 import sys
 import tempfile
 import time
-from typing import TYPE_CHECKING, Any, Literal, Sequence
+from typing import Any, Literal, Sequence
 if sys.version_info >= (3, 11):
     from typing import Self
@@ -40,16 +40,13 @@ from wandb.util import _is_artifact_representation
 from . import wandb_login, wandb_setup
 from .backend.backend import Backend
-from .lib import SummaryDisabled, filesystem, module, printer, telemetry
+from .lib import SummaryDisabled, filesystem, module, paths, printer, telemetry
 from .lib.deprecate import Deprecated, deprecate
 from .lib.mailbox import Mailbox, MailboxProgress
 from .wandb_helper import parse_config
 from .wandb_run import Run, TeardownHook, TeardownStage
 from .wandb_settings import Settings
-if TYPE_CHECKING:
-    from wandb.proto import wandb_internal_pb2 as pb
 def _huggingface_version() -> str | None:
     if "transformers" in sys.modules:
@@ -180,7 +177,6 @@ class _WandbInit:
             force=run_settings.force,
             _disable_warning=True,
             _silent=run_settings.quiet or run_settings.silent,
-            _entity=run_settings.entity,
         )
     def warn_env_vars_change_after_setup(self) -> None:
@@ -454,6 +450,23 @@ class _WandbInit:
                 artifacts=result.artifacts,
             )
+        wandb_internal = result.base_no_artifacts.setdefault("_wandb", dict())
+        if settings.save_code and settings.program_relpath:
+            wandb_internal["code_path"] = paths.LogicalPath(
+                os.path.join("code", settings.program_relpath)
+            )
+        if settings.fork_from is not None:
+            wandb_internal["branch_point"] = {
+                "run_id": settings.fork_from.run,
+                "step": settings.fork_from.value,
+            }
+        if settings.resume_from is not None:
+            wandb_internal["branch_point"] = {
+                "run_id": settings.resume_from.run,
+                "step": settings.resume_from.value,
+            }
         return result
     def teardown(self) -> None:
@@ -864,7 +877,7 @@ class _WandbInit:
                 tel.feature.core = True
             if settings._shared:
                 wandb.termwarn(
-                    "The `_shared` feature is experimental and may change. "
+                    "The `shared` mode feature is experimental and may change. "
                     "Please contact support@wandb.com for guidance and to report any issues."
                 )
                 tel.feature.shared_mode = True
@@ -891,7 +904,6 @@ class _WandbInit:
         run._set_backend(backend)
         run._set_teardown_hooks(self._teardown_hooks)
-        backend._hack_set_run(run)
         assert backend.interface
         mailbox.enable_keepalive()
         backend.interface.publish_header()
@@ -902,8 +914,6 @@ class _WandbInit:
         if not (settings.disable_git or settings.x_disable_machine_info):
             run._populate_git_info()
-        run_result: pb.RunUpdateResult | None = None
         if settings._offline and settings.resume:
             wandb.termwarn(
                 "`resume` will be ignored since W&B syncing is set to `offline`. "
@@ -911,6 +921,16 @@ class _WandbInit:
             )
         error: wandb.Error | None = None
+        # In shared mode, generate a unique label if not provided.
+        # The label is used to distinguish between system metrics and console logs
+        # from different writers to the same run.
+        if settings._shared and not settings.x_label:
+            # TODO: If executed in a known distributed environment (e.g. Ray or SLURM),
+            #   use the env vars to generate a label (e.g. SLURM_JOB_ID or RANK)
+            prefix = settings.host or ""
+            label = runid.generate_id()
+            settings.x_label = f"{prefix}-{label}" if prefix else label
         timeout = settings.init_timeout
         self._logger.info(
@@ -923,47 +943,53 @@ class _WandbInit:
             on_progress=self._on_progress_init,
             cancel=True,
         )
-        if result:
-            run_result = result.run_result
-        if run_result is None:
-            error_message = (
-                f"Run initialization has timed out after {timeout} sec. "
-                "Please try increasing the timeout with the `init_timeout` setting: "
-                "`wandb.init(settings=wandb.Settings(init_timeout=120))`."
-            )
-            # We're not certain whether the error we encountered is due to an issue
-            # with the server (a "CommError") or if it's a problem within the SDK (an "Error").
-            # This means that the error could be a result of the server being unresponsive,
-            # or it could be because we were unable to communicate with the wandb service.
-            error = CommError(error_message)
-            run_init_handle._cancel()
-        elif run_result.HasField("error"):
-            error = ProtobufErrorHandler.to_exception(run_result.error)
-        if error is not None:
-            self._logger.error(f"encountered error: {error}")
+        # Raise an error if deliver_run failed.
+        #
+        # This is wrapped in a try-except to perform additional cleanup logic
+        # when x_disable_service is True.
+        #
+        # TODO: Remove try-except once x_disable_service is removed.
+        try:
+            if not result or not result.run_result:
+                run_init_handle._cancel()
+                # This may either be an issue with the W&B server (a CommError)
+                # or a bug in the SDK (an Error). We cannot distinguish between
+                # the two causes here.
+                raise CommError(
+                    f"Run initialization has timed out after {timeout} sec."
+                    " Please try increasing the timeout with the `init_timeout`"
+                    " setting: `wandb.init(settings=wandb.Settings(init_timeout=120))`."
+                )
+            if error := ProtobufErrorHandler.to_exception(result.run_result.error):
+                raise error
+            if not result.run_result.HasField("run"):
+                raise Error("Assertion failed: run_result is missing the run field")
+        except Exception:
             if not service:
-                # Shutdown the backend and get rid of the logger
-                # we don't need to do console cleanup at this point
+                # Kill the background thread or process.
                 backend.cleanup()
-                self.teardown()
-            raise error
-        assert run_result is not None  # for mypy
-        if not run_result.HasField("run"):
-            raise Error(
-                "It appears that something have gone wrong during the program "
-                "execution as an unexpected missing field was encountered. "
-                "(run_result is missing the 'run' field)"
-            )
+                # Do some Jupyter and logger cleanup.
+                #
+                # NOTE: This shouldn't be necessary. The logger is global,
+                #   so on any error outside of this try-catch, we fail to
+                #   clean it up, causing the next run to write some of its
+                #   initial logs to this run's log file. The Jupyter
+                #   monkeypatching should probably happen at the library level
+                #   (in wandb.setup()) rather than per-run.
+                self.teardown()
+            raise
-        if run_result.run.resumed:
+        if result.run_result.run.resumed:
             self._logger.info("run resumed")
             with telemetry.context(run=run) as tel:
-                tel.feature.resumed = run_result.run.resumed
-        run._set_run_obj(run_result.run)
+                tel.feature.resumed = result.run_result.run.resumed
+        run._set_run_obj(result.run_result.run)
         self._logger.info("starting run threads in backend")
         # initiate run (stats and metadata probing)
@@ -1069,7 +1095,6 @@ def _attach(
         run._init(settings=settings)
     run._set_library(_wl)
     run._set_backend(backend)
-    backend._hack_set_run(run)
     assert backend.interface
     mailbox.enable_keepalive()

wandb/sdk/wandb_login.py CHANGED Viewed

@@ -8,6 +8,7 @@ import os
 from typing import Literal, Optional, Tuple
 import click
+from requests.exceptions import ConnectionError
 import wandb
 from wandb.errors import AuthenticationError, UsageError
@@ -16,7 +17,6 @@ from wandb.old.settings import Settings as OldSettings
 from ..apis import InternalApi
 from .internal.internal_api import Api
 from .lib import apikey
-from .wandb_settings import Settings
 def _handle_host_wandb_setting(host: Optional[str], cloud: bool = False) -> None:
@@ -73,30 +73,16 @@ def login(
         UsageError - if api_key cannot be configured and no tty
     """
     _handle_host_wandb_setting(host)
-    if wandb.setup()._settings._noop:
-        return True
-    configured = _login(
+    return _login(
         anonymous=anonymous,
         key=key,
         relogin=relogin,
         host=host,
         force=force,
         timeout=timeout,
+        verify=verify,
     )
-    if verify:
-        from . import wandb_setup
-        singleton = wandb_setup.singleton()
-        assert singleton is not None
-        viewer = singleton._server._viewer
-        if not viewer:
-            raise AuthenticationError(
-                "API key verification failed. Make sure your API key is valid."
-            )
-    return True if configured else False
 class ApiKeyStatus(enum.Enum):
     VALID = 1
@@ -106,26 +92,15 @@ class ApiKeyStatus(enum.Enum):
 class _WandbLogin:
-    def __init__(self):
-        self._settings: Optional[Settings] = None
-        self._backend = None
-        self._silent: Optional[bool] = None
-        self._entity: Optional[str] = None
-        self._wl = None
-        self._key = None
-        self._relogin = None
-    def setup(
+    def __init__(
         self,
-        *,
-        anonymous: Optional[Literal["allow", "must", "never"]] = None,
+        anonymous: Optional[Literal["must", "allow", "never"]] = None,
+        force: Optional[bool] = None,
+        host: Optional[str] = None,
         key: Optional[str] = None,
         relogin: Optional[bool] = None,
-        host: Optional[str] = None,
-        force: Optional[bool] = None,
         timeout: Optional[int] = None,
-    ) -> None:
-        """Updates login-related settings on the global setup object."""
+    ):
         self._relogin = relogin
         login_settings = {
@@ -135,61 +110,47 @@ class _WandbLogin:
             "force": force,
             "login_timeout": timeout,
         }
+        self.is_anonymous = anonymous == "must"
-        # make sure they are applied globally
-        self._wl = wandb.setup(
-            settings=wandb.Settings(
-                **{k: v for k, v in login_settings.items() if v is not None}
-            )
-        )
-        self._settings = self._wl.settings
+        self._wandb_setup = wandb.setup()
+        self._wandb_setup.settings.update_from_dict(login_settings)
+        self._settings = self._wandb_setup.settings
+    def _update_global_anonymous_setting(self) -> None:
+        api = InternalApi()
+        if self.is_anonymous:
+            api.set_setting("anonymous", "must", globally=True, persist=True)
+        else:
+            api.clear_setting("anonymous", globally=True, persist=True)
     def is_apikey_configured(self) -> bool:
         """Returns whether an API key is set or can be inferred."""
         return apikey.api_key(settings=self._settings) is not None
-    def should_use_identity_token(self):
-        return self._settings.identity_token_file is not None
-    def set_backend(self, backend):
-        self._backend = backend
-    def set_silent(self, silent: bool) -> None:
-        self._silent = silent
-    def set_entity(self, entity: str) -> None:
-        self._entity = entity
-    def login(self) -> bool:
-        """Returns whether the user is logged in (i.e. an API key exists).
-        If the user is logged in, this also prints an informational message.
-        """
-        apikey_configured = self.is_apikey_configured()
-        if self._settings.relogin or self._relogin:
-            apikey_configured = False
-        if not apikey_configured:
-            return False
-        if not self._silent:
-            self._print_logged_in_message()
-        return apikey_configured
     def _print_logged_in_message(self) -> None:
         """Prints a message telling the user they are logged in."""
-        username = self._wl._get_username()
+        username = self._wandb_setup._get_username()
         if username:
+            host_str = (
+                f" to {click.style(self._settings.base_url, fg='green')}"
+                if self._settings.base_url
+                else ""
+            )
             # check to see if we got an entity from the setup call or from the user
-            entity = self._entity or self._wl._get_entity()
+            entity = self._settings.entity or self._wandb_setup._get_entity()
             entity_str = ""
             # check if entity exist, valid (is part of a certain team) and different from the username
-            if entity and entity in self._wl._get_teams() and entity != username:
+            if (
+                entity
+                and entity in self._wandb_setup._get_teams()
+                and entity != username
+            ):
                 entity_str = f" ({click.style(entity, fg='yellow')})"
-            login_state_str = f"Currently logged in as: {click.style(username, fg='yellow')}{entity_str}"
+            login_state_str = f"Currently logged in as: {click.style(username, fg='yellow')}{entity_str}{host_str}"
         else:
             login_state_str = "W&B API key is configured"
@@ -210,9 +171,8 @@ class _WandbLogin:
                 "WANDB_API_KEY environment variable, or running "
                 "`wandb login` from the command line."
             )
-        apikey.write_key(self._settings, key)
-        self.update_session(key)
-        self._key = key
+        if key:
+            apikey.write_key(self._settings, key)
     def update_session(
         self,
@@ -230,11 +190,11 @@ class _WandbLogin:
             login_settings = dict(mode="disabled")
         elif key:
             login_settings = dict(api_key=key)
-        self._wl._settings.update_from_dict(login_settings)
+        self._wandb_setup.settings.update_from_dict(login_settings)
         # Whenever the key changes, make sure to pull in user settings
         # from server.
-        if not self._wl.settings._offline:
-            self._wl._update_user_settings()
+        if not self._wandb_setup.settings._offline:
+            self._wandb_setup._update_user_settings()
     def _prompt_api_key(self) -> Tuple[Optional[str], ApiKeyStatus]:
         api = Api(self._settings)
@@ -259,7 +219,7 @@ class _WandbLogin:
                 return None, ApiKeyStatus.OFFLINE
             return key, ApiKeyStatus.VALID
-    def prompt_api_key(self) -> None:
+    def prompt_api_key(self) -> Tuple[Optional[str], ApiKeyStatus]:
         """Updates the global API key by prompting the user."""
         key, status = self._prompt_api_key()
         if status == ApiKeyStatus.NOTTY:
@@ -270,8 +230,24 @@ class _WandbLogin:
             )
             raise UsageError("api_key not configured (no-tty). call " + directive)
-        self.update_session(key, status=status)
-        self._key = key
+        return key, status
+    def _verify_login(self, key: str) -> None:
+        api = InternalApi(api_key=key)
+        try:
+            is_api_key_valid = api.validate_api_key()
+            if not is_api_key_valid:
+                raise AuthenticationError(
+                    "API key verification failed. Make sure your API key is valid."
+                )
+        except ConnectionError:
+            raise AuthenticationError(
+                "Unable to connect to server to verify API token."
+            )
+        except Exception:
+            raise AuthenticationError("An error occurred while verifying the API key.")
 def _login(
@@ -282,38 +258,29 @@ def _login(
     host: Optional[str] = None,
     force: Optional[bool] = None,
     timeout: Optional[int] = None,
-    _backend=None,
+    verify: bool = False,
     _silent: Optional[bool] = None,
     _disable_warning: Optional[bool] = None,
-    _entity: Optional[str] = None,
-):
+) -> bool:
     if wandb.run is not None:
         if not _disable_warning:
             wandb.termwarn("Calling wandb.login() after wandb.init() has no effect.")
         return True
-    wlogin = _WandbLogin()
-    if _backend:
-        wlogin.set_backend(_backend)
-    if _silent:
-        wlogin.set_silent(_silent)
-    if _entity:
-        wlogin.set_entity(_entity)
-    # configure login object
-    wlogin.setup(
+    wlogin = _WandbLogin(
         anonymous=anonymous,
+        force=force,
+        host=host,
         key=key,
         relogin=relogin,
-        host=host,
-        force=force,
         timeout=timeout,
     )
-    if wlogin._settings._offline:
+    if wlogin._settings._noop:
+        return True
+    if wlogin._settings._offline and not wlogin._settings.x_cli_only_mode:
+        wandb.termwarn("Unable to verify login in offline mode.")
         return False
     elif wandb.util._is_kaggle() and not wandb.util._has_internet():
         wandb.termerror(
@@ -321,19 +288,28 @@ def _login(
         )
         return False
-    if wlogin.should_use_identity_token():
+    if wlogin._settings.identity_token_file:
         return True
-    # perform a login
-    logged_in = wlogin.login()
+    key_is_pre_configured = False
+    key_status = None
+    if key is None:
+        # Check if key is already set in the settings, or configured in the users .netrc file.
+        key = apikey.api_key(settings=wlogin._settings)
+        if key and not relogin:
+            key_is_pre_configured = True
+        else:
+            key, key_status = wlogin.prompt_api_key()
+    if verify:
+        wlogin._verify_login(key)
-    if key:
+    if not key_is_pre_configured:
         wlogin.configure_api_key(key)
+        wlogin.update_session(key, status=key_status)
+        wlogin._update_global_anonymous_setting()
-    if logged_in:
-        return logged_in
-    if not key:
-        wlogin.prompt_api_key()
+    if key and not _silent:
+        wlogin._print_logged_in_message()
-    return wlogin._key or False
+    return key is not None

wandb/sdk/wandb_metadata.py CHANGED Viewed

@@ -95,6 +95,7 @@ class GpuNvidiaInfo(BaseModel, validate_assignment=True):
     memory_total: int | None = None
     cuda_cores: int | None = None
     architecture: str | None = None
+    uuid: str | None = None
     def to_proto(self) -> wandb_internal_pb2.GpuNvidiaInfo:
         return wandb_internal_pb2.GpuNvidiaInfo(
@@ -102,6 +103,7 @@ class GpuNvidiaInfo(BaseModel, validate_assignment=True):
             memory_total=self.memory_total or 0,
             cuda_cores=self.cuda_cores or 0,
             architecture=self.architecture or "",
+            uuid=self.uuid or "",
         )
     @classmethod
@@ -111,6 +113,7 @@ class GpuNvidiaInfo(BaseModel, validate_assignment=True):
             memory_total=proto.memory_total,
             cuda_cores=proto.cuda_cores,
             architecture=proto.architecture,
+            uuid=proto.uuid,
         )
@@ -234,37 +237,63 @@ class Metadata(BaseModel, validate_assignment=True):
     NOTE: Definitions must be kept in sync with wandb_internal.proto::MetadataRequest.
     Attributes:
-        os (str, optional): Operating system.
-        python (str, optional): Python version.
-        heartbeat_at (datetime, optional): Timestamp of last heartbeat.
-        started_at (datetime, optional): Timestamp of run start.
-        docker (str, optional): Docker image.
-        cuda (str, optional): CUDA version.
-        args (List[str]): Command-line arguments.
-        state (str, optional): Run state.
-        program (str, optional): Program name.
-        code_path (str, optional): Path to code.
-        git (GitRepoRecord, optional): Git repository information.
-        email (str, optional): Email address.
-        root (str, optional): Root directory.
-        host (str, optional): Host name.
-        username (str, optional): Username.
-        executable (str, optional): Python executable path.
-        code_path_local (str, optional): Local code path.
-        colab (str, optional): Colab URL.
-        cpu_count (int, optional): CPU count.
-        cpu_count_logical (int, optional): Logical CPU count.
-        gpu_type (str, optional): GPU type.
-        disk (Dict[str, DiskInfo]): Disk information.
-        memory (MemoryInfo, optional): Memory information.
-        cpu (CpuInfo, optional): CPU information.
-        apple (AppleInfo, optional): Apple silicon information.
-        gpu_nvidia (List[GpuNvidiaInfo]): NVIDIA GPU information.
-        gpu_amd (List[GpuAmdInfo]): AMD GPU information.
-        slurm (Dict[str, str]): Slurm environment information.
-        cuda_version (str, optional): CUDA version.
-        trainium (TrainiumInfo, optional): Trainium information.
-        tpu (TPUInfo, optional): TPU information.
+        os: Operating system.
+        python: Python version.
+        heartbeat_at: Timestamp of last heartbeat.
+        started_at: Timestamp of run start.
+        docker: Docker image.
+        cuda: CUDA version.
+        args: Command-line arguments.
+        state: Run state.
+        program: Program name.
+        code_path: Path to code.
+        git: Git repository information.
+        email: Email address.
+        root: Root directory.
+        host: Host name.
+        username: Username.
+        executable: Python executable path.
+        code_path_local: Local code path.
+        colab: Colab URL.
+        cpu_count: CPU count.
+        cpu_count_logical: Logical CPU count.
+        gpu_type: GPU type.
+        disk: Disk information.
+        memory: Memory information.
+        cpu: CPU information.
+        apple: Apple silicon information.
+        gpu_nvidia: NVIDIA GPU information.
+        gpu_amd: AMD GPU information.
+        slurm: Slurm environment information.
+        cuda_version: CUDA version.
+        trainium: Trainium information.
+        tpu: TPU information.
+    Examples:
+        Update Run metadata:
+        ```python
+        with wandb.init(settings=settings) as run:
+            run._metadata.gpu_nvidia = [
+                {
+                    "name": "Tesla T4",
+                    "memory_total": "16106127360",
+                    "cuda_cores": 2560,
+                    "architecture": "Turing",
+                },
+                ...,
+            ]
+            run._metadata.gpu_type = "Tesla T4"
+            run._metadata.gpu_count = 42
+            run._metadata.tpu = {
+                "name": "v6e",
+                "hbm_gib": 32,
+                "devices_per_chip": 1,
+                "count": 1337,
+            }
+        ```
     """
     # TODO: Pydantic configuration.

wandb 0.19.4rc1__py3-none-any.whl → 0.19.6rc4__py3-none-any.whl

wandb 0.19.4rc1__py3-none-any.whl → 0.19.6rc4__py3-none-any.whl