PyPI - wandb - Versions diffs - 0.13.10__py3-none-any.whl → 0.14.0__py3-none-any.whl - Mend

wandb 0.13.10py3-none-any.whl → 0.14.0py3-none-any.whl

Files changed (228) hide show

wandb/__init__.py +2 -3
wandb/apis/__init__.py +1 -3
wandb/apis/importers/__init__.py +4 -0
wandb/apis/importers/base.py +312 -0
wandb/apis/importers/mlflow.py +113 -0
wandb/apis/internal.py +29 -2
wandb/apis/normalize.py +6 -5
wandb/apis/public.py +163 -180
wandb/apis/reports/_templates.py +6 -12
wandb/apis/reports/report.py +1 -1
wandb/apis/reports/runset.py +1 -3
wandb/apis/reports/util.py +12 -10
wandb/beta/workflows.py +57 -34
wandb/catboost/__init__.py +1 -2
wandb/cli/cli.py +215 -133
wandb/data_types.py +63 -56
wandb/docker/__init__.py +78 -16
wandb/docker/auth.py +21 -22
wandb/env.py +0 -1
wandb/errors/__init__.py +8 -116
wandb/errors/term.py +1 -1
wandb/fastai/__init__.py +1 -2
wandb/filesync/dir_watcher.py +8 -5
wandb/filesync/step_prepare.py +76 -75
wandb/filesync/step_upload.py +1 -2
wandb/integration/catboost/__init__.py +1 -3
wandb/integration/catboost/catboost.py +8 -14
wandb/integration/fastai/__init__.py +7 -13
wandb/integration/gym/__init__.py +35 -4
wandb/integration/keras/__init__.py +3 -3
wandb/integration/keras/callbacks/metrics_logger.py +9 -8
wandb/integration/keras/callbacks/model_checkpoint.py +9 -9
wandb/integration/keras/callbacks/tables_builder.py +31 -19
wandb/integration/kfp/kfp_patch.py +20 -17
wandb/integration/kfp/wandb_logging.py +1 -2
wandb/integration/lightgbm/__init__.py +21 -19
wandb/integration/prodigy/prodigy.py +6 -7
wandb/integration/sacred/__init__.py +9 -12
wandb/integration/sagemaker/__init__.py +1 -3
wandb/integration/sagemaker/auth.py +0 -1
wandb/integration/sagemaker/config.py +1 -1
wandb/integration/sagemaker/resources.py +1 -1
wandb/integration/sb3/sb3.py +8 -4
wandb/integration/tensorboard/__init__.py +1 -3
wandb/integration/tensorboard/log.py +8 -8
wandb/integration/tensorboard/monkeypatch.py +11 -9
wandb/integration/tensorflow/__init__.py +1 -3
wandb/integration/xgboost/__init__.py +4 -6
wandb/integration/yolov8/__init__.py +7 -0
wandb/integration/yolov8/yolov8.py +250 -0
wandb/jupyter.py +31 -35
wandb/lightgbm/__init__.py +1 -2
wandb/old/settings.py +2 -2
wandb/plot/bar.py +1 -2
wandb/plot/confusion_matrix.py +1 -3
wandb/plot/histogram.py +1 -2
wandb/plot/line.py +1 -2
wandb/plot/line_series.py +4 -4
wandb/plot/pr_curve.py +17 -20
wandb/plot/roc_curve.py +1 -3
wandb/plot/scatter.py +1 -2
wandb/proto/v3/wandb_server_pb2.py +85 -39
wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
wandb/proto/v4/wandb_server_pb2.py +51 -39
wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
wandb/sdk/__init__.py +1 -3
wandb/sdk/backend/backend.py +1 -1
wandb/sdk/data_types/_dtypes.py +38 -30
wandb/sdk/data_types/base_types/json_metadata.py +1 -3
wandb/sdk/data_types/base_types/media.py +17 -17
wandb/sdk/data_types/base_types/wb_value.py +33 -26
wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +91 -125
wandb/sdk/data_types/helper_types/classes.py +1 -1
wandb/sdk/data_types/helper_types/image_mask.py +12 -12
wandb/sdk/data_types/histogram.py +5 -4
wandb/sdk/data_types/html.py +1 -2
wandb/sdk/data_types/image.py +11 -11
wandb/sdk/data_types/molecule.py +3 -6
wandb/sdk/data_types/object_3d.py +1 -2
wandb/sdk/data_types/plotly.py +1 -2
wandb/sdk/data_types/saved_model.py +10 -8
wandb/sdk/data_types/video.py +1 -1
wandb/sdk/integration_utils/data_logging.py +5 -5
wandb/sdk/interface/artifacts.py +288 -266
wandb/sdk/interface/interface.py +2 -3
wandb/sdk/interface/interface_grpc.py +1 -1
wandb/sdk/interface/interface_queue.py +1 -1
wandb/sdk/interface/interface_relay.py +1 -1
wandb/sdk/interface/interface_shared.py +1 -2
wandb/sdk/interface/interface_sock.py +1 -1
wandb/sdk/interface/message_future.py +1 -1
wandb/sdk/interface/message_future_poll.py +1 -1
wandb/sdk/interface/router.py +1 -1
wandb/sdk/interface/router_queue.py +1 -1
wandb/sdk/interface/router_relay.py +1 -1
wandb/sdk/interface/router_sock.py +1 -1
wandb/sdk/interface/summary_record.py +1 -1
wandb/sdk/internal/artifacts.py +1 -1
wandb/sdk/internal/datastore.py +2 -3
wandb/sdk/internal/file_pusher.py +5 -3
wandb/sdk/internal/file_stream.py +22 -19
wandb/sdk/internal/handler.py +5 -4
wandb/sdk/internal/internal.py +1 -1
wandb/sdk/internal/internal_api.py +115 -55
wandb/sdk/internal/job_builder.py +1 -3
wandb/sdk/internal/profiler.py +1 -1
wandb/sdk/internal/progress.py +4 -6
wandb/sdk/internal/sample.py +1 -3
wandb/sdk/internal/sender.py +28 -16
wandb/sdk/internal/settings_static.py +5 -5
wandb/sdk/internal/system/assets/__init__.py +1 -0
wandb/sdk/internal/system/assets/cpu.py +3 -9
wandb/sdk/internal/system/assets/disk.py +2 -4
wandb/sdk/internal/system/assets/gpu.py +6 -18
wandb/sdk/internal/system/assets/gpu_apple.py +2 -4
wandb/sdk/internal/system/assets/interfaces.py +50 -22
wandb/sdk/internal/system/assets/ipu.py +1 -3
wandb/sdk/internal/system/assets/memory.py +7 -13
wandb/sdk/internal/system/assets/network.py +4 -8
wandb/sdk/internal/system/assets/open_metrics.py +283 -0
wandb/sdk/internal/system/assets/tpu.py +1 -4
wandb/sdk/internal/system/assets/trainium.py +26 -14
wandb/sdk/internal/system/system_info.py +2 -3
wandb/sdk/internal/system/system_monitor.py +52 -20
wandb/sdk/internal/tb_watcher.py +12 -13
wandb/sdk/launch/_project_spec.py +54 -65
wandb/sdk/launch/agent/agent.py +374 -90
wandb/sdk/launch/builder/abstract.py +61 -7
wandb/sdk/launch/builder/build.py +81 -110
wandb/sdk/launch/builder/docker_builder.py +181 -0
wandb/sdk/launch/builder/kaniko_builder.py +419 -0
wandb/sdk/launch/builder/noop.py +31 -12
wandb/sdk/launch/builder/templates/_wandb_bootstrap.py +70 -20
wandb/sdk/launch/environment/abstract.py +28 -0
wandb/sdk/launch/environment/aws_environment.py +276 -0
wandb/sdk/launch/environment/gcp_environment.py +271 -0
wandb/sdk/launch/environment/local_environment.py +65 -0
wandb/sdk/launch/github_reference.py +3 -8
wandb/sdk/launch/launch.py +38 -29
wandb/sdk/launch/launch_add.py +6 -8
wandb/sdk/launch/loader.py +230 -0
wandb/sdk/launch/registry/abstract.py +54 -0
wandb/sdk/launch/registry/elastic_container_registry.py +163 -0
wandb/sdk/launch/registry/google_artifact_registry.py +203 -0
wandb/sdk/launch/registry/local_registry.py +62 -0
wandb/sdk/launch/runner/abstract.py +1 -16
wandb/sdk/launch/runner/{kubernetes.py → kubernetes_runner.py} +83 -95
wandb/sdk/launch/runner/local_container.py +46 -22
wandb/sdk/launch/runner/local_process.py +1 -4
wandb/sdk/launch/runner/{aws.py → sagemaker_runner.py} +53 -212
wandb/sdk/launch/runner/{gcp_vertex.py → vertex_runner.py} +38 -55
wandb/sdk/launch/sweeps/__init__.py +3 -2
wandb/sdk/launch/sweeps/scheduler.py +132 -39
wandb/sdk/launch/sweeps/scheduler_sweep.py +80 -89
wandb/sdk/launch/utils.py +101 -30
wandb/sdk/launch/wandb_reference.py +2 -7
wandb/sdk/lib/_settings_toposort_generate.py +166 -0
wandb/sdk/lib/_settings_toposort_generated.py +201 -0
wandb/sdk/lib/apikey.py +2 -4
wandb/sdk/lib/config_util.py +4 -1
wandb/sdk/lib/console.py +1 -3
wandb/sdk/lib/deprecate.py +3 -3
wandb/sdk/lib/file_stream_utils.py +7 -5
wandb/sdk/lib/filenames.py +1 -1
wandb/sdk/lib/filesystem.py +61 -5
wandb/sdk/lib/git.py +1 -3
wandb/sdk/lib/import_hooks.py +4 -7
wandb/sdk/lib/ipython.py +8 -5
wandb/sdk/lib/lazyloader.py +1 -3
wandb/sdk/lib/mailbox.py +14 -4
wandb/sdk/lib/proto_util.py +10 -5
wandb/sdk/lib/redirect.py +15 -22
wandb/sdk/lib/reporting.py +1 -3
wandb/sdk/lib/retry.py +4 -5
wandb/sdk/lib/runid.py +1 -3
wandb/sdk/lib/server.py +15 -9
wandb/sdk/lib/sock_client.py +1 -1
wandb/sdk/lib/sparkline.py +1 -1
wandb/sdk/lib/wburls.py +1 -1
wandb/sdk/service/port_file.py +1 -2
wandb/sdk/service/service.py +36 -13
wandb/sdk/service/service_base.py +12 -1
wandb/sdk/verify/verify.py +5 -7
wandb/sdk/wandb_artifacts.py +142 -177
wandb/sdk/wandb_config.py +5 -8
wandb/sdk/wandb_helper.py +1 -1
wandb/sdk/wandb_init.py +24 -13
wandb/sdk/wandb_login.py +9 -9
wandb/sdk/wandb_manager.py +39 -4
wandb/sdk/wandb_metric.py +2 -6
wandb/sdk/wandb_require.py +4 -15
wandb/sdk/wandb_require_helpers.py +1 -9
wandb/sdk/wandb_run.py +95 -141
wandb/sdk/wandb_save.py +1 -3
wandb/sdk/wandb_settings.py +149 -54
wandb/sdk/wandb_setup.py +66 -46
wandb/sdk/wandb_summary.py +13 -10
wandb/sdk/wandb_sweep.py +6 -7
wandb/sdk/wandb_watch.py +1 -1
wandb/sklearn/calculate/confusion_matrix.py +1 -1
wandb/sklearn/calculate/learning_curve.py +1 -1
wandb/sklearn/calculate/summary_metrics.py +1 -3
wandb/sklearn/plot/__init__.py +1 -1
wandb/sklearn/plot/classifier.py +27 -18
wandb/sklearn/plot/clusterer.py +4 -5
wandb/sklearn/plot/regressor.py +4 -4
wandb/sklearn/plot/shared.py +2 -2
wandb/sync/__init__.py +1 -3
wandb/sync/sync.py +4 -5
wandb/testing/relay.py +11 -10
wandb/trigger.py +1 -1
wandb/util.py +106 -81
wandb/viz.py +4 -4
wandb/wandb_agent.py +50 -50
wandb/wandb_controller.py +2 -3
wandb/wandb_run.py +1 -2
wandb/wandb_torch.py +1 -1
wandb/xgboost/__init__.py +1 -2
{wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/METADATA +6 -2
{wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/RECORD +224 -209
{wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/WHEEL +1 -1
wandb/sdk/launch/builder/docker.py +0 -80
wandb/sdk/launch/builder/kaniko.py +0 -393
wandb/sdk/launch/builder/loader.py +0 -32
wandb/sdk/launch/runner/loader.py +0 -50
{wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/LICENSE +0 -0
{wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/entry_points.txt +0 -0
{wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/top_level.txt +0 -0

wandb/sdk/launch/sweeps/scheduler_sweep.py CHANGED Viewed

@@ -1,36 +1,28 @@
 """Scheduler for classic wandb Sweeps."""
 import logging
-import pprint
 import queue
 import socket
 import time
-from dataclasses import dataclass
+from pprint import pformat as pf
 from typing import Any, Dict, List
 import wandb
 from wandb.sdk.launch.sweeps import SchedulerError
 from wandb.sdk.launch.sweeps.scheduler import (
     LOG_PREFIX,
+    RunState,
     Scheduler,
     SchedulerState,
-    SimpleRunState,
     SweepRun,
+    _Worker,
 )
-from wandb.wandb_agent import Agent as LegacySweepAgent
+from wandb.wandb_agent import _create_sweep_command_args
-logger = logging.getLogger(__name__)
-@dataclass
-class _Worker:
-    agent_config: Dict[str, Any]
-    agent_id: str
+_logger = logging.getLogger(__name__)
 class SweepScheduler(Scheduler):
-    """A SweepScheduler is a controller/agent that will populate a Launch RunQueue with
-    launch jobs it creates from run suggestions it pulls from an internal sweeps RunQueue.
-    """
+    """A controller/agent that populates a Launch RunQueue from a sweeps RunQueue."""
     def __init__(
         self,
@@ -41,11 +33,6 @@ class SweepScheduler(Scheduler):
         **kwargs: Any,
     ):
         super().__init__(*args, **kwargs)
-        # Optionally run multiple workers in (pseudo-)parallel. Workers do not
-        # actually run training workloads, they simply send heartbeat messages
-        # (emulating a real agent) and add new runs to the launch queue. The
-        # launch agent is the one that actually runs the training workloads.
-        self._workers: Dict[int, _Worker] = {}
         self._num_workers: int = num_workers
         # Thread will pop items off the Sweeps RunQueue using AgentHeartbeat
         # and put them in this internal queue, which will be used to populate
@@ -56,7 +43,7 @@ class SweepScheduler(Scheduler):
     def _start(self) -> None:
         for worker_id in range(self._num_workers):
-            logger.debug(f"{LOG_PREFIX}Starting AgentHeartbeat worker {worker_id}\n")
+            _logger.debug(f"{LOG_PREFIX}Starting AgentHeartbeat worker {worker_id}\n")
             agent_config = self._api.register_agent(
                 f"{socket.gethostname()}-{worker_id}",  # host
                 sweep_id=self._sweep_id,
@@ -68,92 +55,96 @@ class SweepScheduler(Scheduler):
                 agent_id=agent_config["id"],
             )
-    def _heartbeat(self, worker_id: int) -> None:
-        # Make sure Scheduler is alive
-        if not self.is_alive():
-            return
+    def _get_sweep_commands(self, worker_id: int) -> List[Dict[str, Any]]:
         # AgentHeartbeat wants a Dict of runs which are running or queued
         _run_states: Dict[str, bool] = {}
         for run_id, run in self._yield_runs():
             # Filter out runs that are from a different worker thread
-            if run.worker_id == worker_id and run.state == SimpleRunState.ALIVE:
+            if run.worker_id == worker_id and run.state == RunState.ALIVE:
                 _run_states[run_id] = True
-        logger.debug(
-            f"{LOG_PREFIX}AgentHeartbeat sending: \n{pprint.pformat(_run_states)}\n"
-        )
+        _logger.debug(f"{LOG_PREFIX}Sending states: \n{pf(_run_states)}\n")
         commands: List[Dict[str, Any]] = self._api.agent_heartbeat(
             self._workers[worker_id].agent_id,  # agent_id: str
             {},  # metrics: dict
             _run_states,  # run_states: dict
         )
-        logger.debug(
-            f"{LOG_PREFIX}AgentHeartbeat received {len(commands)} commands: \n{pprint.pformat(commands)}\n"
-        )
-        if commands:
-            for command in commands:
-                # The command "type" can be one of "run", "resume", "stop", "exit"
-                _type = command.get("type", None)
-                if _type in ["exit", "stop"]:
+        _logger.debug(f"{LOG_PREFIX}AgentHeartbeat commands: \n{pf(commands)}\n")
+        return commands
+    def _heartbeat(self, worker_id: int) -> bool:
+        # Make sure Scheduler is alive
+        if not self.is_alive():
+            return False
+        elif self.state == SchedulerState.FLUSH_RUNS:
+            # already hit run_cap, just noop
+            return False
+        commands: List[Dict[str, Any]] = self._get_sweep_commands(worker_id)
+        for command in commands:
+            # The command "type" can be one of "run", "resume", "stop", "exit"
+            _type = command.get("type")
+            if _type in ["exit", "stop"]:
+                run_cap = command.get("run_cap")
+                if run_cap is not None:
+                    # If Sweep hit run_cap, go into flushing state
+                    wandb.termlog(f"{LOG_PREFIX}Sweep hit run_cap: {run_cap}")
+                    self.state = SchedulerState.FLUSH_RUNS
+                else:
                     # Tell (virtual) agent to stop running
                     self.state = SchedulerState.STOPPED
-                    self.exit()
-                    return
-                elif _type in ["run", "resume"]:
-                    _run_id = command.get("run_id", None)
-                    if _run_id is None:
-                        self.state = SchedulerState.FAILED
-                        raise SchedulerError(
-                            f"AgentHeartbeat command {command} missing run_id"
-                        )
-                    if _run_id in self._runs:
-                        wandb.termlog(f"{LOG_PREFIX} Skipping duplicate run {run_id}")
-                    else:
-                        run = SweepRun(
-                            id=_run_id,
-                            args=command.get("args", {}),
-                            logs=command.get("logs", []),
-                            program=command.get("program", None),
-                            worker_id=worker_id,
-                        )
-                        self._runs[run.id] = run
-                        self._heartbeat_queue.put(run)
-                else:
+                return False
+            if _type in ["run", "resume"]:
+                _run_id = command.get("run_id")
+                if not _run_id:
                     self.state = SchedulerState.FAILED
-                    raise SchedulerError(f"AgentHeartbeat unknown command type {_type}")
+                    raise SchedulerError(f"No runId in agent heartbeat: {command}")
+                if _run_id in self._runs:
+                    wandb.termlog(f"{LOG_PREFIX}Skipping duplicate run: {_run_id}")
+                    continue
+                run = SweepRun(
+                    id=_run_id,
+                    args=command.get("args", {}),
+                    logs=command.get("logs", []),
+                    worker_id=worker_id,
+                )
+                self._runs[run.id] = run
+                self._heartbeat_queue.put(run)
+            else:
+                self.state = SchedulerState.FAILED
+                raise SchedulerError(f"AgentHeartbeat unknown command: {_type}")
+        return True
     def _run(self) -> None:
         # Go through all workers and heartbeat
-        for worker_id in self._workers.keys():
+        for worker_id in self._workers:
             self._heartbeat(worker_id)
-        try:
-            run: SweepRun = self._heartbeat_queue.get(
-                timeout=self._heartbeat_queue_timeout
-            )
-        except queue.Empty:
-            wandb.termlog(f"{LOG_PREFIX}No jobs in Sweeps RunQueue, waiting...")
-            time.sleep(self._heartbeat_queue_sleep)
-            return
-        # If run is already stopped just ignore the request
-        if run.state in [
-            SimpleRunState.DEAD,
-            SimpleRunState.UNKNOWN,
-        ]:
-            return
-        wandb.termlog(
-            f"{LOG_PREFIX}Converting Sweep Run (RunID:{run.id}) to Launch Job"
-        )
-        _ = self._add_to_launch_queue(
-            run_id=run.id,
-            entry_point=["python3", run.program] if run.program else None,
-            # Use legacy sweep utilities to extract args dict from agent heartbeat run.args
-            config={
-                "overrides": {
-                    "run_config": LegacySweepAgent._create_command_args(
-                        {"args": run.args}
-                    )["args_dict"]
-                }
-            },
-        )
+        for _worker_id in self._workers:
+            try:
+                run: SweepRun = self._heartbeat_queue.get(
+                    timeout=self._heartbeat_queue_timeout
+                )
+                # If run is already stopped just ignore the request
+                if run.state in [RunState.DEAD, RunState.UNKNOWN]:
+                    wandb.termwarn(f"{LOG_PREFIX}Ignoring dead run {run.id}")
+                    _logger.debug(f"dead run {run.id} state: {run.state}")
+                    continue
+                sweep_args = _create_sweep_command_args({"args": run.args})["args_dict"]
+                launch_config = {"overrides": {"run_config": sweep_args}}
+                self._add_to_launch_queue(run_id=run.id, config=launch_config)
+            except queue.Empty:
+                if self.state == SchedulerState.FLUSH_RUNS:
+                    wandb.termlog(f"{LOG_PREFIX}Sweep stopped, waiting on runs...")
+                else:
+                    wandb.termlog(f"{LOG_PREFIX}No new runs to launch, waiting...")
+                time.sleep(self._heartbeat_queue_sleep)
+                return
     def _exit(self) -> None:
         pass

wandb/sdk/launch/utils.py CHANGED Viewed

@@ -10,15 +10,49 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
 import click
 import wandb
+import wandb.docker as docker
 from wandb import util
 from wandb.apis.internal import Api
-from wandb.errors import CommError, LaunchError
+from wandb.errors import CommError, Error
 from wandb.sdk.launch.wandb_reference import WandbReference
+from .builder.templates._wandb_bootstrap import (
+    FAILED_PACKAGES_POSTFIX,
+    FAILED_PACKAGES_PREFIX,
+)
+FAILED_PACKAGES_REGEX = re.compile(
+    f"{re.escape(FAILED_PACKAGES_PREFIX)}(.*){re.escape(FAILED_PACKAGES_POSTFIX)}"
+)
 if TYPE_CHECKING:  # pragma: no cover
     from wandb.apis.public import Artifact as PublicArtifact
+class LaunchError(Error):
+    """Raised when a known error occurs in wandb launch."""
+    pass
+class LaunchDockerError(Error):
+    """Raised when Docker daemon is not running."""
+    pass
+class ExecutionError(Error):
+    """Generic execution exception."""
+    pass
+class SweepError(Error):
+    """Raised when a known error occurs with wandb sweeps."""
+    pass
 # TODO: this should be restricted to just Git repos and not S3 and stuff like that
 _GIT_URI_REGEX = re.compile(r"^[^/|^~|^\.].*(git|bitbucket)")
 _VALID_IP_REGEX = r"^https?://[0-9]+(?:\.[0-9]+){3}(:[0-9]+)?"
@@ -128,11 +162,10 @@ def construct_launch_spec(
     parameters: Optional[Dict[str, Any]],
     resource_args: Optional[Dict[str, Any]],
     launch_config: Optional[Dict[str, Any]],
-    cuda: Optional[bool],
     run_id: Optional[str],
     repository: Optional[str],
 ) -> Dict[str, Any]:
-    """Constructs the launch specification from CLI arguments."""
+    """Construct the launch specification from CLI arguments."""
     # override base config (if supplied) with supplied args
     launch_spec = launch_config if launch_config is not None else {}
     if uri is not None:
@@ -184,8 +217,6 @@ def construct_launch_spec(
     if entry_point:
         launch_spec["overrides"]["entry_point"] = entry_point
-    if cuda is not None:
-        launch_spec["cuda"] = cuda
     if run_id is not None:
         launch_spec["run_id"] = run_id
@@ -214,7 +245,7 @@ def validate_launch_spec_source(launch_spec: Dict[str, Any]) -> None:
 def parse_wandb_uri(uri: str) -> Tuple[str, str, str]:
-    """Parses wandb uri to retrieve entity, project and run name."""
+    """Parse wandb uri to retrieve entity, project and run name."""
     ref = WandbReference.parse(uri)
     if not ref or not ref.entity or not ref.project or not ref.run_id:
         raise LaunchError(f"Trouble parsing wandb uri {uri}")
@@ -222,10 +253,12 @@ def parse_wandb_uri(uri: str) -> Tuple[str, str, str]:
 def is_bare_wandb_uri(uri: str) -> bool:
-    """Checks if the uri is of the format
-    /<entity>/<project>/runs/<run_name>[other stuff]
+    """Check that a wandb uri is valid.
+    URI must be in the format
+    `/<entity>/<project>/runs/<run_name>[other stuff]`
     or
-    /<entity>/<project>/artifacts/job/<job_name>[other stuff]
+    `/<entity>/<project>/artifacts/job/<job_name>[other stuff]`.
     """
     _logger.info(f"Checking if uri {uri} is bare...")
     return uri.startswith("/") and WandbReference.is_uri_job_or_run(uri)
@@ -306,7 +339,7 @@ def get_local_python_deps(
 def diff_pip_requirements(req_1: List[str], req_2: List[str]) -> Dict[str, str]:
-    """Returns a list of pip requirements that are not in req_1 but are in req_2."""
+    """Return a list of pip requirements that are not in req_1 but are in req_2."""
     def _parse_req(req: List[str]) -> Dict[str, str]:
         # TODO: This can be made more exhaustive, but for 99% of cases this is fine
@@ -366,7 +399,7 @@ def validate_wandb_python_deps(
     requirements_file: Optional[str],
     dir: str,
 ) -> None:
-    """Warns if local python dependencies differ from wandb requirements.txt"""
+    """Warn if local python dependencies differ from wandb requirements.txt."""
     if requirements_file is not None:
         requirements_path = os.path.join(dir, requirements_file)
         with open(requirements_path) as f:
@@ -417,10 +450,7 @@ def apply_patch(patch_string: str, dst_dir: str) -> None:
 def _make_refspec_from_version(version: Optional[str]) -> List[str]:
-    """
-    Helper to create a refspec that checks for the existence of origin/main
-    and the version, if provided.
-    """
+    """Create a refspec that checks for the existence of origin/main and the version."""
     if version:
         return [f"+{version}"]
@@ -452,10 +482,10 @@ def _fetch_git_repo(dst_dir: str, uri: str, version: Optional[str]) -> str:
             repo.git.checkout(version)
         except git.exc.GitCommandError as e:
             raise LaunchError(
-                "Unable to checkout version '%s' of git repo %s"
+                f"Unable to checkout version '{version}' of git repo {uri}"
                 "- please ensure that the version exists in the repo. "
-                "Error: %s" % (version, uri, e)
-            )
+                f"Error: {e}"
+            ) from e
     else:
         if getattr(repo, "references", None) is not None:
             branches = [ref.name for ref in repo.references]
@@ -475,10 +505,10 @@ def _fetch_git_repo(dst_dir: str, uri: str, version: Optional[str]) -> str:
             )
         except (AttributeError, IndexError) as e:
             raise LaunchError(
-                "Unable to checkout default version '%s' of git repo %s "
+                f"Unable to checkout default version '{version}' of git repo {uri} "
                 "- to specify a git version use: --git-version \n"
-                "Error: %s" % (version, uri, e)
-            )
+                f"Error: {e}"
+            ) from e
     repo.submodule_update(init=True, recursive=True)
     return version
@@ -557,10 +587,9 @@ def validate_build_and_registry_configs(
 def get_kube_context_and_api_client(
-    kubernetes: Any,  # noqa: F811
-    resource_args: Dict[str, Any],  # noqa: F811
+    kubernetes: Any,
+    resource_args: Dict[str, Any],
 ) -> Tuple[Any, Any]:
     config_file = resource_args.get("config_file", None)
     context = None
     if config_file is not None or os.path.exists(os.path.expanduser("~/.kube/config")):
@@ -579,7 +608,14 @@ def get_kube_context_and_api_client(
             raise LaunchError(f"Specified context {context_name} was not found.")
         else:
             context = active_context
+        # TODO: We should not really be performing this check if the user is not
+        # using EKS but I don't see an obvious way to make an eks specific code path
+        # right here.
+        util.get_module(
+            "awscli",
+            "awscli is required to load a kubernetes context "
+            "from eks. Please run `pip install wandb[launch]` to install it.",
+        )
         kubernetes.config.load_kube_config(config_file, context["name"])
         api_client = kubernetes.config.new_client_from_config(
             config_file, context=context["name"]
@@ -598,7 +634,7 @@ def resolve_build_and_registry_config(
 ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
     resolved_build_config: Dict[str, Any] = {}
     if build_config is None and default_launch_config is not None:
-        resolved_build_config = default_launch_config.get("build", {})
+        resolved_build_config = default_launch_config.get("builder", {})
     elif build_config is not None:
         resolved_build_config = build_config
     resolved_registry_config: Dict[str, Any] = {}
@@ -611,10 +647,10 @@ def resolve_build_and_registry_config(
 def check_logged_in(api: Api) -> bool:
-    """
-    Uses an internal api reference to check if a user is logged in
-    raises an error if the viewer doesn't load, likely broken API key
-    expected time cost is 0.1-0.2 seconds
+    """Check if a user is logged in.
+    Raises an error if the viewer doesn't load (likely a broken API key). Expected time
+    cost is 0.1-0.2 seconds.
     """
     res = api.api.viewer()
     if not res:
@@ -633,3 +669,38 @@ def make_name_dns_safe(name: str) -> str:
     # Actual length limit is 253, but we want to leave room for the generated suffix
     resp = resp[:200]
     return resp
+def warn_failed_packages_from_build_logs(log: str, image_uri: str) -> None:
+    match = FAILED_PACKAGES_REGEX.search(log)
+    if match:
+        wandb.termwarn(
+            f"Failed to install the following packages: {match.group(1)} for image: {image_uri}. Will attempt to launch image without them."
+        )
+def docker_image_exists(docker_image: str, should_raise: bool = False) -> bool:
+    """Check if a specific image is already available.
+    Optionally raises an exception if the image is not found.
+    """
+    _logger.info("Checking if base image exists...")
+    try:
+        docker.run(["docker", "image", "inspect", docker_image])
+        return True
+    except (docker.DockerError, ValueError) as e:
+        if should_raise:
+            raise e
+        _logger.info("Base image not found. Generating new base image")
+        return False
+def pull_docker_image(docker_image: str) -> None:
+    """Pull the requested docker image."""
+    if docker_image_exists(docker_image):
+        # don't pull images if they exist already, eg if they are local images
+        return
+    try:
+        docker.run(["docker", "pull", docker_image])
+    except docker.DockerError as e:
+        raise LaunchError(f"Docker server returned error: {e}")

wandb/sdk/launch/wandb_reference.py CHANGED Viewed

@@ -1,6 +1,4 @@
-"""
-Support for parsing W&B URLs (which might be user provided) into constituent parts.
-"""
+"""Support for parsing W&B URLs (which might be user provided) into constituent parts."""
 from dataclasses import dataclass
 from enum import IntEnum
@@ -35,7 +33,6 @@ RESERVED_JOB_PATHS = ("_view",)
 @dataclass
 class WandbReference:
     # TODO: This will include port, should we separate that out?
     host: Optional[str] = None
@@ -88,9 +85,7 @@ class WandbReference:
     @staticmethod
     def parse(uri: str) -> Optional["WandbReference"]:
-        """
-        Attempt to parse a string as a W&B URL.
-        """
+        """Attempt to parse a string as a W&B URL."""
         # TODO: Error if HTTP and host is not localhost?
         if (
             not uri.startswith("/")

wandb/sdk/lib/_settings_toposort_generate.py ADDED Viewed

@@ -0,0 +1,166 @@
+import inspect
+import sys
+from typing import Any, Dict, List, Optional, Set, Tuple
+from wandb.errors import UsageError
+from wandb.sdk.wandb_settings import Settings
+if sys.version_info >= (3, 8):
+    from typing import get_args, get_origin, get_type_hints
+elif sys.version_info >= (3, 7):
+    from typing_extensions import get_args, get_origin, get_type_hints
+else:
+    def get_args(obj: Any) -> Optional[Any]:
+        return obj.__args__ if hasattr(obj, "__args__") else None
+    def get_origin(obj: Any) -> Optional[Any]:
+        return obj.__origin__ if hasattr(obj, "__origin__") else None
+    def get_type_hints(obj: Any) -> Dict[str, Any]:
+        return dict(obj.__annotations__) if hasattr(obj, "__annotations__") else dict()
+template = """
+__all__ = ("SETTINGS_TOPOLOGICALLY_SORTED", "_Setting")
+import sys
+from typing import Tuple
+if sys.version_info >= (3, 8):
+    from typing import Final, Literal
+else:
+    from typing_extensions import Final, Literal
+_Setting = Literal[
+    $settings_literal_list
+]
+SETTINGS_TOPOLOGICALLY_SORTED: Final[Tuple[_Setting, ...]] = (
+    $settings_topologically_sorted
+)
+"""
+class Graph:
+    # A simple class representing an unweighted directed graph
+    # that uses an adjacency list representation.
+    # We use to ensure that we don't have cyclic dependencies in the settings
+    # and that modifications to the settings are applied in the correct order.
+    def __init__(self) -> None:
+        self.adj_list: Dict[str, Set[str]] = {}
+    def add_node(self, node: str) -> None:
+        if node not in self.adj_list:
+            self.adj_list[node] = set()
+    def add_edge(self, node1: str, node2: str) -> None:
+        self.adj_list[node1].add(node2)
+    def get_neighbors(self, node: str) -> Set[str]:
+        return self.adj_list[node]
+    # return a list of nodes sorted in topological order
+    def topological_sort_dfs(self) -> List[str]:
+        sorted_copy = {k: sorted(v) for k, v in self.adj_list.items()}
+        sorted_nodes: List[str] = []
+        visited_nodes: Set[str] = set()
+        current_nodes: Set[str] = set()
+        def visit(n: str) -> None:
+            if n in visited_nodes:
+                return None
+            if n in current_nodes:
+                raise UsageError("Cyclic dependency detected in wandb.Settings")
+            current_nodes.add(n)
+            for neighbor in sorted_copy[n]:
+                visit(neighbor)
+            current_nodes.remove(n)
+            visited_nodes.add(n)
+            sorted_nodes.append(n)
+            return None
+        for node in self.adj_list:
+            if node not in visited_nodes:
+                visit(node)
+        return sorted_nodes
+def _get_modification_order(
+    settings: Settings,
+) -> Tuple[Tuple[str, ...], Tuple[str, ...]]:
+    """Return the order in which settings should be modified, based on dependencies."""
+    dependency_graph = Graph()
+    props = tuple(get_type_hints(Settings).keys())
+    # discover prop dependencies from validator methods and runtime hooks
+    prefix = "_validate_"
+    symbols = set(dir(settings))
+    validator_methods = tuple(sorted(m for m in symbols if m.startswith(prefix)))
+    # extract dependencies from validator methods
+    for m in validator_methods:
+        setting = m.split(prefix)[1]
+        dependency_graph.add_node(setting)
+        # if the method is not static, inspect its code to find the attributes it depends on
+        if (
+            not isinstance(Settings.__dict__[m], staticmethod)
+            and not isinstance(Settings.__dict__[m], classmethod)
+            and Settings.__dict__[m].__code__.co_argcount > 0
+        ):
+            unbound_closure_vars = inspect.getclosurevars(Settings.__dict__[m]).unbound
+            dependencies = (v for v in unbound_closure_vars if v in props)
+            for d in dependencies:
+                dependency_graph.add_node(d)
+                dependency_graph.add_edge(setting, d)
+    # extract dependencies from props' runtime hooks
+    default_props = settings._default_props()
+    for prop, spec in default_props.items():
+        if "hook" not in spec:
+            continue
+        dependency_graph.add_node(prop)
+        hook = spec["hook"]
+        if callable(hook):
+            hook = [hook]
+        for h in hook:
+            unbound_closure_vars = inspect.getclosurevars(h).unbound
+            dependencies = (v for v in unbound_closure_vars if v in props)
+            for d in dependencies:
+                dependency_graph.add_node(d)
+                dependency_graph.add_edge(prop, d)
+    modification_order = dependency_graph.topological_sort_dfs()
+    return props, tuple(modification_order)
+def generate(settings: Settings) -> None:
+    _settings_literal_list, _settings_topologically_sorted = _get_modification_order(
+        settings
+    )
+    settings_literal_list = ", ".join(f'"{s}"' for s in _settings_literal_list)
+    settings_topologically_sorted = ", ".join(
+        f'"{s}"' for s in _settings_topologically_sorted
+    )
+    print(
+        template.replace("$settings_literal_list", settings_literal_list,).replace(
+            "$settings_topologically_sorted",
+            settings_topologically_sorted,
+        )
+    )
+if __name__ == "__main__":
+    generate(Settings())

wandb 0.13.10__py3-none-any.whl → 0.14.0__py3-none-any.whl

wandb 0.13.10py3-none-any.whl → 0.14.0py3-none-any.whl