PyPI - wandb - Versions diffs - 0.15.9__py3-none-any.whl → 0.15.11__py3-none-any.whl - Mend

wandb 0.15.9__py3-none-any.whl → 0.15.11__py3-none-any.whl

Files changed (114) hide show

wandb/__init__.py +5 -1
wandb/apis/public.py +137 -17
wandb/apis/reports/_panels.py +1 -1
wandb/apis/reports/blocks.py +1 -0
wandb/apis/reports/report.py +27 -5
wandb/cli/cli.py +52 -41
wandb/docker/__init__.py +17 -0
wandb/docker/auth.py +1 -1
wandb/env.py +24 -4
wandb/filesync/step_checksum.py +3 -3
wandb/integration/openai/openai.py +3 -0
wandb/integration/ultralytics/__init__.py +9 -0
wandb/integration/ultralytics/bbox_utils.py +196 -0
wandb/integration/ultralytics/callback.py +458 -0
wandb/integration/ultralytics/classification_utils.py +66 -0
wandb/integration/ultralytics/mask_utils.py +141 -0
wandb/integration/ultralytics/pose_utils.py +92 -0
wandb/integration/xgboost/xgboost.py +3 -3
wandb/integration/yolov8/__init__.py +0 -7
wandb/integration/yolov8/yolov8.py +22 -3
wandb/old/settings.py +7 -0
wandb/plot/line_series.py +0 -1
wandb/proto/v3/wandb_internal_pb2.py +353 -300
wandb/proto/v3/wandb_server_pb2.py +37 -41
wandb/proto/v3/wandb_settings_pb2.py +2 -2
wandb/proto/v3/wandb_telemetry_pb2.py +16 -16
wandb/proto/v4/wandb_internal_pb2.py +272 -260
wandb/proto/v4/wandb_server_pb2.py +37 -40
wandb/proto/v4/wandb_settings_pb2.py +2 -2
wandb/proto/v4/wandb_telemetry_pb2.py +16 -16
wandb/proto/wandb_internal_codegen.py +7 -31
wandb/sdk/artifacts/artifact.py +321 -189
wandb/sdk/artifacts/artifact_cache.py +14 -0
wandb/sdk/artifacts/artifact_manifest.py +5 -4
wandb/sdk/artifacts/artifact_manifest_entry.py +37 -9
wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +1 -9
wandb/sdk/artifacts/artifact_saver.py +13 -50
wandb/sdk/artifacts/artifact_ttl.py +6 -0
wandb/sdk/artifacts/artifacts_cache.py +119 -93
wandb/sdk/artifacts/staging.py +25 -0
wandb/sdk/artifacts/storage_handlers/s3_handler.py +12 -7
wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +2 -3
wandb/sdk/artifacts/storage_policies/__init__.py +4 -0
wandb/sdk/artifacts/storage_policies/register.py +1 -0
wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +4 -3
wandb/sdk/artifacts/storage_policy.py +4 -2
wandb/sdk/backend/backend.py +0 -16
wandb/sdk/data_types/image.py +3 -1
wandb/sdk/integration_utils/auto_logging.py +38 -13
wandb/sdk/interface/interface.py +16 -135
wandb/sdk/interface/interface_shared.py +9 -147
wandb/sdk/interface/interface_sock.py +0 -26
wandb/sdk/internal/file_pusher.py +20 -3
wandb/sdk/internal/file_stream.py +3 -1
wandb/sdk/internal/handler.py +53 -70
wandb/sdk/internal/internal_api.py +220 -130
wandb/sdk/internal/job_builder.py +41 -37
wandb/sdk/internal/sender.py +7 -25
wandb/sdk/internal/system/assets/disk.py +144 -11
wandb/sdk/internal/system/system_info.py +6 -2
wandb/sdk/launch/__init__.py +5 -0
wandb/sdk/launch/{launch.py → _launch.py} +53 -54
wandb/sdk/launch/{launch_add.py → _launch_add.py} +34 -31
wandb/sdk/launch/_project_spec.py +13 -2
wandb/sdk/launch/agent/agent.py +103 -59
wandb/sdk/launch/agent/run_queue_item_file_saver.py +6 -4
wandb/sdk/launch/builder/build.py +19 -1
wandb/sdk/launch/builder/docker_builder.py +5 -1
wandb/sdk/launch/builder/kaniko_builder.py +5 -1
wandb/sdk/launch/create_job.py +20 -5
wandb/sdk/launch/loader.py +14 -5
wandb/sdk/launch/runner/abstract.py +0 -2
wandb/sdk/launch/runner/kubernetes_monitor.py +329 -0
wandb/sdk/launch/runner/kubernetes_runner.py +66 -209
wandb/sdk/launch/runner/local_container.py +5 -2
wandb/sdk/launch/runner/local_process.py +4 -1
wandb/sdk/launch/sweeps/scheduler.py +43 -25
wandb/sdk/launch/sweeps/utils.py +5 -3
wandb/sdk/launch/utils.py +3 -1
wandb/sdk/lib/_settings_toposort_generate.py +3 -9
wandb/sdk/lib/_settings_toposort_generated.py +27 -3
wandb/sdk/lib/_wburls_generated.py +1 -0
wandb/sdk/lib/filenames.py +27 -6
wandb/sdk/lib/filesystem.py +181 -7
wandb/sdk/lib/fsm.py +5 -3
wandb/sdk/lib/gql_request.py +3 -0
wandb/sdk/lib/ipython.py +7 -0
wandb/sdk/lib/wburls.py +1 -0
wandb/sdk/service/port_file.py +2 -15
wandb/sdk/service/server.py +7 -55
wandb/sdk/service/service.py +56 -26
wandb/sdk/service/service_base.py +1 -1
wandb/sdk/service/streams.py +11 -5
wandb/sdk/verify/verify.py +2 -2
wandb/sdk/wandb_init.py +8 -2
wandb/sdk/wandb_manager.py +4 -14
wandb/sdk/wandb_run.py +143 -53
wandb/sdk/wandb_settings.py +148 -35
wandb/testing/relay.py +85 -38
wandb/util.py +87 -4
wandb/wandb_torch.py +24 -38
{wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/METADATA +48 -23
{wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/RECORD +107 -103
{wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/WHEEL +1 -1
wandb/proto/v3/wandb_server_pb2_grpc.py +0 -1422
wandb/proto/v4/wandb_server_pb2_grpc.py +0 -1422
wandb/proto/wandb_server_pb2_grpc.py +0 -8
wandb/sdk/artifacts/storage_policies/s3_bucket_policy.py +0 -61
wandb/sdk/interface/interface_grpc.py +0 -460
wandb/sdk/service/server_grpc.py +0 -444
wandb/sdk/service/service_grpc.py +0 -73
{wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/LICENSE +0 -0
{wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/entry_points.txt +0 -0
{wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/top_level.txt +0 -0

wandb/sdk/launch/{launch_add.py → _launch_add.py} RENAMED Viewed

@@ -2,7 +2,7 @@ import pprint
 from typing import Any, Dict, List, Optional
 import wandb
-import wandb.apis.public as public
+from wandb.apis import public
 from wandb.apis.internal import Api
 from wandb.sdk.launch._project_spec import create_project_from_spec
 from wandb.sdk.launch.builder.build import build_image_from_project
@@ -49,39 +49,42 @@ def launch_add(
     """Enqueue a W&B launch experiment. With either a source uri, job or docker_image.
     Arguments:
-    uri: URI of experiment to run. A wandb run uri or a Git repository URI.
-    job: string reference to a wandb.Job eg: wandb/test/my-job:latest
-    config: A dictionary containing the configuration for the run. May also contain
-        resource specific arguments under the key "resource_args"
-    project: Target project to send launched run to
-    entity: Target entity to send launched run to
-    queue: the name of the queue to enqueue the run to
-    resource: Execution backend for the run: W&B provides built-in support for "local-container" backend
-    entry_point: Entry point to run within the project. Defaults to using the entry point used
-        in the original run for wandb URIs, or main.py for git repository URIs.
-    name: Name run under which to launch the run.
-    version: For Git-based projects, either a commit hash or a branch name.
-    docker_image: The name of the docker image to use for the run.
-    resource_args: Resource related arguments for launching runs onto a remote backend.
-        Will be stored on the constructed launch config under ``resource_args``.
-    run_id: optional string indicating the id of the launched run
-    build: optional flag defaulting to false, requires queue to be set
-        if build, an image is created, creates a job artifact, pushes a reference
-            to that job artifact to queue
-    repository: optional string to control the name of the remote repository, used when
-        pushing images to a registry
-    project_queue: optional string to control the name of the project for the queue. Primarily used
-        for back compatibility with project scoped queues
+        uri: URI of experiment to run. A wandb run uri or a Git repository URI.
+        job: string reference to a wandb.Job eg: wandb/test/my-job:latest
+        config: A dictionary containing the configuration for the run. May also contain
+            resource specific arguments under the key "resource_args"
+        project: Target project to send launched run to
+        entity: Target entity to send launched run to
+        queue: the name of the queue to enqueue the run to
+        resource: Execution backend for the run: W&B provides built-in support for "local-container" backend
+        entry_point: Entry point to run within the project. Defaults to using the entry point used
+            in the original run for wandb URIs, or main.py for git repository URIs.
+        name: Name run under which to launch the run.
+        version: For Git-based projects, either a commit hash or a branch name.
+        docker_image: The name of the docker image to use for the run.
+        resource_args: Resource related arguments for launching runs onto a remote backend.
+            Will be stored on the constructed launch config under ``resource_args``.
+        run_id: optional string indicating the id of the launched run
+        build: optional flag defaulting to false, requires queue to be set
+            if build, an image is created, creates a job artifact, pushes a reference
+                to that job artifact to queue
+        repository: optional string to control the name of the remote repository, used when
+            pushing images to a registry
+        project_queue: optional string to control the name of the project for the queue. Primarily used
+            for back compatibility with project scoped queues
     Example:
-        import wandb
-        project_uri = "https://github.com/wandb/examples"
-        params = {"alpha": 0.5, "l1_ratio": 0.01}
-        # Run W&B project and create a reproducible docker environment
-        # on a local host
-        api = wandb.apis.internal.Api()
-        wandb.launch_add(uri=project_uri, parameters=params)
+    ```python
+    from wandb.sdk.launch import launch_add
+    project_uri = "https://github.com/wandb/examples"
+    params = {"alpha": 0.5, "l1_ratio": 0.01}
+    # Run W&B project and create a reproducible docker environment
+    # on a local host
+    api = wandb.apis.internal.Api()
+    launch_add(uri=project_uri, parameters=params)
+    ```
     Returns:

wandb/sdk/launch/_project_spec.py CHANGED Viewed

@@ -106,6 +106,7 @@ class LaunchProject:
         self.override_config: Dict[str, Any] = overrides.get("run_config", {})
         self.override_artifacts: Dict[str, Any] = overrides.get("artifacts", {})
         self.override_entrypoint: Optional[EntryPoint] = None
+        self.override_dockerfile: Optional[str] = overrides.get("dockerfile")
         self.deps_type: Optional[str] = None
         self._runtime: Optional[str] = None
         self.run_id = run_id or generate_id()
@@ -117,7 +118,8 @@ class LaunchProject:
         if override_entrypoint:
             _logger.info("Adding override entry point")
             self.override_entrypoint = EntryPoint(
-                " ".join(override_entrypoint[0]), override_entrypoint
+                name=self._get_entrypoint_file(override_entrypoint),
+                command=override_entrypoint,
             )
         if overrides.get("sweep_id") is not None:
@@ -185,6 +187,15 @@ class LaunchProject:
             assert self.job is not None
             return wandb.util.make_docker_image_name_safe(self.job.split(":")[0])
+    def _get_entrypoint_file(self, entrypoint: List[str]) -> Optional[str]:
+        if not entrypoint:
+            return None
+        if entrypoint[0].endswith(".py") or entrypoint[0].endswith(".sh"):
+            return entrypoint[0]
+        if len(entrypoint) < 2:
+            return None
+        return entrypoint[1]
     def fill_macros(self, image: str) -> Dict[str, Any]:
         """Substitute values for macros in resource arguments.
@@ -415,7 +426,7 @@ class LaunchProject:
 class EntryPoint:
     """An entry point into a wandb launch specification."""
-    def __init__(self, name: str, command: List[str]):
+    def __init__(self, name: Optional[str], command: List[str]):
         self.name = name
         self.command = command

wandb/sdk/launch/agent/agent.py CHANGED Viewed

@@ -6,13 +6,12 @@ import threading
 import time
 import traceback
 from multiprocessing import Event
-from multiprocessing.pool import ThreadPool
 from typing import Any, Dict, List, Optional, Union
 import wandb
 from wandb.apis.internal import Api
 from wandb.errors import CommError
-from wandb.sdk.launch.launch_add import launch_add
+from wandb.sdk.launch._launch_add import launch_add
 from wandb.sdk.launch.runner.local_container import LocalSubmittedRun
 from wandb.sdk.launch.runner.local_process import LocalProcessRunner
 from wandb.sdk.launch.sweeps.scheduler import Scheduler
@@ -35,9 +34,21 @@ AGENT_KILLED = "KILLED"
 HIDDEN_AGENT_RUN_TYPE = "sweep-controller"
-MAX_THREADS = 64
 MAX_RESUME_COUNT = 5
+RUN_INFO_GRACE_PERIOD = 60
+_env_timeout = os.environ.get("WANDB_LAUNCH_START_TIMEOUT")
+if _env_timeout:
+    try:
+        RUN_START_TIMEOUT = float(_env_timeout)
+    except ValueError:
+        raise LaunchError(
+            f"Invalid value for WANDB_LAUNCH_START_TIMEOUT: {_env_timeout}"
+        )
+else:
+    RUN_START_TIMEOUT = 60 * 30  # default 30 minutes
 _logger = logging.getLogger(__name__)
@@ -129,13 +140,15 @@ class LaunchAgent:
         self._access = _convert_access("project")
         self._max_jobs = _max_from_config(config, "max_jobs")
         self._max_schedulers = _max_from_config(config, "max_schedulers")
-        self._pool = ThreadPool(
-            processes=int(min(MAX_THREADS, self._max_jobs + self._max_schedulers)),
-            initargs=(self._jobs, self._jobs_lock),
-        )
         self._secure_mode = config.get("secure_mode", False)
         self.default_config: Dict[str, Any] = config
+        # Get agent version from env var if present, otherwise wandb version
+        self.version: str = "wandb@" + wandb.__version__
+        env_agent_version = os.environ.get("WANDB_AGENT_VERSION")
+        if env_agent_version and env_agent_version != "wandb-launch-agent":
+            self.version = env_agent_version
         # serverside creation
         self.gorilla_supports_agents = (
             self._api.launch_agent_introspection() is not None
@@ -150,6 +163,7 @@ class LaunchAgent:
             self._project,
             self._queues,
             self.default_config,
+            self.version,
             self.gorilla_supports_agents,
         )
         self._id = create_response["launchAgentId"]
@@ -289,27 +303,43 @@ class LaunchAgent:
                 job_and_run_status.err_stage,
                 fnames,
             )
-        elif job_and_run_status.completed_status not in ["stopped", "failed"]:
-            _logger.info(
-                "Skipping check for completed run status because run was successful"
-            )
         elif job_and_run_status.run is not None:
             run_info = None
-            # sweep runs exist but have no info before they are started
-            # so run_info returned will be None
-            # normal runs just throw a comm error
-            # TODO: make more clear
-            try:
-                run_info = self._api.get_run_info(
-                    self._entity, job_and_run_status.project, job_and_run_status.run_id
-                )
+            # We do some weird stuff here getting run info to check for a
+            # created in run in W&B.
+            #
+            # We retry for 60 seconds with an exponential backoff in case
+            # upsert run is taking a while.
+            #
+            # Sweep runs exist but have no info before they are started
+            # so run_info returned will be None, while normal runs just throw a
+            # comm error.
+            start_time = time.time()
+            interval = 1
+            while True:
+                try:
+                    run_info = self._api.get_run_info(
+                        self._entity,
+                        job_and_run_status.project,
+                        job_and_run_status.run_id,
+                    )
+                except CommError:
+                    pass
+                if (
+                    run_info is not None
+                    or time.time() - start_time > RUN_INFO_GRACE_PERIOD
+                ):
+                    break
+                if run_info is None:
+                    time.sleep(interval)
+                    interval *= 2
-            except CommError:
-                pass
             if run_info is None:
-                _msg = "The submitted run was not successfully started"
                 fnames = None
+                if job_and_run_status.completed_status == "finished":
+                    _msg = "The submitted job exited successfully but failed to call wandb.init"
+                else:
+                    _msg = "The submitted run was not successfully started"
                 logs = job_and_run_status.run.get_logs()
                 if logs:
                     fnames = job_and_run_status.saver.save_contents(
@@ -319,7 +349,7 @@ class LaunchAgent:
                     job_and_run_status.run_queue_item_id, _msg, "run", fnames
                 )
         else:
-            _logger.info("Finish thread id had no exception, ror run")
+            _logger.info(f"Finish thread id {thread_id} had no exception and no run")
             wandb._sentry.exception(
                 "launch agent called finish thread id on thread without run or exception"
             )
@@ -359,19 +389,21 @@ class LaunchAgent:
         # Abort if this job attempts to override secure mode
         self._assert_secure(launch_spec)
-        self._pool.apply_async(
-            self.thread_run_job,
-            (
+        job_tracker = JobAndRunStatusTracker(job["runQueueItemId"], queue, file_saver)
+        t = threading.Thread(
+            target=self.thread_run_job,
+            args=(
                 launch_spec,
                 job,
                 self.default_config,
                 self._api,
-                queue,
-                file_saver,
+                job_tracker,
             ),
+            daemon=True,
         )
+        t.start()
     def _assert_secure(self, launch_spec: Dict[str, Any]) -> None:
         """If secure mode is set, make sure no vulnerable keys are overridden."""
         if not self._secure_mode:
@@ -422,21 +454,23 @@ class LaunchAgent:
                     for queue in self._queues:
                         job = self.pop_from_queue(queue)
                         if job:
-                            file_saver = RunQueueItemFileSaver(
-                                self._wandb_run, job["runQueueItemId"]
-                            )
-                            if _is_scheduler_job(job.get("runSpec")):
-                                # If job is a scheduler, and we are already at the cap, ignore,
-                                #    don't ack, and it will be pushed back onto the queue in 1 min
-                                if self.num_running_schedulers >= self._max_schedulers:
-                                    wandb.termwarn(
-                                        f"{LOG_PREFIX}Agent already running the maximum number "
-                                        f"of sweep schedulers: {self._max_schedulers}. To set "
-                                        "this value use `max_schedulers` key in the agent config"
-                                    )
-                                    continue
                             try:
+                                file_saver = RunQueueItemFileSaver(
+                                    self._wandb_run, job["runQueueItemId"]
+                                )
+                                if _is_scheduler_job(job.get("runSpec")):
+                                    # If job is a scheduler, and we are already at the cap, ignore,
+                                    #    don't ack, and it will be pushed back onto the queue in 1 min
+                                    if (
+                                        self.num_running_schedulers
+                                        >= self._max_schedulers
+                                    ):
+                                        wandb.termwarn(
+                                            f"{LOG_PREFIX}Agent already running the maximum number "
+                                            f"of sweep schedulers: {self._max_schedulers}. To set "
+                                            "this value use `max_schedulers` key in the agent config"
+                                        )
+                                        continue
                                 self.run_job(job, queue, file_saver)
                             except Exception as e:
                                 wandb.termerror(
@@ -480,8 +514,6 @@ class LaunchAgent:
             self.update_status(AGENT_KILLED)
             wandb.termlog(f"{LOG_PREFIX}Shutting down, active jobs:")
             self.print_status()
-            self._pool.close()
-            self._pool.join()
     # Threaded functions
     def thread_run_job(
@@ -490,15 +522,13 @@ class LaunchAgent:
         job: Dict[str, Any],
         default_config: Dict[str, Any],
         api: Api,
-        queue: str,
-        file_saver: RunQueueItemFileSaver,
+        job_tracker: JobAndRunStatusTracker,
     ) -> None:
         thread_id = threading.current_thread().ident
-        assert thread_id is not None
-        job_tracker = JobAndRunStatusTracker(job["runQueueItemId"], queue, file_saver)
-        with self._jobs_lock:
-            self._jobs[thread_id] = job_tracker
+        assert thread_id
         try:
+            with self._jobs_lock:
+                self._jobs[thread_id] = job_tracker
             self._thread_run_job(
                 launch_spec, job, default_config, api, thread_id, job_tracker
             )
@@ -540,7 +570,7 @@ class LaunchAgent:
                 _logger.debug(f"Fetch sweep state error: {e}")
                 state = None
-            if state and state != "RUNNING" and state != "PAUSED":
+            if state != "RUNNING" and state != "PAUSED":
                 raise LaunchError(
                     f"Launch agent picked up sweep job, but sweep ({launch_spec['sweep_id']}) was in a terminal state ({state})"
                 )
@@ -594,7 +624,18 @@ class LaunchAgent:
             return
         with self._jobs_lock:
             job_tracker.run = run
+        start_time = time.time()
         while self._jobs_event.is_set():
+            # If run has failed to start before timeout, kill it
+            state = run.get_status().state
+            if state == "starting" and RUN_START_TIMEOUT > 0:
+                if time.time() - start_time > RUN_START_TIMEOUT:
+                    run.cancel()
+                    raise LaunchError(
+                        f"Run failed to start within {RUN_START_TIMEOUT} seconds. "
+                        "If you want to increase this timeout, set WANDB_LAUNCH_START_TIMEOUT "
+                        "to a larger value."
+                    )
             if self._check_run_finished(job_tracker, launch_spec):
                 return
             time.sleep(AGENT_POLLING_INTERVAL)
@@ -655,12 +696,15 @@ class LaunchAgent:
                     wandb.termlog(f"{LOG_PREFIX}Scheduler finished with ID: {run.id}")
                     if status == "failed":
                         # on fail, update sweep state. scheduler run_id should == sweep_id
-                        self._api.set_sweep_state(
-                            sweep=job_tracker.run_id,
-                            entity=job_tracker.entity,
-                            project=job_tracker.project,
-                            state="CANCELED",
-                        )
+                        try:
+                            self._api.set_sweep_state(
+                                sweep=job_tracker.run_id,
+                                entity=job_tracker.entity,
+                                project=job_tracker.project,
+                                state="CANCELED",
+                            )
+                        except Exception as e:
+                            raise LaunchError(f"Failed to update sweep state: {e}")
                 else:
                     wandb.termlog(f"{LOG_PREFIX}Job finished with ID: {run.id}")
                 with self._jobs_lock:

wandb/sdk/launch/agent/run_queue_item_file_saver.py CHANGED Viewed

@@ -5,8 +5,6 @@ import sys
 from typing import List, Optional, Union
 import wandb
-from wandb.sdk.lib import RunDisabled
-from wandb.sdk.wandb_run import Run
 if sys.version_info >= (3, 8):
     from typing import Literal
@@ -18,7 +16,11 @@ FileSubtypes = Literal["warning", "error"]
 class RunQueueItemFileSaver:
     def __init__(
-        self, agent_run: Optional[Union[Run, RunDisabled]], run_queue_item_id: str
+        self,
+        agent_run: Optional[
+            Union["wandb.sdk.wandb_run.Run", "wandb.sdk.lib.RunDisabled"]
+        ],
+        run_queue_item_id: str,
     ):
         self.run_queue_item_id = run_queue_item_id
         self.run = agent_run
@@ -26,7 +28,7 @@ class RunQueueItemFileSaver:
     def save_contents(
         self, contents: str, fname: str, file_sub_type: FileSubtypes
     ) -> Optional[List[str]]:
-        if not isinstance(self.run, Run):
+        if not isinstance(self.run, wandb.sdk.wandb_run.Run):
             wandb.termwarn("Not saving file contents because agent has no run")
             return None
         root_dir = self.run._settings.files_dir

wandb/sdk/launch/builder/build.py CHANGED Viewed

@@ -36,6 +36,7 @@ _logger = logging.getLogger(__name__)
 _GENERATED_DOCKERFILE_NAME = "Dockerfile.wandb-autogenerated"
+_DEFAULT_DOCKERFILE_NAME = "Dockerfile.wandb"
 def validate_docker_installation() -> None:
@@ -237,7 +238,7 @@ def get_env_vars_dict(
     if launch_project.sweep_id:
         env_vars["WANDB_SWEEP_ID"] = launch_project.sweep_id
     if launch_project.launch_spec.get("_resume_count", 0) > 0:
-        env_vars["WANDB_RESUME"] = "must"
+        env_vars["WANDB_RESUME"] = "allow"
     _inject_wandb_config_env_vars(
         launch_project.override_config, env_vars, max_env_length
@@ -321,7 +322,24 @@ def generate_dockerfile(
     entry_point: EntryPoint,
     runner_type: str,
     builder_type: str,
+    dockerfile: Optional[str] = None,
 ) -> str:
+    override_entrypoint = launch_project.override_entrypoint or entry_point
+    if launch_project.project_dir is not None:
+        if not dockerfile and override_entrypoint.name is not None:
+            entrypoint_dir = os.path.dirname(override_entrypoint.name)
+            path = os.path.join(
+                launch_project.project_dir, entrypoint_dir, _DEFAULT_DOCKERFILE_NAME
+            )
+            if os.path.exists(path):
+                dockerfile = os.path.join(entrypoint_dir, _DEFAULT_DOCKERFILE_NAME)
+        if dockerfile:
+            path = os.path.join(launch_project.project_dir, dockerfile)
+            if not os.path.exists(path):
+                raise LaunchError(f"Dockerfile does not exist at {path}")
+            wandb.termlog(f"Using dockerfile: {dockerfile}")
+            return open(path).read()
     # get python versions truncated to major.minor to ensure image availability
     if launch_project.python_version:
         spl = launch_project.python_version.split(".")[:2]

wandb/sdk/launch/builder/docker_builder.py CHANGED Viewed

@@ -121,7 +121,11 @@ class DockerBuilder(AbstractBuilder):
             entrypoint (EntryPoint): The entrypoint to use.
         """
         dockerfile_str = generate_dockerfile(
-            launch_project, entrypoint, launch_project.resource, "docker"
+            launch_project=launch_project,
+            entry_point=entrypoint,
+            runner_type=launch_project.resource,
+            builder_type="docker",
+            dockerfile=launch_project.override_dockerfile,
         )
         image_tag = image_tag_from_dockerfile_and_source(launch_project, dockerfile_str)

wandb/sdk/launch/builder/kaniko_builder.py CHANGED Viewed

@@ -241,7 +241,11 @@ class KanikoBuilder(AbstractBuilder):
             raise LaunchError("No registry specified for Kaniko build.")
         # kaniko builder doesn't seem to work with a custom user id, need more investigation
         dockerfile_str = generate_dockerfile(
-            launch_project, entrypoint, launch_project.resource, "kaniko"
+            launch_project=launch_project,
+            entry_point=entrypoint,
+            runner_type=launch_project.resource,
+            builder_type="kaniko",
+            dockerfile=launch_project.override_dockerfile,
         )
         image_tag = image_tag_from_dockerfile_and_source(launch_project, dockerfile_str)
         repo_uri = self.registry.get_repo_uri()

wandb/sdk/launch/create_job.py CHANGED Viewed

@@ -63,7 +63,7 @@ def create_job(
             runtime="3.9",
             entrypoint="train.py",
         )
-        # then, use you newly created job
+        # then run the newly created job
         artifact_job.call()
         ```
     """
@@ -180,7 +180,6 @@ def _create_job(
         run_name=run.id,  # run will be deleted after creation
         description=description,
         metadata=metadata,
-        labels=["manually-created"],
         is_user_created=True,
         aliases=[{"artifactCollectionName": name, "alias": a} for a in aliases],
     )
@@ -335,19 +334,33 @@ def _create_repo_metadata(
         entrypoint = rel_entrypoint
     # check if requirements.txt exists
-    if not os.path.exists(os.path.join(local_dir, "requirements.txt")):
-        repo_formd = path.replace(entrypoint, "")
+    # start at the location of the python file and recurse up to the git root
+    req_dir = local_dir
+    while (
+        not os.path.exists(os.path.join(req_dir, "requirements.txt"))
+        and req_dir != tempdir
+    ):
+        req_dir = os.path.dirname(req_dir)
+    if not os.path.exists(os.path.join(req_dir, "requirements.txt")):
         wandb.termerror(
-            f"Could not find requirements.txt file in git repo at: {repo_formd}/requirements.txt"
+            "Could not find requirements.txt file in git repo at "
+            f"{os.path.join(os.path.dirname(path), 'requirements.txt')} "
+            "or parent directories."
         )
         return None
+    wandb.termlog(
+        f"Using requirements.txt in {req_dir.replace(tempdir, '') or 'repository root'}"
+    )
     metadata = {
         "git": {
             "commit": commit,
             "remote": ref.url,
         },
         "root": ref.repo,
+        "codePathLocal": entrypoint,  # not in git context, optionally also set local
         "codePath": entrypoint,
         "entrypoint": [f"python{python_version}", entrypoint],
         "python": python_version,  # used to build container
@@ -426,6 +439,8 @@ def _configure_job_builder_for_partial(tmpdir: str, job_source: str) -> JobBuild
     job_builder = JobBuilder(
         settings=settings,
     )
+    # never allow notebook runs
+    job_builder._is_notebook_run = False
     # set run inputs and outputs to empty dicts
     job_builder.set_config({})
     job_builder.set_summary({})

wandb/sdk/launch/loader.py CHANGED Viewed

@@ -3,6 +3,7 @@ from typing import Any, Dict, Optional
 import wandb
 from wandb.apis.internal import Api
+from wandb.docker import is_docker_installed
 from wandb.sdk.launch.errors import LaunchError
 from .builder.abstract import AbstractBuilder
@@ -141,7 +142,10 @@ def builder_from_config(
     This helper function is used to create a builder from a config. The
     config should have a "type" key that specifies the type of builder to import
     and create. The remaining keys are passed to the builder's from_config
-    method. If the config is None or empty, a DockerBuilder is returned.
+    method. If the config is None or empty, a default builder is returned.
+    The default builder will be a DockerBuilder if we find a working docker cli
+    on the system, otherwise it will be a NoOpBuilder.
     Arguments:
         config (Dict[str, Any]): The builder config.
@@ -154,11 +158,16 @@ def builder_from_config(
         LaunchError: If the builder is not configured correctly.
     """
     if not config:
-        from .builder.docker_builder import DockerBuilder
+        if is_docker_installed():
+            from .builder.docker_builder import DockerBuilder
+            return DockerBuilder.from_config(
+                {}, environment, registry
+            )  # This is the default builder.
+        from .builder.noop import NoOpBuilder
-        return DockerBuilder.from_config(
-            {}, environment, registry
-        )  # This is the default builder.
+        return NoOpBuilder.from_config({}, environment, registry)
     builder_type = config.get("type")
     if builder_type is None:

wandb/sdk/launch/runner/abstract.py CHANGED Viewed

@@ -13,7 +13,6 @@ from typing import Any, Dict, List, Optional, Union
 from dockerpycreds.utils import find_executable  # type: ignore
 import wandb
-from wandb import Settings
 from wandb.apis.internal import Api
 from wandb.sdk.lib import runid
@@ -136,7 +135,6 @@ class AbstractRunner(ABC):
         api: Api,
         backend_config: Dict[str, Any],
     ) -> None:
-        self._settings = Settings()
         self._api = api
         self.backend_config = backend_config
         self._cwd = os.getcwd()

wandb 0.15.9__py3-none-any.whl → 0.15.11__py3-none-any.whl

wandb 0.15.9__py3-none-any.whl → 0.15.11__py3-none-any.whl