PyPI - wandb - Versions diffs - 0.17.0rc1__py3-none-win_amd64.whl → 0.17.1__py3-none-win_amd64.whl - Mend

wandb 0.17.0rc1__py3-none-win_amd64.whl → 0.17.1__py3-none-win_amd64.whl

Files changed (174) hide show

wandb/__init__.py +1 -2
wandb/apis/importers/internals/internal.py +0 -1
wandb/apis/importers/wandb.py +12 -7
wandb/apis/internal.py +0 -3
wandb/apis/public/api.py +213 -79
wandb/apis/public/artifacts.py +335 -100
wandb/apis/public/files.py +9 -9
wandb/apis/public/jobs.py +16 -4
wandb/apis/public/projects.py +26 -28
wandb/apis/public/query_generator.py +1 -1
wandb/apis/public/runs.py +163 -65
wandb/apis/public/sweeps.py +2 -2
wandb/apis/reports/__init__.py +1 -7
wandb/apis/reports/v1/__init__.py +5 -27
wandb/apis/reports/v2/__init__.py +7 -19
wandb/apis/workspaces/__init__.py +8 -0
wandb/beta/workflows.py +8 -3
wandb/bin/wandb-core +0 -0
wandb/cli/cli.py +131 -59
wandb/data_types.py +6 -3
wandb/docker/__init__.py +2 -2
wandb/env.py +3 -3
wandb/errors/term.py +10 -2
wandb/filesync/step_checksum.py +1 -4
wandb/filesync/step_prepare.py +4 -24
wandb/filesync/step_upload.py +5 -107
wandb/filesync/upload_job.py +0 -76
wandb/integration/gym/__init__.py +35 -15
wandb/integration/huggingface/resolver.py +2 -2
wandb/integration/keras/callbacks/metrics_logger.py +1 -1
wandb/integration/keras/keras.py +1 -1
wandb/integration/openai/fine_tuning.py +21 -3
wandb/integration/prodigy/prodigy.py +1 -1
wandb/jupyter.py +16 -17
wandb/old/summary.py +1 -1
wandb/plot/confusion_matrix.py +1 -1
wandb/plot/pr_curve.py +2 -1
wandb/plot/roc_curve.py +2 -1
wandb/{plots → plot}/utils.py +13 -25
wandb/proto/v3/wandb_internal_pb2.py +54 -54
wandb/proto/v3/wandb_settings_pb2.py +2 -2
wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
wandb/proto/v4/wandb_internal_pb2.py +54 -54
wandb/proto/v4/wandb_settings_pb2.py +2 -2
wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
wandb/proto/v5/wandb_base_pb2.py +30 -0
wandb/proto/v5/wandb_internal_pb2.py +355 -0
wandb/proto/v5/wandb_server_pb2.py +63 -0
wandb/proto/v5/wandb_settings_pb2.py +45 -0
wandb/proto/v5/wandb_telemetry_pb2.py +41 -0
wandb/proto/wandb_base_pb2.py +2 -0
wandb/proto/wandb_deprecated.py +9 -1
wandb/proto/wandb_generate_deprecated.py +34 -0
wandb/proto/{wandb_internal_codegen.py → wandb_generate_proto.py} +1 -35
wandb/proto/wandb_internal_pb2.py +2 -0
wandb/proto/wandb_server_pb2.py +2 -0
wandb/proto/wandb_settings_pb2.py +2 -0
wandb/proto/wandb_telemetry_pb2.py +2 -0
wandb/sdk/artifacts/artifact.py +68 -22
wandb/sdk/artifacts/artifact_manifest.py +1 -1
wandb/sdk/artifacts/artifact_manifest_entry.py +6 -3
wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +1 -1
wandb/sdk/artifacts/artifact_saver.py +1 -10
wandb/sdk/artifacts/storage_handlers/local_file_handler.py +6 -2
wandb/sdk/artifacts/storage_handlers/multi_handler.py +1 -1
wandb/sdk/artifacts/storage_handlers/tracking_handler.py +6 -4
wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +2 -42
wandb/sdk/artifacts/storage_policy.py +1 -12
wandb/sdk/data_types/_dtypes.py +8 -8
wandb/sdk/data_types/image.py +2 -2
wandb/sdk/data_types/video.py +5 -3
wandb/sdk/integration_utils/data_logging.py +5 -5
wandb/sdk/interface/interface.py +14 -1
wandb/sdk/interface/interface_shared.py +1 -1
wandb/sdk/internal/file_pusher.py +2 -5
wandb/sdk/internal/file_stream.py +6 -19
wandb/sdk/internal/internal_api.py +148 -136
wandb/sdk/internal/job_builder.py +208 -136
wandb/sdk/internal/progress.py +0 -28
wandb/sdk/internal/sender.py +102 -39
wandb/sdk/internal/settings_static.py +8 -1
wandb/sdk/internal/system/assets/trainium.py +3 -3
wandb/sdk/internal/system/system_info.py +4 -2
wandb/sdk/internal/update.py +1 -1
wandb/sdk/launch/__init__.py +9 -1
wandb/sdk/launch/_launch.py +4 -24
wandb/sdk/launch/_launch_add.py +1 -3
wandb/sdk/launch/_project_spec.py +187 -225
wandb/sdk/launch/agent/agent.py +59 -19
wandb/sdk/launch/agent/config.py +0 -3
wandb/sdk/launch/builder/abstract.py +68 -1
wandb/sdk/launch/builder/build.py +165 -576
wandb/sdk/launch/builder/context_manager.py +235 -0
wandb/sdk/launch/builder/docker_builder.py +7 -23
wandb/sdk/launch/builder/kaniko_builder.py +12 -25
wandb/sdk/launch/builder/templates/dockerfile.py +92 -0
wandb/sdk/launch/create_job.py +51 -45
wandb/sdk/launch/environment/aws_environment.py +26 -1
wandb/sdk/launch/inputs/files.py +148 -0
wandb/sdk/launch/inputs/internal.py +224 -0
wandb/sdk/launch/inputs/manage.py +95 -0
wandb/sdk/launch/registry/google_artifact_registry.py +1 -1
wandb/sdk/launch/runner/abstract.py +2 -2
wandb/sdk/launch/runner/kubernetes_monitor.py +45 -12
wandb/sdk/launch/runner/kubernetes_runner.py +6 -8
wandb/sdk/launch/runner/local_container.py +2 -3
wandb/sdk/launch/runner/local_process.py +8 -29
wandb/sdk/launch/runner/sagemaker_runner.py +20 -14
wandb/sdk/launch/runner/vertex_runner.py +8 -7
wandb/sdk/launch/sweeps/scheduler.py +5 -3
wandb/sdk/launch/sweeps/scheduler_sweep.py +1 -1
wandb/sdk/launch/sweeps/utils.py +4 -4
wandb/sdk/launch/utils.py +16 -138
wandb/sdk/lib/_settings_toposort_generated.py +2 -5
wandb/sdk/lib/apikey.py +4 -2
wandb/sdk/lib/config_util.py +3 -3
wandb/sdk/lib/import_hooks.py +1 -1
wandb/sdk/lib/proto_util.py +22 -1
wandb/sdk/lib/redirect.py +20 -15
wandb/sdk/lib/tracelog.py +1 -1
wandb/sdk/service/service.py +2 -1
wandb/sdk/service/streams.py +5 -5
wandb/sdk/wandb_init.py +25 -59
wandb/sdk/wandb_login.py +28 -25
wandb/sdk/wandb_run.py +123 -53
wandb/sdk/wandb_settings.py +33 -64
wandb/sdk/wandb_setup.py +1 -1
wandb/sdk/wandb_watch.py +1 -1
wandb/sklearn/plot/classifier.py +10 -12
wandb/sklearn/plot/clusterer.py +1 -1
wandb/sync/sync.py +2 -2
wandb/testing/relay.py +32 -17
wandb/util.py +36 -37
wandb/wandb_agent.py +3 -3
wandb/wandb_controller.py +5 -4
{wandb-0.17.0rc1.dist-info → wandb-0.17.1.dist-info}/METADATA +8 -10
{wandb-0.17.0rc1.dist-info → wandb-0.17.1.dist-info}/RECORD +140 -162
wandb/apis/reports/v1/_blocks.py +0 -1406
wandb/apis/reports/v1/_helpers.py +0 -70
wandb/apis/reports/v1/_panels.py +0 -1282
wandb/apis/reports/v1/_templates.py +0 -478
wandb/apis/reports/v1/blocks.py +0 -27
wandb/apis/reports/v1/helpers.py +0 -2
wandb/apis/reports/v1/mutations.py +0 -66
wandb/apis/reports/v1/panels.py +0 -17
wandb/apis/reports/v1/report.py +0 -268
wandb/apis/reports/v1/runset.py +0 -144
wandb/apis/reports/v1/templates.py +0 -7
wandb/apis/reports/v1/util.py +0 -406
wandb/apis/reports/v1/validators.py +0 -131
wandb/apis/reports/v2/blocks.py +0 -25
wandb/apis/reports/v2/expr_parsing.py +0 -257
wandb/apis/reports/v2/gql.py +0 -68
wandb/apis/reports/v2/interface.py +0 -1911
wandb/apis/reports/v2/internal.py +0 -867
wandb/apis/reports/v2/metrics.py +0 -6
wandb/apis/reports/v2/panels.py +0 -15
wandb/catboost/__init__.py +0 -9
wandb/fastai/__init__.py +0 -9
wandb/keras/__init__.py +0 -19
wandb/lightgbm/__init__.py +0 -9
wandb/plots/__init__.py +0 -6
wandb/plots/explain_text.py +0 -36
wandb/plots/heatmap.py +0 -81
wandb/plots/named_entity.py +0 -43
wandb/plots/part_of_speech.py +0 -50
wandb/plots/plot_definitions.py +0 -768
wandb/plots/precision_recall.py +0 -121
wandb/plots/roc.py +0 -103
wandb/sacred/__init__.py +0 -3
wandb/xgboost/__init__.py +0 -9
{wandb-0.17.0rc1.dist-info → wandb-0.17.1.dist-info}/WHEEL +0 -0
{wandb-0.17.0rc1.dist-info → wandb-0.17.1.dist-info}/entry_points.txt +0 -0
{wandb-0.17.0rc1.dist-info → wandb-0.17.1.dist-info}/licenses/LICENSE +0 -0

wandb/sdk/launch/inputs/manage.py ADDED Viewed

@@ -0,0 +1,95 @@
+"""Functions for declaring overridable configuration for launch jobs."""
+from typing import List, Optional
+def manage_config_file(
+    path: str,
+    include: Optional[List[str]] = None,
+    exclude: Optional[List[str]] = None,
+):
+    r"""Declare an overridable configuration file for a launch job.
+
+    If a new job version is created from the active run, the configuration file
+    will be added to the job's inputs. If the job is launched and overrides
+    have been provided for the configuration file, this function will detect
+    the overrides from the environment and update the configuration file on disk.
+    Note that these overrides will only be applied in ephemeral containers.
+
+    `include` and `exclude` are lists of dot separated paths within the config.
+    The paths are used to filter subtrees of the configuration file out of the
+    job's inputs.
+
+    For example, given the following configuration file:
+        ```yaml
+        model:
+            name: resnet
+            layers: 18
+        training:
+            epochs: 10
+            batch_size: 32
+        ```
+    Passing `include=['model']` will only include the `model` subtree in the
+    job's inputs. Passing `exclude=['model.layers']` will exclude the `layers`
+    key from the `model` subtree. Note that `exclude` takes precedence over
+    `include`.
+
+    `.` is used as a separator for nested keys. If a key contains a `.`, it
+    should be escaped with a backslash, e.g. `include=[r'model\.layers']`. Note
+    the use of `r` to denote a raw string when using escape chars.
+
+    Args:
+        path (str): The path to the configuration file. This path must be
+            relative and must not contain backwards traversal, i.e. `..`.
+        include (Optional[List[str]]): Dot separated key paths of the subtrees
+            to include in the job's inputs.
+        exclude (Optional[List[str]]): Dot separated key paths of the subtrees
+            to exclude from the job's inputs.
+
+    Returns:
+        The result of `handle_config_file_input(path, include, exclude)`,
+        passed through unchanged.
+
+    Raises:
+        LaunchError: If the path is not valid, or if there is no active run.
+    """
+    # The helper is imported at call time rather than at module import time.
+    from .internal import handle_config_file_input
+    return handle_config_file_input(path, include, exclude)
+def manage_wandb_config(
+    include: Optional[List[str]] = None,
+    exclude: Optional[List[str]] = None,
+):
+    r"""Declare wandb.config as an overridable configuration for a launch job.
+
+    If a new job version is created from the active run, the run config
+    (wandb.config) will become an overridable input of the job. If the job is
+    launched and overrides have been provided for the run config, the overrides
+    will be applied to the run config when `wandb.init` is called.
+
+    `include` and `exclude` are lists of dot separated paths within the config.
+    The paths are used to filter subtrees of the run config out of the
+    job's inputs.
+
+    For example, given the following run config contents:
+        ```yaml
+        model:
+            name: resnet
+            layers: 18
+        training:
+            epochs: 10
+            batch_size: 32
+        ```
+    Passing `include=['model']` will only include the `model` subtree in the
+    job's inputs. Passing `exclude=['model.layers']` will exclude the `layers`
+    key from the `model` subtree. Note that `exclude` takes precedence over
+    `include`.
+
+    `.` is used as a separator for nested keys. If a key contains a `.`, it
+    should be escaped with a backslash, e.g. `include=[r'model\.layers']`. Note
+    the use of `r` to denote a raw string when using escape chars.
+
+    Args:
+        include (Optional[List[str]]): A list of subtrees to include in the
+            configuration.
+        exclude (Optional[List[str]]): A list of subtrees to exclude from the
+            configuration.
+
+    Raises:
+        LaunchError: If there is no active run.
+    """
+    # The helper is imported at call time rather than at module import time.
+    # Its result is not returned, so this function always returns None.
+    from .internal import handle_run_config_input
+    handle_run_config_input(include, exclude)

wandb/sdk/launch/registry/google_artifact_registry.py CHANGED Viewed

@@ -211,7 +211,7 @@ class GoogleArtifactRegistry(AbstractRegistry):
             for image in await list_images(request={"parent": parent}):
                 if tag in image.tags:
                     return True
-        except google.api_core.exceptions.NotFound as e:
+        except google.api_core.exceptions.NotFound as e:  # type: ignore[attr-defined]
             raise LaunchError(
                 f"The Google Artifact Registry repository {self.repository} "
                 f"does not exist. Please create it or modify your registry configuration."

wandb/sdk/launch/runner/abstract.py CHANGED Viewed

@@ -40,9 +40,9 @@ State = Literal[
 class Status:
-    def __init__(self, state: "State" = "unknown", data=None):  # type: ignore
+    def __init__(self, state: "State" = "unknown", messages: List[str] = None):  # type: ignore
         self.state = state
-        self.data = data or {}
+        self.messages = messages or []
     def __repr__(self) -> "State":
         return self.state

wandb/sdk/launch/runner/kubernetes_monitor.py CHANGED Viewed

@@ -14,6 +14,7 @@ from kubernetes_asyncio.client import (  # type: ignore  # noqa: F401
     BatchV1Api,
     CoreV1Api,
     CustomObjectsApi,
+    V1Pod,
     V1PodStatus,
 )
@@ -118,6 +119,27 @@ def _is_container_creating(status: "V1PodStatus") -> bool:
     return False
+def _is_pod_unschedulable(status: "V1PodStatus") -> Tuple[bool, str]:
+    """Return whether the pod is unschedulable along with the reason message.
+
+    Args:
+        status: The pod status whose `conditions` are inspected.
+
+    Returns:
+        `(True, condition.message)` for the first condition whose type is
+        `PodScheduled`, status is `"False"` and reason is `Unschedulable`;
+        `(False, "")` when there are no conditions or none of them match.
+    """
+    if not status.conditions:
+        return False, ""
+    for condition in status.conditions:
+        if (
+            condition.type == "PodScheduled"
+            and condition.status == "False"
+            and condition.reason == "Unschedulable"
+        ):
+            return True, condition.message
+    return False, ""
+def _get_crd_job_name(object: "V1Pod") -> Optional[str]:
+    """Return the name of the pod's first owner reference, if it has one.
+
+    Args:
+        object: The pod whose `metadata.owner_references` is read. Note that
+            this parameter name shadows the builtin `object` inside the function.
+
+    Returns:
+        The `name` of the first owner reference, or `None` when the pod has
+        no owner references.
+    """
+    refs = object.metadata.owner_references
+    if refs:
+        # Only the first owner reference is consulted; any others are ignored.
+        return refs[0].name
+    return None
 def _state_from_conditions(conditions: List[Dict[str, Any]]) -> Optional[State]:
     """Get the status from the pod conditions."""
     true_conditions = [
@@ -298,10 +320,18 @@ class LaunchKubernetesMonitor:
                 counts[state] += 1
         return counts
-    def _set_status(self, job_name: str, status: Status) -> None:
+    def _set_status_state(self, job_name: str, state: State) -> None:
         """Set the status of the run."""
-        if self._job_states.get(job_name) != status:
-            self._job_states[job_name] = status
+        if job_name not in self._job_states:
+            self._job_states[job_name] = Status(state)
+        elif self._job_states[job_name].state != state:
+            self._job_states[job_name].state = state
+    def _add_status_message(self, job_name: str, message: str) -> None:
+        """Record a message on a job's status and emit it as a terminal warning.
+
+        If the job is not tracked yet, it is first registered with the
+        `unknown` state. The message is then reported through `wandb.termwarn`
+        and appended to the job's `Status.messages` list.
+
+        Args:
+            job_name: Key of the job in `self._job_states`.
+            message: The message text to report and store.
+        """
+        if job_name not in self._job_states:
+            self._job_states[job_name] = Status("unknown")
+        wandb.termwarn(f"Warning from Kubernetes for job {job_name}: {message}")
+        self._job_states[job_name].messages.append(message)
     async def _monitor_pods(self, namespace: str) -> None:
         """Monitor a namespace for changes."""
@@ -312,15 +342,19 @@ class LaunchKubernetesMonitor:
             label_selector=self._label_selector,
         ):
             obj = event.get("object")
-            job_name = obj.metadata.labels.get("job-name")
+            job_name = obj.metadata.labels.get("job-name") or _get_crd_job_name(obj)
             if job_name is None or not hasattr(obj, "status"):
                 continue
             if self.__get_status(job_name) in ["finished", "failed"]:
                 continue
+            is_unschedulable, reason = _is_pod_unschedulable(obj.status)
+            if is_unschedulable:
+                self._add_status_message(job_name, reason)
             if obj.status.phase == "Running" or _is_container_creating(obj.status):
-                self._set_status(job_name, Status("running"))
+                self._set_status_state(job_name, "running")
             elif _is_preempted(obj.status):
-                self._set_status(job_name, Status("preempted"))
+                self._set_status_state(job_name, "preempted")
     async def _monitor_jobs(self, namespace: str) -> None:
         """Monitor a namespace for changes."""
@@ -334,15 +368,15 @@ class LaunchKubernetesMonitor:
             job_name = obj.metadata.name
             if obj.status.succeeded == 1:
-                self._set_status(job_name, Status("finished"))
+                self._set_status_state(job_name, "finished")
             elif obj.status.failed is not None and obj.status.failed >= 1:
-                self._set_status(job_name, Status("failed"))
+                self._set_status_state(job_name, "failed")
             # If the job is deleted and we haven't seen a terminal state
             # then we will consider the job failed.
             if event.get("type") == "DELETED":
                 if self._job_states.get(job_name) != Status("finished"):
-                    self._set_status(job_name, Status("failed"))
+                    self._set_status_state(job_name, "failed")
     async def _monitor_crd(
         self, namespace: str, custom_resource: CustomResource
@@ -355,7 +389,7 @@ class LaunchKubernetesMonitor:
             plural=custom_resource.plural,
             group=custom_resource.group,
             version=custom_resource.version,
-            label_selector=self._label_selector,  # TODO: Label selector doesn't work for CRDs.
+            label_selector=self._label_selector,
         ):
             object = event.get("object")
             name = object.get("metadata", dict()).get("name")
@@ -383,8 +417,7 @@ class LaunchKubernetesMonitor:
                     )
             if state is None:
                 continue
-            status = Status(state)
-            self._set_status(name, status)
+            self._set_status_state(name, state)
 class SafeWatch:

wandb/sdk/launch/runner/kubernetes_runner.py CHANGED Viewed

@@ -29,7 +29,6 @@ from wandb.sdk.lib.retry import ExponentialBackoff, retry_async
 from wandb.util import get_module
 from .._project_spec import EntryPoint, LaunchProject
-from ..builder.build import get_env_vars_dict
 from ..errors import LaunchError
 from ..utils import (
     LOG_PREFIX,
@@ -374,8 +373,7 @@ class KubernetesRunner(AbstractRunner):
                 }
         entry_point = (
-            launch_project.override_entrypoint
-            or launch_project.get_single_entry_point()
+            launch_project.override_entrypoint or launch_project.get_job_entry_point()
         )
         if launch_project.docker_image:
             # dont specify run id if user provided image, could have multiple runs
@@ -401,8 +399,8 @@ class KubernetesRunner(AbstractRunner):
             launch_project.override_entrypoint is not None,
         )
-        env_vars = get_env_vars_dict(
-            launch_project, self._api, MAX_ENV_LENGTHS[self.__class__.__name__]
+        env_vars = launch_project.get_env_vars_dict(
+            self._api, MAX_ENV_LENGTHS[self.__class__.__name__]
         )
         api_key_secret = None
         for cont in containers:
@@ -511,8 +509,8 @@ class KubernetesRunner(AbstractRunner):
         api_version = resource_args.get("apiVersion", "batch/v1")
         if api_version not in ["batch/v1", "batch/v1beta1"]:
-            env_vars = get_env_vars_dict(
-                launch_project, self._api, MAX_ENV_LENGTHS[self.__class__.__name__]
+            env_vars = launch_project.get_env_vars_dict(
+                self._api, MAX_ENV_LENGTHS[self.__class__.__name__]
             )
             # Crawl the resource args and add our env vars to the containers.
             add_wandb_env(resource_args, env_vars)
@@ -537,7 +535,7 @@ class KubernetesRunner(AbstractRunner):
             if LaunchAgent.initialized():
                 add_label_to_pods(
                     resource_args,
-                    WANDB_K8S_LABEL_MONITOR,
+                    WANDB_K8S_LABEL_AGENT,
                     LaunchAgent.name(),
                 )
                 resource_args["metadata"]["labels"][WANDB_K8S_LABEL_AGENT] = (

wandb/sdk/launch/runner/local_container.py CHANGED Viewed

@@ -12,7 +12,6 @@ from wandb.sdk.launch.environment.abstract import AbstractEnvironment
 from wandb.sdk.launch.registry.abstract import AbstractRegistry
 from .._project_spec import LaunchProject
-from ..builder.build import get_env_vars_dict
 from ..errors import LaunchError
 from ..utils import (
     LOG_PREFIX,
@@ -133,8 +132,8 @@ class LocalContainerRunner(AbstractRunner):
         docker_args = self._populate_docker_args(launch_project, image_uri)
         synchronous: bool = self.backend_config[PROJECT_SYNCHRONOUS]
-        env_vars = get_env_vars_dict(
-            launch_project, self._api, MAX_ENV_LENGTHS[self.__class__.__name__]
+        env_vars = launch_project.get_env_vars_dict(
+            self._api, MAX_ENV_LENGTHS[self.__class__.__name__]
         )
         # When running against local port, need to swap to local docker host

wandb/sdk/launch/runner/local_process.py CHANGED Viewed

@@ -4,16 +4,12 @@ from typing import Any, List, Optional
 import wandb
-from .._project_spec import LaunchProject, get_entry_point_command
-from ..builder.build import get_env_vars_dict
+from .._project_spec import LaunchProject
 from ..errors import LaunchError
 from ..utils import (
     LOG_PREFIX,
     MAX_ENV_LENGTHS,
     PROJECT_SYNCHRONOUS,
-    _is_wandb_uri,
-    download_wandb_python_deps,
-    parse_wandb_uri,
     sanitize_wandb_api_key,
     validate_wandb_python_deps,
 )
@@ -47,8 +43,7 @@ class LocalProcessRunner(AbstractRunner):
         synchronous: bool = self.backend_config[PROJECT_SYNCHRONOUS]
         entry_point = (
-            launch_project.override_entrypoint
-            or launch_project.get_single_entry_point()
+            launch_project.override_entrypoint or launch_project.get_job_entry_point()
         )
         cmd: List[Any] = []
@@ -56,23 +51,7 @@ class LocalProcessRunner(AbstractRunner):
         if launch_project.project_dir is None:
             raise LaunchError("Launch LocalProcessRunner received empty project dir")
-        # Check to make sure local python dependencies match run's requirement.txt
-        if launch_project.uri and _is_wandb_uri(launch_project.uri):
-            source_entity, source_project, run_name = parse_wandb_uri(
-                launch_project.uri
-            )
-            run_requirements_file = download_wandb_python_deps(
-                source_entity,
-                source_project,
-                run_name,
-                self._api,
-                launch_project.project_dir,
-            )
-            validate_wandb_python_deps(
-                run_requirements_file,
-                launch_project.project_dir,
-            )
-        elif launch_project.job:
+        if launch_project.job:
             assert launch_project._job_artifact is not None
             try:
                 validate_wandb_python_deps(
@@ -81,14 +60,14 @@ class LocalProcessRunner(AbstractRunner):
                 )
             except Exception:
                 wandb.termwarn("Unable to validate python dependencies")
-        env_vars = get_env_vars_dict(
-            launch_project, self._api, MAX_ENV_LENGTHS[self.__class__.__name__]
+        env_vars = launch_project.get_env_vars_dict(
+            self._api, MAX_ENV_LENGTHS[self.__class__.__name__]
         )
         for env_key, env_value in env_vars.items():
             cmd += [f"{shlex.quote(env_key)}={shlex.quote(env_value)}"]
-        entry_cmd = get_entry_point_command(entry_point, launch_project.override_args)
-        cmd += entry_cmd
+        if entry_point is not None:
+            cmd += entry_point.command
+        cmd += launch_project.override_args
         command_str = " ".join(cmd).strip()
         _msg = f"{LOG_PREFIX}Launching run as a local-process with command {sanitize_wandb_api_key(command_str)}"

wandb/sdk/launch/runner/sagemaker_runner.py CHANGED Viewed

@@ -12,8 +12,7 @@ from wandb.apis.internal import Api
 from wandb.sdk.launch.environment.aws_environment import AwsEnvironment
 from wandb.sdk.launch.errors import LaunchError
-from .._project_spec import EntryPoint, LaunchProject, get_entry_point_command
-from ..builder.build import get_env_vars_dict
+from .._project_spec import EntryPoint, LaunchProject
 from ..registry.abstract import AbstractRegistry
 from ..utils import (
     LOG_PREFIX,
@@ -68,6 +67,7 @@ class SagemakerSubmittedRun(AbstractRun):
                 logGroupName="/aws/sagemaker/TrainingJobs",
                 logStreamName=log_name,
             )
+            assert "events" in res
             return "\n".join(
                 [f'{event["timestamp"]}:{event["message"]}' for event in res["events"]]
             )
@@ -179,7 +179,10 @@ class SageMakerRunner(AbstractRunner):
         caller_id = client.get_caller_identity()
         account_id = caller_id["Account"]
         _logger.info(f"Using account ID {account_id}")
-        role_arn = get_role_arn(given_sagemaker_args, self.backend_config, account_id)
+        partition = await self.environment.get_partition()
+        role_arn = get_role_arn(
+            given_sagemaker_args, self.backend_config, account_id, partition
+        )
         # Create a sagemaker client to launch the job.
         sagemaker_client = session.client("sagemaker")
@@ -221,12 +224,12 @@ class SageMakerRunner(AbstractRunner):
         launch_project.fill_macros(image_uri)
         _logger.info("Connecting to sagemaker client")
         entry_point = (
-            launch_project.override_entrypoint
-            or launch_project.get_single_entry_point()
-        )
-        command_args = get_entry_point_command(
-            entry_point, launch_project.override_args
+            launch_project.override_entrypoint or launch_project.get_job_entry_point()
         )
+        command_args = []
+        if entry_point is not None:
+            command_args += entry_point.command
+        command_args += launch_project.override_args
         if command_args:
             command_str = " ".join(command_args)
             wandb.termlog(
@@ -349,18 +352,18 @@ def build_sagemaker_args(
     if sagemaker_args.get("ResourceConfig") is None:
         raise LaunchError(
-            "Sagemaker launcher requires a ResourceConfig Sagemaker resource argument"
+            "Sagemaker launcher requires a ResourceConfig resource argument"
         )
     if sagemaker_args.get("StoppingCondition") is None:
         raise LaunchError(
-            "Sagemaker launcher requires a StoppingCondition Sagemaker resource argument"
+            "Sagemaker launcher requires a StoppingCondition resource argument"
         )
     given_env = given_sagemaker_args.get(
         "Environment", sagemaker_args.get("environment", {})
     )
-    calced_env = get_env_vars_dict(launch_project, api, max_env_length)
+    calced_env = launch_project.get_env_vars_dict(api, max_env_length)
     total_env = {**calced_env, **given_env}
     sagemaker_args["Environment"] = total_env
@@ -405,7 +408,10 @@ async def launch_sagemaker_job(
 def get_role_arn(
-    sagemaker_args: Dict[str, Any], backend_config: Dict[str, Any], account_id: str
+    sagemaker_args: Dict[str, Any],
+    backend_config: Dict[str, Any],
+    account_id: str,
+    partition: str,
 ) -> str:
     """Get the role arn from the sagemaker args or the backend config."""
     role_arn = sagemaker_args.get("RoleArn") or sagemaker_args.get("role_arn")
@@ -416,7 +422,7 @@ def get_role_arn(
             "AWS sagemaker require a string RoleArn set this by adding a `RoleArn` key to the sagemaker"
             "field of resource_args"
         )
-    if role_arn.startswith("arn:aws:iam::"):
+    if role_arn.startswith(f"arn:{partition}:iam::"):
         return role_arn  # type: ignore
-    return f"arn:aws:iam::{account_id}:role/{role_arn}"
+    return f"arn:{partition}:iam::{account_id}:role/{role_arn}"

wandb/sdk/launch/runner/vertex_runner.py CHANGED Viewed

@@ -8,8 +8,7 @@ if False:
 from wandb.apis.internal import Api
 from wandb.util import get_module
-from .._project_spec import LaunchProject, get_entry_point_command
-from ..builder.build import get_env_vars_dict
+from .._project_spec import LaunchProject
 from ..environment.gcp_environment import GcpEnvironment
 from ..errors import LaunchError
 from ..registry.abstract import AbstractRegistry
@@ -113,14 +112,16 @@ class VertexRunner(AbstractRunner):
         synchronous: bool = self.backend_config[PROJECT_SYNCHRONOUS]
         entry_point = (
-            launch_project.override_entrypoint
-            or launch_project.get_single_entry_point()
+            launch_project.override_entrypoint or launch_project.get_job_entry_point()
         )
         # TODO: Set entrypoint in each container
-        entry_cmd = get_entry_point_command(entry_point, launch_project.override_args)
-        env_vars = get_env_vars_dict(
-            launch_project=launch_project,
+        entry_cmd = []
+        if entry_point is not None:
+            entry_cmd += entry_point.command
+        entry_cmd += launch_project.override_args
+        env_vars = launch_project.get_env_vars_dict(
             api=self._api,
             max_env_length=MAX_ENV_LENGTHS[self.__class__.__name__],
         )

wandb/sdk/launch/sweeps/scheduler.py CHANGED Viewed

@@ -408,7 +408,7 @@ class Scheduler(ABC):
         return count
     def _try_load_executable(self) -> bool:
-        """Check existance of valid executable for a run.
+        """Check existence of valid executable for a run.
         logs and returns False when job is unreachable
         """
@@ -423,7 +423,7 @@ class Scheduler(ABC):
                 return False
             return True
         elif self._kwargs.get("image_uri"):
-            # TODO(gst): check docker existance? Use registry in launch config?
+            # TODO(gst): check docker existence? Use registry in launch config?
             return True
         else:
             return False
@@ -611,7 +611,7 @@ class Scheduler(ABC):
                     f"Failed to get runstate for run ({run_id}). Error: {traceback.format_exc()}"
                 )
                 run_state = RunState.FAILED
-            else:  # first time we get unknwon state
+            else:  # first time we get unknown state
                 run_state = RunState.UNKNOWN
         except (AttributeError, ValueError):
             wandb.termwarn(
@@ -668,6 +668,8 @@ class Scheduler(ABC):
         launch_config = copy.deepcopy(self._wandb_run.config.get("launch", {}))
         if "overrides" not in launch_config:
             launch_config["overrides"] = {"run_config": {}}
+        if "run_config" not in launch_config["overrides"]:
+            launch_config["overrides"]["run_config"] = {}
         launch_config["overrides"]["run_config"].update(args["args_dict"])
         if macro_args:  # pipe in hyperparam args as params to launch

wandb/sdk/launch/sweeps/scheduler_sweep.py CHANGED Viewed

@@ -59,7 +59,7 @@ class SweepScheduler(Scheduler):
         return None
     def _get_sweep_commands(self, worker_id: int) -> List[Dict[str, Any]]:
-        """Helper to recieve sweep command from backend."""
+        """Helper to receive sweep command from backend."""
         # AgentHeartbeat wants a Dict of runs which are running or queued
         _run_states: Dict[str, bool] = {}
         for run_id, run in self._yield_runs():

wandb/sdk/launch/sweeps/utils.py CHANGED Viewed

@@ -211,13 +211,13 @@ def create_sweep_command_args(command: Dict) -> Dict[str, Any]:
     """
     if "args" not in command:
-        raise ValueError('No "args" found in command: %s' % command)
+        raise ValueError('No "args" found in command: {}'.format(command))
     # four different formats of command args
     # (1) standard command line flags (e.g. --foo=bar)
     flags: List[str] = []
     # (2) flags without hyphens (e.g. foo=bar)
     flags_no_hyphens: List[str] = []
-    # (3) flags with false booleans ommited  (e.g. --foo)
+    # (3) flags with false booleans omitted  (e.g. --foo)
     flags_no_booleans: List[str] = []
     # (4) flags as a dictionary (used for constructing a json)
     flags_dict: Dict[str, Any] = {}
@@ -228,7 +228,7 @@ def create_sweep_command_args(command: Dict) -> Dict[str, Any]:
         try:
             _value: Any = config["value"]
         except KeyError:
-            raise ValueError('No "value" found for command["args"]["%s"]' % param)
+            raise ValueError('No "value" found for command["args"]["{}"]'.format(param))
         _flag: str = f"{param}={_value}"
         flags.append("--" + _flag)
@@ -257,7 +257,7 @@ def make_launch_sweep_entrypoint(
     """Use args dict from create_sweep_command_args to construct entrypoint.
     If replace is True, remove macros from entrypoint, fill them in with args
-    and then return the args in seperate return value.
+    and then return the args in separate return value.
     """
     if not command:
         return None, None