wandb 0.13.10__py3-none-any.whl → 0.14.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- wandb/__init__.py +2 -3
- wandb/apis/__init__.py +1 -3
- wandb/apis/importers/__init__.py +4 -0
- wandb/apis/importers/base.py +312 -0
- wandb/apis/importers/mlflow.py +113 -0
- wandb/apis/internal.py +29 -2
- wandb/apis/normalize.py +6 -5
- wandb/apis/public.py +163 -180
- wandb/apis/reports/_templates.py +6 -12
- wandb/apis/reports/report.py +1 -1
- wandb/apis/reports/runset.py +1 -3
- wandb/apis/reports/util.py +12 -10
- wandb/beta/workflows.py +57 -34
- wandb/catboost/__init__.py +1 -2
- wandb/cli/cli.py +215 -133
- wandb/data_types.py +63 -56
- wandb/docker/__init__.py +78 -16
- wandb/docker/auth.py +21 -22
- wandb/env.py +0 -1
- wandb/errors/__init__.py +8 -116
- wandb/errors/term.py +1 -1
- wandb/fastai/__init__.py +1 -2
- wandb/filesync/dir_watcher.py +8 -5
- wandb/filesync/step_prepare.py +76 -75
- wandb/filesync/step_upload.py +1 -2
- wandb/integration/catboost/__init__.py +1 -3
- wandb/integration/catboost/catboost.py +8 -14
- wandb/integration/fastai/__init__.py +7 -13
- wandb/integration/gym/__init__.py +35 -4
- wandb/integration/keras/__init__.py +3 -3
- wandb/integration/keras/callbacks/metrics_logger.py +9 -8
- wandb/integration/keras/callbacks/model_checkpoint.py +9 -9
- wandb/integration/keras/callbacks/tables_builder.py +31 -19
- wandb/integration/kfp/kfp_patch.py +20 -17
- wandb/integration/kfp/wandb_logging.py +1 -2
- wandb/integration/lightgbm/__init__.py +21 -19
- wandb/integration/prodigy/prodigy.py +6 -7
- wandb/integration/sacred/__init__.py +9 -12
- wandb/integration/sagemaker/__init__.py +1 -3
- wandb/integration/sagemaker/auth.py +0 -1
- wandb/integration/sagemaker/config.py +1 -1
- wandb/integration/sagemaker/resources.py +1 -1
- wandb/integration/sb3/sb3.py +8 -4
- wandb/integration/tensorboard/__init__.py +1 -3
- wandb/integration/tensorboard/log.py +8 -8
- wandb/integration/tensorboard/monkeypatch.py +11 -9
- wandb/integration/tensorflow/__init__.py +1 -3
- wandb/integration/xgboost/__init__.py +4 -6
- wandb/integration/yolov8/__init__.py +7 -0
- wandb/integration/yolov8/yolov8.py +250 -0
- wandb/jupyter.py +31 -35
- wandb/lightgbm/__init__.py +1 -2
- wandb/old/settings.py +2 -2
- wandb/plot/bar.py +1 -2
- wandb/plot/confusion_matrix.py +1 -3
- wandb/plot/histogram.py +1 -2
- wandb/plot/line.py +1 -2
- wandb/plot/line_series.py +4 -4
- wandb/plot/pr_curve.py +17 -20
- wandb/plot/roc_curve.py +1 -3
- wandb/plot/scatter.py +1 -2
- wandb/proto/v3/wandb_server_pb2.py +85 -39
- wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
- wandb/proto/v4/wandb_server_pb2.py +51 -39
- wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
- wandb/sdk/__init__.py +1 -3
- wandb/sdk/backend/backend.py +1 -1
- wandb/sdk/data_types/_dtypes.py +38 -30
- wandb/sdk/data_types/base_types/json_metadata.py +1 -3
- wandb/sdk/data_types/base_types/media.py +17 -17
- wandb/sdk/data_types/base_types/wb_value.py +33 -26
- wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +91 -125
- wandb/sdk/data_types/helper_types/classes.py +1 -1
- wandb/sdk/data_types/helper_types/image_mask.py +12 -12
- wandb/sdk/data_types/histogram.py +5 -4
- wandb/sdk/data_types/html.py +1 -2
- wandb/sdk/data_types/image.py +11 -11
- wandb/sdk/data_types/molecule.py +3 -6
- wandb/sdk/data_types/object_3d.py +1 -2
- wandb/sdk/data_types/plotly.py +1 -2
- wandb/sdk/data_types/saved_model.py +10 -8
- wandb/sdk/data_types/video.py +1 -1
- wandb/sdk/integration_utils/data_logging.py +5 -5
- wandb/sdk/interface/artifacts.py +288 -266
- wandb/sdk/interface/interface.py +2 -3
- wandb/sdk/interface/interface_grpc.py +1 -1
- wandb/sdk/interface/interface_queue.py +1 -1
- wandb/sdk/interface/interface_relay.py +1 -1
- wandb/sdk/interface/interface_shared.py +1 -2
- wandb/sdk/interface/interface_sock.py +1 -1
- wandb/sdk/interface/message_future.py +1 -1
- wandb/sdk/interface/message_future_poll.py +1 -1
- wandb/sdk/interface/router.py +1 -1
- wandb/sdk/interface/router_queue.py +1 -1
- wandb/sdk/interface/router_relay.py +1 -1
- wandb/sdk/interface/router_sock.py +1 -1
- wandb/sdk/interface/summary_record.py +1 -1
- wandb/sdk/internal/artifacts.py +1 -1
- wandb/sdk/internal/datastore.py +2 -3
- wandb/sdk/internal/file_pusher.py +5 -3
- wandb/sdk/internal/file_stream.py +22 -19
- wandb/sdk/internal/handler.py +5 -4
- wandb/sdk/internal/internal.py +1 -1
- wandb/sdk/internal/internal_api.py +115 -55
- wandb/sdk/internal/job_builder.py +1 -3
- wandb/sdk/internal/profiler.py +1 -1
- wandb/sdk/internal/progress.py +4 -6
- wandb/sdk/internal/sample.py +1 -3
- wandb/sdk/internal/sender.py +28 -16
- wandb/sdk/internal/settings_static.py +5 -5
- wandb/sdk/internal/system/assets/__init__.py +1 -0
- wandb/sdk/internal/system/assets/cpu.py +3 -9
- wandb/sdk/internal/system/assets/disk.py +2 -4
- wandb/sdk/internal/system/assets/gpu.py +6 -18
- wandb/sdk/internal/system/assets/gpu_apple.py +2 -4
- wandb/sdk/internal/system/assets/interfaces.py +50 -22
- wandb/sdk/internal/system/assets/ipu.py +1 -3
- wandb/sdk/internal/system/assets/memory.py +7 -13
- wandb/sdk/internal/system/assets/network.py +4 -8
- wandb/sdk/internal/system/assets/open_metrics.py +283 -0
- wandb/sdk/internal/system/assets/tpu.py +1 -4
- wandb/sdk/internal/system/assets/trainium.py +26 -14
- wandb/sdk/internal/system/system_info.py +2 -3
- wandb/sdk/internal/system/system_monitor.py +52 -20
- wandb/sdk/internal/tb_watcher.py +12 -13
- wandb/sdk/launch/_project_spec.py +54 -65
- wandb/sdk/launch/agent/agent.py +374 -90
- wandb/sdk/launch/builder/abstract.py +61 -7
- wandb/sdk/launch/builder/build.py +81 -110
- wandb/sdk/launch/builder/docker_builder.py +181 -0
- wandb/sdk/launch/builder/kaniko_builder.py +419 -0
- wandb/sdk/launch/builder/noop.py +31 -12
- wandb/sdk/launch/builder/templates/_wandb_bootstrap.py +70 -20
- wandb/sdk/launch/environment/abstract.py +28 -0
- wandb/sdk/launch/environment/aws_environment.py +276 -0
- wandb/sdk/launch/environment/gcp_environment.py +271 -0
- wandb/sdk/launch/environment/local_environment.py +65 -0
- wandb/sdk/launch/github_reference.py +3 -8
- wandb/sdk/launch/launch.py +38 -29
- wandb/sdk/launch/launch_add.py +6 -8
- wandb/sdk/launch/loader.py +230 -0
- wandb/sdk/launch/registry/abstract.py +54 -0
- wandb/sdk/launch/registry/elastic_container_registry.py +163 -0
- wandb/sdk/launch/registry/google_artifact_registry.py +203 -0
- wandb/sdk/launch/registry/local_registry.py +62 -0
- wandb/sdk/launch/runner/abstract.py +1 -16
- wandb/sdk/launch/runner/{kubernetes.py → kubernetes_runner.py} +83 -95
- wandb/sdk/launch/runner/local_container.py +46 -22
- wandb/sdk/launch/runner/local_process.py +1 -4
- wandb/sdk/launch/runner/{aws.py → sagemaker_runner.py} +53 -212
- wandb/sdk/launch/runner/{gcp_vertex.py → vertex_runner.py} +38 -55
- wandb/sdk/launch/sweeps/__init__.py +3 -2
- wandb/sdk/launch/sweeps/scheduler.py +132 -39
- wandb/sdk/launch/sweeps/scheduler_sweep.py +80 -89
- wandb/sdk/launch/utils.py +101 -30
- wandb/sdk/launch/wandb_reference.py +2 -7
- wandb/sdk/lib/_settings_toposort_generate.py +166 -0
- wandb/sdk/lib/_settings_toposort_generated.py +201 -0
- wandb/sdk/lib/apikey.py +2 -4
- wandb/sdk/lib/config_util.py +4 -1
- wandb/sdk/lib/console.py +1 -3
- wandb/sdk/lib/deprecate.py +3 -3
- wandb/sdk/lib/file_stream_utils.py +7 -5
- wandb/sdk/lib/filenames.py +1 -1
- wandb/sdk/lib/filesystem.py +61 -5
- wandb/sdk/lib/git.py +1 -3
- wandb/sdk/lib/import_hooks.py +4 -7
- wandb/sdk/lib/ipython.py +8 -5
- wandb/sdk/lib/lazyloader.py +1 -3
- wandb/sdk/lib/mailbox.py +14 -4
- wandb/sdk/lib/proto_util.py +10 -5
- wandb/sdk/lib/redirect.py +15 -22
- wandb/sdk/lib/reporting.py +1 -3
- wandb/sdk/lib/retry.py +4 -5
- wandb/sdk/lib/runid.py +1 -3
- wandb/sdk/lib/server.py +15 -9
- wandb/sdk/lib/sock_client.py +1 -1
- wandb/sdk/lib/sparkline.py +1 -1
- wandb/sdk/lib/wburls.py +1 -1
- wandb/sdk/service/port_file.py +1 -2
- wandb/sdk/service/service.py +36 -13
- wandb/sdk/service/service_base.py +12 -1
- wandb/sdk/verify/verify.py +5 -7
- wandb/sdk/wandb_artifacts.py +142 -177
- wandb/sdk/wandb_config.py +5 -8
- wandb/sdk/wandb_helper.py +1 -1
- wandb/sdk/wandb_init.py +24 -13
- wandb/sdk/wandb_login.py +9 -9
- wandb/sdk/wandb_manager.py +39 -4
- wandb/sdk/wandb_metric.py +2 -6
- wandb/sdk/wandb_require.py +4 -15
- wandb/sdk/wandb_require_helpers.py +1 -9
- wandb/sdk/wandb_run.py +95 -141
- wandb/sdk/wandb_save.py +1 -3
- wandb/sdk/wandb_settings.py +149 -54
- wandb/sdk/wandb_setup.py +66 -46
- wandb/sdk/wandb_summary.py +13 -10
- wandb/sdk/wandb_sweep.py +6 -7
- wandb/sdk/wandb_watch.py +1 -1
- wandb/sklearn/calculate/confusion_matrix.py +1 -1
- wandb/sklearn/calculate/learning_curve.py +1 -1
- wandb/sklearn/calculate/summary_metrics.py +1 -3
- wandb/sklearn/plot/__init__.py +1 -1
- wandb/sklearn/plot/classifier.py +27 -18
- wandb/sklearn/plot/clusterer.py +4 -5
- wandb/sklearn/plot/regressor.py +4 -4
- wandb/sklearn/plot/shared.py +2 -2
- wandb/sync/__init__.py +1 -3
- wandb/sync/sync.py +4 -5
- wandb/testing/relay.py +11 -10
- wandb/trigger.py +1 -1
- wandb/util.py +106 -81
- wandb/viz.py +4 -4
- wandb/wandb_agent.py +50 -50
- wandb/wandb_controller.py +2 -3
- wandb/wandb_run.py +1 -2
- wandb/wandb_torch.py +1 -1
- wandb/xgboost/__init__.py +1 -2
- {wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/METADATA +6 -2
- {wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/RECORD +224 -209
- {wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/WHEEL +1 -1
- wandb/sdk/launch/builder/docker.py +0 -80
- wandb/sdk/launch/builder/kaniko.py +0 -393
- wandb/sdk/launch/builder/loader.py +0 -32
- wandb/sdk/launch/runner/loader.py +0 -50
- {wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/LICENSE +0 -0
- {wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/entry_points.txt +0 -0
- {wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/top_level.txt +0 -0
wandb/sdk/launch/launch.py
CHANGED
@@ -6,18 +6,18 @@ import yaml
|
|
6
6
|
|
7
7
|
import wandb
|
8
8
|
from wandb.apis.internal import Api
|
9
|
-
from wandb.errors import ExecutionError, LaunchError
|
10
9
|
|
10
|
+
from . import loader
|
11
11
|
from ._project_spec import create_project_from_spec, fetch_and_validate_project
|
12
12
|
from .agent import LaunchAgent
|
13
|
-
from .builder import loader as builder_loader
|
14
13
|
from .builder.build import construct_builder_args
|
15
|
-
from .runner import loader
|
16
14
|
from .runner.abstract import AbstractRun
|
17
15
|
from .utils import (
|
18
16
|
LAUNCH_CONFIG_FILE,
|
19
17
|
LAUNCH_DEFAULT_PROJECT,
|
20
18
|
PROJECT_SYNCHRONOUS,
|
19
|
+
ExecutionError,
|
20
|
+
LaunchError,
|
21
21
|
construct_launch_spec,
|
22
22
|
validate_launch_spec_source,
|
23
23
|
)
|
@@ -25,37 +25,56 @@ from .utils import (
|
|
25
25
|
_logger = logging.getLogger(__name__)
|
26
26
|
|
27
27
|
|
28
|
-
def resolve_agent_config(
|
28
|
+
def resolve_agent_config( # noqa: C901
|
29
29
|
api: Api,
|
30
30
|
entity: Optional[str],
|
31
31
|
project: Optional[str],
|
32
32
|
max_jobs: Optional[int],
|
33
33
|
queues: Optional[Tuple[str]],
|
34
|
+
config: Optional[str],
|
34
35
|
) -> Tuple[Dict[str, Any], Api]:
|
36
|
+
"""Resolve the agent config.
|
37
|
+
|
38
|
+
Arguments:
|
39
|
+
api (Api): The api.
|
40
|
+
entity (str): The entity.
|
41
|
+
project (str): The project.
|
42
|
+
max_jobs (int): The max number of jobs.
|
43
|
+
queues (Tuple[str]): The queues.
|
44
|
+
config (str): The config.
|
45
|
+
|
46
|
+
Returns:
|
47
|
+
Tuple[Dict[str, Any], Api]: The resolved config and api.
|
48
|
+
"""
|
35
49
|
defaults = {
|
36
50
|
"entity": api.default_entity,
|
37
51
|
"project": LAUNCH_DEFAULT_PROJECT,
|
38
52
|
"max_jobs": 1,
|
53
|
+
"max_schedulers": 1,
|
39
54
|
"queues": [],
|
40
55
|
"api_key": api.api_key,
|
41
56
|
"base_url": api.settings("base_url"),
|
42
57
|
"registry": {},
|
43
|
-
"
|
58
|
+
"builder": {},
|
44
59
|
"runner": {},
|
45
60
|
}
|
46
61
|
user_set_project = False
|
47
62
|
resolved_config: Dict[str, Any] = defaults
|
48
|
-
|
49
|
-
|
50
|
-
|
63
|
+
config_path = config or os.path.expanduser(LAUNCH_CONFIG_FILE)
|
64
|
+
if os.path.isfile(config_path):
|
65
|
+
launch_config = {}
|
66
|
+
with open(config_path) as f:
|
51
67
|
try:
|
52
|
-
|
53
|
-
print(config)
|
68
|
+
launch_config = yaml.safe_load(f)
|
54
69
|
except yaml.YAMLError as e:
|
55
70
|
raise LaunchError(f"Invalid launch agent config: {e}")
|
56
|
-
if
|
71
|
+
if launch_config.get("project") is not None:
|
57
72
|
user_set_project = True
|
58
|
-
resolved_config.update(
|
73
|
+
resolved_config.update(launch_config.items())
|
74
|
+
elif config is not None:
|
75
|
+
raise LaunchError(
|
76
|
+
f"Could not find use specified launch config file: {config_path}"
|
77
|
+
)
|
59
78
|
if os.environ.get("WANDB_PROJECT") is not None:
|
60
79
|
resolved_config.update({"project": os.environ.get("WANDB_PROJECT")})
|
61
80
|
user_set_project = True
|
@@ -132,7 +151,6 @@ def _run(
|
|
132
151
|
resource_args: Optional[Dict[str, Any]],
|
133
152
|
launch_config: Optional[Dict[str, Any]],
|
134
153
|
synchronous: Optional[bool],
|
135
|
-
cuda: Optional[bool],
|
136
154
|
api: Api,
|
137
155
|
run_id: Optional[str],
|
138
156
|
repository: Optional[str],
|
@@ -152,7 +170,6 @@ def _run(
|
|
152
170
|
parameters,
|
153
171
|
resource_args,
|
154
172
|
launch_config,
|
155
|
-
cuda,
|
156
173
|
run_id,
|
157
174
|
repository,
|
158
175
|
)
|
@@ -164,20 +181,15 @@ def _run(
|
|
164
181
|
runner_config: Dict[str, Any] = {}
|
165
182
|
runner_config[PROJECT_SYNCHRONOUS] = synchronous
|
166
183
|
|
167
|
-
|
168
|
-
|
169
|
-
registry = launch_config.get("registry", {})
|
170
|
-
registry["url"] = repository
|
171
|
-
launch_config["registry"] = registry
|
172
|
-
|
173
|
-
build_config, registry_config = construct_builder_args(
|
174
|
-
launch_config,
|
175
|
-
)
|
184
|
+
config = launch_config or {}
|
185
|
+
build_config, registry_config = construct_builder_args(config)
|
176
186
|
|
177
|
-
|
178
|
-
|
187
|
+
environment = loader.environment_from_config(config.get("environment", {}))
|
188
|
+
registry = loader.registry_from_config(registry_config, environment)
|
189
|
+
builder = loader.builder_from_config(build_config, environment, registry)
|
190
|
+
backend = loader.runner_from_config(resource, api, runner_config, environment)
|
179
191
|
if backend:
|
180
|
-
submitted_run = backend.run(launch_project, builder
|
192
|
+
submitted_run = backend.run(launch_project, builder)
|
181
193
|
# this check will always pass, run is only optional in the agent case where
|
182
194
|
# a run queue id is present on the backend config
|
183
195
|
assert submitted_run
|
@@ -203,7 +215,6 @@ def run(
|
|
203
215
|
docker_image: Optional[str] = None,
|
204
216
|
config: Optional[Dict[str, Any]] = None,
|
205
217
|
synchronous: Optional[bool] = True,
|
206
|
-
cuda: Optional[bool] = None,
|
207
218
|
run_id: Optional[str] = None,
|
208
219
|
repository: Optional[str] = None,
|
209
220
|
) -> AbstractRun:
|
@@ -233,7 +244,6 @@ def run(
|
|
233
244
|
asynchronous runs launched via this method will be terminated. If
|
234
245
|
``synchronous`` is True and the run fails, the current process will
|
235
246
|
error out as well.
|
236
|
-
cuda: Whether to build a CUDA-enabled docker image or not
|
237
247
|
run_id: ID for the run (To ultimately replace the :name: field)
|
238
248
|
repository: string name of repository path for remote registry
|
239
249
|
|
@@ -276,7 +286,6 @@ def run(
|
|
276
286
|
resource_args=resource_args,
|
277
287
|
launch_config=config,
|
278
288
|
synchronous=synchronous,
|
279
|
-
cuda=cuda,
|
280
289
|
api=api,
|
281
290
|
run_id=run_id,
|
282
291
|
repository=repository,
|
wandb/sdk/launch/launch_add.py
CHANGED
@@ -4,7 +4,6 @@ from typing import Any, Dict, List, Optional
|
|
4
4
|
import wandb
|
5
5
|
import wandb.apis.public as public
|
6
6
|
from wandb.apis.internal import Api
|
7
|
-
from wandb.errors import LaunchError
|
8
7
|
from wandb.sdk.launch._project_spec import (
|
9
8
|
compute_command_args,
|
10
9
|
create_project_from_spec,
|
@@ -13,6 +12,7 @@ from wandb.sdk.launch.builder.build import build_image_from_project
|
|
13
12
|
from wandb.sdk.launch.utils import (
|
14
13
|
LAUNCH_DEFAULT_PROJECT,
|
15
14
|
LOG_PREFIX,
|
15
|
+
LaunchError,
|
16
16
|
construct_launch_spec,
|
17
17
|
validate_launch_spec_source,
|
18
18
|
)
|
@@ -44,7 +44,6 @@ def launch_add(
|
|
44
44
|
params: Optional[Dict[str, Any]] = None,
|
45
45
|
project_queue: Optional[str] = None,
|
46
46
|
resource_args: Optional[Dict[str, Any]] = None,
|
47
|
-
cuda: Optional[bool] = None,
|
48
47
|
run_id: Optional[str] = None,
|
49
48
|
build: Optional[bool] = False,
|
50
49
|
repository: Optional[str] = None,
|
@@ -69,7 +68,6 @@ def launch_add(
|
|
69
68
|
the parameters used to run the original run.
|
70
69
|
resource_args: Resource related arguments for launching runs onto a remote backend.
|
71
70
|
Will be stored on the constructed launch config under ``resource_args``.
|
72
|
-
cuda: Whether to build a CUDA-enabled docker image or not
|
73
71
|
run_id: optional string indicating the id of the launched run
|
74
72
|
build: optional flag defaulting to false, requires queue to be set
|
75
73
|
if build, an image is created, creates a job artifact, pushes a reference
|
@@ -116,7 +114,6 @@ def launch_add(
|
|
116
114
|
params,
|
117
115
|
project_queue,
|
118
116
|
resource_args,
|
119
|
-
cuda,
|
120
117
|
run_id=run_id,
|
121
118
|
build=build,
|
122
119
|
repository=repository,
|
@@ -139,7 +136,6 @@ def _launch_add(
|
|
139
136
|
params: Optional[Dict[str, Any]],
|
140
137
|
project_queue: Optional[str],
|
141
138
|
resource_args: Optional[Dict[str, Any]] = None,
|
142
|
-
cuda: Optional[bool] = None,
|
143
139
|
run_id: Optional[str] = None,
|
144
140
|
build: Optional[bool] = False,
|
145
141
|
repository: Optional[str] = None,
|
@@ -158,7 +154,6 @@ def _launch_add(
|
|
158
154
|
params,
|
159
155
|
resource_args,
|
160
156
|
config,
|
161
|
-
cuda,
|
162
157
|
run_id,
|
163
158
|
repository,
|
164
159
|
)
|
@@ -174,7 +169,7 @@ def _launch_add(
|
|
174
169
|
launch_spec["job"] = None
|
175
170
|
|
176
171
|
launch_project = create_project_from_spec(launch_spec, api)
|
177
|
-
docker_image_uri = build_image_from_project(launch_project, api, config)
|
172
|
+
docker_image_uri = build_image_from_project(launch_project, api, config or {})
|
178
173
|
run = wandb.run or wandb.init(
|
179
174
|
project=launch_spec["project"],
|
180
175
|
entity=launch_spec["entity"],
|
@@ -207,7 +202,10 @@ def _launch_add(
|
|
207
202
|
if updated_spec.get("resource"):
|
208
203
|
launch_spec["resource"] = updated_spec.get("resource")
|
209
204
|
|
210
|
-
|
205
|
+
if project_queue == LAUNCH_DEFAULT_PROJECT:
|
206
|
+
wandb.termlog(f"{LOG_PREFIX}Added run to queue {queue_name}.")
|
207
|
+
else:
|
208
|
+
wandb.termlog(f"{LOG_PREFIX}Added run to queue {project_queue}/{queue_name}.")
|
211
209
|
wandb.termlog(f"{LOG_PREFIX}Launch spec:\n{pprint.pformat(launch_spec)}\n")
|
212
210
|
public_api = public.Api()
|
213
211
|
container_job = False
|
@@ -0,0 +1,230 @@
|
|
1
|
+
"""Utilities for the agent."""
|
2
|
+
from typing import Any, Dict, Optional
|
3
|
+
|
4
|
+
from wandb.apis.internal import Api
|
5
|
+
from wandb.sdk.launch.utils import LaunchError
|
6
|
+
|
7
|
+
from .builder.abstract import AbstractBuilder
|
8
|
+
from .environment.abstract import AbstractEnvironment
|
9
|
+
from .registry.abstract import AbstractRegistry
|
10
|
+
from .registry.local_registry import LocalRegistry
|
11
|
+
from .runner.abstract import AbstractRunner
|
12
|
+
|
13
|
+
WANDB_RUNNERS = {
|
14
|
+
"local-container",
|
15
|
+
"local-process",
|
16
|
+
"kubernetes",
|
17
|
+
"vertex",
|
18
|
+
"sagemaker",
|
19
|
+
}
|
20
|
+
|
21
|
+
|
22
|
+
def environment_from_config(config: Optional[Dict[str, Any]]) -> AbstractEnvironment:
|
23
|
+
"""Create an environment from a config.
|
24
|
+
|
25
|
+
This helper function is used to create an environment from a config. The
|
26
|
+
config should have a "type" key that specifies the type of environment to
|
27
|
+
create. The remaining keys are passed to the environment's from_config
|
28
|
+
method. If the config is None or empty, a LocalEnvironment is returned.
|
29
|
+
|
30
|
+
Arguments:
|
31
|
+
config (Dict[str, Any]): The config.
|
32
|
+
|
33
|
+
Returns:
|
34
|
+
Environment: The environment constructed.
|
35
|
+
"""
|
36
|
+
if not config:
|
37
|
+
from .environment.local_environment import LocalEnvironment
|
38
|
+
|
39
|
+
return LocalEnvironment() # This is the default, dummy environment.
|
40
|
+
env_type = config.get("type")
|
41
|
+
if not env_type:
|
42
|
+
raise LaunchError(
|
43
|
+
"Could not create environment from config. Environment type not specified!"
|
44
|
+
)
|
45
|
+
if env_type == "aws":
|
46
|
+
from .environment.aws_environment import AwsEnvironment
|
47
|
+
|
48
|
+
return AwsEnvironment.from_config(config)
|
49
|
+
if env_type == "gcp":
|
50
|
+
from .environment.gcp_environment import GcpEnvironment
|
51
|
+
|
52
|
+
return GcpEnvironment.from_config(config)
|
53
|
+
raise LaunchError(
|
54
|
+
f"Could not create environment from config. Invalid type: {env_type}"
|
55
|
+
)
|
56
|
+
|
57
|
+
|
58
|
+
def registry_from_config(
|
59
|
+
config: Optional[Dict[str, Any]], environment: AbstractEnvironment
|
60
|
+
) -> AbstractRegistry:
|
61
|
+
"""Create a registry from a config.
|
62
|
+
|
63
|
+
This helper function is used to create a registry from a config. The
|
64
|
+
config should have a "type" key that specifies the type of registry to
|
65
|
+
create. The remaining keys are passed to the registry's from_config
|
66
|
+
method. If the config is None or empty, a LocalRegistry is returned.
|
67
|
+
|
68
|
+
Arguments:
|
69
|
+
config (Dict[str, Any]): The registry config.
|
70
|
+
environment (Environment): The environment of the registry.
|
71
|
+
|
72
|
+
Returns:
|
73
|
+
The registry if config is not None, otherwise None.
|
74
|
+
|
75
|
+
Raises:
|
76
|
+
LaunchError: If the registry is not configured correctly.
|
77
|
+
"""
|
78
|
+
if not config:
|
79
|
+
from .registry.local_registry import LocalRegistry
|
80
|
+
|
81
|
+
return LocalRegistry() # This is the default, dummy registry.
|
82
|
+
registry_type = config.get("type")
|
83
|
+
if registry_type is None:
|
84
|
+
from .registry.local_registry import LocalRegistry
|
85
|
+
|
86
|
+
return LocalRegistry() # This is the default, dummy registry.
|
87
|
+
if registry_type == "ecr":
|
88
|
+
from .environment.aws_environment import AwsEnvironment
|
89
|
+
|
90
|
+
if not isinstance(environment, AwsEnvironment):
|
91
|
+
raise LaunchError(
|
92
|
+
"Could not create ECR registry. "
|
93
|
+
"Environment must be an instance of AWSEnvironment."
|
94
|
+
)
|
95
|
+
from .registry.elastic_container_registry import ElasticContainerRegistry
|
96
|
+
|
97
|
+
return ElasticContainerRegistry.from_config(config, environment)
|
98
|
+
if registry_type == "gcr":
|
99
|
+
from .environment.gcp_environment import GcpEnvironment
|
100
|
+
|
101
|
+
if not isinstance(environment, GcpEnvironment):
|
102
|
+
raise LaunchError(
|
103
|
+
"Could not create GCR registry. "
|
104
|
+
"Environment must be an instance of GCPEnvironment."
|
105
|
+
)
|
106
|
+
from .registry.google_artifact_registry import GoogleArtifactRegistry
|
107
|
+
|
108
|
+
return GoogleArtifactRegistry.from_config(config, environment)
|
109
|
+
raise LaunchError(
|
110
|
+
f"Could not create registry from config. Invalid registry type: {registry_type}"
|
111
|
+
)
|
112
|
+
|
113
|
+
|
114
|
+
def builder_from_config(
|
115
|
+
config: Optional[Dict[str, Any]],
|
116
|
+
environment: AbstractEnvironment,
|
117
|
+
registry: AbstractRegistry,
|
118
|
+
) -> AbstractBuilder:
|
119
|
+
"""Create a builder from a config.
|
120
|
+
|
121
|
+
This helper function is used to create a builder from a config. The
|
122
|
+
config should have a "type" key that specifies the type of builder to import
|
123
|
+
and create. The remaining keys are passed to the builder's from_config
|
124
|
+
method. If the config is None or empty, a DockerBuilder is returned.
|
125
|
+
|
126
|
+
Arguments:
|
127
|
+
config (Dict[str, Any]): The builder config.
|
128
|
+
registry (Registry): The registry of the builder.
|
129
|
+
|
130
|
+
Returns:
|
131
|
+
The builder.
|
132
|
+
|
133
|
+
Raises:
|
134
|
+
LaunchError: If the builder is not configured correctly.
|
135
|
+
"""
|
136
|
+
if not config:
|
137
|
+
from .builder.docker_builder import DockerBuilder
|
138
|
+
|
139
|
+
return DockerBuilder.from_config(
|
140
|
+
{}, environment, registry
|
141
|
+
) # This is the default builder.
|
142
|
+
|
143
|
+
builder_type = config.get("type")
|
144
|
+
if builder_type is None:
|
145
|
+
raise LaunchError(
|
146
|
+
"Could not create builder from config. Builder type not specified"
|
147
|
+
)
|
148
|
+
if builder_type == "docker":
|
149
|
+
from .builder.docker_builder import DockerBuilder
|
150
|
+
|
151
|
+
return DockerBuilder.from_config(config, environment, registry)
|
152
|
+
if builder_type == "kaniko":
|
153
|
+
if isinstance(registry, LocalRegistry):
|
154
|
+
raise LaunchError(
|
155
|
+
"Could not create Kaniko builder. "
|
156
|
+
"Registry must be a remote registry."
|
157
|
+
)
|
158
|
+
from .builder.kaniko_builder import KanikoBuilder
|
159
|
+
|
160
|
+
return KanikoBuilder.from_config(config, environment, registry)
|
161
|
+
if builder_type == "noop":
|
162
|
+
from .builder.noop import NoOpBuilder
|
163
|
+
|
164
|
+
return NoOpBuilder.from_config(config, environment, registry)
|
165
|
+
raise LaunchError(
|
166
|
+
f"Could not create builder from config. Invalid builder type: {builder_type}"
|
167
|
+
)
|
168
|
+
|
169
|
+
|
170
|
+
def runner_from_config(
|
171
|
+
runner_name: str,
|
172
|
+
api: Api,
|
173
|
+
runner_config: Dict[str, Any],
|
174
|
+
environment: AbstractEnvironment,
|
175
|
+
) -> AbstractRunner:
|
176
|
+
"""Create a runner from a config.
|
177
|
+
|
178
|
+
This helper function is used to create a runner from a config. The
|
179
|
+
config should have a "type" key that specifies the type of runner to import
|
180
|
+
and create. The remaining keys are passed to the runner's from_config
|
181
|
+
method. If the config is None or empty, a LocalContainerRunner is returned.
|
182
|
+
|
183
|
+
Arguments:
|
184
|
+
runner_name (str): The name of the backend.
|
185
|
+
api (Api): The API.
|
186
|
+
runner_config (Dict[str, Any]): The backend config.
|
187
|
+
|
188
|
+
Returns:
|
189
|
+
The runner.
|
190
|
+
|
191
|
+
Raises:
|
192
|
+
LaunchError: If the runner is not configured correctly.
|
193
|
+
"""
|
194
|
+
if not runner_name or runner_name in ["local-container", "local"]:
|
195
|
+
from .runner.local_container import LocalContainerRunner
|
196
|
+
|
197
|
+
return LocalContainerRunner(api, runner_config, environment)
|
198
|
+
if runner_name == "local-process":
|
199
|
+
from .runner.local_process import LocalProcessRunner
|
200
|
+
|
201
|
+
return LocalProcessRunner(api, runner_config)
|
202
|
+
if runner_name == "sagemaker":
|
203
|
+
from .environment.aws_environment import AwsEnvironment
|
204
|
+
|
205
|
+
if not isinstance(environment, AwsEnvironment):
|
206
|
+
raise LaunchError(
|
207
|
+
"Could not create Sagemaker runner. "
|
208
|
+
"Environment must be an instance of AwsEnvironment."
|
209
|
+
)
|
210
|
+
from .runner.sagemaker_runner import SageMakerRunner
|
211
|
+
|
212
|
+
return SageMakerRunner(api, runner_config, environment)
|
213
|
+
if runner_name in ["vertex", "gcp-vertex"]:
|
214
|
+
from .environment.gcp_environment import GcpEnvironment
|
215
|
+
|
216
|
+
if not isinstance(environment, GcpEnvironment):
|
217
|
+
raise LaunchError(
|
218
|
+
"Could not create Vertex runner. "
|
219
|
+
"Environment must be an instance of GcpEnvironment."
|
220
|
+
)
|
221
|
+
from .runner.vertex_runner import VertexRunner
|
222
|
+
|
223
|
+
return VertexRunner(api, runner_config, environment)
|
224
|
+
if runner_name == "kubernetes":
|
225
|
+
from .runner.kubernetes_runner import KubernetesRunner
|
226
|
+
|
227
|
+
return KubernetesRunner(api, runner_config, environment)
|
228
|
+
raise LaunchError(
|
229
|
+
f"Could not create runner from config. Invalid runner name: {runner_name}"
|
230
|
+
)
|
@@ -0,0 +1,54 @@
|
|
1
|
+
"""Abstract base class for registries."""
|
2
|
+
from abc import ABC, abstractmethod
|
3
|
+
from typing import Tuple
|
4
|
+
|
5
|
+
from ..environment.abstract import AbstractEnvironment
|
6
|
+
|
7
|
+
|
8
|
+
class AbstractRegistry(ABC):
|
9
|
+
"""Abstract base class for registries."""
|
10
|
+
|
11
|
+
uri: str
|
12
|
+
|
13
|
+
@abstractmethod
|
14
|
+
def verify(self) -> None:
|
15
|
+
"""Verify that the registry is configured correctly."""
|
16
|
+
raise NotImplementedError
|
17
|
+
|
18
|
+
@abstractmethod
|
19
|
+
def get_username_password(self) -> Tuple[str, str]:
|
20
|
+
"""Get the username and password for the registry.
|
21
|
+
|
22
|
+
Returns:
|
23
|
+
(str, str): The username and password.
|
24
|
+
"""
|
25
|
+
raise NotImplementedError
|
26
|
+
|
27
|
+
@abstractmethod
|
28
|
+
def get_repo_uri(self) -> str:
|
29
|
+
"""Get the URI for a repository.
|
30
|
+
|
31
|
+
Returns:
|
32
|
+
str: The URI.
|
33
|
+
"""
|
34
|
+
raise NotImplementedError
|
35
|
+
|
36
|
+
@abstractmethod
|
37
|
+
def check_image_exists(self, image_uri: str) -> bool:
|
38
|
+
"""Check if an image exists in the registry.
|
39
|
+
|
40
|
+
Arguments:
|
41
|
+
image_uri (str): The URI of the image.
|
42
|
+
|
43
|
+
Returns:
|
44
|
+
bool: True if the image exists.
|
45
|
+
"""
|
46
|
+
raise NotImplementedError
|
47
|
+
|
48
|
+
@classmethod
|
49
|
+
@abstractmethod
|
50
|
+
def from_config(
|
51
|
+
cls, config: dict, environment: "AbstractEnvironment", verify: bool = True
|
52
|
+
) -> "AbstractRegistry":
|
53
|
+
"""Create a registry from a config."""
|
54
|
+
raise NotImplementedError
|
@@ -0,0 +1,163 @@
|
|
1
|
+
"""Implementation of Elastic Container Registry class for wandb launch."""
|
2
|
+
import base64
|
3
|
+
import logging
|
4
|
+
from typing import Dict, Tuple
|
5
|
+
|
6
|
+
from wandb.sdk.launch.environment.aws_environment import AwsEnvironment
|
7
|
+
from wandb.sdk.launch.utils import LaunchError
|
8
|
+
from wandb.util import get_module
|
9
|
+
|
10
|
+
from .abstract import AbstractRegistry
|
11
|
+
|
12
|
+
botocore = get_module(
|
13
|
+
"botocore",
|
14
|
+
required="AWS environment requires botocore to be installed. Please install "
|
15
|
+
"it with `pip install wandb[launch]`.",
|
16
|
+
)
|
17
|
+
|
18
|
+
_logger = logging.getLogger(__name__)
|
19
|
+
|
20
|
+
|
21
|
+
class ElasticContainerRegistry(AbstractRegistry):
    """Elastic Container Registry class.

    Attributes:
        repo_name (str): The name of the repository.
        environment (AwsEnvironment): The AWS environment.
        uri (str): The part of the repository uri before the first "/".
            It is only set once `verify` has succeeded.
    """

    repo_name: str
    environment: AwsEnvironment
    uri: str

    def __init__(
        self, repo_name: str, environment: AwsEnvironment, verify: bool = True
    ) -> None:
        """Initialize the Elastic Container Registry.

        Arguments:
            repo_name (str): The name of the repository.
            environment (AwsEnvironment): The AWS environment.
            verify (bool): Whether to verify the registry immediately.
                Defaults to True, which matches the previous behavior. When
                False, verification is deferred until the repository uri is
                first needed.

        Raises:
            LaunchError: If there is an error verifying the registry.
        """
        super().__init__()
        _logger.info(
            "Initializing Elastic Container Registry with repository %s.", repo_name
        )
        self.repo_name = repo_name
        self.environment = environment
        if verify:
            self.verify()

    @classmethod
    def from_config(  # type: ignore[override]
        cls,
        config: Dict,
        environment: AwsEnvironment,
        verify: bool = True,
    ) -> "ElasticContainerRegistry":
        """Create an Elastic Container Registry from a config.

        Arguments:
            config (dict): The config. Must have "type" equal to "ecr" and a
                non-empty "repository".
            environment (AwsEnvironment): The AWS environment.
            verify (bool): Whether to verify the registry on creation.

        Returns:
            ElasticContainerRegistry: The Elastic Container Registry.

        Raises:
            LaunchError: If the config type is not "ecr", if "repository" is
                missing or empty, or if verification fails.
        """
        registry_type = config.get("type")
        if registry_type != "ecr":
            raise LaunchError(
                f"Could not create ElasticContainerRegistry from config. Expected type 'ecr' "
                f"but got '{registry_type}'."
            )
        repository = config.get("repository")
        if not repository:
            raise LaunchError(
                "Could not create ElasticContainerRegistry from config. 'repository' is required."
            )
        # Forward `verify` so that callers asking to skip verification are honored.
        return cls(repository, environment, verify=verify)

    def verify(self) -> None:
        """Verify that the registry is accessible and the configured repo exists.

        On success this also stores the registry part of the repository uri
        in `self.uri`.

        Raises:
            LaunchError: If there is an error verifying the registry.
        """
        _logger.debug("Verifying Elastic Container Registry.")
        try:
            session = self.environment.get_session()
            client = session.client("ecr")
            response = client.describe_repositories(repositoryNames=[self.repo_name])
            # Keep only the part before the first "/"; get_repo_uri appends
            # the repository name again.
            self.uri = response["repositories"][0]["repositoryUri"].split("/")[0]

        except botocore.exceptions.ClientError as e:
            code = e.response["Error"]["Code"]
            msg = e.response["Error"]["Message"]
            # TODO: Log the code and the message here?
            raise LaunchError(
                f"Error verifying Elastic Container Registry: {code} {msg}"
            ) from e

    def get_username_password(self) -> Tuple[str, str]:
        """Get the username and password for the registry.

        Returns:
            (str, str): The username and password.

        Raises:
            LaunchError: If there is an error getting the username and password.
        """
        _logger.debug("Getting username and password for Elastic Container Registry.")
        try:
            session = self.environment.get_session()
            client = session.client("ecr")
            response = client.get_authorization_token()
            token = base64.standard_b64decode(
                response["authorizationData"][0]["authorizationToken"]
            )
            # Split only on the first ":" so a password that itself contains
            # ":" does not break the two-way unpacking.
            username, password = token.split(b":", 1)
            return username.decode("utf-8"), password.decode("utf-8")

        except botocore.exceptions.ClientError as e:
            code = e.response["Error"]["Code"]
            msg = e.response["Error"]["Message"]
            # TODO: Log the code and the message here?
            raise LaunchError(
                f"Error getting username and password: {code} {msg}"
            ) from e

    def get_repo_uri(self) -> str:
        """Get the uri of the repository.

        Returns:
            str: The uri of the repository.

        Raises:
            LaunchError: If the registry has not been verified yet and the
                deferred verification fails.
        """
        # `uri` is only assigned by verify(); resolve it on demand when the
        # registry was created with verify=False.
        if getattr(self, "uri", None) is None:
            self.verify()
        return f"{self.uri}/{self.repo_name}"

    def check_image_exists(self, image_uri: str) -> bool:
        """Check if the image tag exists.

        Arguments:
            image_uri (str): The full image_uri, in the form "<repo uri>:<tag>".

        Returns:
            bool: True if the image tag exists.

        Raises:
            LaunchError: If the image uri has no tag, does not belong to this
                repository, or the lookup fails for a reason other than the
                image not being found.
        """
        # rpartition never raises on a missing separator, so a malformed uri
        # is reported as a LaunchError instead of an unpacking ValueError.
        uri, sep, tag = image_uri.rpartition(":")
        if not sep or not tag:
            raise LaunchError(f"Image uri {image_uri} does not include a tag.")
        repo_uri = self.get_repo_uri()
        if uri != repo_uri:
            raise LaunchError(
                f"Image uri {image_uri} does not match Elastic Container Registry uri {repo_uri}."
            )

        _logger.debug("Checking if image tag exists.")
        try:
            session = self.environment.get_session()
            client = session.client("ecr")
            response = client.describe_images(
                repositoryName=self.repo_name, imageIds=[{"imageTag": tag}]
            )
            return len(response["imageDetails"]) > 0

        except botocore.exceptions.ClientError as e:
            code = e.response["Error"]["Code"]
            # A missing image is an expected outcome, not an error.
            if code == "ImageNotFoundException":
                return False
            msg = e.response["Error"]["Message"]
            raise LaunchError(
                f"Error checking if image tag exists: {code} {msg}"
            ) from e
|