wandb 0.16.3__py3-none-any.whl → 0.16.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wandb/__init__.py +2 -2
- wandb/agents/pyagent.py +1 -1
- wandb/apis/importers/__init__.py +1 -4
- wandb/apis/importers/internals/internal.py +386 -0
- wandb/apis/importers/internals/protocols.py +125 -0
- wandb/apis/importers/internals/util.py +78 -0
- wandb/apis/importers/mlflow.py +125 -88
- wandb/apis/importers/validation.py +108 -0
- wandb/apis/importers/wandb.py +1604 -0
- wandb/apis/public/api.py +7 -10
- wandb/apis/public/artifacts.py +38 -0
- wandb/apis/public/files.py +11 -2
- wandb/apis/reports/v2/__init__.py +0 -19
- wandb/apis/reports/v2/expr_parsing.py +0 -1
- wandb/apis/reports/v2/interface.py +15 -18
- wandb/apis/reports/v2/internal.py +12 -45
- wandb/cli/cli.py +52 -55
- wandb/integration/gym/__init__.py +2 -1
- wandb/integration/keras/callbacks/model_checkpoint.py +1 -1
- wandb/integration/keras/keras.py +6 -4
- wandb/integration/kfp/kfp_patch.py +2 -2
- wandb/integration/openai/fine_tuning.py +1 -2
- wandb/integration/ultralytics/callback.py +0 -1
- wandb/proto/v3/wandb_internal_pb2.py +332 -312
- wandb/proto/v3/wandb_settings_pb2.py +13 -3
- wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
- wandb/proto/v4/wandb_internal_pb2.py +316 -312
- wandb/proto/v4/wandb_settings_pb2.py +5 -3
- wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
- wandb/sdk/artifacts/artifact.py +75 -31
- wandb/sdk/artifacts/artifact_manifest.py +5 -2
- wandb/sdk/artifacts/artifact_manifest_entry.py +6 -1
- wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +8 -2
- wandb/sdk/artifacts/artifact_saver.py +19 -47
- wandb/sdk/artifacts/storage_handler.py +2 -1
- wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +22 -9
- wandb/sdk/artifacts/storage_policy.py +4 -1
- wandb/sdk/data_types/base_types/wb_value.py +1 -1
- wandb/sdk/data_types/image.py +2 -2
- wandb/sdk/interface/interface.py +49 -13
- wandb/sdk/interface/interface_shared.py +17 -11
- wandb/sdk/internal/file_stream.py +20 -1
- wandb/sdk/internal/handler.py +1 -4
- wandb/sdk/internal/internal_api.py +3 -1
- wandb/sdk/internal/job_builder.py +49 -19
- wandb/sdk/internal/profiler.py +1 -1
- wandb/sdk/internal/sender.py +96 -124
- wandb/sdk/internal/sender_config.py +197 -0
- wandb/sdk/internal/settings_static.py +9 -0
- wandb/sdk/internal/system/system_info.py +5 -3
- wandb/sdk/internal/update.py +1 -1
- wandb/sdk/launch/_launch.py +3 -3
- wandb/sdk/launch/_launch_add.py +28 -29
- wandb/sdk/launch/_project_spec.py +148 -136
- wandb/sdk/launch/agent/agent.py +3 -7
- wandb/sdk/launch/agent/config.py +0 -27
- wandb/sdk/launch/builder/build.py +54 -28
- wandb/sdk/launch/builder/docker_builder.py +4 -15
- wandb/sdk/launch/builder/kaniko_builder.py +72 -45
- wandb/sdk/launch/create_job.py +6 -40
- wandb/sdk/launch/loader.py +10 -0
- wandb/sdk/launch/registry/anon.py +29 -0
- wandb/sdk/launch/registry/local_registry.py +4 -1
- wandb/sdk/launch/runner/kubernetes_runner.py +20 -2
- wandb/sdk/launch/runner/local_container.py +15 -10
- wandb/sdk/launch/runner/sagemaker_runner.py +1 -1
- wandb/sdk/launch/sweeps/scheduler.py +11 -3
- wandb/sdk/launch/utils.py +14 -0
- wandb/sdk/lib/__init__.py +2 -5
- wandb/sdk/lib/_settings_toposort_generated.py +4 -1
- wandb/sdk/lib/apikey.py +0 -5
- wandb/sdk/lib/config_util.py +0 -31
- wandb/sdk/lib/filesystem.py +11 -1
- wandb/sdk/lib/run_moment.py +72 -0
- wandb/sdk/service/service.py +7 -2
- wandb/sdk/service/streams.py +1 -6
- wandb/sdk/verify/verify.py +2 -1
- wandb/sdk/wandb_init.py +12 -1
- wandb/sdk/wandb_login.py +43 -26
- wandb/sdk/wandb_run.py +164 -110
- wandb/sdk/wandb_settings.py +58 -16
- wandb/testing/relay.py +5 -6
- wandb/util.py +50 -7
- {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/METADATA +8 -1
- {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/RECORD +89 -82
- {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/WHEEL +1 -1
- wandb/apis/importers/base.py +0 -400
- {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/LICENSE +0 -0
- {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/entry_points.txt +0 -0
- {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/top_level.txt +0 -0
@@ -11,18 +11,13 @@ from wandb.sdk.launch.builder.build import registry_from_uri
|
|
11
11
|
from wandb.sdk.launch.environment.abstract import AbstractEnvironment
|
12
12
|
from wandb.sdk.launch.registry.abstract import AbstractRegistry
|
13
13
|
|
14
|
-
from .._project_spec import (
|
15
|
-
EntryPoint,
|
16
|
-
LaunchProject,
|
17
|
-
create_metadata_file,
|
18
|
-
get_entry_point_command,
|
19
|
-
)
|
14
|
+
from .._project_spec import EntryPoint, LaunchProject
|
20
15
|
from ..errors import LaunchDockerError, LaunchError
|
16
|
+
from ..registry.anon import AnonynmousRegistry
|
21
17
|
from ..registry.local_registry import LocalRegistry
|
22
18
|
from ..utils import (
|
23
19
|
LOG_PREFIX,
|
24
20
|
event_loop_thread_exec,
|
25
|
-
sanitize_wandb_api_key,
|
26
21
|
warn_failed_packages_from_build_logs,
|
27
22
|
)
|
28
23
|
from .build import (
|
@@ -106,6 +101,8 @@ class DockerBuilder(AbstractBuilder):
|
|
106
101
|
"""Login to the registry."""
|
107
102
|
if isinstance(self.registry, LocalRegistry):
|
108
103
|
_logger.info(f"{LOG_PREFIX}No registry configured, skipping login.")
|
104
|
+
elif isinstance(self.registry, AnonynmousRegistry):
|
105
|
+
_logger.info(f"{LOG_PREFIX}Anonymous registry, skipping login.")
|
109
106
|
else:
|
110
107
|
username, password = await self.registry.get_username_password()
|
111
108
|
login = event_loop_thread_exec(docker.login)
|
@@ -155,14 +152,6 @@ class DockerBuilder(AbstractBuilder):
|
|
155
152
|
f"image {image_uri} does not already exist in repository, building."
|
156
153
|
)
|
157
154
|
|
158
|
-
entry_cmd = get_entry_point_command(entrypoint, launch_project.override_args)
|
159
|
-
|
160
|
-
create_metadata_file(
|
161
|
-
launch_project,
|
162
|
-
image_uri,
|
163
|
-
sanitize_wandb_api_key(" ".join(entry_cmd)),
|
164
|
-
dockerfile_str,
|
165
|
-
)
|
166
155
|
build_ctx_path = _create_docker_build_ctx(launch_project, dockerfile_str)
|
167
156
|
dockerfile = os.path.join(build_ctx_path, _WANDB_DOCKERFILE_NAME)
|
168
157
|
try:
|
@@ -3,6 +3,7 @@ import base64
|
|
3
3
|
import json
|
4
4
|
import logging
|
5
5
|
import os
|
6
|
+
import shutil
|
6
7
|
import tarfile
|
7
8
|
import tempfile
|
8
9
|
import time
|
@@ -23,17 +24,11 @@ from wandb.sdk.launch.registry.elastic_container_registry import (
|
|
23
24
|
from wandb.sdk.launch.registry.google_artifact_registry import GoogleArtifactRegistry
|
24
25
|
from wandb.util import get_module
|
25
26
|
|
26
|
-
from .._project_spec import (
|
27
|
-
EntryPoint,
|
28
|
-
LaunchProject,
|
29
|
-
create_metadata_file,
|
30
|
-
get_entry_point_command,
|
31
|
-
)
|
27
|
+
from .._project_spec import EntryPoint, LaunchProject
|
32
28
|
from ..errors import LaunchError
|
33
29
|
from ..utils import (
|
34
30
|
LOG_PREFIX,
|
35
31
|
get_kube_context_and_api_client,
|
36
|
-
sanitize_wandb_api_key,
|
37
32
|
warn_failed_packages_from_build_logs,
|
38
33
|
)
|
39
34
|
from .build import (
|
@@ -56,6 +51,14 @@ _logger = logging.getLogger(__name__)
|
|
56
51
|
_DEFAULT_BUILD_TIMEOUT_SECS = 1800 # 30 minute build timeout
|
57
52
|
|
58
53
|
SERVICE_ACCOUNT_NAME = os.environ.get("WANDB_LAUNCH_SERVICE_ACCOUNT_NAME", "default")
|
54
|
+
PVC_NAME = os.environ.get("WANDB_LAUNCH_KANIKO_PVC_NAME")
|
55
|
+
PVC_MOUNT_PATH = (
|
56
|
+
os.environ.get("WANDB_LAUNCH_KANIKO_PVC_MOUNT_PATH", "/kaniko").rstrip("/")
|
57
|
+
if PVC_NAME
|
58
|
+
else None
|
59
|
+
)
|
60
|
+
DOCKER_CONFIG_SECRET = os.environ.get("WANDB_LAUNCH_KANIKO_AUTH_SECRET")
|
61
|
+
|
59
62
|
|
60
63
|
if os.path.exists("/var/run/secrets/kubernetes.io/serviceaccount/namespace"):
|
61
64
|
with open("/var/run/secrets/kubernetes.io/serviceaccount/namespace") as f:
|
@@ -115,12 +118,6 @@ class KanikoBuilder(AbstractBuilder):
|
|
115
118
|
verify (bool, optional): Whether to verify the functionality of the builder.
|
116
119
|
Defaults to True.
|
117
120
|
"""
|
118
|
-
if build_context_store is None:
|
119
|
-
raise LaunchError(
|
120
|
-
"You are required to specify an external build "
|
121
|
-
"context store for Kaniko builds. Please specify a storage url "
|
122
|
-
"in the 'build-context-store' field of your builder config."
|
123
|
-
)
|
124
121
|
self.environment = environment
|
125
122
|
self.registry = registry
|
126
123
|
self.build_job_name = build_job_name
|
@@ -154,13 +151,16 @@ class KanikoBuilder(AbstractBuilder):
|
|
154
151
|
raise LaunchError(
|
155
152
|
"Builder config must include 'type':'kaniko' to create a KanikoBuilder."
|
156
153
|
)
|
157
|
-
build_context_store = config.get("build-context-store")
|
154
|
+
build_context_store = config.get("build-context-store", "")
|
158
155
|
if build_context_store is None:
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
156
|
+
if not PVC_MOUNT_PATH:
|
157
|
+
raise LaunchError(
|
158
|
+
"You must specify a build context store for kaniko builds. "
|
159
|
+
"You can set builder.build-context-store in your agent config "
|
160
|
+
"to a valid s3, gcs, or azure blog storage URI. Or, configure "
|
161
|
+
"a persistent volume claim through the agent helm chart: "
|
162
|
+
"https://github.com/wandb/helm-charts/tree/main/charts/launch-agent"
|
163
|
+
)
|
164
164
|
build_job_name = config.get("build-job-name", "wandb-launch-container-build")
|
165
165
|
secret_name = config.get("secret-name", "")
|
166
166
|
secret_key = config.get("secret-key", "")
|
@@ -170,6 +170,7 @@ class KanikoBuilder(AbstractBuilder):
|
|
170
170
|
image_uri = config.get("destination")
|
171
171
|
if image_uri is not None:
|
172
172
|
registry = registry_from_uri(image_uri)
|
173
|
+
|
173
174
|
return cls(
|
174
175
|
environment,
|
175
176
|
registry,
|
@@ -186,9 +187,8 @@ class KanikoBuilder(AbstractBuilder):
|
|
186
187
|
Raises:
|
187
188
|
LaunchError: If the builder config is invalid.
|
188
189
|
"""
|
189
|
-
if self.
|
190
|
-
|
191
|
-
await self.environment.verify_storage_uri(self.build_context_store)
|
190
|
+
if self.build_context_store:
|
191
|
+
await self.environment.verify_storage_uri(self.build_context_store)
|
192
192
|
|
193
193
|
def login(self) -> None:
|
194
194
|
"""Login to the registry."""
|
@@ -197,8 +197,6 @@ class KanikoBuilder(AbstractBuilder):
|
|
197
197
|
async def _create_docker_ecr_config_map(
|
198
198
|
self, job_name: str, corev1_client: client.CoreV1Api, repository: str
|
199
199
|
) -> None:
|
200
|
-
if self.registry is None:
|
201
|
-
raise LaunchError("No registry specified for Kaniko build.")
|
202
200
|
username, password = await self.registry.get_username_password()
|
203
201
|
encoded = base64.b64encode(f"{username}:{password}".encode()).decode("utf-8")
|
204
202
|
ecr_config_map = client.V1ConfigMap(
|
@@ -235,11 +233,21 @@ class KanikoBuilder(AbstractBuilder):
|
|
235
233
|
with tarfile.TarFile.open(fileobj=context_file, mode="w:gz") as context_tgz:
|
236
234
|
context_tgz.add(context_path, arcname=".")
|
237
235
|
context_file.close()
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
236
|
+
if PVC_MOUNT_PATH is None:
|
237
|
+
destination = f"{self.build_context_store}/{run_id}.tgz"
|
238
|
+
if self.environment is None:
|
239
|
+
raise LaunchError("No environment specified for Kaniko build.")
|
240
|
+
await self.environment.upload_file(context_file.name, destination)
|
241
|
+
return destination
|
242
|
+
else:
|
243
|
+
destination = f"{PVC_MOUNT_PATH}/{run_id}.tgz"
|
244
|
+
try:
|
245
|
+
shutil.copy(context_file.name, destination)
|
246
|
+
except Exception as e:
|
247
|
+
raise LaunchError(
|
248
|
+
f"Error copying build context to PVC mounted at {PVC_MOUNT_PATH}: {e}"
|
249
|
+
) from e
|
250
|
+
return f"tar:///context/{run_id}.tgz"
|
243
251
|
|
244
252
|
async def build_image(
|
245
253
|
self,
|
@@ -248,9 +256,6 @@ class KanikoBuilder(AbstractBuilder):
|
|
248
256
|
job_tracker: Optional[JobAndRunStatusTracker] = None,
|
249
257
|
) -> str:
|
250
258
|
await self.verify()
|
251
|
-
# TODO: this should probably throw an error if the registry is a local registry
|
252
|
-
if not self.registry:
|
253
|
-
raise LaunchError("No registry specified for Kaniko build.")
|
254
259
|
# kaniko builder doesn't seem to work with a custom user id, need more investigation
|
255
260
|
dockerfile_str = generate_dockerfile(
|
256
261
|
launch_project=launch_project,
|
@@ -262,7 +267,6 @@ class KanikoBuilder(AbstractBuilder):
|
|
262
267
|
image_tag = image_tag_from_dockerfile_and_source(launch_project, dockerfile_str)
|
263
268
|
repo_uri = await self.registry.get_repo_uri()
|
264
269
|
image_uri = repo_uri + ":" + image_tag
|
265
|
-
|
266
270
|
if (
|
267
271
|
not launch_project.build_required()
|
268
272
|
and await self.registry.check_image_exists(image_uri)
|
@@ -271,16 +275,6 @@ class KanikoBuilder(AbstractBuilder):
|
|
271
275
|
|
272
276
|
_logger.info(f"Building image {image_uri}...")
|
273
277
|
|
274
|
-
entry_cmd = " ".join(
|
275
|
-
get_entry_point_command(entrypoint, launch_project.override_args)
|
276
|
-
)
|
277
|
-
|
278
|
-
create_metadata_file(
|
279
|
-
launch_project,
|
280
|
-
image_uri,
|
281
|
-
sanitize_wandb_api_key(entry_cmd),
|
282
|
-
sanitize_wandb_api_key(dockerfile_str),
|
283
|
-
)
|
284
278
|
context_path = _create_docker_build_ctx(launch_project, dockerfile_str)
|
285
279
|
run_id = launch_project.run_id
|
286
280
|
|
@@ -381,6 +375,20 @@ class KanikoBuilder(AbstractBuilder):
|
|
381
375
|
env = []
|
382
376
|
volume_mounts = []
|
383
377
|
volumes = []
|
378
|
+
|
379
|
+
if PVC_MOUNT_PATH:
|
380
|
+
volumes.append(
|
381
|
+
client.V1Volume(
|
382
|
+
name="kaniko-pvc",
|
383
|
+
persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
|
384
|
+
claim_name=PVC_NAME
|
385
|
+
),
|
386
|
+
)
|
387
|
+
)
|
388
|
+
volume_mounts.append(
|
389
|
+
client.V1VolumeMount(name="kaniko-pvc", mount_path="/context")
|
390
|
+
)
|
391
|
+
|
384
392
|
if bool(self.secret_name) != bool(self.secret_key):
|
385
393
|
raise LaunchError(
|
386
394
|
"Both secret_name and secret_key or neither must be specified "
|
@@ -419,8 +427,27 @@ class KanikoBuilder(AbstractBuilder):
|
|
419
427
|
),
|
420
428
|
)
|
421
429
|
]
|
422
|
-
|
423
|
-
|
430
|
+
if DOCKER_CONFIG_SECRET:
|
431
|
+
volumes.append(
|
432
|
+
client.V1Volume(
|
433
|
+
name="kaniko-docker-config",
|
434
|
+
secret=client.V1SecretVolumeSource(
|
435
|
+
secret_name=DOCKER_CONFIG_SECRET,
|
436
|
+
items=[
|
437
|
+
client.V1KeyToPath(
|
438
|
+
key=".dockerconfigjson", path="config.json"
|
439
|
+
)
|
440
|
+
],
|
441
|
+
),
|
442
|
+
)
|
443
|
+
)
|
444
|
+
volume_mounts.append(
|
445
|
+
client.V1VolumeMount(
|
446
|
+
name="kaniko-docker-config",
|
447
|
+
mount_path="/kaniko/.docker",
|
448
|
+
)
|
449
|
+
)
|
450
|
+
elif self.secret_name and self.secret_key:
|
424
451
|
volumes += [
|
425
452
|
client.V1Volume(
|
426
453
|
name="docker-config",
|
wandb/sdk/launch/create_job.py
CHANGED
@@ -315,39 +315,6 @@ def _create_repo_metadata(
|
|
315
315
|
wandb.termerror(f"Entrypoint {entrypoint} not found in git repo")
|
316
316
|
return None
|
317
317
|
|
318
|
-
# check if requirements.txt exists
|
319
|
-
# start at the location of the python file and recurse up to the git root
|
320
|
-
entrypoint_dir = os.path.dirname(entrypoint)
|
321
|
-
if entrypoint_dir:
|
322
|
-
req_dir = os.path.join(local_dir, entrypoint_dir)
|
323
|
-
else:
|
324
|
-
req_dir = local_dir
|
325
|
-
|
326
|
-
# If there is a Dockerfile.wandb in the starting rec dir, don't require a requirements.txt
|
327
|
-
if os.path.exists(os.path.join(req_dir, "Dockerfile.wandb")):
|
328
|
-
wandb.termlog(
|
329
|
-
f"Using Dockerfile.wandb in {req_dir.replace(tempdir, '') or 'repository root'}"
|
330
|
-
)
|
331
|
-
else:
|
332
|
-
while (
|
333
|
-
not os.path.exists(os.path.join(req_dir, "requirements.txt"))
|
334
|
-
and req_dir != tempdir
|
335
|
-
):
|
336
|
-
req_dir = os.path.dirname(req_dir)
|
337
|
-
|
338
|
-
if not os.path.exists(os.path.join(req_dir, "requirements.txt")):
|
339
|
-
path_with_subdir = os.path.dirname(
|
340
|
-
os.path.join(path or "", entrypoint or "")
|
341
|
-
)
|
342
|
-
wandb.termerror(
|
343
|
-
f"Could not find requirements.txt file in git repo at {path_with_subdir}"
|
344
|
-
)
|
345
|
-
return None
|
346
|
-
|
347
|
-
wandb.termlog(
|
348
|
-
f"Using requirements.txt in {req_dir.replace(tempdir, '') or 'repository root'}"
|
349
|
-
)
|
350
|
-
|
351
318
|
metadata = {
|
352
319
|
"git": {
|
353
320
|
"commit": commit,
|
@@ -366,18 +333,16 @@ def _create_repo_metadata(
|
|
366
333
|
def _create_artifact_metadata(
|
367
334
|
path: str, entrypoint: str, runtime: Optional[str] = None
|
368
335
|
) -> Tuple[Dict[str, Any], List[str]]:
|
369
|
-
if not os.path.exists(path):
|
336
|
+
if not os.path.isdir(path):
|
370
337
|
wandb.termerror("Path must be a valid file or directory")
|
371
338
|
return {}, []
|
372
339
|
|
373
|
-
if not os.path.exists(os.path.join(path, "requirements.txt")):
|
374
|
-
wandb.termerror(f"Could not find requirements.txt file in: {path}")
|
375
|
-
return {}, []
|
376
|
-
|
377
340
|
# read local requirements.txt and dump to temp dir for builder
|
378
341
|
requirements = []
|
379
|
-
|
380
|
-
|
342
|
+
depspath = os.path.join(path, "requirements.txt")
|
343
|
+
if os.path.exists(depspath):
|
344
|
+
with open(depspath) as f:
|
345
|
+
requirements = f.read().splitlines()
|
381
346
|
|
382
347
|
if runtime:
|
383
348
|
python_version = _clean_python_version(runtime)
|
@@ -431,6 +396,7 @@ def _configure_job_builder_for_partial(tmpdir: str, job_source: str) -> JobBuild
|
|
431
396
|
settings.update({"files_dir": tmpdir, "job_source": job_source})
|
432
397
|
job_builder = JobBuilder(
|
433
398
|
settings=settings, # type: ignore
|
399
|
+
verbose=True,
|
434
400
|
)
|
435
401
|
# never allow notebook runs
|
436
402
|
job_builder._is_notebook_run = False
|
wandb/sdk/launch/loader.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
"""Utilities for the agent."""
|
2
2
|
from typing import Any, Dict, Optional
|
3
3
|
|
4
|
+
import wandb
|
4
5
|
from wandb.apis.internal import Api
|
5
6
|
from wandb.docker import is_docker_installed
|
6
7
|
from wandb.sdk.launch.errors import LaunchError
|
@@ -87,6 +88,15 @@ def registry_from_config(
|
|
87
88
|
from .registry.local_registry import LocalRegistry
|
88
89
|
|
89
90
|
return LocalRegistry() # This is the default, dummy registry.
|
91
|
+
|
92
|
+
wandb.termwarn(
|
93
|
+
"The `registry` block of the launch agent config is being deprecated. "
|
94
|
+
"Please specify an image repository URI under the `builder.destination` "
|
95
|
+
"key of your launch agent config. See "
|
96
|
+
"https://docs.wandb.ai/guides/launch/setup-agent-advanced#agent-configuration "
|
97
|
+
"for more information."
|
98
|
+
)
|
99
|
+
|
90
100
|
registry_type = config.get("type")
|
91
101
|
if registry_type is None or registry_type == "local":
|
92
102
|
from .registry.local_registry import LocalRegistry
|
@@ -0,0 +1,29 @@
|
|
1
|
+
from typing import Tuple
|
2
|
+
|
3
|
+
from wandb.docker import is_docker_installed
|
4
|
+
from wandb.sdk.launch.utils import docker_image_exists
|
5
|
+
|
6
|
+
from .abstract import AbstractRegistry
|
7
|
+
|
8
|
+
|
9
|
+
class AnonynmousRegistry(AbstractRegistry):
|
10
|
+
def __init__(self, uri: str) -> None:
|
11
|
+
"""Initialize the registry."""
|
12
|
+
self.uri = uri
|
13
|
+
|
14
|
+
async def get_username_password(self) -> Tuple[str, str]:
|
15
|
+
"""Get the username and password for the registry."""
|
16
|
+
raise NotImplementedError("Anonymous registry does not require authentication")
|
17
|
+
|
18
|
+
async def get_repo_uri(self) -> str:
|
19
|
+
return self.uri
|
20
|
+
|
21
|
+
async def check_image_exists(self, image_uri: str) -> bool:
|
22
|
+
"""Check if an image exists in the registry."""
|
23
|
+
if not is_docker_installed():
|
24
|
+
return False
|
25
|
+
return docker_image_exists(image_uri)
|
26
|
+
|
27
|
+
@classmethod
|
28
|
+
def from_config(cls, config: dict) -> "AbstractRegistry":
|
29
|
+
return cls(uri=config["uri"])
|
@@ -2,6 +2,7 @@
|
|
2
2
|
import logging
|
3
3
|
from typing import Tuple
|
4
4
|
|
5
|
+
from wandb.docker import is_docker_installed
|
5
6
|
from wandb.sdk.launch.errors import LaunchError
|
6
7
|
from wandb.sdk.launch.utils import docker_image_exists
|
7
8
|
|
@@ -60,4 +61,6 @@ class LocalRegistry(AbstractRegistry):
|
|
60
61
|
Returns:
|
61
62
|
bool: True.
|
62
63
|
"""
|
63
|
-
|
64
|
+
if is_docker_installed():
|
65
|
+
return docker_image_exists(image_uri)
|
66
|
+
return False
|
@@ -1,6 +1,7 @@
|
|
1
1
|
"""Implementation of KubernetesRunner class for wandb launch."""
|
2
2
|
import asyncio
|
3
3
|
import base64
|
4
|
+
import datetime
|
4
5
|
import json
|
5
6
|
import logging
|
6
7
|
import os
|
@@ -23,6 +24,7 @@ from wandb.sdk.launch.runner.kubernetes_monitor import (
|
|
23
24
|
CustomResource,
|
24
25
|
LaunchKubernetesMonitor,
|
25
26
|
)
|
27
|
+
from wandb.sdk.lib.retry import ExponentialBackoff, retry_async
|
26
28
|
from wandb.util import get_module
|
27
29
|
|
28
30
|
from .._project_spec import EntryPoint, LaunchProject
|
@@ -59,6 +61,7 @@ from kubernetes_asyncio.client.models.v1_secret import ( # type: ignore # noqa:
|
|
59
61
|
from kubernetes_asyncio.client.rest import ApiException # type: ignore # noqa: E402
|
60
62
|
|
61
63
|
TIMEOUT = 5
|
64
|
+
API_KEY_SECRET_MAX_RETRIES = 5
|
62
65
|
|
63
66
|
_logger = logging.getLogger(__name__)
|
64
67
|
|
@@ -421,8 +424,23 @@ class KubernetesRunner(AbstractRunner):
|
|
421
424
|
else:
|
422
425
|
secret_name += f"-{launch_project.run_id}"
|
423
426
|
|
424
|
-
|
425
|
-
|
427
|
+
def handle_exception(e):
|
428
|
+
wandb.termwarn(
|
429
|
+
f"Exception when ensuring Kubernetes API key secret: {e}. Retrying..."
|
430
|
+
)
|
431
|
+
|
432
|
+
api_key_secret = await retry_async(
|
433
|
+
backoff=ExponentialBackoff(
|
434
|
+
initial_sleep=datetime.timedelta(seconds=1),
|
435
|
+
max_sleep=datetime.timedelta(minutes=1),
|
436
|
+
max_retries=API_KEY_SECRET_MAX_RETRIES,
|
437
|
+
),
|
438
|
+
fn=ensure_api_key_secret,
|
439
|
+
on_exc=handle_exception,
|
440
|
+
core_api=core_api,
|
441
|
+
secret_name=secret_name,
|
442
|
+
namespace=namespace,
|
443
|
+
api_key=value,
|
426
444
|
)
|
427
445
|
env.append(
|
428
446
|
{
|
@@ -148,15 +148,14 @@ class LocalContainerRunner(AbstractRunner):
|
|
148
148
|
env_vars["WANDB_BASE_URL"] = "http://host.docker.internal:9001"
|
149
149
|
|
150
150
|
if launch_project.docker_image:
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
)
|
151
|
+
try:
|
152
|
+
pull_docker_image(image_uri)
|
153
|
+
except Exception as e:
|
154
|
+
wandb.termwarn(f"Error attempting to pull docker image {image_uri}")
|
155
|
+
if not docker_image_exists(image_uri):
|
156
|
+
raise LaunchError(
|
157
|
+
f"Failed to pull docker image {image_uri} with error: {e}"
|
158
|
+
)
|
160
159
|
|
161
160
|
assert launch_project.docker_image == image_uri
|
162
161
|
|
@@ -234,7 +233,13 @@ def _thread_process_runner(
|
|
234
233
|
if not chunk:
|
235
234
|
break
|
236
235
|
index = chunk.find(b"\r")
|
237
|
-
decoded_chunk = chunk.decode()
|
236
|
+
decoded_chunk = None
|
237
|
+
while not decoded_chunk:
|
238
|
+
try:
|
239
|
+
decoded_chunk = chunk.decode()
|
240
|
+
except UnicodeDecodeError:
|
241
|
+
# Multi-byte character cut off, try to get the rest of it
|
242
|
+
chunk += os.read(process.stdout.fileno(), 1) # type: ignore
|
238
243
|
if index != -1:
|
239
244
|
run._stdout += decoded_chunk
|
240
245
|
print(chunk.decode(), end="")
|
@@ -1,6 +1,7 @@
|
|
1
1
|
"""Abstract Scheduler class."""
|
2
2
|
import asyncio
|
3
3
|
import base64
|
4
|
+
import copy
|
4
5
|
import logging
|
5
6
|
import os
|
6
7
|
import socket
|
@@ -24,7 +25,10 @@ from wandb.sdk.launch.sweeps.utils import (
|
|
24
25
|
create_sweep_command_args,
|
25
26
|
make_launch_sweep_entrypoint,
|
26
27
|
)
|
27
|
-
from wandb.sdk.launch.utils import event_loop_thread_exec
|
28
|
+
from wandb.sdk.launch.utils import (
|
29
|
+
event_loop_thread_exec,
|
30
|
+
strip_resource_args_and_template_vars,
|
31
|
+
)
|
28
32
|
from wandb.sdk.lib.runid import generate_id
|
29
33
|
|
30
34
|
if TYPE_CHECKING:
|
@@ -658,7 +662,7 @@ class Scheduler(ABC):
|
|
658
662
|
pidx = entry_point.index("${program}")
|
659
663
|
entry_point[pidx] = self._sweep_config["program"]
|
660
664
|
|
661
|
-
launch_config = self._wandb_run.config.get("launch", {})
|
665
|
+
launch_config = copy.deepcopy(self._wandb_run.config.get("launch", {}))
|
662
666
|
if "overrides" not in launch_config:
|
663
667
|
launch_config["overrides"] = {"run_config": {}}
|
664
668
|
launch_config["overrides"]["run_config"].update(args["args_dict"])
|
@@ -694,11 +698,14 @@ class Scheduler(ABC):
|
|
694
698
|
)
|
695
699
|
|
696
700
|
# override resource and args of job
|
697
|
-
_job_launch_config = self._wandb_run.config.get("launch") or {}
|
701
|
+
_job_launch_config = copy.deepcopy(self._wandb_run.config.get("launch")) or {}
|
698
702
|
|
699
703
|
# default priority is "medium"
|
700
704
|
_priority = int(launch_config.get("priority", 2)) # type: ignore
|
701
705
|
|
706
|
+
# strip resource_args and template_variables from launch_config
|
707
|
+
strip_resource_args_and_template_vars(_job_launch_config)
|
708
|
+
|
702
709
|
run_id = run.id or generate_id()
|
703
710
|
queued_run = launch_add(
|
704
711
|
run_id=run_id,
|
@@ -712,6 +719,7 @@ class Scheduler(ABC):
|
|
712
719
|
project_queue=self._project_queue,
|
713
720
|
resource=_job_launch_config.get("resource"),
|
714
721
|
resource_args=_job_launch_config.get("resource_args"),
|
722
|
+
template_variables=_job_launch_config.get("template_variables"),
|
715
723
|
author=self._kwargs.get("author"),
|
716
724
|
sweep_id=self._sweep_id,
|
717
725
|
priority=_priority,
|
wandb/sdk/launch/utils.py
CHANGED
@@ -221,6 +221,17 @@ def get_default_entity(api: Api, launch_config: Optional[Dict[str, Any]]):
|
|
221
221
|
return config_entity or api.default_entity
|
222
222
|
|
223
223
|
|
224
|
+
def strip_resource_args_and_template_vars(launch_spec: Dict[str, Any]) -> None:
|
225
|
+
if launch_spec.get("resource_args", None) and launch_spec.get(
|
226
|
+
"template_variables", None
|
227
|
+
):
|
228
|
+
wandb.termwarn(
|
229
|
+
"Launch spec contains both resource_args and template_variables, "
|
230
|
+
"only one can be set. Using template_variables."
|
231
|
+
)
|
232
|
+
launch_spec.pop("resource_args")
|
233
|
+
|
234
|
+
|
224
235
|
def construct_launch_spec(
|
225
236
|
uri: Optional[str],
|
226
237
|
job: Optional[str],
|
@@ -298,6 +309,9 @@ def construct_launch_spec(
|
|
298
309
|
else:
|
299
310
|
launch_config["registry"] = {"url": repository}
|
300
311
|
|
312
|
+
# dont send both resource args and template variables
|
313
|
+
strip_resource_args_and_template_vars(launch_spec)
|
314
|
+
|
301
315
|
return launch_spec
|
302
316
|
|
303
317
|
|
wandb/sdk/lib/__init__.py
CHANGED
@@ -15,6 +15,7 @@ _Setting = Literal[
|
|
15
15
|
"_aws_lambda",
|
16
16
|
"_async_upload_concurrency_limit",
|
17
17
|
"_cli_only_mode",
|
18
|
+
"_code_path_local",
|
18
19
|
"_colab",
|
19
20
|
"_cuda",
|
20
21
|
"_disable_meta",
|
@@ -101,6 +102,7 @@ _Setting = Literal[
|
|
101
102
|
"entity",
|
102
103
|
"files_dir",
|
103
104
|
"force",
|
105
|
+
"fork_from",
|
104
106
|
"git_commit",
|
105
107
|
"git_remote",
|
106
108
|
"git_remote_url",
|
@@ -191,6 +193,8 @@ SETTINGS_TOPOLOGICALLY_SORTED: Final[Tuple[_Setting, ...]] = (
|
|
191
193
|
"run_id",
|
192
194
|
"start_method",
|
193
195
|
"_aws_lambda",
|
196
|
+
"program",
|
197
|
+
"_code_path_local",
|
194
198
|
"_colab",
|
195
199
|
"_disable_machine_info",
|
196
200
|
"_disable_meta",
|
@@ -227,7 +231,6 @@ SETTINGS_TOPOLOGICALLY_SORTED: Final[Tuple[_Setting, ...]] = (
|
|
227
231
|
"log_symlink_internal",
|
228
232
|
"log_symlink_user",
|
229
233
|
"log_user",
|
230
|
-
"program",
|
231
234
|
"project_url",
|
232
235
|
"resume_fname",
|
233
236
|
"run_url",
|
wandb/sdk/lib/apikey.py
CHANGED
@@ -179,11 +179,6 @@ def write_netrc(host: str, entity: str, key: str) -> Optional[bool]:
|
|
179
179
|
return None
|
180
180
|
try:
|
181
181
|
normalized_host = urlparse(host).netloc.split(":")[0]
|
182
|
-
if normalized_host != "localhost" and "." not in normalized_host:
|
183
|
-
wandb.termerror(
|
184
|
-
f"Host must be a url in the form https://some.address.com, received {host}"
|
185
|
-
)
|
186
|
-
return None
|
187
182
|
netrc_path = get_netrc_file_path()
|
188
183
|
wandb.termlog(
|
189
184
|
f"Appending key for {normalized_host} to your netrc file: {netrc_path}"
|