wandb 0.16.3__py3-none-any.whl → 0.16.5__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- wandb/__init__.py +2 -2
- wandb/agents/pyagent.py +1 -1
- wandb/apis/importers/__init__.py +1 -4
- wandb/apis/importers/internals/internal.py +386 -0
- wandb/apis/importers/internals/protocols.py +125 -0
- wandb/apis/importers/internals/util.py +78 -0
- wandb/apis/importers/mlflow.py +125 -88
- wandb/apis/importers/validation.py +108 -0
- wandb/apis/importers/wandb.py +1604 -0
- wandb/apis/public/api.py +7 -10
- wandb/apis/public/artifacts.py +38 -0
- wandb/apis/public/files.py +11 -2
- wandb/apis/reports/v2/__init__.py +0 -19
- wandb/apis/reports/v2/expr_parsing.py +0 -1
- wandb/apis/reports/v2/interface.py +15 -18
- wandb/apis/reports/v2/internal.py +12 -45
- wandb/cli/cli.py +52 -55
- wandb/integration/gym/__init__.py +2 -1
- wandb/integration/keras/callbacks/model_checkpoint.py +1 -1
- wandb/integration/keras/keras.py +6 -4
- wandb/integration/kfp/kfp_patch.py +2 -2
- wandb/integration/openai/fine_tuning.py +1 -2
- wandb/integration/ultralytics/callback.py +0 -1
- wandb/proto/v3/wandb_internal_pb2.py +332 -312
- wandb/proto/v3/wandb_settings_pb2.py +13 -3
- wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
- wandb/proto/v4/wandb_internal_pb2.py +316 -312
- wandb/proto/v4/wandb_settings_pb2.py +5 -3
- wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
- wandb/sdk/artifacts/artifact.py +75 -31
- wandb/sdk/artifacts/artifact_manifest.py +5 -2
- wandb/sdk/artifacts/artifact_manifest_entry.py +6 -1
- wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +8 -2
- wandb/sdk/artifacts/artifact_saver.py +19 -47
- wandb/sdk/artifacts/storage_handler.py +2 -1
- wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +22 -9
- wandb/sdk/artifacts/storage_policy.py +4 -1
- wandb/sdk/data_types/base_types/wb_value.py +1 -1
- wandb/sdk/data_types/image.py +2 -2
- wandb/sdk/interface/interface.py +49 -13
- wandb/sdk/interface/interface_shared.py +17 -11
- wandb/sdk/internal/file_stream.py +20 -1
- wandb/sdk/internal/handler.py +1 -4
- wandb/sdk/internal/internal_api.py +3 -1
- wandb/sdk/internal/job_builder.py +49 -19
- wandb/sdk/internal/profiler.py +1 -1
- wandb/sdk/internal/sender.py +96 -124
- wandb/sdk/internal/sender_config.py +197 -0
- wandb/sdk/internal/settings_static.py +9 -0
- wandb/sdk/internal/system/system_info.py +5 -3
- wandb/sdk/internal/update.py +1 -1
- wandb/sdk/launch/_launch.py +3 -3
- wandb/sdk/launch/_launch_add.py +28 -29
- wandb/sdk/launch/_project_spec.py +148 -136
- wandb/sdk/launch/agent/agent.py +3 -7
- wandb/sdk/launch/agent/config.py +0 -27
- wandb/sdk/launch/builder/build.py +54 -28
- wandb/sdk/launch/builder/docker_builder.py +4 -15
- wandb/sdk/launch/builder/kaniko_builder.py +72 -45
- wandb/sdk/launch/create_job.py +6 -40
- wandb/sdk/launch/loader.py +10 -0
- wandb/sdk/launch/registry/anon.py +29 -0
- wandb/sdk/launch/registry/local_registry.py +4 -1
- wandb/sdk/launch/runner/kubernetes_runner.py +20 -2
- wandb/sdk/launch/runner/local_container.py +15 -10
- wandb/sdk/launch/runner/sagemaker_runner.py +1 -1
- wandb/sdk/launch/sweeps/scheduler.py +11 -3
- wandb/sdk/launch/utils.py +14 -0
- wandb/sdk/lib/__init__.py +2 -5
- wandb/sdk/lib/_settings_toposort_generated.py +4 -1
- wandb/sdk/lib/apikey.py +0 -5
- wandb/sdk/lib/config_util.py +0 -31
- wandb/sdk/lib/filesystem.py +11 -1
- wandb/sdk/lib/run_moment.py +72 -0
- wandb/sdk/service/service.py +7 -2
- wandb/sdk/service/streams.py +1 -6
- wandb/sdk/verify/verify.py +2 -1
- wandb/sdk/wandb_init.py +12 -1
- wandb/sdk/wandb_login.py +43 -26
- wandb/sdk/wandb_run.py +164 -110
- wandb/sdk/wandb_settings.py +58 -16
- wandb/testing/relay.py +5 -6
- wandb/util.py +50 -7
- {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/METADATA +8 -1
- {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/RECORD +89 -82
- {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/WHEEL +1 -1
- wandb/apis/importers/base.py +0 -400
- {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/LICENSE +0 -0
- {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/entry_points.txt +0 -0
- {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/top_level.txt +0 -0
wandb/sdk/launch/_launch_add.py
CHANGED
@@ -6,7 +6,6 @@ import wandb
|
|
6
6
|
import wandb.apis.public as public
|
7
7
|
from wandb.apis.internal import Api
|
8
8
|
from wandb.errors import CommError
|
9
|
-
from wandb.sdk.launch._project_spec import create_project_from_spec
|
10
9
|
from wandb.sdk.launch.builder.build import build_image_from_project
|
11
10
|
from wandb.sdk.launch.errors import LaunchError
|
12
11
|
from wandb.sdk.launch.utils import (
|
@@ -16,6 +15,8 @@ from wandb.sdk.launch.utils import (
|
|
16
15
|
validate_launch_spec_source,
|
17
16
|
)
|
18
17
|
|
18
|
+
from ._project_spec import LaunchProject
|
19
|
+
|
19
20
|
|
20
21
|
def push_to_queue(
|
21
22
|
api: Api,
|
@@ -106,34 +107,32 @@ def launch_add(
|
|
106
107
|
"""
|
107
108
|
api = Api()
|
108
109
|
|
109
|
-
return
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
priority=priority,
|
132
|
-
)
|
110
|
+
return _launch_add(
|
111
|
+
api,
|
112
|
+
uri,
|
113
|
+
job,
|
114
|
+
config,
|
115
|
+
template_variables,
|
116
|
+
project,
|
117
|
+
entity,
|
118
|
+
queue_name,
|
119
|
+
resource,
|
120
|
+
entry_point,
|
121
|
+
name,
|
122
|
+
version,
|
123
|
+
docker_image,
|
124
|
+
project_queue,
|
125
|
+
resource_args,
|
126
|
+
run_id=run_id,
|
127
|
+
build=build,
|
128
|
+
repository=repository,
|
129
|
+
sweep_id=sweep_id,
|
130
|
+
author=author,
|
131
|
+
priority=priority,
|
133
132
|
)
|
134
133
|
|
135
134
|
|
136
|
-
|
135
|
+
def _launch_add(
|
137
136
|
api: Api,
|
138
137
|
uri: Optional[str],
|
139
138
|
job: Optional[str],
|
@@ -185,9 +184,9 @@ async def _launch_add(
|
|
185
184
|
wandb.termwarn("Build doesn't support setting a job. Overwriting job.")
|
186
185
|
launch_spec["job"] = None
|
187
186
|
|
188
|
-
launch_project =
|
189
|
-
docker_image_uri =
|
190
|
-
launch_project, api, config or {}
|
187
|
+
launch_project = LaunchProject.from_spec(launch_spec, api)
|
188
|
+
docker_image_uri = asyncio.run(
|
189
|
+
build_image_from_project(launch_project, api, config or {})
|
191
190
|
)
|
192
191
|
run = wandb.run or wandb.init(
|
193
192
|
project=launch_spec["project"],
|
@@ -3,7 +3,6 @@
|
|
3
3
|
Arguments can come from a launch spec or call to wandb launch.
|
4
4
|
"""
|
5
5
|
import enum
|
6
|
-
import json
|
7
6
|
import logging
|
8
7
|
import os
|
9
8
|
import tempfile
|
@@ -25,7 +24,6 @@ if TYPE_CHECKING:
|
|
25
24
|
|
26
25
|
_logger = logging.getLogger(__name__)
|
27
26
|
|
28
|
-
DEFAULT_LAUNCH_METADATA_PATH = "launch_metadata.json"
|
29
27
|
|
30
28
|
# need to make user root for sagemaker, so users have access to /opt/ml directories
|
31
29
|
# that let users create artifacts and access input data
|
@@ -46,7 +44,22 @@ class EntrypointDefaults(List[str]):
|
|
46
44
|
|
47
45
|
|
48
46
|
class LaunchProject:
|
49
|
-
"""A launch project specification.
|
47
|
+
"""A launch project specification.
|
48
|
+
|
49
|
+
The LaunchProject is initialized from a raw launch spec an internal API
|
50
|
+
object. The project encapsulates logic for taking a launch spec and converting
|
51
|
+
it into the executable code.
|
52
|
+
|
53
|
+
The LaunchProject needs to ultimately produce a full container spec for
|
54
|
+
execution in docker, k8s, sagemaker, or vertex. This container spec includes:
|
55
|
+
- container image uri
|
56
|
+
- environment variables for configuring wandb etc.
|
57
|
+
- entrypoint command and arguments
|
58
|
+
- additional arguments specific to the target resource (e.g. instance type, node selector)
|
59
|
+
|
60
|
+
This class is stateful and certain methods can only be called after
|
61
|
+
`LaunchProject.fetch_and_validate_project()` has been called.
|
62
|
+
"""
|
50
63
|
|
51
64
|
def __init__(
|
52
65
|
self,
|
@@ -122,7 +135,7 @@ class LaunchProject:
|
|
122
135
|
if override_entrypoint:
|
123
136
|
_logger.info("Adding override entry point")
|
124
137
|
self.override_entrypoint = EntryPoint(
|
125
|
-
name=
|
138
|
+
name=_get_entrypoint_file(override_entrypoint),
|
126
139
|
command=override_entrypoint,
|
127
140
|
)
|
128
141
|
|
@@ -160,22 +173,42 @@ class LaunchProject:
|
|
160
173
|
self.source = LaunchSource.LOCAL
|
161
174
|
self.project_dir = self.uri
|
162
175
|
|
163
|
-
|
176
|
+
def __repr__(self) -> str:
|
177
|
+
"""String representation of LaunchProject."""
|
178
|
+
if self.source == LaunchSource.JOB:
|
179
|
+
return f"{self.job}"
|
180
|
+
return f"{self.uri}"
|
164
181
|
|
165
|
-
@
|
166
|
-
def
|
167
|
-
"""
|
168
|
-
# TODO: this should likely be source_project when we have it...
|
182
|
+
@classmethod
|
183
|
+
def from_spec(cls, launch_spec: Dict[str, Any], api: Api) -> "LaunchProject":
|
184
|
+
"""Constructs a LaunchProject instance using a launch spec.
|
169
185
|
|
170
|
-
|
171
|
-
|
172
|
-
|
186
|
+
Arguments:
|
187
|
+
launch_spec: Dictionary representation of launch spec
|
188
|
+
api: Instance of wandb.apis.internal Api
|
173
189
|
|
174
|
-
|
175
|
-
|
176
|
-
|
190
|
+
Returns:
|
191
|
+
An initialized `LaunchProject` object
|
192
|
+
"""
|
193
|
+
name: Optional[str] = None
|
194
|
+
if launch_spec.get("name"):
|
195
|
+
name = launch_spec["name"]
|
196
|
+
return LaunchProject(
|
197
|
+
launch_spec.get("uri"),
|
198
|
+
launch_spec.get("job"),
|
199
|
+
api,
|
200
|
+
launch_spec,
|
201
|
+
launch_spec["entity"],
|
202
|
+
launch_spec["project"],
|
203
|
+
name,
|
204
|
+
launch_spec.get("docker", {}),
|
205
|
+
launch_spec.get("git", {}),
|
206
|
+
launch_spec.get("overrides", {}),
|
207
|
+
launch_spec.get("resource", None),
|
208
|
+
launch_spec.get("resource_args", {}),
|
209
|
+
launch_spec.get("run_id", None),
|
210
|
+
launch_spec.get("sweep_id", {}),
|
177
211
|
)
|
178
|
-
return self._base_image or generated_name
|
179
212
|
|
180
213
|
@property
|
181
214
|
def image_name(self) -> str:
|
@@ -215,15 +248,6 @@ class LaunchProject:
|
|
215
248
|
def run_queue_item_id(self, value: str) -> None:
|
216
249
|
self._run_queue_item_id = value
|
217
250
|
|
218
|
-
def _get_entrypoint_file(self, entrypoint: List[str]) -> Optional[str]:
|
219
|
-
if not entrypoint:
|
220
|
-
return None
|
221
|
-
if entrypoint[0].endswith(".py") or entrypoint[0].endswith(".sh"):
|
222
|
-
return entrypoint[0]
|
223
|
-
if len(entrypoint) < 2:
|
224
|
-
return None
|
225
|
-
return entrypoint[1]
|
226
|
-
|
227
251
|
def fill_macros(self, image: str) -> Dict[str, Any]:
|
228
252
|
"""Substitute values for macros in resource arguments.
|
229
253
|
|
@@ -277,10 +301,25 @@ class LaunchProject:
|
|
277
301
|
|
278
302
|
@property
|
279
303
|
def docker_image(self) -> Optional[str]:
|
304
|
+
"""Returns the Docker image associated with this LaunchProject.
|
305
|
+
|
306
|
+
This will only be set if an image_uri is being run outside a job.
|
307
|
+
|
308
|
+
Returns:
|
309
|
+
Optional[str]: The Docker image or None if not specified.
|
310
|
+
"""
|
280
311
|
return self._docker_image
|
281
312
|
|
282
313
|
@docker_image.setter
|
283
314
|
def docker_image(self, value: str) -> None:
|
315
|
+
"""Sets the Docker image for the project.
|
316
|
+
|
317
|
+
Args:
|
318
|
+
value (str): The Docker image to set.
|
319
|
+
|
320
|
+
Returns:
|
321
|
+
None
|
322
|
+
"""
|
284
323
|
self._docker_image = value
|
285
324
|
self._ensure_not_docker_image_and_local_process()
|
286
325
|
|
@@ -305,24 +344,39 @@ class LaunchProject:
|
|
305
344
|
self._entry_point = new_entrypoint
|
306
345
|
return new_entrypoint
|
307
346
|
|
308
|
-
def
|
309
|
-
|
310
|
-
raise LaunchError(
|
311
|
-
"Cannot specify docker image with local-process resource runner"
|
312
|
-
)
|
347
|
+
def fetch_and_validate_project(self) -> None:
|
348
|
+
"""Fetches a project into a local directory, adds the config values to the directory, and validates the first entrypoint for the project.
|
313
349
|
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
350
|
+
Arguments:
|
351
|
+
launch_project: LaunchProject to fetch and validate.
|
352
|
+
api: Instance of wandb.apis.internal Api
|
353
|
+
|
354
|
+
Returns:
|
355
|
+
A validated `LaunchProject` object.
|
356
|
+
|
357
|
+
"""
|
358
|
+
if self.source == LaunchSource.DOCKER:
|
359
|
+
return
|
360
|
+
if self.source == LaunchSource.LOCAL:
|
361
|
+
if not self._entry_point:
|
362
|
+
wandb.termlog(
|
363
|
+
f"{LOG_PREFIX}Entry point for repo not specified, defaulting to `python main.py`"
|
364
|
+
)
|
365
|
+
self.set_entry_point(EntrypointDefaults.PYTHON)
|
366
|
+
elif self.source == LaunchSource.JOB:
|
367
|
+
self._fetch_job()
|
368
|
+
else:
|
369
|
+
self._fetch_project_local(internal_api=self.api)
|
370
|
+
|
371
|
+
assert self.project_dir is not None
|
372
|
+
# this prioritizes pip, and we don't support any cases where both are present conda projects when uploaded to
|
373
|
+
# wandb become pip projects via requirements.frozen.txt, wandb doesn't preserve conda envs
|
374
|
+
if os.path.exists(
|
375
|
+
os.path.join(self.project_dir, "requirements.txt")
|
376
|
+
) or os.path.exists(os.path.join(self.project_dir, "requirements.frozen.txt")):
|
377
|
+
self.deps_type = "pip"
|
378
|
+
elif os.path.exists(os.path.join(self.project_dir, "environment.yml")):
|
379
|
+
self.deps_type = "conda"
|
326
380
|
|
327
381
|
def get_image_source_string(self) -> str:
|
328
382
|
"""Returns a unique string identifying the source of an image."""
|
@@ -349,6 +403,35 @@ class LaunchProject:
|
|
349
403
|
else:
|
350
404
|
raise LaunchError("Unknown source type when determing image source string")
|
351
405
|
|
406
|
+
def _ensure_not_docker_image_and_local_process(self) -> None:
|
407
|
+
"""Ensure that docker image is not specified with local-process resource runner.
|
408
|
+
|
409
|
+
Raises:
|
410
|
+
LaunchError: If docker image is specified with local-process resource runner.
|
411
|
+
"""
|
412
|
+
if self.docker_image is not None and self.resource == "local-process":
|
413
|
+
raise LaunchError(
|
414
|
+
"Cannot specify docker image with local-process resource runner"
|
415
|
+
)
|
416
|
+
|
417
|
+
def _fetch_job(self) -> None:
|
418
|
+
"""Fetches the job details from the public API and configures the launch project.
|
419
|
+
|
420
|
+
Raises:
|
421
|
+
LaunchError: If there is an error accessing the job.
|
422
|
+
"""
|
423
|
+
public_api = wandb.apis.public.Api()
|
424
|
+
job_dir = tempfile.mkdtemp()
|
425
|
+
try:
|
426
|
+
job = public_api.job(self.job, path=job_dir)
|
427
|
+
except CommError as e:
|
428
|
+
msg = e.message
|
429
|
+
raise LaunchError(
|
430
|
+
f"Error accessing job {self.job}: {msg} on {public_api.settings.get('base_url')}"
|
431
|
+
)
|
432
|
+
job.configure_launch_project(self)
|
433
|
+
self._job_artifact = job._job_artifact
|
434
|
+
|
352
435
|
def _fetch_project_local(self, internal_api: Api) -> None:
|
353
436
|
"""Fetch a project (either wandb run or git repo) into a local directory, returning the path to the local project directory."""
|
354
437
|
# these asserts are all guaranteed to pass, but are required by mypy
|
@@ -453,6 +536,24 @@ class LaunchProject:
|
|
453
536
|
self.git_version = branch_name
|
454
537
|
|
455
538
|
|
539
|
+
def _get_entrypoint_file(entrypoint: List[str]) -> Optional[str]:
|
540
|
+
"""Get the entrypoint file from the given command.
|
541
|
+
|
542
|
+
Args:
|
543
|
+
entrypoint (List[str]): List of command and arguments.
|
544
|
+
|
545
|
+
Returns:
|
546
|
+
Optional[str]: The entrypoint file if found, otherwise None.
|
547
|
+
"""
|
548
|
+
if not entrypoint:
|
549
|
+
return None
|
550
|
+
if entrypoint[0].endswith(".py") or entrypoint[0].endswith(".sh"):
|
551
|
+
return entrypoint[0]
|
552
|
+
if len(entrypoint) < 2:
|
553
|
+
return None
|
554
|
+
return entrypoint[1]
|
555
|
+
|
556
|
+
|
456
557
|
class EntryPoint:
|
457
558
|
"""An entry point into a wandb launch specification."""
|
458
559
|
|
@@ -467,6 +568,11 @@ class EntryPoint:
|
|
467
568
|
return ret + user_parameters
|
468
569
|
return ret
|
469
570
|
|
571
|
+
def update_entrypoint_path(self, new_path: str) -> None:
|
572
|
+
"""Updates the entrypoint path to a new path."""
|
573
|
+
if len(self.command) == 2 and self.command[0] in ["python", "bash"]:
|
574
|
+
self.command[1] = new_path
|
575
|
+
|
470
576
|
|
471
577
|
def get_entry_point_command(
|
472
578
|
entry_point: Optional["EntryPoint"], parameters: List[str]
|
@@ -483,97 +589,3 @@ def get_entry_point_command(
|
|
483
589
|
if entry_point is None:
|
484
590
|
return []
|
485
591
|
return entry_point.compute_command(parameters)
|
486
|
-
|
487
|
-
|
488
|
-
def create_project_from_spec(launch_spec: Dict[str, Any], api: Api) -> LaunchProject:
|
489
|
-
"""Constructs a LaunchProject instance using a launch spec.
|
490
|
-
|
491
|
-
Arguments:
|
492
|
-
launch_spec: Dictionary representation of launch spec
|
493
|
-
api: Instance of wandb.apis.internal Api
|
494
|
-
|
495
|
-
Returns:
|
496
|
-
An initialized `LaunchProject` object
|
497
|
-
"""
|
498
|
-
name: Optional[str] = None
|
499
|
-
if launch_spec.get("name"):
|
500
|
-
name = launch_spec["name"]
|
501
|
-
return LaunchProject(
|
502
|
-
launch_spec.get("uri"),
|
503
|
-
launch_spec.get("job"),
|
504
|
-
api,
|
505
|
-
launch_spec,
|
506
|
-
launch_spec["entity"],
|
507
|
-
launch_spec["project"],
|
508
|
-
name,
|
509
|
-
launch_spec.get("docker", {}),
|
510
|
-
launch_spec.get("git", {}),
|
511
|
-
launch_spec.get("overrides", {}),
|
512
|
-
launch_spec.get("resource", None),
|
513
|
-
launch_spec.get("resource_args", {}),
|
514
|
-
launch_spec.get("run_id", None),
|
515
|
-
launch_spec.get("sweep_id", {}),
|
516
|
-
)
|
517
|
-
|
518
|
-
|
519
|
-
def fetch_and_validate_project(
|
520
|
-
launch_project: LaunchProject, api: Api
|
521
|
-
) -> LaunchProject:
|
522
|
-
"""Fetches a project into a local directory, adds the config values to the directory, and validates the first entrypoint for the project.
|
523
|
-
|
524
|
-
Arguments:
|
525
|
-
launch_project: LaunchProject to fetch and validate.
|
526
|
-
api: Instance of wandb.apis.internal Api
|
527
|
-
|
528
|
-
Returns:
|
529
|
-
A validated `LaunchProject` object.
|
530
|
-
|
531
|
-
"""
|
532
|
-
if launch_project.source == LaunchSource.DOCKER:
|
533
|
-
return launch_project
|
534
|
-
if launch_project.source == LaunchSource.LOCAL:
|
535
|
-
if not launch_project._entry_point:
|
536
|
-
wandb.termlog(
|
537
|
-
f"{LOG_PREFIX}Entry point for repo not specified, defaulting to `python main.py`"
|
538
|
-
)
|
539
|
-
launch_project.set_entry_point(EntrypointDefaults.PYTHON)
|
540
|
-
elif launch_project.source == LaunchSource.JOB:
|
541
|
-
launch_project._fetch_job()
|
542
|
-
else:
|
543
|
-
launch_project._fetch_project_local(internal_api=api)
|
544
|
-
|
545
|
-
assert launch_project.project_dir is not None
|
546
|
-
# this prioritizes pip, and we don't support any cases where both are present conda projects when uploaded to
|
547
|
-
# wandb become pip projects via requirements.frozen.txt, wandb doesn't preserve conda envs
|
548
|
-
if os.path.exists(
|
549
|
-
os.path.join(launch_project.project_dir, "requirements.txt")
|
550
|
-
) or os.path.exists(
|
551
|
-
os.path.join(launch_project.project_dir, "requirements.frozen.txt")
|
552
|
-
):
|
553
|
-
launch_project.deps_type = "pip"
|
554
|
-
elif os.path.exists(os.path.join(launch_project.project_dir, "environment.yml")):
|
555
|
-
launch_project.deps_type = "conda"
|
556
|
-
|
557
|
-
return launch_project
|
558
|
-
|
559
|
-
|
560
|
-
def create_metadata_file(
|
561
|
-
launch_project: LaunchProject,
|
562
|
-
image_uri: str,
|
563
|
-
sanitized_entrypoint_str: str,
|
564
|
-
sanitized_dockerfile_contents: str,
|
565
|
-
) -> None:
|
566
|
-
assert launch_project.project_dir is not None
|
567
|
-
with open(
|
568
|
-
os.path.join(launch_project.project_dir, DEFAULT_LAUNCH_METADATA_PATH),
|
569
|
-
"w",
|
570
|
-
) as f:
|
571
|
-
json.dump(
|
572
|
-
{
|
573
|
-
**launch_project.launch_spec,
|
574
|
-
"image_uri": image_uri,
|
575
|
-
"command": sanitized_entrypoint_str,
|
576
|
-
"dockerfile_contents": sanitized_dockerfile_contents,
|
577
|
-
},
|
578
|
-
f,
|
579
|
-
)
|
wandb/sdk/launch/agent/agent.py
CHANGED
@@ -20,11 +20,7 @@ from wandb.sdk.launch.sweeps.scheduler import Scheduler
|
|
20
20
|
from wandb.sdk.lib import runid
|
21
21
|
|
22
22
|
from .. import loader
|
23
|
-
from .._project_spec import
|
24
|
-
LaunchProject,
|
25
|
-
create_project_from_spec,
|
26
|
-
fetch_and_validate_project,
|
27
|
-
)
|
23
|
+
from .._project_spec import LaunchProject
|
28
24
|
from ..builder.build import construct_agent_configs
|
29
25
|
from ..errors import LaunchDockerError, LaunchError
|
30
26
|
from ..utils import (
|
@@ -630,7 +626,7 @@ class LaunchAgent:
|
|
630
626
|
thread_id: int,
|
631
627
|
job_tracker: JobAndRunStatusTracker,
|
632
628
|
) -> None:
|
633
|
-
project =
|
629
|
+
project = LaunchProject.from_spec(launch_spec, api)
|
634
630
|
self._set_queue_and_rqi_in_project(project, job, job_tracker.queue)
|
635
631
|
ack = event_loop_thread_exec(api.ack_run_queue_item)
|
636
632
|
await ack(job["runQueueItemId"], project.run_id)
|
@@ -639,7 +635,7 @@ class LaunchAgent:
|
|
639
635
|
|
640
636
|
job_tracker.update_run_info(project)
|
641
637
|
_logger.info("Fetching and validating project...")
|
642
|
-
project
|
638
|
+
project.fetch_and_validate_project()
|
643
639
|
_logger.info("Fetching resource...")
|
644
640
|
resource = launch_spec.get("resource") or "local-container"
|
645
641
|
backend_config: Dict[str, Any] = {
|
wandb/sdk/launch/agent/config.py
CHANGED
@@ -128,22 +128,6 @@ class BuilderConfig(BaseModel):
|
|
128
128
|
"the image will be pushed to the registry.",
|
129
129
|
)
|
130
130
|
|
131
|
-
@validator("destination") # type: ignore
|
132
|
-
@classmethod
|
133
|
-
def validate_destination(cls, destination: str) -> str:
|
134
|
-
"""Validate that the destination is a valid container registry URI."""
|
135
|
-
for regex in [
|
136
|
-
GCP_ARTIFACT_REGISTRY_URI_REGEX,
|
137
|
-
AZURE_CONTAINER_REGISTRY_URI_REGEX,
|
138
|
-
ELASTIC_CONTAINER_REGISTRY_URI_REGEX,
|
139
|
-
]:
|
140
|
-
if regex.match(destination):
|
141
|
-
return destination
|
142
|
-
raise ValueError(
|
143
|
-
"Invalid destination. Destination must be a repository URI for an "
|
144
|
-
"ECR, ACR, or GCP Artifact Registry."
|
145
|
-
)
|
146
|
-
|
147
131
|
platform: Optional[TargetPlatform] = Field(
|
148
132
|
None,
|
149
133
|
description="The platform to use for the built image. If not provided, "
|
@@ -196,17 +180,6 @@ class BuilderConfig(BaseModel):
|
|
196
180
|
"S3 bucket, GCS bucket, or Azure blob."
|
197
181
|
)
|
198
182
|
|
199
|
-
@root_validator(pre=True) # type: ignore
|
200
|
-
@classmethod
|
201
|
-
def validate_kaniko(cls, values: dict) -> dict:
|
202
|
-
"""Validate that kaniko is configured correctly."""
|
203
|
-
if values.get("type") == BuilderType.kaniko:
|
204
|
-
if values.get("build-context-store") is None:
|
205
|
-
raise ValueError(
|
206
|
-
"builder.build-context-store is required if builder.type is set to kaniko."
|
207
|
-
)
|
208
|
-
return values
|
209
|
-
|
210
183
|
@root_validator(pre=True) # type: ignore
|
211
184
|
@classmethod
|
212
185
|
def validate_docker(cls, values: dict) -> dict:
|
@@ -2,13 +2,13 @@ import hashlib
|
|
2
2
|
import json
|
3
3
|
import logging
|
4
4
|
import os
|
5
|
+
import pathlib
|
5
6
|
import shlex
|
6
7
|
import shutil
|
7
8
|
import sys
|
8
9
|
import tempfile
|
9
10
|
from typing import Any, Dict, List, Optional, Tuple
|
10
11
|
|
11
|
-
import pkg_resources
|
12
12
|
import yaml
|
13
13
|
from dockerpycreds.utils import find_executable # type: ignore
|
14
14
|
from six.moves import shlex_quote
|
@@ -22,15 +22,12 @@ from wandb.sdk.launch.loader import (
|
|
22
22
|
environment_from_config,
|
23
23
|
registry_from_config,
|
24
24
|
)
|
25
|
+
from wandb.util import get_module
|
25
26
|
|
26
|
-
from .._project_spec import
|
27
|
-
EntryPoint,
|
28
|
-
EntrypointDefaults,
|
29
|
-
LaunchProject,
|
30
|
-
fetch_and_validate_project,
|
31
|
-
)
|
27
|
+
from .._project_spec import EntryPoint, EntrypointDefaults, LaunchProject
|
32
28
|
from ..errors import ExecutionError, LaunchError
|
33
29
|
from ..registry.abstract import AbstractRegistry
|
30
|
+
from ..registry.anon import AnonynmousRegistry
|
34
31
|
from ..utils import (
|
35
32
|
AZURE_CONTAINER_REGISTRY_URI_REGEX,
|
36
33
|
ELASTIC_CONTAINER_REGISTRY_URI_REGEX,
|
@@ -105,8 +102,7 @@ def registry_from_uri(uri: str) -> AbstractRegistry:
|
|
105
102
|
|
106
103
|
return ElasticContainerRegistry(uri=uri)
|
107
104
|
|
108
|
-
|
109
|
-
raise LaunchError(f"Unsupported registry URI: {uri}. Unable to load helper.")
|
105
|
+
return AnonynmousRegistry(uri=uri)
|
110
106
|
|
111
107
|
|
112
108
|
async def validate_docker_installation() -> None:
|
@@ -315,7 +311,6 @@ def get_env_vars_dict(
|
|
315
311
|
_inject_wandb_config_env_vars(
|
316
312
|
launch_project.override_config, env_vars, max_env_length
|
317
313
|
)
|
318
|
-
# env_vars["WANDB_CONFIG"] = json.dumps(launch_project.override_config)
|
319
314
|
artifacts = {}
|
320
315
|
# if we're spinning up a launch process from a job
|
321
316
|
# we should tell the run to use that artifact
|
@@ -340,27 +335,56 @@ def get_requirements_section(launch_project: LaunchProject, builder_type: str) -
|
|
340
335
|
buildx_installed = False
|
341
336
|
if launch_project.deps_type == "pip":
|
342
337
|
requirements_files = []
|
343
|
-
|
344
|
-
|
345
|
-
)
|
338
|
+
deps_install_line = None
|
339
|
+
assert launch_project.project_dir is not None
|
340
|
+
base_path = pathlib.Path(launch_project.project_dir)
|
341
|
+
# If there is a requirements.txt at root of build context, use that.
|
342
|
+
if (base_path / "requirements.txt").exists():
|
346
343
|
requirements_files += ["src/requirements.txt"]
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
):
|
351
|
-
|
352
|
-
|
353
|
-
|
344
|
+
deps_install_line = "pip install -r requirements.txt"
|
345
|
+
# Elif there is pyproject.toml at build context, convert the dependencies
|
346
|
+
# section to a requirements.txt and use that.
|
347
|
+
elif (base_path / "pyproject.toml").exists():
|
348
|
+
tomli = get_module("tomli")
|
349
|
+
if tomli is None:
|
350
|
+
wandb.termwarn(
|
351
|
+
"pyproject.toml found but tomli could not be loaded. To "
|
352
|
+
"install dependencies from pyproject.toml please run "
|
353
|
+
"`pip install tomli` and try again."
|
354
|
+
)
|
355
|
+
else:
|
356
|
+
# First try to read deps from standard pyproject format.
|
357
|
+
with open(base_path / "pyproject.toml", "rb") as f:
|
358
|
+
contents = tomli.load(f)
|
359
|
+
project_deps = [
|
360
|
+
str(d) for d in contents.get("project", {}).get("dependencies", [])
|
361
|
+
]
|
362
|
+
if project_deps:
|
363
|
+
with open(base_path / "requirements.txt", "w") as f:
|
364
|
+
f.write("\n".join(project_deps))
|
365
|
+
requirements_files += ["src/requirements.txt"]
|
366
|
+
deps_install_line = "pip install -r requirements.txt"
|
367
|
+
# Else use frozen requirements from wandb run.
|
368
|
+
if not deps_install_line and (base_path / "requirements.frozen.txt").exists():
|
369
|
+
requirements_files += [
|
370
|
+
"src/requirements.frozen.txt",
|
371
|
+
"_wandb_bootstrap.py",
|
372
|
+
]
|
373
|
+
deps_install_line = (
|
354
374
|
_parse_existing_requirements(launch_project)
|
355
375
|
+ "python _wandb_bootstrap.py"
|
356
376
|
)
|
377
|
+
|
378
|
+
if not deps_install_line:
|
379
|
+
raise LaunchError(f"No dependency sources found for {launch_project}")
|
380
|
+
|
357
381
|
if buildx_installed:
|
358
382
|
prefix = "RUN --mount=type=cache,mode=0777,target=/root/.cache/pip"
|
359
383
|
|
360
384
|
requirements_line = PIP_TEMPLATE.format(
|
361
385
|
buildx_optional_prefix=prefix,
|
362
386
|
requirements_files=" ".join(requirements_files),
|
363
|
-
pip_install=
|
387
|
+
pip_install=deps_install_line,
|
364
388
|
)
|
365
389
|
elif launch_project.deps_type == "conda":
|
366
390
|
if buildx_installed:
|
@@ -446,13 +470,9 @@ def generate_dockerfile(
|
|
446
470
|
return dockerfile_contents
|
447
471
|
|
448
472
|
|
449
|
-
def construct_gcp_registry_uri(
|
450
|
-
gcp_repo: str, gcp_project: str, gcp_registry: str
|
451
|
-
) -> str:
|
452
|
-
return "/".join([gcp_registry, gcp_project, gcp_repo])
|
453
|
-
|
454
|
-
|
455
473
|
def _parse_existing_requirements(launch_project: LaunchProject) -> str:
|
474
|
+
import pkg_resources
|
475
|
+
|
456
476
|
requirements_line = ""
|
457
477
|
assert launch_project.project_dir is not None
|
458
478
|
base_requirements = os.path.join(launch_project.project_dir, "requirements.txt")
|
@@ -506,6 +526,12 @@ def _create_docker_build_ctx(
|
|
506
526
|
dirs_exist_ok=True,
|
507
527
|
ignore=shutil.ignore_patterns("fsmonitor--daemon.ipc"),
|
508
528
|
)
|
529
|
+
# TODO: remove this once we make things more explicit for users
|
530
|
+
if entrypoint_dir:
|
531
|
+
new_path = os.path.basename(entrypoint.name)
|
532
|
+
entrypoint = launch_project.get_single_entry_point()
|
533
|
+
if entrypoint is not None:
|
534
|
+
entrypoint.update_entrypoint_path(new_path)
|
509
535
|
return directory
|
510
536
|
|
511
537
|
dst_path = os.path.join(directory, "src")
|
@@ -613,7 +639,7 @@ async def build_image_from_project(
|
|
613
639
|
if not builder:
|
614
640
|
raise LaunchError("Unable to build image. No builder found.")
|
615
641
|
|
616
|
-
launch_project
|
642
|
+
launch_project.fetch_and_validate_project()
|
617
643
|
|
618
644
|
entry_point: EntryPoint = launch_project.get_single_entry_point() or EntryPoint(
|
619
645
|
name=EntrypointDefaults.PYTHON[-1],
|