wandb 0.17.0rc1__py3-none-macosx_11_0_arm64.whl → 0.17.1__py3-none-macosx_11_0_arm64.whl
Sign up to get free protection for your applications and to get access to all the features.
- wandb/__init__.py +1 -2
- wandb/apis/importers/internals/internal.py +0 -1
- wandb/apis/importers/wandb.py +12 -7
- wandb/apis/internal.py +0 -3
- wandb/apis/public/api.py +213 -79
- wandb/apis/public/artifacts.py +335 -100
- wandb/apis/public/files.py +9 -9
- wandb/apis/public/jobs.py +16 -4
- wandb/apis/public/projects.py +26 -28
- wandb/apis/public/query_generator.py +1 -1
- wandb/apis/public/runs.py +163 -65
- wandb/apis/public/sweeps.py +2 -2
- wandb/apis/reports/__init__.py +1 -7
- wandb/apis/reports/v1/__init__.py +5 -27
- wandb/apis/reports/v2/__init__.py +7 -19
- wandb/apis/workspaces/__init__.py +8 -0
- wandb/beta/workflows.py +8 -3
- wandb/bin/apple_gpu_stats +0 -0
- wandb/bin/wandb-core +0 -0
- wandb/cli/cli.py +131 -59
- wandb/data_types.py +6 -3
- wandb/docker/__init__.py +2 -2
- wandb/env.py +3 -3
- wandb/errors/term.py +10 -2
- wandb/filesync/step_checksum.py +1 -4
- wandb/filesync/step_prepare.py +4 -24
- wandb/filesync/step_upload.py +5 -107
- wandb/filesync/upload_job.py +0 -76
- wandb/integration/gym/__init__.py +35 -15
- wandb/integration/huggingface/resolver.py +2 -2
- wandb/integration/keras/callbacks/metrics_logger.py +1 -1
- wandb/integration/keras/keras.py +1 -1
- wandb/integration/openai/fine_tuning.py +21 -3
- wandb/integration/prodigy/prodigy.py +1 -1
- wandb/jupyter.py +16 -17
- wandb/old/summary.py +1 -1
- wandb/plot/confusion_matrix.py +1 -1
- wandb/plot/pr_curve.py +2 -1
- wandb/plot/roc_curve.py +2 -1
- wandb/{plots → plot}/utils.py +13 -25
- wandb/proto/v3/wandb_internal_pb2.py +54 -54
- wandb/proto/v3/wandb_settings_pb2.py +2 -2
- wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
- wandb/proto/v4/wandb_internal_pb2.py +54 -54
- wandb/proto/v4/wandb_settings_pb2.py +2 -2
- wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
- wandb/proto/v5/wandb_base_pb2.py +30 -0
- wandb/proto/v5/wandb_internal_pb2.py +355 -0
- wandb/proto/v5/wandb_server_pb2.py +63 -0
- wandb/proto/v5/wandb_settings_pb2.py +45 -0
- wandb/proto/v5/wandb_telemetry_pb2.py +41 -0
- wandb/proto/wandb_base_pb2.py +2 -0
- wandb/proto/wandb_deprecated.py +9 -1
- wandb/proto/wandb_generate_deprecated.py +34 -0
- wandb/proto/{wandb_internal_codegen.py → wandb_generate_proto.py} +1 -35
- wandb/proto/wandb_internal_pb2.py +2 -0
- wandb/proto/wandb_server_pb2.py +2 -0
- wandb/proto/wandb_settings_pb2.py +2 -0
- wandb/proto/wandb_telemetry_pb2.py +2 -0
- wandb/sdk/artifacts/artifact.py +68 -22
- wandb/sdk/artifacts/artifact_manifest.py +1 -1
- wandb/sdk/artifacts/artifact_manifest_entry.py +6 -3
- wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +1 -1
- wandb/sdk/artifacts/artifact_saver.py +1 -10
- wandb/sdk/artifacts/storage_handlers/local_file_handler.py +6 -2
- wandb/sdk/artifacts/storage_handlers/multi_handler.py +1 -1
- wandb/sdk/artifacts/storage_handlers/tracking_handler.py +6 -4
- wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +2 -42
- wandb/sdk/artifacts/storage_policy.py +1 -12
- wandb/sdk/data_types/_dtypes.py +8 -8
- wandb/sdk/data_types/image.py +2 -2
- wandb/sdk/data_types/video.py +5 -3
- wandb/sdk/integration_utils/data_logging.py +5 -5
- wandb/sdk/interface/interface.py +14 -1
- wandb/sdk/interface/interface_shared.py +1 -1
- wandb/sdk/internal/file_pusher.py +2 -5
- wandb/sdk/internal/file_stream.py +6 -19
- wandb/sdk/internal/internal_api.py +148 -136
- wandb/sdk/internal/job_builder.py +208 -136
- wandb/sdk/internal/progress.py +0 -28
- wandb/sdk/internal/sender.py +102 -39
- wandb/sdk/internal/settings_static.py +8 -1
- wandb/sdk/internal/system/assets/trainium.py +3 -3
- wandb/sdk/internal/system/system_info.py +4 -2
- wandb/sdk/internal/update.py +1 -1
- wandb/sdk/launch/__init__.py +9 -1
- wandb/sdk/launch/_launch.py +4 -24
- wandb/sdk/launch/_launch_add.py +1 -3
- wandb/sdk/launch/_project_spec.py +187 -225
- wandb/sdk/launch/agent/agent.py +59 -19
- wandb/sdk/launch/agent/config.py +0 -3
- wandb/sdk/launch/builder/abstract.py +68 -1
- wandb/sdk/launch/builder/build.py +165 -576
- wandb/sdk/launch/builder/context_manager.py +235 -0
- wandb/sdk/launch/builder/docker_builder.py +7 -23
- wandb/sdk/launch/builder/kaniko_builder.py +12 -25
- wandb/sdk/launch/builder/templates/dockerfile.py +92 -0
- wandb/sdk/launch/create_job.py +51 -45
- wandb/sdk/launch/environment/aws_environment.py +26 -1
- wandb/sdk/launch/inputs/files.py +148 -0
- wandb/sdk/launch/inputs/internal.py +224 -0
- wandb/sdk/launch/inputs/manage.py +95 -0
- wandb/sdk/launch/registry/google_artifact_registry.py +1 -1
- wandb/sdk/launch/runner/abstract.py +2 -2
- wandb/sdk/launch/runner/kubernetes_monitor.py +45 -12
- wandb/sdk/launch/runner/kubernetes_runner.py +6 -8
- wandb/sdk/launch/runner/local_container.py +2 -3
- wandb/sdk/launch/runner/local_process.py +8 -29
- wandb/sdk/launch/runner/sagemaker_runner.py +20 -14
- wandb/sdk/launch/runner/vertex_runner.py +8 -7
- wandb/sdk/launch/sweeps/scheduler.py +5 -3
- wandb/sdk/launch/sweeps/scheduler_sweep.py +1 -1
- wandb/sdk/launch/sweeps/utils.py +4 -4
- wandb/sdk/launch/utils.py +16 -138
- wandb/sdk/lib/_settings_toposort_generated.py +2 -5
- wandb/sdk/lib/apikey.py +4 -2
- wandb/sdk/lib/config_util.py +3 -3
- wandb/sdk/lib/import_hooks.py +1 -1
- wandb/sdk/lib/proto_util.py +22 -1
- wandb/sdk/lib/redirect.py +20 -15
- wandb/sdk/lib/tracelog.py +1 -1
- wandb/sdk/service/service.py +2 -1
- wandb/sdk/service/streams.py +5 -5
- wandb/sdk/wandb_init.py +25 -59
- wandb/sdk/wandb_login.py +28 -25
- wandb/sdk/wandb_run.py +123 -53
- wandb/sdk/wandb_settings.py +33 -64
- wandb/sdk/wandb_setup.py +1 -1
- wandb/sdk/wandb_watch.py +1 -1
- wandb/sklearn/plot/classifier.py +10 -12
- wandb/sklearn/plot/clusterer.py +1 -1
- wandb/sync/sync.py +2 -2
- wandb/testing/relay.py +32 -17
- wandb/util.py +36 -37
- wandb/wandb_agent.py +3 -3
- wandb/wandb_controller.py +5 -4
- {wandb-0.17.0rc1.dist-info → wandb-0.17.1.dist-info}/METADATA +8 -10
- {wandb-0.17.0rc1.dist-info → wandb-0.17.1.dist-info}/RECORD +141 -163
- {wandb-0.17.0rc1.dist-info → wandb-0.17.1.dist-info}/WHEEL +1 -1
- wandb/apis/reports/v1/_blocks.py +0 -1406
- wandb/apis/reports/v1/_helpers.py +0 -70
- wandb/apis/reports/v1/_panels.py +0 -1282
- wandb/apis/reports/v1/_templates.py +0 -478
- wandb/apis/reports/v1/blocks.py +0 -27
- wandb/apis/reports/v1/helpers.py +0 -2
- wandb/apis/reports/v1/mutations.py +0 -66
- wandb/apis/reports/v1/panels.py +0 -17
- wandb/apis/reports/v1/report.py +0 -268
- wandb/apis/reports/v1/runset.py +0 -144
- wandb/apis/reports/v1/templates.py +0 -7
- wandb/apis/reports/v1/util.py +0 -406
- wandb/apis/reports/v1/validators.py +0 -131
- wandb/apis/reports/v2/blocks.py +0 -25
- wandb/apis/reports/v2/expr_parsing.py +0 -257
- wandb/apis/reports/v2/gql.py +0 -68
- wandb/apis/reports/v2/interface.py +0 -1911
- wandb/apis/reports/v2/internal.py +0 -867
- wandb/apis/reports/v2/metrics.py +0 -6
- wandb/apis/reports/v2/panels.py +0 -15
- wandb/catboost/__init__.py +0 -9
- wandb/fastai/__init__.py +0 -9
- wandb/keras/__init__.py +0 -19
- wandb/lightgbm/__init__.py +0 -9
- wandb/plots/__init__.py +0 -6
- wandb/plots/explain_text.py +0 -36
- wandb/plots/heatmap.py +0 -81
- wandb/plots/named_entity.py +0 -43
- wandb/plots/part_of_speech.py +0 -50
- wandb/plots/plot_definitions.py +0 -768
- wandb/plots/precision_recall.py +0 -121
- wandb/plots/roc.py +0 -103
- wandb/sacred/__init__.py +0 -3
- wandb/xgboost/__init__.py +0 -9
- {wandb-0.17.0rc1.dist-info → wandb-0.17.1.dist-info}/entry_points.txt +0 -0
- {wandb-0.17.0rc1.dist-info → wandb-0.17.1.dist-info}/licenses/LICENSE +0 -0
wandb/sdk/launch/create_job.py
CHANGED
@@ -10,9 +10,12 @@ import wandb
|
|
10
10
|
from wandb.apis.internal import Api
|
11
11
|
from wandb.sdk.artifacts.artifact import Artifact
|
12
12
|
from wandb.sdk.internal.job_builder import JobBuilder
|
13
|
-
from wandb.sdk.launch.builder.build import get_current_python_version
|
14
13
|
from wandb.sdk.launch.git_reference import GitReference
|
15
|
-
from wandb.sdk.launch.utils import
|
14
|
+
from wandb.sdk.launch.utils import (
|
15
|
+
_is_git_uri,
|
16
|
+
get_current_python_version,
|
17
|
+
get_entrypoint_file,
|
18
|
+
)
|
16
19
|
from wandb.sdk.lib import filesystem
|
17
20
|
from wandb.util import make_artifact_name_safe
|
18
21
|
|
@@ -34,6 +37,8 @@ def create_job(
|
|
34
37
|
runtime: Optional[str] = None,
|
35
38
|
entrypoint: Optional[str] = None,
|
36
39
|
git_hash: Optional[str] = None,
|
40
|
+
build_context: Optional[str] = None,
|
41
|
+
dockerfile: Optional[str] = None,
|
37
42
|
) -> Optional[Artifact]:
|
38
43
|
"""Create a job from a path, not as the output of a run.
|
39
44
|
|
@@ -46,9 +51,12 @@ def create_job(
|
|
46
51
|
description (Optional[str]): Description of the job.
|
47
52
|
aliases (Optional[List[str]]): Aliases for the job.
|
48
53
|
runtime (Optional[str]): Python runtime of the job, like 3.9.
|
49
|
-
entrypoint (Optional[str]): Entrypoint of the job.
|
54
|
+
entrypoint (Optional[str]): Entrypoint of the job. If build_context is
|
55
|
+
provided, path is relative to build_context.
|
50
56
|
git_hash (Optional[str]): Git hash of a specific commit, when using git type jobs.
|
51
|
-
|
57
|
+
build_context (Optional[str]): Path to the build context, when using image type jobs.
|
58
|
+
dockerfile (Optional[str]): Path to the Dockerfile, when using image type jobs.
|
59
|
+
If build_context is provided, path is relative to build_context.
|
52
60
|
|
53
61
|
Returns:
|
54
62
|
Optional[Artifact]: The artifact created by the job, the action (for printing), and job aliases.
|
@@ -85,6 +93,8 @@ def create_job(
|
|
85
93
|
runtime,
|
86
94
|
entrypoint,
|
87
95
|
git_hash,
|
96
|
+
build_context,
|
97
|
+
dockerfile,
|
88
98
|
)
|
89
99
|
|
90
100
|
return artifact_job
|
@@ -102,6 +112,8 @@ def _create_job(
|
|
102
112
|
runtime: Optional[str] = None,
|
103
113
|
entrypoint: Optional[str] = None,
|
104
114
|
git_hash: Optional[str] = None,
|
115
|
+
build_context: Optional[str] = None,
|
116
|
+
dockerfile: Optional[str] = None,
|
105
117
|
) -> Tuple[Optional[Artifact], str, List[str]]:
|
106
118
|
wandb.termlog(f"Creating launch job of type: {job_type}...")
|
107
119
|
|
@@ -172,7 +184,11 @@ def _create_job(
|
|
172
184
|
name = job_name
|
173
185
|
|
174
186
|
# build job artifact, loads wandb-metadata and creates wandb-job.json here
|
175
|
-
artifact = job_builder.build(
|
187
|
+
artifact = job_builder.build(
|
188
|
+
api.api,
|
189
|
+
dockerfile=dockerfile,
|
190
|
+
build_context=build_context,
|
191
|
+
)
|
176
192
|
if not artifact:
|
177
193
|
wandb.termerror("JobBuilder failed to build a job")
|
178
194
|
_logger.debug("Failed to build job, check job source and metadata")
|
@@ -195,7 +211,7 @@ def _create_job(
|
|
195
211
|
project_name=project,
|
196
212
|
run_name=run.id, # type: ignore # run will be deleted after creation
|
197
213
|
description=description,
|
198
|
-
metadata=
|
214
|
+
metadata={"_partial": True},
|
199
215
|
is_user_created=True,
|
200
216
|
aliases=[{"artifactCollectionName": name, "alias": a} for a in aliases],
|
201
217
|
)
|
@@ -229,8 +245,9 @@ def _make_metadata_for_partial_job(
|
|
229
245
|
entrypoint: Optional[str],
|
230
246
|
) -> Tuple[Optional[Dict[str, Any]], Optional[List[str]]]:
|
231
247
|
"""Create metadata for partial jobs, return metadata and requirements."""
|
232
|
-
metadata = {
|
248
|
+
metadata = {}
|
233
249
|
if job_type == "git":
|
250
|
+
assert entrypoint is not None
|
234
251
|
repo_metadata = _create_repo_metadata(
|
235
252
|
path=path,
|
236
253
|
tempdir=tempdir.name,
|
@@ -245,12 +262,7 @@ def _make_metadata_for_partial_job(
|
|
245
262
|
return metadata, None
|
246
263
|
|
247
264
|
if job_type == "code":
|
248
|
-
|
249
|
-
wandb.termerror(
|
250
|
-
"Artifact jobs must have an entrypoint, either included in the path or specified with -E"
|
251
|
-
)
|
252
|
-
return None, None
|
253
|
-
|
265
|
+
assert entrypoint is not None
|
254
266
|
artifact_metadata, requirements = _create_artifact_metadata(
|
255
267
|
path=path, entrypoint=entrypoint, runtime=runtime
|
256
268
|
)
|
@@ -276,10 +288,18 @@ def _make_metadata_for_partial_job(
|
|
276
288
|
return None, None
|
277
289
|
|
278
290
|
|
291
|
+
def _maybe_warn_python_no_executable(entrypoint: str):
|
292
|
+
entrypoint_list = entrypoint.split(" ")
|
293
|
+
if len(entrypoint_list) == 1 and entrypoint_list[0].endswith(".py"):
|
294
|
+
wandb.termwarn(
|
295
|
+
f"Entrypoint {entrypoint} is a python file without an executable, you may want to use `python {entrypoint}` as the entrypoint instead."
|
296
|
+
)
|
297
|
+
|
298
|
+
|
279
299
|
def _create_repo_metadata(
|
280
300
|
path: str,
|
281
301
|
tempdir: str,
|
282
|
-
entrypoint:
|
302
|
+
entrypoint: str,
|
283
303
|
git_hash: Optional[str] = None,
|
284
304
|
runtime: Optional[str] = None,
|
285
305
|
) -> Optional[Dict[str, Any]]:
|
@@ -287,6 +307,9 @@ def _create_repo_metadata(
|
|
287
307
|
if entrypoint and ".." in entrypoint:
|
288
308
|
wandb.termerror("Entrypoint cannot contain backward path traversal")
|
289
309
|
return None
|
310
|
+
|
311
|
+
_maybe_warn_python_no_executable(entrypoint)
|
312
|
+
|
290
313
|
if not _is_git_uri(path):
|
291
314
|
wandb.termerror("Path must be a git URI")
|
292
315
|
return None
|
@@ -315,32 +338,16 @@ def _create_repo_metadata(
|
|
315
338
|
with open(os.path.join(local_dir, ".python-version")) as f:
|
316
339
|
python_version = f.read().strip().splitlines()[0]
|
317
340
|
else:
|
318
|
-
|
341
|
+
python_version, _ = get_current_python_version()
|
319
342
|
|
320
343
|
python_version = _clean_python_version(python_version)
|
321
344
|
|
322
|
-
# check if entrypoint is valid
|
323
|
-
assert entrypoint is not None
|
324
|
-
entrypoint_list = entrypoint.split(" ")
|
325
|
-
entrypoint_file = get_entrypoint_file(entrypoint_list)
|
326
|
-
if not entrypoint_file:
|
327
|
-
wandb.termerror(
|
328
|
-
f"Entrypoint {entrypoint} is invalid. An entrypoint should include both an executable and a file, for example 'python train.py'"
|
329
|
-
)
|
330
|
-
return None
|
331
|
-
|
332
|
-
if not os.path.exists(os.path.join(local_dir, entrypoint_file)):
|
333
|
-
wandb.termerror(f"Entrypoint file {entrypoint_file} not found in git repo")
|
334
|
-
return None
|
335
|
-
|
336
345
|
metadata = {
|
337
346
|
"git": {
|
338
347
|
"commit": commit,
|
339
348
|
"remote": ref.url,
|
340
349
|
},
|
341
|
-
"
|
342
|
-
"codePath": entrypoint_file,
|
343
|
-
"entrypoint": entrypoint_list,
|
350
|
+
"entrypoint": entrypoint.split(" "),
|
344
351
|
"python": python_version, # used to build container
|
345
352
|
"notebook": False, # partial jobs from notebooks not supported
|
346
353
|
}
|
@@ -354,13 +361,11 @@ def _create_artifact_metadata(
|
|
354
361
|
if not os.path.isdir(path):
|
355
362
|
wandb.termerror("Path must be a valid file or directory")
|
356
363
|
return {}, []
|
364
|
+
|
365
|
+
_maybe_warn_python_no_executable(entrypoint)
|
366
|
+
|
357
367
|
entrypoint_list = entrypoint.split(" ")
|
358
368
|
entrypoint_file = get_entrypoint_file(entrypoint_list)
|
359
|
-
if not entrypoint_file:
|
360
|
-
wandb.termerror(
|
361
|
-
f"Entrypoint {entrypoint} is invalid. An entrypoint should include both an executable and a file, for example 'python train.py'"
|
362
|
-
)
|
363
|
-
return None, None
|
364
369
|
|
365
370
|
# read local requirements.txt and dump to temp dir for builder
|
366
371
|
requirements = []
|
@@ -369,6 +374,9 @@ def _create_artifact_metadata(
|
|
369
374
|
with open(depspath) as f:
|
370
375
|
requirements = f.read().splitlines()
|
371
376
|
|
377
|
+
if not any(["wandb" in r for r in requirements]):
|
378
|
+
wandb.termwarn("wandb is not present in requirements.txt.")
|
379
|
+
|
372
380
|
if runtime:
|
373
381
|
python_version = _clean_python_version(runtime)
|
374
382
|
else:
|
@@ -399,6 +407,7 @@ def _configure_job_builder_for_partial(tmpdir: str, job_source: str) -> JobBuild
|
|
399
407
|
settings=settings, # type: ignore
|
400
408
|
verbose=True,
|
401
409
|
)
|
410
|
+
job_builder._partial = True
|
402
411
|
# never allow notebook runs
|
403
412
|
job_builder._is_notebook_run = False
|
404
413
|
# set run inputs and outputs to empty dicts
|
@@ -421,15 +430,12 @@ def _make_code_artifact(
|
|
421
430
|
|
422
431
|
Returns the name of the eventual job.
|
423
432
|
"""
|
424
|
-
assert entrypoint is not None
|
425
433
|
entrypoint_list = entrypoint.split(" ")
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
return None
|
432
|
-
|
434
|
+
# We no longer require the entrypoint to end in an existing file. But we
|
435
|
+
# need something to use as the default job artifact name. In the future we
|
436
|
+
# may require the user to provide a job name explicitly when calling
|
437
|
+
# wandb job create.
|
438
|
+
entrypoint_file = entrypoint_list[-1]
|
433
439
|
artifact_name = _make_code_artifact_name(os.path.join(path, entrypoint_file), name)
|
434
440
|
code_artifact = wandb.Artifact(
|
435
441
|
name=artifact_name,
|
@@ -7,7 +7,7 @@ from typing import Dict, Optional
|
|
7
7
|
from wandb.sdk.launch.errors import LaunchError
|
8
8
|
from wandb.util import get_module
|
9
9
|
|
10
|
-
from ..utils import S3_URI_RE, event_loop_thread_exec
|
10
|
+
from ..utils import ARN_PARTITION_RE, S3_URI_RE, event_loop_thread_exec
|
11
11
|
from .abstract import AbstractEnvironment
|
12
12
|
|
13
13
|
boto3 = get_module(
|
@@ -49,6 +49,7 @@ class AwsEnvironment(AbstractEnvironment):
|
|
49
49
|
self._secret_key = secret_key
|
50
50
|
self._session_token = session_token
|
51
51
|
self._account = None
|
52
|
+
self._partition = None
|
52
53
|
|
53
54
|
@classmethod
|
54
55
|
def from_default(cls, region: Optional[str] = None) -> "AwsEnvironment":
|
@@ -122,6 +123,30 @@ class AwsEnvironment(AbstractEnvironment):
|
|
122
123
|
def region(self, region: str) -> None:
|
123
124
|
self._region = region
|
124
125
|
|
126
|
+
async def get_partition(self) -> str:
    """Return the partition parsed from the caller identity's ARN.

    An STS client is created from this environment's session and
    ``get_caller_identity`` is invoked through ``event_loop_thread_exec``.
    The ``Arn`` field of the response is matched against
    ``ARN_PARTITION_RE`` and the first capture group is returned.

    Returns:
        str: The first capture group of ``ARN_PARTITION_RE`` for the ARN.

    Raises:
        LaunchError: If the identity has no ARN, the ARN does not match
            ``ARN_PARTITION_RE``, or a botocore ``ClientError`` is raised
            while talking to AWS.
    """
    # Only the AWS round trips can raise ClientError, so only they are guarded.
    try:
        session = await self.get_session()
        sts_client = await event_loop_thread_exec(session.client)("sts")
        identity = await event_loop_thread_exec(sts_client.get_caller_identity)()
    except botocore.exceptions.ClientError as e:
        raise LaunchError(
            f"Could not set partition for AWS environment. {e}"
        ) from e

    arn = identity.get("Arn")
    if not arn:
        raise LaunchError(
            "Could not set partition for AWS environment. ARN not found."
        )

    arn_match = ARN_PARTITION_RE.match(arn)
    if not arn_match:
        raise LaunchError(
            f"Could not set partition for AWS environment. ARN {arn} is not valid."
        )
    return arn_match.group(1)
|
149
|
+
|
125
150
|
async def verify(self) -> None:
|
126
151
|
"""Verify that the AWS environment is configured correctly.
|
127
152
|
|
@@ -0,0 +1,148 @@
|
|
1
|
+
import json
|
2
|
+
import os
|
3
|
+
from typing import Any, Dict
|
4
|
+
|
5
|
+
import yaml
|
6
|
+
|
7
|
+
from ..errors import LaunchError
|
8
|
+
|
9
|
+
FILE_OVERRIDE_ENV_VAR = "WANDB_LAUNCH_FILE_OVERRIDES"


class FileOverrides:
    """Singleton that reads file overrides JSON from environment variables.

    The JSON is taken from ``WANDB_LAUNCH_FILE_OVERRIDES`` when that variable
    is set. Otherwise it is assembled by concatenating the numbered variables
    ``WANDB_LAUNCH_FILE_OVERRIDES_0``, ``_1``, ... in order until the first
    missing index.
    """

    # The one shared instance; stays None until a load has succeeded.
    _instance = None

    def __new__(cls):
        if cls._instance is None:
            instance = object.__new__(cls)
            instance.overrides = {}
            # Publish the singleton only after load() succeeds. Otherwise a
            # failed load would leave a cached instance with empty overrides
            # and later calls would silently ignore the invalid variable.
            instance.load()
            cls._instance = instance
        return cls._instance

    def load(self) -> None:
        """Load overrides from the environment into ``self.overrides``.

        ``self.overrides`` is left untouched when no override text is found.

        Raises:
            LaunchError: If the text is not valid JSON or does not decode to
                a dict.
        """
        raw = os.environ.get(FILE_OVERRIDE_ENV_VAR)
        if raw is None:
            # Fall back to the chunked form: _0, _1, ... until a gap.
            chunks = []
            while f"{FILE_OVERRIDE_ENV_VAR}_{len(chunks)}" in os.environ:
                chunks.append(os.environ[f"{FILE_OVERRIDE_ENV_VAR}_{len(chunks)}"])
            raw = "".join(chunks)
        if not raw:
            return
        try:
            contents = json.loads(raw)
        except json.JSONDecodeError as e:
            raise LaunchError(f"Invalid JSON in {FILE_OVERRIDE_ENV_VAR}") from e
        if not isinstance(contents, dict):
            raise LaunchError(f"Invalid JSON in {FILE_OVERRIDE_ENV_VAR}")
        self.overrides = contents
|
42
|
+
|
43
|
+
|
44
|
+
def config_path_is_valid(path: str) -> None:
    """Validate a config file path, raising if it is unacceptable.

    Checks are applied in this order:

    - The path must not be absolute.
    - The path must not contain the substring ``..`` anywhere (this is how
      upward traversal such as ``../config.json`` is rejected).
    - After normalisation with ``os.path.normpath``, the path must exist.
    - The normalised path must end in ``.json``, ``.yaml`` or ``.yml``.

    Args:
        path (str): The path to validate.

    Raises:
        LaunchError: If any of the checks above fails.
    """
    if os.path.isabs(path):
        raise LaunchError(
            f"Invalid config path: {path}. Please provide a relative path."
        )
    if ".." in path:
        raise LaunchError(
            f"Invalid config path: {path}. Please provide a relative path "
            "without any upward path traversal, e.g. `../config.json`."
        )

    # The remaining checks (and their messages) use the normalised form.
    normalized = os.path.normpath(path)
    if not os.path.exists(normalized):
        raise LaunchError(f"Invalid config path: {normalized}. File does not exist.")
    if not normalized.endswith((".json", ".yaml", ".yml")):
        raise LaunchError(
            f"Invalid config path: {normalized}. Only JSON and YAML files are supported."
        )
|
77
|
+
|
78
|
+
|
79
|
+
def override_file(path: str) -> None:
    """Apply environment-provided overrides to the config file at ``path``.

    The ``FileOverrides`` singleton is consulted for an entry keyed by
    ``path``. When a non-None entry exists, the file is read, the entry is
    merged into its contents with ``_update_dict`` and the result is written
    back to the same path. Otherwise the file is left untouched.

    Args:
        path (str): The config file path, used both as lookup key and as the
            file to rewrite.
    """
    pending = FileOverrides().overrides.get(path)
    if pending is None:
        return
    config = _read_config_file(path)
    _update_dict(config, pending)
    _write_config_file(path, config)
|
88
|
+
|
89
|
+
|
90
|
+
def _write_config_file(path: str, config: Any) -> None:
|
91
|
+
"""Write a config file to disk.
|
92
|
+
|
93
|
+
Args:
|
94
|
+
path (str): The path to the config file.
|
95
|
+
config (Any): The contents of the config file as a Python object.
|
96
|
+
|
97
|
+
Raises:
|
98
|
+
LaunchError: If the file extension is not supported.
|
99
|
+
"""
|
100
|
+
_, ext = os.path.splitext(path)
|
101
|
+
if ext == ".json":
|
102
|
+
with open(path, "w") as f:
|
103
|
+
json.dump(config, f, indent=2)
|
104
|
+
elif ext in [".yaml", ".yml"]:
|
105
|
+
with open(path, "w") as f:
|
106
|
+
yaml.safe_dump(config, f)
|
107
|
+
else:
|
108
|
+
raise LaunchError(f"Unsupported file extension: {ext}")
|
109
|
+
|
110
|
+
|
111
|
+
def _read_config_file(path: str) -> Any:
|
112
|
+
"""Read a config file from disk.
|
113
|
+
|
114
|
+
Args:
|
115
|
+
path (str): The path to the config file.
|
116
|
+
|
117
|
+
Returns:
|
118
|
+
Any: The contents of the config file as a Python object.
|
119
|
+
"""
|
120
|
+
_, ext = os.path.splitext(path)
|
121
|
+
if ext == ".json":
|
122
|
+
with open(
|
123
|
+
path,
|
124
|
+
) as f:
|
125
|
+
return json.load(f)
|
126
|
+
elif ext in [".yaml", ".yml"]:
|
127
|
+
with open(
|
128
|
+
path,
|
129
|
+
) as f:
|
130
|
+
return yaml.safe_load(f)
|
131
|
+
else:
|
132
|
+
raise LaunchError(f"Unsupported file extension: {ext}")
|
133
|
+
|
134
|
+
|
135
|
+
def _update_dict(target: Dict, source: Dict) -> None:
|
136
|
+
"""Update a dictionary with the contents of another dictionary.
|
137
|
+
|
138
|
+
Args:
|
139
|
+
target (Dict): The dictionary to update.
|
140
|
+
source (Dict): The dictionary to update from.
|
141
|
+
"""
|
142
|
+
for key, value in source.items():
|
143
|
+
if isinstance(value, dict):
|
144
|
+
if key not in target:
|
145
|
+
target[key] = {}
|
146
|
+
_update_dict(target[key], value)
|
147
|
+
else:
|
148
|
+
target[key] = value
|
@@ -0,0 +1,224 @@
|
|
1
|
+
"""The layer between launch sdk user code and the wandb internal process.
|
2
|
+
|
3
|
+
If there is an active run this communication is done through the wandb run's
|
4
|
+
backend interface.
|
5
|
+
|
6
|
+
If there is no active run, the messages are staged on the StagedLaunchInputs
|
7
|
+
singleton and sent when a run is created.
|
8
|
+
"""
|
9
|
+
|
10
|
+
import os
|
11
|
+
import pathlib
|
12
|
+
import shutil
|
13
|
+
import tempfile
|
14
|
+
from typing import List, Optional
|
15
|
+
|
16
|
+
import wandb
|
17
|
+
import wandb.data_types
|
18
|
+
from wandb.sdk.launch.errors import LaunchError
|
19
|
+
from wandb.sdk.wandb_run import Run
|
20
|
+
|
21
|
+
from .files import config_path_is_valid, override_file
|
22
|
+
|
23
|
+
PERIOD = "."
BACKSLASH = "\\"
LAUNCH_MANAGED_CONFIGS_DIR = "_wandb_configs"


class ConfigTmpDir:
    """Singleton owning the temporary directory used for launch config files.

    On first construction a temporary directory is created with
    ``tempfile.mkdtemp`` together with a ``_wandb_configs`` subdirectory
    inside it. Every later construction returns the same instance and reuses
    the same directories.
    """

    # The one shared instance, created lazily by __new__.
    _instance = None

    def __new__(cls):
        if cls._instance is not None:
            return cls._instance
        cls._instance = object.__new__(cls)
        return cls._instance

    def __init__(self):
        # __init__ runs on every ConfigTmpDir() call; only set up once.
        if hasattr(self, "_tmp_dir"):
            return
        self._tmp_dir = tempfile.mkdtemp()
        self._configs_dir = os.path.join(self._tmp_dir, LAUNCH_MANAGED_CONFIGS_DIR)
        os.mkdir(self._configs_dir)

    @property
    def tmp_dir(self):
        """The temporary root directory as a ``pathlib.Path``."""
        return pathlib.Path(self._tmp_dir)

    @property
    def configs_dir(self):
        """The ``_wandb_configs`` subdirectory as a ``pathlib.Path``."""
        return pathlib.Path(self._configs_dir)
|
56
|
+
|
57
|
+
|
58
|
+
class JobInputArguments:
    """Plain container for the arguments of Interface's publish_job_input.

    Attributes are stored exactly as passed; no validation is performed here.
    """

    def __init__(
        self,
        include: Optional[List[str]] = None,
        exclude: Optional[List[str]] = None,
        file_path: Optional[str] = None,
        run_config: Optional[bool] = None,
    ):
        # Path filters, kept as given.
        self.include, self.exclude = include, exclude
        # Source of the input: a config file path and/or the run config flag.
        self.file_path, self.run_config = file_path, run_config
|
72
|
+
|
73
|
+
|
74
|
+
class StagedLaunchInputs:
    """Singleton that holds job inputs declared before a run exists.

    Inputs are appended with ``add_staged_input`` and later published to a
    run with ``apply``.
    """

    # The one shared instance, created lazily by __new__.
    _instance = None

    def __new__(cls):
        if cls._instance is not None:
            return cls._instance
        cls._instance = object.__new__(cls)
        return cls._instance

    def __init__(self) -> None:
        # __init__ runs on every StagedLaunchInputs() call; keep existing state.
        if hasattr(self, "_staged_inputs"):
            return
        self._staged_inputs: List[JobInputArguments] = []

    def add_staged_input(
        self,
        input_arguments: JobInputArguments,
    ):
        """Queue ``input_arguments`` for publication by a later ``apply``."""
        self._staged_inputs.append(input_arguments)

    def apply(self, run: Run):
        """Apply the staged inputs to the given run.

        Each staged input is published in insertion order. The staged list
        is not cleared by this method.
        """
        for staged in self._staged_inputs:
            _publish_job_input(staged, run)
|
96
|
+
|
97
|
+
|
98
|
+
def _publish_job_input(
    input: JobInputArguments,
    run: Run,
) -> None:
    """Publish a job input to the backend interface of the given run.

    When the input names a config file, the copy of that file under the
    ``ConfigTmpDir`` configs directory is first saved to the run. The include
    and exclude paths are then split on unescaped dots and sent, together
    with the run-config flag and file path, through ``publish_job_input``.

    Arguments:
        input (JobInputArguments): The arguments for the job input.
        run (Run): The run to publish the job input to.
    """
    assert run._backend is not None
    assert run._backend.interface is not None
    assert input.run_config is not None

    interface = run._backend.interface
    if input.file_path:
        config_dir = ConfigTmpDir()
        dest = os.path.join(config_dir.configs_dir, input.file_path)
        run.save(dest, base_path=config_dir.tmp_dir)

    # A missing or empty filter list is published as an empty list.
    include_paths = [_split_on_unesc_dot(path) for path in input.include or []]
    exclude_paths = [_split_on_unesc_dot(path) for path in input.exclude or []]
    interface.publish_job_input(
        include_paths=include_paths,
        exclude_paths=exclude_paths,
        run_config=input.run_config,
        file_path=input.file_path or "",
    )
|
127
|
+
|
128
|
+
|
129
|
+
def handle_config_file_input(
    path: str,
    include: Optional[List[str]] = None,
    exclude: Optional[List[str]] = None,
):
    """Declare an overridable configuration file for a launch job.

    The path is validated, any environment-provided overrides are applied to
    the file in place, and the file is then copied into the ``ConfigTmpDir``
    configs directory under the same relative path.

    With an active run the input is published to that run immediately.
    Without one it is staged on ``StagedLaunchInputs`` to be sent when a run
    is created.

    Arguments:
        path (str): Relative path of the config file.
        include (Optional[List[str]]): Include paths forwarded with the input.
        exclude (Optional[List[str]]): Exclude paths forwarded with the input.
    """
    config_path_is_valid(path)
    override_file(path)

    staging = ConfigTmpDir()
    dest = os.path.join(staging.configs_dir, path)
    parent = os.path.dirname(dest)
    if not os.path.exists(parent):
        os.makedirs(parent)
    shutil.copy(path, dest)

    arguments = JobInputArguments(
        include=include,
        exclude=exclude,
        file_path=path,
        run_config=False,
    )
    if wandb.run is None:
        StagedLaunchInputs().add_staged_input(arguments)
        return
    _publish_job_input(arguments, wandb.run)
|
165
|
+
|
166
|
+
|
167
|
+
def handle_run_config_input(
    include: Optional[List[str]] = None, exclude: Optional[List[str]] = None
):
    """Declare wandb.config as an overridable configuration for a launch job.

    With an active run the include and exclude paths are published to that
    run immediately. Without one they are staged on ``StagedLaunchInputs``
    to be sent when a run is created.

    Arguments:
        include (Optional[List[str]]): Include paths forwarded with the input.
        exclude (Optional[List[str]]): Exclude paths forwarded with the input.
    """
    arguments = JobInputArguments(
        include=include,
        exclude=exclude,
        run_config=True,
        file_path=None,
    )
    if wandb.run is None:
        StagedLaunchInputs().add_staged_input(arguments)
        return
    _publish_job_input(arguments, wandb.run)
|
189
|
+
|
190
|
+
|
191
|
+
def _split_on_unesc_dot(path: str) -> List[str]:
    r"""Split a string on unescaped dots.

    A dot preceded by a backslash (``\.``) is kept as a literal dot in the
    current part. A backslash followed by any other character is kept as a
    literal backslash. A final empty part is dropped, so ``"a."`` yields
    ``["a"]``; empty parts elsewhere are kept.

    Arguments:
        path (str): The string to split.

    Raises:
        LaunchError: If the path has a trailing escape character.

    Returns:
        List[str]: The split string.
    """
    parts = []
    part = ""
    i = 0
    while i < len(path):
        char = path[i]
        if char == BACKSLASH:
            if i == len(path) - 1:
                raise LaunchError(
                    f"Invalid config path {path}: trailing {BACKSLASH}.",
                )
            if path[i + 1] == PERIOD:
                part += PERIOD
                i += 2
            else:
                # Not an escape for a dot: keep the backslash and move on.
                # Without advancing here the loop would never terminate.
                part += BACKSLASH
                i += 1
        elif char == PERIOD:
            parts.append(part)
            part = ""
            i += 1
        else:
            part += char
            i += 1
    if part:
        parts.append(part)
    return parts
|