wandb 0.16.5__py3-none-any.whl → 0.17.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- package_readme.md +95 -0
- wandb/__init__.py +2 -3
- wandb/agents/pyagent.py +0 -1
- wandb/analytics/sentry.py +2 -1
- wandb/apis/importers/internals/internal.py +0 -1
- wandb/apis/importers/internals/protocols.py +30 -56
- wandb/apis/importers/mlflow.py +13 -26
- wandb/apis/importers/wandb.py +8 -14
- wandb/apis/internal.py +0 -3
- wandb/apis/public/api.py +55 -3
- wandb/apis/public/artifacts.py +1 -0
- wandb/apis/public/files.py +1 -0
- wandb/apis/public/history.py +1 -0
- wandb/apis/public/jobs.py +17 -4
- wandb/apis/public/projects.py +1 -0
- wandb/apis/public/reports.py +1 -0
- wandb/apis/public/runs.py +15 -17
- wandb/apis/public/sweeps.py +1 -0
- wandb/apis/public/teams.py +1 -0
- wandb/apis/public/users.py +1 -0
- wandb/apis/reports/v1/_blocks.py +3 -7
- wandb/apis/reports/v2/gql.py +1 -0
- wandb/apis/reports/v2/interface.py +3 -4
- wandb/apis/reports/v2/internal.py +5 -8
- wandb/cli/cli.py +95 -22
- wandb/data_types.py +9 -6
- wandb/docker/__init__.py +1 -1
- wandb/env.py +38 -8
- wandb/errors/__init__.py +5 -0
- wandb/errors/term.py +10 -2
- wandb/filesync/step_checksum.py +1 -4
- wandb/filesync/step_prepare.py +4 -24
- wandb/filesync/step_upload.py +4 -106
- wandb/filesync/upload_job.py +0 -76
- wandb/integration/catboost/catboost.py +1 -1
- wandb/integration/fastai/__init__.py +1 -0
- wandb/integration/huggingface/resolver.py +2 -2
- wandb/integration/keras/__init__.py +1 -0
- wandb/integration/keras/callbacks/metrics_logger.py +1 -1
- wandb/integration/keras/keras.py +7 -7
- wandb/integration/langchain/wandb_tracer.py +1 -0
- wandb/integration/lightning/fabric/logger.py +1 -3
- wandb/integration/metaflow/metaflow.py +41 -6
- wandb/integration/openai/fine_tuning.py +77 -40
- wandb/integration/prodigy/prodigy.py +1 -1
- wandb/old/summary.py +1 -1
- wandb/plot/confusion_matrix.py +1 -1
- wandb/plot/pr_curve.py +2 -1
- wandb/plot/roc_curve.py +2 -1
- wandb/{plots → plot}/utils.py +13 -25
- wandb/proto/v3/wandb_internal_pb2.py +364 -332
- wandb/proto/v3/wandb_settings_pb2.py +2 -2
- wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
- wandb/proto/v4/wandb_internal_pb2.py +322 -316
- wandb/proto/v4/wandb_settings_pb2.py +2 -2
- wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
- wandb/proto/wandb_deprecated.py +7 -1
- wandb/proto/wandb_internal_codegen.py +3 -29
- wandb/sdk/artifacts/artifact.py +51 -20
- wandb/sdk/artifacts/artifact_download_logger.py +1 -0
- wandb/sdk/artifacts/artifact_file_cache.py +18 -4
- wandb/sdk/artifacts/artifact_instance_cache.py +1 -0
- wandb/sdk/artifacts/artifact_manifest.py +1 -0
- wandb/sdk/artifacts/artifact_manifest_entry.py +7 -3
- wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +1 -0
- wandb/sdk/artifacts/artifact_saver.py +18 -27
- wandb/sdk/artifacts/artifact_state.py +1 -0
- wandb/sdk/artifacts/artifact_ttl.py +1 -0
- wandb/sdk/artifacts/exceptions.py +1 -0
- wandb/sdk/artifacts/storage_handlers/azure_handler.py +1 -0
- wandb/sdk/artifacts/storage_handlers/gcs_handler.py +13 -18
- wandb/sdk/artifacts/storage_handlers/http_handler.py +1 -0
- wandb/sdk/artifacts/storage_handlers/local_file_handler.py +1 -0
- wandb/sdk/artifacts/storage_handlers/multi_handler.py +1 -0
- wandb/sdk/artifacts/storage_handlers/s3_handler.py +5 -3
- wandb/sdk/artifacts/storage_handlers/tracking_handler.py +1 -0
- wandb/sdk/artifacts/storage_handlers/wb_artifact_handler.py +1 -0
- wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +1 -0
- wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +3 -42
- wandb/sdk/artifacts/storage_policy.py +2 -12
- wandb/sdk/data_types/_dtypes.py +8 -8
- wandb/sdk/data_types/base_types/media.py +3 -6
- wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +3 -1
- wandb/sdk/data_types/image.py +1 -1
- wandb/sdk/data_types/video.py +1 -1
- wandb/sdk/integration_utils/auto_logging.py +5 -6
- wandb/sdk/integration_utils/data_logging.py +10 -6
- wandb/sdk/interface/interface.py +86 -38
- wandb/sdk/interface/interface_shared.py +7 -13
- wandb/sdk/internal/datastore.py +1 -1
- wandb/sdk/internal/file_pusher.py +2 -5
- wandb/sdk/internal/file_stream.py +5 -18
- wandb/sdk/internal/handler.py +18 -2
- wandb/sdk/internal/internal.py +0 -1
- wandb/sdk/internal/internal_api.py +1 -129
- wandb/sdk/internal/internal_util.py +0 -1
- wandb/sdk/internal/job_builder.py +159 -45
- wandb/sdk/internal/profiler.py +1 -0
- wandb/sdk/internal/progress.py +0 -28
- wandb/sdk/internal/run.py +1 -0
- wandb/sdk/internal/sender.py +1 -2
- wandb/sdk/internal/system/assets/gpu_amd.py +44 -44
- wandb/sdk/internal/system/assets/gpu_apple.py +56 -11
- wandb/sdk/internal/system/assets/interfaces.py +6 -8
- wandb/sdk/internal/system/assets/open_metrics.py +2 -2
- wandb/sdk/internal/system/assets/trainium.py +1 -3
- wandb/sdk/launch/__init__.py +9 -1
- wandb/sdk/launch/_launch.py +9 -24
- wandb/sdk/launch/_launch_add.py +1 -3
- wandb/sdk/launch/_project_spec.py +188 -241
- wandb/sdk/launch/agent/agent.py +115 -48
- wandb/sdk/launch/agent/config.py +80 -14
- wandb/sdk/launch/builder/abstract.py +69 -1
- wandb/sdk/launch/builder/build.py +156 -555
- wandb/sdk/launch/builder/context_manager.py +235 -0
- wandb/sdk/launch/builder/docker_builder.py +8 -23
- wandb/sdk/launch/builder/kaniko_builder.py +161 -159
- wandb/sdk/launch/builder/noop.py +1 -0
- wandb/sdk/launch/builder/templates/dockerfile.py +92 -0
- wandb/sdk/launch/create_job.py +68 -63
- wandb/sdk/launch/environment/abstract.py +1 -0
- wandb/sdk/launch/environment/gcp_environment.py +1 -0
- wandb/sdk/launch/environment/local_environment.py +1 -0
- wandb/sdk/launch/inputs/files.py +148 -0
- wandb/sdk/launch/inputs/internal.py +217 -0
- wandb/sdk/launch/inputs/manage.py +95 -0
- wandb/sdk/launch/loader.py +1 -0
- wandb/sdk/launch/registry/abstract.py +1 -0
- wandb/sdk/launch/registry/azure_container_registry.py +1 -0
- wandb/sdk/launch/registry/elastic_container_registry.py +1 -0
- wandb/sdk/launch/registry/google_artifact_registry.py +2 -1
- wandb/sdk/launch/registry/local_registry.py +1 -0
- wandb/sdk/launch/runner/abstract.py +1 -0
- wandb/sdk/launch/runner/kubernetes_monitor.py +4 -1
- wandb/sdk/launch/runner/kubernetes_runner.py +9 -10
- wandb/sdk/launch/runner/local_container.py +2 -3
- wandb/sdk/launch/runner/local_process.py +8 -29
- wandb/sdk/launch/runner/sagemaker_runner.py +21 -20
- wandb/sdk/launch/runner/vertex_runner.py +8 -7
- wandb/sdk/launch/sweeps/scheduler.py +7 -4
- wandb/sdk/launch/sweeps/scheduler_sweep.py +2 -1
- wandb/sdk/launch/sweeps/utils.py +3 -3
- wandb/sdk/launch/utils.py +33 -140
- wandb/sdk/lib/_settings_toposort_generated.py +1 -5
- wandb/sdk/lib/fsm.py +8 -12
- wandb/sdk/lib/gitlib.py +4 -4
- wandb/sdk/lib/import_hooks.py +1 -1
- wandb/sdk/lib/lazyloader.py +0 -1
- wandb/sdk/lib/proto_util.py +23 -2
- wandb/sdk/lib/redirect.py +19 -14
- wandb/sdk/lib/retry.py +3 -2
- wandb/sdk/lib/run_moment.py +7 -1
- wandb/sdk/lib/tracelog.py +1 -1
- wandb/sdk/service/service.py +19 -16
- wandb/sdk/verify/verify.py +2 -1
- wandb/sdk/wandb_init.py +16 -63
- wandb/sdk/wandb_manager.py +2 -2
- wandb/sdk/wandb_require.py +5 -0
- wandb/sdk/wandb_run.py +164 -90
- wandb/sdk/wandb_settings.py +2 -48
- wandb/sdk/wandb_setup.py +1 -1
- wandb/sklearn/__init__.py +1 -0
- wandb/sklearn/plot/__init__.py +1 -0
- wandb/sklearn/plot/classifier.py +11 -12
- wandb/sklearn/plot/clusterer.py +2 -1
- wandb/sklearn/plot/regressor.py +1 -0
- wandb/sklearn/plot/shared.py +1 -0
- wandb/sklearn/utils.py +1 -0
- wandb/testing/relay.py +4 -4
- wandb/trigger.py +1 -0
- wandb/util.py +67 -54
- wandb/wandb_controller.py +2 -3
- wandb/wandb_torch.py +1 -2
- {wandb-0.16.5.dist-info → wandb-0.17.0.dist-info}/METADATA +67 -70
- {wandb-0.16.5.dist-info → wandb-0.17.0.dist-info}/RECORD +178 -188
- {wandb-0.16.5.dist-info → wandb-0.17.0.dist-info}/WHEEL +1 -2
- wandb/bin/apple_gpu_stats +0 -0
- wandb/catboost/__init__.py +0 -9
- wandb/fastai/__init__.py +0 -9
- wandb/keras/__init__.py +0 -18
- wandb/lightgbm/__init__.py +0 -9
- wandb/plots/__init__.py +0 -6
- wandb/plots/explain_text.py +0 -36
- wandb/plots/heatmap.py +0 -81
- wandb/plots/named_entity.py +0 -43
- wandb/plots/part_of_speech.py +0 -50
- wandb/plots/plot_definitions.py +0 -768
- wandb/plots/precision_recall.py +0 -121
- wandb/plots/roc.py +0 -103
- wandb/sacred/__init__.py +0 -3
- wandb/xgboost/__init__.py +0 -9
- wandb-0.16.5.dist-info/top_level.txt +0 -1
- {wandb-0.16.5.dist-info → wandb-0.17.0.dist-info}/entry_points.txt +0 -0
- {wandb-0.16.5.dist-info → wandb-0.17.0.dist-info/licenses}/LICENSE +0 -0
@@ -2,18 +2,21 @@
|
|
2
2
|
|
3
3
|
Arguments can come from a launch spec or call to wandb launch.
|
4
4
|
"""
|
5
|
+
|
5
6
|
import enum
|
7
|
+
import json
|
6
8
|
import logging
|
7
9
|
import os
|
8
10
|
import tempfile
|
9
11
|
from copy import deepcopy
|
10
12
|
from typing import TYPE_CHECKING, Any, Dict, List, Optional, cast
|
11
13
|
|
14
|
+
from six.moves import shlex_quote
|
15
|
+
|
12
16
|
import wandb
|
13
|
-
import wandb.docker as docker
|
14
17
|
from wandb.apis.internal import Api
|
15
18
|
from wandb.errors import CommError
|
16
|
-
from wandb.sdk.launch import
|
19
|
+
from wandb.sdk.launch.utils import get_entrypoint_file
|
17
20
|
from wandb.sdk.lib.runid import generate_id
|
18
21
|
|
19
22
|
from .errors import LaunchError
|
@@ -32,15 +35,18 @@ IMAGE_TAG_MAX_LENGTH = 32
|
|
32
35
|
|
33
36
|
|
34
37
|
class LaunchSource(enum.IntEnum):
|
35
|
-
|
36
|
-
GIT: int = 2
|
37
|
-
LOCAL: int = 3
|
38
|
-
DOCKER: int = 4
|
39
|
-
JOB: int = 5
|
38
|
+
"""Enumeration of possible sources for a launch project.
|
40
39
|
|
40
|
+
Attributes:
|
41
|
+
DOCKER: Source is a Docker image. This can happen if a user runs
|
42
|
+
`wandb launch -d <docker-image>`.
|
43
|
+
JOB: Source is a job. This is standard case.
|
44
|
+
SCHEDULER: Source is a wandb sweep scheduler command.
|
45
|
+
"""
|
41
46
|
|
42
|
-
|
43
|
-
|
47
|
+
DOCKER: int = 1
|
48
|
+
JOB: int = 2
|
49
|
+
SCHEDULER: int = 3
|
44
50
|
|
45
51
|
|
46
52
|
class LaunchProject:
|
@@ -59,8 +65,16 @@ class LaunchProject:
|
|
59
65
|
|
60
66
|
This class is stateful and certain methods can only be called after
|
61
67
|
`LaunchProject.fetch_and_validate_project()` has been called.
|
68
|
+
|
69
|
+
Notes on the entrypoint:
|
70
|
+
- The entrypoint is the command that will be run inside the container.
|
71
|
+
- The LaunchProject stores two entrypoints
|
72
|
+
- The job entrypoint is the entrypoint specified in the job's config.
|
73
|
+
- The override entrypoint is the entrypoint specified in the launch spec.
|
74
|
+
- The override entrypoint takes precedence over the job entrypoint.
|
62
75
|
"""
|
63
76
|
|
77
|
+
# This init is way to long, and there are too many attributes on this sucker.
|
64
78
|
def __init__(
|
65
79
|
self,
|
66
80
|
uri: Optional[str],
|
@@ -78,9 +92,6 @@ class LaunchProject:
|
|
78
92
|
run_id: Optional[str],
|
79
93
|
sweep_id: Optional[str] = None,
|
80
94
|
):
|
81
|
-
if uri is not None and utils.is_bare_wandb_uri(uri):
|
82
|
-
uri = api.settings("base_url") + uri
|
83
|
-
_logger.info(f"{LOG_PREFIX}Updating uri with base uri: {uri}")
|
84
95
|
self.uri = uri
|
85
96
|
self.job = job
|
86
97
|
if job is not None:
|
@@ -104,74 +115,57 @@ class LaunchProject:
|
|
104
115
|
self.accelerator_base_image: Optional[str] = resource_args_build.get(
|
105
116
|
"accelerator", {}
|
106
117
|
).get("base_image") or resource_args_build.get("cuda", {}).get("base_image")
|
107
|
-
self._base_image: Optional[str] = launch_spec.get("base_image")
|
108
118
|
self.docker_image: Optional[str] = docker_config.get(
|
109
119
|
"docker_image"
|
110
120
|
) or launch_spec.get("image_uri")
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
self.
|
116
|
-
self.
|
117
|
-
self.
|
118
|
-
self.overrides = overrides
|
119
|
-
self.override_args: List[str] = overrides.get("args", [])
|
120
|
-
self.override_config: Dict[str, Any] = overrides.get("run_config", {})
|
121
|
-
self.override_artifacts: Dict[str, Any] = overrides.get("artifacts", {})
|
122
|
-
self.override_entrypoint: Optional[EntryPoint] = None
|
123
|
-
self.override_dockerfile: Optional[str] = overrides.get("dockerfile")
|
121
|
+
self.docker_user_id = docker_config.get("user_id", 1000)
|
122
|
+
self._entry_point: Optional[EntryPoint] = (
|
123
|
+
None # todo: keep multiple entrypoint support?
|
124
|
+
)
|
125
|
+
self.init_overrides(overrides)
|
126
|
+
self.init_source()
|
127
|
+
self.init_git(git_info)
|
124
128
|
self.deps_type: Optional[str] = None
|
125
129
|
self._runtime: Optional[str] = None
|
126
130
|
self.run_id = run_id or generate_id()
|
127
131
|
self._queue_name: Optional[str] = None
|
128
132
|
self._queue_entity: Optional[str] = None
|
129
133
|
self._run_queue_item_id: Optional[str] = None
|
130
|
-
self.
|
131
|
-
|
132
|
-
] = None # todo: keep multiple entrypoint support?
|
134
|
+
self._job_dockerfile: Optional[str] = None
|
135
|
+
self._job_build_context: Optional[str] = None
|
133
136
|
|
134
|
-
|
135
|
-
if override_entrypoint:
|
136
|
-
_logger.info("Adding override entry point")
|
137
|
-
self.override_entrypoint = EntryPoint(
|
138
|
-
name=_get_entrypoint_file(override_entrypoint),
|
139
|
-
command=override_entrypoint,
|
140
|
-
)
|
141
|
-
|
142
|
-
if overrides.get("sweep_id") is not None:
|
143
|
-
_logger.info("Adding override sweep id")
|
144
|
-
self.sweep_id = overrides["sweep_id"]
|
137
|
+
def init_source(self) -> None:
|
145
138
|
if self.docker_image is not None:
|
146
139
|
self.source = LaunchSource.DOCKER
|
147
140
|
self.project_dir = None
|
148
141
|
elif self.job is not None:
|
149
142
|
self.source = LaunchSource.JOB
|
150
143
|
self.project_dir = tempfile.mkdtemp()
|
151
|
-
|
152
|
-
|
153
|
-
self.
|
154
|
-
self.
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
144
|
+
if self.uri and self.uri.startswith("placeholder"):
|
145
|
+
self.source = LaunchSource.SCHEDULER
|
146
|
+
self.project_dir = os.getcwd()
|
147
|
+
self._entry_point = self.override_entrypoint
|
148
|
+
|
149
|
+
def init_git(self, git_info: Dict[str, str]) -> None:
|
150
|
+
self.git_version = git_info.get("version")
|
151
|
+
self.git_repo = git_info.get("repo")
|
152
|
+
|
153
|
+
def init_overrides(self, overrides: Dict[str, Any]) -> None:
|
154
|
+
"""Initialize override attributes for a launch project."""
|
155
|
+
self.overrides = overrides
|
156
|
+
self.override_args: List[str] = overrides.get("args", [])
|
157
|
+
self.override_config: Dict[str, Any] = overrides.get("run_config", {})
|
158
|
+
self.override_artifacts: Dict[str, Any] = overrides.get("artifacts", {})
|
159
|
+
self.override_files: Dict[str, Any] = overrides.get("files", {})
|
160
|
+
self.override_entrypoint: Optional[EntryPoint] = None
|
161
|
+
self.override_dockerfile: Optional[str] = overrides.get("dockerfile")
|
162
|
+
override_entrypoint = overrides.get("entry_point")
|
163
|
+
if override_entrypoint:
|
164
|
+
_logger.info("Adding override entry point")
|
165
|
+
self.override_entrypoint = EntryPoint(
|
166
|
+
name=get_entrypoint_file(override_entrypoint),
|
167
|
+
command=override_entrypoint,
|
162
168
|
)
|
163
|
-
self.uri = os.getcwd()
|
164
|
-
self.source = LaunchSource.LOCAL
|
165
|
-
self.project_dir = self.uri
|
166
|
-
else:
|
167
|
-
_logger.info(f"URI {self.uri} indicates a local uri")
|
168
|
-
# assume local
|
169
|
-
if self.uri is not None and not os.path.exists(self.uri):
|
170
|
-
raise LaunchError(
|
171
|
-
"Assumed URI supplied is a local path but path is not valid"
|
172
|
-
)
|
173
|
-
self.source = LaunchSource.LOCAL
|
174
|
-
self.project_dir = self.uri
|
175
169
|
|
176
170
|
def __repr__(self) -> str:
|
177
171
|
"""String representation of LaunchProject."""
|
@@ -210,6 +204,20 @@ class LaunchProject:
|
|
210
204
|
launch_spec.get("sweep_id", {}),
|
211
205
|
)
|
212
206
|
|
207
|
+
@property
|
208
|
+
def job_dockerfile(self) -> Optional[str]:
|
209
|
+
return self._job_dockerfile
|
210
|
+
|
211
|
+
@property
|
212
|
+
def job_build_context(self) -> Optional[str]:
|
213
|
+
return self._job_build_context
|
214
|
+
|
215
|
+
def set_job_dockerfile(self, dockerfile: str) -> None:
|
216
|
+
self._job_dockerfile = dockerfile
|
217
|
+
|
218
|
+
def set_job_build_context(self, build_context: str) -> None:
|
219
|
+
self._job_build_context = build_context
|
220
|
+
|
213
221
|
@property
|
214
222
|
def image_name(self) -> str:
|
215
223
|
if self.docker_image is not None:
|
@@ -273,7 +281,7 @@ class LaunchProject:
|
|
273
281
|
image (str): The image name to fill in for ${wandb-image}.
|
274
282
|
|
275
283
|
Returns:
|
276
|
-
|
284
|
+
Dict[str, Any]: The resource args with all macros filled in.
|
277
285
|
"""
|
278
286
|
update_dict = {
|
279
287
|
"project_name": self.target_project,
|
@@ -323,8 +331,8 @@ class LaunchProject:
|
|
323
331
|
self._docker_image = value
|
324
332
|
self._ensure_not_docker_image_and_local_process()
|
325
333
|
|
326
|
-
def
|
327
|
-
"""Returns the
|
334
|
+
def get_job_entry_point(self) -> Optional["EntryPoint"]:
|
335
|
+
"""Returns the job entrypoint for the project."""
|
328
336
|
# assuming project only has 1 entry point, pull that out
|
329
337
|
# tmp fn until we figure out if we want to support multiple entry points or not
|
330
338
|
if not self._entry_point:
|
@@ -335,8 +343,8 @@ class LaunchProject:
|
|
335
343
|
return None
|
336
344
|
return self._entry_point
|
337
345
|
|
338
|
-
def
|
339
|
-
"""
|
346
|
+
def set_job_entry_point(self, command: List[str]) -> "EntryPoint":
|
347
|
+
"""Set job entrypoint for the project."""
|
340
348
|
assert (
|
341
349
|
self._entry_point is None
|
342
350
|
), "Cannot set entry point twice. Use LaunchProject.override_entrypoint"
|
@@ -357,51 +365,23 @@ class LaunchProject:
|
|
357
365
|
"""
|
358
366
|
if self.source == LaunchSource.DOCKER:
|
359
367
|
return
|
360
|
-
if self.source == LaunchSource.LOCAL:
|
361
|
-
if not self._entry_point:
|
362
|
-
wandb.termlog(
|
363
|
-
f"{LOG_PREFIX}Entry point for repo not specified, defaulting to `python main.py`"
|
364
|
-
)
|
365
|
-
self.set_entry_point(EntrypointDefaults.PYTHON)
|
366
368
|
elif self.source == LaunchSource.JOB:
|
367
369
|
self._fetch_job()
|
368
|
-
else:
|
369
|
-
self._fetch_project_local(internal_api=self.api)
|
370
|
-
|
371
370
|
assert self.project_dir is not None
|
372
|
-
# this prioritizes pip, and we don't support any cases where both are present conda projects when uploaded to
|
373
|
-
# wandb become pip projects via requirements.frozen.txt, wandb doesn't preserve conda envs
|
374
|
-
if os.path.exists(
|
375
|
-
os.path.join(self.project_dir, "requirements.txt")
|
376
|
-
) or os.path.exists(os.path.join(self.project_dir, "requirements.frozen.txt")):
|
377
|
-
self.deps_type = "pip"
|
378
|
-
elif os.path.exists(os.path.join(self.project_dir, "environment.yml")):
|
379
|
-
self.deps_type = "conda"
|
380
371
|
|
372
|
+
# Let's make sure we document this very clearly.
|
381
373
|
def get_image_source_string(self) -> str:
|
382
374
|
"""Returns a unique string identifying the source of an image."""
|
383
|
-
if self.source == LaunchSource.
|
384
|
-
# TODO: more correct to get a hash of local uri contents
|
385
|
-
assert isinstance(self.uri, str)
|
386
|
-
return self.uri
|
387
|
-
elif self.source == LaunchSource.JOB:
|
375
|
+
if self.source == LaunchSource.JOB:
|
388
376
|
assert self._job_artifact is not None
|
389
377
|
return f"{self._job_artifact.name}:v{self._job_artifact.version}"
|
390
|
-
elif self.source == LaunchSource.GIT:
|
391
|
-
assert isinstance(self.uri, str)
|
392
|
-
ret = self.uri
|
393
|
-
if self.git_version:
|
394
|
-
ret += self.git_version
|
395
|
-
return ret
|
396
|
-
elif self.source == LaunchSource.WANDB:
|
397
|
-
assert isinstance(self.uri, str)
|
398
|
-
return self.uri
|
399
378
|
elif self.source == LaunchSource.DOCKER:
|
400
379
|
assert isinstance(self.docker_image, str)
|
401
|
-
_logger.debug("")
|
402
380
|
return self.docker_image
|
403
381
|
else:
|
404
|
-
raise LaunchError(
|
382
|
+
raise LaunchError(
|
383
|
+
"Unknown source type when determining image source string"
|
384
|
+
)
|
405
385
|
|
406
386
|
def _ensure_not_docker_image_and_local_process(self) -> None:
|
407
387
|
"""Ensure that docker image is not specified with local-process resource runner.
|
@@ -429,129 +409,84 @@ class LaunchProject:
|
|
429
409
|
raise LaunchError(
|
430
410
|
f"Error accessing job {self.job}: {msg} on {public_api.settings.get('base_url')}"
|
431
411
|
)
|
432
|
-
job.configure_launch_project(self)
|
412
|
+
job.configure_launch_project(self) # Why is this a method of the job?
|
433
413
|
self._job_artifact = job._job_artifact
|
434
414
|
|
435
|
-
def
|
436
|
-
"""
|
437
|
-
# these asserts are all guaranteed to pass, but are required by mypy
|
438
|
-
assert self.source != LaunchSource.LOCAL and self.source != LaunchSource.JOB
|
439
|
-
assert isinstance(self.uri, str)
|
440
|
-
assert self.project_dir is not None
|
441
|
-
_logger.info("Fetching project locally...")
|
442
|
-
if utils._is_wandb_uri(self.uri):
|
443
|
-
source_entity, source_project, source_run_name = utils.parse_wandb_uri(
|
444
|
-
self.uri
|
445
|
-
)
|
446
|
-
run_info = utils.fetch_wandb_project_run_info(
|
447
|
-
source_entity, source_project, source_run_name, internal_api
|
448
|
-
)
|
449
|
-
program_name = run_info.get("codePath") or run_info["program"]
|
450
|
-
|
451
|
-
self.python_version = run_info.get("python", "3")
|
452
|
-
downloaded_code_artifact = utils.check_and_download_code_artifacts(
|
453
|
-
source_entity,
|
454
|
-
source_project,
|
455
|
-
source_run_name,
|
456
|
-
internal_api,
|
457
|
-
self.project_dir,
|
458
|
-
)
|
459
|
-
if not downloaded_code_artifact:
|
460
|
-
if not run_info["git"]:
|
461
|
-
raise LaunchError(
|
462
|
-
"Reproducing a run requires either an associated git repo or a code artifact logged with `run.log_code()`"
|
463
|
-
)
|
464
|
-
branch_name = utils._fetch_git_repo(
|
465
|
-
self.project_dir,
|
466
|
-
run_info["git"]["remote"],
|
467
|
-
run_info["git"]["commit"],
|
468
|
-
)
|
469
|
-
if self.git_version is None:
|
470
|
-
self.git_version = branch_name
|
471
|
-
patch = utils.fetch_project_diff(
|
472
|
-
source_entity, source_project, source_run_name, internal_api
|
473
|
-
)
|
474
|
-
if patch:
|
475
|
-
utils.apply_patch(patch, self.project_dir)
|
476
|
-
|
477
|
-
# For cases where the entry point wasn't checked into git
|
478
|
-
if not os.path.exists(os.path.join(self.project_dir, program_name)):
|
479
|
-
downloaded_entrypoint = utils.download_entry_point(
|
480
|
-
source_entity,
|
481
|
-
source_project,
|
482
|
-
source_run_name,
|
483
|
-
internal_api,
|
484
|
-
program_name,
|
485
|
-
self.project_dir,
|
486
|
-
)
|
487
|
-
|
488
|
-
if not downloaded_entrypoint:
|
489
|
-
raise LaunchError(
|
490
|
-
f"Entrypoint file: {program_name} does not exist, "
|
491
|
-
"and could not be downloaded. Please specify the entrypoint for this run."
|
492
|
-
)
|
493
|
-
|
494
|
-
if (
|
495
|
-
"_session_history.ipynb" in os.listdir(self.project_dir)
|
496
|
-
or ".ipynb" in program_name
|
497
|
-
):
|
498
|
-
program_name = utils.convert_jupyter_notebook_to_script(
|
499
|
-
program_name, self.project_dir
|
500
|
-
)
|
501
|
-
|
502
|
-
# Download any frozen requirements
|
503
|
-
utils.download_wandb_python_deps(
|
504
|
-
source_entity,
|
505
|
-
source_project,
|
506
|
-
source_run_name,
|
507
|
-
internal_api,
|
508
|
-
self.project_dir,
|
509
|
-
)
|
510
|
-
|
511
|
-
if not self._entry_point:
|
512
|
-
_, ext = os.path.splitext(program_name)
|
513
|
-
if ext == ".py":
|
514
|
-
entry_point = ["python", program_name]
|
515
|
-
elif ext == ".sh":
|
516
|
-
command = os.environ.get("SHELL", "bash")
|
517
|
-
entry_point = [command, program_name]
|
518
|
-
else:
|
519
|
-
raise LaunchError(f"Unsupported entrypoint: {program_name}")
|
520
|
-
self.set_entry_point(entry_point)
|
521
|
-
if not self.override_args:
|
522
|
-
self.override_args = run_info["args"]
|
523
|
-
else:
|
524
|
-
assert utils._GIT_URI_REGEX.match(self.uri), (
|
525
|
-
"Non-wandb URI %s should be a Git URI" % self.uri
|
526
|
-
)
|
527
|
-
if not self._entry_point:
|
528
|
-
wandb.termlog(
|
529
|
-
f"{LOG_PREFIX}Entry point for repo not specified, defaulting to python main.py"
|
530
|
-
)
|
531
|
-
self.set_entry_point(EntrypointDefaults.PYTHON)
|
532
|
-
branch_name = utils._fetch_git_repo(
|
533
|
-
self.project_dir, self.uri, self.git_version
|
534
|
-
)
|
535
|
-
if self.git_version is None:
|
536
|
-
self.git_version = branch_name
|
415
|
+
def get_env_vars_dict(self, api: Api, max_env_length: int) -> Dict[str, str]:
|
416
|
+
"""Generate environment variables for the project.
|
537
417
|
|
418
|
+
Arguments:
|
419
|
+
launch_project: LaunchProject to generate environment variables for.
|
538
420
|
|
539
|
-
|
540
|
-
|
421
|
+
Returns:
|
422
|
+
Dictionary of environment variables.
|
423
|
+
"""
|
424
|
+
env_vars = {}
|
425
|
+
env_vars["WANDB_BASE_URL"] = api.settings("base_url")
|
426
|
+
override_api_key = self.launch_spec.get("_wandb_api_key")
|
427
|
+
env_vars["WANDB_API_KEY"] = override_api_key or api.api_key
|
428
|
+
if self.target_project:
|
429
|
+
env_vars["WANDB_PROJECT"] = self.target_project
|
430
|
+
env_vars["WANDB_ENTITY"] = self.target_entity
|
431
|
+
env_vars["WANDB_LAUNCH"] = "True"
|
432
|
+
env_vars["WANDB_RUN_ID"] = self.run_id
|
433
|
+
if self.docker_image:
|
434
|
+
env_vars["WANDB_DOCKER"] = self.docker_image
|
435
|
+
if self.name is not None:
|
436
|
+
env_vars["WANDB_NAME"] = self.name
|
437
|
+
if "author" in self.launch_spec and not override_api_key:
|
438
|
+
env_vars["WANDB_USERNAME"] = self.launch_spec["author"]
|
439
|
+
if self.sweep_id:
|
440
|
+
env_vars["WANDB_SWEEP_ID"] = self.sweep_id
|
441
|
+
if self.launch_spec.get("_resume_count", 0) > 0:
|
442
|
+
env_vars["WANDB_RESUME"] = "allow"
|
443
|
+
if self.queue_name:
|
444
|
+
env_vars[wandb.env.LAUNCH_QUEUE_NAME] = self.queue_name
|
445
|
+
if self.queue_entity:
|
446
|
+
env_vars[wandb.env.LAUNCH_QUEUE_ENTITY] = self.queue_entity
|
447
|
+
if self.run_queue_item_id:
|
448
|
+
env_vars[wandb.env.LAUNCH_TRACE_ID] = self.run_queue_item_id
|
449
|
+
|
450
|
+
_inject_wandb_config_env_vars(self.override_config, env_vars, max_env_length)
|
451
|
+
_inject_file_overrides_env_vars(self.override_files, env_vars, max_env_length)
|
452
|
+
|
453
|
+
artifacts = {}
|
454
|
+
# if we're spinning up a launch process from a job
|
455
|
+
# we should tell the run to use that artifact
|
456
|
+
if self.job:
|
457
|
+
artifacts = {wandb.util.LAUNCH_JOB_ARTIFACT_SLOT_NAME: self.job}
|
458
|
+
env_vars["WANDB_ARTIFACTS"] = json.dumps(
|
459
|
+
{**artifacts, **self.override_artifacts}
|
460
|
+
)
|
461
|
+
return env_vars
|
541
462
|
|
542
|
-
|
543
|
-
|
463
|
+
def parse_existing_requirements(self) -> str:
|
464
|
+
import pkg_resources
|
544
465
|
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
466
|
+
requirements_line = ""
|
467
|
+
assert self.project_dir is not None
|
468
|
+
base_requirements = os.path.join(self.project_dir, "requirements.txt")
|
469
|
+
if os.path.exists(base_requirements):
|
470
|
+
include_only = set()
|
471
|
+
with open(base_requirements) as f:
|
472
|
+
iter = pkg_resources.parse_requirements(f)
|
473
|
+
while True:
|
474
|
+
try:
|
475
|
+
pkg = next(iter)
|
476
|
+
if hasattr(pkg, "name"):
|
477
|
+
name = pkg.name.lower()
|
478
|
+
else:
|
479
|
+
name = str(pkg)
|
480
|
+
include_only.add(shlex_quote(name))
|
481
|
+
except StopIteration:
|
482
|
+
break
|
483
|
+
# Different versions of pkg_resources throw different errors
|
484
|
+
# just catch them all and ignore packages we can't parse
|
485
|
+
except Exception as e:
|
486
|
+
_logger.warn(f"Unable to parse requirements.txt: {e}")
|
487
|
+
continue
|
488
|
+
requirements_line += "WANDB_ONLY_INCLUDE={} ".format(",".join(include_only))
|
489
|
+
return requirements_line
|
555
490
|
|
556
491
|
|
557
492
|
class EntryPoint:
|
@@ -561,31 +496,43 @@ class EntryPoint:
|
|
561
496
|
self.name = name
|
562
497
|
self.command = command
|
563
498
|
|
564
|
-
def compute_command(self, user_parameters: Optional[List[str]]) -> List[str]:
|
565
|
-
"""Converts user parameter dictionary to a string."""
|
566
|
-
ret = self.command
|
567
|
-
if user_parameters:
|
568
|
-
return ret + user_parameters
|
569
|
-
return ret
|
570
|
-
|
571
499
|
def update_entrypoint_path(self, new_path: str) -> None:
|
572
500
|
"""Updates the entrypoint path to a new path."""
|
573
|
-
if len(self.command) == 2 and
|
501
|
+
if len(self.command) == 2 and (
|
502
|
+
self.command[0].startswith("python") or self.command[0] == "bash"
|
503
|
+
):
|
574
504
|
self.command[1] = new_path
|
575
505
|
|
576
506
|
|
577
|
-
def
|
578
|
-
|
579
|
-
) ->
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
507
|
+
def _inject_wandb_config_env_vars(
|
508
|
+
config: Dict[str, Any], env_dict: Dict[str, Any], maximum_env_length: int
|
509
|
+
) -> None:
|
510
|
+
str_config = json.dumps(config)
|
511
|
+
if len(str_config) <= maximum_env_length:
|
512
|
+
env_dict["WANDB_CONFIG"] = str_config
|
513
|
+
return
|
514
|
+
|
515
|
+
chunks = [
|
516
|
+
str_config[i : i + maximum_env_length]
|
517
|
+
for i in range(0, len(str_config), maximum_env_length)
|
518
|
+
]
|
519
|
+
config_chunks_dict = {f"WANDB_CONFIG_{i}": chunk for i, chunk in enumerate(chunks)}
|
520
|
+
env_dict.update(config_chunks_dict)
|
521
|
+
|
522
|
+
|
523
|
+
def _inject_file_overrides_env_vars(
|
524
|
+
overrides: Dict[str, Any], env_dict: Dict[str, Any], maximum_env_length: int
|
525
|
+
) -> None:
|
526
|
+
str_overrides = json.dumps(overrides)
|
527
|
+
if len(str_overrides) <= maximum_env_length:
|
528
|
+
env_dict["WANDB_LAUNCH_FILE_OVERRIDES"] = str_overrides
|
529
|
+
return
|
530
|
+
|
531
|
+
chunks = [
|
532
|
+
str_overrides[i : i + maximum_env_length]
|
533
|
+
for i in range(0, len(str_overrides), maximum_env_length)
|
534
|
+
]
|
535
|
+
overrides_chunks_dict = {
|
536
|
+
f"WANDB_LAUNCH_FILE_OVERRIDES_{i}": chunk for i, chunk in enumerate(chunks)
|
537
|
+
}
|
538
|
+
env_dict.update(overrides_chunks_dict)
|