wandb 0.16.6__py3-none-any.whl → 0.17.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- package_readme.md +95 -0
- wandb/__init__.py +2 -3
- wandb/agents/pyagent.py +0 -1
- wandb/analytics/sentry.py +2 -1
- wandb/apis/importers/internals/internal.py +0 -1
- wandb/apis/importers/internals/protocols.py +30 -56
- wandb/apis/importers/mlflow.py +13 -26
- wandb/apis/importers/wandb.py +8 -14
- wandb/apis/internal.py +0 -3
- wandb/apis/public/api.py +55 -3
- wandb/apis/public/artifacts.py +1 -0
- wandb/apis/public/files.py +1 -0
- wandb/apis/public/history.py +1 -0
- wandb/apis/public/jobs.py +17 -4
- wandb/apis/public/projects.py +1 -0
- wandb/apis/public/reports.py +1 -0
- wandb/apis/public/runs.py +15 -17
- wandb/apis/public/sweeps.py +1 -0
- wandb/apis/public/teams.py +1 -0
- wandb/apis/public/users.py +1 -0
- wandb/apis/reports/v1/_blocks.py +3 -7
- wandb/apis/reports/v2/gql.py +1 -0
- wandb/apis/reports/v2/interface.py +3 -4
- wandb/apis/reports/v2/internal.py +5 -8
- wandb/cli/cli.py +92 -22
- wandb/data_types.py +9 -6
- wandb/docker/__init__.py +1 -1
- wandb/env.py +38 -8
- wandb/errors/__init__.py +5 -0
- wandb/errors/term.py +10 -2
- wandb/filesync/step_checksum.py +1 -4
- wandb/filesync/step_prepare.py +4 -24
- wandb/filesync/step_upload.py +4 -106
- wandb/filesync/upload_job.py +0 -76
- wandb/integration/catboost/catboost.py +1 -1
- wandb/integration/fastai/__init__.py +1 -0
- wandb/integration/huggingface/resolver.py +2 -2
- wandb/integration/keras/__init__.py +1 -0
- wandb/integration/keras/callbacks/metrics_logger.py +1 -1
- wandb/integration/keras/keras.py +7 -7
- wandb/integration/langchain/wandb_tracer.py +1 -0
- wandb/integration/lightning/fabric/logger.py +1 -3
- wandb/integration/metaflow/metaflow.py +41 -6
- wandb/integration/openai/fine_tuning.py +3 -3
- wandb/integration/prodigy/prodigy.py +1 -1
- wandb/old/summary.py +1 -1
- wandb/plot/confusion_matrix.py +1 -1
- wandb/plot/pr_curve.py +2 -1
- wandb/plot/roc_curve.py +2 -1
- wandb/{plots → plot}/utils.py +13 -25
- wandb/proto/v3/wandb_internal_pb2.py +364 -332
- wandb/proto/v3/wandb_settings_pb2.py +2 -2
- wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
- wandb/proto/v4/wandb_internal_pb2.py +322 -316
- wandb/proto/v4/wandb_settings_pb2.py +2 -2
- wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
- wandb/proto/wandb_deprecated.py +7 -1
- wandb/proto/wandb_internal_codegen.py +3 -29
- wandb/sdk/artifacts/artifact.py +26 -11
- wandb/sdk/artifacts/artifact_download_logger.py +1 -0
- wandb/sdk/artifacts/artifact_file_cache.py +18 -4
- wandb/sdk/artifacts/artifact_instance_cache.py +1 -0
- wandb/sdk/artifacts/artifact_manifest.py +1 -0
- wandb/sdk/artifacts/artifact_manifest_entry.py +7 -3
- wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +1 -0
- wandb/sdk/artifacts/artifact_saver.py +2 -8
- wandb/sdk/artifacts/artifact_state.py +1 -0
- wandb/sdk/artifacts/artifact_ttl.py +1 -0
- wandb/sdk/artifacts/exceptions.py +1 -0
- wandb/sdk/artifacts/storage_handlers/azure_handler.py +1 -0
- wandb/sdk/artifacts/storage_handlers/gcs_handler.py +13 -18
- wandb/sdk/artifacts/storage_handlers/http_handler.py +1 -0
- wandb/sdk/artifacts/storage_handlers/local_file_handler.py +1 -0
- wandb/sdk/artifacts/storage_handlers/multi_handler.py +1 -0
- wandb/sdk/artifacts/storage_handlers/s3_handler.py +5 -3
- wandb/sdk/artifacts/storage_handlers/tracking_handler.py +1 -0
- wandb/sdk/artifacts/storage_handlers/wb_artifact_handler.py +1 -0
- wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +1 -0
- wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +3 -42
- wandb/sdk/artifacts/storage_policy.py +2 -12
- wandb/sdk/data_types/_dtypes.py +8 -8
- wandb/sdk/data_types/base_types/media.py +3 -6
- wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +3 -1
- wandb/sdk/data_types/image.py +1 -1
- wandb/sdk/data_types/video.py +1 -1
- wandb/sdk/integration_utils/auto_logging.py +5 -6
- wandb/sdk/integration_utils/data_logging.py +10 -6
- wandb/sdk/interface/interface.py +68 -32
- wandb/sdk/interface/interface_shared.py +7 -13
- wandb/sdk/internal/datastore.py +1 -1
- wandb/sdk/internal/file_pusher.py +2 -5
- wandb/sdk/internal/file_stream.py +5 -18
- wandb/sdk/internal/handler.py +18 -2
- wandb/sdk/internal/internal.py +0 -1
- wandb/sdk/internal/internal_api.py +1 -129
- wandb/sdk/internal/internal_util.py +0 -1
- wandb/sdk/internal/job_builder.py +159 -45
- wandb/sdk/internal/profiler.py +1 -0
- wandb/sdk/internal/progress.py +0 -28
- wandb/sdk/internal/run.py +1 -0
- wandb/sdk/internal/sender.py +1 -2
- wandb/sdk/internal/system/assets/gpu_amd.py +44 -44
- wandb/sdk/internal/system/assets/gpu_apple.py +56 -11
- wandb/sdk/internal/system/assets/interfaces.py +6 -8
- wandb/sdk/internal/system/assets/open_metrics.py +2 -2
- wandb/sdk/internal/system/assets/trainium.py +1 -3
- wandb/sdk/launch/__init__.py +9 -1
- wandb/sdk/launch/_launch.py +4 -24
- wandb/sdk/launch/_launch_add.py +1 -3
- wandb/sdk/launch/_project_spec.py +186 -224
- wandb/sdk/launch/agent/agent.py +37 -13
- wandb/sdk/launch/agent/config.py +72 -14
- wandb/sdk/launch/builder/abstract.py +69 -1
- wandb/sdk/launch/builder/build.py +156 -555
- wandb/sdk/launch/builder/context_manager.py +235 -0
- wandb/sdk/launch/builder/docker_builder.py +8 -23
- wandb/sdk/launch/builder/kaniko_builder.py +12 -25
- wandb/sdk/launch/builder/noop.py +1 -0
- wandb/sdk/launch/builder/templates/dockerfile.py +92 -0
- wandb/sdk/launch/create_job.py +47 -37
- wandb/sdk/launch/environment/abstract.py +1 -0
- wandb/sdk/launch/environment/gcp_environment.py +1 -0
- wandb/sdk/launch/environment/local_environment.py +1 -0
- wandb/sdk/launch/inputs/files.py +148 -0
- wandb/sdk/launch/inputs/internal.py +217 -0
- wandb/sdk/launch/inputs/manage.py +95 -0
- wandb/sdk/launch/loader.py +1 -0
- wandb/sdk/launch/registry/abstract.py +1 -0
- wandb/sdk/launch/registry/azure_container_registry.py +1 -0
- wandb/sdk/launch/registry/elastic_container_registry.py +1 -0
- wandb/sdk/launch/registry/google_artifact_registry.py +2 -1
- wandb/sdk/launch/registry/local_registry.py +1 -0
- wandb/sdk/launch/runner/abstract.py +1 -0
- wandb/sdk/launch/runner/kubernetes_monitor.py +1 -0
- wandb/sdk/launch/runner/kubernetes_runner.py +9 -10
- wandb/sdk/launch/runner/local_container.py +2 -3
- wandb/sdk/launch/runner/local_process.py +8 -29
- wandb/sdk/launch/runner/sagemaker_runner.py +21 -20
- wandb/sdk/launch/runner/vertex_runner.py +8 -7
- wandb/sdk/launch/sweeps/scheduler.py +4 -3
- wandb/sdk/launch/sweeps/scheduler_sweep.py +2 -1
- wandb/sdk/launch/sweeps/utils.py +3 -3
- wandb/sdk/launch/utils.py +15 -140
- wandb/sdk/lib/_settings_toposort_generated.py +0 -5
- wandb/sdk/lib/fsm.py +8 -12
- wandb/sdk/lib/gitlib.py +4 -4
- wandb/sdk/lib/import_hooks.py +1 -1
- wandb/sdk/lib/lazyloader.py +0 -1
- wandb/sdk/lib/proto_util.py +23 -2
- wandb/sdk/lib/redirect.py +19 -14
- wandb/sdk/lib/retry.py +3 -2
- wandb/sdk/lib/tracelog.py +1 -1
- wandb/sdk/service/service.py +19 -16
- wandb/sdk/verify/verify.py +2 -1
- wandb/sdk/wandb_init.py +14 -55
- wandb/sdk/wandb_manager.py +2 -2
- wandb/sdk/wandb_require.py +5 -0
- wandb/sdk/wandb_run.py +114 -56
- wandb/sdk/wandb_settings.py +0 -48
- wandb/sdk/wandb_setup.py +1 -1
- wandb/sklearn/__init__.py +1 -0
- wandb/sklearn/plot/__init__.py +1 -0
- wandb/sklearn/plot/classifier.py +11 -12
- wandb/sklearn/plot/clusterer.py +2 -1
- wandb/sklearn/plot/regressor.py +1 -0
- wandb/sklearn/plot/shared.py +1 -0
- wandb/sklearn/utils.py +1 -0
- wandb/testing/relay.py +4 -4
- wandb/trigger.py +1 -0
- wandb/util.py +67 -54
- wandb/wandb_controller.py +2 -3
- wandb/wandb_torch.py +1 -2
- {wandb-0.16.6.dist-info → wandb-0.17.0.dist-info}/METADATA +67 -70
- {wandb-0.16.6.dist-info → wandb-0.17.0.dist-info}/RECORD +177 -187
- {wandb-0.16.6.dist-info → wandb-0.17.0.dist-info}/WHEEL +1 -2
- wandb/bin/apple_gpu_stats +0 -0
- wandb/catboost/__init__.py +0 -9
- wandb/fastai/__init__.py +0 -9
- wandb/keras/__init__.py +0 -18
- wandb/lightgbm/__init__.py +0 -9
- wandb/plots/__init__.py +0 -6
- wandb/plots/explain_text.py +0 -36
- wandb/plots/heatmap.py +0 -81
- wandb/plots/named_entity.py +0 -43
- wandb/plots/part_of_speech.py +0 -50
- wandb/plots/plot_definitions.py +0 -768
- wandb/plots/precision_recall.py +0 -121
- wandb/plots/roc.py +0 -103
- wandb/sacred/__init__.py +0 -3
- wandb/xgboost/__init__.py +0 -9
- wandb-0.16.6.dist-info/top_level.txt +0 -1
- {wandb-0.16.6.dist-info → wandb-0.17.0.dist-info}/entry_points.txt +0 -0
- {wandb-0.16.6.dist-info → wandb-0.17.0.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,235 @@
|
|
1
|
+
import logging
|
2
|
+
import os
|
3
|
+
import shutil
|
4
|
+
import tempfile
|
5
|
+
from typing import Tuple
|
6
|
+
|
7
|
+
from wandb.sdk.launch._project_spec import LaunchProject
|
8
|
+
from wandb.sdk.launch.builder.build import image_tag_from_dockerfile_and_source
|
9
|
+
from wandb.sdk.launch.errors import LaunchError
|
10
|
+
from wandb.sdk.launch.utils import get_current_python_version
|
11
|
+
|
12
|
+
from .build import (
|
13
|
+
_WANDB_DOCKERFILE_NAME,
|
14
|
+
get_base_setup,
|
15
|
+
get_docker_user,
|
16
|
+
get_entrypoint_setup,
|
17
|
+
get_requirements_section,
|
18
|
+
get_user_setup,
|
19
|
+
)
|
20
|
+
from .templates.dockerfile import DOCKERFILE_TEMPLATE
|
21
|
+
|
22
|
+
_logger = logging.getLogger(__name__)
|
23
|
+
|
24
|
+
|
25
|
+
class BuildContextManager:
    """Creates a build context for a container image from job source code.

    The build context root is the launch project's ``project_dir``, or the
    ``job_build_context`` subdirectory of it when the job specifies one. The
    Dockerfile is then chosen by the first of the following rules that applies:

    - If `override_dockerfile` is set on the LaunchProject, that file (resolved
        relative to the build context root) is used as the Dockerfile and the
        build context root is used as the build context. `override_dockerfile`
        can be set in a launch spec via the `-D` flag to `wandb launch` or in
        the `overrides` section of the launch drawer.

    - If the job specifies a Dockerfile (`job_dockerfile`), that file (resolved
        relative to the build context root) is used as the Dockerfile and the
        build context root is used as the build context.

    - If a Dockerfile.wandb is found adjacent to the entrypoint, the directory
        containing the entrypoint is used as the build context and Dockerfile.wandb
        is used as the Dockerfile.

    - If no dockerfile is set, a Dockerfile is generated from the job's
        requirements and entrypoint, and the project source is copied into a
        `src` directory inside the build context.
    """

    def __init__(self, launch_project: LaunchProject):
        """Initialize a BuildContextManager.

        Creates a fresh temporary directory that will hold the build context.
        This class never removes that directory itself.

        Arguments:
            launch_project: The launch project. Its `project_dir` must be set.
        """
        self._launch_project = launch_project
        assert self._launch_project.project_dir is not None
        self._directory = tempfile.mkdtemp()

    def _generate_dockerfile(self, builder_type: str) -> str:
        """Generate a Dockerfile for the container image.

        Arguments:
            builder_type: The type of builder to use. One of "docker" or "kaniko".

        Returns:
            The contents of the Dockerfile.
        """
        launch_project = self._launch_project
        # NOTE(review): here the override entrypoint takes precedence over the
        # job entrypoint, while create_build_context resolves them in the
        # opposite order -- confirm this asymmetry is intended.
        entry_point = (
            launch_project.override_entrypoint or launch_project.get_job_entry_point()
        )

        # get python versions truncated to major.minor to ensure image availability
        if launch_project.python_version:
            spl = launch_project.python_version.split(".")[:2]
            py_version, py_major = (".".join(spl), spl[0])
        else:
            py_version, py_major = get_current_python_version()

        python_build_image = (
            f"python:{py_version}"  # use full python image for package installation
        )
        requirements_section = get_requirements_section(
            launch_project, self._directory, builder_type
        )
        # ----- stage 2: base -----
        python_base_setup = get_base_setup(launch_project, py_version, py_major)

        # set up user info
        username, userid = get_docker_user(launch_project, launch_project.resource)
        user_setup = get_user_setup(username, userid, launch_project.resource)
        workdir = f"/home/{username}"

        assert entry_point is not None
        entrypoint_section = get_entrypoint_setup(entry_point)

        return DOCKERFILE_TEMPLATE.format(
            py_build_image=python_build_image,
            requirements_section=requirements_section,
            base_setup=python_base_setup,
            uid=userid,
            user_setup=user_setup,
            workdir=workdir,
            entrypoint_section=entrypoint_section,
        )

    @staticmethod
    def _copy_tree(src: str, dst: str, dirs_exist_ok: bool = False) -> None:
        """Copy a source tree into the build context, preserving symlinks."""
        shutil.copytree(
            src,
            dst,
            symlinks=True,
            dirs_exist_ok=dirs_exist_ok,
            # presumably git's fsmonitor socket, which cannot be copied like a
            # regular file -- TODO confirm
            ignore=shutil.ignore_patterns("fsmonitor--daemon.ipc"),
        )

    def _image_tag_from_file(self, dockerfile_path: str) -> str:
        """Compute the image tag from the contents of a Dockerfile on disk."""
        # Read through a context manager so the file handle is always closed.
        with open(dockerfile_path) as f:
            contents = f.read()
        return image_tag_from_dockerfile_and_source(self._launch_project, contents)

    def _use_existing_dockerfile(
        self, context_root: str, dockerfile: str
    ) -> Tuple[str, str]:
        """Stage a user-provided Dockerfile together with its build context.

        Arguments:
            context_root: Directory to copy into the build context.
            dockerfile: Path of the Dockerfile, relative to `context_root`.

        Returns:
            A pair of str: the build context path and the image tag.

        Raises:
            LaunchError: If the Dockerfile does not exist.
        """
        dockerfile_path = os.path.join(context_root, dockerfile)
        if not os.path.exists(dockerfile_path):
            raise LaunchError(f"Dockerfile does not exist at {dockerfile_path}")
        # The temporary directory already exists, hence dirs_exist_ok.
        self._copy_tree(context_root, self._directory, dirs_exist_ok=True)
        # Builders look for the Dockerfile under a fixed name in the context.
        shutil.copy(
            dockerfile_path,
            os.path.join(self._directory, _WANDB_DOCKERFILE_NAME),
        )
        return self._directory, self._image_tag_from_file(dockerfile_path)

    def create_build_context(self, builder_type: str) -> Tuple[str, str]:
        """Create the build context for the container image.

        Arguments:
            builder_type: The type of builder to use. Only consulted when a
                Dockerfile has to be generated.

        Returns:
            A pair of str: the path to the build context locally and the image
            tag computed from the Dockerfile.

        Raises:
            LaunchError: If the job's build context or a requested Dockerfile
                does not exist.
        """
        entrypoint = (
            self._launch_project.get_job_entry_point()
            or self._launch_project.override_entrypoint
        )
        assert entrypoint is not None
        assert entrypoint.name is not None
        assert self._launch_project.project_dir is not None

        # The project directory is the default build context root. If the job
        # specifies a build context, we use that as the build context.
        build_context_root_dir = self._launch_project.project_dir
        job_build_context = self._launch_project.job_build_context
        if job_build_context:
            full_path = os.path.join(build_context_root_dir, job_build_context)
            if not os.path.exists(full_path):
                raise LaunchError(f"Build context does not exist at {full_path}")
            build_context_root_dir = full_path

        # This is the case where the user specifies a Dockerfile to use.
        override_dockerfile = self._launch_project.override_dockerfile
        if override_dockerfile:
            return self._use_existing_dockerfile(
                build_context_root_dir, override_dockerfile
            )

        # If the job specifies a Dockerfile, we use that as the Dockerfile.
        job_dockerfile = self._launch_project.job_dockerfile
        if job_dockerfile:
            return self._use_existing_dockerfile(
                build_context_root_dir, job_dockerfile
            )

        # This is the case where we find Dockerfile.wandb adjacent to the
        # entrypoint. We use the entrypoint directory as the build context.
        entrypoint_dir = os.path.dirname(entrypoint.name)
        if entrypoint_dir:
            path = os.path.join(
                build_context_root_dir,
                entrypoint_dir,
                _WANDB_DOCKERFILE_NAME,
            )
        else:
            path = os.path.join(build_context_root_dir, _WANDB_DOCKERFILE_NAME)
        if os.path.exists(path):
            # Dockerfile.wandb is copied along with the rest of its directory,
            # already under the expected name.
            self._copy_tree(os.path.dirname(path), self._directory, dirs_exist_ok=True)
            # TODO: remove this once we make things more explicit for users
            if entrypoint_dir:
                # The context root is now the entrypoint's directory, so the
                # job entrypoint path is reduced to its basename.
                new_path = os.path.basename(entrypoint.name)
                entrypoint = self._launch_project.get_job_entry_point()
                if entrypoint is not None:
                    entrypoint.update_entrypoint_path(new_path)
            return self._directory, self._image_tag_from_file(path)

        # This is the case where we use our own Dockerfile template. We move
        # the user code into a src directory in the build context.
        dst_path = os.path.join(self._directory, "src")
        assert self._launch_project.project_dir is not None
        self._copy_tree(self._launch_project.project_dir, dst_path)
        shutil.copy(
            os.path.join(os.path.dirname(__file__), "templates", "_wandb_bootstrap.py"),
            self._directory,
        )
        if self._launch_project.python_version:
            runtime_path = os.path.join(dst_path, "runtime.txt")
            with open(runtime_path, "w") as fp:
                fp.write(f"python-{self._launch_project.python_version}")

        # TODO: we likely don't need to pass the whole git repo into the container
        # with open(os.path.join(directory, ".dockerignore"), "w") as f:
        #     f.write("**/.git")
        with open(os.path.join(self._directory, _WANDB_DOCKERFILE_NAME), "w") as handle:
            docker_file_contents = self._generate_dockerfile(builder_type=builder_type)
            handle.write(docker_file_contents)
        image_tag = image_tag_from_dockerfile_and_source(
            self._launch_project, docker_file_contents
        )
        return self._directory, image_tag
|
@@ -1,4 +1,5 @@
|
|
1
1
|
"""Implementation of the docker builder."""
|
2
|
+
|
2
3
|
import logging
|
3
4
|
import os
|
4
5
|
from typing import Any, Dict, Optional
|
@@ -6,8 +7,7 @@ from typing import Any, Dict, Optional
|
|
6
7
|
import wandb
|
7
8
|
import wandb.docker as docker
|
8
9
|
from wandb.sdk.launch.agent.job_status_tracker import JobAndRunStatusTracker
|
9
|
-
from wandb.sdk.launch.builder.abstract import AbstractBuilder
|
10
|
-
from wandb.sdk.launch.builder.build import registry_from_uri
|
10
|
+
from wandb.sdk.launch.builder.abstract import AbstractBuilder, registry_from_uri
|
11
11
|
from wandb.sdk.launch.environment.abstract import AbstractEnvironment
|
12
12
|
from wandb.sdk.launch.registry.abstract import AbstractRegistry
|
13
13
|
|
@@ -20,13 +20,8 @@ from ..utils import (
|
|
20
20
|
event_loop_thread_exec,
|
21
21
|
warn_failed_packages_from_build_logs,
|
22
22
|
)
|
23
|
-
from .build import (
|
24
|
-
|
25
|
-
_create_docker_build_ctx,
|
26
|
-
generate_dockerfile,
|
27
|
-
image_tag_from_dockerfile_and_source,
|
28
|
-
validate_docker_installation,
|
29
|
-
)
|
23
|
+
from .build import _WANDB_DOCKERFILE_NAME, validate_docker_installation
|
24
|
+
from .context_manager import BuildContextManager
|
30
25
|
|
31
26
|
_logger = logging.getLogger(__name__)
|
32
27
|
|
@@ -40,7 +35,6 @@ class DockerBuilder(AbstractBuilder):
|
|
40
35
|
"""
|
41
36
|
|
42
37
|
builder_type = "docker"
|
43
|
-
base_image = "python:3.8"
|
44
38
|
target_platform = "linux/amd64"
|
45
39
|
|
46
40
|
def __init__(
|
@@ -123,17 +117,11 @@ class DockerBuilder(AbstractBuilder):
|
|
123
117
|
await self.verify()
|
124
118
|
await self.login()
|
125
119
|
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
runner_type=launch_project.resource,
|
130
|
-
builder_type="docker",
|
131
|
-
dockerfile=launch_project.override_dockerfile,
|
132
|
-
)
|
133
|
-
|
134
|
-
image_tag = image_tag_from_dockerfile_and_source(launch_project, dockerfile_str)
|
135
|
-
|
120
|
+
build_context_manager = BuildContextManager(launch_project=launch_project)
|
121
|
+
build_ctx_path, image_tag = build_context_manager.create_build_context("docker")
|
122
|
+
dockerfile = os.path.join(build_ctx_path, _WANDB_DOCKERFILE_NAME)
|
136
123
|
repository = None if not self.registry else await self.registry.get_repo_uri()
|
124
|
+
|
137
125
|
# if repo is set, use the repo name as the image name
|
138
126
|
if repository:
|
139
127
|
image_uri = f"{repository}:{image_tag}"
|
@@ -151,9 +139,6 @@ class DockerBuilder(AbstractBuilder):
|
|
151
139
|
_logger.info(
|
152
140
|
f"image {image_uri} does not already exist in repository, building."
|
153
141
|
)
|
154
|
-
|
155
|
-
build_ctx_path = _create_docker_build_ctx(launch_project, dockerfile_str)
|
156
|
-
dockerfile = os.path.join(build_ctx_path, _WANDB_DOCKERFILE_NAME)
|
157
142
|
try:
|
158
143
|
output = await event_loop_thread_exec(docker.build)(
|
159
144
|
tags=[image_uri],
|
@@ -13,8 +13,7 @@ from typing import Any, Dict, Optional
|
|
13
13
|
|
14
14
|
import wandb
|
15
15
|
from wandb.sdk.launch.agent.job_status_tracker import JobAndRunStatusTracker
|
16
|
-
from wandb.sdk.launch.builder.abstract import AbstractBuilder
|
17
|
-
from wandb.sdk.launch.builder.build import registry_from_uri
|
16
|
+
from wandb.sdk.launch.builder.abstract import AbstractBuilder, registry_from_uri
|
18
17
|
from wandb.sdk.launch.environment.abstract import AbstractEnvironment
|
19
18
|
from wandb.sdk.launch.environment.azure_environment import AzureEnvironment
|
20
19
|
from wandb.sdk.launch.registry.abstract import AbstractRegistry
|
@@ -32,12 +31,8 @@ from ..utils import (
|
|
32
31
|
get_kube_context_and_api_client,
|
33
32
|
warn_failed_packages_from_build_logs,
|
34
33
|
)
|
35
|
-
from .build import (
|
36
|
-
|
37
|
-
_create_docker_build_ctx,
|
38
|
-
generate_dockerfile,
|
39
|
-
image_tag_from_dockerfile_and_source,
|
40
|
-
)
|
34
|
+
from .build import _WANDB_DOCKERFILE_NAME
|
35
|
+
from .context_manager import BuildContextManager
|
41
36
|
|
42
37
|
get_module(
|
43
38
|
"kubernetes_asyncio",
|
@@ -261,17 +256,13 @@ class KanikoBuilder(AbstractBuilder):
|
|
261
256
|
job_tracker: Optional[JobAndRunStatusTracker] = None,
|
262
257
|
) -> str:
|
263
258
|
await self.verify()
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
runner_type=launch_project.resource,
|
269
|
-
builder_type="kaniko",
|
270
|
-
dockerfile=launch_project.override_dockerfile,
|
271
|
-
)
|
272
|
-
image_tag = image_tag_from_dockerfile_and_source(launch_project, dockerfile_str)
|
259
|
+
|
260
|
+
build_contex_manager = BuildContextManager(launch_project=launch_project)
|
261
|
+
context_path, image_tag = build_contex_manager.create_build_context("kaniko")
|
262
|
+
run_id = launch_project.run_id
|
273
263
|
repo_uri = await self.registry.get_repo_uri()
|
274
264
|
image_uri = repo_uri + ":" + image_tag
|
265
|
+
|
275
266
|
if (
|
276
267
|
not launch_project.build_required()
|
277
268
|
and await self.registry.check_image_exists(image_uri)
|
@@ -279,14 +270,10 @@ class KanikoBuilder(AbstractBuilder):
|
|
279
270
|
return image_uri
|
280
271
|
|
281
272
|
_logger.info(f"Building image {image_uri}...")
|
282
|
-
|
283
|
-
context_path = _create_docker_build_ctx(launch_project, dockerfile_str)
|
284
|
-
run_id = launch_project.run_id
|
285
|
-
|
286
273
|
_, api_client = await get_kube_context_and_api_client(
|
287
274
|
kubernetes, launch_project.resource_args
|
288
275
|
)
|
289
|
-
# TODO: use same client as
|
276
|
+
# TODO: use same client as kubernetes_runner.py
|
290
277
|
batch_v1 = client.BatchV1Api(api_client)
|
291
278
|
core_v1 = client.CoreV1Api(api_client)
|
292
279
|
|
@@ -492,8 +479,8 @@ class KanikoBuilder(AbstractBuilder):
|
|
492
479
|
}
|
493
480
|
)
|
494
481
|
else:
|
495
|
-
|
496
|
-
f"
|
482
|
+
wandb.termwarn(
|
483
|
+
f"{LOG_PREFIX}Automatic credential handling is not supported for registry type {type(self.registry)}. Build job: {self.build_job_name}"
|
497
484
|
)
|
498
485
|
volumes.append(
|
499
486
|
{
|
@@ -522,7 +509,7 @@ class KanikoBuilder(AbstractBuilder):
|
|
522
509
|
volume_mounts.append(
|
523
510
|
{"name": "docker-config", "mountPath": "/kaniko/.docker/"}
|
524
511
|
)
|
525
|
-
# Kaniko doesn't want https:// at the
|
512
|
+
# Kaniko doesn't want https:// at the beginning of the image tag.
|
526
513
|
destination = image_tag
|
527
514
|
if destination.startswith("https://"):
|
528
515
|
destination = destination.replace("https://", "")
|
wandb/sdk/launch/builder/noop.py
CHANGED
@@ -0,0 +1,92 @@
|
|
1
|
+
# Format-string templates used to assemble a two-stage Dockerfile.
#
# Text inside the triple-quoted literals (including lines starting with `#`)
# is Dockerfile content, not Python commentary. The literals are not raw
# strings, so a trailing backslash followed by a newline is consumed by Python
# as a line continuation: a multi-line RUN instruction below ends up as one
# physical line in the resulting string.

# Top-level template. Placeholders: py_build_image, requirements_section,
# base_setup, user_setup, workdir, uid, entrypoint_section.
DOCKERFILE_TEMPLATE = """
# ----- stage 1: build -----
FROM {py_build_image} as build

# requirements section depends on pip vs conda, and presence of buildx
ENV PIP_PROGRESS_BAR off
{requirements_section}

# ----- stage 2: base -----
{base_setup}

COPY --from=build /env /env
ENV PATH="/env/bin:$PATH"

ENV SHELL /bin/bash

# some resources (eg sagemaker) must run on root
{user_setup}

WORKDIR {workdir}
RUN chown -R {uid} {workdir}

# make artifacts cache dir unrelated to build
RUN mkdir -p {workdir}/.cache && chown -R {uid} {workdir}/.cache

# copy code/etc
COPY --chown={uid} src/ {workdir}

ENV PYTHONUNBUFFERED=1

{entrypoint_section}
"""

# this goes into base_setup in TEMPLATE
# Placeholder: py_base_image.
PYTHON_SETUP_TEMPLATE = """
FROM {py_base_image} as base
"""

# this goes into base_setup in TEMPLATE
# Placeholders: accelerator_base_image, python_packages, py_version.
ACCELERATOR_SETUP_TEMPLATE = """
FROM {accelerator_base_image} as base

# make non-interactive so build doesn't block on questions
ENV DEBIAN_FRONTEND=noninteractive

# install python
RUN apt-get update -qq && apt-get install --no-install-recommends -y \
{python_packages} \
&& apt-get -qq purge && apt-get -qq clean \
&& rm -rf /var/lib/apt/lists/*

# make sure `python` points at the right version
RUN update-alternatives --install /usr/bin/python python /usr/bin/python{py_version} 1 \
&& update-alternatives --install /usr/local/bin/python python /usr/bin/python{py_version} 1
"""

# this goes into requirements_section in TEMPLATE
# Placeholders: requirements_files, buildx_optional_prefix, pip_install.
# Creates the virtualenv at /env, which DOCKERFILE_TEMPLATE copies out of the
# build stage.
PIP_TEMPLATE = """
RUN python -m venv /env
# make sure we install into the env
ENV PATH="/env/bin:$PATH"

COPY {requirements_files} ./
{buildx_optional_prefix} {pip_install}
"""

# this goes into requirements_section in TEMPLATE
# Placeholder: buildx_optional_prefix.
# Expects src/environment.yml in the build context and unpacks the packed
# conda environment to /env.
CONDA_TEMPLATE = """
COPY src/environment.yml .
{buildx_optional_prefix} conda env create -f environment.yml -n env

# pack the environment so that we can transfer to the base image
RUN conda install -c conda-forge conda-pack
RUN conda pack -n env -o /tmp/env.tar && \
mkdir /env && cd /env && tar xf /tmp/env.tar && \
rm /tmp/env.tar
RUN /env/bin/conda-unpack
"""

# Placeholders: uid, user.
# The trailing `|| echo ""` keeps the RUN step from failing when useradd
# returns non-zero.
# presumably this goes into user_setup in TEMPLATE -- TODO confirm against caller
USER_CREATE_TEMPLATE = """
RUN useradd \
--create-home \
--no-log-init \
--shell /bin/bash \
--gid 0 \
--uid {uid} \
{user} || echo ""
"""

# Placeholder: entrypoint.
# presumably this goes into entrypoint_section in TEMPLATE -- TODO confirm against caller
ENTRYPOINT_TEMPLATE = """
ENTRYPOINT {entrypoint}
"""
|
wandb/sdk/launch/create_job.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
import json
|
2
2
|
import logging
|
3
3
|
import os
|
4
|
+
import re
|
4
5
|
import sys
|
5
6
|
import tempfile
|
6
7
|
from typing import Any, Dict, List, Optional, Tuple
|
@@ -9,9 +10,12 @@ import wandb
|
|
9
10
|
from wandb.apis.internal import Api
|
10
11
|
from wandb.sdk.artifacts.artifact import Artifact
|
11
12
|
from wandb.sdk.internal.job_builder import JobBuilder
|
12
|
-
from wandb.sdk.launch.builder.build import get_current_python_version
|
13
13
|
from wandb.sdk.launch.git_reference import GitReference
|
14
|
-
from wandb.sdk.launch.utils import
|
14
|
+
from wandb.sdk.launch.utils import (
|
15
|
+
_is_git_uri,
|
16
|
+
get_current_python_version,
|
17
|
+
get_entrypoint_file,
|
18
|
+
)
|
15
19
|
from wandb.sdk.lib import filesystem
|
16
20
|
from wandb.util import make_artifact_name_safe
|
17
21
|
|
@@ -19,6 +23,9 @@ logging.basicConfig(stream=sys.stdout, level=logging.INFO)
|
|
19
23
|
_logger = logging.getLogger("wandb")
|
20
24
|
|
21
25
|
|
26
|
+
CODE_ARTIFACT_EXCLUDE_PATHS = ["wandb", ".git"]
|
27
|
+
|
28
|
+
|
22
29
|
def create_job(
|
23
30
|
path: str,
|
24
31
|
job_type: str,
|
@@ -30,6 +37,8 @@ def create_job(
|
|
30
37
|
runtime: Optional[str] = None,
|
31
38
|
entrypoint: Optional[str] = None,
|
32
39
|
git_hash: Optional[str] = None,
|
40
|
+
build_context: Optional[str] = None,
|
41
|
+
dockerfile: Optional[str] = None,
|
33
42
|
) -> Optional[Artifact]:
|
34
43
|
"""Create a job from a path, not as the output of a run.
|
35
44
|
|
@@ -42,9 +51,12 @@ def create_job(
|
|
42
51
|
description (Optional[str]): Description of the job.
|
43
52
|
aliases (Optional[List[str]]): Aliases for the job.
|
44
53
|
runtime (Optional[str]): Python runtime of the job, like 3.9.
|
45
|
-
entrypoint (Optional[str]): Entrypoint of the job.
|
54
|
+
entrypoint (Optional[str]): Entrypoint of the job. If build_context is
|
55
|
+
provided, path is relative to build_context.
|
46
56
|
git_hash (Optional[str]): Git hash of a specific commit, when using git type jobs.
|
47
|
-
|
57
|
+
build_context (Optional[str]): Path to the build context, when using image type jobs.
|
58
|
+
dockerfile (Optional[str]): Path to the Dockerfile, when using image type jobs.
|
59
|
+
If build_context is provided, path is relative to build_context.
|
48
60
|
|
49
61
|
Returns:
|
50
62
|
Optional[Artifact]: The artifact created by the job, the action (for printing), and job aliases.
|
@@ -81,6 +93,8 @@ def create_job(
|
|
81
93
|
runtime,
|
82
94
|
entrypoint,
|
83
95
|
git_hash,
|
96
|
+
build_context,
|
97
|
+
dockerfile,
|
84
98
|
)
|
85
99
|
|
86
100
|
return artifact_job
|
@@ -98,6 +112,8 @@ def _create_job(
|
|
98
112
|
runtime: Optional[str] = None,
|
99
113
|
entrypoint: Optional[str] = None,
|
100
114
|
git_hash: Optional[str] = None,
|
115
|
+
build_context: Optional[str] = None,
|
116
|
+
dockerfile: Optional[str] = None,
|
101
117
|
) -> Tuple[Optional[Artifact], str, List[str]]:
|
102
118
|
wandb.termlog(f"Creating launch job of type: {job_type}...")
|
103
119
|
|
@@ -107,6 +123,13 @@ def _create_job(
|
|
107
123
|
)
|
108
124
|
return None, "", []
|
109
125
|
|
126
|
+
if runtime is not None:
|
127
|
+
if not re.match(r"^3\.\d+$", runtime):
|
128
|
+
wandb.termerror(
|
129
|
+
f"Runtime (-r, --runtime) must be a minor version of Python 3, "
|
130
|
+
f"e.g. 3.9 or 3.10, received {runtime}"
|
131
|
+
)
|
132
|
+
return None, "", []
|
110
133
|
aliases = aliases or []
|
111
134
|
tempdir = tempfile.TemporaryDirectory()
|
112
135
|
try:
|
@@ -161,7 +184,10 @@ def _create_job(
|
|
161
184
|
name = job_name
|
162
185
|
|
163
186
|
# build job artifact, loads wandb-metadata and creates wandb-job.json here
|
164
|
-
artifact = job_builder.build(
|
187
|
+
artifact = job_builder.build(
|
188
|
+
dockerfile=dockerfile,
|
189
|
+
build_context=build_context,
|
190
|
+
)
|
165
191
|
if not artifact:
|
166
192
|
wandb.termerror("JobBuilder failed to build a job")
|
167
193
|
_logger.debug("Failed to build job, check job source and metadata")
|
@@ -220,6 +246,7 @@ def _make_metadata_for_partial_job(
|
|
220
246
|
"""Create metadata for partial jobs, return metadata and requirements."""
|
221
247
|
metadata = {"_partial": "v0"}
|
222
248
|
if job_type == "git":
|
249
|
+
assert entrypoint is not None
|
223
250
|
repo_metadata = _create_repo_metadata(
|
224
251
|
path=path,
|
225
252
|
tempdir=tempdir.name,
|
@@ -234,12 +261,7 @@ def _make_metadata_for_partial_job(
|
|
234
261
|
return metadata, None
|
235
262
|
|
236
263
|
if job_type == "code":
|
237
|
-
|
238
|
-
wandb.termerror(
|
239
|
-
"Artifact jobs must have an entrypoint, either included in the path or specified with -E"
|
240
|
-
)
|
241
|
-
return None, None
|
242
|
-
|
264
|
+
assert entrypoint is not None
|
243
265
|
artifact_metadata, requirements = _create_artifact_metadata(
|
244
266
|
path=path, entrypoint=entrypoint, runtime=runtime
|
245
267
|
)
|
@@ -268,7 +290,7 @@ def _make_metadata_for_partial_job(
|
|
268
290
|
def _create_repo_metadata(
|
269
291
|
path: str,
|
270
292
|
tempdir: str,
|
271
|
-
entrypoint:
|
293
|
+
entrypoint: str,
|
272
294
|
git_hash: Optional[str] = None,
|
273
295
|
runtime: Optional[str] = None,
|
274
296
|
) -> Optional[Dict[str, Any]]:
|
@@ -308,28 +330,12 @@ def _create_repo_metadata(
|
|
308
330
|
|
309
331
|
python_version = _clean_python_version(python_version)
|
310
332
|
|
311
|
-
# check if entrypoint is valid
|
312
|
-
assert entrypoint is not None
|
313
|
-
entrypoint_list = entrypoint.split(" ")
|
314
|
-
entrypoint_file = get_entrypoint_file(entrypoint_list)
|
315
|
-
if not entrypoint_file:
|
316
|
-
wandb.termerror(
|
317
|
-
f"Entrypoint {entrypoint} is invalid. An entrypoint should include both an executable and a file, for example 'python train.py'"
|
318
|
-
)
|
319
|
-
return None
|
320
|
-
|
321
|
-
if not os.path.exists(os.path.join(local_dir, entrypoint_file)):
|
322
|
-
wandb.termerror(f"Entrypoint file {entrypoint_file} not found in git repo")
|
323
|
-
return None
|
324
|
-
|
325
333
|
metadata = {
|
326
334
|
"git": {
|
327
335
|
"commit": commit,
|
328
336
|
"remote": ref.url,
|
329
337
|
},
|
330
|
-
"
|
331
|
-
"codePath": entrypoint_file,
|
332
|
-
"entrypoint": entrypoint_list,
|
338
|
+
"entrypoint": entrypoint.split(" "),
|
333
339
|
"python": python_version, # used to build container
|
334
340
|
"notebook": False, # partial jobs from notebooks not supported
|
335
341
|
}
|
@@ -410,15 +416,12 @@ def _make_code_artifact(
|
|
410
416
|
|
411
417
|
Returns the name of the eventual job.
|
412
418
|
"""
|
413
|
-
assert entrypoint is not None
|
414
419
|
entrypoint_list = entrypoint.split(" ")
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
return None
|
421
|
-
|
420
|
+
# We no longer require the entrypoint to end in an existing file. But we
|
421
|
+
# need something to use as the default job artifact name. In the future we
|
422
|
+
# may require the user to provide a job name explicitly when calling
|
423
|
+
# wandb job create.
|
424
|
+
entrypoint_file = entrypoint_list[-1]
|
422
425
|
artifact_name = _make_code_artifact_name(os.path.join(path, entrypoint_file), name)
|
423
426
|
code_artifact = wandb.Artifact(
|
424
427
|
name=artifact_name,
|
@@ -436,6 +439,13 @@ def _make_code_artifact(
|
|
436
439
|
wandb.termerror(f"Error adding to code artifact: {e}")
|
437
440
|
return None
|
438
441
|
|
442
|
+
# Remove paths we don't want to include, if present
|
443
|
+
for item in CODE_ARTIFACT_EXCLUDE_PATHS:
|
444
|
+
try:
|
445
|
+
code_artifact.remove(item)
|
446
|
+
except FileNotFoundError:
|
447
|
+
pass
|
448
|
+
|
439
449
|
res, _ = api.create_artifact(
|
440
450
|
artifact_type_name="code",
|
441
451
|
artifact_collection_name=artifact_name,
|