wandb 0.16.6__py3-none-any.whl → 0.17.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (193) hide show
  1. package_readme.md +95 -0
  2. wandb/__init__.py +2 -3
  3. wandb/agents/pyagent.py +0 -1
  4. wandb/analytics/sentry.py +2 -1
  5. wandb/apis/importers/internals/internal.py +0 -1
  6. wandb/apis/importers/internals/protocols.py +30 -56
  7. wandb/apis/importers/mlflow.py +13 -26
  8. wandb/apis/importers/wandb.py +8 -14
  9. wandb/apis/internal.py +0 -3
  10. wandb/apis/public/api.py +55 -3
  11. wandb/apis/public/artifacts.py +1 -0
  12. wandb/apis/public/files.py +1 -0
  13. wandb/apis/public/history.py +1 -0
  14. wandb/apis/public/jobs.py +17 -4
  15. wandb/apis/public/projects.py +1 -0
  16. wandb/apis/public/reports.py +1 -0
  17. wandb/apis/public/runs.py +15 -17
  18. wandb/apis/public/sweeps.py +1 -0
  19. wandb/apis/public/teams.py +1 -0
  20. wandb/apis/public/users.py +1 -0
  21. wandb/apis/reports/v1/_blocks.py +3 -7
  22. wandb/apis/reports/v2/gql.py +1 -0
  23. wandb/apis/reports/v2/interface.py +3 -4
  24. wandb/apis/reports/v2/internal.py +5 -8
  25. wandb/cli/cli.py +92 -22
  26. wandb/data_types.py +9 -6
  27. wandb/docker/__init__.py +1 -1
  28. wandb/env.py +38 -8
  29. wandb/errors/__init__.py +5 -0
  30. wandb/errors/term.py +10 -2
  31. wandb/filesync/step_checksum.py +1 -4
  32. wandb/filesync/step_prepare.py +4 -24
  33. wandb/filesync/step_upload.py +4 -106
  34. wandb/filesync/upload_job.py +0 -76
  35. wandb/integration/catboost/catboost.py +1 -1
  36. wandb/integration/fastai/__init__.py +1 -0
  37. wandb/integration/huggingface/resolver.py +2 -2
  38. wandb/integration/keras/__init__.py +1 -0
  39. wandb/integration/keras/callbacks/metrics_logger.py +1 -1
  40. wandb/integration/keras/keras.py +7 -7
  41. wandb/integration/langchain/wandb_tracer.py +1 -0
  42. wandb/integration/lightning/fabric/logger.py +1 -3
  43. wandb/integration/metaflow/metaflow.py +41 -6
  44. wandb/integration/openai/fine_tuning.py +3 -3
  45. wandb/integration/prodigy/prodigy.py +1 -1
  46. wandb/old/summary.py +1 -1
  47. wandb/plot/confusion_matrix.py +1 -1
  48. wandb/plot/pr_curve.py +2 -1
  49. wandb/plot/roc_curve.py +2 -1
  50. wandb/{plots → plot}/utils.py +13 -25
  51. wandb/proto/v3/wandb_internal_pb2.py +364 -332
  52. wandb/proto/v3/wandb_settings_pb2.py +2 -2
  53. wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
  54. wandb/proto/v4/wandb_internal_pb2.py +322 -316
  55. wandb/proto/v4/wandb_settings_pb2.py +2 -2
  56. wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
  57. wandb/proto/wandb_deprecated.py +7 -1
  58. wandb/proto/wandb_internal_codegen.py +3 -29
  59. wandb/sdk/artifacts/artifact.py +26 -11
  60. wandb/sdk/artifacts/artifact_download_logger.py +1 -0
  61. wandb/sdk/artifacts/artifact_file_cache.py +18 -4
  62. wandb/sdk/artifacts/artifact_instance_cache.py +1 -0
  63. wandb/sdk/artifacts/artifact_manifest.py +1 -0
  64. wandb/sdk/artifacts/artifact_manifest_entry.py +7 -3
  65. wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +1 -0
  66. wandb/sdk/artifacts/artifact_saver.py +2 -8
  67. wandb/sdk/artifacts/artifact_state.py +1 -0
  68. wandb/sdk/artifacts/artifact_ttl.py +1 -0
  69. wandb/sdk/artifacts/exceptions.py +1 -0
  70. wandb/sdk/artifacts/storage_handlers/azure_handler.py +1 -0
  71. wandb/sdk/artifacts/storage_handlers/gcs_handler.py +13 -18
  72. wandb/sdk/artifacts/storage_handlers/http_handler.py +1 -0
  73. wandb/sdk/artifacts/storage_handlers/local_file_handler.py +1 -0
  74. wandb/sdk/artifacts/storage_handlers/multi_handler.py +1 -0
  75. wandb/sdk/artifacts/storage_handlers/s3_handler.py +5 -3
  76. wandb/sdk/artifacts/storage_handlers/tracking_handler.py +1 -0
  77. wandb/sdk/artifacts/storage_handlers/wb_artifact_handler.py +1 -0
  78. wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +1 -0
  79. wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +3 -42
  80. wandb/sdk/artifacts/storage_policy.py +2 -12
  81. wandb/sdk/data_types/_dtypes.py +8 -8
  82. wandb/sdk/data_types/base_types/media.py +3 -6
  83. wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +3 -1
  84. wandb/sdk/data_types/image.py +1 -1
  85. wandb/sdk/data_types/video.py +1 -1
  86. wandb/sdk/integration_utils/auto_logging.py +5 -6
  87. wandb/sdk/integration_utils/data_logging.py +10 -6
  88. wandb/sdk/interface/interface.py +68 -32
  89. wandb/sdk/interface/interface_shared.py +7 -13
  90. wandb/sdk/internal/datastore.py +1 -1
  91. wandb/sdk/internal/file_pusher.py +2 -5
  92. wandb/sdk/internal/file_stream.py +5 -18
  93. wandb/sdk/internal/handler.py +18 -2
  94. wandb/sdk/internal/internal.py +0 -1
  95. wandb/sdk/internal/internal_api.py +1 -129
  96. wandb/sdk/internal/internal_util.py +0 -1
  97. wandb/sdk/internal/job_builder.py +159 -45
  98. wandb/sdk/internal/profiler.py +1 -0
  99. wandb/sdk/internal/progress.py +0 -28
  100. wandb/sdk/internal/run.py +1 -0
  101. wandb/sdk/internal/sender.py +1 -2
  102. wandb/sdk/internal/system/assets/gpu_amd.py +44 -44
  103. wandb/sdk/internal/system/assets/gpu_apple.py +56 -11
  104. wandb/sdk/internal/system/assets/interfaces.py +6 -8
  105. wandb/sdk/internal/system/assets/open_metrics.py +2 -2
  106. wandb/sdk/internal/system/assets/trainium.py +1 -3
  107. wandb/sdk/launch/__init__.py +9 -1
  108. wandb/sdk/launch/_launch.py +4 -24
  109. wandb/sdk/launch/_launch_add.py +1 -3
  110. wandb/sdk/launch/_project_spec.py +186 -224
  111. wandb/sdk/launch/agent/agent.py +37 -13
  112. wandb/sdk/launch/agent/config.py +72 -14
  113. wandb/sdk/launch/builder/abstract.py +69 -1
  114. wandb/sdk/launch/builder/build.py +156 -555
  115. wandb/sdk/launch/builder/context_manager.py +235 -0
  116. wandb/sdk/launch/builder/docker_builder.py +8 -23
  117. wandb/sdk/launch/builder/kaniko_builder.py +12 -25
  118. wandb/sdk/launch/builder/noop.py +1 -0
  119. wandb/sdk/launch/builder/templates/dockerfile.py +92 -0
  120. wandb/sdk/launch/create_job.py +47 -37
  121. wandb/sdk/launch/environment/abstract.py +1 -0
  122. wandb/sdk/launch/environment/gcp_environment.py +1 -0
  123. wandb/sdk/launch/environment/local_environment.py +1 -0
  124. wandb/sdk/launch/inputs/files.py +148 -0
  125. wandb/sdk/launch/inputs/internal.py +217 -0
  126. wandb/sdk/launch/inputs/manage.py +95 -0
  127. wandb/sdk/launch/loader.py +1 -0
  128. wandb/sdk/launch/registry/abstract.py +1 -0
  129. wandb/sdk/launch/registry/azure_container_registry.py +1 -0
  130. wandb/sdk/launch/registry/elastic_container_registry.py +1 -0
  131. wandb/sdk/launch/registry/google_artifact_registry.py +2 -1
  132. wandb/sdk/launch/registry/local_registry.py +1 -0
  133. wandb/sdk/launch/runner/abstract.py +1 -0
  134. wandb/sdk/launch/runner/kubernetes_monitor.py +1 -0
  135. wandb/sdk/launch/runner/kubernetes_runner.py +9 -10
  136. wandb/sdk/launch/runner/local_container.py +2 -3
  137. wandb/sdk/launch/runner/local_process.py +8 -29
  138. wandb/sdk/launch/runner/sagemaker_runner.py +21 -20
  139. wandb/sdk/launch/runner/vertex_runner.py +8 -7
  140. wandb/sdk/launch/sweeps/scheduler.py +4 -3
  141. wandb/sdk/launch/sweeps/scheduler_sweep.py +2 -1
  142. wandb/sdk/launch/sweeps/utils.py +3 -3
  143. wandb/sdk/launch/utils.py +15 -140
  144. wandb/sdk/lib/_settings_toposort_generated.py +0 -5
  145. wandb/sdk/lib/fsm.py +8 -12
  146. wandb/sdk/lib/gitlib.py +4 -4
  147. wandb/sdk/lib/import_hooks.py +1 -1
  148. wandb/sdk/lib/lazyloader.py +0 -1
  149. wandb/sdk/lib/proto_util.py +23 -2
  150. wandb/sdk/lib/redirect.py +19 -14
  151. wandb/sdk/lib/retry.py +3 -2
  152. wandb/sdk/lib/tracelog.py +1 -1
  153. wandb/sdk/service/service.py +19 -16
  154. wandb/sdk/verify/verify.py +2 -1
  155. wandb/sdk/wandb_init.py +14 -55
  156. wandb/sdk/wandb_manager.py +2 -2
  157. wandb/sdk/wandb_require.py +5 -0
  158. wandb/sdk/wandb_run.py +114 -56
  159. wandb/sdk/wandb_settings.py +0 -48
  160. wandb/sdk/wandb_setup.py +1 -1
  161. wandb/sklearn/__init__.py +1 -0
  162. wandb/sklearn/plot/__init__.py +1 -0
  163. wandb/sklearn/plot/classifier.py +11 -12
  164. wandb/sklearn/plot/clusterer.py +2 -1
  165. wandb/sklearn/plot/regressor.py +1 -0
  166. wandb/sklearn/plot/shared.py +1 -0
  167. wandb/sklearn/utils.py +1 -0
  168. wandb/testing/relay.py +4 -4
  169. wandb/trigger.py +1 -0
  170. wandb/util.py +67 -54
  171. wandb/wandb_controller.py +2 -3
  172. wandb/wandb_torch.py +1 -2
  173. {wandb-0.16.6.dist-info → wandb-0.17.0.dist-info}/METADATA +67 -70
  174. {wandb-0.16.6.dist-info → wandb-0.17.0.dist-info}/RECORD +177 -187
  175. {wandb-0.16.6.dist-info → wandb-0.17.0.dist-info}/WHEEL +1 -2
  176. wandb/bin/apple_gpu_stats +0 -0
  177. wandb/catboost/__init__.py +0 -9
  178. wandb/fastai/__init__.py +0 -9
  179. wandb/keras/__init__.py +0 -18
  180. wandb/lightgbm/__init__.py +0 -9
  181. wandb/plots/__init__.py +0 -6
  182. wandb/plots/explain_text.py +0 -36
  183. wandb/plots/heatmap.py +0 -81
  184. wandb/plots/named_entity.py +0 -43
  185. wandb/plots/part_of_speech.py +0 -50
  186. wandb/plots/plot_definitions.py +0 -768
  187. wandb/plots/precision_recall.py +0 -121
  188. wandb/plots/roc.py +0 -103
  189. wandb/sacred/__init__.py +0 -3
  190. wandb/xgboost/__init__.py +0 -9
  191. wandb-0.16.6.dist-info/top_level.txt +0 -1
  192. {wandb-0.16.6.dist-info → wandb-0.17.0.dist-info}/entry_points.txt +0 -0
  193. {wandb-0.16.6.dist-info → wandb-0.17.0.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,235 @@
1
+ import logging
2
+ import os
3
+ import shutil
4
+ import tempfile
5
+ from typing import Tuple
6
+
7
+ from wandb.sdk.launch._project_spec import LaunchProject
8
+ from wandb.sdk.launch.builder.build import image_tag_from_dockerfile_and_source
9
+ from wandb.sdk.launch.errors import LaunchError
10
+ from wandb.sdk.launch.utils import get_current_python_version
11
+
12
+ from .build import (
13
+ _WANDB_DOCKERFILE_NAME,
14
+ get_base_setup,
15
+ get_docker_user,
16
+ get_entrypoint_setup,
17
+ get_requirements_section,
18
+ get_user_setup,
19
+ )
20
+ from .templates.dockerfile import DOCKERFILE_TEMPLATE
21
+
22
+ _logger = logging.getLogger(__name__)
23
+
24
+
25
class BuildContextManager:
    """Creates a build context for a container image from job source code.

    The build context root is the launch project's `project_dir`, or the
    `job_build_context` subdirectory of it when the job specifies one. The
    Dockerfile is then selected in the following order of precedence:

    - If `override_dockerfile` is set on the LaunchProject, the build context
      root is copied and that Dockerfile (resolved relative to the root) is
      used. `override_dockerfile` can be set in a launch spec via the `-D`
      flag to `wandb launch` or in the `overrides` section of the launch
      drawer.

    - If the job itself specifies a Dockerfile (`job_dockerfile`), the build
      context root is copied and that Dockerfile (resolved relative to the
      root) is used.

    - If a Dockerfile.wandb is found adjacent to the entrypoint, the directory
      containing the entrypoint is used as the build context and
      Dockerfile.wandb is used as the Dockerfile.

    - Otherwise, a Dockerfile is generated from the job's requirements and
      entrypoint, and the project source is copied into a `src` directory.

    The build context is assembled in a temporary directory created with
    `tempfile.mkdtemp()`. This class does not remove that directory.
    """

    def __init__(self, launch_project: LaunchProject):
        """Initialize a BuildContextManager.

        Arguments:
            launch_project: The launch project. Its `project_dir` must be set.
        """
        self._launch_project = launch_project
        assert self._launch_project.project_dir is not None
        # Scratch directory that becomes the build context handed to the builder.
        self._directory = tempfile.mkdtemp()

    def _generate_dockerfile(self, builder_type: str) -> str:
        """Generate a Dockerfile for the container image.

        Arguments:
            builder_type: The type of builder to use. One of "docker" or "kaniko".

        Returns:
            The contents of the Dockerfile.
        """
        launch_project = self._launch_project
        entry_point = (
            launch_project.override_entrypoint or launch_project.get_job_entry_point()
        )

        # get python versions truncated to major.minor to ensure image availability
        if launch_project.python_version:
            spl = launch_project.python_version.split(".")[:2]
            py_version, py_major = (".".join(spl), spl[0])
        else:
            py_version, py_major = get_current_python_version()

        # ----- stage 1: build -----
        # use full python image for package installation
        python_build_image = f"python:{py_version}"
        requirements_section = get_requirements_section(
            launch_project, self._directory, builder_type
        )

        # ----- stage 2: base -----
        python_base_setup = get_base_setup(launch_project, py_version, py_major)

        # set up user info
        username, userid = get_docker_user(launch_project, launch_project.resource)
        user_setup = get_user_setup(username, userid, launch_project.resource)
        workdir = f"/home/{username}"

        assert entry_point is not None
        entrypoint_section = get_entrypoint_setup(entry_point)

        return DOCKERFILE_TEMPLATE.format(
            py_build_image=python_build_image,
            requirements_section=requirements_section,
            base_setup=python_base_setup,
            uid=userid,
            user_setup=user_setup,
            workdir=workdir,
            entrypoint_section=entrypoint_section,
        )

    def _copy_into_context(self, src_dir: str) -> None:
        """Copy the contents of `src_dir` into the root of the build context."""
        shutil.copytree(
            src_dir,
            self._directory,
            symlinks=True,
            # The temporary build context directory already exists.
            dirs_exist_ok=True,
            # presumably git's fsmonitor IPC socket, which is not a regular
            # file -- TODO confirm
            ignore=shutil.ignore_patterns("fsmonitor--daemon.ipc"),
        )

    def _context_from_dockerfile(
        self, context_dir: str, dockerfile_path: str
    ) -> Tuple[str, str]:
        """Build the context from `context_dir` using an existing Dockerfile.

        The Dockerfile is copied to the root of the build context under the
        name `_WANDB_DOCKERFILE_NAME`.

        Arguments:
            context_dir: Directory whose contents become the build context.
            dockerfile_path: Path to the Dockerfile to use.

        Returns:
            A pair of str: the path to the build context locally and the image
            tag computed from the Dockerfile.

        Raises:
            LaunchError: If no file exists at `dockerfile_path`.
        """
        if not os.path.exists(dockerfile_path):
            raise LaunchError(f"Dockerfile does not exist at {dockerfile_path}")
        self._copy_into_context(context_dir)
        shutil.copy(
            dockerfile_path,
            os.path.join(self._directory, _WANDB_DOCKERFILE_NAME),
        )
        # Read through a context manager so the file handle is always closed.
        with open(dockerfile_path) as f:
            dockerfile_contents = f.read()
        return self._directory, image_tag_from_dockerfile_and_source(
            self._launch_project, dockerfile_contents
        )

    def create_build_context(self, builder_type: str) -> Tuple[str, str]:
        """Create the build context for the container image.

        Arguments:
            builder_type: The type of builder to use. Only consulted when the
                Dockerfile has to be generated.

        Returns:
            A pair of str: the path to the build context locally and the image
            tag computed from the Dockerfile.

        Raises:
            LaunchError: If the job's build context, the override Dockerfile or
                the job's Dockerfile does not exist.
        """
        # NOTE(review): the job entry point takes precedence over the override
        # here, while _generate_dockerfile prefers the override -- confirm that
        # this asymmetry is intended.
        entrypoint = (
            self._launch_project.get_job_entry_point()
            or self._launch_project.override_entrypoint
        )
        assert entrypoint is not None
        assert entrypoint.name is not None
        assert self._launch_project.project_dir is not None

        # The project directory is the default build context. If the job
        # specifies a build context within it, we use that as the build context.
        build_context_root_dir = self._launch_project.project_dir
        job_build_context = self._launch_project.job_build_context
        if job_build_context:
            full_path = os.path.join(build_context_root_dir, job_build_context)
            if not os.path.exists(full_path):
                raise LaunchError(f"Build context does not exist at {full_path}")
            build_context_root_dir = full_path

        # This is the case where the user specifies a Dockerfile to use. The
        # path is resolved relative to the build context root.
        override_dockerfile = self._launch_project.override_dockerfile
        if override_dockerfile:
            return self._context_from_dockerfile(
                build_context_root_dir,
                os.path.join(build_context_root_dir, override_dockerfile),
            )

        # If the job specifies a Dockerfile, we use that as the Dockerfile.
        job_dockerfile = self._launch_project.job_dockerfile
        if job_dockerfile:
            return self._context_from_dockerfile(
                build_context_root_dir,
                os.path.join(build_context_root_dir, job_dockerfile),
            )

        # This is the case where we find Dockerfile.wandb adjacent to the
        # entrypoint. We use the entrypoint directory as the build context.
        # An empty entrypoint_dir is skipped by os.path.join, which then looks
        # in the build context root itself.
        entrypoint_dir = os.path.dirname(entrypoint.name)
        path = os.path.join(
            build_context_root_dir,
            entrypoint_dir,
            _WANDB_DOCKERFILE_NAME,
        )
        if os.path.exists(path):
            # Dockerfile.wandb is copied along with the rest of its directory,
            # so it already has the expected name in the build context.
            self._copy_into_context(os.path.dirname(path))
            # TODO: remove this once we make things more explicit for users
            if entrypoint_dir:
                # The entrypoint directory is now the context root, so the
                # job entrypoint path is rewritten to just the file name.
                new_path = os.path.basename(entrypoint.name)
                job_entrypoint = self._launch_project.get_job_entry_point()
                if job_entrypoint is not None:
                    job_entrypoint.update_entrypoint_path(new_path)
            with open(path) as f:
                docker_file_contents = f.read()
            return self._directory, image_tag_from_dockerfile_and_source(
                self._launch_project, docker_file_contents
            )

        # This is the case where we use our own Dockerfile template. We move
        # the user code into a src directory in the build context. Note that
        # the whole project_dir is copied here, not build_context_root_dir.
        dst_path = os.path.join(self._directory, "src")
        shutil.copytree(
            src=self._launch_project.project_dir,
            dst=dst_path,
            symlinks=True,
            ignore=shutil.ignore_patterns("fsmonitor--daemon.ipc"),
        )
        shutil.copy(
            os.path.join(os.path.dirname(__file__), "templates", "_wandb_bootstrap.py"),
            self._directory,
        )
        if self._launch_project.python_version:
            runtime_path = os.path.join(dst_path, "runtime.txt")
            with open(runtime_path, "w") as fp:
                fp.write(f"python-{self._launch_project.python_version}")

        # TODO: we likely don't need to pass the whole git repo into the container
        # Generate the contents before opening the target file so that a
        # failure during generation does not leave an empty Dockerfile behind.
        docker_file_contents = self._generate_dockerfile(builder_type=builder_type)
        with open(os.path.join(self._directory, _WANDB_DOCKERFILE_NAME), "w") as handle:
            handle.write(docker_file_contents)
        image_tag = image_tag_from_dockerfile_and_source(
            self._launch_project, docker_file_contents
        )
        return self._directory, image_tag
@@ -1,4 +1,5 @@
1
1
  """Implementation of the docker builder."""
2
+
2
3
  import logging
3
4
  import os
4
5
  from typing import Any, Dict, Optional
@@ -6,8 +7,7 @@ from typing import Any, Dict, Optional
6
7
  import wandb
7
8
  import wandb.docker as docker
8
9
  from wandb.sdk.launch.agent.job_status_tracker import JobAndRunStatusTracker
9
- from wandb.sdk.launch.builder.abstract import AbstractBuilder
10
- from wandb.sdk.launch.builder.build import registry_from_uri
10
+ from wandb.sdk.launch.builder.abstract import AbstractBuilder, registry_from_uri
11
11
  from wandb.sdk.launch.environment.abstract import AbstractEnvironment
12
12
  from wandb.sdk.launch.registry.abstract import AbstractRegistry
13
13
 
@@ -20,13 +20,8 @@ from ..utils import (
20
20
  event_loop_thread_exec,
21
21
  warn_failed_packages_from_build_logs,
22
22
  )
23
- from .build import (
24
- _WANDB_DOCKERFILE_NAME,
25
- _create_docker_build_ctx,
26
- generate_dockerfile,
27
- image_tag_from_dockerfile_and_source,
28
- validate_docker_installation,
29
- )
23
+ from .build import _WANDB_DOCKERFILE_NAME, validate_docker_installation
24
+ from .context_manager import BuildContextManager
30
25
 
31
26
  _logger = logging.getLogger(__name__)
32
27
 
@@ -40,7 +35,6 @@ class DockerBuilder(AbstractBuilder):
40
35
  """
41
36
 
42
37
  builder_type = "docker"
43
- base_image = "python:3.8"
44
38
  target_platform = "linux/amd64"
45
39
 
46
40
  def __init__(
@@ -123,17 +117,11 @@ class DockerBuilder(AbstractBuilder):
123
117
  await self.verify()
124
118
  await self.login()
125
119
 
126
- dockerfile_str = generate_dockerfile(
127
- launch_project=launch_project,
128
- entry_point=entrypoint,
129
- runner_type=launch_project.resource,
130
- builder_type="docker",
131
- dockerfile=launch_project.override_dockerfile,
132
- )
133
-
134
- image_tag = image_tag_from_dockerfile_and_source(launch_project, dockerfile_str)
135
-
120
+ build_context_manager = BuildContextManager(launch_project=launch_project)
121
+ build_ctx_path, image_tag = build_context_manager.create_build_context("docker")
122
+ dockerfile = os.path.join(build_ctx_path, _WANDB_DOCKERFILE_NAME)
136
123
  repository = None if not self.registry else await self.registry.get_repo_uri()
124
+
137
125
  # if repo is set, use the repo name as the image name
138
126
  if repository:
139
127
  image_uri = f"{repository}:{image_tag}"
@@ -151,9 +139,6 @@ class DockerBuilder(AbstractBuilder):
151
139
  _logger.info(
152
140
  f"image {image_uri} does not already exist in repository, building."
153
141
  )
154
-
155
- build_ctx_path = _create_docker_build_ctx(launch_project, dockerfile_str)
156
- dockerfile = os.path.join(build_ctx_path, _WANDB_DOCKERFILE_NAME)
157
142
  try:
158
143
  output = await event_loop_thread_exec(docker.build)(
159
144
  tags=[image_uri],
@@ -13,8 +13,7 @@ from typing import Any, Dict, Optional
13
13
 
14
14
  import wandb
15
15
  from wandb.sdk.launch.agent.job_status_tracker import JobAndRunStatusTracker
16
- from wandb.sdk.launch.builder.abstract import AbstractBuilder
17
- from wandb.sdk.launch.builder.build import registry_from_uri
16
+ from wandb.sdk.launch.builder.abstract import AbstractBuilder, registry_from_uri
18
17
  from wandb.sdk.launch.environment.abstract import AbstractEnvironment
19
18
  from wandb.sdk.launch.environment.azure_environment import AzureEnvironment
20
19
  from wandb.sdk.launch.registry.abstract import AbstractRegistry
@@ -32,12 +31,8 @@ from ..utils import (
32
31
  get_kube_context_and_api_client,
33
32
  warn_failed_packages_from_build_logs,
34
33
  )
35
- from .build import (
36
- _WANDB_DOCKERFILE_NAME,
37
- _create_docker_build_ctx,
38
- generate_dockerfile,
39
- image_tag_from_dockerfile_and_source,
40
- )
34
+ from .build import _WANDB_DOCKERFILE_NAME
35
+ from .context_manager import BuildContextManager
41
36
 
42
37
  get_module(
43
38
  "kubernetes_asyncio",
@@ -261,17 +256,13 @@ class KanikoBuilder(AbstractBuilder):
261
256
  job_tracker: Optional[JobAndRunStatusTracker] = None,
262
257
  ) -> str:
263
258
  await self.verify()
264
- # kaniko builder doesn't seem to work with a custom user id, need more investigation
265
- dockerfile_str = generate_dockerfile(
266
- launch_project=launch_project,
267
- entry_point=entrypoint,
268
- runner_type=launch_project.resource,
269
- builder_type="kaniko",
270
- dockerfile=launch_project.override_dockerfile,
271
- )
272
- image_tag = image_tag_from_dockerfile_and_source(launch_project, dockerfile_str)
259
+
260
+ build_contex_manager = BuildContextManager(launch_project=launch_project)
261
+ context_path, image_tag = build_contex_manager.create_build_context("kaniko")
262
+ run_id = launch_project.run_id
273
263
  repo_uri = await self.registry.get_repo_uri()
274
264
  image_uri = repo_uri + ":" + image_tag
265
+
275
266
  if (
276
267
  not launch_project.build_required()
277
268
  and await self.registry.check_image_exists(image_uri)
@@ -279,14 +270,10 @@ class KanikoBuilder(AbstractBuilder):
279
270
  return image_uri
280
271
 
281
272
  _logger.info(f"Building image {image_uri}...")
282
-
283
- context_path = _create_docker_build_ctx(launch_project, dockerfile_str)
284
- run_id = launch_project.run_id
285
-
286
273
  _, api_client = await get_kube_context_and_api_client(
287
274
  kubernetes, launch_project.resource_args
288
275
  )
289
- # TODO: use same client as kuberentes_runner.py
276
+ # TODO: use same client as kubernetes_runner.py
290
277
  batch_v1 = client.BatchV1Api(api_client)
291
278
  core_v1 = client.CoreV1Api(api_client)
292
279
 
@@ -492,8 +479,8 @@ class KanikoBuilder(AbstractBuilder):
492
479
  }
493
480
  )
494
481
  else:
495
- raise LaunchError(
496
- f"Registry type {type(self.registry)} not supported by kaniko"
482
+ wandb.termwarn(
483
+ f"{LOG_PREFIX}Automatic credential handling is not supported for registry type {type(self.registry)}. Build job: {self.build_job_name}"
497
484
  )
498
485
  volumes.append(
499
486
  {
@@ -522,7 +509,7 @@ class KanikoBuilder(AbstractBuilder):
522
509
  volume_mounts.append(
523
510
  {"name": "docker-config", "mountPath": "/kaniko/.docker/"}
524
511
  )
525
- # Kaniko doesn't want https:// at the begining of the image tag.
512
+ # Kaniko doesn't want https:// at the beginning of the image tag.
526
513
  destination = image_tag
527
514
  if destination.startswith("https://"):
528
515
  destination = destination.replace("https://", "")
@@ -1,4 +1,5 @@
1
1
  """NoOp builder implementation."""
2
+
2
3
  from typing import Any, Dict, Optional
3
4
 
4
5
  from wandb.sdk.launch.builder.abstract import AbstractBuilder
@@ -0,0 +1,92 @@
1
+ DOCKERFILE_TEMPLATE = """
2
+ # ----- stage 1: build -----
3
+ FROM {py_build_image} as build
4
+
5
+ # requirements section depends on pip vs conda, and presence of buildx
6
+ ENV PIP_PROGRESS_BAR off
7
+ {requirements_section}
8
+
9
+ # ----- stage 2: base -----
10
+ {base_setup}
11
+
12
+ COPY --from=build /env /env
13
+ ENV PATH="/env/bin:$PATH"
14
+
15
+ ENV SHELL /bin/bash
16
+
17
+ # some resources (eg sagemaker) must run on root
18
+ {user_setup}
19
+
20
+ WORKDIR {workdir}
21
+ RUN chown -R {uid} {workdir}
22
+
23
+ # make artifacts cache dir unrelated to build
24
+ RUN mkdir -p {workdir}/.cache && chown -R {uid} {workdir}/.cache
25
+
26
+ # copy code/etc
27
+ COPY --chown={uid} src/ {workdir}
28
+
29
+ ENV PYTHONUNBUFFERED=1
30
+
31
+ {entrypoint_section}
32
+ """
33
+
34
+ # this goes into base_setup in TEMPLATE
35
+ PYTHON_SETUP_TEMPLATE = """
36
+ FROM {py_base_image} as base
37
+ """
38
+
39
+ # this goes into base_setup in TEMPLATE
40
+ ACCELERATOR_SETUP_TEMPLATE = """
41
+ FROM {accelerator_base_image} as base
42
+
43
+ # make non-interactive so build doesn't block on questions
44
+ ENV DEBIAN_FRONTEND=noninteractive
45
+
46
+ # install python
47
+ RUN apt-get update -qq && apt-get install --no-install-recommends -y \
48
+ {python_packages} \
49
+ && apt-get -qq purge && apt-get -qq clean \
50
+ && rm -rf /var/lib/apt/lists/*
51
+
52
+ # make sure `python` points at the right version
53
+ RUN update-alternatives --install /usr/bin/python python /usr/bin/python{py_version} 1 \
54
+ && update-alternatives --install /usr/local/bin/python python /usr/bin/python{py_version} 1
55
+ """
56
+
57
+ # this goes into requirements_section in TEMPLATE
58
+ PIP_TEMPLATE = """
59
+ RUN python -m venv /env
60
+ # make sure we install into the env
61
+ ENV PATH="/env/bin:$PATH"
62
+
63
+ COPY {requirements_files} ./
64
+ {buildx_optional_prefix} {pip_install}
65
+ """
66
+
67
+ # this goes into requirements_section in TEMPLATE
68
+ CONDA_TEMPLATE = """
69
+ COPY src/environment.yml .
70
+ {buildx_optional_prefix} conda env create -f environment.yml -n env
71
+
72
+ # pack the environment so that we can transfer to the base image
73
+ RUN conda install -c conda-forge conda-pack
74
+ RUN conda pack -n env -o /tmp/env.tar && \
75
+ mkdir /env && cd /env && tar xf /tmp/env.tar && \
76
+ rm /tmp/env.tar
77
+ RUN /env/bin/conda-unpack
78
+ """
79
+
80
+ USER_CREATE_TEMPLATE = """
81
+ RUN useradd \
82
+ --create-home \
83
+ --no-log-init \
84
+ --shell /bin/bash \
85
+ --gid 0 \
86
+ --uid {uid} \
87
+ {user} || echo ""
88
+ """
89
+
90
+ ENTRYPOINT_TEMPLATE = """
91
+ ENTRYPOINT {entrypoint}
92
+ """
@@ -1,6 +1,7 @@
1
1
  import json
2
2
  import logging
3
3
  import os
4
+ import re
4
5
  import sys
5
6
  import tempfile
6
7
  from typing import Any, Dict, List, Optional, Tuple
@@ -9,9 +10,12 @@ import wandb
9
10
  from wandb.apis.internal import Api
10
11
  from wandb.sdk.artifacts.artifact import Artifact
11
12
  from wandb.sdk.internal.job_builder import JobBuilder
12
- from wandb.sdk.launch.builder.build import get_current_python_version
13
13
  from wandb.sdk.launch.git_reference import GitReference
14
- from wandb.sdk.launch.utils import _is_git_uri, get_entrypoint_file
14
+ from wandb.sdk.launch.utils import (
15
+ _is_git_uri,
16
+ get_current_python_version,
17
+ get_entrypoint_file,
18
+ )
15
19
  from wandb.sdk.lib import filesystem
16
20
  from wandb.util import make_artifact_name_safe
17
21
 
@@ -19,6 +23,9 @@ logging.basicConfig(stream=sys.stdout, level=logging.INFO)
19
23
  _logger = logging.getLogger("wandb")
20
24
 
21
25
 
26
+ CODE_ARTIFACT_EXCLUDE_PATHS = ["wandb", ".git"]
27
+
28
+
22
29
  def create_job(
23
30
  path: str,
24
31
  job_type: str,
@@ -30,6 +37,8 @@ def create_job(
30
37
  runtime: Optional[str] = None,
31
38
  entrypoint: Optional[str] = None,
32
39
  git_hash: Optional[str] = None,
40
+ build_context: Optional[str] = None,
41
+ dockerfile: Optional[str] = None,
33
42
  ) -> Optional[Artifact]:
34
43
  """Create a job from a path, not as the output of a run.
35
44
 
@@ -42,9 +51,12 @@ def create_job(
42
51
  description (Optional[str]): Description of the job.
43
52
  aliases (Optional[List[str]]): Aliases for the job.
44
53
  runtime (Optional[str]): Python runtime of the job, like 3.9.
45
- entrypoint (Optional[str]): Entrypoint of the job.
54
+ entrypoint (Optional[str]): Entrypoint of the job. If build_context is
55
+ provided, path is relative to build_context.
46
56
  git_hash (Optional[str]): Git hash of a specific commit, when using git type jobs.
47
-
57
+ build_context (Optional[str]): Path to the build context, when using image type jobs.
58
+ dockerfile (Optional[str]): Path to the Dockerfile, when using image type jobs.
59
+ If build_context is provided, path is relative to build_context.
48
60
 
49
61
  Returns:
50
62
  Optional[Artifact]: The artifact created by the job, the action (for printing), and job aliases.
@@ -81,6 +93,8 @@ def create_job(
81
93
  runtime,
82
94
  entrypoint,
83
95
  git_hash,
96
+ build_context,
97
+ dockerfile,
84
98
  )
85
99
 
86
100
  return artifact_job
@@ -98,6 +112,8 @@ def _create_job(
98
112
  runtime: Optional[str] = None,
99
113
  entrypoint: Optional[str] = None,
100
114
  git_hash: Optional[str] = None,
115
+ build_context: Optional[str] = None,
116
+ dockerfile: Optional[str] = None,
101
117
  ) -> Tuple[Optional[Artifact], str, List[str]]:
102
118
  wandb.termlog(f"Creating launch job of type: {job_type}...")
103
119
 
@@ -107,6 +123,13 @@ def _create_job(
107
123
  )
108
124
  return None, "", []
109
125
 
126
+ if runtime is not None:
127
+ if not re.match(r"^3\.\d+$", runtime):
128
+ wandb.termerror(
129
+ f"Runtime (-r, --runtime) must be a minor version of Python 3, "
130
+ f"e.g. 3.9 or 3.10, received {runtime}"
131
+ )
132
+ return None, "", []
110
133
  aliases = aliases or []
111
134
  tempdir = tempfile.TemporaryDirectory()
112
135
  try:
@@ -161,7 +184,10 @@ def _create_job(
161
184
  name = job_name
162
185
 
163
186
  # build job artifact, loads wandb-metadata and creates wandb-job.json here
164
- artifact = job_builder.build()
187
+ artifact = job_builder.build(
188
+ dockerfile=dockerfile,
189
+ build_context=build_context,
190
+ )
165
191
  if not artifact:
166
192
  wandb.termerror("JobBuilder failed to build a job")
167
193
  _logger.debug("Failed to build job, check job source and metadata")
@@ -220,6 +246,7 @@ def _make_metadata_for_partial_job(
220
246
  """Create metadata for partial jobs, return metadata and requirements."""
221
247
  metadata = {"_partial": "v0"}
222
248
  if job_type == "git":
249
+ assert entrypoint is not None
223
250
  repo_metadata = _create_repo_metadata(
224
251
  path=path,
225
252
  tempdir=tempdir.name,
@@ -234,12 +261,7 @@ def _make_metadata_for_partial_job(
234
261
  return metadata, None
235
262
 
236
263
  if job_type == "code":
237
- if not entrypoint:
238
- wandb.termerror(
239
- "Artifact jobs must have an entrypoint, either included in the path or specified with -E"
240
- )
241
- return None, None
242
-
264
+ assert entrypoint is not None
243
265
  artifact_metadata, requirements = _create_artifact_metadata(
244
266
  path=path, entrypoint=entrypoint, runtime=runtime
245
267
  )
@@ -268,7 +290,7 @@ def _make_metadata_for_partial_job(
268
290
  def _create_repo_metadata(
269
291
  path: str,
270
292
  tempdir: str,
271
- entrypoint: Optional[str] = None,
293
+ entrypoint: str,
272
294
  git_hash: Optional[str] = None,
273
295
  runtime: Optional[str] = None,
274
296
  ) -> Optional[Dict[str, Any]]:
@@ -308,28 +330,12 @@ def _create_repo_metadata(
308
330
 
309
331
  python_version = _clean_python_version(python_version)
310
332
 
311
- # check if entrypoint is valid
312
- assert entrypoint is not None
313
- entrypoint_list = entrypoint.split(" ")
314
- entrypoint_file = get_entrypoint_file(entrypoint_list)
315
- if not entrypoint_file:
316
- wandb.termerror(
317
- f"Entrypoint {entrypoint} is invalid. An entrypoint should include both an executable and a file, for example 'python train.py'"
318
- )
319
- return None
320
-
321
- if not os.path.exists(os.path.join(local_dir, entrypoint_file)):
322
- wandb.termerror(f"Entrypoint file {entrypoint_file} not found in git repo")
323
- return None
324
-
325
333
  metadata = {
326
334
  "git": {
327
335
  "commit": commit,
328
336
  "remote": ref.url,
329
337
  },
330
- "codePathLocal": entrypoint_file, # not in git context, optionally also set local
331
- "codePath": entrypoint_file,
332
- "entrypoint": entrypoint_list,
338
+ "entrypoint": entrypoint.split(" "),
333
339
  "python": python_version, # used to build container
334
340
  "notebook": False, # partial jobs from notebooks not supported
335
341
  }
@@ -410,15 +416,12 @@ def _make_code_artifact(
410
416
 
411
417
  Returns the name of the eventual job.
412
418
  """
413
- assert entrypoint is not None
414
419
  entrypoint_list = entrypoint.split(" ")
415
- entrypoint_file = get_entrypoint_file(entrypoint_list)
416
- if not entrypoint_file:
417
- wandb.termerror(
418
- f"Entrypoint {entrypoint} is invalid. An entrypoint should include both an executable and a file, for example 'python train.py'"
419
- )
420
- return None
421
-
420
+ # We no longer require the entrypoint to end in an existing file. But we
421
+ # need something to use as the default job artifact name. In the future we
422
+ # may require the user to provide a job name explicitly when calling
423
+ # wandb job create.
424
+ entrypoint_file = entrypoint_list[-1]
422
425
  artifact_name = _make_code_artifact_name(os.path.join(path, entrypoint_file), name)
423
426
  code_artifact = wandb.Artifact(
424
427
  name=artifact_name,
@@ -436,6 +439,13 @@ def _make_code_artifact(
436
439
  wandb.termerror(f"Error adding to code artifact: {e}")
437
440
  return None
438
441
 
442
+ # Remove paths we don't want to include, if present
443
+ for item in CODE_ARTIFACT_EXCLUDE_PATHS:
444
+ try:
445
+ code_artifact.remove(item)
446
+ except FileNotFoundError:
447
+ pass
448
+
439
449
  res, _ = api.create_artifact(
440
450
  artifact_type_name="code",
441
451
  artifact_collection_name=artifact_name,
@@ -1,4 +1,5 @@
1
1
  """Abstract base class for environments."""
2
+
2
3
  from abc import ABC, abstractmethod
3
4
 
4
5