wandb 0.16.6__py3-none-any.whl → 0.17.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (193) hide show
  1. package_readme.md +95 -0
  2. wandb/__init__.py +2 -3
  3. wandb/agents/pyagent.py +0 -1
  4. wandb/analytics/sentry.py +2 -1
  5. wandb/apis/importers/internals/internal.py +0 -1
  6. wandb/apis/importers/internals/protocols.py +30 -56
  7. wandb/apis/importers/mlflow.py +13 -26
  8. wandb/apis/importers/wandb.py +8 -14
  9. wandb/apis/internal.py +0 -3
  10. wandb/apis/public/api.py +55 -3
  11. wandb/apis/public/artifacts.py +1 -0
  12. wandb/apis/public/files.py +1 -0
  13. wandb/apis/public/history.py +1 -0
  14. wandb/apis/public/jobs.py +17 -4
  15. wandb/apis/public/projects.py +1 -0
  16. wandb/apis/public/reports.py +1 -0
  17. wandb/apis/public/runs.py +15 -17
  18. wandb/apis/public/sweeps.py +1 -0
  19. wandb/apis/public/teams.py +1 -0
  20. wandb/apis/public/users.py +1 -0
  21. wandb/apis/reports/v1/_blocks.py +3 -7
  22. wandb/apis/reports/v2/gql.py +1 -0
  23. wandb/apis/reports/v2/interface.py +3 -4
  24. wandb/apis/reports/v2/internal.py +5 -8
  25. wandb/cli/cli.py +92 -22
  26. wandb/data_types.py +9 -6
  27. wandb/docker/__init__.py +1 -1
  28. wandb/env.py +38 -8
  29. wandb/errors/__init__.py +5 -0
  30. wandb/errors/term.py +10 -2
  31. wandb/filesync/step_checksum.py +1 -4
  32. wandb/filesync/step_prepare.py +4 -24
  33. wandb/filesync/step_upload.py +4 -106
  34. wandb/filesync/upload_job.py +0 -76
  35. wandb/integration/catboost/catboost.py +1 -1
  36. wandb/integration/fastai/__init__.py +1 -0
  37. wandb/integration/huggingface/resolver.py +2 -2
  38. wandb/integration/keras/__init__.py +1 -0
  39. wandb/integration/keras/callbacks/metrics_logger.py +1 -1
  40. wandb/integration/keras/keras.py +7 -7
  41. wandb/integration/langchain/wandb_tracer.py +1 -0
  42. wandb/integration/lightning/fabric/logger.py +1 -3
  43. wandb/integration/metaflow/metaflow.py +41 -6
  44. wandb/integration/openai/fine_tuning.py +3 -3
  45. wandb/integration/prodigy/prodigy.py +1 -1
  46. wandb/old/summary.py +1 -1
  47. wandb/plot/confusion_matrix.py +1 -1
  48. wandb/plot/pr_curve.py +2 -1
  49. wandb/plot/roc_curve.py +2 -1
  50. wandb/{plots → plot}/utils.py +13 -25
  51. wandb/proto/v3/wandb_internal_pb2.py +364 -332
  52. wandb/proto/v3/wandb_settings_pb2.py +2 -2
  53. wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
  54. wandb/proto/v4/wandb_internal_pb2.py +322 -316
  55. wandb/proto/v4/wandb_settings_pb2.py +2 -2
  56. wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
  57. wandb/proto/wandb_deprecated.py +7 -1
  58. wandb/proto/wandb_internal_codegen.py +3 -29
  59. wandb/sdk/artifacts/artifact.py +26 -11
  60. wandb/sdk/artifacts/artifact_download_logger.py +1 -0
  61. wandb/sdk/artifacts/artifact_file_cache.py +18 -4
  62. wandb/sdk/artifacts/artifact_instance_cache.py +1 -0
  63. wandb/sdk/artifacts/artifact_manifest.py +1 -0
  64. wandb/sdk/artifacts/artifact_manifest_entry.py +7 -3
  65. wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +1 -0
  66. wandb/sdk/artifacts/artifact_saver.py +2 -8
  67. wandb/sdk/artifacts/artifact_state.py +1 -0
  68. wandb/sdk/artifacts/artifact_ttl.py +1 -0
  69. wandb/sdk/artifacts/exceptions.py +1 -0
  70. wandb/sdk/artifacts/storage_handlers/azure_handler.py +1 -0
  71. wandb/sdk/artifacts/storage_handlers/gcs_handler.py +13 -18
  72. wandb/sdk/artifacts/storage_handlers/http_handler.py +1 -0
  73. wandb/sdk/artifacts/storage_handlers/local_file_handler.py +1 -0
  74. wandb/sdk/artifacts/storage_handlers/multi_handler.py +1 -0
  75. wandb/sdk/artifacts/storage_handlers/s3_handler.py +5 -3
  76. wandb/sdk/artifacts/storage_handlers/tracking_handler.py +1 -0
  77. wandb/sdk/artifacts/storage_handlers/wb_artifact_handler.py +1 -0
  78. wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +1 -0
  79. wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +3 -42
  80. wandb/sdk/artifacts/storage_policy.py +2 -12
  81. wandb/sdk/data_types/_dtypes.py +8 -8
  82. wandb/sdk/data_types/base_types/media.py +3 -6
  83. wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +3 -1
  84. wandb/sdk/data_types/image.py +1 -1
  85. wandb/sdk/data_types/video.py +1 -1
  86. wandb/sdk/integration_utils/auto_logging.py +5 -6
  87. wandb/sdk/integration_utils/data_logging.py +10 -6
  88. wandb/sdk/interface/interface.py +68 -32
  89. wandb/sdk/interface/interface_shared.py +7 -13
  90. wandb/sdk/internal/datastore.py +1 -1
  91. wandb/sdk/internal/file_pusher.py +2 -5
  92. wandb/sdk/internal/file_stream.py +5 -18
  93. wandb/sdk/internal/handler.py +18 -2
  94. wandb/sdk/internal/internal.py +0 -1
  95. wandb/sdk/internal/internal_api.py +1 -129
  96. wandb/sdk/internal/internal_util.py +0 -1
  97. wandb/sdk/internal/job_builder.py +159 -45
  98. wandb/sdk/internal/profiler.py +1 -0
  99. wandb/sdk/internal/progress.py +0 -28
  100. wandb/sdk/internal/run.py +1 -0
  101. wandb/sdk/internal/sender.py +1 -2
  102. wandb/sdk/internal/system/assets/gpu_amd.py +44 -44
  103. wandb/sdk/internal/system/assets/gpu_apple.py +56 -11
  104. wandb/sdk/internal/system/assets/interfaces.py +6 -8
  105. wandb/sdk/internal/system/assets/open_metrics.py +2 -2
  106. wandb/sdk/internal/system/assets/trainium.py +1 -3
  107. wandb/sdk/launch/__init__.py +9 -1
  108. wandb/sdk/launch/_launch.py +4 -24
  109. wandb/sdk/launch/_launch_add.py +1 -3
  110. wandb/sdk/launch/_project_spec.py +186 -224
  111. wandb/sdk/launch/agent/agent.py +37 -13
  112. wandb/sdk/launch/agent/config.py +72 -14
  113. wandb/sdk/launch/builder/abstract.py +69 -1
  114. wandb/sdk/launch/builder/build.py +156 -555
  115. wandb/sdk/launch/builder/context_manager.py +235 -0
  116. wandb/sdk/launch/builder/docker_builder.py +8 -23
  117. wandb/sdk/launch/builder/kaniko_builder.py +12 -25
  118. wandb/sdk/launch/builder/noop.py +1 -0
  119. wandb/sdk/launch/builder/templates/dockerfile.py +92 -0
  120. wandb/sdk/launch/create_job.py +47 -37
  121. wandb/sdk/launch/environment/abstract.py +1 -0
  122. wandb/sdk/launch/environment/gcp_environment.py +1 -0
  123. wandb/sdk/launch/environment/local_environment.py +1 -0
  124. wandb/sdk/launch/inputs/files.py +148 -0
  125. wandb/sdk/launch/inputs/internal.py +217 -0
  126. wandb/sdk/launch/inputs/manage.py +95 -0
  127. wandb/sdk/launch/loader.py +1 -0
  128. wandb/sdk/launch/registry/abstract.py +1 -0
  129. wandb/sdk/launch/registry/azure_container_registry.py +1 -0
  130. wandb/sdk/launch/registry/elastic_container_registry.py +1 -0
  131. wandb/sdk/launch/registry/google_artifact_registry.py +2 -1
  132. wandb/sdk/launch/registry/local_registry.py +1 -0
  133. wandb/sdk/launch/runner/abstract.py +1 -0
  134. wandb/sdk/launch/runner/kubernetes_monitor.py +1 -0
  135. wandb/sdk/launch/runner/kubernetes_runner.py +9 -10
  136. wandb/sdk/launch/runner/local_container.py +2 -3
  137. wandb/sdk/launch/runner/local_process.py +8 -29
  138. wandb/sdk/launch/runner/sagemaker_runner.py +21 -20
  139. wandb/sdk/launch/runner/vertex_runner.py +8 -7
  140. wandb/sdk/launch/sweeps/scheduler.py +4 -3
  141. wandb/sdk/launch/sweeps/scheduler_sweep.py +2 -1
  142. wandb/sdk/launch/sweeps/utils.py +3 -3
  143. wandb/sdk/launch/utils.py +15 -140
  144. wandb/sdk/lib/_settings_toposort_generated.py +0 -5
  145. wandb/sdk/lib/fsm.py +8 -12
  146. wandb/sdk/lib/gitlib.py +4 -4
  147. wandb/sdk/lib/import_hooks.py +1 -1
  148. wandb/sdk/lib/lazyloader.py +0 -1
  149. wandb/sdk/lib/proto_util.py +23 -2
  150. wandb/sdk/lib/redirect.py +19 -14
  151. wandb/sdk/lib/retry.py +3 -2
  152. wandb/sdk/lib/tracelog.py +1 -1
  153. wandb/sdk/service/service.py +19 -16
  154. wandb/sdk/verify/verify.py +2 -1
  155. wandb/sdk/wandb_init.py +14 -55
  156. wandb/sdk/wandb_manager.py +2 -2
  157. wandb/sdk/wandb_require.py +5 -0
  158. wandb/sdk/wandb_run.py +114 -56
  159. wandb/sdk/wandb_settings.py +0 -48
  160. wandb/sdk/wandb_setup.py +1 -1
  161. wandb/sklearn/__init__.py +1 -0
  162. wandb/sklearn/plot/__init__.py +1 -0
  163. wandb/sklearn/plot/classifier.py +11 -12
  164. wandb/sklearn/plot/clusterer.py +2 -1
  165. wandb/sklearn/plot/regressor.py +1 -0
  166. wandb/sklearn/plot/shared.py +1 -0
  167. wandb/sklearn/utils.py +1 -0
  168. wandb/testing/relay.py +4 -4
  169. wandb/trigger.py +1 -0
  170. wandb/util.py +67 -54
  171. wandb/wandb_controller.py +2 -3
  172. wandb/wandb_torch.py +1 -2
  173. {wandb-0.16.6.dist-info → wandb-0.17.0.dist-info}/METADATA +67 -70
  174. {wandb-0.16.6.dist-info → wandb-0.17.0.dist-info}/RECORD +177 -187
  175. {wandb-0.16.6.dist-info → wandb-0.17.0.dist-info}/WHEEL +1 -2
  176. wandb/bin/apple_gpu_stats +0 -0
  177. wandb/catboost/__init__.py +0 -9
  178. wandb/fastai/__init__.py +0 -9
  179. wandb/keras/__init__.py +0 -18
  180. wandb/lightgbm/__init__.py +0 -9
  181. wandb/plots/__init__.py +0 -6
  182. wandb/plots/explain_text.py +0 -36
  183. wandb/plots/heatmap.py +0 -81
  184. wandb/plots/named_entity.py +0 -43
  185. wandb/plots/part_of_speech.py +0 -50
  186. wandb/plots/plot_definitions.py +0 -768
  187. wandb/plots/precision_recall.py +0 -121
  188. wandb/plots/roc.py +0 -103
  189. wandb/sacred/__init__.py +0 -3
  190. wandb/xgboost/__init__.py +0 -9
  191. wandb-0.16.6.dist-info/top_level.txt +0 -1
  192. {wandb-0.16.6.dist-info → wandb-0.17.0.dist-info}/entry_points.txt +0 -0
  193. {wandb-0.16.6.dist-info → wandb-0.17.0.dist-info/licenses}/LICENSE +0 -0
@@ -4,18 +4,13 @@ import logging
4
4
  import os
5
5
  import pathlib
6
6
  import shlex
7
- import shutil
8
- import sys
9
- import tempfile
10
- from typing import Any, Dict, List, Optional, Tuple
7
+ from typing import Any, Dict, List, Tuple
11
8
 
12
- import yaml
13
9
  from dockerpycreds.utils import find_executable # type: ignore
14
- from six.moves import shlex_quote
15
10
 
16
11
  import wandb
17
- import wandb.docker as docker
18
12
  import wandb.env
13
+ from wandb import docker
19
14
  from wandb.apis.internal import Api
20
15
  from wandb.sdk.launch.loader import (
21
16
  builder_from_config,
@@ -24,18 +19,15 @@ from wandb.sdk.launch.loader import (
24
19
  )
25
20
  from wandb.util import get_module
26
21
 
27
- from .._project_spec import EntryPoint, EntrypointDefaults, LaunchProject
22
+ from .._project_spec import EntryPoint, LaunchProject
28
23
  from ..errors import ExecutionError, LaunchError
29
- from ..registry.abstract import AbstractRegistry
30
- from ..registry.anon import AnonynmousRegistry
31
- from ..utils import (
32
- AZURE_CONTAINER_REGISTRY_URI_REGEX,
33
- ELASTIC_CONTAINER_REGISTRY_URI_REGEX,
34
- GCP_ARTIFACT_REGISTRY_URI_REGEX,
35
- LAUNCH_CONFIG_FILE,
36
- LOG_PREFIX,
37
- event_loop_thread_exec,
38
- resolve_build_and_registry_config,
24
+ from ..utils import LOG_PREFIX, event_loop_thread_exec
25
+ from .templates.dockerfile import (
26
+ ACCELERATOR_SETUP_TEMPLATE,
27
+ ENTRYPOINT_TEMPLATE,
28
+ PIP_TEMPLATE,
29
+ PYTHON_SETUP_TEMPLATE,
30
+ USER_CREATE_TEMPLATE,
39
31
  )
40
32
 
41
33
  _logger = logging.getLogger(__name__)
@@ -44,76 +36,95 @@ _logger = logging.getLogger(__name__)
44
36
  _WANDB_DOCKERFILE_NAME = "Dockerfile.wandb"
45
37
 
46
38
 
47
- def registry_from_uri(uri: str) -> AbstractRegistry:
48
- """Create a registry helper object from a uri.
39
+ async def validate_docker_installation() -> None:
40
+ """Verify if Docker is installed on host machine."""
41
+ find_exec = event_loop_thread_exec(find_executable)
42
+ if not await find_exec("docker"):
43
+ raise ExecutionError(
44
+ "Could not find Docker executable. "
45
+ "Ensure Docker is installed as per the instructions "
46
+ "at https://docs.docker.com/install/overview/."
47
+ )
49
48
 
50
- This function parses the URI and determines which supported registry it
51
- belongs to. It then creates a registry helper object for that registry.
52
- The supported remote registry types are:
53
- - Azure Container Registry
54
- - Google Container Registry
55
- - AWS Elastic Container Registry
56
49
 
57
- The format of the URI is as follows:
58
- - Azure Container Registry: <registry-name>.azurecr.io/<repo-name>/<image-name>
59
- - Google Container Registry: <location>-docker.pkg.dev/<project-id>/<repo-name>/<image-name>
60
- - AWS Elastic Container Registry: <account-id>.dkr.ecr.<region>.amazonaws.com/<repo-name>/<image-name>
50
+ def join(split_command: List[str]) -> str:
51
+ """Return a shell-escaped string from *split_command*.
61
52
 
62
- Our classification of the registry is based on the domain name. For example,
63
- if the uri contains `.azurecr.io`, we classify it as an Azure
64
- Container Registry. If the uri contains `.dkr.ecr`, we classify
65
- it as an AWS Elastic Container Registry. If the uri contains
66
- `-docker.pkg.dev`, we classify it as a Google Artifact Registry.
53
+ Also remove quotes from double quoted strings. Ex:
54
+ "'local container queue'" --> "local container queue"
55
+ """
56
+ return " ".join(shlex.quote(arg.replace("'", "")) for arg in split_command)
67
57
 
68
- This function will attempt to load the appropriate cloud helpers for the
69
58
 
70
- `https://` prefix is optional for all of the above.
59
+ async def build_image_from_project(
60
+ launch_project: LaunchProject,
61
+ api: Api,
62
+ launch_config: Dict[str, Any],
63
+ ) -> str:
64
+ """Construct a docker image from a project and returns the URI of the image.
71
65
 
72
66
  Arguments:
73
- uri: The uri to create a registry from.
67
+ launch_project: The project to build an image from.
68
+ api: The API object to use for fetching the project.
69
+ launch_config: The launch config to use for building the image.
74
70
 
75
71
  Returns:
76
- The registry.
77
-
78
- Raises:
79
- LaunchError: If the registry helper cannot be loaded for the given URI.
72
+ The URI of the built image.
80
73
  """
81
- if uri.startswith("https://"):
82
- uri = uri[len("https://") :]
83
-
84
- if AZURE_CONTAINER_REGISTRY_URI_REGEX.match(uri) is not None:
85
- from wandb.sdk.launch.registry.azure_container_registry import (
86
- AzureContainerRegistry,
74
+ assert launch_project.uri, "To build an image on queue a URI must be set."
75
+ launch_config = launch_config or {}
76
+ env_config = launch_config.get("environment", {})
77
+ if not isinstance(env_config, dict):
78
+ wrong_type = type(env_config).__name__
79
+ raise LaunchError(
80
+ f"Invalid environment config: {env_config} of type {wrong_type} "
81
+ "loaded from launch config. Expected dict."
87
82
  )
83
+ environment = environment_from_config(env_config)
88
84
 
89
- return AzureContainerRegistry(uri=uri)
90
-
91
- elif GCP_ARTIFACT_REGISTRY_URI_REGEX.match(uri) is not None:
92
- from wandb.sdk.launch.registry.google_artifact_registry import (
93
- GoogleArtifactRegistry,
85
+ registry_config = launch_config.get("registry", {})
86
+ if not isinstance(registry_config, dict):
87
+ wrong_type = type(registry_config).__name__
88
+ raise LaunchError(
89
+ f"Invalid registry config: {registry_config} of type {wrong_type}"
90
+ " loaded from launch config. Expected dict."
94
91
  )
92
+ registry = registry_from_config(registry_config, environment)
95
93
 
96
- return GoogleArtifactRegistry(uri=uri)
97
-
98
- elif ELASTIC_CONTAINER_REGISTRY_URI_REGEX.match(uri) is not None:
99
- from wandb.sdk.launch.registry.elastic_container_registry import (
100
- ElasticContainerRegistry,
94
+ builder_config = launch_config.get("builder", {})
95
+ if not isinstance(builder_config, dict):
96
+ wrong_type = type(builder_config).__name__
97
+ raise LaunchError(
98
+ f"Invalid builder config: {builder_config} of type {wrong_type} "
99
+ "loaded from launch config. Expected dict."
101
100
  )
101
+ builder = builder_from_config(builder_config, environment, registry)
102
102
 
103
- return ElasticContainerRegistry(uri=uri)
103
+ if not builder:
104
+ raise LaunchError("Unable to build image. No builder found.")
105
+
106
+ launch_project.fetch_and_validate_project()
104
107
 
105
- return AnonynmousRegistry(uri=uri)
108
+ entry_point = (
109
+ launch_project.get_job_entry_point() or launch_project.override_entrypoint
110
+ )
111
+ assert entry_point is not None
112
+ wandb.termlog(f"{LOG_PREFIX}Building docker image from uri source")
113
+ image_uri = await builder.build_image(launch_project, entry_point)
114
+ if not image_uri:
115
+ raise LaunchError("Error building image uri")
116
+ else:
117
+ return image_uri
106
118
 
107
119
 
108
- async def validate_docker_installation() -> None:
109
- """Verify if Docker is installed on host machine."""
110
- find_exec = event_loop_thread_exec(find_executable)
111
- if not await find_exec("docker"):
112
- raise ExecutionError(
113
- "Could not find Docker executable. "
114
- "Ensure Docker is installed as per the instructions "
115
- "at https://docs.docker.com/install/overview/."
116
- )
120
+ def image_tag_from_dockerfile_and_source(
121
+ launch_project: LaunchProject, dockerfile_contents: str
122
+ ) -> str:
123
+ """Hashes the source and dockerfile contents into a unique tag."""
124
+ image_source_string = launch_project.get_image_source_string()
125
+ unique_id_string = image_source_string + dockerfile_contents
126
+ image_tag = hashlib.sha256(unique_id_string.encode("utf-8")).hexdigest()[:8]
127
+ return image_tag
117
128
 
118
129
 
119
130
  def get_docker_user(launch_project: LaunchProject, runner_type: str) -> Tuple[str, int]:
@@ -129,107 +140,6 @@ def get_docker_user(launch_project: LaunchProject, runner_type: str) -> Tuple[st
129
140
  return username, userid
130
141
 
131
142
 
132
- DOCKERFILE_TEMPLATE = """
133
- # ----- stage 1: build -----
134
- FROM {py_build_image} as build
135
-
136
- # requirements section depends on pip vs conda, and presence of buildx
137
- ENV PIP_PROGRESS_BAR off
138
- {requirements_section}
139
-
140
- # ----- stage 2: base -----
141
- {base_setup}
142
-
143
- COPY --from=build /env /env
144
- ENV PATH="/env/bin:$PATH"
145
-
146
- ENV SHELL /bin/bash
147
-
148
- # some resources (eg sagemaker) must run on root
149
- {user_setup}
150
-
151
- WORKDIR {workdir}
152
- RUN chown -R {uid} {workdir}
153
-
154
- # make artifacts cache dir unrelated to build
155
- RUN mkdir -p {workdir}/.cache && chown -R {uid} {workdir}/.cache
156
-
157
- # copy code/etc
158
- COPY --chown={uid} src/ {workdir}
159
-
160
- ENV PYTHONUNBUFFERED=1
161
-
162
- {entrypoint_section}
163
- """
164
-
165
- # this goes into base_setup in TEMPLATE
166
- PYTHON_SETUP_TEMPLATE = """
167
- FROM {py_base_image} as base
168
- """
169
-
170
- # this goes into base_setup in TEMPLATE
171
- ACCELERATOR_SETUP_TEMPLATE = """
172
- FROM {accelerator_base_image} as base
173
-
174
- # make non-interactive so build doesn't block on questions
175
- ENV DEBIAN_FRONTEND=noninteractive
176
-
177
- # install python
178
- RUN apt-get update -qq && apt-get install --no-install-recommends -y \
179
- {python_packages} \
180
- && apt-get -qq purge && apt-get -qq clean \
181
- && rm -rf /var/lib/apt/lists/*
182
-
183
- # make sure `python` points at the right version
184
- RUN update-alternatives --install /usr/bin/python python /usr/bin/python{py_version} 1 \
185
- && update-alternatives --install /usr/local/bin/python python /usr/bin/python{py_version} 1
186
- """
187
-
188
- # this goes into requirements_section in TEMPLATE
189
- PIP_TEMPLATE = """
190
- RUN python -m venv /env
191
- # make sure we install into the env
192
- ENV PATH="/env/bin:$PATH"
193
-
194
- COPY {requirements_files} ./
195
- {buildx_optional_prefix} {pip_install}
196
- """
197
-
198
- # this goes into requirements_section in TEMPLATE
199
- CONDA_TEMPLATE = """
200
- COPY src/environment.yml .
201
- {buildx_optional_prefix} conda env create -f environment.yml -n env
202
-
203
- # pack the environment so that we can transfer to the base image
204
- RUN conda install -c conda-forge conda-pack
205
- RUN conda pack -n env -o /tmp/env.tar && \
206
- mkdir /env && cd /env && tar xf /tmp/env.tar && \
207
- rm /tmp/env.tar
208
- RUN /env/bin/conda-unpack
209
- """
210
-
211
- USER_CREATE_TEMPLATE = """
212
- RUN useradd \
213
- --create-home \
214
- --no-log-init \
215
- --shell /bin/bash \
216
- --gid 0 \
217
- --uid {uid} \
218
- {user} || echo ""
219
- """
220
-
221
- ENTRYPOINT_TEMPLATE = """
222
- ENTRYPOINT {entrypoint}
223
- """
224
-
225
-
226
- def get_current_python_version() -> Tuple[str, str]:
227
- full_version = sys.version.split()[0].split(".")
228
- major = full_version[0]
229
- version = ".".join(full_version[:2]) if len(full_version) >= 2 else major + ".0"
230
- return version, major
231
-
232
-
233
143
  def get_base_setup(
234
144
  launch_project: LaunchProject, py_version: str, py_major: str
235
145
  ) -> str:
@@ -237,26 +147,21 @@ def get_base_setup(
237
147
 
238
148
  CPU version is built on python, Accelerator version is built on the user-provided base image.
239
149
  """
240
- python_base_image = f"python:{py_version}-buster"
150
+ minor = int(py_version.split(".")[1])
151
+ if minor < 12:
152
+ python_base_image = f"python:{py_version}-buster"
153
+ else:
154
+ python_base_image = f"python:{py_version}-bookworm"
241
155
  if launch_project.accelerator_base_image:
242
156
  _logger.info(
243
157
  f"Using accelerator base image: {launch_project.accelerator_base_image}"
244
158
  )
245
- # accelerator base images doesn't come with python tooling
246
- if py_major == "2":
247
- python_packages = [
248
- f"python{py_version}",
249
- f"libpython{py_version}",
250
- "python-pip",
251
- "python-setuptools",
252
- ]
253
- else:
254
- python_packages = [
255
- f"python{py_version}",
256
- f"libpython{py_version}",
257
- "python3-pip",
258
- "python3-setuptools",
259
- ]
159
+ python_packages = [
160
+ f"python{py_version}",
161
+ f"libpython{py_version}",
162
+ "python3-pip",
163
+ "python3-setuptools",
164
+ ]
260
165
  base_setup = ACCELERATOR_SETUP_TEMPLATE.format(
261
166
  accelerator_base_image=launch_project.accelerator_base_image,
262
167
  python_packages=" \\\n".join(python_packages),
@@ -264,65 +169,17 @@ def get_base_setup(
264
169
  )
265
170
  else:
266
171
  python_packages = [
267
- "python3-dev" if py_major == "3" else "python-dev",
172
+ "python3-dev",
268
173
  "gcc",
269
174
  ] # gcc required for python < 3.7 for some reason
270
175
  base_setup = PYTHON_SETUP_TEMPLATE.format(py_base_image=python_base_image)
271
176
  return base_setup
272
177
 
273
178
 
274
- def get_env_vars_dict(
275
- launch_project: LaunchProject, api: Api, max_env_length: int
276
- ) -> Dict[str, str]:
277
- """Generate environment variables for the project.
278
-
279
- Arguments:
280
- launch_project: LaunchProject to generate environment variables for.
281
-
282
- Returns:
283
- Dictionary of environment variables.
284
- """
285
- env_vars = {}
286
- env_vars["WANDB_BASE_URL"] = api.settings("base_url")
287
- override_api_key = launch_project.launch_spec.get("_wandb_api_key")
288
- env_vars["WANDB_API_KEY"] = override_api_key or api.api_key
289
- if launch_project.target_project:
290
- env_vars["WANDB_PROJECT"] = launch_project.target_project
291
- env_vars["WANDB_ENTITY"] = launch_project.target_entity
292
- env_vars["WANDB_LAUNCH"] = "True"
293
- env_vars["WANDB_RUN_ID"] = launch_project.run_id
294
- if launch_project.docker_image:
295
- env_vars["WANDB_DOCKER"] = launch_project.docker_image
296
- if launch_project.name is not None:
297
- env_vars["WANDB_NAME"] = launch_project.name
298
- if "author" in launch_project.launch_spec and not override_api_key:
299
- env_vars["WANDB_USERNAME"] = launch_project.launch_spec["author"]
300
- if launch_project.sweep_id:
301
- env_vars["WANDB_SWEEP_ID"] = launch_project.sweep_id
302
- if launch_project.launch_spec.get("_resume_count", 0) > 0:
303
- env_vars["WANDB_RESUME"] = "allow"
304
- if launch_project.queue_name:
305
- env_vars[wandb.env.LAUNCH_QUEUE_NAME] = launch_project.queue_name
306
- if launch_project.queue_entity:
307
- env_vars[wandb.env.LAUNCH_QUEUE_ENTITY] = launch_project.queue_entity
308
- if launch_project.run_queue_item_id:
309
- env_vars[wandb.env.LAUNCH_TRACE_ID] = launch_project.run_queue_item_id
310
-
311
- _inject_wandb_config_env_vars(
312
- launch_project.override_config, env_vars, max_env_length
313
- )
314
- artifacts = {}
315
- # if we're spinning up a launch process from a job
316
- # we should tell the run to use that artifact
317
- if launch_project.job:
318
- artifacts = {wandb.util.LAUNCH_JOB_ARTIFACT_SLOT_NAME: launch_project.job}
319
- env_vars["WANDB_ARTIFACTS"] = json.dumps(
320
- {**artifacts, **launch_project.override_artifacts}
321
- )
322
- return env_vars
323
-
324
-
325
- def get_requirements_section(launch_project: LaunchProject, builder_type: str) -> str:
179
+ # Move this into the build context manager.
180
+ def get_requirements_section(
181
+ launch_project: LaunchProject, build_context_dir: str, builder_type: str
182
+ ) -> str:
326
183
  if builder_type == "docker":
327
184
  buildx_installed = docker.is_buildx_installed()
328
185
  if not buildx_installed:
@@ -333,69 +190,79 @@ def get_requirements_section(launch_project: LaunchProject, builder_type: str) -
333
190
  elif builder_type == "kaniko":
334
191
  prefix = "RUN WANDB_DISABLE_CACHE=true"
335
192
  buildx_installed = False
336
- if launch_project.deps_type == "pip":
337
- requirements_files = []
338
- deps_install_line = None
339
- assert launch_project.project_dir is not None
340
- base_path = pathlib.Path(launch_project.project_dir)
341
- # If there is a requirements.txt at root of build context, use that.
342
- if (base_path / "requirements.txt").exists():
343
- requirements_files += ["src/requirements.txt"]
344
- deps_install_line = "pip install -r requirements.txt"
345
- # Elif there is pyproject.toml at build context, convert the dependencies
346
- # section to a requirements.txt and use that.
347
- elif (base_path / "pyproject.toml").exists():
348
- tomli = get_module("tomli")
349
- if tomli is None:
350
- wandb.termwarn(
351
- "pyproject.toml found but tomli could not be loaded. To "
352
- "install dependencies from pyproject.toml please run "
353
- "`pip install tomli` and try again."
354
- )
355
- else:
356
- # First try to read deps from standard pyproject format.
357
- with open(base_path / "pyproject.toml", "rb") as f:
358
- contents = tomli.load(f)
359
- project_deps = [
360
- str(d) for d in contents.get("project", {}).get("dependencies", [])
361
- ]
362
- if project_deps:
363
- with open(base_path / "requirements.txt", "w") as f:
364
- f.write("\n".join(project_deps))
365
- requirements_files += ["src/requirements.txt"]
366
- deps_install_line = "pip install -r requirements.txt"
367
- # Else use frozen requirements from wandb run.
368
- if not deps_install_line and (base_path / "requirements.frozen.txt").exists():
369
- requirements_files += [
370
- "src/requirements.frozen.txt",
371
- "_wandb_bootstrap.py",
372
- ]
373
- deps_install_line = (
374
- _parse_existing_requirements(launch_project)
375
- + "python _wandb_bootstrap.py"
193
+
194
+ if buildx_installed:
195
+ prefix = "RUN --mount=type=cache,mode=0777,target=/root/.cache/pip"
196
+
197
+ requirements_files = []
198
+ deps_install_line = None
199
+
200
+ base_path = pathlib.Path(build_context_dir)
201
+ # If there is a requirements.txt at root of build context, use that.
202
+ if (base_path / "src" / "requirements.txt").exists():
203
+ requirements_files += ["src/requirements.txt"]
204
+ deps_install_line = "pip install -r requirements.txt"
205
+ return PIP_TEMPLATE.format(
206
+ buildx_optional_prefix=prefix,
207
+ requirements_files=" ".join(requirements_files),
208
+ pip_install=deps_install_line,
209
+ )
210
+
211
+ # Elif there is pyproject.toml at build context, convert the dependencies
212
+ # section to a requirements.txt and use that.
213
+ elif (base_path / "src" / "pyproject.toml").exists():
214
+ tomli = get_module("tomli")
215
+ if tomli is None:
216
+ wandb.termwarn(
217
+ "pyproject.toml found but tomli could not be loaded. To "
218
+ "install dependencies from pyproject.toml please run "
219
+ "`pip install tomli` and try again."
376
220
  )
221
+ else:
222
+ # First try to read deps from standard pyproject format.
223
+ with open(base_path / "src" / "pyproject.toml", "rb") as f:
224
+ contents = tomli.load(f)
225
+ project_deps = [
226
+ str(d) for d in contents.get("project", {}).get("dependencies", [])
227
+ ]
228
+ if project_deps:
229
+ with open(base_path / "src" / "requirements.txt", "w") as f:
230
+ f.write("\n".join(project_deps))
231
+ requirements_files += ["src/requirements.txt"]
232
+ deps_install_line = "pip install -r requirements.txt"
233
+ return PIP_TEMPLATE.format(
234
+ buildx_optional_prefix=prefix,
235
+ requirements_files=" ".join(requirements_files),
236
+ pip_install=deps_install_line,
237
+ )
238
+
239
+ # Else use frozen requirements from wandb run.
240
+ if (
241
+ not deps_install_line
242
+ and (base_path / "src" / "requirements.frozen.txt").exists()
243
+ ):
244
+ requirements_files += [
245
+ "src/requirements.frozen.txt",
246
+ "_wandb_bootstrap.py",
247
+ ]
248
+ deps_install_line = (
249
+ launch_project.parse_existing_requirements() + "python _wandb_bootstrap.py"
250
+ )
377
251
 
378
252
  if not deps_install_line:
379
253
  raise LaunchError(f"No dependency sources found for {launch_project}")
380
254
 
381
- if buildx_installed:
382
- prefix = "RUN --mount=type=cache,mode=0777,target=/root/.cache/pip"
383
-
384
- requirements_line = PIP_TEMPLATE.format(
255
+ return PIP_TEMPLATE.format(
385
256
  buildx_optional_prefix=prefix,
386
257
  requirements_files=" ".join(requirements_files),
387
258
  pip_install=deps_install_line,
388
259
  )
389
- elif launch_project.deps_type == "conda":
390
- if buildx_installed:
391
- prefix = "RUN --mount=type=cache,mode=0777,target=/opt/conda/pkgs"
392
- requirements_line = CONDA_TEMPLATE.format(buildx_optional_prefix=prefix)
260
+
393
261
  else:
394
262
  # this means no deps file was found
395
263
  requirements_line = "RUN mkdir -p env/" # Docker fails otherwise
396
264
  wandb.termwarn("No requirements file found. No packages will be installed.")
397
-
398
- return requirements_line
265
+ return requirements_line
399
266
 
400
267
 
401
268
  def get_user_setup(username: str, userid: int, runner_type: str) -> str:
@@ -411,269 +278,3 @@ def get_entrypoint_setup(
411
278
  entry_point: EntryPoint,
412
279
  ) -> str:
413
280
  return ENTRYPOINT_TEMPLATE.format(entrypoint=json.dumps(entry_point.command))
414
-
415
-
416
- def generate_dockerfile(
417
- launch_project: LaunchProject,
418
- entry_point: EntryPoint,
419
- runner_type: str,
420
- builder_type: str,
421
- dockerfile: Optional[str] = None,
422
- ) -> str:
423
- if launch_project.project_dir is not None and dockerfile:
424
- path = os.path.join(launch_project.project_dir, dockerfile)
425
- if not os.path.exists(path):
426
- raise LaunchError(f"Dockerfile does not exist at {path}")
427
- launch_project.project_dir = os.path.dirname(path)
428
- wandb.termlog(f"Using dockerfile: {dockerfile}")
429
- return open(path).read()
430
-
431
- # get python versions truncated to major.minor to ensure image availability
432
- if launch_project.python_version:
433
- spl = launch_project.python_version.split(".")[:2]
434
- py_version, py_major = (".".join(spl), spl[0])
435
- else:
436
- py_version, py_major = get_current_python_version()
437
-
438
- # ----- stage 1: build -----
439
- if launch_project.deps_type == "pip" or launch_project.deps_type is None:
440
- python_build_image = (
441
- f"python:{py_version}" # use full python image for package installation
442
- )
443
- elif launch_project.deps_type == "conda":
444
- # neither of these images are receiving regular updates, latest should be pretty stable
445
- python_build_image = (
446
- "continuumio/miniconda3:latest"
447
- if py_major == "3"
448
- else "continuumio/miniconda:latest"
449
- )
450
- requirements_section = get_requirements_section(launch_project, builder_type)
451
- # ----- stage 2: base -----
452
- python_base_setup = get_base_setup(launch_project, py_version, py_major)
453
-
454
- # set up user info
455
- username, userid = get_docker_user(launch_project, runner_type)
456
- user_setup = get_user_setup(username, userid, runner_type)
457
- workdir = f"/home/{username}"
458
-
459
- entrypoint_section = get_entrypoint_setup(entry_point)
460
-
461
- dockerfile_contents = DOCKERFILE_TEMPLATE.format(
462
- py_build_image=python_build_image,
463
- requirements_section=requirements_section,
464
- base_setup=python_base_setup,
465
- uid=userid,
466
- user_setup=user_setup,
467
- workdir=workdir,
468
- entrypoint_section=entrypoint_section,
469
- )
470
- return dockerfile_contents
471
-
472
-
473
- def _parse_existing_requirements(launch_project: LaunchProject) -> str:
474
- import pkg_resources
475
-
476
- requirements_line = ""
477
- assert launch_project.project_dir is not None
478
- base_requirements = os.path.join(launch_project.project_dir, "requirements.txt")
479
- if os.path.exists(base_requirements):
480
- include_only = set()
481
- with open(base_requirements) as f:
482
- iter = pkg_resources.parse_requirements(f)
483
- while True:
484
- try:
485
- pkg = next(iter)
486
- if hasattr(pkg, "name"):
487
- name = pkg.name.lower()
488
- else:
489
- name = str(pkg)
490
- include_only.add(shlex_quote(name))
491
- except StopIteration:
492
- break
493
- # Different versions of pkg_resources throw different errors
494
- # just catch them all and ignore packages we can't parse
495
- except Exception as e:
496
- _logger.warn(f"Unable to parse requirements.txt: {e}")
497
- continue
498
- requirements_line += "WANDB_ONLY_INCLUDE={} ".format(",".join(include_only))
499
- return requirements_line
500
-
501
-
502
- def _create_docker_build_ctx(
503
- launch_project: LaunchProject,
504
- dockerfile_contents: str,
505
- ) -> str:
506
- """Create a build context temp dir for a Dockerfile and project code."""
507
- assert launch_project.project_dir is not None
508
- directory = tempfile.mkdtemp()
509
- entrypoint = launch_project.get_single_entry_point()
510
- if entrypoint is not None:
511
- assert entrypoint.name is not None
512
- entrypoint_dir = os.path.dirname(entrypoint.name)
513
- if entrypoint_dir:
514
- path = os.path.join(
515
- launch_project.project_dir, entrypoint_dir, _WANDB_DOCKERFILE_NAME
516
- )
517
- else:
518
- path = os.path.join(launch_project.project_dir, _WANDB_DOCKERFILE_NAME)
519
- if os.path.exists(
520
- path
521
- ): # We found a Dockerfile.wandb adjacent to the entrypoint.
522
- shutil.copytree(
523
- os.path.dirname(path),
524
- directory,
525
- symlinks=True,
526
- dirs_exist_ok=True,
527
- ignore=shutil.ignore_patterns("fsmonitor--daemon.ipc"),
528
- )
529
- # TODO: remove this once we make things more explicit for users
530
- if entrypoint_dir:
531
- new_path = os.path.basename(entrypoint.name)
532
- entrypoint = launch_project.get_single_entry_point()
533
- if entrypoint is not None:
534
- entrypoint.update_entrypoint_path(new_path)
535
- return directory
536
-
537
- dst_path = os.path.join(directory, "src")
538
- assert launch_project.project_dir is not None
539
- shutil.copytree(
540
- src=launch_project.project_dir,
541
- dst=dst_path,
542
- symlinks=True,
543
- ignore=shutil.ignore_patterns("fsmonitor--daemon.ipc"),
544
- )
545
- shutil.copy(
546
- os.path.join(os.path.dirname(__file__), "templates", "_wandb_bootstrap.py"),
547
- os.path.join(directory),
548
- )
549
- if launch_project.python_version:
550
- runtime_path = os.path.join(dst_path, "runtime.txt")
551
- with open(runtime_path, "w") as fp:
552
- fp.write(f"python-{launch_project.python_version}")
553
- # TODO: we likely don't need to pass the whole git repo into the container
554
- # with open(os.path.join(directory, ".dockerignore"), "w") as f:
555
- # f.write("**/.git")
556
- with open(os.path.join(directory, _WANDB_DOCKERFILE_NAME), "w") as handle:
557
- handle.write(dockerfile_contents)
558
- return directory
559
-
560
-
561
- def join(split_command: List[str]) -> str:
562
- """Return a shell-escaped string from *split_command*.
563
-
564
- Also remove quotes from double quoted strings. Ex:
565
- "'local container queue'" --> "local container queue"
566
- """
567
- return " ".join(shlex.quote(arg.replace("'", "")) for arg in split_command)
568
-
569
-
570
- def construct_agent_configs(
571
- launch_config: Optional[Dict] = None,
572
- build_config: Optional[Dict] = None,
573
- ) -> Tuple[Optional[Dict[str, Any]], Dict[str, Any], Dict[str, Any]]:
574
- registry_config = None
575
- environment_config = None
576
- if launch_config is not None:
577
- build_config = launch_config.get("builder")
578
- registry_config = launch_config.get("registry")
579
-
580
- default_launch_config = None
581
- if os.path.exists(os.path.expanduser(LAUNCH_CONFIG_FILE)):
582
- with open(os.path.expanduser(LAUNCH_CONFIG_FILE)) as f:
583
- default_launch_config = (
584
- yaml.safe_load(f) or {}
585
- ) # In case the config is empty, we want it to be {} instead of None.
586
- environment_config = default_launch_config.get("environment")
587
-
588
- build_config, registry_config = resolve_build_and_registry_config(
589
- default_launch_config, build_config, registry_config
590
- )
591
-
592
- return environment_config, build_config, registry_config
593
-
594
-
595
- async def build_image_from_project(
596
- launch_project: LaunchProject,
597
- api: Api,
598
- launch_config: Dict[str, Any],
599
- ) -> str:
600
- """Construct a docker image from a project and returns the URI of the image.
601
-
602
- Arguments:
603
- launch_project: The project to build an image from.
604
- api: The API object to use for fetching the project.
605
- launch_config: The launch config to use for building the image.
606
-
607
- Returns:
608
- The URI of the built image.
609
- """
610
- assert launch_project.uri, "To build an image on queue a URI must be set."
611
- launch_config = launch_config or {}
612
- env_config = launch_config.get("environment", {})
613
- if not isinstance(env_config, dict):
614
- wrong_type = type(env_config).__name__
615
- raise LaunchError(
616
- f"Invalid environment config: {env_config} of type {wrong_type} "
617
- "loaded from launch config. Expected dict."
618
- )
619
- environment = environment_from_config(env_config)
620
-
621
- registry_config = launch_config.get("registry", {})
622
- if not isinstance(registry_config, dict):
623
- wrong_type = type(registry_config).__name__
624
- raise LaunchError(
625
- f"Invalid registry config: {registry_config} of type {wrong_type}"
626
- " loaded from launch config. Expected dict."
627
- )
628
- registry = registry_from_config(registry_config, environment)
629
-
630
- builder_config = launch_config.get("builder", {})
631
- if not isinstance(builder_config, dict):
632
- wrong_type = type(builder_config).__name__
633
- raise LaunchError(
634
- f"Invalid builder config: {builder_config} of type {wrong_type} "
635
- "loaded from launch config. Expected dict."
636
- )
637
- builder = builder_from_config(builder_config, environment, registry)
638
-
639
- if not builder:
640
- raise LaunchError("Unable to build image. No builder found.")
641
-
642
- launch_project.fetch_and_validate_project()
643
-
644
- entry_point: EntryPoint = launch_project.get_single_entry_point() or EntryPoint(
645
- name=EntrypointDefaults.PYTHON[-1],
646
- command=EntrypointDefaults.PYTHON,
647
- )
648
- wandb.termlog(f"{LOG_PREFIX}Building docker image from uri source")
649
- image_uri = await builder.build_image(launch_project, entry_point)
650
- if not image_uri:
651
- raise LaunchError("Error building image uri")
652
- else:
653
- return image_uri
654
-
655
-
656
- def image_tag_from_dockerfile_and_source(
657
- launch_project: LaunchProject, dockerfile_contents: str
658
- ) -> str:
659
- """Hashes the source and dockerfile contents into a unique tag."""
660
- image_source_string = launch_project.get_image_source_string()
661
- unique_id_string = image_source_string + dockerfile_contents
662
- image_tag = hashlib.sha256(unique_id_string.encode("utf-8")).hexdigest()[:8]
663
- return image_tag
664
-
665
-
666
- def _inject_wandb_config_env_vars(
667
- config: Dict[str, Any], env_dict: Dict[str, Any], maximum_env_length: int
668
- ) -> None:
669
- str_config = json.dumps(config)
670
- if len(str_config) <= maximum_env_length:
671
- env_dict["WANDB_CONFIG"] = str_config
672
- return
673
-
674
- chunks = [
675
- str_config[i : i + maximum_env_length]
676
- for i in range(0, len(str_config), maximum_env_length)
677
- ]
678
- config_chunks_dict = {f"WANDB_CONFIG_{i}": chunk for i, chunk in enumerate(chunks)}
679
- env_dict.update(config_chunks_dict)