wandb-0.16.5-py3-none-any.whl → wandb-0.17.0-py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (194)
  1. package_readme.md +95 -0
  2. wandb/__init__.py +2 -3
  3. wandb/agents/pyagent.py +0 -1
  4. wandb/analytics/sentry.py +2 -1
  5. wandb/apis/importers/internals/internal.py +0 -1
  6. wandb/apis/importers/internals/protocols.py +30 -56
  7. wandb/apis/importers/mlflow.py +13 -26
  8. wandb/apis/importers/wandb.py +8 -14
  9. wandb/apis/internal.py +0 -3
  10. wandb/apis/public/api.py +55 -3
  11. wandb/apis/public/artifacts.py +1 -0
  12. wandb/apis/public/files.py +1 -0
  13. wandb/apis/public/history.py +1 -0
  14. wandb/apis/public/jobs.py +17 -4
  15. wandb/apis/public/projects.py +1 -0
  16. wandb/apis/public/reports.py +1 -0
  17. wandb/apis/public/runs.py +15 -17
  18. wandb/apis/public/sweeps.py +1 -0
  19. wandb/apis/public/teams.py +1 -0
  20. wandb/apis/public/users.py +1 -0
  21. wandb/apis/reports/v1/_blocks.py +3 -7
  22. wandb/apis/reports/v2/gql.py +1 -0
  23. wandb/apis/reports/v2/interface.py +3 -4
  24. wandb/apis/reports/v2/internal.py +5 -8
  25. wandb/cli/cli.py +95 -22
  26. wandb/data_types.py +9 -6
  27. wandb/docker/__init__.py +1 -1
  28. wandb/env.py +38 -8
  29. wandb/errors/__init__.py +5 -0
  30. wandb/errors/term.py +10 -2
  31. wandb/filesync/step_checksum.py +1 -4
  32. wandb/filesync/step_prepare.py +4 -24
  33. wandb/filesync/step_upload.py +4 -106
  34. wandb/filesync/upload_job.py +0 -76
  35. wandb/integration/catboost/catboost.py +1 -1
  36. wandb/integration/fastai/__init__.py +1 -0
  37. wandb/integration/huggingface/resolver.py +2 -2
  38. wandb/integration/keras/__init__.py +1 -0
  39. wandb/integration/keras/callbacks/metrics_logger.py +1 -1
  40. wandb/integration/keras/keras.py +7 -7
  41. wandb/integration/langchain/wandb_tracer.py +1 -0
  42. wandb/integration/lightning/fabric/logger.py +1 -3
  43. wandb/integration/metaflow/metaflow.py +41 -6
  44. wandb/integration/openai/fine_tuning.py +77 -40
  45. wandb/integration/prodigy/prodigy.py +1 -1
  46. wandb/old/summary.py +1 -1
  47. wandb/plot/confusion_matrix.py +1 -1
  48. wandb/plot/pr_curve.py +2 -1
  49. wandb/plot/roc_curve.py +2 -1
  50. wandb/{plots → plot}/utils.py +13 -25
  51. wandb/proto/v3/wandb_internal_pb2.py +364 -332
  52. wandb/proto/v3/wandb_settings_pb2.py +2 -2
  53. wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
  54. wandb/proto/v4/wandb_internal_pb2.py +322 -316
  55. wandb/proto/v4/wandb_settings_pb2.py +2 -2
  56. wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
  57. wandb/proto/wandb_deprecated.py +7 -1
  58. wandb/proto/wandb_internal_codegen.py +3 -29
  59. wandb/sdk/artifacts/artifact.py +51 -20
  60. wandb/sdk/artifacts/artifact_download_logger.py +1 -0
  61. wandb/sdk/artifacts/artifact_file_cache.py +18 -4
  62. wandb/sdk/artifacts/artifact_instance_cache.py +1 -0
  63. wandb/sdk/artifacts/artifact_manifest.py +1 -0
  64. wandb/sdk/artifacts/artifact_manifest_entry.py +7 -3
  65. wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +1 -0
  66. wandb/sdk/artifacts/artifact_saver.py +18 -27
  67. wandb/sdk/artifacts/artifact_state.py +1 -0
  68. wandb/sdk/artifacts/artifact_ttl.py +1 -0
  69. wandb/sdk/artifacts/exceptions.py +1 -0
  70. wandb/sdk/artifacts/storage_handlers/azure_handler.py +1 -0
  71. wandb/sdk/artifacts/storage_handlers/gcs_handler.py +13 -18
  72. wandb/sdk/artifacts/storage_handlers/http_handler.py +1 -0
  73. wandb/sdk/artifacts/storage_handlers/local_file_handler.py +1 -0
  74. wandb/sdk/artifacts/storage_handlers/multi_handler.py +1 -0
  75. wandb/sdk/artifacts/storage_handlers/s3_handler.py +5 -3
  76. wandb/sdk/artifacts/storage_handlers/tracking_handler.py +1 -0
  77. wandb/sdk/artifacts/storage_handlers/wb_artifact_handler.py +1 -0
  78. wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +1 -0
  79. wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +3 -42
  80. wandb/sdk/artifacts/storage_policy.py +2 -12
  81. wandb/sdk/data_types/_dtypes.py +8 -8
  82. wandb/sdk/data_types/base_types/media.py +3 -6
  83. wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +3 -1
  84. wandb/sdk/data_types/image.py +1 -1
  85. wandb/sdk/data_types/video.py +1 -1
  86. wandb/sdk/integration_utils/auto_logging.py +5 -6
  87. wandb/sdk/integration_utils/data_logging.py +10 -6
  88. wandb/sdk/interface/interface.py +86 -38
  89. wandb/sdk/interface/interface_shared.py +7 -13
  90. wandb/sdk/internal/datastore.py +1 -1
  91. wandb/sdk/internal/file_pusher.py +2 -5
  92. wandb/sdk/internal/file_stream.py +5 -18
  93. wandb/sdk/internal/handler.py +18 -2
  94. wandb/sdk/internal/internal.py +0 -1
  95. wandb/sdk/internal/internal_api.py +1 -129
  96. wandb/sdk/internal/internal_util.py +0 -1
  97. wandb/sdk/internal/job_builder.py +159 -45
  98. wandb/sdk/internal/profiler.py +1 -0
  99. wandb/sdk/internal/progress.py +0 -28
  100. wandb/sdk/internal/run.py +1 -0
  101. wandb/sdk/internal/sender.py +1 -2
  102. wandb/sdk/internal/system/assets/gpu_amd.py +44 -44
  103. wandb/sdk/internal/system/assets/gpu_apple.py +56 -11
  104. wandb/sdk/internal/system/assets/interfaces.py +6 -8
  105. wandb/sdk/internal/system/assets/open_metrics.py +2 -2
  106. wandb/sdk/internal/system/assets/trainium.py +1 -3
  107. wandb/sdk/launch/__init__.py +9 -1
  108. wandb/sdk/launch/_launch.py +9 -24
  109. wandb/sdk/launch/_launch_add.py +1 -3
  110. wandb/sdk/launch/_project_spec.py +188 -241
  111. wandb/sdk/launch/agent/agent.py +115 -48
  112. wandb/sdk/launch/agent/config.py +80 -14
  113. wandb/sdk/launch/builder/abstract.py +69 -1
  114. wandb/sdk/launch/builder/build.py +156 -555
  115. wandb/sdk/launch/builder/context_manager.py +235 -0
  116. wandb/sdk/launch/builder/docker_builder.py +8 -23
  117. wandb/sdk/launch/builder/kaniko_builder.py +161 -159
  118. wandb/sdk/launch/builder/noop.py +1 -0
  119. wandb/sdk/launch/builder/templates/dockerfile.py +92 -0
  120. wandb/sdk/launch/create_job.py +68 -63
  121. wandb/sdk/launch/environment/abstract.py +1 -0
  122. wandb/sdk/launch/environment/gcp_environment.py +1 -0
  123. wandb/sdk/launch/environment/local_environment.py +1 -0
  124. wandb/sdk/launch/inputs/files.py +148 -0
  125. wandb/sdk/launch/inputs/internal.py +217 -0
  126. wandb/sdk/launch/inputs/manage.py +95 -0
  127. wandb/sdk/launch/loader.py +1 -0
  128. wandb/sdk/launch/registry/abstract.py +1 -0
  129. wandb/sdk/launch/registry/azure_container_registry.py +1 -0
  130. wandb/sdk/launch/registry/elastic_container_registry.py +1 -0
  131. wandb/sdk/launch/registry/google_artifact_registry.py +2 -1
  132. wandb/sdk/launch/registry/local_registry.py +1 -0
  133. wandb/sdk/launch/runner/abstract.py +1 -0
  134. wandb/sdk/launch/runner/kubernetes_monitor.py +4 -1
  135. wandb/sdk/launch/runner/kubernetes_runner.py +9 -10
  136. wandb/sdk/launch/runner/local_container.py +2 -3
  137. wandb/sdk/launch/runner/local_process.py +8 -29
  138. wandb/sdk/launch/runner/sagemaker_runner.py +21 -20
  139. wandb/sdk/launch/runner/vertex_runner.py +8 -7
  140. wandb/sdk/launch/sweeps/scheduler.py +7 -4
  141. wandb/sdk/launch/sweeps/scheduler_sweep.py +2 -1
  142. wandb/sdk/launch/sweeps/utils.py +3 -3
  143. wandb/sdk/launch/utils.py +33 -140
  144. wandb/sdk/lib/_settings_toposort_generated.py +1 -5
  145. wandb/sdk/lib/fsm.py +8 -12
  146. wandb/sdk/lib/gitlib.py +4 -4
  147. wandb/sdk/lib/import_hooks.py +1 -1
  148. wandb/sdk/lib/lazyloader.py +0 -1
  149. wandb/sdk/lib/proto_util.py +23 -2
  150. wandb/sdk/lib/redirect.py +19 -14
  151. wandb/sdk/lib/retry.py +3 -2
  152. wandb/sdk/lib/run_moment.py +7 -1
  153. wandb/sdk/lib/tracelog.py +1 -1
  154. wandb/sdk/service/service.py +19 -16
  155. wandb/sdk/verify/verify.py +2 -1
  156. wandb/sdk/wandb_init.py +16 -63
  157. wandb/sdk/wandb_manager.py +2 -2
  158. wandb/sdk/wandb_require.py +5 -0
  159. wandb/sdk/wandb_run.py +164 -90
  160. wandb/sdk/wandb_settings.py +2 -48
  161. wandb/sdk/wandb_setup.py +1 -1
  162. wandb/sklearn/__init__.py +1 -0
  163. wandb/sklearn/plot/__init__.py +1 -0
  164. wandb/sklearn/plot/classifier.py +11 -12
  165. wandb/sklearn/plot/clusterer.py +2 -1
  166. wandb/sklearn/plot/regressor.py +1 -0
  167. wandb/sklearn/plot/shared.py +1 -0
  168. wandb/sklearn/utils.py +1 -0
  169. wandb/testing/relay.py +4 -4
  170. wandb/trigger.py +1 -0
  171. wandb/util.py +67 -54
  172. wandb/wandb_controller.py +2 -3
  173. wandb/wandb_torch.py +1 -2
  174. {wandb-0.16.5.dist-info → wandb-0.17.0.dist-info}/METADATA +67 -70
  175. {wandb-0.16.5.dist-info → wandb-0.17.0.dist-info}/RECORD +178 -188
  176. {wandb-0.16.5.dist-info → wandb-0.17.0.dist-info}/WHEEL +1 -2
  177. wandb/bin/apple_gpu_stats +0 -0
  178. wandb/catboost/__init__.py +0 -9
  179. wandb/fastai/__init__.py +0 -9
  180. wandb/keras/__init__.py +0 -18
  181. wandb/lightgbm/__init__.py +0 -9
  182. wandb/plots/__init__.py +0 -6
  183. wandb/plots/explain_text.py +0 -36
  184. wandb/plots/heatmap.py +0 -81
  185. wandb/plots/named_entity.py +0 -43
  186. wandb/plots/part_of_speech.py +0 -50
  187. wandb/plots/plot_definitions.py +0 -768
  188. wandb/plots/precision_recall.py +0 -121
  189. wandb/plots/roc.py +0 -103
  190. wandb/sacred/__init__.py +0 -3
  191. wandb/xgboost/__init__.py +0 -9
  192. wandb-0.16.5.dist-info/top_level.txt +0 -1
  193. {wandb-0.16.5.dist-info → wandb-0.17.0.dist-info}/entry_points.txt +0 -0
  194. {wandb-0.16.5.dist-info → wandb-0.17.0.dist-info/licenses}/LICENSE +0 -0
@@ -1,5 +1,6 @@
1
1
  import asyncio
2
2
  import base64
3
+ import copy
3
4
  import json
4
5
  import logging
5
6
  import os
@@ -8,12 +9,11 @@ import tarfile
8
9
  import tempfile
9
10
  import time
10
11
  import traceback
11
- from typing import Optional
12
+ from typing import Any, Dict, Optional
12
13
 
13
14
  import wandb
14
15
  from wandb.sdk.launch.agent.job_status_tracker import JobAndRunStatusTracker
15
- from wandb.sdk.launch.builder.abstract import AbstractBuilder
16
- from wandb.sdk.launch.builder.build import registry_from_uri
16
+ from wandb.sdk.launch.builder.abstract import AbstractBuilder, registry_from_uri
17
17
  from wandb.sdk.launch.environment.abstract import AbstractEnvironment
18
18
  from wandb.sdk.launch.environment.azure_environment import AzureEnvironment
19
19
  from wandb.sdk.launch.registry.abstract import AbstractRegistry
@@ -31,12 +31,8 @@ from ..utils import (
31
31
  get_kube_context_and_api_client,
32
32
  warn_failed_packages_from_build_logs,
33
33
  )
34
- from .build import (
35
- _WANDB_DOCKERFILE_NAME,
36
- _create_docker_build_ctx,
37
- generate_dockerfile,
38
- image_tag_from_dockerfile_and_source,
39
- )
34
+ from .build import _WANDB_DOCKERFILE_NAME
35
+ from .context_manager import BuildContextManager
40
36
 
41
37
  get_module(
42
38
  "kubernetes_asyncio",
@@ -105,6 +101,7 @@ class KanikoBuilder(AbstractBuilder):
105
101
  secret_name: str = "",
106
102
  secret_key: str = "",
107
103
  image: str = "gcr.io/kaniko-project/executor:v1.11.0",
104
+ config: Optional[dict] = None,
108
105
  ):
109
106
  """Initialize a KanikoBuilder.
110
107
 
@@ -125,6 +122,7 @@ class KanikoBuilder(AbstractBuilder):
125
122
  self.secret_name = secret_name
126
123
  self.secret_key = secret_key
127
124
  self.image = image
125
+ self.kaniko_config = config or {}
128
126
 
129
127
  @classmethod
130
128
  def from_config(
@@ -170,6 +168,7 @@ class KanikoBuilder(AbstractBuilder):
170
168
  image_uri = config.get("destination")
171
169
  if image_uri is not None:
172
170
  registry = registry_from_uri(image_uri)
171
+ kaniko_config = config.get("kaniko-config", {})
173
172
 
174
173
  return cls(
175
174
  environment,
@@ -179,6 +178,7 @@ class KanikoBuilder(AbstractBuilder):
179
178
  secret_name=secret_name,
180
179
  secret_key=secret_key,
181
180
  image=kaniko_image,
181
+ config=kaniko_config,
182
182
  )
183
183
 
184
184
  async def verify(self) -> None:
@@ -256,17 +256,13 @@ class KanikoBuilder(AbstractBuilder):
256
256
  job_tracker: Optional[JobAndRunStatusTracker] = None,
257
257
  ) -> str:
258
258
  await self.verify()
259
- # kaniko builder doesn't seem to work with a custom user id, need more investigation
260
- dockerfile_str = generate_dockerfile(
261
- launch_project=launch_project,
262
- entry_point=entrypoint,
263
- runner_type=launch_project.resource,
264
- builder_type="kaniko",
265
- dockerfile=launch_project.override_dockerfile,
266
- )
267
- image_tag = image_tag_from_dockerfile_and_source(launch_project, dockerfile_str)
259
+
260
+ build_contex_manager = BuildContextManager(launch_project=launch_project)
261
+ context_path, image_tag = build_contex_manager.create_build_context("kaniko")
262
+ run_id = launch_project.run_id
268
263
  repo_uri = await self.registry.get_repo_uri()
269
264
  image_uri = repo_uri + ":" + image_tag
265
+
270
266
  if (
271
267
  not launch_project.build_required()
272
268
  and await self.registry.check_image_exists(image_uri)
@@ -274,14 +270,10 @@ class KanikoBuilder(AbstractBuilder):
274
270
  return image_uri
275
271
 
276
272
  _logger.info(f"Building image {image_uri}...")
277
-
278
- context_path = _create_docker_build_ctx(launch_project, dockerfile_str)
279
- run_id = launch_project.run_id
280
-
281
273
  _, api_client = await get_kube_context_and_api_client(
282
274
  kubernetes, launch_project.resource_args
283
275
  )
284
- # TODO: use same client as kuberentes_runner.py
276
+ # TODO: use same client as kubernetes_runner.py
285
277
  batch_v1 = client.BatchV1Api(api_client)
286
278
  core_v1 = client.CoreV1Api(api_client)
287
279
 
@@ -289,7 +281,7 @@ class KanikoBuilder(AbstractBuilder):
289
281
 
290
282
  build_context = await self._upload_build_context(run_id, context_path)
291
283
  build_job = await self._create_kaniko_job(
292
- build_job_name, repo_uri, image_uri, build_context, core_v1
284
+ build_job_name, repo_uri, image_uri, build_context, core_v1, api_client
293
285
  )
294
286
  wandb.termlog(f"{LOG_PREFIX}Created kaniko job {build_job_name}")
295
287
 
@@ -324,7 +316,9 @@ class KanikoBuilder(AbstractBuilder):
324
316
  ):
325
317
  if job_tracker:
326
318
  job_tracker.set_err_stage("build")
327
- raise Exception(f"Failed to build image in kaniko for job {run_id}")
319
+ raise Exception(
320
+ f"Failed to build image in kaniko for job {run_id}. View logs with `kubectl logs -n {NAMESPACE} {build_job_name}`."
321
+ )
328
322
  try:
329
323
  pods_from_job = await core_v1.list_namespaced_pod(
330
324
  namespace=NAMESPACE, label_selector=f"job-name={build_job_name}"
@@ -371,23 +365,32 @@ class KanikoBuilder(AbstractBuilder):
371
365
  image_tag: str,
372
366
  build_context_path: str,
373
367
  core_client: client.CoreV1Api,
374
- ) -> "client.V1Job":
375
- env = []
376
- volume_mounts = []
377
- volumes = []
368
+ api_client,
369
+ ) -> Dict[str, Any]:
370
+ job = copy.deepcopy(self.kaniko_config)
371
+ job_metadata = job.get("metadata", {})
372
+ job_labels = job_metadata.get("labels", {})
373
+ job_spec = job.get("spec", {})
374
+ pod_template = job_spec.get("template", {})
375
+ pod_metadata = pod_template.get("metadata", {})
376
+ pod_labels = pod_metadata.get("labels", {})
377
+ pod_spec = pod_template.get("spec", {})
378
+ volumes = pod_spec.get("volumes", [])
379
+ containers = pod_spec.get("containers") or [{}]
380
+ if len(containers) > 1:
381
+ raise LaunchError(
382
+ "Multiple container configs not supported for kaniko builder."
383
+ )
384
+ container = containers[0]
385
+ volume_mounts = container.get("volumeMounts", [])
386
+ env = container.get("env", [])
387
+ custom_args = container.get("args", [])
378
388
 
379
389
  if PVC_MOUNT_PATH:
380
390
  volumes.append(
381
- client.V1Volume(
382
- name="kaniko-pvc",
383
- persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
384
- claim_name=PVC_NAME
385
- ),
386
- )
387
- )
388
- volume_mounts.append(
389
- client.V1VolumeMount(name="kaniko-pvc", mount_path="/context")
391
+ {"name": "kaniko-pvc", "persistentVolumeClaim": {"claimName": PVC_NAME}}
390
392
  )
393
+ volume_mounts.append({"name": "kaniko-pvc", "mountPath": "/context"})
391
394
 
392
395
  if bool(self.secret_name) != bool(self.secret_key):
393
396
  raise LaunchError(
@@ -395,13 +398,13 @@ class KanikoBuilder(AbstractBuilder):
395
398
  "for kaniko build. You provided only one of them."
396
399
  )
397
400
  if isinstance(self.registry, ElasticContainerRegistry):
398
- env += [
399
- client.V1EnvVar(
400
- name="AWS_REGION",
401
- value=self.registry.region,
402
- )
403
- ]
404
- # TODO: Refactor all of this environment/registry
401
+ env.append(
402
+ {
403
+ "name": "AWS_REGION",
404
+ "value": self.registry.region,
405
+ }
406
+ )
407
+ # TODO(ben): Refactor all of this environment/registry
405
408
  # specific stuff into methods of those classes.
406
409
  if isinstance(self.environment, AzureEnvironment):
407
410
  # Use the core api to check if the secret exists
@@ -416,52 +419,46 @@ class KanikoBuilder(AbstractBuilder):
416
419
  "namespace wandb. Please create it with the key password "
417
420
  "set to your azure storage access key."
418
421
  ) from e
419
- env += [
420
- client.V1EnvVar(
421
- name="AZURE_STORAGE_ACCESS_KEY",
422
- value_from=client.V1EnvVarSource(
423
- secret_key_ref=client.V1SecretKeySelector(
424
- name="azure-storage-access-key",
425
- key="password",
426
- )
427
- ),
428
- )
429
- ]
422
+ env.append(
423
+ {
424
+ "name": "AZURE_STORAGE_ACCESS_KEY",
425
+ "valueFrom": {
426
+ "secretKeyRef": {
427
+ "name": "azure-storage-access-key",
428
+ "key": "password",
429
+ }
430
+ },
431
+ }
432
+ )
430
433
  if DOCKER_CONFIG_SECRET:
431
434
  volumes.append(
432
- client.V1Volume(
433
- name="kaniko-docker-config",
434
- secret=client.V1SecretVolumeSource(
435
- secret_name=DOCKER_CONFIG_SECRET,
436
- items=[
437
- client.V1KeyToPath(
438
- key=".dockerconfigjson", path="config.json"
439
- )
435
+ {
436
+ "name": "kaniko-docker-config",
437
+ "secret": {
438
+ "secretName": DOCKER_CONFIG_SECRET,
439
+ "items": [
440
+ {
441
+ "key": ".dockerconfigjson",
442
+ "path": "config.json",
443
+ }
440
444
  ],
441
- ),
442
- )
445
+ },
446
+ }
443
447
  )
444
448
  volume_mounts.append(
445
- client.V1VolumeMount(
446
- name="kaniko-docker-config",
447
- mount_path="/kaniko/.docker",
448
- )
449
+ {"name": "kaniko-docker-config", "mountPath": "/kaniko/.docker"}
449
450
  )
450
451
  elif self.secret_name and self.secret_key:
451
- volumes += [
452
- client.V1Volume(
453
- name="docker-config",
454
- config_map=client.V1ConfigMapVolumeSource(
455
- name=f"docker-config-{job_name}",
456
- ),
457
- ),
458
- ]
459
- volume_mounts += [
460
- client.V1VolumeMount(
461
- name="docker-config", mount_path="/kaniko/.docker/"
462
- ),
463
- ]
464
- # TODO: I don't like conditioning on the registry type here. As a
452
+ volumes.append(
453
+ {
454
+ "name": "docker-config",
455
+ "configMap": {"name": f"docker-config-{job_name}"},
456
+ }
457
+ )
458
+ volume_mounts.append(
459
+ {"name": "docker-config", "mountPath": "/kaniko/.docker"}
460
+ )
461
+ # TODO(ben): I don't like conditioning on the registry type here. As a
465
462
  # future change I want the registry and environment classes to provide
466
463
  # a list of environment variables and volume mounts that need to be
467
464
  # added to the job. The environment class provides credentials for
@@ -475,90 +472,95 @@ class KanikoBuilder(AbstractBuilder):
475
472
  elif isinstance(self.registry, GoogleArtifactRegistry):
476
473
  mount_path = "/kaniko/.config/gcloud"
477
474
  key = "config.json"
478
- env += [
479
- client.V1EnvVar(
480
- name="GOOGLE_APPLICATION_CREDENTIALS",
481
- value="/kaniko/.config/gcloud/config.json",
482
- )
483
- ]
484
- else:
485
- raise LaunchError(
486
- f"Registry type {type(self.registry)} not supported by kaniko"
487
- )
488
- volume_mounts += [
489
- client.V1VolumeMount(
490
- name=self.secret_name,
491
- mount_path=mount_path,
492
- read_only=True,
475
+ env.append(
476
+ {
477
+ "name": "GOOGLE_APPLICATION_CREDENTIALS",
478
+ "value": "/kaniko/.config/gcloud/config.json",
479
+ }
493
480
  )
494
- ]
495
- volumes += [
496
- client.V1Volume(
497
- name=self.secret_name,
498
- secret=client.V1SecretVolumeSource(
499
- secret_name=self.secret_name,
500
- items=[client.V1KeyToPath(key=self.secret_key, path=key)],
501
- ),
481
+ else:
482
+ wandb.termwarn(
483
+ f"{LOG_PREFIX}Automatic credential handling is not supported for registry type {type(self.registry)}. Build job: {self.build_job_name}"
502
484
  )
503
- ]
485
+ volumes.append(
486
+ {
487
+ "name": self.secret_name,
488
+ "secret": {
489
+ "secretName": self.secret_name,
490
+ "items": [{"key": self.secret_key, "path": key}],
491
+ },
492
+ }
493
+ )
494
+ volume_mounts.append(
495
+ {
496
+ "name": self.secret_name,
497
+ "mountPath": mount_path,
498
+ "readOnly": True,
499
+ }
500
+ )
504
501
  if isinstance(self.registry, AzureContainerRegistry):
505
- # ADd the docker config map
506
- volume_mounts += [
507
- client.V1VolumeMount(
508
- name="docker-config", mount_path="/kaniko/.docker/"
509
- ),
510
- ]
511
- volumes += [
512
- client.V1Volume(
513
- name="docker-config",
514
- config_map=client.V1ConfigMapVolumeSource(
515
- name=f"docker-config-{job_name}",
516
- ),
517
- ),
518
- ]
519
- # Kaniko doesn't want https:// at the begining of the image tag.
502
+ # Add the docker config map
503
+ volumes.append(
504
+ {
505
+ "name": "docker-config",
506
+ "configMap": {"name": f"docker-config-{job_name}"},
507
+ }
508
+ )
509
+ volume_mounts.append(
510
+ {"name": "docker-config", "mountPath": "/kaniko/.docker/"}
511
+ )
512
+ # Kaniko doesn't want https:// at the beginning of the image tag.
520
513
  destination = image_tag
521
514
  if destination.startswith("https://"):
522
515
  destination = destination.replace("https://", "")
523
- args = [
524
- f"--context={build_context_path}",
525
- f"--dockerfile={_WANDB_DOCKERFILE_NAME}",
526
- f"--destination={destination}",
527
- "--cache=true",
528
- f"--cache-repo={repository.replace('https://', '')}",
529
- "--snapshotMode=redo",
530
- "--compressed-caching=false",
516
+ args = {
517
+ "--context": build_context_path,
518
+ "--dockerfile": _WANDB_DOCKERFILE_NAME,
519
+ "--destination": destination,
520
+ "--cache": "true",
521
+ "--cache-repo": repository.replace("https://", ""),
522
+ "--snapshot-mode": "redo",
523
+ "--compressed-caching": "false",
524
+ }
525
+ for custom_arg in custom_args:
526
+ arg_name, arg_value = custom_arg.split("=", 1)
527
+ args[arg_name] = arg_value
528
+ parsed_args = [
529
+ f"{arg_name}={arg_value}" for arg_name, arg_value in args.items()
531
530
  ]
532
- container = client.V1Container(
533
- name="wandb-container-build",
534
- image=self.image,
535
- args=args,
536
- volume_mounts=volume_mounts,
537
- env=env if env else None,
538
- )
539
- # Create and configure a spec section
540
- labels = {"wandb": "launch"}
531
+ container["args"] = parsed_args
532
+
533
+ # Apply the rest of our defaults
534
+ pod_labels["wandb"] = "launch"
541
535
  # This annotation is required to enable azure workload identity.
542
536
  if isinstance(self.registry, AzureContainerRegistry):
543
- labels["azure.workload.identity/use"] = "true"
544
- template = client.V1PodTemplateSpec(
545
- metadata=client.V1ObjectMeta(labels=labels),
546
- spec=client.V1PodSpec(
547
- restart_policy="Never",
548
- active_deadline_seconds=_DEFAULT_BUILD_TIMEOUT_SECS,
549
- containers=[container],
550
- volumes=volumes,
551
- service_account_name=SERVICE_ACCOUNT_NAME,
552
- ),
537
+ pod_labels["azure.workload.identity/use"] = "true"
538
+ pod_spec["restartPolicy"] = pod_spec.get("restartPolicy", "Never")
539
+ pod_spec["activeDeadlineSeconds"] = pod_spec.get(
540
+ "activeDeadlineSeconds", _DEFAULT_BUILD_TIMEOUT_SECS
553
541
  )
554
- # Create the specification of job
555
- spec = client.V1JobSpec(template=template, backoff_limit=0)
556
- job = client.V1Job(
557
- api_version="batch/v1",
558
- kind="Job",
559
- metadata=client.V1ObjectMeta(
560
- name=job_name, namespace=NAMESPACE, labels={"wandb": "launch"}
561
- ),
562
- spec=spec,
542
+ pod_spec["serviceAccountName"] = pod_spec.get(
543
+ "serviceAccountName", SERVICE_ACCOUNT_NAME
563
544
  )
545
+ job_spec["backoffLimit"] = job_spec.get("backoffLimit", 0)
546
+ job_labels["wandb"] = "launch"
547
+ job_metadata["namespace"] = job_metadata.get("namespace", NAMESPACE)
548
+ job_metadata["name"] = job_metadata.get("name", job_name)
549
+ job["apiVersion"] = "batch/v1"
550
+ job["kind"] = "Job"
551
+
552
+ # Apply all nested configs from the bottom up
553
+ pod_metadata["labels"] = pod_labels
554
+ pod_template["metadata"] = pod_metadata
555
+ container["name"] = container.get("name", "wandb-container-build")
556
+ container["image"] = container.get("image", self.image)
557
+ container["volumeMounts"] = volume_mounts
558
+ container["env"] = env
559
+ pod_spec["containers"] = [container]
560
+ pod_spec["volumes"] = volumes
561
+ pod_template["spec"] = pod_spec
562
+ job_spec["template"] = pod_template
563
+ job_metadata["labels"] = job_labels
564
+ job["metadata"] = job_metadata
565
+ job["spec"] = job_spec
564
566
  return job
@@ -1,4 +1,5 @@
1
1
  """NoOp builder implementation."""
2
+
2
3
  from typing import Any, Dict, Optional
3
4
 
4
5
  from wandb.sdk.launch.builder.abstract import AbstractBuilder
@@ -0,0 +1,92 @@
1
+ DOCKERFILE_TEMPLATE = """
2
+ # ----- stage 1: build -----
3
+ FROM {py_build_image} as build
4
+
5
+ # requirements section depends on pip vs conda, and presence of buildx
6
+ ENV PIP_PROGRESS_BAR off
7
+ {requirements_section}
8
+
9
+ # ----- stage 2: base -----
10
+ {base_setup}
11
+
12
+ COPY --from=build /env /env
13
+ ENV PATH="/env/bin:$PATH"
14
+
15
+ ENV SHELL /bin/bash
16
+
17
+ # some resources (eg sagemaker) must run on root
18
+ {user_setup}
19
+
20
+ WORKDIR {workdir}
21
+ RUN chown -R {uid} {workdir}
22
+
23
+ # make artifacts cache dir unrelated to build
24
+ RUN mkdir -p {workdir}/.cache && chown -R {uid} {workdir}/.cache
25
+
26
+ # copy code/etc
27
+ COPY --chown={uid} src/ {workdir}
28
+
29
+ ENV PYTHONUNBUFFERED=1
30
+
31
+ {entrypoint_section}
32
+ """
33
+
34
+ # this goes into base_setup in TEMPLATE
35
+ PYTHON_SETUP_TEMPLATE = """
36
+ FROM {py_base_image} as base
37
+ """
38
+
39
+ # this goes into base_setup in TEMPLATE
40
+ ACCELERATOR_SETUP_TEMPLATE = """
41
+ FROM {accelerator_base_image} as base
42
+
43
+ # make non-interactive so build doesn't block on questions
44
+ ENV DEBIAN_FRONTEND=noninteractive
45
+
46
+ # install python
47
+ RUN apt-get update -qq && apt-get install --no-install-recommends -y \
48
+ {python_packages} \
49
+ && apt-get -qq purge && apt-get -qq clean \
50
+ && rm -rf /var/lib/apt/lists/*
51
+
52
+ # make sure `python` points at the right version
53
+ RUN update-alternatives --install /usr/bin/python python /usr/bin/python{py_version} 1 \
54
+ && update-alternatives --install /usr/local/bin/python python /usr/bin/python{py_version} 1
55
+ """
56
+
57
+ # this goes into requirements_section in TEMPLATE
58
+ PIP_TEMPLATE = """
59
+ RUN python -m venv /env
60
+ # make sure we install into the env
61
+ ENV PATH="/env/bin:$PATH"
62
+
63
+ COPY {requirements_files} ./
64
+ {buildx_optional_prefix} {pip_install}
65
+ """
66
+
67
+ # this goes into requirements_section in TEMPLATE
68
+ CONDA_TEMPLATE = """
69
+ COPY src/environment.yml .
70
+ {buildx_optional_prefix} conda env create -f environment.yml -n env
71
+
72
+ # pack the environment so that we can transfer to the base image
73
+ RUN conda install -c conda-forge conda-pack
74
+ RUN conda pack -n env -o /tmp/env.tar && \
75
+ mkdir /env && cd /env && tar xf /tmp/env.tar && \
76
+ rm /tmp/env.tar
77
+ RUN /env/bin/conda-unpack
78
+ """
79
+
80
+ USER_CREATE_TEMPLATE = """
81
+ RUN useradd \
82
+ --create-home \
83
+ --no-log-init \
84
+ --shell /bin/bash \
85
+ --gid 0 \
86
+ --uid {uid} \
87
+ {user} || echo ""
88
+ """
89
+
90
+ ENTRYPOINT_TEMPLATE = """
91
+ ENTRYPOINT {entrypoint}
92
+ """