wandb 0.13.10__py3-none-any.whl → 0.14.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (228)
  1. wandb/__init__.py +2 -3
  2. wandb/apis/__init__.py +1 -3
  3. wandb/apis/importers/__init__.py +4 -0
  4. wandb/apis/importers/base.py +312 -0
  5. wandb/apis/importers/mlflow.py +113 -0
  6. wandb/apis/internal.py +29 -2
  7. wandb/apis/normalize.py +6 -5
  8. wandb/apis/public.py +163 -180
  9. wandb/apis/reports/_templates.py +6 -12
  10. wandb/apis/reports/report.py +1 -1
  11. wandb/apis/reports/runset.py +1 -3
  12. wandb/apis/reports/util.py +12 -10
  13. wandb/beta/workflows.py +57 -34
  14. wandb/catboost/__init__.py +1 -2
  15. wandb/cli/cli.py +215 -133
  16. wandb/data_types.py +63 -56
  17. wandb/docker/__init__.py +78 -16
  18. wandb/docker/auth.py +21 -22
  19. wandb/env.py +0 -1
  20. wandb/errors/__init__.py +8 -116
  21. wandb/errors/term.py +1 -1
  22. wandb/fastai/__init__.py +1 -2
  23. wandb/filesync/dir_watcher.py +8 -5
  24. wandb/filesync/step_prepare.py +76 -75
  25. wandb/filesync/step_upload.py +1 -2
  26. wandb/integration/catboost/__init__.py +1 -3
  27. wandb/integration/catboost/catboost.py +8 -14
  28. wandb/integration/fastai/__init__.py +7 -13
  29. wandb/integration/gym/__init__.py +35 -4
  30. wandb/integration/keras/__init__.py +3 -3
  31. wandb/integration/keras/callbacks/metrics_logger.py +9 -8
  32. wandb/integration/keras/callbacks/model_checkpoint.py +9 -9
  33. wandb/integration/keras/callbacks/tables_builder.py +31 -19
  34. wandb/integration/kfp/kfp_patch.py +20 -17
  35. wandb/integration/kfp/wandb_logging.py +1 -2
  36. wandb/integration/lightgbm/__init__.py +21 -19
  37. wandb/integration/prodigy/prodigy.py +6 -7
  38. wandb/integration/sacred/__init__.py +9 -12
  39. wandb/integration/sagemaker/__init__.py +1 -3
  40. wandb/integration/sagemaker/auth.py +0 -1
  41. wandb/integration/sagemaker/config.py +1 -1
  42. wandb/integration/sagemaker/resources.py +1 -1
  43. wandb/integration/sb3/sb3.py +8 -4
  44. wandb/integration/tensorboard/__init__.py +1 -3
  45. wandb/integration/tensorboard/log.py +8 -8
  46. wandb/integration/tensorboard/monkeypatch.py +11 -9
  47. wandb/integration/tensorflow/__init__.py +1 -3
  48. wandb/integration/xgboost/__init__.py +4 -6
  49. wandb/integration/yolov8/__init__.py +7 -0
  50. wandb/integration/yolov8/yolov8.py +250 -0
  51. wandb/jupyter.py +31 -35
  52. wandb/lightgbm/__init__.py +1 -2
  53. wandb/old/settings.py +2 -2
  54. wandb/plot/bar.py +1 -2
  55. wandb/plot/confusion_matrix.py +1 -3
  56. wandb/plot/histogram.py +1 -2
  57. wandb/plot/line.py +1 -2
  58. wandb/plot/line_series.py +4 -4
  59. wandb/plot/pr_curve.py +17 -20
  60. wandb/plot/roc_curve.py +1 -3
  61. wandb/plot/scatter.py +1 -2
  62. wandb/proto/v3/wandb_server_pb2.py +85 -39
  63. wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
  64. wandb/proto/v4/wandb_server_pb2.py +51 -39
  65. wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
  66. wandb/sdk/__init__.py +1 -3
  67. wandb/sdk/backend/backend.py +1 -1
  68. wandb/sdk/data_types/_dtypes.py +38 -30
  69. wandb/sdk/data_types/base_types/json_metadata.py +1 -3
  70. wandb/sdk/data_types/base_types/media.py +17 -17
  71. wandb/sdk/data_types/base_types/wb_value.py +33 -26
  72. wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +91 -125
  73. wandb/sdk/data_types/helper_types/classes.py +1 -1
  74. wandb/sdk/data_types/helper_types/image_mask.py +12 -12
  75. wandb/sdk/data_types/histogram.py +5 -4
  76. wandb/sdk/data_types/html.py +1 -2
  77. wandb/sdk/data_types/image.py +11 -11
  78. wandb/sdk/data_types/molecule.py +3 -6
  79. wandb/sdk/data_types/object_3d.py +1 -2
  80. wandb/sdk/data_types/plotly.py +1 -2
  81. wandb/sdk/data_types/saved_model.py +10 -8
  82. wandb/sdk/data_types/video.py +1 -1
  83. wandb/sdk/integration_utils/data_logging.py +5 -5
  84. wandb/sdk/interface/artifacts.py +288 -266
  85. wandb/sdk/interface/interface.py +2 -3
  86. wandb/sdk/interface/interface_grpc.py +1 -1
  87. wandb/sdk/interface/interface_queue.py +1 -1
  88. wandb/sdk/interface/interface_relay.py +1 -1
  89. wandb/sdk/interface/interface_shared.py +1 -2
  90. wandb/sdk/interface/interface_sock.py +1 -1
  91. wandb/sdk/interface/message_future.py +1 -1
  92. wandb/sdk/interface/message_future_poll.py +1 -1
  93. wandb/sdk/interface/router.py +1 -1
  94. wandb/sdk/interface/router_queue.py +1 -1
  95. wandb/sdk/interface/router_relay.py +1 -1
  96. wandb/sdk/interface/router_sock.py +1 -1
  97. wandb/sdk/interface/summary_record.py +1 -1
  98. wandb/sdk/internal/artifacts.py +1 -1
  99. wandb/sdk/internal/datastore.py +2 -3
  100. wandb/sdk/internal/file_pusher.py +5 -3
  101. wandb/sdk/internal/file_stream.py +22 -19
  102. wandb/sdk/internal/handler.py +5 -4
  103. wandb/sdk/internal/internal.py +1 -1
  104. wandb/sdk/internal/internal_api.py +115 -55
  105. wandb/sdk/internal/job_builder.py +1 -3
  106. wandb/sdk/internal/profiler.py +1 -1
  107. wandb/sdk/internal/progress.py +4 -6
  108. wandb/sdk/internal/sample.py +1 -3
  109. wandb/sdk/internal/sender.py +28 -16
  110. wandb/sdk/internal/settings_static.py +5 -5
  111. wandb/sdk/internal/system/assets/__init__.py +1 -0
  112. wandb/sdk/internal/system/assets/cpu.py +3 -9
  113. wandb/sdk/internal/system/assets/disk.py +2 -4
  114. wandb/sdk/internal/system/assets/gpu.py +6 -18
  115. wandb/sdk/internal/system/assets/gpu_apple.py +2 -4
  116. wandb/sdk/internal/system/assets/interfaces.py +50 -22
  117. wandb/sdk/internal/system/assets/ipu.py +1 -3
  118. wandb/sdk/internal/system/assets/memory.py +7 -13
  119. wandb/sdk/internal/system/assets/network.py +4 -8
  120. wandb/sdk/internal/system/assets/open_metrics.py +283 -0
  121. wandb/sdk/internal/system/assets/tpu.py +1 -4
  122. wandb/sdk/internal/system/assets/trainium.py +26 -14
  123. wandb/sdk/internal/system/system_info.py +2 -3
  124. wandb/sdk/internal/system/system_monitor.py +52 -20
  125. wandb/sdk/internal/tb_watcher.py +12 -13
  126. wandb/sdk/launch/_project_spec.py +54 -65
  127. wandb/sdk/launch/agent/agent.py +374 -90
  128. wandb/sdk/launch/builder/abstract.py +61 -7
  129. wandb/sdk/launch/builder/build.py +81 -110
  130. wandb/sdk/launch/builder/docker_builder.py +181 -0
  131. wandb/sdk/launch/builder/kaniko_builder.py +419 -0
  132. wandb/sdk/launch/builder/noop.py +31 -12
  133. wandb/sdk/launch/builder/templates/_wandb_bootstrap.py +70 -20
  134. wandb/sdk/launch/environment/abstract.py +28 -0
  135. wandb/sdk/launch/environment/aws_environment.py +276 -0
  136. wandb/sdk/launch/environment/gcp_environment.py +271 -0
  137. wandb/sdk/launch/environment/local_environment.py +65 -0
  138. wandb/sdk/launch/github_reference.py +3 -8
  139. wandb/sdk/launch/launch.py +38 -29
  140. wandb/sdk/launch/launch_add.py +6 -8
  141. wandb/sdk/launch/loader.py +230 -0
  142. wandb/sdk/launch/registry/abstract.py +54 -0
  143. wandb/sdk/launch/registry/elastic_container_registry.py +163 -0
  144. wandb/sdk/launch/registry/google_artifact_registry.py +203 -0
  145. wandb/sdk/launch/registry/local_registry.py +62 -0
  146. wandb/sdk/launch/runner/abstract.py +1 -16
  147. wandb/sdk/launch/runner/{kubernetes.py → kubernetes_runner.py} +83 -95
  148. wandb/sdk/launch/runner/local_container.py +46 -22
  149. wandb/sdk/launch/runner/local_process.py +1 -4
  150. wandb/sdk/launch/runner/{aws.py → sagemaker_runner.py} +53 -212
  151. wandb/sdk/launch/runner/{gcp_vertex.py → vertex_runner.py} +38 -55
  152. wandb/sdk/launch/sweeps/__init__.py +3 -2
  153. wandb/sdk/launch/sweeps/scheduler.py +132 -39
  154. wandb/sdk/launch/sweeps/scheduler_sweep.py +80 -89
  155. wandb/sdk/launch/utils.py +101 -30
  156. wandb/sdk/launch/wandb_reference.py +2 -7
  157. wandb/sdk/lib/_settings_toposort_generate.py +166 -0
  158. wandb/sdk/lib/_settings_toposort_generated.py +201 -0
  159. wandb/sdk/lib/apikey.py +2 -4
  160. wandb/sdk/lib/config_util.py +4 -1
  161. wandb/sdk/lib/console.py +1 -3
  162. wandb/sdk/lib/deprecate.py +3 -3
  163. wandb/sdk/lib/file_stream_utils.py +7 -5
  164. wandb/sdk/lib/filenames.py +1 -1
  165. wandb/sdk/lib/filesystem.py +61 -5
  166. wandb/sdk/lib/git.py +1 -3
  167. wandb/sdk/lib/import_hooks.py +4 -7
  168. wandb/sdk/lib/ipython.py +8 -5
  169. wandb/sdk/lib/lazyloader.py +1 -3
  170. wandb/sdk/lib/mailbox.py +14 -4
  171. wandb/sdk/lib/proto_util.py +10 -5
  172. wandb/sdk/lib/redirect.py +15 -22
  173. wandb/sdk/lib/reporting.py +1 -3
  174. wandb/sdk/lib/retry.py +4 -5
  175. wandb/sdk/lib/runid.py +1 -3
  176. wandb/sdk/lib/server.py +15 -9
  177. wandb/sdk/lib/sock_client.py +1 -1
  178. wandb/sdk/lib/sparkline.py +1 -1
  179. wandb/sdk/lib/wburls.py +1 -1
  180. wandb/sdk/service/port_file.py +1 -2
  181. wandb/sdk/service/service.py +36 -13
  182. wandb/sdk/service/service_base.py +12 -1
  183. wandb/sdk/verify/verify.py +5 -7
  184. wandb/sdk/wandb_artifacts.py +142 -177
  185. wandb/sdk/wandb_config.py +5 -8
  186. wandb/sdk/wandb_helper.py +1 -1
  187. wandb/sdk/wandb_init.py +24 -13
  188. wandb/sdk/wandb_login.py +9 -9
  189. wandb/sdk/wandb_manager.py +39 -4
  190. wandb/sdk/wandb_metric.py +2 -6
  191. wandb/sdk/wandb_require.py +4 -15
  192. wandb/sdk/wandb_require_helpers.py +1 -9
  193. wandb/sdk/wandb_run.py +95 -141
  194. wandb/sdk/wandb_save.py +1 -3
  195. wandb/sdk/wandb_settings.py +149 -54
  196. wandb/sdk/wandb_setup.py +66 -46
  197. wandb/sdk/wandb_summary.py +13 -10
  198. wandb/sdk/wandb_sweep.py +6 -7
  199. wandb/sdk/wandb_watch.py +1 -1
  200. wandb/sklearn/calculate/confusion_matrix.py +1 -1
  201. wandb/sklearn/calculate/learning_curve.py +1 -1
  202. wandb/sklearn/calculate/summary_metrics.py +1 -3
  203. wandb/sklearn/plot/__init__.py +1 -1
  204. wandb/sklearn/plot/classifier.py +27 -18
  205. wandb/sklearn/plot/clusterer.py +4 -5
  206. wandb/sklearn/plot/regressor.py +4 -4
  207. wandb/sklearn/plot/shared.py +2 -2
  208. wandb/sync/__init__.py +1 -3
  209. wandb/sync/sync.py +4 -5
  210. wandb/testing/relay.py +11 -10
  211. wandb/trigger.py +1 -1
  212. wandb/util.py +106 -81
  213. wandb/viz.py +4 -4
  214. wandb/wandb_agent.py +50 -50
  215. wandb/wandb_controller.py +2 -3
  216. wandb/wandb_run.py +1 -2
  217. wandb/wandb_torch.py +1 -1
  218. wandb/xgboost/__init__.py +1 -2
  219. {wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/METADATA +6 -2
  220. {wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/RECORD +224 -209
  221. {wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/WHEEL +1 -1
  222. wandb/sdk/launch/builder/docker.py +0 -80
  223. wandb/sdk/launch/builder/kaniko.py +0 -393
  224. wandb/sdk/launch/builder/loader.py +0 -32
  225. wandb/sdk/launch/runner/loader.py +0 -50
  226. {wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/LICENSE +0 -0
  227. {wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/entry_points.txt +0 -0
  228. {wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.37.1)
2
+ Generator: bdist_wheel (0.40.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,80 +0,0 @@
1
- import logging
2
- import os
3
- from typing import Any, Dict, Optional
4
-
5
- import wandb
6
- import wandb.docker as docker
7
- from wandb.errors import DockerError, LaunchError
8
- from wandb.sdk.launch.builder.abstract import AbstractBuilder
9
-
10
- from .._project_spec import (
11
- EntryPoint,
12
- LaunchProject,
13
- create_metadata_file,
14
- get_entry_point_command,
15
- )
16
- from ..utils import LOG_PREFIX, sanitize_wandb_api_key
17
- from .build import (
18
- _create_docker_build_ctx,
19
- generate_dockerfile,
20
- validate_docker_installation,
21
- )
22
-
23
- _GENERATED_DOCKERFILE_NAME = "Dockerfile.wandb-autogenerated"
24
- _logger = logging.getLogger(__name__)
25
-
26
-
27
- class DockerBuilder(AbstractBuilder):
28
- type = "docker"
29
-
30
- def __init__(self, builder_config: Dict[str, Any]):
31
- super().__init__(builder_config)
32
- validate_docker_installation()
33
-
34
- def build_image(
35
- self,
36
- launch_project: LaunchProject,
37
- repository: Optional[str],
38
- entrypoint: EntryPoint,
39
- ) -> str:
40
-
41
- if repository:
42
- image_uri = f"{repository}:{launch_project.image_tag}"
43
- else:
44
- image_uri = launch_project.image_uri
45
- entry_cmd = get_entry_point_command(entrypoint, launch_project.override_args)
46
- dockerfile_str = generate_dockerfile(
47
- launch_project, entrypoint, launch_project.resource, self.type
48
- )
49
- create_metadata_file(
50
- launch_project,
51
- image_uri,
52
- sanitize_wandb_api_key(" ".join(entry_cmd)),
53
- dockerfile_str,
54
- )
55
- build_ctx_path = _create_docker_build_ctx(launch_project, dockerfile_str)
56
- dockerfile = os.path.join(build_ctx_path, _GENERATED_DOCKERFILE_NAME)
57
- try:
58
- docker.build(tags=[image_uri], file=dockerfile, context_path=build_ctx_path)
59
- except DockerError as e:
60
- raise LaunchError(f"Error communicating with docker client: {e}")
61
-
62
- try:
63
- os.remove(build_ctx_path)
64
- except Exception:
65
- _msg = f"{LOG_PREFIX}Temporary docker context file {build_ctx_path} was not deleted."
66
- _logger.info(_msg)
67
-
68
- if repository:
69
- reg, tag = image_uri.split(":")
70
- wandb.termlog(f"{LOG_PREFIX}Pushing image {image_uri}")
71
- push_resp = docker.push(reg, tag)
72
- if push_resp is None:
73
- raise LaunchError("Failed to push image to repository")
74
- elif (
75
- launch_project.resource == "sagemaker"
76
- and f"The push refers to repository [{repository}]" not in push_resp
77
- ):
78
- raise LaunchError(f"Unable to push image to ECR, response: {push_resp}")
79
-
80
- return image_uri
@@ -1,393 +0,0 @@
1
- import base64
2
- import json
3
- import os
4
- import tarfile
5
- import tempfile
6
- import time
7
- from typing import Any, Dict, Optional
8
-
9
- import kubernetes # type: ignore
10
- from kubernetes import client
11
-
12
- import wandb
13
- from wandb.errors import LaunchError
14
- from wandb.sdk.launch.builder.abstract import AbstractBuilder
15
- from wandb.util import get_module
16
-
17
- from .._project_spec import (
18
- EntryPoint,
19
- LaunchProject,
20
- create_metadata_file,
21
- get_entry_point_command,
22
- )
23
- from ..utils import LOG_PREFIX, get_kube_context_and_api_client, sanitize_wandb_api_key
24
- from .build import _create_docker_build_ctx, generate_dockerfile
25
-
26
- _DEFAULT_BUILD_TIMEOUT_SECS = 1800 # 30 minute build timeout
27
-
28
-
29
- def _create_dockerfile_configmap(
30
- config_map_name: str, context_path: str
31
- ) -> client.V1ConfigMap:
32
- with open(os.path.join(context_path, "Dockerfile.wandb-autogenerated"), "rb") as f:
33
- docker_file_bytes = f.read()
34
-
35
- build_config_map = client.V1ConfigMap(
36
- metadata=client.V1ObjectMeta(
37
- name=config_map_name, namespace="wandb", labels={"wandb": "launch"}
38
- ),
39
- binary_data={
40
- "Dockerfile": base64.b64encode(docker_file_bytes).decode("UTF-8"),
41
- },
42
- immutable=True,
43
- )
44
- return build_config_map
45
-
46
-
47
- def _wait_for_completion(
48
- batch_client: client.BatchV1Api, job_name: str, deadline_secs: Optional[int] = None
49
- ) -> bool:
50
- start_time = time.time()
51
- while True:
52
- job = batch_client.read_namespaced_job_status(job_name, "wandb")
53
- if job.status.succeeded is not None and job.status.succeeded >= 1:
54
- return True
55
- elif job.status.failed is not None and job.status.failed >= 1:
56
- return False
57
- wandb.termlog(f"{LOG_PREFIX}Waiting for build job to complete...")
58
- if deadline_secs is not None and time.time() - start_time > deadline_secs:
59
- return False
60
-
61
- time.sleep(5)
62
-
63
-
64
- class KanikoBuilder(AbstractBuilder):
65
- type = "kaniko"
66
-
67
- def __init__(self, builder_config: Dict[str, Any]):
68
- super().__init__(builder_config)
69
- self.config_map_name = builder_config.get(
70
- "config-map-name", "wandb-launch-build-context"
71
- )
72
- self.build_job_name = builder_config.get(
73
- "build-job-name", "wandb-launch-container-build"
74
- )
75
- cloud_provider = builder_config.get("cloud-provider", None)
76
- if cloud_provider is None or not isinstance(cloud_provider, str):
77
- raise LaunchError("Kaniko builder requires string cloud-provider")
78
- self.cloud_provider: str = cloud_provider.lower()
79
- self.instance_mode = False
80
- if not builder_config.get("credentials"):
81
- self.instance_mode = True
82
- # if no cloud provider info given, assume running in instance mode
83
- # kaniko pod will have access to build context store and ecr
84
- wandb.termlog(f"{LOG_PREFIX}Kaniko builder running in instance mode")
85
-
86
- self.build_context_store = builder_config.get("build-context-store", None)
87
- if self.build_context_store is None:
88
- raise LaunchError("build-context-store is not set in cloud-provider")
89
- credentials_config = builder_config.get("credentials", {})
90
- self.credentials_secret_name = credentials_config.get("secret-name")
91
- self.credentials_secret_mount_path = credentials_config.get("secret-mount-path")
92
- if bool(self.credentials_secret_name) != bool(
93
- self.credentials_secret_mount_path
94
- ):
95
- raise LaunchError(
96
- "Must provide secret-name and secret-mount-path or neither"
97
- )
98
-
99
- def _create_docker_ecr_config_map(
100
- self, corev1_client: client.CoreV1Api, repository: str
101
- ) -> None:
102
- if self.cloud_provider.lower() == "aws":
103
- if not self.instance_mode:
104
- ecr_config_map = client.V1ConfigMap(
105
- api_version="v1",
106
- kind="ConfigMap",
107
- metadata=client.V1ObjectMeta(
108
- name="docker-config",
109
- namespace="wandb",
110
- ),
111
- data={"config.json": json.dumps({"credsStore": "ecr-login"})},
112
- immutable=True,
113
- )
114
- else:
115
- wandb.termlog(
116
- f"{LOG_PREFIX}Builder not supplied with credentials, assuming instance mode."
117
- )
118
- d = {
119
- "config.json": json.dumps(
120
- {"credHelpers": {repository.split(":")[0]: "ecr-login"}}
121
- )
122
- }
123
- ecr_config_map = client.V1ConfigMap(
124
- api_version="v1",
125
- kind="ConfigMap",
126
- metadata=client.V1ObjectMeta(
127
- name="docker-config",
128
- namespace="wandb",
129
- ),
130
- data=d,
131
- immutable=True,
132
- )
133
- corev1_client.create_namespaced_config_map("wandb", ecr_config_map)
134
-
135
- def _delete_docker_ecr_config_map(self, client: client.CoreV1Api) -> None:
136
- client.delete_namespaced_config_map("docker-config", "wandb")
137
-
138
- def _upload_build_context(self, run_id: str, context_path: str) -> str:
139
- # creat a tar archive of the build context and upload it to s3
140
- context_file = tempfile.NamedTemporaryFile(delete=False)
141
- with tarfile.TarFile.open(fileobj=context_file, mode="w:gz") as context_tgz:
142
- context_tgz.add(context_path, arcname=".")
143
- context_file.close()
144
- if self.cloud_provider.lower() == "aws":
145
- boto3 = get_module(
146
- "boto3",
147
- "AWS cloud provider requires boto3, install with pip install wandb[launch]",
148
- )
149
- botocore = get_module(
150
- "botocore",
151
- "aws cloud-provider requires botocore, install with pip install wandb[launch]",
152
- )
153
-
154
- s3_client = boto3.client("s3")
155
-
156
- try:
157
- s3_client.upload_file(
158
- context_file.name, self.build_context_store, f"{run_id}.tgz"
159
- )
160
- os.remove(context_file.name)
161
- except botocore.exceptions.ClientError as e:
162
- os.remove(context_file.name)
163
- raise LaunchError(f"Failed to upload build context to S3: {e}")
164
- return f"s3://{self.build_context_store}/{run_id}.tgz"
165
- # TODO: support gcp and azure cloud providers
166
- elif self.cloud_provider.lower() == "gcp":
167
- storage = get_module(
168
- "google.cloud.storage",
169
- "gcp provider requires google-cloud-storage, install with pip install wandb[launch]",
170
- )
171
-
172
- storage_client = storage.Client()
173
- try:
174
- bucket = storage_client.bucket(self.build_context_store)
175
- blob = bucket.blob(f"{run_id}.tgz")
176
- blob.upload_from_filename(context_file.name)
177
- os.remove(context_file.name)
178
- except Exception as e:
179
- os.remove(context_file.name)
180
- raise LaunchError(f"Failed to upload build context to GCP: {e}")
181
- return f"gs://{self.build_context_store}/{run_id}.tgz"
182
- else:
183
- raise LaunchError("Unsupported storage provider")
184
-
185
- def check_build_required(
186
- self, repository: str, launch_project: LaunchProject
187
- ) -> bool:
188
- # TODO(kyle): Robustify to remote the trycatch
189
- try:
190
- ecr_provider = self.cloud_provider.lower()
191
- if ecr_provider == "aws" and repository:
192
- # TODO: pass in registry config
193
- region = repository.split(".")[3]
194
- boto3 = get_module(
195
- "boto3",
196
- "AWS ECR requires boto3, install with pip install wandb[launch]",
197
- )
198
- ecr_client = boto3.client("ecr", region_name=region)
199
- repo_name = repository.split("/")[-1]
200
- try:
201
- ecr_client.describe_images(
202
- repositoryName=repo_name,
203
- imageIds=[{"imageTag": launch_project.image_tag}],
204
- )
205
- return False
206
- except ecr_client.exceptions.ImageNotFoundException:
207
- return True
208
- else:
209
- return True
210
- except Exception as e:
211
- wandb.termlog(
212
- f"{LOG_PREFIX}Failed while checking if build is required, defaulting to building: {e}"
213
- )
214
- return True
215
-
216
- def build_image(
217
- self,
218
- launch_project: LaunchProject,
219
- repository: Optional[str],
220
- entrypoint: EntryPoint,
221
- ) -> str:
222
-
223
- if repository is None:
224
- raise LaunchError("repository is required for kaniko builder")
225
-
226
- image_uri = f"{repository}:{launch_project.image_tag}"
227
- wandb.termlog(f"{LOG_PREFIX}Checking for image {image_uri}")
228
- if not self.check_build_required(repository, launch_project):
229
- return image_uri
230
- entry_cmd = " ".join(
231
- get_entry_point_command(entrypoint, launch_project.override_args)
232
- )
233
-
234
- # kaniko builder doesn't seem to work with a custom user id, need more investigation
235
- dockerfile_str = generate_dockerfile(
236
- launch_project, entrypoint, launch_project.resource, self.type
237
- )
238
- create_metadata_file(
239
- launch_project,
240
- image_uri,
241
- sanitize_wandb_api_key(entry_cmd),
242
- sanitize_wandb_api_key(dockerfile_str),
243
- )
244
- context_path = _create_docker_build_ctx(launch_project, dockerfile_str)
245
- run_id = launch_project.run_id
246
-
247
- _, api_client = get_kube_context_and_api_client(
248
- kubernetes, launch_project.resource_args
249
- )
250
- build_job_name = f"{self.build_job_name}-{run_id}"
251
- config_map_name = f"{self.config_map_name}-{run_id}"
252
-
253
- build_context = self._upload_build_context(run_id, context_path)
254
- dockerfile_config_map = _create_dockerfile_configmap(
255
- config_map_name, context_path
256
- )
257
- build_job = self._create_kaniko_job(
258
- build_job_name,
259
- dockerfile_config_map.metadata.name,
260
- repository,
261
- image_uri,
262
- build_context,
263
- )
264
- wandb.termlog(f"{LOG_PREFIX}Created kaniko job {build_job_name}")
265
-
266
- # TODO: use same client as kuberentes.py
267
- batch_v1 = client.BatchV1Api(api_client)
268
- core_v1 = client.CoreV1Api(api_client)
269
-
270
- try:
271
- core_v1.create_namespaced_config_map("wandb", dockerfile_config_map)
272
- self._create_docker_ecr_config_map(core_v1, repository)
273
- batch_v1.create_namespaced_job("wandb", build_job)
274
-
275
- # wait for double the job deadline since it might take time to schedule
276
- if not _wait_for_completion(
277
- batch_v1, build_job_name, 3 * _DEFAULT_BUILD_TIMEOUT_SECS
278
- ):
279
- raise Exception(f"Failed to build image in kaniko for job {run_id}")
280
- except Exception as e:
281
- wandb.termerror(
282
- f"{LOG_PREFIX}Exception when creating Kubernetes resources: {e}\n"
283
- )
284
- finally:
285
- wandb.termlog(f"{LOG_PREFIX}Cleaning up resources")
286
- try:
287
- # should we clean up the s3 build contexts? can set bucket level policy to auto deletion
288
- core_v1.delete_namespaced_config_map(config_map_name, "wandb")
289
- self._delete_docker_ecr_config_map(core_v1)
290
- batch_v1.delete_namespaced_job(build_job_name, "wandb")
291
- except Exception as e:
292
- raise LaunchError(f"Exception during Kubernetes resource clean up {e}")
293
-
294
- return image_uri
295
-
296
- def _create_kaniko_job(
297
- self,
298
- job_name: str,
299
- config_map_name: str,
300
- repository: str,
301
- image_tag: str,
302
- build_context_path: str,
303
- ) -> "client.V1Job":
304
- env = None
305
- if self.instance_mode and self.cloud_provider.lower() == "aws":
306
- region = repository.split(".")[3]
307
- env = client.V1EnvVar(name="AWS_REGION", value=region)
308
-
309
- volume_mounts = [
310
- client.V1VolumeMount(
311
- name="build-context-config-map", mount_path="/etc/config"
312
- ),
313
- client.V1VolumeMount(name="docker-config", mount_path="/kaniko/.docker/"),
314
- ]
315
- volumes = [
316
- client.V1Volume(
317
- name="build-context-config-map",
318
- config_map=client.V1ConfigMapVolumeSource(
319
- name=config_map_name,
320
- ),
321
- ),
322
- client.V1Volume(
323
- name="docker-config",
324
- config_map=client.V1ConfigMapVolumeSource(
325
- name="docker-config",
326
- ),
327
- ),
328
- ]
329
- if (
330
- self.credentials_secret_name is not None
331
- and self.credentials_secret_mount_path is not None
332
- ):
333
- volume_mounts += [
334
- client.V1VolumeMount(
335
- name=self.credentials_secret_name,
336
- mount_path=self.credentials_secret_mount_path,
337
- read_only=True,
338
- )
339
- ]
340
- volumes += [
341
- client.V1Volume(
342
- name=self.credentials_secret_name,
343
- secret=client.V1SecretVolumeSource(
344
- secret_name=self.credentials_secret_name
345
- ),
346
- )
347
- ]
348
- # Configurate Pod template container
349
- args = [
350
- f"--context={build_context_path}",
351
- "--dockerfile=/etc/config/Dockerfile",
352
- f"--destination={image_tag}",
353
- "--cache=true",
354
- f"--cache-repo={repository}",
355
- "--snapshotMode=redo",
356
- ]
357
- if env is not None:
358
- container = client.V1Container(
359
- name="wandb-container-build",
360
- image="gcr.io/kaniko-project/executor:v1.8.0",
361
- args=args,
362
- volume_mounts=volume_mounts,
363
- env=[env],
364
- )
365
- else:
366
- container = client.V1Container(
367
- name="wandb-container-build",
368
- image="gcr.io/kaniko-project/executor:v1.8.0",
369
- args=args,
370
- volume_mounts=volume_mounts,
371
- )
372
- # Create and configure a spec section
373
- template = client.V1PodTemplateSpec(
374
- metadata=client.V1ObjectMeta(labels={"wandb": "launch"}),
375
- spec=client.V1PodSpec(
376
- restart_policy="Never",
377
- active_deadline_seconds=_DEFAULT_BUILD_TIMEOUT_SECS,
378
- containers=[container],
379
- volumes=volumes,
380
- ),
381
- )
382
- # Create the specification of job
383
- spec = client.V1JobSpec(template=template, backoff_limit=1)
384
- job = client.V1Job(
385
- api_version="batch/v1",
386
- kind="Job",
387
- metadata=client.V1ObjectMeta(
388
- name=job_name, namespace="wandb", labels={"wandb": "launch"}
389
- ),
390
- spec=spec,
391
- )
392
-
393
- return job
@@ -1,32 +0,0 @@
1
- import logging
2
- from typing import Any, Dict, List
3
-
4
- from wandb.errors import LaunchError
5
-
6
- from .abstract import AbstractBuilder
7
-
8
- __logger__ = logging.getLogger(__name__)
9
-
10
-
11
- _WANDB_BUILDERS: List[str] = ["kaniko", "docker", "noop"]
12
-
13
-
14
- def load_builder(builder_config: Dict[str, Any]) -> AbstractBuilder:
15
- builder_name = builder_config.get("type", "docker")
16
- if builder_name == "kaniko":
17
- from .kaniko import KanikoBuilder
18
-
19
- return KanikoBuilder(builder_config)
20
- elif builder_name == "docker":
21
- from .docker import DockerBuilder
22
-
23
- return DockerBuilder(builder_config)
24
- elif builder_name == "noop":
25
- from .noop import NoOpBuilder
26
-
27
- return NoOpBuilder(builder_config)
28
- raise LaunchError(
29
- "Builder name not among available builders. Available builders: {} ".format(
30
- ",".join(_WANDB_BUILDERS)
31
- )
32
- )
@@ -1,50 +0,0 @@
1
- import logging
2
- from typing import Any, Dict, List
3
-
4
- from wandb.apis.internal import Api
5
- from wandb.errors import LaunchError
6
-
7
- from .abstract import AbstractRunner
8
-
9
- __logger__ = logging.getLogger(__name__)
10
-
11
-
12
- # Statically register backend defined in wandb
13
- WANDB_RUNNERS: List[str] = [
14
- "local-container",
15
- "local-process",
16
- "gcp-vertex",
17
- "sagemaker",
18
- "kubernetes",
19
- ]
20
-
21
-
22
- def load_backend(
23
- backend_name: str, api: Api, backend_config: Dict[str, Any]
24
- ) -> AbstractRunner:
25
- # Static backends
26
- if backend_name in ["local", "local-container"]:
27
- from .local_container import LocalContainerRunner
28
-
29
- return LocalContainerRunner(api, backend_config)
30
- elif backend_name in ["bare", "local-process"]:
31
- from .local_process import LocalProcessRunner
32
-
33
- return LocalProcessRunner(api, backend_config)
34
- elif backend_name == "gcp-vertex":
35
- from .gcp_vertex import VertexRunner
36
-
37
- return VertexRunner(api, backend_config)
38
- elif backend_name == "sagemaker":
39
- from .aws import AWSSagemakerRunner
40
-
41
- return AWSSagemakerRunner(api, backend_config)
42
- elif backend_name == "kubernetes":
43
- from .kubernetes import KubernetesRunner
44
-
45
- return KubernetesRunner(api, backend_config)
46
- raise LaunchError(
47
- "Resource name not among available resources. Available resources: {} ".format(
48
- ",".join(WANDB_RUNNERS)
49
- )
50
- )