wandb 0.13.10__py3-none-any.whl → 0.14.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (228) hide show
  1. wandb/__init__.py +2 -3
  2. wandb/apis/__init__.py +1 -3
  3. wandb/apis/importers/__init__.py +4 -0
  4. wandb/apis/importers/base.py +312 -0
  5. wandb/apis/importers/mlflow.py +113 -0
  6. wandb/apis/internal.py +29 -2
  7. wandb/apis/normalize.py +6 -5
  8. wandb/apis/public.py +163 -180
  9. wandb/apis/reports/_templates.py +6 -12
  10. wandb/apis/reports/report.py +1 -1
  11. wandb/apis/reports/runset.py +1 -3
  12. wandb/apis/reports/util.py +12 -10
  13. wandb/beta/workflows.py +57 -34
  14. wandb/catboost/__init__.py +1 -2
  15. wandb/cli/cli.py +215 -133
  16. wandb/data_types.py +63 -56
  17. wandb/docker/__init__.py +78 -16
  18. wandb/docker/auth.py +21 -22
  19. wandb/env.py +0 -1
  20. wandb/errors/__init__.py +8 -116
  21. wandb/errors/term.py +1 -1
  22. wandb/fastai/__init__.py +1 -2
  23. wandb/filesync/dir_watcher.py +8 -5
  24. wandb/filesync/step_prepare.py +76 -75
  25. wandb/filesync/step_upload.py +1 -2
  26. wandb/integration/catboost/__init__.py +1 -3
  27. wandb/integration/catboost/catboost.py +8 -14
  28. wandb/integration/fastai/__init__.py +7 -13
  29. wandb/integration/gym/__init__.py +35 -4
  30. wandb/integration/keras/__init__.py +3 -3
  31. wandb/integration/keras/callbacks/metrics_logger.py +9 -8
  32. wandb/integration/keras/callbacks/model_checkpoint.py +9 -9
  33. wandb/integration/keras/callbacks/tables_builder.py +31 -19
  34. wandb/integration/kfp/kfp_patch.py +20 -17
  35. wandb/integration/kfp/wandb_logging.py +1 -2
  36. wandb/integration/lightgbm/__init__.py +21 -19
  37. wandb/integration/prodigy/prodigy.py +6 -7
  38. wandb/integration/sacred/__init__.py +9 -12
  39. wandb/integration/sagemaker/__init__.py +1 -3
  40. wandb/integration/sagemaker/auth.py +0 -1
  41. wandb/integration/sagemaker/config.py +1 -1
  42. wandb/integration/sagemaker/resources.py +1 -1
  43. wandb/integration/sb3/sb3.py +8 -4
  44. wandb/integration/tensorboard/__init__.py +1 -3
  45. wandb/integration/tensorboard/log.py +8 -8
  46. wandb/integration/tensorboard/monkeypatch.py +11 -9
  47. wandb/integration/tensorflow/__init__.py +1 -3
  48. wandb/integration/xgboost/__init__.py +4 -6
  49. wandb/integration/yolov8/__init__.py +7 -0
  50. wandb/integration/yolov8/yolov8.py +250 -0
  51. wandb/jupyter.py +31 -35
  52. wandb/lightgbm/__init__.py +1 -2
  53. wandb/old/settings.py +2 -2
  54. wandb/plot/bar.py +1 -2
  55. wandb/plot/confusion_matrix.py +1 -3
  56. wandb/plot/histogram.py +1 -2
  57. wandb/plot/line.py +1 -2
  58. wandb/plot/line_series.py +4 -4
  59. wandb/plot/pr_curve.py +17 -20
  60. wandb/plot/roc_curve.py +1 -3
  61. wandb/plot/scatter.py +1 -2
  62. wandb/proto/v3/wandb_server_pb2.py +85 -39
  63. wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
  64. wandb/proto/v4/wandb_server_pb2.py +51 -39
  65. wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
  66. wandb/sdk/__init__.py +1 -3
  67. wandb/sdk/backend/backend.py +1 -1
  68. wandb/sdk/data_types/_dtypes.py +38 -30
  69. wandb/sdk/data_types/base_types/json_metadata.py +1 -3
  70. wandb/sdk/data_types/base_types/media.py +17 -17
  71. wandb/sdk/data_types/base_types/wb_value.py +33 -26
  72. wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +91 -125
  73. wandb/sdk/data_types/helper_types/classes.py +1 -1
  74. wandb/sdk/data_types/helper_types/image_mask.py +12 -12
  75. wandb/sdk/data_types/histogram.py +5 -4
  76. wandb/sdk/data_types/html.py +1 -2
  77. wandb/sdk/data_types/image.py +11 -11
  78. wandb/sdk/data_types/molecule.py +3 -6
  79. wandb/sdk/data_types/object_3d.py +1 -2
  80. wandb/sdk/data_types/plotly.py +1 -2
  81. wandb/sdk/data_types/saved_model.py +10 -8
  82. wandb/sdk/data_types/video.py +1 -1
  83. wandb/sdk/integration_utils/data_logging.py +5 -5
  84. wandb/sdk/interface/artifacts.py +288 -266
  85. wandb/sdk/interface/interface.py +2 -3
  86. wandb/sdk/interface/interface_grpc.py +1 -1
  87. wandb/sdk/interface/interface_queue.py +1 -1
  88. wandb/sdk/interface/interface_relay.py +1 -1
  89. wandb/sdk/interface/interface_shared.py +1 -2
  90. wandb/sdk/interface/interface_sock.py +1 -1
  91. wandb/sdk/interface/message_future.py +1 -1
  92. wandb/sdk/interface/message_future_poll.py +1 -1
  93. wandb/sdk/interface/router.py +1 -1
  94. wandb/sdk/interface/router_queue.py +1 -1
  95. wandb/sdk/interface/router_relay.py +1 -1
  96. wandb/sdk/interface/router_sock.py +1 -1
  97. wandb/sdk/interface/summary_record.py +1 -1
  98. wandb/sdk/internal/artifacts.py +1 -1
  99. wandb/sdk/internal/datastore.py +2 -3
  100. wandb/sdk/internal/file_pusher.py +5 -3
  101. wandb/sdk/internal/file_stream.py +22 -19
  102. wandb/sdk/internal/handler.py +5 -4
  103. wandb/sdk/internal/internal.py +1 -1
  104. wandb/sdk/internal/internal_api.py +115 -55
  105. wandb/sdk/internal/job_builder.py +1 -3
  106. wandb/sdk/internal/profiler.py +1 -1
  107. wandb/sdk/internal/progress.py +4 -6
  108. wandb/sdk/internal/sample.py +1 -3
  109. wandb/sdk/internal/sender.py +28 -16
  110. wandb/sdk/internal/settings_static.py +5 -5
  111. wandb/sdk/internal/system/assets/__init__.py +1 -0
  112. wandb/sdk/internal/system/assets/cpu.py +3 -9
  113. wandb/sdk/internal/system/assets/disk.py +2 -4
  114. wandb/sdk/internal/system/assets/gpu.py +6 -18
  115. wandb/sdk/internal/system/assets/gpu_apple.py +2 -4
  116. wandb/sdk/internal/system/assets/interfaces.py +50 -22
  117. wandb/sdk/internal/system/assets/ipu.py +1 -3
  118. wandb/sdk/internal/system/assets/memory.py +7 -13
  119. wandb/sdk/internal/system/assets/network.py +4 -8
  120. wandb/sdk/internal/system/assets/open_metrics.py +283 -0
  121. wandb/sdk/internal/system/assets/tpu.py +1 -4
  122. wandb/sdk/internal/system/assets/trainium.py +26 -14
  123. wandb/sdk/internal/system/system_info.py +2 -3
  124. wandb/sdk/internal/system/system_monitor.py +52 -20
  125. wandb/sdk/internal/tb_watcher.py +12 -13
  126. wandb/sdk/launch/_project_spec.py +54 -65
  127. wandb/sdk/launch/agent/agent.py +374 -90
  128. wandb/sdk/launch/builder/abstract.py +61 -7
  129. wandb/sdk/launch/builder/build.py +81 -110
  130. wandb/sdk/launch/builder/docker_builder.py +181 -0
  131. wandb/sdk/launch/builder/kaniko_builder.py +419 -0
  132. wandb/sdk/launch/builder/noop.py +31 -12
  133. wandb/sdk/launch/builder/templates/_wandb_bootstrap.py +70 -20
  134. wandb/sdk/launch/environment/abstract.py +28 -0
  135. wandb/sdk/launch/environment/aws_environment.py +276 -0
  136. wandb/sdk/launch/environment/gcp_environment.py +271 -0
  137. wandb/sdk/launch/environment/local_environment.py +65 -0
  138. wandb/sdk/launch/github_reference.py +3 -8
  139. wandb/sdk/launch/launch.py +38 -29
  140. wandb/sdk/launch/launch_add.py +6 -8
  141. wandb/sdk/launch/loader.py +230 -0
  142. wandb/sdk/launch/registry/abstract.py +54 -0
  143. wandb/sdk/launch/registry/elastic_container_registry.py +163 -0
  144. wandb/sdk/launch/registry/google_artifact_registry.py +203 -0
  145. wandb/sdk/launch/registry/local_registry.py +62 -0
  146. wandb/sdk/launch/runner/abstract.py +1 -16
  147. wandb/sdk/launch/runner/{kubernetes.py → kubernetes_runner.py} +83 -95
  148. wandb/sdk/launch/runner/local_container.py +46 -22
  149. wandb/sdk/launch/runner/local_process.py +1 -4
  150. wandb/sdk/launch/runner/{aws.py → sagemaker_runner.py} +53 -212
  151. wandb/sdk/launch/runner/{gcp_vertex.py → vertex_runner.py} +38 -55
  152. wandb/sdk/launch/sweeps/__init__.py +3 -2
  153. wandb/sdk/launch/sweeps/scheduler.py +132 -39
  154. wandb/sdk/launch/sweeps/scheduler_sweep.py +80 -89
  155. wandb/sdk/launch/utils.py +101 -30
  156. wandb/sdk/launch/wandb_reference.py +2 -7
  157. wandb/sdk/lib/_settings_toposort_generate.py +166 -0
  158. wandb/sdk/lib/_settings_toposort_generated.py +201 -0
  159. wandb/sdk/lib/apikey.py +2 -4
  160. wandb/sdk/lib/config_util.py +4 -1
  161. wandb/sdk/lib/console.py +1 -3
  162. wandb/sdk/lib/deprecate.py +3 -3
  163. wandb/sdk/lib/file_stream_utils.py +7 -5
  164. wandb/sdk/lib/filenames.py +1 -1
  165. wandb/sdk/lib/filesystem.py +61 -5
  166. wandb/sdk/lib/git.py +1 -3
  167. wandb/sdk/lib/import_hooks.py +4 -7
  168. wandb/sdk/lib/ipython.py +8 -5
  169. wandb/sdk/lib/lazyloader.py +1 -3
  170. wandb/sdk/lib/mailbox.py +14 -4
  171. wandb/sdk/lib/proto_util.py +10 -5
  172. wandb/sdk/lib/redirect.py +15 -22
  173. wandb/sdk/lib/reporting.py +1 -3
  174. wandb/sdk/lib/retry.py +4 -5
  175. wandb/sdk/lib/runid.py +1 -3
  176. wandb/sdk/lib/server.py +15 -9
  177. wandb/sdk/lib/sock_client.py +1 -1
  178. wandb/sdk/lib/sparkline.py +1 -1
  179. wandb/sdk/lib/wburls.py +1 -1
  180. wandb/sdk/service/port_file.py +1 -2
  181. wandb/sdk/service/service.py +36 -13
  182. wandb/sdk/service/service_base.py +12 -1
  183. wandb/sdk/verify/verify.py +5 -7
  184. wandb/sdk/wandb_artifacts.py +142 -177
  185. wandb/sdk/wandb_config.py +5 -8
  186. wandb/sdk/wandb_helper.py +1 -1
  187. wandb/sdk/wandb_init.py +24 -13
  188. wandb/sdk/wandb_login.py +9 -9
  189. wandb/sdk/wandb_manager.py +39 -4
  190. wandb/sdk/wandb_metric.py +2 -6
  191. wandb/sdk/wandb_require.py +4 -15
  192. wandb/sdk/wandb_require_helpers.py +1 -9
  193. wandb/sdk/wandb_run.py +95 -141
  194. wandb/sdk/wandb_save.py +1 -3
  195. wandb/sdk/wandb_settings.py +149 -54
  196. wandb/sdk/wandb_setup.py +66 -46
  197. wandb/sdk/wandb_summary.py +13 -10
  198. wandb/sdk/wandb_sweep.py +6 -7
  199. wandb/sdk/wandb_watch.py +1 -1
  200. wandb/sklearn/calculate/confusion_matrix.py +1 -1
  201. wandb/sklearn/calculate/learning_curve.py +1 -1
  202. wandb/sklearn/calculate/summary_metrics.py +1 -3
  203. wandb/sklearn/plot/__init__.py +1 -1
  204. wandb/sklearn/plot/classifier.py +27 -18
  205. wandb/sklearn/plot/clusterer.py +4 -5
  206. wandb/sklearn/plot/regressor.py +4 -4
  207. wandb/sklearn/plot/shared.py +2 -2
  208. wandb/sync/__init__.py +1 -3
  209. wandb/sync/sync.py +4 -5
  210. wandb/testing/relay.py +11 -10
  211. wandb/trigger.py +1 -1
  212. wandb/util.py +106 -81
  213. wandb/viz.py +4 -4
  214. wandb/wandb_agent.py +50 -50
  215. wandb/wandb_controller.py +2 -3
  216. wandb/wandb_run.py +1 -2
  217. wandb/wandb_torch.py +1 -1
  218. wandb/xgboost/__init__.py +1 -2
  219. {wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/METADATA +6 -2
  220. {wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/RECORD +224 -209
  221. {wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/WHEEL +1 -1
  222. wandb/sdk/launch/builder/docker.py +0 -80
  223. wandb/sdk/launch/builder/kaniko.py +0 -393
  224. wandb/sdk/launch/builder/loader.py +0 -32
  225. wandb/sdk/launch/runner/loader.py +0 -50
  226. {wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/LICENSE +0 -0
  227. {wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/entry_points.txt +0 -0
  228. {wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/top_level.txt +0 -0
@@ -6,18 +6,18 @@ import yaml
6
6
 
7
7
  import wandb
8
8
  from wandb.apis.internal import Api
9
- from wandb.errors import ExecutionError, LaunchError
10
9
 
10
+ from . import loader
11
11
  from ._project_spec import create_project_from_spec, fetch_and_validate_project
12
12
  from .agent import LaunchAgent
13
- from .builder import loader as builder_loader
14
13
  from .builder.build import construct_builder_args
15
- from .runner import loader
16
14
  from .runner.abstract import AbstractRun
17
15
  from .utils import (
18
16
  LAUNCH_CONFIG_FILE,
19
17
  LAUNCH_DEFAULT_PROJECT,
20
18
  PROJECT_SYNCHRONOUS,
19
+ ExecutionError,
20
+ LaunchError,
21
21
  construct_launch_spec,
22
22
  validate_launch_spec_source,
23
23
  )
@@ -25,37 +25,56 @@ from .utils import (
25
25
  _logger = logging.getLogger(__name__)
26
26
 
27
27
 
28
- def resolve_agent_config(
28
+ def resolve_agent_config( # noqa: C901
29
29
  api: Api,
30
30
  entity: Optional[str],
31
31
  project: Optional[str],
32
32
  max_jobs: Optional[int],
33
33
  queues: Optional[Tuple[str]],
34
+ config: Optional[str],
34
35
  ) -> Tuple[Dict[str, Any], Api]:
36
+ """Resolve the agent config.
37
+
38
+ Arguments:
39
+ api (Api): The api.
40
+ entity (str): The entity.
41
+ project (str): The project.
42
+ max_jobs (int): The max number of jobs.
43
+ queues (Tuple[str]): The queues.
44
+ config (str): The config.
45
+
46
+ Returns:
47
+ Tuple[Dict[str, Any], Api]: The resolved config and api.
48
+ """
35
49
  defaults = {
36
50
  "entity": api.default_entity,
37
51
  "project": LAUNCH_DEFAULT_PROJECT,
38
52
  "max_jobs": 1,
53
+ "max_schedulers": 1,
39
54
  "queues": [],
40
55
  "api_key": api.api_key,
41
56
  "base_url": api.settings("base_url"),
42
57
  "registry": {},
43
- "build": {},
58
+ "builder": {},
44
59
  "runner": {},
45
60
  }
46
61
  user_set_project = False
47
62
  resolved_config: Dict[str, Any] = defaults
48
- if os.path.exists(os.path.expanduser(LAUNCH_CONFIG_FILE)):
49
- config = {}
50
- with open(os.path.expanduser(LAUNCH_CONFIG_FILE)) as f:
63
+ config_path = config or os.path.expanduser(LAUNCH_CONFIG_FILE)
64
+ if os.path.isfile(config_path):
65
+ launch_config = {}
66
+ with open(config_path) as f:
51
67
  try:
52
- config = yaml.safe_load(f)
53
- print(config)
68
+ launch_config = yaml.safe_load(f)
54
69
  except yaml.YAMLError as e:
55
70
  raise LaunchError(f"Invalid launch agent config: {e}")
56
- if config.get("project") is not None:
71
+ if launch_config.get("project") is not None:
57
72
  user_set_project = True
58
- resolved_config.update(dict(config))
73
+ resolved_config.update(launch_config.items())
74
+ elif config is not None:
75
+ raise LaunchError(
76
+ f"Could not find use specified launch config file: {config_path}"
77
+ )
59
78
  if os.environ.get("WANDB_PROJECT") is not None:
60
79
  resolved_config.update({"project": os.environ.get("WANDB_PROJECT")})
61
80
  user_set_project = True
@@ -132,7 +151,6 @@ def _run(
132
151
  resource_args: Optional[Dict[str, Any]],
133
152
  launch_config: Optional[Dict[str, Any]],
134
153
  synchronous: Optional[bool],
135
- cuda: Optional[bool],
136
154
  api: Api,
137
155
  run_id: Optional[str],
138
156
  repository: Optional[str],
@@ -152,7 +170,6 @@ def _run(
152
170
  parameters,
153
171
  resource_args,
154
172
  launch_config,
155
- cuda,
156
173
  run_id,
157
174
  repository,
158
175
  )
@@ -164,20 +181,15 @@ def _run(
164
181
  runner_config: Dict[str, Any] = {}
165
182
  runner_config[PROJECT_SYNCHRONOUS] = synchronous
166
183
 
167
- if repository: # override existing registry with CLI arg
168
- launch_config = launch_config or {}
169
- registry = launch_config.get("registry", {})
170
- registry["url"] = repository
171
- launch_config["registry"] = registry
172
-
173
- build_config, registry_config = construct_builder_args(
174
- launch_config,
175
- )
184
+ config = launch_config or {}
185
+ build_config, registry_config = construct_builder_args(config)
176
186
 
177
- builder = builder_loader.load_builder(build_config)
178
- backend = loader.load_backend(resource, api, runner_config)
187
+ environment = loader.environment_from_config(config.get("environment", {}))
188
+ registry = loader.registry_from_config(registry_config, environment)
189
+ builder = loader.builder_from_config(build_config, environment, registry)
190
+ backend = loader.runner_from_config(resource, api, runner_config, environment)
179
191
  if backend:
180
- submitted_run = backend.run(launch_project, builder, registry_config)
192
+ submitted_run = backend.run(launch_project, builder)
181
193
  # this check will always pass, run is only optional in the agent case where
182
194
  # a run queue id is present on the backend config
183
195
  assert submitted_run
@@ -203,7 +215,6 @@ def run(
203
215
  docker_image: Optional[str] = None,
204
216
  config: Optional[Dict[str, Any]] = None,
205
217
  synchronous: Optional[bool] = True,
206
- cuda: Optional[bool] = None,
207
218
  run_id: Optional[str] = None,
208
219
  repository: Optional[str] = None,
209
220
  ) -> AbstractRun:
@@ -233,7 +244,6 @@ def run(
233
244
  asynchronous runs launched via this method will be terminated. If
234
245
  ``synchronous`` is True and the run fails, the current process will
235
246
  error out as well.
236
- cuda: Whether to build a CUDA-enabled docker image or not
237
247
  run_id: ID for the run (To ultimately replace the :name: field)
238
248
  repository: string name of repository path for remote registry
239
249
 
@@ -276,7 +286,6 @@ def run(
276
286
  resource_args=resource_args,
277
287
  launch_config=config,
278
288
  synchronous=synchronous,
279
- cuda=cuda,
280
289
  api=api,
281
290
  run_id=run_id,
282
291
  repository=repository,
@@ -4,7 +4,6 @@ from typing import Any, Dict, List, Optional
4
4
  import wandb
5
5
  import wandb.apis.public as public
6
6
  from wandb.apis.internal import Api
7
- from wandb.errors import LaunchError
8
7
  from wandb.sdk.launch._project_spec import (
9
8
  compute_command_args,
10
9
  create_project_from_spec,
@@ -13,6 +12,7 @@ from wandb.sdk.launch.builder.build import build_image_from_project
13
12
  from wandb.sdk.launch.utils import (
14
13
  LAUNCH_DEFAULT_PROJECT,
15
14
  LOG_PREFIX,
15
+ LaunchError,
16
16
  construct_launch_spec,
17
17
  validate_launch_spec_source,
18
18
  )
@@ -44,7 +44,6 @@ def launch_add(
44
44
  params: Optional[Dict[str, Any]] = None,
45
45
  project_queue: Optional[str] = None,
46
46
  resource_args: Optional[Dict[str, Any]] = None,
47
- cuda: Optional[bool] = None,
48
47
  run_id: Optional[str] = None,
49
48
  build: Optional[bool] = False,
50
49
  repository: Optional[str] = None,
@@ -69,7 +68,6 @@ def launch_add(
69
68
  the parameters used to run the original run.
70
69
  resource_args: Resource related arguments for launching runs onto a remote backend.
71
70
  Will be stored on the constructed launch config under ``resource_args``.
72
- cuda: Whether to build a CUDA-enabled docker image or not
73
71
  run_id: optional string indicating the id of the launched run
74
72
  build: optional flag defaulting to false, requires queue to be set
75
73
  if build, an image is created, creates a job artifact, pushes a reference
@@ -116,7 +114,6 @@ def launch_add(
116
114
  params,
117
115
  project_queue,
118
116
  resource_args,
119
- cuda,
120
117
  run_id=run_id,
121
118
  build=build,
122
119
  repository=repository,
@@ -139,7 +136,6 @@ def _launch_add(
139
136
  params: Optional[Dict[str, Any]],
140
137
  project_queue: Optional[str],
141
138
  resource_args: Optional[Dict[str, Any]] = None,
142
- cuda: Optional[bool] = None,
143
139
  run_id: Optional[str] = None,
144
140
  build: Optional[bool] = False,
145
141
  repository: Optional[str] = None,
@@ -158,7 +154,6 @@ def _launch_add(
158
154
  params,
159
155
  resource_args,
160
156
  config,
161
- cuda,
162
157
  run_id,
163
158
  repository,
164
159
  )
@@ -174,7 +169,7 @@ def _launch_add(
174
169
  launch_spec["job"] = None
175
170
 
176
171
  launch_project = create_project_from_spec(launch_spec, api)
177
- docker_image_uri = build_image_from_project(launch_project, api, config)
172
+ docker_image_uri = build_image_from_project(launch_project, api, config or {})
178
173
  run = wandb.run or wandb.init(
179
174
  project=launch_spec["project"],
180
175
  entity=launch_spec["entity"],
@@ -207,7 +202,10 @@ def _launch_add(
207
202
  if updated_spec.get("resource"):
208
203
  launch_spec["resource"] = updated_spec.get("resource")
209
204
 
210
- wandb.termlog(f"{LOG_PREFIX}Added run to queue {project_queue}/{queue_name}.")
205
+ if project_queue == LAUNCH_DEFAULT_PROJECT:
206
+ wandb.termlog(f"{LOG_PREFIX}Added run to queue {queue_name}.")
207
+ else:
208
+ wandb.termlog(f"{LOG_PREFIX}Added run to queue {project_queue}/{queue_name}.")
211
209
  wandb.termlog(f"{LOG_PREFIX}Launch spec:\n{pprint.pformat(launch_spec)}\n")
212
210
  public_api = public.Api()
213
211
  container_job = False
@@ -0,0 +1,230 @@
1
+ """Utilities for the agent."""
2
+ from typing import Any, Dict, Optional
3
+
4
+ from wandb.apis.internal import Api
5
+ from wandb.sdk.launch.utils import LaunchError
6
+
7
+ from .builder.abstract import AbstractBuilder
8
+ from .environment.abstract import AbstractEnvironment
9
+ from .registry.abstract import AbstractRegistry
10
+ from .registry.local_registry import LocalRegistry
11
+ from .runner.abstract import AbstractRunner
12
+
13
+ WANDB_RUNNERS = {
14
+ "local-container",
15
+ "local-process",
16
+ "kubernetes",
17
+ "vertex",
18
+ "sagemaker",
19
+ }
20
+
21
+
22
+ def environment_from_config(config: Optional[Dict[str, Any]]) -> AbstractEnvironment:
23
+ """Create an environment from a config.
24
+
25
+ This helper function is used to create an environment from a config. The
26
+ config should have a "type" key that specifies the type of environment to
27
+ create. The remaining keys are passed to the environment's from_config
28
+ method. If the config is None or empty, a LocalEnvironment is returned.
29
+
30
+ Arguments:
31
+ config (Dict[str, Any]): The config.
32
+
33
+ Returns:
34
+ Environment: The environment constructed.
35
+ """
36
+ if not config:
37
+ from .environment.local_environment import LocalEnvironment
38
+
39
+ return LocalEnvironment() # This is the default, dummy environment.
40
+ env_type = config.get("type")
41
+ if not env_type:
42
+ raise LaunchError(
43
+ "Could not create environment from config. Environment type not specified!"
44
+ )
45
+ if env_type == "aws":
46
+ from .environment.aws_environment import AwsEnvironment
47
+
48
+ return AwsEnvironment.from_config(config)
49
+ if env_type == "gcp":
50
+ from .environment.gcp_environment import GcpEnvironment
51
+
52
+ return GcpEnvironment.from_config(config)
53
+ raise LaunchError(
54
+ f"Could not create environment from config. Invalid type: {env_type}"
55
+ )
56
+
57
+
58
+ def registry_from_config(
59
+ config: Optional[Dict[str, Any]], environment: AbstractEnvironment
60
+ ) -> AbstractRegistry:
61
+ """Create a registry from a config.
62
+
63
+ This helper function is used to create a registry from a config. The
64
+ config should have a "type" key that specifies the type of registry to
65
+ create. The remaining keys are passed to the registry's from_config
66
+ method. If the config is None or empty, a LocalRegistry is returned.
67
+
68
+ Arguments:
69
+ config (Dict[str, Any]): The registry config.
70
+ environment (Environment): The environment of the registry.
71
+
72
+ Returns:
73
+ The registry. A LocalRegistry is returned if the config is empty or has no "type" key.
74
+
75
+ Raises:
76
+ LaunchError: If the registry is not configured correctly.
77
+ """
78
+ if not config:
79
+ from .registry.local_registry import LocalRegistry
80
+
81
+ return LocalRegistry() # This is the default, dummy registry.
82
+ registry_type = config.get("type")
83
+ if registry_type is None:
84
+ from .registry.local_registry import LocalRegistry
85
+
86
+ return LocalRegistry() # This is the default, dummy registry.
87
+ if registry_type == "ecr":
88
+ from .environment.aws_environment import AwsEnvironment
89
+
90
+ if not isinstance(environment, AwsEnvironment):
91
+ raise LaunchError(
92
+ "Could not create ECR registry. "
93
+ "Environment must be an instance of AWSEnvironment."
94
+ )
95
+ from .registry.elastic_container_registry import ElasticContainerRegistry
96
+
97
+ return ElasticContainerRegistry.from_config(config, environment)
98
+ if registry_type == "gcr":
99
+ from .environment.gcp_environment import GcpEnvironment
100
+
101
+ if not isinstance(environment, GcpEnvironment):
102
+ raise LaunchError(
103
+ "Could not create GCR registry. "
104
+ "Environment must be an instance of GCPEnvironment."
105
+ )
106
+ from .registry.google_artifact_registry import GoogleArtifactRegistry
107
+
108
+ return GoogleArtifactRegistry.from_config(config, environment)
109
+ raise LaunchError(
110
+ f"Could not create registry from config. Invalid registry type: {registry_type}"
111
+ )
112
+
113
+
114
+ def builder_from_config(
115
+ config: Optional[Dict[str, Any]],
116
+ environment: AbstractEnvironment,
117
+ registry: AbstractRegistry,
118
+ ) -> AbstractBuilder:
119
+ """Create a builder from a config.
120
+
121
+ This helper function is used to create a builder from a config. The
122
+ config should have a "type" key that specifies the type of builder to import
123
+ and create. The remaining keys are passed to the builder's from_config
124
+ method. If the config is None or empty, a DockerBuilder is returned.
125
+
126
+ Arguments:
127
+ config (Dict[str, Any]): The builder config.
128
+ registry (Registry): The registry of the builder.
129
+
130
+ Returns:
131
+ The builder.
132
+
133
+ Raises:
134
+ LaunchError: If the builder is not configured correctly.
135
+ """
136
+ if not config:
137
+ from .builder.docker_builder import DockerBuilder
138
+
139
+ return DockerBuilder.from_config(
140
+ {}, environment, registry
141
+ ) # This is the default builder.
142
+
143
+ builder_type = config.get("type")
144
+ if builder_type is None:
145
+ raise LaunchError(
146
+ "Could not create builder from config. Builder type not specified"
147
+ )
148
+ if builder_type == "docker":
149
+ from .builder.docker_builder import DockerBuilder
150
+
151
+ return DockerBuilder.from_config(config, environment, registry)
152
+ if builder_type == "kaniko":
153
+ if isinstance(registry, LocalRegistry):
154
+ raise LaunchError(
155
+ "Could not create Kaniko builder. "
156
+ "Registry must be a remote registry."
157
+ )
158
+ from .builder.kaniko_builder import KanikoBuilder
159
+
160
+ return KanikoBuilder.from_config(config, environment, registry)
161
+ if builder_type == "noop":
162
+ from .builder.noop import NoOpBuilder
163
+
164
+ return NoOpBuilder.from_config(config, environment, registry)
165
+ raise LaunchError(
166
+ f"Could not create builder from config. Invalid builder type: {builder_type}"
167
+ )
168
+
169
+
170
+ def runner_from_config(
171
+ runner_name: str,
172
+ api: Api,
173
+ runner_config: Dict[str, Any],
174
+ environment: AbstractEnvironment,
175
+ ) -> AbstractRunner:
176
+ """Create a runner from a config.
177
+
178
+ This helper function is used to create a runner from a runner name. The
179
+ runner name selects which runner class is imported and created. The api and
180
+ runner config (and, for most runners, the environment) are passed to the
181
+ runner's constructor. If the name is empty, "local-container" or "local", a LocalContainerRunner is returned.
182
+
183
+ Arguments:
184
+ runner_name (str): The name of the backend.
185
+ api (Api): The API.
186
+ runner_config (Dict[str, Any]): The backend config.
187
+
188
+ Returns:
189
+ The runner.
190
+
191
+ Raises:
192
+ LaunchError: If the runner is not configured correctly.
193
+ """
194
+ if not runner_name or runner_name in ["local-container", "local"]:
195
+ from .runner.local_container import LocalContainerRunner
196
+
197
+ return LocalContainerRunner(api, runner_config, environment)
198
+ if runner_name == "local-process":
199
+ from .runner.local_process import LocalProcessRunner
200
+
201
+ return LocalProcessRunner(api, runner_config)
202
+ if runner_name == "sagemaker":
203
+ from .environment.aws_environment import AwsEnvironment
204
+
205
+ if not isinstance(environment, AwsEnvironment):
206
+ raise LaunchError(
207
+ "Could not create Sagemaker runner. "
208
+ "Environment must be an instance of AwsEnvironment."
209
+ )
210
+ from .runner.sagemaker_runner import SageMakerRunner
211
+
212
+ return SageMakerRunner(api, runner_config, environment)
213
+ if runner_name in ["vertex", "gcp-vertex"]:
214
+ from .environment.gcp_environment import GcpEnvironment
215
+
216
+ if not isinstance(environment, GcpEnvironment):
217
+ raise LaunchError(
218
+ "Could not create Vertex runner. "
219
+ "Environment must be an instance of GcpEnvironment."
220
+ )
221
+ from .runner.vertex_runner import VertexRunner
222
+
223
+ return VertexRunner(api, runner_config, environment)
224
+ if runner_name == "kubernetes":
225
+ from .runner.kubernetes_runner import KubernetesRunner
226
+
227
+ return KubernetesRunner(api, runner_config, environment)
228
+ raise LaunchError(
229
+ f"Could not create runner from config. Invalid runner name: {runner_name}"
230
+ )
@@ -0,0 +1,54 @@
1
+ """Abstract base class for registries."""
2
+ from abc import ABC, abstractmethod
3
+ from typing import Tuple
4
+
5
+ from ..environment.abstract import AbstractEnvironment
6
+
7
+
8
+ class AbstractRegistry(ABC):
9
+ """Abstract base class for registries."""
10
+
11
+ uri: str
12
+
13
+ @abstractmethod
14
+ def verify(self) -> None:
15
+ """Verify that the registry is configured correctly."""
16
+ raise NotImplementedError
17
+
18
+ @abstractmethod
19
+ def get_username_password(self) -> Tuple[str, str]:
20
+ """Get the username and password for the registry.
21
+
22
+ Returns:
23
+ (str, str): The username and password.
24
+ """
25
+ raise NotImplementedError
26
+
27
+ @abstractmethod
28
+ def get_repo_uri(self) -> str:
29
+ """Get the URI for a repository.
30
+
31
+ Returns:
32
+ str: The URI.
33
+ """
34
+ raise NotImplementedError
35
+
36
+ @abstractmethod
37
+ def check_image_exists(self, image_uri: str) -> bool:
38
+ """Check if an image exists in the registry.
39
+
40
+ Arguments:
41
+ image_uri (str): The URI of the image.
42
+
43
+ Returns:
44
+ bool: True if the image exists.
45
+ """
46
+ raise NotImplementedError
47
+
48
+ @classmethod
49
+ @abstractmethod
50
+ def from_config(
51
+ cls, config: dict, environment: "AbstractEnvironment", verify: bool = True
52
+ ) -> "AbstractRegistry":
53
+ """Create a registry from a config."""
54
+ raise NotImplementedError
@@ -0,0 +1,163 @@
1
+ """Implementation of Elastic Container Registry class for wandb launch."""
2
+ import base64
3
+ import logging
4
+ from typing import Dict, Tuple
5
+
6
+ from wandb.sdk.launch.environment.aws_environment import AwsEnvironment
7
+ from wandb.sdk.launch.utils import LaunchError
8
+ from wandb.util import get_module
9
+
10
+ from .abstract import AbstractRegistry
11
+
12
+ botocore = get_module(
13
+ "botocore",
14
+ required="AWS environment requires botocore to be installed. Please install "
15
+ "it with `pip install wandb[launch]`.",
16
+ )
17
+
18
+ _logger = logging.getLogger(__name__)
19
+
20
+
21
+ class ElasticContainerRegistry(AbstractRegistry):
22
+ """Elastic Container Registry class.
23
+
24
+ Attributes:
25
+ repo_name (str): The name of the repository.
26
+ environment (AwsEnvironment): The AWS environment.
27
+ uri (str): The uri of the repository.
28
+ """
29
+
30
+ repo_name: str
31
+ environment: AwsEnvironment
32
+ uri: str
33
+
34
+ def __init__(self, repo_name: str, environment: AwsEnvironment) -> None:
35
+ """Initialize the Elastic Container Registry.
36
+
37
+ Arguments:
38
+ repo_name (str): The name of the repository.
39
+ environment (AwsEnvironment): The AWS environment.
40
+
41
+ Raises:
42
+ LaunchError: If there is an error verifying the registry.
43
+ """
44
+ super().__init__()
45
+ _logger.info(
46
+ f"Initializing Elastic Container Registry with repotisory {repo_name}."
47
+ )
48
+ self.repo_name = repo_name
49
+ self.environment = environment
50
+ self.verify()
51
+
52
+ @classmethod
53
+ def from_config( # type: ignore[override]
54
+ cls,
55
+ config: Dict,
56
+ environment: AwsEnvironment,
57
+ verify: bool = True,
58
+ ) -> "ElasticContainerRegistry":
59
+ """Create an Elastic Container Registry from a config.
60
+
61
+ Arguments:
62
+ config (dict): The config.
63
+ environment (AwsEnvironment): The AWS environment.
64
+
65
+ Returns:
66
+ ElasticContainerRegistry: The Elastic Container Registry.
67
+ """
68
+ if config.get("type") != "ecr":
69
+ raise LaunchError(
70
+ f"Could not create ElasticContainerRegistry from config. Expected type 'ecr' "
71
+ f"but got '{config.get('type')}'."
72
+ )
73
+ repository = config.get("repository")
74
+ if not repository:
75
+ raise LaunchError(
76
+ "Could not create ElasticContainerRegistry from config. 'repository' is required."
77
+ )
78
+ return cls(repository, environment)
79
+
80
+ def verify(self) -> None:
81
+ """Verify that the registry is accessible and the configured repo exists.
82
+
83
+ Raises:
84
+ LaunchError: If there is an error verifying the registry.
85
+ """
86
+ _logger.debug("Verifying Elastic Container Registry.")
87
+ try:
88
+ session = self.environment.get_session()
89
+ client = session.client("ecr")
90
+ response = client.describe_repositories(repositoryNames=[self.repo_name])
91
+ self.uri = response["repositories"][0]["repositoryUri"].split("/")[0]
92
+
93
+ except botocore.exceptions.ClientError as e:
94
+ code = e.response["Error"]["Code"]
95
+ msg = e.response["Error"]["Message"]
96
+ # TODO: Log the code and the message here?
97
+ raise LaunchError(
98
+ f"Error verifying Elastic Container Registry: {code} {msg}"
99
+ )
100
+
101
+ def get_username_password(self) -> Tuple[str, str]:
102
+ """Get the username and password for the registry.
103
+
104
+ Returns:
105
+ (str, str): The username and password.
106
+
107
+ Raises:
108
+ LaunchError: If there is an error getting the username and password.
109
+ """
110
+ _logger.debug("Getting username and password for Elastic Container Registry.")
111
+ try:
112
+ session = self.environment.get_session()
113
+ client = session.client("ecr")
114
+ response = client.get_authorization_token()
115
+ username, password = base64.standard_b64decode(
116
+ response["authorizationData"][0]["authorizationToken"]
117
+ ).split(b":")
118
+ return username.decode("utf-8"), password.decode("utf-8")
119
+
120
+ except botocore.exceptions.ClientError as e:
121
+ code = e.response["Error"]["Code"]
122
+ msg = e.response["Error"]["Message"]
123
+ # TODO: Log the code and the message here?
124
+ raise LaunchError(f"Error getting username and password: {code} {msg}")
125
+
126
+ def get_repo_uri(self) -> str:
127
+ """Get the uri of the repository.
128
+
129
+ Returns:
130
+ str: The uri of the repository.
131
+ """
132
+ return self.uri + "/" + self.repo_name
133
+
134
+ def check_image_exists(self, image_uri: str) -> bool:
135
+ """Check if the image tag exists.
136
+
137
+ Arguments:
138
+ image_uri (str): The full image_uri.
139
+
140
+ Returns:
141
+ bool: True if the image tag exists.
142
+ """
143
+ uri, tag = image_uri.split(":")
144
+ if uri != self.get_repo_uri():
145
+ raise LaunchError(
146
+ f"Image uri {image_uri} does not match Elastic Container Registry uri {self.get_repo_uri()}."
147
+ )
148
+
149
+ _logger.debug("Checking if image tag exists.")
150
+ try:
151
+ session = self.environment.get_session()
152
+ client = session.client("ecr")
153
+ response = client.describe_images(
154
+ repositoryName=self.repo_name, imageIds=[{"imageTag": tag}]
155
+ )
156
+ return len(response["imageDetails"]) > 0
157
+
158
+ except botocore.exceptions.ClientError as e:
159
+ code = e.response["Error"]["Code"]
160
+ if code == "ImageNotFoundException":
161
+ return False
162
+ msg = e.response["Error"]["Message"]
163
+ raise LaunchError(f"Error checking if image tag exists: {code} {msg}")