wandb 0.15.9__py3-none-any.whl → 0.15.11__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114) hide show
  1. wandb/__init__.py +5 -1
  2. wandb/apis/public.py +137 -17
  3. wandb/apis/reports/_panels.py +1 -1
  4. wandb/apis/reports/blocks.py +1 -0
  5. wandb/apis/reports/report.py +27 -5
  6. wandb/cli/cli.py +52 -41
  7. wandb/docker/__init__.py +17 -0
  8. wandb/docker/auth.py +1 -1
  9. wandb/env.py +24 -4
  10. wandb/filesync/step_checksum.py +3 -3
  11. wandb/integration/openai/openai.py +3 -0
  12. wandb/integration/ultralytics/__init__.py +9 -0
  13. wandb/integration/ultralytics/bbox_utils.py +196 -0
  14. wandb/integration/ultralytics/callback.py +458 -0
  15. wandb/integration/ultralytics/classification_utils.py +66 -0
  16. wandb/integration/ultralytics/mask_utils.py +141 -0
  17. wandb/integration/ultralytics/pose_utils.py +92 -0
  18. wandb/integration/xgboost/xgboost.py +3 -3
  19. wandb/integration/yolov8/__init__.py +0 -7
  20. wandb/integration/yolov8/yolov8.py +22 -3
  21. wandb/old/settings.py +7 -0
  22. wandb/plot/line_series.py +0 -1
  23. wandb/proto/v3/wandb_internal_pb2.py +353 -300
  24. wandb/proto/v3/wandb_server_pb2.py +37 -41
  25. wandb/proto/v3/wandb_settings_pb2.py +2 -2
  26. wandb/proto/v3/wandb_telemetry_pb2.py +16 -16
  27. wandb/proto/v4/wandb_internal_pb2.py +272 -260
  28. wandb/proto/v4/wandb_server_pb2.py +37 -40
  29. wandb/proto/v4/wandb_settings_pb2.py +2 -2
  30. wandb/proto/v4/wandb_telemetry_pb2.py +16 -16
  31. wandb/proto/wandb_internal_codegen.py +7 -31
  32. wandb/sdk/artifacts/artifact.py +321 -189
  33. wandb/sdk/artifacts/artifact_cache.py +14 -0
  34. wandb/sdk/artifacts/artifact_manifest.py +5 -4
  35. wandb/sdk/artifacts/artifact_manifest_entry.py +37 -9
  36. wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +1 -9
  37. wandb/sdk/artifacts/artifact_saver.py +13 -50
  38. wandb/sdk/artifacts/artifact_ttl.py +6 -0
  39. wandb/sdk/artifacts/artifacts_cache.py +119 -93
  40. wandb/sdk/artifacts/staging.py +25 -0
  41. wandb/sdk/artifacts/storage_handlers/s3_handler.py +12 -7
  42. wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +2 -3
  43. wandb/sdk/artifacts/storage_policies/__init__.py +4 -0
  44. wandb/sdk/artifacts/storage_policies/register.py +1 -0
  45. wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +4 -3
  46. wandb/sdk/artifacts/storage_policy.py +4 -2
  47. wandb/sdk/backend/backend.py +0 -16
  48. wandb/sdk/data_types/image.py +3 -1
  49. wandb/sdk/integration_utils/auto_logging.py +38 -13
  50. wandb/sdk/interface/interface.py +16 -135
  51. wandb/sdk/interface/interface_shared.py +9 -147
  52. wandb/sdk/interface/interface_sock.py +0 -26
  53. wandb/sdk/internal/file_pusher.py +20 -3
  54. wandb/sdk/internal/file_stream.py +3 -1
  55. wandb/sdk/internal/handler.py +53 -70
  56. wandb/sdk/internal/internal_api.py +220 -130
  57. wandb/sdk/internal/job_builder.py +41 -37
  58. wandb/sdk/internal/sender.py +7 -25
  59. wandb/sdk/internal/system/assets/disk.py +144 -11
  60. wandb/sdk/internal/system/system_info.py +6 -2
  61. wandb/sdk/launch/__init__.py +5 -0
  62. wandb/sdk/launch/{launch.py → _launch.py} +53 -54
  63. wandb/sdk/launch/{launch_add.py → _launch_add.py} +34 -31
  64. wandb/sdk/launch/_project_spec.py +13 -2
  65. wandb/sdk/launch/agent/agent.py +103 -59
  66. wandb/sdk/launch/agent/run_queue_item_file_saver.py +6 -4
  67. wandb/sdk/launch/builder/build.py +19 -1
  68. wandb/sdk/launch/builder/docker_builder.py +5 -1
  69. wandb/sdk/launch/builder/kaniko_builder.py +5 -1
  70. wandb/sdk/launch/create_job.py +20 -5
  71. wandb/sdk/launch/loader.py +14 -5
  72. wandb/sdk/launch/runner/abstract.py +0 -2
  73. wandb/sdk/launch/runner/kubernetes_monitor.py +329 -0
  74. wandb/sdk/launch/runner/kubernetes_runner.py +66 -209
  75. wandb/sdk/launch/runner/local_container.py +5 -2
  76. wandb/sdk/launch/runner/local_process.py +4 -1
  77. wandb/sdk/launch/sweeps/scheduler.py +43 -25
  78. wandb/sdk/launch/sweeps/utils.py +5 -3
  79. wandb/sdk/launch/utils.py +3 -1
  80. wandb/sdk/lib/_settings_toposort_generate.py +3 -9
  81. wandb/sdk/lib/_settings_toposort_generated.py +27 -3
  82. wandb/sdk/lib/_wburls_generated.py +1 -0
  83. wandb/sdk/lib/filenames.py +27 -6
  84. wandb/sdk/lib/filesystem.py +181 -7
  85. wandb/sdk/lib/fsm.py +5 -3
  86. wandb/sdk/lib/gql_request.py +3 -0
  87. wandb/sdk/lib/ipython.py +7 -0
  88. wandb/sdk/lib/wburls.py +1 -0
  89. wandb/sdk/service/port_file.py +2 -15
  90. wandb/sdk/service/server.py +7 -55
  91. wandb/sdk/service/service.py +56 -26
  92. wandb/sdk/service/service_base.py +1 -1
  93. wandb/sdk/service/streams.py +11 -5
  94. wandb/sdk/verify/verify.py +2 -2
  95. wandb/sdk/wandb_init.py +8 -2
  96. wandb/sdk/wandb_manager.py +4 -14
  97. wandb/sdk/wandb_run.py +143 -53
  98. wandb/sdk/wandb_settings.py +148 -35
  99. wandb/testing/relay.py +85 -38
  100. wandb/util.py +87 -4
  101. wandb/wandb_torch.py +24 -38
  102. {wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/METADATA +48 -23
  103. {wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/RECORD +107 -103
  104. {wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/WHEEL +1 -1
  105. wandb/proto/v3/wandb_server_pb2_grpc.py +0 -1422
  106. wandb/proto/v4/wandb_server_pb2_grpc.py +0 -1422
  107. wandb/proto/wandb_server_pb2_grpc.py +0 -8
  108. wandb/sdk/artifacts/storage_policies/s3_bucket_policy.py +0 -61
  109. wandb/sdk/interface/interface_grpc.py +0 -460
  110. wandb/sdk/service/server_grpc.py +0 -444
  111. wandb/sdk/service/service_grpc.py +0 -73
  112. {wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/LICENSE +0 -0
  113. {wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/entry_points.txt +0 -0
  114. {wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/top_level.txt +0 -0
@@ -87,6 +87,7 @@ class JobBuilder:
87
87
  _aliases: List[str]
88
88
  _job_seq_id: Optional[str]
89
89
  _job_version_alias: Optional[str]
90
+ _is_notebook_run: bool
90
91
 
91
92
  def __init__(self, settings: SettingsStatic):
92
93
  self._settings = settings
@@ -103,6 +104,7 @@ class JobBuilder:
103
104
  self._source_type: Optional[
104
105
  Literal["repo", "artifact", "image"]
105
106
  ] = settings.job_source # type: ignore[assignment]
107
+ self._is_notebook_run = self._get_is_notebook_run()
106
108
 
107
109
  def set_config(self, config: Dict[str, Any]) -> None:
108
110
  self._config = config
@@ -153,7 +155,7 @@ class JobBuilder:
153
155
  commit = git_info.get("commit")
154
156
  assert remote is not None
155
157
  assert commit is not None
156
- if self._is_notebook_run():
158
+ if self._is_notebook_run:
157
159
  if not os.path.exists(
158
160
  os.path.join(os.getcwd(), os.path.basename(program_relpath))
159
161
  ):
@@ -194,7 +196,7 @@ class JobBuilder:
194
196
  os.path.basename(sys.executable),
195
197
  full_program_path,
196
198
  ],
197
- "notebook": self._is_notebook_run(),
199
+ "notebook": self._is_notebook_run,
198
200
  }
199
201
 
200
202
  if self._settings.job_name:
@@ -219,18 +221,16 @@ class JobBuilder:
219
221
  ) -> Tuple[Optional[ArtifactSourceDict], Optional[str]]:
220
222
  assert isinstance(self._logged_code_artifact, dict)
221
223
  # TODO: should we just always exit early if the path doesn't exist?
222
- if self._is_notebook_run() and not self._is_colab_run():
224
+ if self._is_notebook_run and not self._is_colab_run():
223
225
  full_program_relpath = os.path.relpath(program_relpath, os.getcwd())
224
226
  # if the resolved path doesn't exist, then we shouldn't make a job because it will fail
225
227
  if not os.path.exists(full_program_relpath):
226
228
  # when users call log code in a notebook the code artifact starts
227
- # at the directory the notebook is in instead of the jupyter
228
- # core
229
- if os.path.exists(os.path.basename(program_relpath)):
230
- full_program_relpath = os.path.basename(program_relpath)
231
- else:
229
+ # at the directory the notebook is in instead of the jupyter core
230
+ if not os.path.exists(os.path.basename(program_relpath)):
232
231
  _logger.info("target path does not exist, exiting")
233
232
  return None, None
233
+ full_program_relpath = os.path.basename(program_relpath)
234
234
  else:
235
235
  full_program_relpath = program_relpath
236
236
  entrypoint = [
@@ -240,7 +240,7 @@ class JobBuilder:
240
240
  # TODO: update executable to a method that supports pex
241
241
  source: ArtifactSourceDict = {
242
242
  "entrypoint": entrypoint,
243
- "notebook": self._is_notebook_run(),
243
+ "notebook": self._is_notebook_run,
244
244
  "artifact": f"wandb-artifact://_id/{self._logged_code_artifact['id']}",
245
245
  }
246
246
 
@@ -271,7 +271,7 @@ class JobBuilder:
271
271
  }
272
272
  return source, name
273
273
 
274
- def _is_notebook_run(self) -> bool:
274
+ def _get_is_notebook_run(self) -> bool:
275
275
  return hasattr(self._settings, "_jupyter") and bool(self._settings._jupyter)
276
276
 
277
277
  def _is_colab_run(self) -> bool:
@@ -288,15 +288,10 @@ class JobBuilder:
288
288
  return None
289
289
 
290
290
  runtime: Optional[str] = metadata.get("python")
291
- program_relpath: Optional[str] = metadata.get("codePath")
292
291
  # can't build a job without a python version
293
292
  if runtime is None:
294
293
  return None
295
294
 
296
- if self._is_notebook_run():
297
- _logger.info("run is notebook based run")
298
- program_relpath = metadata.get("program")
299
-
300
295
  input_types = TypeRegistry.type_of(self._config).to_json()
301
296
  output_types = TypeRegistry.type_of(self._summary).to_json()
302
297
 
@@ -315,10 +310,14 @@ class JobBuilder:
315
310
  source_type = source_info.get("source_type")
316
311
  else:
317
312
  # configure job from environment
318
- source_type = self._get_source_type(metadata, program_relpath)
313
+ source_type = self._get_source_type(metadata)
319
314
  if not source_type:
320
315
  return None
321
316
 
317
+ program_relpath = self._get_program_relpath(source_type, metadata)
318
+ if source_type != "image" and not program_relpath:
319
+ return None
320
+
322
321
  source: Union[
323
322
  Optional[GitSourceDict],
324
323
  Optional[ArtifactSourceDict],
@@ -326,19 +325,15 @@ class JobBuilder:
326
325
  ] = None
327
326
 
328
327
  # make source dict
329
- if source_type == "repo" and self._has_git_job_ingredients(
330
- metadata, program_relpath
331
- ):
332
- assert program_relpath is not None
328
+ if source_type == "repo":
329
+ assert program_relpath
333
330
  source, name = self._build_repo_job_source(
334
331
  metadata,
335
332
  program_relpath,
336
333
  metadata.get("root"),
337
334
  )
338
- elif source_type == "artifact" and self._has_artifact_job_ingredients(
339
- program_relpath
340
- ):
341
- assert program_relpath is not None
335
+ elif source_type == "artifact":
336
+ assert program_relpath
342
337
  source, name = self._build_artifact_job_source(program_relpath)
343
338
  elif source_type == "image" and self._has_image_job_ingredients(metadata):
344
339
  source, name = self._build_image_job_source(metadata)
@@ -390,17 +385,15 @@ class JobBuilder:
390
385
 
391
386
  return artifact
392
387
 
393
- def _get_source_type(
394
- self, metadata: Dict[str, Any], relpath: Optional[str]
395
- ) -> Optional[str]:
388
+ def _get_source_type(self, metadata: Dict[str, Any]) -> Optional[str]:
396
389
  if self._source_type:
397
390
  return self._source_type
398
391
 
399
- if self._has_git_job_ingredients(metadata, relpath):
392
+ if self._has_git_job_ingredients(metadata):
400
393
  _logger.info("is repo sourced job")
401
394
  return "repo"
402
395
 
403
- if self._has_artifact_job_ingredients(relpath):
396
+ if self._has_artifact_job_ingredients():
404
397
  _logger.info("is artifact sourced job")
405
398
  return "artifact"
406
399
 
@@ -411,6 +404,21 @@ class JobBuilder:
411
404
  _logger.info("no source found")
412
405
  return None
413
406
 
407
+ def _get_program_relpath(
408
+ self, source_type: str, metadata: Dict[str, Any]
409
+ ) -> Optional[str]:
410
+ if self._is_notebook_run:
411
+ _logger.info("run is notebook based run")
412
+ return metadata.get("program")
413
+
414
+ if source_type == "artifact" or self._settings.job_source == "artifact":
415
+ # if the job is set to be an artifact, use relpath guaranteed
416
+ # to be correct. 'codePath' uses the root path when in git repo
417
+ # fallback to codePath if strictly local relpath not present
418
+ return metadata.get("codePathLocal") or metadata.get("codePath")
419
+
420
+ return metadata.get("codePath")
421
+
414
422
  def _handle_metadata_file(
415
423
  self,
416
424
  ) -> Optional[Dict]:
@@ -421,18 +429,14 @@ class JobBuilder:
421
429
 
422
430
  return None
423
431
 
424
- def _has_git_job_ingredients(
425
- self, metadata: Dict[str, Any], program_relpath: Optional[str]
426
- ) -> bool:
432
+ def _has_git_job_ingredients(self, metadata: Dict[str, Any]) -> bool:
427
433
  git_info: Dict[str, str] = metadata.get("git", {})
428
- if program_relpath is None:
429
- return False
430
- if self._is_notebook_run() and metadata.get("root") is None:
434
+ if self._is_notebook_run and metadata.get("root") is None:
431
435
  return False
432
436
  return git_info.get("remote") is not None and git_info.get("commit") is not None
433
437
 
434
- def _has_artifact_job_ingredients(self, program_relpath: Optional[str]) -> bool:
435
- return self._logged_code_artifact is not None and program_relpath is not None
438
+ def _has_artifact_job_ingredients(self) -> bool:
439
+ return self._logged_code_artifact is not None
436
440
 
437
441
  def _has_image_job_ingredients(self, metadata: Dict[str, Any]) -> bool:
438
442
  return metadata.get("docker") is not None
@@ -33,7 +33,7 @@ from wandb.errors import CommError, UsageError
33
33
  from wandb.errors.util import ProtobufErrorHandler
34
34
  from wandb.filesync.dir_watcher import DirWatcher
35
35
  from wandb.proto import wandb_internal_pb2
36
- from wandb.sdk.artifacts import artifact_saver
36
+ from wandb.sdk.artifacts.artifact_saver import ArtifactSaver
37
37
  from wandb.sdk.interface import interface
38
38
  from wandb.sdk.interface.interface_queue import InterfaceQueue
39
39
  from wandb.sdk.internal import (
@@ -268,6 +268,7 @@ class SendManager:
268
268
  self._cached_summary: Dict[str, Any] = dict()
269
269
  self._config_metric_index_dict: Dict[str, int] = {}
270
270
  self._config_metric_dict: Dict[str, wandb_internal_pb2.MetricRecord] = {}
271
+ self._consolidated_summary: Dict[str, Any] = dict()
271
272
 
272
273
  self._cached_server_info = dict()
273
274
  self._cached_viewer = dict()
@@ -1149,7 +1150,9 @@ class SendManager:
1149
1150
  summary_dict.pop("_wandb", None)
1150
1151
  if self._metadata_summary:
1151
1152
  summary_dict["_wandb"] = self._metadata_summary
1152
- json_summary = json.dumps(summary_dict)
1153
+ # merge with consolidated summary
1154
+ self._consolidated_summary.update(summary_dict)
1155
+ json_summary = json.dumps(self._consolidated_summary)
1153
1156
  if self._fs:
1154
1157
  self._fs.push(filenames.SUMMARY_FNAME, json_summary)
1155
1158
  # TODO(jhr): we should only write this at the end of the script
@@ -1446,28 +1449,6 @@ class SendManager:
1446
1449
 
1447
1450
  self._respond_result(result)
1448
1451
 
1449
- def send_request_artifact_send(self, record: "Record") -> None:
1450
- # TODO: combine and eventually remove send_request_log_artifact()
1451
-
1452
- # for now, we are using req/resp uuid for transaction id
1453
- # in the future this should be part of the message to handle idempotency
1454
- xid = record.uuid
1455
-
1456
- done_msg = wandb_internal_pb2.ArtifactDoneRequest(xid=xid)
1457
- artifact = record.request.artifact_send.artifact
1458
- try:
1459
- res = self._send_artifact(artifact)
1460
- assert res, "Unable to send artifact"
1461
- done_msg.artifact_id = res["id"]
1462
- logger.info(f"logged artifact {artifact.name} - {res}")
1463
- except Exception as e:
1464
- done_msg.error_message = 'error logging artifact "{}/{}": {}'.format(
1465
- artifact.type, artifact.name, e
1466
- )
1467
-
1468
- logger.info("send artifact done")
1469
- self._interface._publish_artifact_done(done_msg)
1470
-
1471
1452
  def send_artifact(self, record: "Record") -> None:
1472
1453
  artifact = record.artifact
1473
1454
  try:
@@ -1486,7 +1467,7 @@ class SendManager:
1486
1467
  from pkg_resources import parse_version
1487
1468
 
1488
1469
  assert self._pusher
1489
- saver = artifact_saver.ArtifactSaver(
1470
+ saver = ArtifactSaver(
1490
1471
  api=self._api,
1491
1472
  digest=artifact.digest,
1492
1473
  manifest_json=_manifest_json_from_proto(artifact.manifest),
@@ -1512,6 +1493,7 @@ class SendManager:
1512
1493
  client_id=artifact.client_id,
1513
1494
  sequence_client_id=artifact.sequence_client_id,
1514
1495
  metadata=metadata,
1496
+ ttl_duration_seconds=artifact.ttl_duration_seconds or None,
1515
1497
  description=artifact.description or None,
1516
1498
  aliases=artifact.aliases,
1517
1499
  use_after_commit=artifact.use_after_commit,
@@ -1,12 +1,14 @@
1
1
  import threading
2
2
  from collections import deque
3
- from typing import TYPE_CHECKING, List
3
+ from typing import TYPE_CHECKING, List, Optional
4
4
 
5
5
  try:
6
6
  import psutil
7
7
  except ImportError:
8
8
  psutil = None
9
9
 
10
+ from wandb.errors.term import termwarn
11
+
10
12
  from .aggregators import aggregate_mean
11
13
  from .asset_registry import asset_registry
12
14
  from .interfaces import Interface, Metric, MetricsMonitor
@@ -17,18 +19,132 @@ if TYPE_CHECKING:
17
19
  from wandb.sdk.internal.settings_static import SettingsStatic
18
20
 
19
21
 
20
- class DiskUsage:
22
+ class DiskUsagePercent:
21
23
  """Total system disk usage in percent."""
22
24
 
23
- # name = "disk_usage"
24
- name = "disk"
25
+ name = "disk.{path}.usagePercent"
26
+ samples: "Deque[List[float]]"
27
+
28
+ def __init__(self, paths: List[str]) -> None:
29
+ self.samples = deque([])
30
+ # check if we have access to the disk paths:
31
+ self.paths: List[str] = []
32
+ for path in paths:
33
+ try:
34
+ psutil.disk_usage(path)
35
+ self.paths.append(path)
36
+ except Exception as e: # noqa
37
+ termwarn(f"Could not access disk path {path}: {e}", repeat=False)
38
+
39
+ def sample(self) -> None:
40
+ # self.samples.append(psutil.disk_usage("/").percent)
41
+ disk_usage: List[float] = []
42
+ for path in self.paths:
43
+ disk_usage.append(psutil.disk_usage(path).percent)
44
+ if disk_usage:
45
+ self.samples.append(disk_usage)
46
+
47
+ def clear(self) -> None:
48
+ self.samples.clear()
49
+
50
+ def aggregate(self) -> dict:
51
+ if not self.samples:
52
+ return {}
53
+ disk_metrics = {}
54
+ for i, _path in enumerate(self.paths):
55
+ aggregate_i = aggregate_mean([sample[i] for sample in self.samples])
56
+ # ugly hack to please the frontend:
57
+ _path = _path.replace("/", "\\")
58
+ disk_metrics[self.name.format(path=_path)] = aggregate_i
59
+
60
+ return disk_metrics
61
+
62
+
63
+ class DiskUsage:
64
+ """Total system disk usage in GB."""
65
+
66
+ name = "disk.{path}.usageGB"
67
+ samples: "Deque[List[float]]"
68
+
69
+ def __init__(self, paths: List[str]) -> None:
70
+ self.samples = deque([])
71
+ # check if we have access to the disk paths:
72
+ self.paths: List[str] = []
73
+ for path in paths:
74
+ try:
75
+ psutil.disk_usage(path)
76
+ self.paths.append(path)
77
+ except Exception as e: # noqa
78
+ termwarn(f"Could not access disk path {path}: {e}", repeat=False)
79
+
80
+ def sample(self) -> None:
81
+ disk_usage: List[float] = []
82
+ for path in self.paths:
83
+ disk_usage.append(psutil.disk_usage(path).used / 1024 / 1024 / 1024)
84
+ if disk_usage:
85
+ self.samples.append(disk_usage)
86
+
87
+ def clear(self) -> None:
88
+ self.samples.clear()
89
+
90
+ def aggregate(self) -> dict:
91
+ if not self.samples:
92
+ return {}
93
+ disk_metrics = {}
94
+ for i, _path in enumerate(self.paths):
95
+ aggregate_i = aggregate_mean([sample[i] for sample in self.samples])
96
+ # ugly hack to please the frontend:
97
+ _path = _path.replace("/", "\\")
98
+ disk_metrics[self.name.format(path=_path)] = aggregate_i
99
+
100
+ return disk_metrics
101
+
102
+
103
+ class DiskIn:
104
+ """Total system disk read in MB."""
105
+
106
+ name = "disk.in"
107
+ samples: "Deque[float]"
108
+
109
+ def __init__(self) -> None:
110
+ self.samples = deque([])
111
+ self.read_init: Optional[int] = None
112
+
113
+ def sample(self) -> None:
114
+ if self.read_init is None:
115
+ # initialize the read_init value on first sample
116
+ self.read_init = psutil.disk_io_counters().read_bytes
117
+ self.samples.append(
118
+ (psutil.disk_io_counters().read_bytes - self.read_init) / 1024 / 1024
119
+ )
120
+
121
+ def clear(self) -> None:
122
+ self.samples.clear()
123
+
124
+ def aggregate(self) -> dict:
125
+ if not self.samples:
126
+ return {}
127
+ aggregate = aggregate_mean(self.samples)
128
+ return {self.name: aggregate}
129
+
130
+
131
+ class DiskOut:
132
+ """Total system disk write in MB."""
133
+
134
+ name = "disk.out"
25
135
  samples: "Deque[float]"
26
136
 
27
137
  def __init__(self) -> None:
28
138
  self.samples = deque([])
139
+ self.write_init: Optional[int] = None
29
140
 
30
141
  def sample(self) -> None:
31
- self.samples.append(psutil.disk_usage("/").percent)
142
+ if self.write_init is None:
143
+ # init on first sample
144
+ self.write_init = psutil.disk_io_counters().write_bytes
145
+ self.samples.append(
146
+ (psutil.disk_io_counters().write_bytes - self.write_init) / 1024 / 1024
147
+ )
32
148
 
33
149
  def clear(self) -> None:
34
150
  self.samples.clear()
@@ -49,7 +165,13 @@ class Disk:
49
165
  shutdown_event: threading.Event,
50
166
  ) -> None:
51
167
  self.name = self.__class__.__name__.lower()
52
- self.metrics: List[Metric] = [DiskUsage()]
168
+ self.settings = settings
169
+ self.metrics: List[Metric] = [
170
+ DiskUsagePercent(list(settings._stats_disk_paths or ["/"])),
171
+ DiskUsage(list(settings._stats_disk_paths or ["/"])),
172
+ DiskIn(),
173
+ DiskOut(),
174
+ ]
53
175
  self.metrics_monitor = MetricsMonitor(
54
176
  self.name,
55
177
  self.metrics,
@@ -64,11 +186,22 @@ class Disk:
64
186
  return psutil is not None
65
187
 
66
188
  def probe(self) -> dict:
67
- # total disk space:
68
- total = psutil.disk_usage("/").total / 1024 / 1024 / 1024
69
- # total disk space used:
70
- used = psutil.disk_usage("/").used / 1024 / 1024 / 1024
71
- return {self.name: {"total": total, "used": used}}
189
+ disk_paths = list(self.settings._stats_disk_paths or ["/"])
190
+ disk_metrics = {}
191
+ for disk_path in disk_paths:
192
+ try:
193
+ # total disk space in GB:
194
+ total = psutil.disk_usage(disk_path).total / 1024 / 1024 / 1024
195
+ # total disk space used in GB:
196
+ used = psutil.disk_usage(disk_path).used / 1024 / 1024 / 1024
197
+ disk_metrics[disk_path] = {
198
+ "total": total,
199
+ "used": used,
200
+ }
201
+ except Exception as e: # noqa
202
+ termwarn(f"Could not access disk path {disk_path}: {e}", repeat=False)
203
+
204
+ return {self.name: disk_metrics}
72
205
 
73
206
  def start(self) -> None:
74
207
  self.metrics_monitor.start()
@@ -19,6 +19,7 @@ from wandb.sdk.lib.filenames import (
19
19
  REQUIREMENTS_FNAME,
20
20
  )
21
21
  from wandb.sdk.lib.gitlib import GitRepo
22
+ from wandb.sdk.wandb_settings import _get_program_relpath
22
23
 
23
24
  from .assets.interfaces import Interface
24
25
 
@@ -87,7 +88,7 @@ class SystemInfo:
87
88
 
88
89
  def _save_code(self) -> None:
89
90
  logger.debug("Saving code")
90
- if self.settings.program_relpath is None:
91
+ if not self.settings.program_relpath:
91
92
  logger.warning("unable to save code -- program entry not found")
92
93
  return None
93
94
 
@@ -210,8 +211,11 @@ class SystemInfo:
210
211
 
211
212
  if self.settings.program is not None:
212
213
  data["program"] = self.settings.program
214
+ # Used during artifact-job creation, always points to the relpath
215
+ # of code execution, even when in a git repo
216
+ data["codePathLocal"] = _get_program_relpath(self.settings.program)
213
217
  if not self.settings.disable_code:
214
- if self.settings.program_relpath is not None:
218
+ if self.settings.program_relpath:
215
219
  data["codePath"] = self.settings.program_relpath
216
220
  elif self.settings._jupyter:
217
221
  if self.settings.notebook_name:
@@ -0,0 +1,5 @@
1
+ from ._launch import launch
2
+ from ._launch_add import launch_add
3
+ from .agent.agent import LaunchAgent
4
+
5
+ __all__ = ["LaunchAgent", "launch", "launch_add"]
@@ -124,24 +124,28 @@ def create_and_run_agent(
124
124
  agent.loop()
125
125
 
126
126
 
127
- def _run(
128
- uri: Optional[str],
129
- job: Optional[str],
130
- name: Optional[str],
131
- project: Optional[str],
132
- entity: Optional[str],
133
- docker_image: Optional[str],
134
- entry_point: Optional[List[str]],
135
- version: Optional[str],
136
- resource: str,
137
- resource_args: Optional[Dict[str, Any]],
138
- launch_config: Optional[Dict[str, Any]],
139
- synchronous: Optional[bool],
127
+ def _launch(
140
128
  api: Api,
141
- run_id: Optional[str],
142
- repository: Optional[str],
129
+ uri: Optional[str] = None,
130
+ job: Optional[str] = None,
131
+ name: Optional[str] = None,
132
+ project: Optional[str] = None,
133
+ entity: Optional[str] = None,
134
+ docker_image: Optional[str] = None,
135
+ entry_point: Optional[List[str]] = None,
136
+ version: Optional[str] = None,
137
+ resource: Optional[str] = None,
138
+ resource_args: Optional[Dict[str, Any]] = None,
139
+ launch_config: Optional[Dict[str, Any]] = None,
140
+ synchronous: Optional[bool] = None,
141
+ run_id: Optional[str] = None,
142
+ repository: Optional[str] = None,
143
143
  ) -> AbstractRun:
144
144
  """Helper that delegates to the project-running method corresponding to the passed-in backend."""
145
+ if launch_config is None:
146
+ launch_config = {}
147
+ if resource is None:
148
+ resource = "local-container"
145
149
  launch_spec = construct_launch_spec(
146
150
  uri,
147
151
  job,
@@ -193,9 +197,8 @@ def _run(
193
197
  )
194
198
 
195
199
 
196
- def run(
200
+ def launch(
197
201
  api: Api,
198
- uri: Optional[str] = None,
199
202
  job: Optional[str] = None,
200
203
  entry_point: Optional[List[str]] = None,
201
204
  version: Optional[str] = None,
@@ -210,41 +213,43 @@ def run(
210
213
  run_id: Optional[str] = None,
211
214
  repository: Optional[str] = None,
212
215
  ) -> AbstractRun:
213
- """Run a W&B launch experiment. The project can be wandb uri or a Git URI.
216
+ """Launch a W&B launch experiment.
214
217
 
215
218
  Arguments:
216
- uri: URI of experiment to run. A wandb run uri or a Git repository URI.
217
- job: string reference to a wandb.Job eg: wandb/test/my-job:latest
218
- api: An instance of a wandb Api from wandb.apis.internal.
219
- entry_point: Entry point to run within the project. Defaults to using the entry point used
220
- in the original run for wandb URIs, or main.py for git repository URIs.
221
- version: For Git-based projects, either a commit hash or a branch name.
222
- name: Name run under which to launch the run.
223
- resource: Execution backend for the run.
224
- resource_args: Resource related arguments for launching runs onto a remote backend.
225
- Will be stored on the constructed launch config under ``resource_args``.
226
- project: Target project to send launched run to
227
- entity: Target entity to send launched run to
228
- config: A dictionary containing the configuration for the run. May also contain
229
- resource specific arguments under the key "resource_args".
230
- synchronous: Whether to block while waiting for a run to complete. Defaults to True.
231
- Note that if ``synchronous`` is False and ``backend`` is "local-container", this
232
- method will return, but the current process will block when exiting until
233
- the local run completes. If the current process is interrupted, any
234
- asynchronous runs launched via this method will be terminated. If
235
- ``synchronous`` is True and the run fails, the current process will
236
- error out as well.
237
- run_id: ID for the run (To ultimately replace the :name: field)
238
- repository: string name of repository path for remote registry
219
+ job: string reference to a wandb.Job eg: wandb/test/my-job:latest
220
+ api: An instance of a wandb Api from wandb.apis.internal.
221
+ entry_point: Entry point to run within the project. Defaults to using the entry point used
222
+ in the original run for wandb URIs, or main.py for git repository URIs.
223
+ version: For Git-based projects, either a commit hash or a branch name.
224
+ name: Name run under which to launch the run.
225
+ resource: Execution backend for the run.
226
+ resource_args: Resource related arguments for launching runs onto a remote backend.
227
+ Will be stored on the constructed launch config under ``resource_args``.
228
+ project: Target project to send launched run to
229
+ entity: Target entity to send launched run to
230
+ config: A dictionary containing the configuration for the run. May also contain
231
+ resource specific arguments under the key "resource_args".
232
+ synchronous: Whether to block while waiting for a run to complete. Defaults to True.
233
+ Note that if ``synchronous`` is False and ``backend`` is "local-container", this
234
+ method will return, but the current process will block when exiting until
235
+ the local run completes. If the current process is interrupted, any
236
+ asynchronous runs launched via this method will be terminated. If
237
+ ``synchronous`` is True and the run fails, the current process will
238
+ error out as well.
239
+ run_id: ID for the run (To ultimately replace the :name: field)
240
+ repository: string name of repository path for remote registry
239
241
 
240
242
  Example:
241
- import wandb
242
- project_uri = "https://github.com/wandb/examples"
243
- params = {"alpha": 0.5, "l1_ratio": 0.01}
243
+ ```python
244
+ from wandb.sdk.launch import launch
245
+
246
+ job = "wandb/jobs/Hello World:latest"
247
+ params = {"epochs": 5}
244
248
  # Run W&B project and create a reproducible docker environment
245
249
  # on a local host
246
250
  api = wandb.apis.internal.Api()
247
- wandb.launch(project_uri, api, parameters=params)
251
+ launch(api, job, parameters=params)
252
+ ```
248
253
 
249
254
 
250
255
  Returns:
@@ -255,15 +260,9 @@ def run(
255
260
  `wandb.exceptions.ExecutionError` If a run launched in blocking mode
256
261
  is unsuccessful.
257
262
  """
258
- if config is None:
259
- config = {}
260
-
261
- # default to local container for runs without a queue
262
- if resource is None:
263
- resource = "local-container"
264
-
265
- submitted_run_obj = _run(
266
- uri=uri,
263
+ submitted_run_obj = _launch(
264
+ # TODO: fully deprecate URI path
265
+ uri=None,
267
266
  job=job,
268
267
  name=name,
269
268
  project=project,