wandb 0.15.9__py3-none-any.whl → 0.15.11__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114) hide show
  1. wandb/__init__.py +5 -1
  2. wandb/apis/public.py +137 -17
  3. wandb/apis/reports/_panels.py +1 -1
  4. wandb/apis/reports/blocks.py +1 -0
  5. wandb/apis/reports/report.py +27 -5
  6. wandb/cli/cli.py +52 -41
  7. wandb/docker/__init__.py +17 -0
  8. wandb/docker/auth.py +1 -1
  9. wandb/env.py +24 -4
  10. wandb/filesync/step_checksum.py +3 -3
  11. wandb/integration/openai/openai.py +3 -0
  12. wandb/integration/ultralytics/__init__.py +9 -0
  13. wandb/integration/ultralytics/bbox_utils.py +196 -0
  14. wandb/integration/ultralytics/callback.py +458 -0
  15. wandb/integration/ultralytics/classification_utils.py +66 -0
  16. wandb/integration/ultralytics/mask_utils.py +141 -0
  17. wandb/integration/ultralytics/pose_utils.py +92 -0
  18. wandb/integration/xgboost/xgboost.py +3 -3
  19. wandb/integration/yolov8/__init__.py +0 -7
  20. wandb/integration/yolov8/yolov8.py +22 -3
  21. wandb/old/settings.py +7 -0
  22. wandb/plot/line_series.py +0 -1
  23. wandb/proto/v3/wandb_internal_pb2.py +353 -300
  24. wandb/proto/v3/wandb_server_pb2.py +37 -41
  25. wandb/proto/v3/wandb_settings_pb2.py +2 -2
  26. wandb/proto/v3/wandb_telemetry_pb2.py +16 -16
  27. wandb/proto/v4/wandb_internal_pb2.py +272 -260
  28. wandb/proto/v4/wandb_server_pb2.py +37 -40
  29. wandb/proto/v4/wandb_settings_pb2.py +2 -2
  30. wandb/proto/v4/wandb_telemetry_pb2.py +16 -16
  31. wandb/proto/wandb_internal_codegen.py +7 -31
  32. wandb/sdk/artifacts/artifact.py +321 -189
  33. wandb/sdk/artifacts/artifact_cache.py +14 -0
  34. wandb/sdk/artifacts/artifact_manifest.py +5 -4
  35. wandb/sdk/artifacts/artifact_manifest_entry.py +37 -9
  36. wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +1 -9
  37. wandb/sdk/artifacts/artifact_saver.py +13 -50
  38. wandb/sdk/artifacts/artifact_ttl.py +6 -0
  39. wandb/sdk/artifacts/artifacts_cache.py +119 -93
  40. wandb/sdk/artifacts/staging.py +25 -0
  41. wandb/sdk/artifacts/storage_handlers/s3_handler.py +12 -7
  42. wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +2 -3
  43. wandb/sdk/artifacts/storage_policies/__init__.py +4 -0
  44. wandb/sdk/artifacts/storage_policies/register.py +1 -0
  45. wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +4 -3
  46. wandb/sdk/artifacts/storage_policy.py +4 -2
  47. wandb/sdk/backend/backend.py +0 -16
  48. wandb/sdk/data_types/image.py +3 -1
  49. wandb/sdk/integration_utils/auto_logging.py +38 -13
  50. wandb/sdk/interface/interface.py +16 -135
  51. wandb/sdk/interface/interface_shared.py +9 -147
  52. wandb/sdk/interface/interface_sock.py +0 -26
  53. wandb/sdk/internal/file_pusher.py +20 -3
  54. wandb/sdk/internal/file_stream.py +3 -1
  55. wandb/sdk/internal/handler.py +53 -70
  56. wandb/sdk/internal/internal_api.py +220 -130
  57. wandb/sdk/internal/job_builder.py +41 -37
  58. wandb/sdk/internal/sender.py +7 -25
  59. wandb/sdk/internal/system/assets/disk.py +144 -11
  60. wandb/sdk/internal/system/system_info.py +6 -2
  61. wandb/sdk/launch/__init__.py +5 -0
  62. wandb/sdk/launch/{launch.py → _launch.py} +53 -54
  63. wandb/sdk/launch/{launch_add.py → _launch_add.py} +34 -31
  64. wandb/sdk/launch/_project_spec.py +13 -2
  65. wandb/sdk/launch/agent/agent.py +103 -59
  66. wandb/sdk/launch/agent/run_queue_item_file_saver.py +6 -4
  67. wandb/sdk/launch/builder/build.py +19 -1
  68. wandb/sdk/launch/builder/docker_builder.py +5 -1
  69. wandb/sdk/launch/builder/kaniko_builder.py +5 -1
  70. wandb/sdk/launch/create_job.py +20 -5
  71. wandb/sdk/launch/loader.py +14 -5
  72. wandb/sdk/launch/runner/abstract.py +0 -2
  73. wandb/sdk/launch/runner/kubernetes_monitor.py +329 -0
  74. wandb/sdk/launch/runner/kubernetes_runner.py +66 -209
  75. wandb/sdk/launch/runner/local_container.py +5 -2
  76. wandb/sdk/launch/runner/local_process.py +4 -1
  77. wandb/sdk/launch/sweeps/scheduler.py +43 -25
  78. wandb/sdk/launch/sweeps/utils.py +5 -3
  79. wandb/sdk/launch/utils.py +3 -1
  80. wandb/sdk/lib/_settings_toposort_generate.py +3 -9
  81. wandb/sdk/lib/_settings_toposort_generated.py +27 -3
  82. wandb/sdk/lib/_wburls_generated.py +1 -0
  83. wandb/sdk/lib/filenames.py +27 -6
  84. wandb/sdk/lib/filesystem.py +181 -7
  85. wandb/sdk/lib/fsm.py +5 -3
  86. wandb/sdk/lib/gql_request.py +3 -0
  87. wandb/sdk/lib/ipython.py +7 -0
  88. wandb/sdk/lib/wburls.py +1 -0
  89. wandb/sdk/service/port_file.py +2 -15
  90. wandb/sdk/service/server.py +7 -55
  91. wandb/sdk/service/service.py +56 -26
  92. wandb/sdk/service/service_base.py +1 -1
  93. wandb/sdk/service/streams.py +11 -5
  94. wandb/sdk/verify/verify.py +2 -2
  95. wandb/sdk/wandb_init.py +8 -2
  96. wandb/sdk/wandb_manager.py +4 -14
  97. wandb/sdk/wandb_run.py +143 -53
  98. wandb/sdk/wandb_settings.py +148 -35
  99. wandb/testing/relay.py +85 -38
  100. wandb/util.py +87 -4
  101. wandb/wandb_torch.py +24 -38
  102. {wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/METADATA +48 -23
  103. {wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/RECORD +107 -103
  104. {wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/WHEEL +1 -1
  105. wandb/proto/v3/wandb_server_pb2_grpc.py +0 -1422
  106. wandb/proto/v4/wandb_server_pb2_grpc.py +0 -1422
  107. wandb/proto/wandb_server_pb2_grpc.py +0 -8
  108. wandb/sdk/artifacts/storage_policies/s3_bucket_policy.py +0 -61
  109. wandb/sdk/interface/interface_grpc.py +0 -460
  110. wandb/sdk/service/server_grpc.py +0 -444
  111. wandb/sdk/service/service_grpc.py +0 -73
  112. {wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/LICENSE +0 -0
  113. {wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/entry_points.txt +0 -0
  114. {wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/top_level.txt +0 -0
@@ -3,6 +3,7 @@ import logging
3
3
  import os
4
4
  import queue
5
5
  import tempfile
6
+ import threading
6
7
  import time
7
8
  from typing import TYPE_CHECKING, Optional, Tuple
8
9
 
@@ -12,8 +13,8 @@ from wandb.filesync import stats, step_checksum, step_upload
12
13
  from wandb.sdk.lib.paths import LogicalPath
13
14
 
14
15
  if TYPE_CHECKING:
15
- from wandb.sdk.artifacts import artifact_saver
16
16
  from wandb.sdk.artifacts.artifact_manifest import ArtifactManifest
17
+ from wandb.sdk.artifacts.artifact_saver import SaveFn, SaveFnAsync
17
18
  from wandb.sdk.internal import file_stream, internal_api
18
19
  from wandb.sdk.internal.settings_static import SettingsStatic
19
20
 
@@ -72,6 +73,21 @@ class FilePusher:
72
73
  )
73
74
  self._step_upload.start()
74
75
 
76
+ self._stats_thread_stop = threading.Event()
77
+ if os.environ.get("WANDB_DEBUG"):
78
+ # debug thread to monitor and report file pusher stats
79
+ self._stats_thread = threading.Thread(
80
+ target=self._file_pusher_stats,
81
+ daemon=True,
82
+ name="FPStatsThread",
83
+ )
84
+ self._stats_thread.start()
85
+
86
+ def _file_pusher_stats(self) -> None:
87
+ while not self._stats_thread_stop.is_set():
88
+ logger.info(f"FilePusher stats: {self._stats._stats}")
89
+ time.sleep(1)
90
+
75
91
  def get_status(self) -> Tuple[bool, stats.Summary]:
76
92
  running = self.is_alive()
77
93
  summary = self._stats.summary()
@@ -134,8 +150,8 @@ class FilePusher:
134
150
  self,
135
151
  manifest: "ArtifactManifest",
136
152
  artifact_id: str,
137
- save_fn: "artifact_saver.SaveFn",
138
- save_fn_async: "artifact_saver.SaveFnAsync",
153
+ save_fn: "SaveFn",
154
+ save_fn_async: "SaveFnAsync",
139
155
  ) -> None:
140
156
  event = step_checksum.RequestStoreManifestFiles(
141
157
  manifest, artifact_id, save_fn, save_fn_async
@@ -158,6 +174,7 @@ class FilePusher:
158
174
  def finish(self, callback: Optional[step_upload.OnRequestFinishFn] = None):
159
175
  logger.info("shutting down file pusher")
160
176
  self._incoming_queue.put(step_checksum.RequestFinish(callback))
177
+ self._stats_thread_stop.set()
161
178
 
162
179
  def join(self) -> None:
163
180
  # NOTE: must have called finish before join
@@ -335,11 +335,13 @@ class FileStreamApi:
335
335
  self._run_id = run_id
336
336
  self._start_time = start_time
337
337
  self._client = requests.Session()
338
+ timeout = timeout or 0
338
339
  if timeout > 0:
339
340
  self._client.post = functools.partial(self._client.post, timeout=timeout) # type: ignore[method-assign]
340
341
  self._client.auth = api.client.transport.session.auth
341
342
  self._client.headers.update(api.client.transport.headers or {})
342
343
  self._client.cookies.update(api.client.transport.cookies or {}) # type: ignore[no-untyped-call]
344
+ self._client.proxies.update(api.client.transport.session.proxies or {})
343
345
  self._file_policies: Dict[str, DefaultFilePolicy] = {}
344
346
  self._dropped_chunks: int = 0
345
347
  self._queue: queue.Queue = queue.Queue()
@@ -499,7 +501,7 @@ class FileStreamApi:
499
501
  wandb.termerror(
500
502
  "Dropped streaming file chunk (see wandb/debug-internal.log)"
501
503
  )
502
- logging.exception("dropped chunk %s" % response)
504
+ logger.exception("dropped chunk %s" % response)
503
505
  self._dropped_chunks += 1
504
506
  else:
505
507
  parsed: Optional[dict] = None
@@ -23,6 +23,7 @@ from typing import (
23
23
 
24
24
  from wandb.proto.wandb_internal_pb2 import (
25
25
  HistoryRecord,
26
+ InternalMessages,
26
27
  MetricRecord,
27
28
  Record,
28
29
  Result,
@@ -34,13 +35,13 @@ from wandb.proto.wandb_internal_pb2 import (
34
35
  )
35
36
 
36
37
  from ..interface.interface_queue import InterfaceQueue
37
- from ..lib import handler_util, proto_util, tracelog
38
+ from ..lib import handler_util, proto_util, tracelog, wburls
38
39
  from . import context, sample, tb_watcher
39
40
  from .settings_static import SettingsStatic
40
41
  from .system.system_monitor import SystemMonitor
41
42
 
42
43
  if TYPE_CHECKING:
43
- from wandb.proto.wandb_internal_pb2 import ArtifactDoneRequest, MetricSummary
44
+ from wandb.proto.wandb_internal_pb2 import MetricSummary
44
45
 
45
46
 
46
47
  SummaryDict = Dict[str, Any]
@@ -80,7 +81,6 @@ class HandleManager:
80
81
  _metric_copy: Dict[Tuple[str, ...], Any]
81
82
  _track_time: Optional[float]
82
83
  _accumulate_time: float
83
- _artifact_xid_done: Dict[str, "ArtifactDoneRequest"]
84
84
  _run_start_time: Optional[float]
85
85
  _context_keeper: context.ContextKeeper
86
86
 
@@ -119,9 +119,9 @@ class HandleManager:
119
119
  self._metric_globs = defaultdict(MetricRecord)
120
120
  self._metric_track = dict()
121
121
  self._metric_copy = dict()
122
+ self._internal_messages = InternalMessages()
122
123
 
123
- # TODO: implement release protocol to clean this up
124
- self._artifact_xid_done = dict()
124
+ self._dropped_history = False
125
125
 
126
126
  def __len__(self) -> int:
127
127
  return self._record_q.qsize()
@@ -235,7 +235,7 @@ class HandleManager:
235
235
  record = Record(summary=summary)
236
236
  self._dispatch_record(record)
237
237
  elif not self._settings._offline:
238
- # Send this summary update as a request since we arent persisting every update
238
+ # Send this summary update as a request since we aren't persisting every update
239
239
  summary_record = SummaryRecordRequest(summary=summary)
240
240
  request_record = self._interface._make_request(
241
241
  summary_record=summary_record
@@ -266,7 +266,7 @@ class HandleManager:
266
266
  if s.none:
267
267
  return False
268
268
  if s.copy:
269
- # non key list copy already done in _update_summary
269
+ # non-key list copy already done in _update_summary
270
270
  if len(kl) > 1:
271
271
  _dict_nested_set(self._consolidated_summary, kl, v)
272
272
  return True
@@ -290,7 +290,7 @@ class HandleManager:
290
290
  if best_key:
291
291
  _dict_nested_set(self._consolidated_summary, best_key, v)
292
292
  updated = True
293
- # defaulting to minimize if goal is not supecified
293
+ # defaulting to minimize if goal is not specified
294
294
  if s.min or best_key and not goal_max:
295
295
  min_key = tuple(kl + ["min"])
296
296
  old_min = self._metric_track.get(min_key)
@@ -358,7 +358,7 @@ class HandleManager:
358
358
  ) -> bool:
359
359
  metric_key = ".".join([k.replace(".", "\\.") for k in kl])
360
360
  d = self._metric_defines.get(metric_key, d)
361
- # if the dict has _type key, its a wandb table object
361
+ # if the dict has _type key, it's a wandb table object
362
362
  if isinstance(v, dict) and not handler_util.metric_is_wandb_dict(v):
363
363
  updated = False
364
364
  for nk, nv in v.items():
@@ -374,7 +374,7 @@ class HandleManager:
374
374
  return updated
375
375
 
376
376
  def _update_summary_media_objects(self, v: Dict[str, Any]) -> Dict[str, Any]:
377
- # For now, non recursive - just top level
377
+ # For now, non-recursive - just top level
378
378
  for nk, nv in v.items():
379
379
  if (
380
380
  isinstance(nv, dict)
@@ -387,17 +387,17 @@ class HandleManager:
387
387
  v[nk] = nv
388
388
  return v
389
389
 
390
- def _update_summary(self, history_dict: Dict[str, Any]) -> bool:
390
+ def _update_summary(self, history_dict: Dict[str, Any]) -> List[str]:
391
391
  # keep old behavior fast path if no define metrics have been used
392
392
  if not self._metric_defines:
393
393
  history_dict = self._update_summary_media_objects(history_dict)
394
394
  self._consolidated_summary.update(history_dict)
395
- return True
396
- updated = False
395
+ return list(history_dict.keys())
396
+ updated_keys = []
397
397
  for k, v in history_dict.items():
398
398
  if self._update_summary_list(kl=[k], v=v):
399
- updated = True
400
- return updated
399
+ updated_keys.append(k)
400
+ return updated_keys
401
401
 
402
402
  def _history_assign_step(
403
403
  self,
@@ -511,24 +511,28 @@ class HandleManager:
511
511
  self._history_update(record.history, history_dict)
512
512
  self._dispatch_record(record)
513
513
  self._save_history(record.history)
514
- updated = self._update_summary(history_dict)
515
- if updated:
516
- self._save_summary(self._consolidated_summary)
514
+ # update summary from history
515
+ updated_keys = self._update_summary(history_dict)
516
+ if updated_keys:
517
+ updated_items = {k: self._consolidated_summary[k] for k in updated_keys}
518
+ self._save_summary(updated_items)
517
519
 
518
520
  def _flush_partial_history(
519
521
  self,
520
522
  step: Optional[int] = None,
521
523
  ) -> None:
522
- if self._partial_history:
523
- history = HistoryRecord()
524
- for k, v in self._partial_history.items():
525
- item = history.item.add()
526
- item.key = k
527
- item.value_json = json.dumps(v)
528
- if step is not None:
529
- history.step.num = step
530
- self.handle_history(Record(history=history))
531
- self._partial_history = {}
524
+ if not self._partial_history:
525
+ return
526
+
527
+ history = HistoryRecord()
528
+ for k, v in self._partial_history.items():
529
+ item = history.item.add()
530
+ item.key = k
531
+ item.value_json = json.dumps(v)
532
+ if step is not None:
533
+ history.step.num = step
534
+ self.handle_history(Record(history=history))
535
+ self._partial_history = {}
532
536
 
533
537
  def handle_request_sender_mark_report(self, record: Record) -> None:
534
538
  self._dispatch_record(record, always_send=True)
@@ -550,9 +554,18 @@ class HandleManager:
550
554
  history_dict = proto_util.dict_from_proto_list(partial_history.item)
551
555
  if step is not None:
552
556
  if step < self._step:
553
- logger.warning(
554
- f"Step {step} < {self._step}. Dropping entry: {history_dict}."
557
+ if not self._dropped_history:
558
+ message = (
559
+ "Step only supports monotonically increasing values, use define_metric to set a custom x "
560
+ f"axis. For details see: {wburls.wburls.get('wandb_define_metric')}"
561
+ )
562
+ self._internal_messages.warning.append(message)
563
+ self._dropped_history = True
564
+ message = (
565
+ f"(User provided step: {step} is less than current step: {self._step}. "
566
+ f"Dropping entry: {history_dict})."
555
567
  )
568
+ self._internal_messages.warning.append(message)
556
569
  return
557
570
  elif step > self._step:
558
571
  self._flush_partial_history()
@@ -639,44 +652,6 @@ class HandleManager:
639
652
  def handle_request_log_artifact(self, record: Record) -> None:
640
653
  self._dispatch_record(record)
641
654
 
642
- def handle_request_artifact_send(self, record: Record) -> None:
643
- assert record.control.req_resp
644
- result = proto_util._result_from_record(record)
645
-
646
- self._dispatch_record(record)
647
-
648
- # send response immediately, the request will be polled for result
649
- xid = record.uuid
650
- result.response.artifact_send_response.xid = xid
651
- self._respond_result(result)
652
-
653
- def handle_request_artifact_poll(self, record: Record) -> None:
654
- assert record.control.req_resp
655
- xid = record.request.artifact_poll.xid
656
- assert xid
657
-
658
- result = proto_util._result_from_record(record)
659
- done_req = self._artifact_xid_done.get(xid)
660
- if done_req:
661
- result.response.artifact_poll_response.artifact_id = done_req.artifact_id
662
- result.response.artifact_poll_response.error_message = (
663
- done_req.error_message
664
- )
665
- result.response.artifact_poll_response.ready = True
666
- self._respond_result(result)
667
-
668
- def handle_request_artifact_done(self, record: Record) -> None:
669
- assert not record.control.req_resp
670
- done_req = record.request.artifact_done
671
- xid = done_req.xid
672
- assert xid
673
-
674
- self._artifact_xid_done[xid] = done_req
675
-
676
- # def handle_request_artifact_release(self, record: Record) -> None:
677
- # assert record.control.req_resp
678
- # # TODO: implement release protocol to clean up _artifact_xid_done dict
679
-
680
655
  def handle_telemetry(self, record: Record) -> None:
681
656
  self._dispatch_record(record)
682
657
 
@@ -716,7 +691,7 @@ class HandleManager:
716
691
 
717
692
  def handle_request_resume(self, record: Record) -> None:
718
693
  if self._system_monitor is not None:
719
- logger.info("starting system metrics thread or process")
694
+ logger.info("starting system metrics thread")
720
695
  self._system_monitor.start()
721
696
 
722
697
  if self._track_time is not None:
@@ -725,7 +700,7 @@ class HandleManager:
725
700
 
726
701
  def handle_request_pause(self, record: Record) -> None:
727
702
  if self._system_monitor is not None:
728
- logger.info("stopping system metrics thread or process")
703
+ logger.info("stopping system metrics thread")
729
704
  self._system_monitor.finish()
730
705
  if self._track_time is not None:
731
706
  self._accumulate_time += time.time() - self._track_time
@@ -740,6 +715,14 @@ class HandleManager:
740
715
  def handle_request_network_status(self, record: Record) -> None:
741
716
  self._dispatch_record(record)
742
717
 
718
+ def handle_request_internal_messages(self, record: Record) -> None:
719
+ result = proto_util._result_from_record(record)
720
+ result.response.internal_messages_response.messages.CopyFrom(
721
+ self._internal_messages
722
+ )
723
+ self._internal_messages.Clear()
724
+ self._respond_result(result)
725
+
743
726
  def handle_request_status(self, record: Record) -> None:
744
727
  # TODO(mempressure): do something better?
745
728
  assert record.control.req_resp