wandb 0.17.4__py3-none-any.whl → 0.17.6__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56)
  1. wandb/__init__.py +3 -1
  2. wandb/apis/public/api.py +1 -1
  3. wandb/apis/public/jobs.py +5 -0
  4. wandb/bin/nvidia_gpu_stats +0 -0
  5. wandb/data_types.py +2 -1
  6. wandb/env.py +6 -0
  7. wandb/filesync/upload_job.py +1 -1
  8. wandb/integration/lightning/fabric/logger.py +4 -4
  9. wandb/proto/v3/wandb_internal_pb2.py +339 -328
  10. wandb/proto/v3/wandb_settings_pb2.py +1 -1
  11. wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
  12. wandb/proto/v4/wandb_internal_pb2.py +326 -323
  13. wandb/proto/v4/wandb_settings_pb2.py +1 -1
  14. wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
  15. wandb/proto/v5/wandb_internal_pb2.py +326 -323
  16. wandb/proto/v5/wandb_settings_pb2.py +1 -1
  17. wandb/proto/v5/wandb_telemetry_pb2.py +10 -10
  18. wandb/proto/wandb_deprecated.py +4 -0
  19. wandb/proto/wandb_internal_pb2.py +6 -0
  20. wandb/sdk/artifacts/artifact.py +16 -24
  21. wandb/sdk/artifacts/artifact_manifest_entry.py +31 -0
  22. wandb/sdk/artifacts/storage_handlers/azure_handler.py +35 -23
  23. wandb/sdk/data_types/object_3d.py +113 -2
  24. wandb/sdk/interface/interface.py +35 -5
  25. wandb/sdk/interface/interface_shared.py +9 -7
  26. wandb/sdk/internal/handler.py +1 -1
  27. wandb/sdk/internal/internal_api.py +4 -4
  28. wandb/sdk/internal/sender.py +40 -17
  29. wandb/sdk/launch/_launch.py +4 -2
  30. wandb/sdk/launch/_project_spec.py +34 -8
  31. wandb/sdk/launch/agent/agent.py +6 -2
  32. wandb/sdk/launch/agent/run_queue_item_file_saver.py +2 -4
  33. wandb/sdk/launch/builder/build.py +4 -2
  34. wandb/sdk/launch/builder/kaniko_builder.py +30 -9
  35. wandb/sdk/launch/builder/templates/_wandb_bootstrap.py +2 -1
  36. wandb/sdk/launch/inputs/internal.py +93 -2
  37. wandb/sdk/launch/inputs/manage.py +21 -3
  38. wandb/sdk/launch/inputs/schema.py +39 -0
  39. wandb/sdk/launch/runner/kubernetes_runner.py +72 -0
  40. wandb/sdk/launch/runner/local_container.py +13 -10
  41. wandb/sdk/launch/runner/sagemaker_runner.py +3 -5
  42. wandb/sdk/launch/utils.py +2 -0
  43. wandb/sdk/lib/disabled.py +13 -174
  44. wandb/sdk/lib/tracelog.py +2 -2
  45. wandb/sdk/wandb_init.py +23 -27
  46. wandb/sdk/wandb_login.py +6 -6
  47. wandb/sdk/wandb_manager.py +9 -5
  48. wandb/sdk/wandb_run.py +141 -97
  49. wandb/sdk/wandb_settings.py +3 -2
  50. wandb/util.py +29 -11
  51. wandb/wandb_agent.py +2 -0
  52. {wandb-0.17.4.dist-info → wandb-0.17.6.dist-info}/METADATA +3 -2
  53. {wandb-0.17.4.dist-info → wandb-0.17.6.dist-info}/RECORD +56 -54
  54. {wandb-0.17.4.dist-info → wandb-0.17.6.dist-info}/WHEEL +0 -0
  55. {wandb-0.17.4.dist-info → wandb-0.17.6.dist-info}/entry_points.txt +0 -0
  56. {wandb-0.17.4.dist-info → wandb-0.17.6.dist-info}/licenses/LICENSE +0 -0
wandb/sdk/wandb_run.py CHANGED
@@ -235,10 +235,10 @@ class RunStatusChecker:
235
235
 
236
236
  with lock:
237
237
  if self._join_event.is_set():
238
- return
238
+ break
239
239
  set_handle(local_handle)
240
240
  try:
241
- result = local_handle.wait(timeout=timeout)
241
+ result = local_handle.wait(timeout=timeout, release=False)
242
242
  except MailboxError:
243
243
  # background threads are opportunistically getting results
244
244
  # from the internal process but the internal process could
@@ -253,6 +253,7 @@ class RunStatusChecker:
253
253
  if result:
254
254
  process(result)
255
255
  # if request finished, clear the handle to send on the next interval
256
+ local_handle.abandon()
256
257
  local_handle = None
257
258
 
258
259
  time_elapsed = time.monotonic() - time_probe
@@ -591,8 +592,12 @@ class Run:
591
592
  ) -> None:
592
593
  # pid is set, so we know if this run object was initialized by this process
593
594
  self._init_pid = os.getpid()
595
+ self._settings = settings
596
+
597
+ if settings._noop:
598
+ return
599
+
594
600
  self._init(
595
- settings=settings,
596
601
  config=config,
597
602
  sweep_config=sweep_config,
598
603
  launch_config=launch_config,
@@ -600,12 +605,10 @@ class Run:
600
605
 
601
606
  def _init(
602
607
  self,
603
- settings: Settings,
604
608
  config: Optional[Dict[str, Any]] = None,
605
609
  sweep_config: Optional[Dict[str, Any]] = None,
606
610
  launch_config: Optional[Dict[str, Any]] = None,
607
611
  ) -> None:
608
- self._settings = settings
609
612
  self._config = wandb_config.Config()
610
613
  self._config._set_callback(self._config_callback)
611
614
  self._config._set_artifact_callback(self._config_artifact_callback)
@@ -2100,36 +2103,56 @@ class Run:
2100
2103
  return self._finish(exit_code, quiet)
2101
2104
 
2102
2105
  def _finish(
2103
- self, exit_code: Optional[int] = None, quiet: Optional[bool] = None
2106
+ self,
2107
+ exit_code: Optional[int] = None,
2108
+ quiet: Optional[bool] = None,
2104
2109
  ) -> None:
2105
- if quiet is not None:
2106
- self._quiet = quiet
2110
+ logger.info(f"finishing run {self._get_path()}")
2107
2111
  with telemetry.context(run=self) as tel:
2108
2112
  tel.feature.finish = True
2109
- logger.info(f"finishing run {self._get_path()}")
2110
- # detach jupyter hooks / others that needs to happen before backend shutdown
2111
- for hook in self._teardown_hooks:
2112
- if hook.stage == TeardownStage.EARLY:
2113
- hook.call()
2114
2113
 
2115
- self._atexit_cleanup(exit_code=exit_code)
2114
+ if quiet is not None:
2115
+ self._quiet = quiet
2116
+
2117
+ # Pop this run (hopefully) from the run stack, to support the "reinit"
2118
+ # functionality of wandb.init().
2119
+ #
2120
+ # TODO: It's not clear how _global_run_stack could have length other
2121
+ # than 1 at this point in the code. If you're reading this, consider
2122
+ # refactoring this thing.
2116
2123
  if self._wl and len(self._wl._global_run_stack) > 0:
2117
2124
  self._wl._global_run_stack.pop()
2118
- # detach logger / others meant to be run after we've shutdown the backend
2125
+
2126
+ # Run hooks that need to happen before the last messages to the
2127
+ # internal service, like Jupyter hooks.
2119
2128
  for hook in self._teardown_hooks:
2120
- if hook.stage == TeardownStage.LATE:
2129
+ if hook.stage == TeardownStage.EARLY:
2121
2130
  hook.call()
2122
- self._teardown_hooks = []
2123
- module.unset_globals()
2124
-
2125
- # inform manager this run is finished
2126
- manager = self._wl and self._wl._get_manager()
2127
- if manager:
2128
- manager._inform_finish(run_id=self._run_id)
2129
2131
 
2132
+ # Early-stage hooks may use methods that require _is_finished
2133
+ # to be False, so we set this after running those hooks.
2130
2134
  self._is_finished = True
2131
- # end sentry session
2132
- wandb._sentry.end_session()
2135
+
2136
+ try:
2137
+ self._atexit_cleanup(exit_code=exit_code)
2138
+
2139
+ # Run hooks that should happen after the last messages to the
2140
+ # internal service, like detaching the logger.
2141
+ for hook in self._teardown_hooks:
2142
+ if hook.stage == TeardownStage.LATE:
2143
+ hook.call()
2144
+ self._teardown_hooks = []
2145
+
2146
+ # Inform the service that we're done sending messages for this run.
2147
+ #
2148
+ # TODO: Why not do this in _atexit_cleanup()?
2149
+ manager = self._wl and self._wl._get_manager()
2150
+ if manager:
2151
+ manager._inform_finish(run_id=self._run_id)
2152
+
2153
+ finally:
2154
+ module.unset_globals()
2155
+ wandb._sentry.end_session()
2133
2156
 
2134
2157
  @_run_decorator._noop
2135
2158
  @_run_decorator._attach
@@ -2345,36 +2368,49 @@ class Run:
2345
2368
  return
2346
2369
  self._atexit_cleanup_called = True
2347
2370
 
2348
- exit_code = exit_code or self._hooks.exit_code if self._hooks else 0
2371
+ exit_code = (
2372
+ exit_code #
2373
+ or (self._hooks and self._hooks.exit_code)
2374
+ or 0
2375
+ )
2376
+ self._exit_code = exit_code
2349
2377
  logger.info(f"got exitcode: {exit_code}")
2378
+
2379
+ # Delete this run's "resume" file if the run finished successfully.
2380
+ #
2381
+ # This is used by the "auto" resume mode, which resumes from the last
2382
+ # failed (or unfinished/crashed) run. If we reach this line, then this
2383
+ # run shouldn't be a candidate for "auto" resume.
2350
2384
  if exit_code == 0:
2351
- # Cleanup our resume file on a clean exit
2352
2385
  if os.path.exists(self._settings.resume_fname):
2353
2386
  os.remove(self._settings.resume_fname)
2354
2387
 
2355
- self._exit_code = exit_code
2356
- report_failure = False
2357
2388
  try:
2358
2389
  self._on_finish()
2359
- except KeyboardInterrupt as ki:
2360
- if wandb.wandb_agent._is_running():
2361
- raise ki
2362
- wandb.termerror("Control-C detected -- Run data was not synced")
2363
- if not self._settings._notebook:
2364
- os._exit(-1)
2390
+
2391
+ except KeyboardInterrupt:
2392
+ if not wandb.wandb_agent._is_running():
2393
+ wandb.termerror("Control-C detected -- Run data was not synced")
2394
+ raise
2395
+
2365
2396
  except Exception as e:
2366
- if not self._settings._notebook:
2367
- report_failure = True
2368
2397
  self._console_stop()
2369
- self._backend.cleanup()
2370
2398
  logger.error("Problem finishing run", exc_info=e)
2371
2399
  wandb.termerror("Problem finishing run")
2372
- traceback.print_exc()
2373
- else:
2374
- self._on_final()
2375
- finally:
2376
- if report_failure:
2377
- os._exit(-1)
2400
+ raise
2401
+
2402
+ Run._footer(
2403
+ sampled_history=self._sampled_history,
2404
+ final_summary=self._final_summary,
2405
+ poll_exit_response=self._poll_exit_response,
2406
+ server_info_response=self._server_info_response,
2407
+ check_version_response=self._check_version,
2408
+ internal_messages_response=self._internal_messages_response,
2409
+ reporter=self._reporter,
2410
+ quiet=self._quiet,
2411
+ settings=self._settings,
2412
+ printer=self._printer,
2413
+ )
2378
2414
 
2379
2415
  def _console_start(self) -> None:
2380
2416
  logger.info("atexit reg")
@@ -2659,20 +2695,6 @@ class Run:
2659
2695
  for module_name in import_telemetry_set:
2660
2696
  unregister_post_import_hook(module_name, run_id)
2661
2697
 
2662
- def _on_final(self) -> None:
2663
- self._footer(
2664
- sampled_history=self._sampled_history,
2665
- final_summary=self._final_summary,
2666
- poll_exit_response=self._poll_exit_response,
2667
- server_info_response=self._server_info_response,
2668
- check_version_response=self._check_version,
2669
- internal_messages_response=self._internal_messages_response,
2670
- reporter=self._reporter,
2671
- quiet=self._quiet,
2672
- settings=self._settings,
2673
- printer=self._printer,
2674
- )
2675
-
2676
2698
  @_run_decorator._noop_on_finish()
2677
2699
  @_run_decorator._attach
2678
2700
  def define_metric(
@@ -2684,29 +2706,48 @@ class Run:
2684
2706
  summary: Optional[str] = None,
2685
2707
  goal: Optional[str] = None,
2686
2708
  overwrite: Optional[bool] = None,
2687
- **kwargs: Any,
2688
2709
  ) -> wandb_metric.Metric:
2689
- """Define metric properties which will later be logged with `wandb.log()`.
2710
+ """Customize metrics logged with `wandb.log()`.
2690
2711
 
2691
2712
  Arguments:
2692
- name: Name of the metric.
2693
- step_metric: Independent variable associated with the metric.
2694
- step_sync: Automatically add `step_metric` to history if needed.
2695
- Defaults to True if step_metric is specified.
2713
+ name: The name of the metric to customize.
2714
+ step_metric: The name of another metric to serve as the X-axis
2715
+ for this metric in automatically generated charts.
2716
+ step_sync: Automatically insert the last value of step_metric into
2717
+ `run.log()` if it is not provided explicitly. Defaults to True
2718
+ if step_metric is specified.
2696
2719
  hidden: Hide this metric from automatic plots.
2697
2720
  summary: Specify aggregate metrics added to summary.
2698
- Supported aggregations: "min,max,mean,best,last,none"
2699
- Default aggregation is `copy`
2700
- Aggregation `best` defaults to `goal`==`minimize`
2701
- goal: Specify direction for optimizing the metric.
2702
- Supported directions: "minimize,maximize"
2721
+ Supported aggregations include "min", "max", "mean", "last",
2722
+ "best", "copy" and "none". "best" is used together with the
2723
+ goal parameter. "none" prevents a summary from being generated.
2724
+ "copy" is deprecated and should not be used.
2725
+ goal: Specify how to interpret the "best" summary type.
2726
+ Supported options are "minimize" and "maximize".
2727
+ overwrite: If false, then this call is merged with previous
2728
+ `define_metric` calls for the same metric by using their
2729
+ values for any unspecified parameters. If true, then
2730
+ unspecified parameters overwrite values specified by
2731
+ previous calls.
2703
2732
 
2704
2733
  Returns:
2705
- A metric object is returned that can be further specified.
2706
-
2734
+ An object that represents this call but can otherwise be discarded.
2707
2735
  """
2736
+ if summary and "copy" in summary:
2737
+ deprecate.deprecate(
2738
+ deprecate.Deprecated.run__define_metric_copy,
2739
+ "define_metric(summary='copy') is deprecated and will be removed.",
2740
+ self,
2741
+ )
2742
+
2708
2743
  return self._define_metric(
2709
- name, step_metric, step_sync, hidden, summary, goal, overwrite, **kwargs
2744
+ name,
2745
+ step_metric,
2746
+ step_sync,
2747
+ hidden,
2748
+ summary,
2749
+ goal,
2750
+ overwrite,
2710
2751
  )
2711
2752
 
2712
2753
  def _define_metric(
@@ -2718,12 +2759,9 @@ class Run:
2718
2759
  summary: Optional[str] = None,
2719
2760
  goal: Optional[str] = None,
2720
2761
  overwrite: Optional[bool] = None,
2721
- **kwargs: Any,
2722
2762
  ) -> wandb_metric.Metric:
2723
2763
  if not name:
2724
2764
  raise wandb.Error("define_metric() requires non-empty name argument")
2725
- for k in kwargs:
2726
- wandb.termwarn(f"Unhandled define_metric() arg: {k}")
2727
2765
  if isinstance(step_metric, wandb_metric.Metric):
2728
2766
  step_metric = step_metric.name
2729
2767
  for arg_name, arg_val, exp_type in (
@@ -2878,7 +2916,7 @@ class Run:
2878
2916
  if artifact.is_draft() and not artifact._is_draft_save_started():
2879
2917
  artifact = self._log_artifact(artifact)
2880
2918
  if not self._settings._offline:
2881
- self._backend.interface.publish_link_artifact(
2919
+ handle = self._backend.interface.deliver_link_artifact(
2882
2920
  self,
2883
2921
  artifact,
2884
2922
  portfolio,
@@ -2890,6 +2928,13 @@ class Run:
2890
2928
  wandb.termwarn(
2891
2929
  "Artifact TTL will be disabled for source artifacts that are linked to portfolios."
2892
2930
  )
2931
+ result = handle.wait(timeout=-1)
2932
+ if result is None:
2933
+ handle.abandon()
2934
+ else:
2935
+ response = result.response.link_artifact_response
2936
+ if response.error_message:
2937
+ wandb.termerror(response.error_message)
2893
2938
  else:
2894
2939
  # TODO: implement offline mode + sync
2895
2940
  raise NotImplementedError
@@ -3836,34 +3881,33 @@ class Run:
3836
3881
  if not poll_exit_response:
3837
3882
  return
3838
3883
 
3839
- progress = poll_exit_response.pusher_stats
3840
- done = poll_exit_response.done
3884
+ stats = poll_exit_response.pusher_stats
3841
3885
 
3842
3886
  megabyte = wandb.util.POW_2_BYTES[2][1]
3843
- line = f"{progress.uploaded_bytes / megabyte :.3f} MB of {progress.total_bytes / megabyte:.3f} MB uploaded"
3844
- if progress.deduped_bytes > 0:
3845
- line += f" ({progress.deduped_bytes / megabyte:.3f} MB deduped)\r"
3846
- else:
3847
- line += "\r"
3848
-
3849
- percent_done = (
3850
- 1.0
3851
- if progress.total_bytes == 0
3852
- else progress.uploaded_bytes / progress.total_bytes
3887
+ line = (
3888
+ f"{stats.uploaded_bytes / megabyte:.3f} MB"
3889
+ f" of {stats.total_bytes / megabyte:.3f} MB uploaded"
3853
3890
  )
3891
+ if stats.deduped_bytes > 0:
3892
+ line += f" ({stats.deduped_bytes / megabyte:.3f} MB deduped)"
3893
+ line += "\r"
3854
3894
 
3855
- printer.progress_update(line, percent_done)
3856
- if done:
3895
+ if stats.total_bytes > 0:
3896
+ printer.progress_update(line, stats.uploaded_bytes / stats.total_bytes)
3897
+ else:
3898
+ printer.progress_update(line, 1.0)
3899
+
3900
+ if poll_exit_response.done:
3857
3901
  printer.progress_close()
3858
3902
 
3859
- dedupe_fraction = (
3860
- progress.deduped_bytes / float(progress.total_bytes)
3861
- if progress.total_bytes > 0
3862
- else 0
3863
- )
3864
- if dedupe_fraction > 0.01:
3903
+ if stats.total_bytes > 0:
3904
+ dedupe_fraction = stats.deduped_bytes / float(stats.total_bytes)
3905
+ else:
3906
+ dedupe_fraction = 0
3907
+
3908
+ if stats.deduped_bytes > 0.01:
3865
3909
  printer.display(
3866
- f"W&B sync reduced upload amount by {dedupe_fraction * 100:.1f}% "
3910
+ f"W&B sync reduced upload amount by {dedupe_fraction:.1%}"
3867
3911
  )
3868
3912
 
3869
3913
  @staticmethod
@@ -1882,9 +1882,10 @@ class Settings(SettingsData):
1882
1882
  if self.resume_from is None:
1883
1883
  return
1884
1884
 
1885
- if self.run_id is not None:
1885
+ if self.run_id is not None and (self.resume_from.run != self.run_id):
1886
1886
  wandb.termwarn(
1887
- "You cannot specify both run_id and resume_from. " "Ignoring run_id."
1887
+ "Both `run_id` and `resume_from` have been specified with different ids. "
1888
+ "`run_id` will be ignored."
1888
1889
  )
1889
1890
  self.update({"run_id": self.resume_from.run}, source=Source.INIT)
1890
1891
 
wandb/util.py CHANGED
@@ -1748,21 +1748,39 @@ def make_docker_image_name_safe(name: str) -> str:
1748
1748
  return trimmed if trimmed else "image"
1749
1749
 
1750
1750
 
1751
- def merge_dicts(source: Dict[str, Any], destination: Dict[str, Any]) -> Dict[str, Any]:
1752
- """Recursively merge two dictionaries."""
1751
+ def merge_dicts(
1752
+ source: Dict[str, Any],
1753
+ destination: Dict[str, Any],
1754
+ ) -> Dict[str, Any]:
1755
+ """Recursively merge two dictionaries.
1756
+
1757
+ This mutates the destination and its nested dictionaries and lists.
1758
+
1759
+ Instances of `dict` are recursively merged and instances of `list`
1760
+ are appended to the destination. If the destination type is not
1761
+ `dict` or `list`, respectively, the key is overwritten with the
1762
+ source value.
1763
+
1764
+ For all other types, the source value overwrites the destination value.
1765
+ """
1753
1766
  for key, value in source.items():
1754
1767
  if isinstance(value, dict):
1755
- # get node or create one
1756
- node = destination.setdefault(key, {})
1757
- merge_dicts(value, node)
1758
- else:
1759
- if isinstance(value, list):
1760
- if key in destination:
1761
- destination[key].extend(value)
1762
- else:
1763
- destination[key] = value
1768
+ node = destination.get(key)
1769
+ if isinstance(node, dict):
1770
+ merge_dicts(value, node)
1764
1771
  else:
1765
1772
  destination[key] = value
1773
+
1774
+ elif isinstance(value, list):
1775
+ dest_value = destination.get(key)
1776
+ if isinstance(dest_value, list):
1777
+ dest_value.extend(value)
1778
+ else:
1779
+ destination[key] = value
1780
+
1781
+ else:
1782
+ destination[key] = value
1783
+
1766
1784
  return destination
1767
1785
 
1768
1786
 
wandb/wandb_agent.py CHANGED
@@ -43,6 +43,8 @@ class AgentProcess:
43
43
  kwargs = dict(creationflags=subprocess.CREATE_NEW_PROCESS_GROUP)
44
44
  else:
45
45
  kwargs = dict(preexec_fn=os.setpgrp)
46
+ if env.get(wandb.env.SERVICE):
47
+ env.pop(wandb.env.SERVICE)
46
48
  self._popen = subprocess.Popen(command, env=env, **kwargs)
47
49
  elif function:
48
50
  self._proc = multiprocessing.Process(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: wandb
3
- Version: 0.17.4
3
+ Version: 0.17.6
4
4
  Summary: A CLI and library for interacting with the Weights & Biases API.
5
5
  Project-URL: Source, https://github.com/wandb/wandb
6
6
  Project-URL: Bug Reports, https://github.com/wandb/wandb/issues
@@ -71,7 +71,7 @@ Requires-Dist: google-cloud-storage; extra == 'gcp'
71
71
  Provides-Extra: importers
72
72
  Requires-Dist: filelock; extra == 'importers'
73
73
  Requires-Dist: mlflow; extra == 'importers'
74
- Requires-Dist: polars; extra == 'importers'
74
+ Requires-Dist: polars<=1.2.1; extra == 'importers'
75
75
  Requires-Dist: rich; extra == 'importers'
76
76
  Requires-Dist: tenacity; extra == 'importers'
77
77
  Provides-Extra: kubeflow
@@ -93,6 +93,7 @@ Requires-Dist: google-cloud-artifact-registry; extra == 'launch'
93
93
  Requires-Dist: google-cloud-compute; extra == 'launch'
94
94
  Requires-Dist: google-cloud-storage; extra == 'launch'
95
95
  Requires-Dist: iso8601; extra == 'launch'
96
+ Requires-Dist: jsonschema; extra == 'launch'
96
97
  Requires-Dist: kubernetes; extra == 'launch'
97
98
  Requires-Dist: kubernetes-asyncio; extra == 'launch'
98
99
  Requires-Dist: nbconvert; extra == 'launch'