wandb 0.21.0__py3-none-musllinux_1_2_aarch64.whl → 0.21.1__py3-none-musllinux_1_2_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. wandb/__init__.py +16 -14
  2. wandb/__init__.pyi +427 -450
  3. wandb/agents/pyagent.py +41 -12
  4. wandb/analytics/sentry.py +7 -2
  5. wandb/apis/importers/mlflow.py +1 -1
  6. wandb/apis/public/__init__.py +1 -1
  7. wandb/apis/public/api.py +526 -360
  8. wandb/apis/public/artifacts.py +204 -8
  9. wandb/apis/public/automations.py +19 -3
  10. wandb/apis/public/files.py +172 -33
  11. wandb/apis/public/history.py +67 -15
  12. wandb/apis/public/integrations.py +25 -2
  13. wandb/apis/public/jobs.py +90 -2
  14. wandb/apis/public/projects.py +130 -79
  15. wandb/apis/public/query_generator.py +11 -1
  16. wandb/apis/public/registries/registries_search.py +7 -15
  17. wandb/apis/public/reports.py +83 -5
  18. wandb/apis/public/runs.py +299 -105
  19. wandb/apis/public/sweeps.py +222 -22
  20. wandb/apis/public/teams.py +41 -4
  21. wandb/apis/public/users.py +45 -4
  22. wandb/beta/workflows.py +66 -30
  23. wandb/bin/gpu_stats +0 -0
  24. wandb/bin/wandb-core +0 -0
  25. wandb/cli/cli.py +80 -1
  26. wandb/env.py +8 -0
  27. wandb/errors/errors.py +4 -1
  28. wandb/integration/lightning/fabric/logger.py +3 -4
  29. wandb/integration/metaflow/__init__.py +6 -0
  30. wandb/integration/metaflow/data_pandas.py +74 -0
  31. wandb/integration/metaflow/errors.py +13 -0
  32. wandb/integration/metaflow/metaflow.py +205 -190
  33. wandb/integration/openai/fine_tuning.py +1 -2
  34. wandb/jupyter.py +5 -5
  35. wandb/plot/custom_chart.py +30 -7
  36. wandb/proto/v3/wandb_internal_pb2.py +280 -280
  37. wandb/proto/v3/wandb_telemetry_pb2.py +4 -4
  38. wandb/proto/v4/wandb_internal_pb2.py +280 -280
  39. wandb/proto/v4/wandb_telemetry_pb2.py +4 -4
  40. wandb/proto/v5/wandb_internal_pb2.py +280 -280
  41. wandb/proto/v5/wandb_telemetry_pb2.py +4 -4
  42. wandb/proto/v6/wandb_internal_pb2.py +280 -280
  43. wandb/proto/v6/wandb_telemetry_pb2.py +4 -4
  44. wandb/proto/wandb_deprecated.py +6 -0
  45. wandb/sdk/artifacts/_internal_artifact.py +19 -8
  46. wandb/sdk/artifacts/_validators.py +8 -0
  47. wandb/sdk/artifacts/artifact.py +106 -75
  48. wandb/sdk/data_types/audio.py +38 -10
  49. wandb/sdk/data_types/base_types/media.py +6 -56
  50. wandb/sdk/data_types/graph.py +48 -14
  51. wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +1 -3
  52. wandb/sdk/data_types/helper_types/image_mask.py +1 -3
  53. wandb/sdk/data_types/histogram.py +34 -21
  54. wandb/sdk/data_types/html.py +35 -12
  55. wandb/sdk/data_types/image.py +104 -68
  56. wandb/sdk/data_types/molecule.py +32 -19
  57. wandb/sdk/data_types/object_3d.py +36 -17
  58. wandb/sdk/data_types/plotly.py +18 -5
  59. wandb/sdk/data_types/saved_model.py +4 -6
  60. wandb/sdk/data_types/table.py +59 -30
  61. wandb/sdk/data_types/video.py +53 -26
  62. wandb/sdk/integration_utils/auto_logging.py +2 -2
  63. wandb/sdk/internal/internal_api.py +6 -0
  64. wandb/sdk/internal/job_builder.py +6 -0
  65. wandb/sdk/launch/agent/agent.py +8 -1
  66. wandb/sdk/launch/agent/run_queue_item_file_saver.py +2 -2
  67. wandb/sdk/launch/create_job.py +3 -1
  68. wandb/sdk/launch/inputs/internal.py +3 -4
  69. wandb/sdk/launch/inputs/schema.py +1 -0
  70. wandb/sdk/launch/runner/kubernetes_monitor.py +1 -0
  71. wandb/sdk/launch/runner/kubernetes_runner.py +328 -1
  72. wandb/sdk/launch/sweeps/scheduler.py +2 -3
  73. wandb/sdk/lib/asyncio_compat.py +3 -0
  74. wandb/sdk/lib/deprecate.py +1 -7
  75. wandb/sdk/lib/disabled.py +1 -1
  76. wandb/sdk/lib/hashutil.py +14 -1
  77. wandb/sdk/lib/module.py +7 -13
  78. wandb/sdk/lib/progress.py +0 -19
  79. wandb/sdk/lib/sock_client.py +0 -4
  80. wandb/sdk/wandb_init.py +66 -91
  81. wandb/sdk/wandb_login.py +18 -14
  82. wandb/sdk/wandb_metric.py +2 -0
  83. wandb/sdk/wandb_run.py +406 -414
  84. wandb/sdk/wandb_settings.py +130 -2
  85. wandb/sdk/wandb_setup.py +28 -28
  86. wandb/sdk/wandb_sweep.py +14 -13
  87. wandb/sdk/wandb_watch.py +4 -6
  88. wandb/sync/sync.py +10 -0
  89. wandb/util.py +57 -0
  90. wandb/wandb_run.py +1 -2
  91. {wandb-0.21.0.dist-info → wandb-0.21.1.dist-info}/METADATA +1 -1
  92. {wandb-0.21.0.dist-info → wandb-0.21.1.dist-info}/RECORD +786 -786
  93. wandb/vendor/pynvml/__init__.py +0 -0
  94. wandb/vendor/pynvml/pynvml.py +0 -4779
  95. {wandb-0.21.0.dist-info → wandb-0.21.1.dist-info}/WHEEL +0 -0
  96. {wandb-0.21.0.dist-info → wandb-0.21.1.dist-info}/entry_points.txt +0 -0
  97. {wandb-0.21.0.dist-info → wandb-0.21.1.dist-info}/licenses/LICENSE +0 -0
wandb/sdk/launch/runner/kubernetes_runner.py CHANGED
@@ -6,6 +6,8 @@ import datetime
6
6
  import json
7
7
  import logging
8
8
  import os
9
+ import time
10
+ import uuid
9
11
  from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
10
12
 
11
13
  import yaml
@@ -20,6 +22,7 @@ from wandb.sdk.launch.registry.local_registry import LocalRegistry
20
22
  from wandb.sdk.launch.runner.abstract import Status
21
23
  from wandb.sdk.launch.runner.kubernetes_monitor import (
22
24
  WANDB_K8S_LABEL_AGENT,
25
+ WANDB_K8S_LABEL_AUXILIARY_RESOURCE,
23
26
  WANDB_K8S_LABEL_MONITOR,
24
27
  WANDB_K8S_RUN_ID,
25
28
  CustomResource,
@@ -47,6 +50,9 @@ get_module(
47
50
 
48
51
  import kubernetes_asyncio # type: ignore # noqa: E402
49
52
  from kubernetes_asyncio import client # noqa: E402
53
+ from kubernetes_asyncio.client.api.apps_v1_api import ( # type: ignore # noqa: E402
54
+ AppsV1Api,
55
+ )
50
56
  from kubernetes_asyncio.client.api.batch_v1_api import ( # type: ignore # noqa: E402
51
57
  BatchV1Api,
52
58
  )
@@ -78,9 +84,11 @@ class KubernetesSubmittedRun(AbstractRun):
78
84
  self,
79
85
  batch_api: "BatchV1Api",
80
86
  core_api: "CoreV1Api",
87
+ apps_api: "AppsV1Api",
81
88
  name: str,
82
89
  namespace: Optional[str] = "default",
83
90
  secret: Optional["V1Secret"] = None,
91
+ auxiliary_resource_label_key: Optional[str] = None,
84
92
  ) -> None:
85
93
  """Initialize a KubernetesSubmittedRun.
86
94
 
@@ -104,10 +112,12 @@ class KubernetesSubmittedRun(AbstractRun):
104
112
  """
105
113
  self.batch_api = batch_api
106
114
  self.core_api = core_api
115
+ self.apps_api = apps_api
107
116
  self.name = name
108
117
  self.namespace = namespace
109
118
  self._fail_count = 0
110
119
  self.secret = secret
120
+ self.auxiliary_resource_label_key = auxiliary_resource_label_key
111
121
 
112
122
  @property
113
123
  def id(self) -> str:
@@ -149,6 +159,7 @@ class KubernetesSubmittedRun(AbstractRun):
149
159
  await asyncio.sleep(5)
150
160
 
151
161
  await self._delete_secret()
162
+ await self._delete_auxiliary_resources_by_label()
152
163
  return (
153
164
  status.state == "finished"
154
165
  ) # todo: not sure if this (copied from aws runner) is the right approach? should we return false on failure
@@ -157,6 +168,7 @@ class KubernetesSubmittedRun(AbstractRun):
157
168
  status = LaunchKubernetesMonitor.get_status(self.name)
158
169
  if status in ["stopped", "failed", "finished", "preempted"]:
159
170
  await self._delete_secret()
171
+ await self._delete_auxiliary_resources_by_label()
160
172
  return status
161
173
 
162
174
  async def cancel(self) -> None:
@@ -167,6 +179,7 @@ class KubernetesSubmittedRun(AbstractRun):
167
179
  name=self.name,
168
180
  )
169
181
  await self._delete_secret()
182
+ await self._delete_auxiliary_resources_by_label()
170
183
  except ApiException as e:
171
184
  raise LaunchError(
172
185
  f"Failed to delete Kubernetes Job {self.name} in namespace {self.namespace}: {str(e)}"
@@ -181,6 +194,52 @@ class KubernetesSubmittedRun(AbstractRun):
181
194
  )
182
195
  self.secret = None
183
196
 
197
+ async def _delete_auxiliary_resources_by_label(self) -> None:
198
+ if self.auxiliary_resource_label_key is None:
199
+ return
200
+
201
+ label_selector = (
202
+ f"{WANDB_K8S_LABEL_AUXILIARY_RESOURCE}={self.auxiliary_resource_label_key}"
203
+ )
204
+
205
+ try:
206
+ resource_cleanups = [
207
+ (self.core_api, "service"),
208
+ (self.batch_api, "job"),
209
+ (self.core_api, "pod"),
210
+ (self.core_api, "config_map"),
211
+ (self.core_api, "secret"),
212
+ (self.apps_api, "deployment"),
213
+ (self.apps_api, "replica_set"),
214
+ (self.apps_api, "daemon_set"),
215
+ ]
216
+
217
+ for api_client, resource_type in resource_cleanups:
218
+ try:
219
+ list_method = getattr(
220
+ api_client, f"list_namespaced_{resource_type}"
221
+ )
222
+ delete_method = getattr(
223
+ api_client, f"delete_namespaced_{resource_type}"
224
+ )
225
+
226
+ # List resources with our label
227
+ resources = await list_method(
228
+ namespace=self.namespace, label_selector=label_selector
229
+ )
230
+
231
+ # Delete each resource
232
+ for resource in resources.items:
233
+ await delete_method(
234
+ name=resource.metadata.name, namespace=self.namespace
235
+ )
236
+
237
+ except (AttributeError, ApiException) as e:
238
+ wandb.termwarn(f"Could not clean up {resource_type}: {e}")
239
+
240
+ except Exception as e:
241
+ wandb.termwarn(f"Failed to clean up some auxiliary resources: {e}")
242
+
184
243
 
185
244
  class CrdSubmittedRun(AbstractRun):
186
245
  """Run submitted to a CRD backend, e.g. Volcano."""
@@ -366,6 +425,7 @@ class KubernetesRunner(AbstractRunner):
366
425
  job_metadata["generateName"] = make_name_dns_safe(
367
426
  f"launch-{launch_project.target_entity}-{launch_project.target_project}-"
368
427
  )
428
+ job_metadata["namespace"] = namespace
369
429
 
370
430
  for i, cont in enumerate(containers):
371
431
  if "name" not in cont:
@@ -489,6 +549,235 @@ class KubernetesRunner(AbstractRunner):
489
549
 
490
550
  return job, api_key_secret
491
551
 
552
+ async def _wait_for_resource_ready(
553
+ self,
554
+ api_client: kubernetes_asyncio.client.ApiClient,
555
+ config: Dict[str, Any],
556
+ namespace: str,
557
+ timeout_seconds: int = 300,
558
+ ) -> None:
559
+ """Wait for a Kubernetes resource to be ready.
560
+
561
+ Arguments:
562
+ api_client: The Kubernetes API client.
563
+ config: The resource configuration.
564
+ namespace: The namespace where the resource was created.
565
+ timeout_seconds: Maximum time to wait for readiness.
566
+ """
567
+ resource_kind = config.get("kind")
568
+ resource_name = config.get("metadata", {}).get("name")
569
+
570
+ if not resource_kind or not resource_name:
571
+ wandb.termerror(
572
+ f"{LOG_PREFIX}Cannot wait for resource without kind or name"
573
+ )
574
+ return
575
+
576
+ wandb.termlog(
577
+ f"{LOG_PREFIX}Waiting for {resource_kind} '{resource_name}' to be ready..."
578
+ )
579
+
580
+ start_time = time.time()
581
+
582
+ if resource_kind == "Deployment":
583
+ await self._wait_for_deployment_ready(
584
+ api_client, resource_name, namespace, timeout_seconds
585
+ )
586
+ elif resource_kind == "Service":
587
+ await self._wait_for_service_ready(
588
+ api_client, resource_name, namespace, timeout_seconds
589
+ )
590
+ elif resource_kind == "Pod":
591
+ await self._wait_for_pod_ready(
592
+ api_client, resource_name, namespace, timeout_seconds
593
+ )
594
+ else:
595
+ wandb.termlog(
596
+ f"{LOG_PREFIX}No specific readiness check for {resource_kind}, waiting 5 seconds..."
597
+ )
598
+ await asyncio.sleep(5)
599
+
600
+ elapsed = time.time() - start_time
601
+ wandb.termlog(
602
+ f"{LOG_PREFIX}{resource_kind} '{resource_name}' is ready after {elapsed:.1f}s"
603
+ )
604
+
605
+ async def _wait_for_deployment_ready(
606
+ self,
607
+ api_client: kubernetes_asyncio.client.ApiClient,
608
+ name: str,
609
+ namespace: str,
610
+ timeout_seconds: int,
611
+ ) -> None:
612
+ """Wait for a Deployment to be ready."""
613
+ apps_api = kubernetes_asyncio.client.AppsV1Api(api_client)
614
+
615
+ async def check_deployment_ready():
616
+ deployment = await apps_api.read_namespaced_deployment(
617
+ name=name, namespace=namespace
618
+ )
619
+ status = deployment.status
620
+
621
+ if status.ready_replicas and status.replicas:
622
+ return status.ready_replicas >= status.replicas
623
+
624
+ return False
625
+
626
+ await self._wait_with_timeout(check_deployment_ready, timeout_seconds, name)
627
+
628
+ async def _wait_for_service_ready(
629
+ self,
630
+ api_client: kubernetes_asyncio.client.ApiClient,
631
+ name: str,
632
+ namespace: str,
633
+ timeout_seconds: int,
634
+ ) -> None:
635
+ """Wait for a Service to have endpoints."""
636
+ core_api = kubernetes_asyncio.client.CoreV1Api(api_client)
637
+
638
+ async def check_service_ready():
639
+ endpoints = await core_api.read_namespaced_endpoints(
640
+ name=name, namespace=namespace
641
+ )
642
+ if endpoints.subsets:
643
+ for subset in endpoints.subsets:
644
+ if subset.addresses: # These are ready pod addresses
645
+ return True
646
+ return False
647
+
648
+ await self._wait_with_timeout(check_service_ready, timeout_seconds, name)
649
+
650
+ async def _wait_for_pod_ready(
651
+ self,
652
+ api_client: kubernetes_asyncio.client.ApiClient,
653
+ name: str,
654
+ namespace: str,
655
+ timeout_seconds: int,
656
+ ) -> None:
657
+ """Wait for a Pod to be ready."""
658
+ core_api = kubernetes_asyncio.client.CoreV1Api(api_client)
659
+
660
+ async def check_pod_ready():
661
+ pod = await core_api.read_namespaced_pod(name=name, namespace=namespace)
662
+ if pod.status.phase == "Running":
663
+ if pod.status.container_statuses:
664
+ return all(status.ready for status in pod.status.container_statuses)
665
+ return True
666
+ return False
667
+
668
+ await self._wait_with_timeout(check_pod_ready, timeout_seconds, name)
669
+
670
+ async def _wait_with_timeout(
671
+ self, check_func, timeout_seconds: int, name: str
672
+ ) -> None:
673
+ """Generic timeout wrapper for readiness checks."""
674
+ start_time = time.time()
675
+
676
+ while time.time() - start_time < timeout_seconds:
677
+ try:
678
+ if await check_func():
679
+ return
680
+ except kubernetes_asyncio.client.ApiException as e:
681
+ if e.status == 404:
682
+ pass
683
+ else:
684
+ wandb.termerror(
685
+ f"{LOG_PREFIX}Error waiting for resource '{name}': {e}"
686
+ )
687
+ raise
688
+ except Exception as e:
689
+ wandb.termerror(f"{LOG_PREFIX}Error waiting for resource '{name}': {e}")
690
+ raise
691
+ await asyncio.sleep(2)
692
+
693
+ raise LaunchError(
694
+ f"Resource '{name}' not ready within {timeout_seconds} seconds"
695
+ )
696
+
697
+ async def _prepare_resource(
698
+ self,
699
+ api_client: kubernetes_asyncio.client.ApiClient,
700
+ config: Dict[str, Any],
701
+ namespace: str,
702
+ run_id: str,
703
+ auxiliary_resource_label_key: str,
704
+ launch_project: LaunchProject,
705
+ api_key_secret: Optional["V1Secret"] = None,
706
+ wait_for_ready: bool = True,
707
+ wait_timeout: int = 300,
708
+ ) -> None:
709
+ """Prepare a service for launch.
710
+
711
+ Arguments:
712
+ api_client: The Kubernetes API client.
713
+ config: The resource configuration to prepare.
714
+ namespace: The namespace to create the resource in.
715
+ run_id: The run ID to label the resource with.
716
+ auxiliary_resource_label_key: The key of the auxiliary resource label.
717
+ launch_project: The launch project to get environment variables from.
718
+ api_key_secret: The API key secret to inject.
719
+ wait_for_ready: Whether to wait for the resource to be ready after creation.
720
+ wait_timeout: Maximum time in seconds to wait for resource readiness.
721
+ """
722
+ config.setdefault("metadata", {})
723
+ config["metadata"].setdefault("labels", {})
724
+ config["metadata"]["labels"][WANDB_K8S_RUN_ID] = run_id
725
+ config["metadata"]["labels"][WANDB_K8S_LABEL_AUXILIARY_RESOURCE] = (
726
+ auxiliary_resource_label_key
727
+ )
728
+ config["metadata"]["labels"]["wandb.ai/created-by"] = "launch-agent"
729
+
730
+ if config.get("kind") == "Service" or config.get("kind") == "Deployment":
731
+ config.setdefault("metadata", {})
732
+ original_name = config["metadata"].get("name", config.get("kind"))
733
+ safe_name = make_name_dns_safe(original_name)
734
+ safe_entity = make_name_dns_safe(launch_project.target_entity or "")
735
+ safe_project = make_name_dns_safe(launch_project.target_project or "")
736
+ safe_run_id = make_name_dns_safe(run_id or "")
737
+
738
+ new_name = f"{safe_name}-{safe_entity}-{safe_project}-{safe_run_id}"
739
+ config["metadata"]["name"] = new_name
740
+ wandb.termlog(
741
+ f"{LOG_PREFIX}Modified {config.get('kind')} name from '{original_name}' to '{new_name}'"
742
+ )
743
+
744
+ env_vars = launch_project.get_env_vars_dict(
745
+ self._api, MAX_ENV_LENGTHS[self.__class__.__name__]
746
+ )
747
+ wandb_config_env = {
748
+ "WANDB_CONFIG": env_vars.get("WANDB_CONFIG", "{}"),
749
+ }
750
+ add_wandb_env(config, wandb_config_env)
751
+
752
+ if api_key_secret:
753
+ for cont in yield_containers(config):
754
+ env = cont.setdefault("env", [])
755
+ env.append(
756
+ {
757
+ "name": "WANDB_API_KEY",
758
+ "valueFrom": {
759
+ "secretKeyRef": {
760
+ "name": api_key_secret.metadata.name,
761
+ "key": "password",
762
+ }
763
+ },
764
+ }
765
+ )
766
+ cont["env"] = env
767
+
768
+ try:
769
+ await kubernetes_asyncio.utils.create_from_dict(
770
+ api_client, config, namespace=namespace
771
+ )
772
+
773
+ if wait_for_ready:
774
+ await self._wait_for_resource_ready(
775
+ api_client, config, namespace, wait_timeout
776
+ )
777
+ except Exception as e:
778
+ wandb.termerror(f"{LOG_PREFIX}Failed to create Kubernetes resource: {e}")
779
+ raise LaunchError(f"Failed to create Kubernetes resource: {e}")
780
+
492
781
  async def run(
493
782
  self, launch_project: LaunchProject, image_uri: str
494
783
  ) -> Optional[AbstractRun]:
@@ -630,10 +919,42 @@ class KubernetesRunner(AbstractRunner):
630
919
 
631
920
  batch_api = kubernetes_asyncio.client.BatchV1Api(api_client)
632
921
  core_api = kubernetes_asyncio.client.CoreV1Api(api_client)
922
+ apps_api = kubernetes_asyncio.client.AppsV1Api(api_client)
923
+
633
924
  namespace = self.get_namespace(resource_args, context)
634
925
  job, secret = await self._inject_defaults(
635
926
  resource_args, launch_project, image_uri, namespace, core_api
636
927
  )
928
+
929
+ additional_services = launch_project.launch_spec.get("additional_services", [])
930
+ auxiliary_resource_label_key = None
931
+ if additional_services:
932
+ wandb.termlog(
933
+ f"{LOG_PREFIX}Creating additional services: {additional_services}"
934
+ )
935
+ auxiliary_resource_label_key = f"aux-{uuid.uuid4()}"
936
+
937
+ wait_for_ready = resource_args.get("wait_for_ready", True)
938
+ wait_timeout = resource_args.get("wait_timeout", 300)
939
+
940
+ await asyncio.gather(
941
+ *[
942
+ self._prepare_resource(
943
+ api_client,
944
+ resource.get("config"),
945
+ namespace,
946
+ launch_project.run_id,
947
+ auxiliary_resource_label_key,
948
+ launch_project,
949
+ secret,
950
+ wait_for_ready,
951
+ wait_timeout,
952
+ )
953
+ for resource in additional_services
954
+ if resource.get("config", {})
955
+ ]
956
+ )
957
+
637
958
  msg = "Creating Kubernetes job"
638
959
  if "name" in resource_args:
639
960
  msg += f": {resource_args['name']}"
@@ -658,7 +979,13 @@ class KubernetesRunner(AbstractRunner):
658
979
  job_name = job_response.metadata.name
659
980
  LaunchKubernetesMonitor.monitor_namespace(namespace)
660
981
  submitted_job = KubernetesSubmittedRun(
661
- batch_api, core_api, job_name, namespace, secret
982
+ batch_api,
983
+ core_api,
984
+ apps_api,
985
+ job_name,
986
+ namespace,
987
+ secret,
988
+ auxiliary_resource_label_key,
662
989
  )
663
990
  if self.backend_config[PROJECT_SYNCHRONOUS]:
664
991
  await submitted_job.wait()
wandb/sdk/launch/sweeps/scheduler.py CHANGED
@@ -36,7 +36,6 @@ if TYPE_CHECKING:
36
36
  import wandb.apis.public as public
37
37
  from wandb.apis.internal import Api
38
38
  from wandb.apis.public import QueuedRun, Run
39
- from wandb.sdk.wandb_run import Run as SdkRun
40
39
 
41
40
 
42
41
  _logger = logging.getLogger(__name__)
@@ -255,10 +254,10 @@ class Scheduler(ABC):
255
254
  _id: w for _id, w in self._workers.items() if _id not in self.busy_workers
256
255
  }
257
256
 
258
- def _init_wandb_run(self) -> "SdkRun":
257
+ def _init_wandb_run(self) -> "wandb.Run":
259
258
  """Controls resume or init logic for a scheduler wandb run."""
260
259
  settings = wandb.Settings(disable_job_creation=True)
261
- run: SdkRun = wandb.init( # type: ignore
260
+ run: wandb.Run = wandb.init( # type: ignore
262
261
  name=f"Scheduler.{self._sweep_id}",
263
262
  resume="allow",
264
263
  config=self._kwargs, # when run as a job, this sets config
wandb/sdk/lib/asyncio_compat.py CHANGED
@@ -100,6 +100,9 @@ class _Runner:
100
100
  raise _RunnerCancelledError()
101
101
 
102
102
  finally:
103
+ # NOTE: asyncio.run() cancels all tasks after the main task exits,
104
+ # but this is not documented, so we cancel them explicitly here
105
+ # as well. It also blocks until canceled tasks complete.
103
106
  cancellation_task.cancel()
104
107
  fn_task.cancel()
105
108
 
wandb/sdk/lib/deprecate.py CHANGED
@@ -1,20 +1,14 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import TYPE_CHECKING
4
-
5
3
  import wandb
6
4
  from wandb.proto.wandb_deprecated import DEPRECATED_FEATURES
7
5
  from wandb.sdk.lib import telemetry
8
6
 
9
- # Necessary to break import cycle.
10
- if TYPE_CHECKING:
11
- from wandb import wandb_run
12
-
13
7
 
14
8
  def deprecate(
15
9
  field_name: DEPRECATED_FEATURES,
16
10
  warning_message: str,
17
- run: wandb_run.Run | None = None,
11
+ run: wandb.Run | None = None,
18
12
  ) -> None:
19
13
  """Warn the user that a feature has been deprecated.
20
14
 
wandb/sdk/lib/disabled.py CHANGED
@@ -26,5 +26,5 @@ class RunDisabled:
26
26
  deprecate.deprecate(
27
27
  field_name=Deprecated.run_disabled,
28
28
  warning_message="RunDisabled is deprecated and is a no-op. "
29
- '`wandb.init(mode="disabled")` now returns and instance of `wandb.sdk.wandb_run.Run`.',
29
+ '`wandb.init(mode="disabled")` now returns an instance of `wandb.Run`.',
30
30
  )
wandb/sdk/lib/hashutil.py CHANGED
@@ -2,8 +2,10 @@ from __future__ import annotations
2
2
 
3
3
  import base64
4
4
  import hashlib
5
+ import logging
5
6
  import mmap
6
7
  import sys
8
+ import time
7
9
  from typing import TYPE_CHECKING, NewType
8
10
 
9
11
  from wandb.sdk.lib.paths import StrPath
@@ -15,6 +17,8 @@ ETag = NewType("ETag", str)
15
17
  HexMD5 = NewType("HexMD5", str)
16
18
  B64MD5 = NewType("B64MD5", str)
17
19
 
20
+ logger = logging.getLogger(__name__)
21
+
18
22
 
19
23
  def _md5(data: bytes = b"") -> _hashlib.HASH:
20
24
  """Allow FIPS-compliant md5 hash when supported."""
@@ -44,7 +48,16 @@ def hex_to_b64_id(encoded_string: str | bytes) -> B64MD5:
44
48
 
45
49
 
46
50
  def md5_file_b64(*paths: StrPath) -> B64MD5:
47
- return _b64_from_hasher(_md5_file_hasher(*paths))
51
+ start_time = time.monotonic()
52
+ digest = _b64_from_hasher(_md5_file_hasher(*paths))
53
+ hash_time_seconds = time.monotonic() - start_time
54
+ if hash_time_seconds > 1.0:
55
+ logger.debug(
56
+ "Computed MD5 hash for file. paths=%s, hashTimeMs=%d",
57
+ paths,
58
+ int(hash_time_seconds * 1000),
59
+ )
60
+ return digest
48
61
 
49
62
 
50
63
  def md5_file_hex(*paths: StrPath) -> HexMD5:
wandb/sdk/lib/module.py CHANGED
@@ -57,22 +57,16 @@ def unset_globals():
57
57
  wandb.run = None
58
58
  wandb.config = preinit.PreInitObject("wandb.config")
59
59
  wandb.summary = preinit.PreInitObject("wandb.summary")
60
- wandb.log = preinit.PreInitCallable("wandb.log", wandb.wandb_sdk.wandb_run.Run.log)
61
- wandb.watch = preinit.PreInitCallable(
62
- "wandb.watch", wandb.wandb_sdk.wandb_run.Run.watch
63
- )
64
- wandb.unwatch = preinit.PreInitCallable(
65
- "wandb.unwatch", wandb.wandb_sdk.wandb_run.Run.unwatch
66
- )
67
- wandb.save = preinit.PreInitCallable(
68
- "wandb.save", wandb.wandb_sdk.wandb_run.Run.save
69
- )
60
+ wandb.log = preinit.PreInitCallable("wandb.log", wandb.Run.log)
61
+ wandb.watch = preinit.PreInitCallable("wandb.watch", wandb.Run.watch)
62
+ wandb.unwatch = preinit.PreInitCallable("wandb.unwatch", wandb.Run.unwatch)
63
+ wandb.save = preinit.PreInitCallable("wandb.save", wandb.Run.save)
70
64
  wandb.use_artifact = preinit.PreInitCallable(
71
- "wandb.use_artifact", wandb.wandb_sdk.wandb_run.Run.use_artifact
65
+ "wandb.use_artifact", wandb.Run.use_artifact
72
66
  )
73
67
  wandb.log_artifact = preinit.PreInitCallable(
74
- "wandb.log_artifact", wandb.wandb_sdk.wandb_run.Run.log_artifact
68
+ "wandb.log_artifact", wandb.Run.log_artifact
75
69
  )
76
70
  wandb.define_metric = preinit.PreInitCallable(
77
- "wandb.define_metric", wandb.wandb_sdk.wandb_run.Run.define_metric
71
+ "wandb.define_metric", wandb.Run.define_metric
78
72
  )
wandb/sdk/lib/progress.py CHANGED
@@ -14,25 +14,6 @@ from wandb.sdk.lib import asyncio_compat
14
14
  from . import printer as p
15
15
 
16
16
 
17
- def print_sync_dedupe_stats(
18
- printer: p.Printer,
19
- final_result: pb.PollExitResponse,
20
- ) -> None:
21
- """Print how much W&B sync reduced the amount of uploaded data.
22
-
23
- Args:
24
- final_result: The final PollExit result.
25
- """
26
- deduped_bytes = final_result.pusher_stats.deduped_bytes
27
- total_bytes = final_result.pusher_stats.total_bytes
28
-
29
- if total_bytes <= 0 or deduped_bytes <= 0:
30
- return
31
-
32
- frac = deduped_bytes / total_bytes
33
- printer.display(f"W&B sync reduced upload amount by {frac:.1%}")
34
-
35
-
36
17
  async def loop_printing_operation_stats(
37
18
  progress: ProgressPrinter,
38
19
  interface: interface.InterfaceBase,
wandb/sdk/lib/sock_client.py CHANGED
@@ -110,10 +110,6 @@ class SockClient:
110
110
  def shutdown(self, val: int) -> None:
111
111
  self._sock.shutdown(val)
112
112
 
113
- def set_socket(self, sock: socket.socket) -> None:
114
- self._sock = sock
115
- self._detect_bufsize()
116
-
117
113
  def _sendall_with_error_handle(self, data: bytes) -> None:
118
114
  # This is a helper function for sending data in a retry fashion.
119
115
  # Similar to the sendall() function in the socket module, but with