wandb 0.21.4__py3-none-macosx_12_0_arm64.whl → 0.22.0__py3-none-macosx_12_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
  # Generated by the protocol buffer compiler. DO NOT EDIT!
3
3
  # NO CHECKED-IN PROTOBUF GENCODE
4
4
  # source: wandb/proto/wandb_server.proto
5
- # Protobuf Python Version: 6.30.0
5
+ # Protobuf Python Version: 6.31.1
6
6
  """Generated protocol buffer code."""
7
7
  from google.protobuf import descriptor as _descriptor
8
8
  from google.protobuf import descriptor_pool as _descriptor_pool
@@ -12,8 +12,8 @@ from google.protobuf.internal import builder as _builder
12
12
  _runtime_version.ValidateProtobufRuntimeVersion(
13
13
  _runtime_version.Domain.PUBLIC,
14
14
  6,
15
- 30,
16
- 0,
15
+ 31,
16
+ 1,
17
17
  '',
18
18
  'wandb/proto/wandb_server.proto'
19
19
  )
@@ -2,7 +2,7 @@
2
2
  # Generated by the protocol buffer compiler. DO NOT EDIT!
3
3
  # NO CHECKED-IN PROTOBUF GENCODE
4
4
  # source: wandb/proto/wandb_settings.proto
5
- # Protobuf Python Version: 6.30.0
5
+ # Protobuf Python Version: 6.31.1
6
6
  """Generated protocol buffer code."""
7
7
  from google.protobuf import descriptor as _descriptor
8
8
  from google.protobuf import descriptor_pool as _descriptor_pool
@@ -12,8 +12,8 @@ from google.protobuf.internal import builder as _builder
12
12
  _runtime_version.ValidateProtobufRuntimeVersion(
13
13
  _runtime_version.Domain.PUBLIC,
14
14
  6,
15
- 30,
16
- 0,
15
+ 31,
16
+ 1,
17
17
  '',
18
18
  'wandb/proto/wandb_settings.proto'
19
19
  )
@@ -2,7 +2,7 @@
2
2
  # Generated by the protocol buffer compiler. DO NOT EDIT!
3
3
  # NO CHECKED-IN PROTOBUF GENCODE
4
4
  # source: wandb/proto/wandb_sync.proto
5
- # Protobuf Python Version: 6.30.0
5
+ # Protobuf Python Version: 6.31.1
6
6
  """Generated protocol buffer code."""
7
7
  from google.protobuf import descriptor as _descriptor
8
8
  from google.protobuf import descriptor_pool as _descriptor_pool
@@ -12,8 +12,8 @@ from google.protobuf.internal import builder as _builder
12
12
  _runtime_version.ValidateProtobufRuntimeVersion(
13
13
  _runtime_version.Domain.PUBLIC,
14
14
  6,
15
- 30,
16
- 0,
15
+ 31,
16
+ 1,
17
17
  '',
18
18
  'wandb/proto/wandb_sync.proto'
19
19
  )
@@ -2,7 +2,7 @@
2
2
  # Generated by the protocol buffer compiler. DO NOT EDIT!
3
3
  # NO CHECKED-IN PROTOBUF GENCODE
4
4
  # source: wandb/proto/wandb_telemetry.proto
5
- # Protobuf Python Version: 6.30.0
5
+ # Protobuf Python Version: 6.31.1
6
6
  """Generated protocol buffer code."""
7
7
  from google.protobuf import descriptor as _descriptor
8
8
  from google.protobuf import descriptor_pool as _descriptor_pool
@@ -12,8 +12,8 @@ from google.protobuf.internal import builder as _builder
12
12
  _runtime_version.ValidateProtobufRuntimeVersion(
13
13
  _runtime_version.Domain.PUBLIC,
14
14
  6,
15
- 30,
16
- 0,
15
+ 31,
16
+ 1,
17
17
  '',
18
18
  'wandb/proto/wandb_telemetry.proto'
19
19
  )
@@ -65,7 +65,7 @@ class GCSHandler(StorageHandler):
65
65
  path, hit, cache_open = self._cache.check_etag_obj_path(
66
66
  url=URIStr(manifest_entry.ref),
67
67
  etag=ETag(manifest_entry.digest),
68
- size=manifest_entry.size if manifest_entry.size is not None else 0,
68
+ size=manifest_entry.size or 0,
69
69
  )
70
70
  if hit:
71
71
  return path
@@ -43,7 +43,7 @@ class HTTPHandler(StorageHandler):
43
43
  path, hit, cache_open = self._cache.check_etag_obj_path(
44
44
  URIStr(manifest_entry.ref),
45
45
  ETag(manifest_entry.digest),
46
- manifest_entry.size if manifest_entry.size is not None else 0,
46
+ manifest_entry.size or 0,
47
47
  )
48
48
  if hit:
49
49
  return path
@@ -54,7 +54,6 @@ class HTTPHandler(StorageHandler):
54
54
  cookies=_thread_local_api_settings.cookies,
55
55
  headers=_thread_local_api_settings.headers,
56
56
  )
57
- response.raise_for_status()
58
57
 
59
58
  digest: ETag | FilePathStr | URIStr | None
60
59
  digest, size, extra = self._entry_from_headers(response.headers)
@@ -87,7 +86,6 @@ class HTTPHandler(StorageHandler):
87
86
  cookies=_thread_local_api_settings.cookies,
88
87
  headers=_thread_local_api_settings.headers,
89
88
  ) as response:
90
- response.raise_for_status()
91
89
  digest: ETag | FilePathStr | URIStr | None
92
90
  digest, size, extra = self._entry_from_headers(response.headers)
93
91
  digest = digest or path
@@ -51,7 +51,7 @@ class LocalFileHandler(StorageHandler):
51
51
 
52
52
  path, hit, cache_open = self._cache.check_md5_obj_path(
53
53
  B64MD5(manifest_entry.digest), # TODO(spencerpearson): unsafe cast
54
- manifest_entry.size if manifest_entry.size is not None else 0,
54
+ manifest_entry.size or 0,
55
55
  )
56
56
  if hit:
57
57
  return path
@@ -96,7 +96,7 @@ class S3Handler(StorageHandler):
96
96
  path, hit, cache_open = self._cache.check_etag_obj_path(
97
97
  URIStr(manifest_entry.ref),
98
98
  ETag(manifest_entry.digest),
99
- manifest_entry.size if manifest_entry.size is not None else 0,
99
+ manifest_entry.size or 0,
100
100
  )
101
101
  if hit:
102
102
  return path
@@ -0,0 +1,63 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Final
4
+
5
+ from requests import Response, Session
6
+ from requests.adapters import HTTPAdapter
7
+ from urllib3.util.retry import Retry
8
+
9
+ from ..storage_handler import StorageHandler
10
+ from ..storage_handlers.azure_handler import AzureHandler
11
+ from ..storage_handlers.gcs_handler import GCSHandler
12
+ from ..storage_handlers.http_handler import HTTPHandler
13
+ from ..storage_handlers.local_file_handler import LocalFileHandler
14
+ from ..storage_handlers.s3_handler import S3Handler
15
+ from ..storage_handlers.wb_artifact_handler import WBArtifactHandler
16
+ from ..storage_handlers.wb_local_artifact_handler import WBLocalArtifactHandler
17
+
18
+ # Sleep length: 0, 2, 4, 8, 16, 32, 64, 120, 120, 120, 120, 120, 120, 120, 120, 120
19
+ # seconds, i.e. a total of 20min 6s.
20
+ HTTP_RETRY_STRATEGY: Final[Retry] = Retry(
21
+ backoff_factor=1,
22
+ total=16,
23
+ status_forcelist=(308, 408, 409, 429, 500, 502, 503, 504),
24
+ )
25
+ HTTP_POOL_CONNECTIONS: Final[int] = 64
26
+ HTTP_POOL_MAXSIZE: Final[int] = 64
27
+
28
+
29
+ def raise_for_status(response: Response, *_, **__) -> None:
30
+ """A `requests.Session` hook to raise for status on all requests."""
31
+ response.raise_for_status()
32
+
33
+
34
+ def make_http_session() -> Session:
35
+ """A factory that returns a `requests.Session` for use with artifact storage handlers."""
36
+ session = Session()
37
+
38
+ # Explicitly configure the retry strategy for http/https adapters.
39
+ adapter = HTTPAdapter(
40
+ max_retries=HTTP_RETRY_STRATEGY,
41
+ pool_connections=HTTP_POOL_CONNECTIONS,
42
+ pool_maxsize=HTTP_POOL_MAXSIZE,
43
+ )
44
+ session.mount("http://", adapter)
45
+ session.mount("https://", adapter)
46
+
47
+ # Always raise on HTTP status errors.
48
+ session.hooks["response"].append(raise_for_status)
49
+ return session
50
+
51
+
52
+ def make_storage_handlers(session: Session) -> list[StorageHandler]:
53
+ """A factory that returns the default artifact storage handlers."""
54
+ return [
55
+ S3Handler(), # s3
56
+ GCSHandler(), # gcs
57
+ AzureHandler(), # azure
58
+ HTTPHandler(session, scheme="http"), # http
59
+ HTTPHandler(session, scheme="https"), # https
60
+ WBArtifactHandler(), # artifact
61
+ WBLocalArtifactHandler(), # local_artifact
62
+ LocalFileHandler(), # file_handler
63
+ ]
@@ -16,7 +16,6 @@ from typing import IO, TYPE_CHECKING, Any, NamedTuple, Sequence
16
16
  from urllib.parse import quote
17
17
 
18
18
  import requests
19
- import urllib3
20
19
 
21
20
  from wandb import env
22
21
  from wandb.errors.term import termwarn
@@ -27,40 +26,24 @@ from wandb.sdk.artifacts.artifact_file_cache import (
27
26
  get_artifact_file_cache,
28
27
  )
29
28
  from wandb.sdk.artifacts.staging import get_staging_dir
30
- from wandb.sdk.artifacts.storage_handlers.azure_handler import AzureHandler
31
- from wandb.sdk.artifacts.storage_handlers.gcs_handler import GCSHandler
32
- from wandb.sdk.artifacts.storage_handlers.http_handler import HTTPHandler
33
- from wandb.sdk.artifacts.storage_handlers.local_file_handler import LocalFileHandler
34
29
  from wandb.sdk.artifacts.storage_handlers.multi_handler import MultiHandler
35
- from wandb.sdk.artifacts.storage_handlers.s3_handler import S3Handler
36
30
  from wandb.sdk.artifacts.storage_handlers.tracking_handler import TrackingHandler
37
- from wandb.sdk.artifacts.storage_handlers.wb_artifact_handler import WBArtifactHandler
38
- from wandb.sdk.artifacts.storage_handlers.wb_local_artifact_handler import (
39
- WBLocalArtifactHandler,
40
- )
41
31
  from wandb.sdk.artifacts.storage_layout import StorageLayout
42
32
  from wandb.sdk.artifacts.storage_policies.register import WANDB_STORAGE_POLICY
43
33
  from wandb.sdk.artifacts.storage_policy import StoragePolicy
44
34
  from wandb.sdk.internal.internal_api import Api as InternalApi
45
35
  from wandb.sdk.internal.thread_local_settings import _thread_local_api_settings
46
- from wandb.sdk.lib.hashutil import B64MD5, b64_to_hex_id, hex_to_b64_id
36
+ from wandb.sdk.lib.hashutil import b64_to_hex_id, hex_to_b64_id
47
37
  from wandb.sdk.lib.paths import FilePathStr, URIStr
48
38
 
39
+ from ._factories import make_http_session, make_storage_handlers
40
+
49
41
  if TYPE_CHECKING:
50
42
  from wandb.filesync.step_prepare import StepPrepare
51
43
  from wandb.sdk.artifacts.artifact import Artifact
52
44
  from wandb.sdk.artifacts.artifact_manifest_entry import ArtifactManifestEntry
53
45
  from wandb.sdk.internal import progress
54
46
 
55
- # Sleep length: 0, 2, 4, 8, 16, 32, 64, 120, 120, 120, 120, 120, 120, 120, 120, 120
56
- # seconds, i.e. a total of 20min 6s.
57
- _REQUEST_RETRY_STRATEGY = urllib3.util.retry.Retry(
58
- backoff_factor=1,
59
- total=16,
60
- status_forcelist=(308, 408, 409, 429, 500, 502, 503, 504),
61
- )
62
- _REQUEST_POOL_CONNECTIONS = 64
63
- _REQUEST_POOL_MAXSIZE = 64
64
47
 
65
48
  # AWS S3 max upload parts without having to make additional requests for extra parts
66
49
  S3_MAX_PART_NUMBERS = 1000
@@ -96,48 +79,23 @@ class WandbStoragePolicy(StoragePolicy):
96
79
 
97
80
  @classmethod
98
81
  def from_config(
99
- cls, config: dict, api: InternalApi | None = None
82
+ cls, config: dict[str, Any], api: InternalApi | None = None
100
83
  ) -> WandbStoragePolicy:
101
84
  return cls(config=config, api=api)
102
85
 
103
86
  def __init__(
104
87
  self,
105
- config: dict | None = None,
88
+ config: dict[str, Any] | None = None,
106
89
  cache: ArtifactFileCache | None = None,
107
90
  api: InternalApi | None = None,
91
+ session: requests.Session | None = None,
108
92
  ) -> None:
109
- self._cache = cache or get_artifact_file_cache()
110
93
  self._config = config or {}
111
- self._session = requests.Session()
112
- adapter = requests.adapters.HTTPAdapter(
113
- max_retries=_REQUEST_RETRY_STRATEGY,
114
- pool_connections=_REQUEST_POOL_CONNECTIONS,
115
- pool_maxsize=_REQUEST_POOL_MAXSIZE,
116
- )
117
- self._session.mount("http://", adapter)
118
- self._session.mount("https://", adapter)
119
-
120
- s3 = S3Handler()
121
- gcs = GCSHandler()
122
- azure = AzureHandler()
123
- http = HTTPHandler(self._session)
124
- https = HTTPHandler(self._session, scheme="https")
125
- artifact = WBArtifactHandler()
126
- local_artifact = WBLocalArtifactHandler()
127
- file_handler = LocalFileHandler()
128
-
94
+ self._cache = cache or get_artifact_file_cache()
95
+ self._session = session or make_http_session()
129
96
  self._api = api or InternalApi()
130
97
  self._handler = MultiHandler(
131
- handlers=[
132
- s3,
133
- gcs,
134
- azure,
135
- http,
136
- https,
137
- artifact,
138
- local_artifact,
139
- file_handler,
140
- ],
98
+ handlers=make_storage_handlers(self._session),
141
99
  default_handler=TrackingHandler(),
142
100
  )
143
101
 
@@ -167,54 +125,52 @@ class WandbStoragePolicy(StoragePolicy):
167
125
  self._cache._override_cache_path = dest_path
168
126
 
169
127
  path, hit, cache_open = self._cache.check_md5_obj_path(
170
- B64MD5(manifest_entry.digest),
171
- manifest_entry.size if manifest_entry.size is not None else 0,
128
+ manifest_entry.digest,
129
+ size=manifest_entry.size or 0,
172
130
  )
173
131
  if hit:
174
132
  return path
175
133
 
176
- if manifest_entry._download_url is not None:
134
+ if (url := manifest_entry._download_url) is not None:
177
135
  # Use multipart parallel download for large file
178
136
  if (
179
- executor is not None
180
- and manifest_entry.size is not None
181
- and self._should_multipart_download(manifest_entry.size, multipart)
137
+ executor
138
+ and (size := manifest_entry.size)
139
+ and self._should_multipart_download(size, multipart)
182
140
  ):
183
- self._multipart_file_download(
184
- executor,
185
- manifest_entry._download_url,
186
- manifest_entry.size,
187
- cache_open,
188
- )
141
+ self._multipart_file_download(executor, url, size, cache_open)
189
142
  return path
143
+
190
144
  # Serial download
191
- response = self._session.get(manifest_entry._download_url, stream=True)
192
145
  try:
193
- response.raise_for_status()
194
- except Exception:
146
+ response = self._session.get(url, stream=True)
147
+ except requests.HTTPError:
195
148
  # Signed URL might have expired, fall back to fetching it one by one.
196
149
  manifest_entry._download_url = None
150
+
197
151
  if manifest_entry._download_url is None:
198
152
  auth = None
199
- http_headers = _thread_local_api_settings.headers or {}
200
- if self._api.access_token is not None:
201
- http_headers["Authorization"] = f"Bearer {self._api.access_token}"
202
- elif _thread_local_api_settings.cookies is None:
153
+ headers = _thread_local_api_settings.headers
154
+ cookies = _thread_local_api_settings.cookies
155
+
156
+ # For auth, prefer using (in order): auth header, cookies, HTTP Basic Auth
157
+ if token := self._api.access_token:
158
+ headers = {**(headers or {}), "Authorization": f"Bearer {token}"}
159
+ elif cookies is not None:
160
+ pass
161
+ else:
203
162
  auth = ("api", self._api.api_key or "")
163
+
164
+ file_url = self._file_url(
165
+ self._api,
166
+ artifact.entity,
167
+ artifact.project,
168
+ artifact.name.split(":")[0],
169
+ manifest_entry,
170
+ )
204
171
  response = self._session.get(
205
- self._file_url(
206
- self._api,
207
- artifact.entity,
208
- artifact.project,
209
- artifact.name.split(":")[0],
210
- manifest_entry,
211
- ),
212
- auth=auth,
213
- cookies=_thread_local_api_settings.cookies,
214
- headers=http_headers,
215
- stream=True,
172
+ file_url, auth=auth, cookies=cookies, headers=headers, stream=True
216
173
  )
217
- response.raise_for_status()
218
174
 
219
175
  with cache_open(mode="wb") as file:
220
176
  for data in response.iter_content(chunk_size=16 * 1024):
@@ -269,12 +225,7 @@ class WandbStoragePolicy(StoragePolicy):
269
225
  # Other threads has error, no need to start
270
226
  if download_has_error.is_set():
271
227
  return
272
- response = self._session.get(
273
- url=download_url,
274
- headers=headers,
275
- stream=True,
276
- )
277
- response.raise_for_status()
228
+ response = self._session.get(url=download_url, headers=headers, stream=True)
278
229
 
279
230
  file_offset = start
280
231
  for content in response.iter_content(chunk_size=_HTTP_RES_CHUNK_SIZE_BYTES):
@@ -376,43 +327,27 @@ class WandbStoragePolicy(StoragePolicy):
376
327
  entity_name: str,
377
328
  project_name: str,
378
329
  artifact_name: str,
379
- manifest_entry: ArtifactManifestEntry,
330
+ entry: ArtifactManifestEntry,
380
331
  ) -> str:
381
- storage_layout = self._config.get("storageLayout", StorageLayout.V1)
382
- storage_region = self._config.get("storageRegion", "default")
383
- md5_hex = b64_to_hex_id(B64MD5(manifest_entry.digest))
332
+ layout = self._config.get("storageLayout", StorageLayout.V1)
333
+ region = self._config.get("storageRegion", "default")
334
+ md5_hex = b64_to_hex_id(entry.digest)
384
335
 
385
- if storage_layout == StorageLayout.V1:
386
- return "{}/artifacts/{}/{}".format(
387
- api.settings("base_url"), entity_name, md5_hex
388
- )
389
- elif storage_layout == StorageLayout.V2:
336
+ base_url: str = api.settings("base_url")
337
+
338
+ if layout == StorageLayout.V1:
339
+ return f"{base_url}/artifacts/{entity_name}/{md5_hex}"
340
+
341
+ if layout == StorageLayout.V2:
342
+ birth_artifact_id = entry.birth_artifact_id or ""
390
343
  if api._server_supports(
391
- ServerFeature.ARTIFACT_COLLECTION_MEMBERSHIP_FILE_DOWNLOAD_HANDLER # type: ignore
344
+ ServerFeature.ARTIFACT_COLLECTION_MEMBERSHIP_FILE_DOWNLOAD_HANDLER
392
345
  ):
393
- return "{}/artifactsV2/{}/{}/{}/{}/{}/{}/{}".format(
394
- api.settings("base_url"),
395
- storage_region,
396
- quote(entity_name),
397
- quote(project_name),
398
- quote(artifact_name),
399
- quote(manifest_entry.birth_artifact_id or ""),
400
- md5_hex,
401
- manifest_entry.path.name,
402
- )
403
- return "{}/artifactsV2/{}/{}/{}/{}".format(
404
- api.settings("base_url"),
405
- storage_region,
406
- entity_name,
407
- quote(
408
- manifest_entry.birth_artifact_id
409
- if manifest_entry.birth_artifact_id is not None
410
- else ""
411
- ),
412
- md5_hex,
413
- )
414
- else:
415
- raise Exception(f"unrecognized storage layout: {storage_layout}")
346
+ return f"{base_url}/artifactsV2/{region}/{quote(entity_name)}/{quote(project_name)}/{quote(artifact_name)}/{quote(birth_artifact_id)}/{md5_hex}/{entry.path.name}"
347
+
348
+ return f"{base_url}/artifactsV2/{region}/{entity_name}/{quote(birth_artifact_id)}/{md5_hex}"
349
+
350
+ raise ValueError(f"unrecognized storage layout: {layout!r}")
416
351
 
417
352
  def s3_multipart_file_upload(
418
353
  self,
@@ -486,7 +421,7 @@ class WandbStoragePolicy(StoragePolicy):
486
421
  True if the file was a duplicate (did not need to be uploaded),
487
422
  False if it needed to be uploaded or was a reference (nothing to dedupe).
488
423
  """
489
- file_size = entry.size if entry.size is not None else 0
424
+ file_size = entry.size or 0
490
425
  chunk_size = self.calc_chunk_size(file_size)
491
426
  upload_parts = []
492
427
  hex_digests = {}
@@ -562,8 +497,8 @@ class WandbStoragePolicy(StoragePolicy):
562
497
 
563
498
  # Cache upon successful upload.
564
499
  _, hit, cache_open = self._cache.check_md5_obj_path(
565
- B64MD5(entry.digest),
566
- entry.size if entry.size is not None else 0,
500
+ entry.digest,
501
+ size=entry.size or 0,
567
502
  )
568
503
 
569
504
  staging_dir = get_staging_dir()
@@ -883,6 +883,16 @@ class InterfaceBase:
883
883
  ) -> MailboxHandle[pb.Result]:
884
884
  raise NotImplementedError
885
885
 
886
+ def publish_probe_system_info(self) -> None:
887
+ probe_system_info = pb.ProbeSystemInfoRequest()
888
+ return self._publish_probe_system_info(probe_system_info)
889
+
890
+ @abstractmethod
891
+ def _publish_probe_system_info(
892
+ self, probe_system_info: pb.ProbeSystemInfoRequest
893
+ ) -> None:
894
+ raise NotImplementedError
895
+
886
896
  def join(self) -> None:
887
897
  # Drop indicates that the internal process has already been shutdown
888
898
  if self._drop:
@@ -112,6 +112,7 @@ class InterfaceShared(InterfaceBase):
112
112
  python_packages: Optional[pb.PythonPackagesRequest] = None,
113
113
  job_input: Optional[pb.JobInputRequest] = None,
114
114
  run_finish_without_exit: Optional[pb.RunFinishWithoutExitRequest] = None,
115
+ probe_system_info: Optional[pb.ProbeSystemInfoRequest] = None,
115
116
  ) -> pb.Record:
116
117
  request = pb.Request()
117
118
  if get_summary:
@@ -178,6 +179,8 @@ class InterfaceShared(InterfaceBase):
178
179
  request.job_input.CopyFrom(job_input)
179
180
  elif run_finish_without_exit:
180
181
  request.run_finish_without_exit.CopyFrom(run_finish_without_exit)
182
+ elif probe_system_info:
183
+ request.probe_system_info.CopyFrom(probe_system_info)
181
184
  else:
182
185
  raise Exception("Invalid request")
183
186
  record = self._make_record(request=request)
@@ -330,6 +333,12 @@ class InterfaceShared(InterfaceBase):
330
333
  rec = self._make_record(use_artifact=use_artifact)
331
334
  self._publish(rec)
332
335
 
336
+ def _publish_probe_system_info(
337
+ self, probe_system_info: pb.ProbeSystemInfoRequest
338
+ ) -> None:
339
+ record = self._make_request(probe_system_info=probe_system_info)
340
+ self._publish(record)
341
+
333
342
  def _deliver_artifact(
334
343
  self,
335
344
  log_artifact: pb.LogArtifactRequest,
wandb/sdk/wandb_init.py CHANGED
@@ -839,6 +839,13 @@ class _WandbInit:
839
839
  " and reinit is set to 'create_new', so continuing"
840
840
  )
841
841
 
842
+ elif settings.resume == "must":
843
+ raise wandb.Error(
844
+ "Cannot resume a run while another run is active."
845
+ " You must either finish it using run.finish(),"
846
+ " or use reinit='create_new' when calling wandb.init()."
847
+ )
848
+
842
849
  else:
843
850
  run_printer.display(
844
851
  "wandb.init() called while a run is active and reinit is"
@@ -864,7 +871,6 @@ class _WandbInit:
864
871
  backend.ensure_launched()
865
872
  self._logger.info("backend started and connected")
866
873
 
867
- # resuming needs access to the server, check server_status()?
868
874
  run = Run(
869
875
  config=config.base_no_artifacts,
870
876
  settings=settings,
@@ -1019,6 +1025,8 @@ class _WandbInit:
1019
1025
  except TimeoutError:
1020
1026
  pass
1021
1027
 
1028
+ backend.interface.publish_probe_system_info()
1029
+
1022
1030
  assert self._wl is not None
1023
1031
  self.run = run
1024
1032
 
wandb/wandb_agent.py CHANGED
@@ -42,11 +42,42 @@ class AgentProcess:
42
42
  if command:
43
43
  if platform.system() == "Windows":
44
44
  kwargs = dict(creationflags=subprocess.CREATE_NEW_PROCESS_GROUP)
45
+ env.pop(wandb.env.SERVICE, None)
46
+ # TODO: Determine if we need the same stdin workaround as POSIX case below.
47
+ self._popen = subprocess.Popen(command, env=env, **kwargs)
45
48
  else:
46
- kwargs = dict(preexec_fn=os.setpgrp)
47
- if env.get(wandb.env.SERVICE):
48
- env.pop(wandb.env.SERVICE)
49
- self._popen = subprocess.Popen(command, env=env, **kwargs)
49
+ if sys.version_info >= (3, 11):
50
+ # preexec_fn=os.setpgrp is not thread-safe; process_group was introduced in
51
+ # python 3.11 to replace it, so use that when possible
52
+ kwargs = dict(process_group=0)
53
+ else:
54
+ kwargs = dict(preexec_fn=os.setpgrp)
55
+ env.pop(wandb.env.SERVICE, None)
56
+ # Upon spawning the subprocess in a new process group, the child's process group is
57
+ # not connected to the controlling terminal's stdin. If it tries to access stdin,
58
+ # it gets a SIGTTIN and blocks until we give it the terminal, which we don't want
59
+ # to do.
60
+ #
61
+ # By using subprocess.PIPE, we give it an independent stdin. However, it will still
62
+ # block if it tries to read from stdin, because we're not writing anything to it.
63
+ # We immediately close the subprocess's stdin here so it can fail fast and get an
64
+ # EOF.
65
+ #
66
+ # (One situation that makes this relevant is that importing `readline` even
67
+ # indirectly can cause the child to attempt to access stdin, which can trigger the
68
+ # deadlock. In Python 3.13, `import torch` indirectly imports `readline` via `pdb`,
69
+ # meaning `import torch` in a run script can deadlock unless we override stdin.
70
+ # See https://github.com/wandb/wandb/pull/10489 description for more details.)
71
+ #
72
+ # Also, we avoid spawning a new session because that breaks preempted child process
73
+ # handling.
74
+ self._popen = subprocess.Popen(
75
+ command,
76
+ env=env,
77
+ stdin=subprocess.PIPE,
78
+ **kwargs,
79
+ )
80
+ self._popen.stdin.close()
50
81
  elif function:
51
82
  self._proc = multiprocessing.Process(
52
83
  target=self._start,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: wandb
3
- Version: 0.21.4
3
+ Version: 0.22.0
4
4
  Summary: A CLI and library for interacting with the Weights & Biases API.
5
5
  Project-URL: Source, https://github.com/wandb/wandb
6
6
  Project-URL: Bug Reports, https://github.com/wandb/wandb/issues