wandb 0.16.3__py3-none-any.whl → 0.16.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. wandb/__init__.py +2 -2
  2. wandb/agents/pyagent.py +1 -1
  3. wandb/apis/importers/__init__.py +1 -4
  4. wandb/apis/importers/internals/internal.py +386 -0
  5. wandb/apis/importers/internals/protocols.py +125 -0
  6. wandb/apis/importers/internals/util.py +78 -0
  7. wandb/apis/importers/mlflow.py +125 -88
  8. wandb/apis/importers/validation.py +108 -0
  9. wandb/apis/importers/wandb.py +1604 -0
  10. wandb/apis/public/api.py +7 -10
  11. wandb/apis/public/artifacts.py +38 -0
  12. wandb/apis/public/files.py +11 -2
  13. wandb/apis/reports/v2/__init__.py +0 -19
  14. wandb/apis/reports/v2/expr_parsing.py +0 -1
  15. wandb/apis/reports/v2/interface.py +15 -18
  16. wandb/apis/reports/v2/internal.py +12 -45
  17. wandb/cli/cli.py +52 -55
  18. wandb/integration/gym/__init__.py +2 -1
  19. wandb/integration/keras/callbacks/model_checkpoint.py +1 -1
  20. wandb/integration/keras/keras.py +6 -4
  21. wandb/integration/kfp/kfp_patch.py +2 -2
  22. wandb/integration/openai/fine_tuning.py +1 -2
  23. wandb/integration/ultralytics/callback.py +0 -1
  24. wandb/proto/v3/wandb_internal_pb2.py +332 -312
  25. wandb/proto/v3/wandb_settings_pb2.py +13 -3
  26. wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
  27. wandb/proto/v4/wandb_internal_pb2.py +316 -312
  28. wandb/proto/v4/wandb_settings_pb2.py +5 -3
  29. wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
  30. wandb/sdk/artifacts/artifact.py +75 -31
  31. wandb/sdk/artifacts/artifact_manifest.py +5 -2
  32. wandb/sdk/artifacts/artifact_manifest_entry.py +6 -1
  33. wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +8 -2
  34. wandb/sdk/artifacts/artifact_saver.py +19 -47
  35. wandb/sdk/artifacts/storage_handler.py +2 -1
  36. wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +22 -9
  37. wandb/sdk/artifacts/storage_policy.py +4 -1
  38. wandb/sdk/data_types/base_types/wb_value.py +1 -1
  39. wandb/sdk/data_types/image.py +2 -2
  40. wandb/sdk/interface/interface.py +49 -13
  41. wandb/sdk/interface/interface_shared.py +17 -11
  42. wandb/sdk/internal/file_stream.py +20 -1
  43. wandb/sdk/internal/handler.py +1 -4
  44. wandb/sdk/internal/internal_api.py +3 -1
  45. wandb/sdk/internal/job_builder.py +49 -19
  46. wandb/sdk/internal/profiler.py +1 -1
  47. wandb/sdk/internal/sender.py +96 -124
  48. wandb/sdk/internal/sender_config.py +197 -0
  49. wandb/sdk/internal/settings_static.py +9 -0
  50. wandb/sdk/internal/system/system_info.py +5 -3
  51. wandb/sdk/internal/update.py +1 -1
  52. wandb/sdk/launch/_launch.py +3 -3
  53. wandb/sdk/launch/_launch_add.py +28 -29
  54. wandb/sdk/launch/_project_spec.py +148 -136
  55. wandb/sdk/launch/agent/agent.py +3 -7
  56. wandb/sdk/launch/agent/config.py +0 -27
  57. wandb/sdk/launch/builder/build.py +54 -28
  58. wandb/sdk/launch/builder/docker_builder.py +4 -15
  59. wandb/sdk/launch/builder/kaniko_builder.py +72 -45
  60. wandb/sdk/launch/create_job.py +6 -40
  61. wandb/sdk/launch/loader.py +10 -0
  62. wandb/sdk/launch/registry/anon.py +29 -0
  63. wandb/sdk/launch/registry/local_registry.py +4 -1
  64. wandb/sdk/launch/runner/kubernetes_runner.py +20 -2
  65. wandb/sdk/launch/runner/local_container.py +15 -10
  66. wandb/sdk/launch/runner/sagemaker_runner.py +1 -1
  67. wandb/sdk/launch/sweeps/scheduler.py +11 -3
  68. wandb/sdk/launch/utils.py +14 -0
  69. wandb/sdk/lib/__init__.py +2 -5
  70. wandb/sdk/lib/_settings_toposort_generated.py +4 -1
  71. wandb/sdk/lib/apikey.py +0 -5
  72. wandb/sdk/lib/config_util.py +0 -31
  73. wandb/sdk/lib/filesystem.py +11 -1
  74. wandb/sdk/lib/run_moment.py +72 -0
  75. wandb/sdk/service/service.py +7 -2
  76. wandb/sdk/service/streams.py +1 -6
  77. wandb/sdk/verify/verify.py +2 -1
  78. wandb/sdk/wandb_init.py +12 -1
  79. wandb/sdk/wandb_login.py +43 -26
  80. wandb/sdk/wandb_run.py +164 -110
  81. wandb/sdk/wandb_settings.py +58 -16
  82. wandb/testing/relay.py +5 -6
  83. wandb/util.py +50 -7
  84. {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/METADATA +8 -1
  85. {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/RECORD +89 -82
  86. {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/WHEEL +1 -1
  87. wandb/apis/importers/base.py +0 -400
  88. {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/LICENSE +0 -0
  89. {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/entry_points.txt +0 -0
  90. {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,7 @@
1
1
  """WandB storage policy."""
2
2
  import hashlib
3
3
  import math
4
+ import os
4
5
  import shutil
5
6
  from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Union
6
7
  from urllib.parse import quote
@@ -8,12 +9,12 @@ from urllib.parse import quote
8
9
  import requests
9
10
  import urllib3
10
11
 
11
- from wandb.apis import InternalApi
12
12
  from wandb.errors.term import termwarn
13
13
  from wandb.sdk.artifacts.artifact_file_cache import (
14
14
  ArtifactFileCache,
15
15
  get_artifact_file_cache,
16
16
  )
17
+ from wandb.sdk.artifacts.staging import get_staging_dir
17
18
  from wandb.sdk.artifacts.storage_handlers.azure_handler import AzureHandler
18
19
  from wandb.sdk.artifacts.storage_handlers.gcs_handler import GCSHandler
19
20
  from wandb.sdk.artifacts.storage_handlers.http_handler import HTTPHandler
@@ -28,6 +29,7 @@ from wandb.sdk.artifacts.storage_handlers.wb_local_artifact_handler import (
28
29
  from wandb.sdk.artifacts.storage_layout import StorageLayout
29
30
  from wandb.sdk.artifacts.storage_policies.register import WANDB_STORAGE_POLICY
30
31
  from wandb.sdk.artifacts.storage_policy import StoragePolicy
32
+ from wandb.sdk.internal.internal_api import Api as InternalApi
31
33
  from wandb.sdk.internal.thread_local_settings import _thread_local_api_settings
32
34
  from wandb.sdk.lib.hashutil import B64MD5, b64_to_hex_id, hex_to_b64_id
33
35
  from wandb.sdk.lib.paths import FilePathStr, URIStr
@@ -60,8 +62,10 @@ class WandbStoragePolicy(StoragePolicy):
60
62
  return WANDB_STORAGE_POLICY
61
63
 
62
64
  @classmethod
63
- def from_config(cls, config: Dict) -> "WandbStoragePolicy":
64
- return cls(config=config)
65
+ def from_config(
66
+ cls, config: Dict, api: Optional[InternalApi] = None
67
+ ) -> "WandbStoragePolicy":
68
+ return cls(config=config, api=api)
65
69
 
66
70
  def __init__(
67
71
  self,
@@ -131,6 +135,7 @@ class WandbStoragePolicy(StoragePolicy):
131
135
  if manifest_entry._download_url is None:
132
136
  auth = None
133
137
  if not _thread_local_api_settings.cookies:
138
+ assert self._api.api_key is not None
134
139
  auth = ("api", self._api.api_key)
135
140
  response = self._session.get(
136
141
  self._file_url(self._api, artifact.entity, manifest_entry),
@@ -222,9 +227,10 @@ class WandbStoragePolicy(StoragePolicy):
222
227
  extra_headers={
223
228
  "content-md5": md5_b64_str,
224
229
  "content-length": str(len(data)),
225
- "content-type": extra_headers.get("Content-Type"),
230
+ "content-type": extra_headers.get("Content-Type", ""),
226
231
  },
227
232
  )
233
+ assert upload_resp is not None
228
234
  etags.append(
229
235
  {"partNumber": part_number, "hexMD5": upload_resp.headers["ETag"]}
230
236
  )
@@ -311,7 +317,6 @@ class WandbStoragePolicy(StoragePolicy):
311
317
  return True
312
318
  if entry.local_path is None:
313
319
  return False
314
-
315
320
  extra_headers = {
316
321
  header.split(":", 1)[0]: header.split(":", 1)[1]
317
322
  for header in (resp.upload_headers or {})
@@ -333,6 +338,7 @@ class WandbStoragePolicy(StoragePolicy):
333
338
  multipart_urls,
334
339
  extra_headers,
335
340
  )
341
+ assert resp.storage_path is not None
336
342
  self._api.complete_multipart_upload_artifact(
337
343
  artifact_id, resp.storage_path, etags, resp.upload_id
338
344
  )
@@ -389,9 +395,16 @@ class WandbStoragePolicy(StoragePolicy):
389
395
  B64MD5(entry.digest),
390
396
  entry.size if entry.size is not None else 0,
391
397
  )
392
- if not hit:
393
- try:
398
+
399
+ staging_dir = get_staging_dir()
400
+ try:
401
+ if not entry.skip_cache and not hit:
394
402
  with cache_open("wb") as f, open(entry.local_path, "rb") as src:
395
403
  shutil.copyfileobj(src, f)
396
- except OSError as e:
397
- termwarn(f"Failed to cache {entry.local_path}, ignoring {e}")
404
+ if entry.local_path.startswith(staging_dir):
405
+ # Delete staged files here instead of waiting till
406
+ # all the files are uploaded
407
+ os.chmod(entry.local_path, 0o600)
408
+ os.remove(entry.local_path)
409
+ except OSError as e:
410
+ termwarn(f"Failed to cache {entry.local_path}, ignoring {e}")
@@ -1,6 +1,7 @@
1
1
  """Storage policy."""
2
2
  from typing import TYPE_CHECKING, Dict, Optional, Sequence, Type, Union
3
3
 
4
+ from wandb.sdk.internal.internal_api import Api as InternalApi
4
5
  from wandb.sdk.lib.paths import FilePathStr, URIStr
5
6
 
6
7
  if TYPE_CHECKING:
@@ -25,7 +26,9 @@ class StoragePolicy:
25
26
  raise NotImplementedError
26
27
 
27
28
  @classmethod
28
- def from_config(cls, config: Dict) -> "StoragePolicy":
29
+ def from_config(
30
+ cls, config: Dict, api: Optional[InternalApi] = None
31
+ ) -> "StoragePolicy":
29
32
  raise NotImplementedError
30
33
 
31
34
  def config(self) -> Dict:
@@ -11,7 +11,7 @@ if TYPE_CHECKING: # pragma: no cover
11
11
 
12
12
 
13
13
  def _server_accepts_client_ids() -> bool:
14
- from pkg_resources import parse_version
14
+ from wandb.util import parse_version
15
15
 
16
16
  # First, if we are offline, assume the backend server cannot
17
17
  # accept client IDs. Unfortunately, this is the best we can do
@@ -42,7 +42,7 @@ def _server_accepts_image_filenames() -> bool:
42
42
  max_cli_version = util._get_max_cli_version()
43
43
  if max_cli_version is None:
44
44
  return False
45
- from pkg_resources import parse_version
45
+ from wandb.util import parse_version
46
46
 
47
47
  accepts_image_filenames: bool = parse_version("0.12.10") <= parse_version(
48
48
  max_cli_version
@@ -51,7 +51,7 @@ def _server_accepts_image_filenames() -> bool:
51
51
 
52
52
 
53
53
  def _server_accepts_artifact_path() -> bool:
54
- from pkg_resources import parse_version
54
+ from wandb.util import parse_version
55
55
 
56
56
  target_version = "0.12.14"
57
57
  max_cli_version = util._get_max_cli_version() if not util._is_offline() else None
@@ -13,7 +13,17 @@ import os
13
13
  import sys
14
14
  import time
15
15
  from abc import abstractmethod
16
- from typing import TYPE_CHECKING, Any, Dict, Iterable, NewType, Optional, Tuple, Union
16
+ from typing import (
17
+ TYPE_CHECKING,
18
+ Any,
19
+ Dict,
20
+ Iterable,
21
+ List,
22
+ NewType,
23
+ Optional,
24
+ Tuple,
25
+ Union,
26
+ )
17
27
 
18
28
  from wandb.proto import wandb_internal_pb2 as pb
19
29
  from wandb.proto import wandb_telemetry_pb2 as tpb
@@ -34,6 +44,7 @@ from wandb.util import (
34
44
  from ..data_types.utils import history_dict_to_json, val_to_json
35
45
  from ..lib.mailbox import MailboxHandle
36
46
  from . import summary_record as sr
47
+ from .message_future import MessageFuture
37
48
 
38
49
  GlobStr = NewType("GlobStr", str)
39
50
 
@@ -339,6 +350,7 @@ class InterfaceBase:
339
350
  proto_entry.ref = entry.ref
340
351
  if entry.local_path:
341
352
  proto_entry.local_path = entry.local_path
353
+ proto_entry.skip_cache = entry.skip_cache
342
354
  for k, v in entry.extra.items():
343
355
  proto_extra = proto_entry.extra.add()
344
356
  proto_extra.key = k
@@ -452,7 +464,7 @@ class InterfaceBase:
452
464
  def _publish_use_artifact(self, proto_artifact: pb.UseArtifactRecord) -> None:
453
465
  raise NotImplementedError
454
466
 
455
- def deliver_artifact(
467
+ def communicate_artifact(
456
468
  self,
457
469
  run: "Run",
458
470
  artifact: "Artifact",
@@ -461,7 +473,7 @@ class InterfaceBase:
461
473
  is_user_created: bool = False,
462
474
  use_after_commit: bool = False,
463
475
  finalize: bool = True,
464
- ) -> MailboxHandle:
476
+ ) -> MessageFuture:
465
477
  proto_run = self._make_run(run)
466
478
  proto_artifact = self._make_artifact(artifact)
467
479
  proto_artifact.run_id = proto_run.run_id
@@ -478,11 +490,13 @@ class InterfaceBase:
478
490
  if history_step is not None:
479
491
  log_artifact.history_step = history_step
480
492
  log_artifact.staging_dir = get_staging_dir()
481
- resp = self._deliver_artifact(log_artifact)
493
+ resp = self._communicate_artifact(log_artifact)
482
494
  return resp
483
495
 
484
496
  @abstractmethod
485
- def _deliver_artifact(self, log_artifact: pb.LogArtifactRequest) -> MailboxHandle:
497
+ def _communicate_artifact(
498
+ self, log_artifact: pb.LogArtifactRequest
499
+ ) -> MessageFuture:
486
500
  raise NotImplementedError
487
501
 
488
502
  def deliver_download_artifact(
@@ -753,6 +767,36 @@ class InterfaceBase:
753
767
  run_start.run.CopyFrom(run_pb)
754
768
  return self._deliver_run_start(run_start)
755
769
 
770
+ def publish_launch_wandb_config_parameters(
771
+ self, include_paths: List[List[str]], exclude_paths: List[List[str]]
772
+ ):
773
+ """Tells the internal process to treat wandb.config fields as job inputs.
774
+
775
+ The paths provided as arguments are sequences of dictionary keys that
776
+ specify a path within the wandb.config. If a path is included, the
777
+ corresponding field will be treated as a job input. If a path is
778
+ excluded, the corresponding field will not be treated as a job input.
779
+
780
+ Args:
781
+ include_paths: paths within config to include as job inputs.
782
+ exclude_paths: paths within config to exclude as job inputs.
783
+
784
+ Returns:
785
+ None
786
+ """
787
+ config_parameters = pb.LaunchWandbConfigParametersRecord()
788
+ include_records = [pb.ConfigFilterPath(path=path) for path in include_paths]
789
+ exclude_records = [pb.ConfigFilterPath(path=path) for path in exclude_paths]
790
+ config_parameters.include_paths.extend(include_records)
791
+ config_parameters.exclude_paths.extend(exclude_records)
792
+ return self._publish_launch_wandb_config_parameters(config_parameters)
793
+
794
+ @abstractmethod
795
+ def _publish_launch_wandb_config_parameters(
796
+ self, config_parameters: pb.LaunchWandbConfigParametersRecord
797
+ ) -> None:
798
+ raise NotImplementedError
799
+
756
800
  @abstractmethod
757
801
  def _deliver_run_start(self, run_start: pb.RunStartRequest) -> MailboxHandle:
758
802
  raise NotImplementedError
@@ -868,11 +912,3 @@ class InterfaceBase:
868
912
  self, run_status: pb.RunStatusRequest
869
913
  ) -> MailboxHandle:
870
914
  raise NotImplementedError
871
-
872
- def deliver_request_job_info(self) -> MailboxHandle:
873
- job_info = pb.JobInfoRequest()
874
- return self._deliver_request_job_info(job_info)
875
-
876
- @abstractmethod
877
- def _deliver_request_job_info(self, job_info: pb.JobInfoRequest) -> MailboxHandle:
878
- raise NotImplementedError
@@ -145,7 +145,6 @@ class InterfaceShared(InterfaceBase):
145
145
  cancel: Optional[pb.CancelRequest] = None,
146
146
  summary_record: Optional[pb.SummaryRecordRequest] = None,
147
147
  telemetry_record: Optional[pb.TelemetryRecordRequest] = None,
148
- job_info: Optional[pb.JobInfoRequest] = None,
149
148
  get_system_metrics: Optional[pb.GetSystemMetricsRequest] = None,
150
149
  python_packages: Optional[pb.PythonPackagesRequest] = None,
151
150
  ) -> pb.Record:
@@ -202,8 +201,6 @@ class InterfaceShared(InterfaceBase):
202
201
  request.summary_record.CopyFrom(summary_record)
203
202
  elif telemetry_record:
204
203
  request.telemetry_record.CopyFrom(telemetry_record)
205
- elif job_info:
206
- request.job_info.CopyFrom(job_info)
207
204
  elif get_system_metrics:
208
205
  request.get_system_metrics.CopyFrom(get_system_metrics)
209
206
  elif sync:
@@ -242,6 +239,9 @@ class InterfaceShared(InterfaceBase):
242
239
  use_artifact: Optional[pb.UseArtifactRecord] = None,
243
240
  output: Optional[pb.OutputRecord] = None,
244
241
  output_raw: Optional[pb.OutputRawRecord] = None,
242
+ launch_wandb_config_parameters: Optional[
243
+ pb.LaunchWandbConfigParametersRecord
244
+ ] = None,
245
245
  ) -> pb.Record:
246
246
  record = pb.Record()
247
247
  if run:
@@ -286,6 +286,8 @@ class InterfaceShared(InterfaceBase):
286
286
  record.output.CopyFrom(output)
287
287
  elif output_raw:
288
288
  record.output_raw.CopyFrom(output_raw)
289
+ elif launch_wandb_config_parameters:
290
+ record.wandb_config_parameters.CopyFrom(launch_wandb_config_parameters)
289
291
  else:
290
292
  raise Exception("Invalid record")
291
293
  return record
@@ -389,17 +391,17 @@ class InterfaceShared(InterfaceBase):
389
391
  rec = self._make_record(files=files)
390
392
  self._publish(rec)
391
393
 
392
- def _publish_link_artifact(self, link_artifact: pb.LinkArtifactRecord) -> None:
394
+ def _publish_link_artifact(self, link_artifact: pb.LinkArtifactRecord) -> Any:
393
395
  rec = self._make_record(link_artifact=link_artifact)
394
396
  self._publish(rec)
395
397
 
396
- def _publish_use_artifact(self, use_artifact: pb.UseArtifactRecord) -> None:
398
+ def _publish_use_artifact(self, use_artifact: pb.UseArtifactRecord) -> Any:
397
399
  rec = self._make_record(use_artifact=use_artifact)
398
400
  self._publish(rec)
399
401
 
400
- def _deliver_artifact(self, log_artifact: pb.LogArtifactRequest) -> MailboxHandle:
402
+ def _communicate_artifact(self, log_artifact: pb.LogArtifactRequest) -> Any:
401
403
  rec = self._make_request(log_artifact=log_artifact)
402
- return self._deliver_record(rec)
404
+ return self._communicate_async(rec)
403
405
 
404
406
  def _deliver_download_artifact(
405
407
  self, download_artifact: pb.DownloadArtifactRequest
@@ -415,6 +417,14 @@ class InterfaceShared(InterfaceBase):
415
417
  rec = self._make_record(alert=proto_alert)
416
418
  self._publish(rec)
417
419
 
420
+ def _publish_launch_wandb_config_parameters(
421
+ self, launch_wandb_config_parameters: pb.LaunchWandbConfigParametersRecord
422
+ ) -> None:
423
+ rec = self._make_record(
424
+ launch_wandb_config_parameters=launch_wandb_config_parameters
425
+ )
426
+ self._publish(rec)
427
+
418
428
  def _communicate_status(
419
429
  self, status: pb.StatusRequest
420
430
  ) -> Optional[pb.StatusResponse]:
@@ -523,10 +533,6 @@ class InterfaceShared(InterfaceBase):
523
533
  record = self._make_request(run_status=run_status)
524
534
  return self._deliver_record(record)
525
535
 
526
- def _deliver_request_job_info(self, job_info: pb.JobInfoRequest) -> MailboxHandle:
527
- record = self._make_request(job_info=job_info)
528
- return self._deliver_record(record)
529
-
530
536
  def _transport_keepalive_failed(self, keepalive_interval: int = 5) -> bool:
531
537
  if self._transport_failed:
532
538
  return True
@@ -1,6 +1,7 @@
1
1
  import base64
2
2
  import functools
3
3
  import itertools
4
+ import json
4
5
  import logging
5
6
  import os
6
7
  import queue
@@ -58,6 +59,7 @@ class Chunk(NamedTuple):
58
59
  class DefaultFilePolicy:
59
60
  def __init__(self, start_chunk_id: int = 0) -> None:
60
61
  self._chunk_id = start_chunk_id
62
+ self.has_debug_log = False
61
63
 
62
64
  def process_chunks(
63
65
  self, chunks: List[Chunk]
@@ -66,6 +68,21 @@ class DefaultFilePolicy:
66
68
  self._chunk_id += len(chunks)
67
69
  return {"offset": chunk_id, "content": [c.data for c in chunks]}
68
70
 
71
+ # TODO: this is very inefficient, this is meant for temporary debugging and will be removed in future releases
72
+ def _debug_log(self, data: Any):
73
+ if self.has_debug_log or not os.environ.get("WANDB_DEBUG_FILESTREAM_LOG"):
74
+ return
75
+
76
+ loaded = json.loads(data)
77
+ if not isinstance(loaded, dict):
78
+ return
79
+
80
+ # get key size and convert to MB
81
+ key_sizes = [(k, len(json.dumps(v))) for k, v in loaded.items()]
82
+ key_msg = [f"{k}: {v/1048576:.5f} MB" for k, v in key_sizes]
83
+ wandb.termerror(f"Step: {loaded['_step']} | {key_msg}", repeat=False)
84
+ self.has_debug_log = True
85
+
69
86
 
70
87
  class JsonlFilePolicy(DefaultFilePolicy):
71
88
  def process_chunks(self, chunks: List[Chunk]) -> "ProcessedChunk":
@@ -81,6 +98,7 @@ class JsonlFilePolicy(DefaultFilePolicy):
81
98
  )
82
99
  wandb.termerror(msg, repeat=False)
83
100
  wandb._sentry.message(msg, repeat=False)
101
+ self._debug_log(chunk.data)
84
102
  else:
85
103
  chunk_data.append(chunk.data)
86
104
 
@@ -99,6 +117,7 @@ class SummaryFilePolicy(DefaultFilePolicy):
99
117
  )
100
118
  wandb.termerror(msg, repeat=False)
101
119
  wandb._sentry.message(msg, repeat=False)
120
+ self._debug_log(data)
102
121
  return False
103
122
  return {"offset": 0, "content": [data]}
104
123
 
@@ -274,7 +293,7 @@ class CRDedupeFilePolicy(DefaultFilePolicy):
274
293
  ret = []
275
294
  for a, b in intervals:
276
295
  processed_chunk: ProcessedChunk = {
277
- "offset": a,
296
+ "offset": self._chunk_id + a,
278
297
  "content": [console[i] for i in range(a, b + 1)],
279
298
  }
280
299
  ret.append(processed_chunk)
@@ -689,7 +689,7 @@ class HandleManager:
689
689
  self._settings, interface=self._interface, run_proto=run_start.run
690
690
  )
691
691
 
692
- if run_start.run.resumed:
692
+ if run_start.run.resumed or run_start.run.forked:
693
693
  self._step = run_start.run.starting_step
694
694
  result = proto_util._result_from_record(record)
695
695
  self._respond_result(result)
@@ -862,9 +862,6 @@ class HandleManager:
862
862
  self._respond_result(result)
863
863
  self._stopped.set()
864
864
 
865
- def handle_request_job_info(self, record: Record) -> None:
866
- self._dispatch_record(record, always_send=True)
867
-
868
865
  def finish(self) -> None:
869
866
  logger.info("shutting down handler")
870
867
  if self._system_monitor is not None:
@@ -2150,6 +2150,7 @@ class Api:
2150
2150
  name
2151
2151
  }
2152
2152
  }
2153
+ historyLineCount
2153
2154
  }
2154
2155
  inserted
2155
2156
  _Server_Settings_
@@ -2237,6 +2238,7 @@ class Api:
2237
2238
  .get("serverSettings", {})
2238
2239
  .get("serverMessages", [])
2239
2240
  )
2241
+
2240
2242
  return (
2241
2243
  response["upsertBucket"]["bucket"],
2242
2244
  response["upsertBucket"]["inserted"],
@@ -3720,7 +3722,7 @@ class Api:
3720
3722
  artifact_id: str,
3721
3723
  storage_path: str,
3722
3724
  completed_parts: List[Dict[str, Any]],
3723
- upload_id: str,
3725
+ upload_id: Optional[str],
3724
3726
  complete_multipart_action: str = "Complete",
3725
3727
  ) -> Optional[str]:
3726
3728
  mutation = gql(
@@ -4,7 +4,7 @@ import logging
4
4
  import os
5
5
  import re
6
6
  import sys
7
- from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
7
+ from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union
8
8
 
9
9
  import wandb
10
10
  from wandb.sdk.artifacts.artifact import Artifact
@@ -28,6 +28,8 @@ FROZEN_REQUIREMENTS_FNAME = "requirements.frozen.txt"
28
28
  JOB_FNAME = "wandb-job.json"
29
29
  JOB_ARTIFACT_TYPE = "job"
30
30
 
31
+ LOG_LEVEL = Literal["log", "warn", "error"]
32
+
31
33
 
32
34
  class GitInfo(TypedDict):
33
35
  remote: str
@@ -89,8 +91,9 @@ class JobBuilder:
89
91
  _job_seq_id: Optional[str]
90
92
  _job_version_alias: Optional[str]
91
93
  _is_notebook_run: bool
94
+ _verbose: bool
92
95
 
93
- def __init__(self, settings: SettingsStatic):
96
+ def __init__(self, settings: SettingsStatic, verbose: bool = False):
94
97
  self._settings = settings
95
98
  self._metadatafile_path = None
96
99
  self._requirements_path = None
@@ -106,6 +109,7 @@ class JobBuilder:
106
109
  Literal["repo", "artifact", "image"]
107
110
  ] = settings.job_source # type: ignore[assignment]
108
111
  self._is_notebook_run = self._get_is_notebook_run()
112
+ self._verbose = verbose
109
113
 
110
114
  def set_config(self, config: Dict[str, Any]) -> None:
111
115
  self._config = config
@@ -121,7 +125,9 @@ class JobBuilder:
121
125
  def disable(self, val: bool) -> None:
122
126
  self._disable = val
123
127
 
124
- def _handle_server_artifact(self, res: Dict, artifact: "ArtifactRecord") -> None:
128
+ def _handle_server_artifact(
129
+ self, res: Optional[Dict], artifact: "ArtifactRecord"
130
+ ) -> None:
125
131
  if artifact.type == "job" and res is not None:
126
132
  try:
127
133
  if res["artifactSequence"]["latestArtifact"] is None:
@@ -135,7 +141,7 @@ class JobBuilder:
135
141
  self._job_seq_id = res["artifactSequence"]["id"]
136
142
  except KeyError as e:
137
143
  _logger.info(f"Malformed response from ArtifactSaver.save {e}")
138
- if artifact.type == "code" and "id" in res:
144
+ if artifact.type == "code" and res is not None:
139
145
  self._logged_code_artifact = ArtifactInfoForJob(
140
146
  {
141
147
  "id": res["id"],
@@ -195,6 +201,21 @@ class JobBuilder:
195
201
 
196
202
  return source, name
197
203
 
204
+ def _log_if_verbose(self, message: str, level: LOG_LEVEL) -> None:
205
+ log_func: Optional[Union[Callable[[Any], None], Callable[[Any], None]]] = None
206
+ if level == "log":
207
+ _logger.info(message)
208
+ log_func = wandb.termlog
209
+ elif level == "warn":
210
+ _logger.warning(message)
211
+ log_func = wandb.termwarn
212
+ elif level == "error":
213
+ _logger.error(message)
214
+ log_func = wandb.termerror
215
+
216
+ if self._verbose and log_func is not None:
217
+ log_func(message)
218
+
198
219
  def _build_artifact_job_source(
199
220
  self,
200
221
  program_relpath: str,
@@ -210,8 +231,9 @@ class JobBuilder:
210
231
  # at the directory the notebook is in instead of the jupyter core
211
232
  if not os.path.exists(os.path.basename(program_relpath)):
212
233
  _logger.info("target path does not exist, exiting")
213
- wandb.termwarn(
214
- "No program path found when generating artifact job source for a non-colab notebook run. See https://docs.wandb.ai/guides/launch/create-job"
234
+ self._log_if_verbose(
235
+ "No program path found when generating artifact job source for a non-colab notebook run. See https://docs.wandb.ai/guides/launch/create-job",
236
+ "warn",
215
237
  )
216
238
  return None, None
217
239
  full_program_relpath = os.path.basename(program_relpath)
@@ -297,22 +319,25 @@ class JobBuilder:
297
319
  if not os.path.exists(
298
320
  os.path.join(self._settings.files_dir, REQUIREMENTS_FNAME)
299
321
  ):
300
- wandb.termwarn(
301
- "No requirements.txt found, not creating job artifact. See https://docs.wandb.ai/guides/launch/create-job"
322
+ self._log_if_verbose(
323
+ "No requirements.txt found, not creating job artifact. See https://docs.wandb.ai/guides/launch/create-job",
324
+ "warn",
302
325
  )
303
326
  return None
304
327
  metadata = self._handle_metadata_file()
305
328
  if metadata is None:
306
- wandb.termwarn(
307
- f"Ensure read and write access to run files dir: {self._settings.files_dir}, control this via the WANDB_DIR env var. See https://docs.wandb.ai/guides/track/environment-variables"
329
+ self._log_if_verbose(
330
+ f"Ensure read and write access to run files dir: {self._settings.files_dir}, control this via the WANDB_DIR env var. See https://docs.wandb.ai/guides/track/environment-variables",
331
+ "warn",
308
332
  )
309
333
  return None
310
334
 
311
335
  runtime: Optional[str] = metadata.get("python")
312
336
  # can't build a job without a python version
313
337
  if runtime is None:
314
- wandb.termwarn(
315
- "No python version found in metadata, not creating job artifact. See https://docs.wandb.ai/guides/launch/create-job"
338
+ self._log_if_verbose(
339
+ "No python version found in metadata, not creating job artifact. See https://docs.wandb.ai/guides/launch/create-job",
340
+ "warn",
316
341
  )
317
342
  return None
318
343
 
@@ -343,13 +368,16 @@ class JobBuilder:
343
368
  or self._settings.job_source
344
369
  or self._source_type
345
370
  ):
346
- wandb.termwarn("No source type found, not creating job artifact")
371
+ self._log_if_verbose(
372
+ "No source type found, not creating job artifact", "warn"
373
+ )
347
374
  return None
348
375
 
349
376
  program_relpath = self._get_program_relpath(source_type, metadata)
350
377
  if source_type != "image" and not program_relpath:
351
- wandb.termwarn(
352
- "No program path found, not creating job artifact. See https://docs.wandb.ai/guides/launch/create-job"
378
+ self._log_if_verbose(
379
+ "No program path found, not creating job artifact. See https://docs.wandb.ai/guides/launch/create-job",
380
+ "warn",
353
381
  )
354
382
  return None
355
383
 
@@ -375,10 +403,11 @@ class JobBuilder:
375
403
 
376
404
  if source is None:
377
405
  if source_type:
378
- wandb.termwarn(
406
+ self._log_if_verbose(
379
407
  f"Source type is set to '{source_type}' but some required information is missing "
380
408
  "from the environment. A job will not be created from this run. See "
381
- "https://docs.wandb.ai/guides/launch/create-job"
409
+ "https://docs.wandb.ai/guides/launch/create-job",
410
+ "warn",
382
411
  )
383
412
  return None
384
413
 
@@ -445,8 +474,9 @@ class JobBuilder:
445
474
  program = metadata.get("program")
446
475
 
447
476
  if not program:
448
- wandb.termwarn(
449
- "Notebook 'program' path not found in metadata. See https://docs.wandb.ai/guides/launch/create-job"
477
+ self._log_if_verbose(
478
+ "Notebook 'program' path not found in metadata. See https://docs.wandb.ai/guides/launch/create-job",
479
+ "warn",
450
480
  )
451
481
 
452
482
  return program
@@ -52,7 +52,7 @@ def torch_trace_handler():
52
52
  prof.step()
53
53
  ```
54
54
  """
55
- from pkg_resources import parse_version
55
+ from wandb.util import parse_version
56
56
 
57
57
  torch = wandb.util.get_module(PYTORCH_MODULE, required=True)
58
58
  torch_profiler = wandb.util.get_module(PYTORCH_PROFILER_MODULE, required=True)