wandb 0.16.3__py3-none-any.whl → 0.16.5__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (90) hide show
  1. wandb/__init__.py +2 -2
  2. wandb/agents/pyagent.py +1 -1
  3. wandb/apis/importers/__init__.py +1 -4
  4. wandb/apis/importers/internals/internal.py +386 -0
  5. wandb/apis/importers/internals/protocols.py +125 -0
  6. wandb/apis/importers/internals/util.py +78 -0
  7. wandb/apis/importers/mlflow.py +125 -88
  8. wandb/apis/importers/validation.py +108 -0
  9. wandb/apis/importers/wandb.py +1604 -0
  10. wandb/apis/public/api.py +7 -10
  11. wandb/apis/public/artifacts.py +38 -0
  12. wandb/apis/public/files.py +11 -2
  13. wandb/apis/reports/v2/__init__.py +0 -19
  14. wandb/apis/reports/v2/expr_parsing.py +0 -1
  15. wandb/apis/reports/v2/interface.py +15 -18
  16. wandb/apis/reports/v2/internal.py +12 -45
  17. wandb/cli/cli.py +52 -55
  18. wandb/integration/gym/__init__.py +2 -1
  19. wandb/integration/keras/callbacks/model_checkpoint.py +1 -1
  20. wandb/integration/keras/keras.py +6 -4
  21. wandb/integration/kfp/kfp_patch.py +2 -2
  22. wandb/integration/openai/fine_tuning.py +1 -2
  23. wandb/integration/ultralytics/callback.py +0 -1
  24. wandb/proto/v3/wandb_internal_pb2.py +332 -312
  25. wandb/proto/v3/wandb_settings_pb2.py +13 -3
  26. wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
  27. wandb/proto/v4/wandb_internal_pb2.py +316 -312
  28. wandb/proto/v4/wandb_settings_pb2.py +5 -3
  29. wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
  30. wandb/sdk/artifacts/artifact.py +75 -31
  31. wandb/sdk/artifacts/artifact_manifest.py +5 -2
  32. wandb/sdk/artifacts/artifact_manifest_entry.py +6 -1
  33. wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +8 -2
  34. wandb/sdk/artifacts/artifact_saver.py +19 -47
  35. wandb/sdk/artifacts/storage_handler.py +2 -1
  36. wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +22 -9
  37. wandb/sdk/artifacts/storage_policy.py +4 -1
  38. wandb/sdk/data_types/base_types/wb_value.py +1 -1
  39. wandb/sdk/data_types/image.py +2 -2
  40. wandb/sdk/interface/interface.py +49 -13
  41. wandb/sdk/interface/interface_shared.py +17 -11
  42. wandb/sdk/internal/file_stream.py +20 -1
  43. wandb/sdk/internal/handler.py +1 -4
  44. wandb/sdk/internal/internal_api.py +3 -1
  45. wandb/sdk/internal/job_builder.py +49 -19
  46. wandb/sdk/internal/profiler.py +1 -1
  47. wandb/sdk/internal/sender.py +96 -124
  48. wandb/sdk/internal/sender_config.py +197 -0
  49. wandb/sdk/internal/settings_static.py +9 -0
  50. wandb/sdk/internal/system/system_info.py +5 -3
  51. wandb/sdk/internal/update.py +1 -1
  52. wandb/sdk/launch/_launch.py +3 -3
  53. wandb/sdk/launch/_launch_add.py +28 -29
  54. wandb/sdk/launch/_project_spec.py +148 -136
  55. wandb/sdk/launch/agent/agent.py +3 -7
  56. wandb/sdk/launch/agent/config.py +0 -27
  57. wandb/sdk/launch/builder/build.py +54 -28
  58. wandb/sdk/launch/builder/docker_builder.py +4 -15
  59. wandb/sdk/launch/builder/kaniko_builder.py +72 -45
  60. wandb/sdk/launch/create_job.py +6 -40
  61. wandb/sdk/launch/loader.py +10 -0
  62. wandb/sdk/launch/registry/anon.py +29 -0
  63. wandb/sdk/launch/registry/local_registry.py +4 -1
  64. wandb/sdk/launch/runner/kubernetes_runner.py +20 -2
  65. wandb/sdk/launch/runner/local_container.py +15 -10
  66. wandb/sdk/launch/runner/sagemaker_runner.py +1 -1
  67. wandb/sdk/launch/sweeps/scheduler.py +11 -3
  68. wandb/sdk/launch/utils.py +14 -0
  69. wandb/sdk/lib/__init__.py +2 -5
  70. wandb/sdk/lib/_settings_toposort_generated.py +4 -1
  71. wandb/sdk/lib/apikey.py +0 -5
  72. wandb/sdk/lib/config_util.py +0 -31
  73. wandb/sdk/lib/filesystem.py +11 -1
  74. wandb/sdk/lib/run_moment.py +72 -0
  75. wandb/sdk/service/service.py +7 -2
  76. wandb/sdk/service/streams.py +1 -6
  77. wandb/sdk/verify/verify.py +2 -1
  78. wandb/sdk/wandb_init.py +12 -1
  79. wandb/sdk/wandb_login.py +43 -26
  80. wandb/sdk/wandb_run.py +164 -110
  81. wandb/sdk/wandb_settings.py +58 -16
  82. wandb/testing/relay.py +5 -6
  83. wandb/util.py +50 -7
  84. {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/METADATA +8 -1
  85. {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/RECORD +89 -82
  86. {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/WHEEL +1 -1
  87. wandb/apis/importers/base.py +0 -400
  88. {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/LICENSE +0 -0
  89. {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/entry_points.txt +0 -0
  90. {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,7 @@
1
1
  """WandB storage policy."""
2
2
  import hashlib
3
3
  import math
4
+ import os
4
5
  import shutil
5
6
  from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Union
6
7
  from urllib.parse import quote
@@ -8,12 +9,12 @@ from urllib.parse import quote
8
9
  import requests
9
10
  import urllib3
10
11
 
11
- from wandb.apis import InternalApi
12
12
  from wandb.errors.term import termwarn
13
13
  from wandb.sdk.artifacts.artifact_file_cache import (
14
14
  ArtifactFileCache,
15
15
  get_artifact_file_cache,
16
16
  )
17
+ from wandb.sdk.artifacts.staging import get_staging_dir
17
18
  from wandb.sdk.artifacts.storage_handlers.azure_handler import AzureHandler
18
19
  from wandb.sdk.artifacts.storage_handlers.gcs_handler import GCSHandler
19
20
  from wandb.sdk.artifacts.storage_handlers.http_handler import HTTPHandler
@@ -28,6 +29,7 @@ from wandb.sdk.artifacts.storage_handlers.wb_local_artifact_handler import (
28
29
  from wandb.sdk.artifacts.storage_layout import StorageLayout
29
30
  from wandb.sdk.artifacts.storage_policies.register import WANDB_STORAGE_POLICY
30
31
  from wandb.sdk.artifacts.storage_policy import StoragePolicy
32
+ from wandb.sdk.internal.internal_api import Api as InternalApi
31
33
  from wandb.sdk.internal.thread_local_settings import _thread_local_api_settings
32
34
  from wandb.sdk.lib.hashutil import B64MD5, b64_to_hex_id, hex_to_b64_id
33
35
  from wandb.sdk.lib.paths import FilePathStr, URIStr
@@ -60,8 +62,10 @@ class WandbStoragePolicy(StoragePolicy):
60
62
  return WANDB_STORAGE_POLICY
61
63
 
62
64
  @classmethod
63
- def from_config(cls, config: Dict) -> "WandbStoragePolicy":
64
- return cls(config=config)
65
+ def from_config(
66
+ cls, config: Dict, api: Optional[InternalApi] = None
67
+ ) -> "WandbStoragePolicy":
68
+ return cls(config=config, api=api)
65
69
 
66
70
  def __init__(
67
71
  self,
@@ -131,6 +135,7 @@ class WandbStoragePolicy(StoragePolicy):
131
135
  if manifest_entry._download_url is None:
132
136
  auth = None
133
137
  if not _thread_local_api_settings.cookies:
138
+ assert self._api.api_key is not None
134
139
  auth = ("api", self._api.api_key)
135
140
  response = self._session.get(
136
141
  self._file_url(self._api, artifact.entity, manifest_entry),
@@ -222,9 +227,10 @@ class WandbStoragePolicy(StoragePolicy):
222
227
  extra_headers={
223
228
  "content-md5": md5_b64_str,
224
229
  "content-length": str(len(data)),
225
- "content-type": extra_headers.get("Content-Type"),
230
+ "content-type": extra_headers.get("Content-Type", ""),
226
231
  },
227
232
  )
233
+ assert upload_resp is not None
228
234
  etags.append(
229
235
  {"partNumber": part_number, "hexMD5": upload_resp.headers["ETag"]}
230
236
  )
@@ -311,7 +317,6 @@ class WandbStoragePolicy(StoragePolicy):
311
317
  return True
312
318
  if entry.local_path is None:
313
319
  return False
314
-
315
320
  extra_headers = {
316
321
  header.split(":", 1)[0]: header.split(":", 1)[1]
317
322
  for header in (resp.upload_headers or {})
@@ -333,6 +338,7 @@ class WandbStoragePolicy(StoragePolicy):
333
338
  multipart_urls,
334
339
  extra_headers,
335
340
  )
341
+ assert resp.storage_path is not None
336
342
  self._api.complete_multipart_upload_artifact(
337
343
  artifact_id, resp.storage_path, etags, resp.upload_id
338
344
  )
@@ -389,9 +395,16 @@ class WandbStoragePolicy(StoragePolicy):
389
395
  B64MD5(entry.digest),
390
396
  entry.size if entry.size is not None else 0,
391
397
  )
392
- if not hit:
393
- try:
398
+
399
+ staging_dir = get_staging_dir()
400
+ try:
401
+ if not entry.skip_cache and not hit:
394
402
  with cache_open("wb") as f, open(entry.local_path, "rb") as src:
395
403
  shutil.copyfileobj(src, f)
396
- except OSError as e:
397
- termwarn(f"Failed to cache {entry.local_path}, ignoring {e}")
404
+ if entry.local_path.startswith(staging_dir):
405
+ # Delete staged files here instead of waiting till
406
+ # all the files are uploaded
407
+ os.chmod(entry.local_path, 0o600)
408
+ os.remove(entry.local_path)
409
+ except OSError as e:
410
+ termwarn(f"Failed to cache {entry.local_path}, ignoring {e}")
@@ -1,6 +1,7 @@
1
1
  """Storage policy."""
2
2
  from typing import TYPE_CHECKING, Dict, Optional, Sequence, Type, Union
3
3
 
4
+ from wandb.sdk.internal.internal_api import Api as InternalApi
4
5
  from wandb.sdk.lib.paths import FilePathStr, URIStr
5
6
 
6
7
  if TYPE_CHECKING:
@@ -25,7 +26,9 @@ class StoragePolicy:
25
26
  raise NotImplementedError
26
27
 
27
28
  @classmethod
28
- def from_config(cls, config: Dict) -> "StoragePolicy":
29
+ def from_config(
30
+ cls, config: Dict, api: Optional[InternalApi] = None
31
+ ) -> "StoragePolicy":
29
32
  raise NotImplementedError
30
33
 
31
34
  def config(self) -> Dict:
@@ -11,7 +11,7 @@ if TYPE_CHECKING: # pragma: no cover
11
11
 
12
12
 
13
13
  def _server_accepts_client_ids() -> bool:
14
- from pkg_resources import parse_version
14
+ from wandb.util import parse_version
15
15
 
16
16
  # First, if we are offline, assume the backend server cannot
17
17
  # accept client IDs. Unfortunately, this is the best we can do
@@ -42,7 +42,7 @@ def _server_accepts_image_filenames() -> bool:
42
42
  max_cli_version = util._get_max_cli_version()
43
43
  if max_cli_version is None:
44
44
  return False
45
- from pkg_resources import parse_version
45
+ from wandb.util import parse_version
46
46
 
47
47
  accepts_image_filenames: bool = parse_version("0.12.10") <= parse_version(
48
48
  max_cli_version
@@ -51,7 +51,7 @@ def _server_accepts_image_filenames() -> bool:
51
51
 
52
52
 
53
53
  def _server_accepts_artifact_path() -> bool:
54
- from pkg_resources import parse_version
54
+ from wandb.util import parse_version
55
55
 
56
56
  target_version = "0.12.14"
57
57
  max_cli_version = util._get_max_cli_version() if not util._is_offline() else None
@@ -13,7 +13,17 @@ import os
13
13
  import sys
14
14
  import time
15
15
  from abc import abstractmethod
16
- from typing import TYPE_CHECKING, Any, Dict, Iterable, NewType, Optional, Tuple, Union
16
+ from typing import (
17
+ TYPE_CHECKING,
18
+ Any,
19
+ Dict,
20
+ Iterable,
21
+ List,
22
+ NewType,
23
+ Optional,
24
+ Tuple,
25
+ Union,
26
+ )
17
27
 
18
28
  from wandb.proto import wandb_internal_pb2 as pb
19
29
  from wandb.proto import wandb_telemetry_pb2 as tpb
@@ -34,6 +44,7 @@ from wandb.util import (
34
44
  from ..data_types.utils import history_dict_to_json, val_to_json
35
45
  from ..lib.mailbox import MailboxHandle
36
46
  from . import summary_record as sr
47
+ from .message_future import MessageFuture
37
48
 
38
49
  GlobStr = NewType("GlobStr", str)
39
50
 
@@ -339,6 +350,7 @@ class InterfaceBase:
339
350
  proto_entry.ref = entry.ref
340
351
  if entry.local_path:
341
352
  proto_entry.local_path = entry.local_path
353
+ proto_entry.skip_cache = entry.skip_cache
342
354
  for k, v in entry.extra.items():
343
355
  proto_extra = proto_entry.extra.add()
344
356
  proto_extra.key = k
@@ -452,7 +464,7 @@ class InterfaceBase:
452
464
  def _publish_use_artifact(self, proto_artifact: pb.UseArtifactRecord) -> None:
453
465
  raise NotImplementedError
454
466
 
455
- def deliver_artifact(
467
+ def communicate_artifact(
456
468
  self,
457
469
  run: "Run",
458
470
  artifact: "Artifact",
@@ -461,7 +473,7 @@ class InterfaceBase:
461
473
  is_user_created: bool = False,
462
474
  use_after_commit: bool = False,
463
475
  finalize: bool = True,
464
- ) -> MailboxHandle:
476
+ ) -> MessageFuture:
465
477
  proto_run = self._make_run(run)
466
478
  proto_artifact = self._make_artifact(artifact)
467
479
  proto_artifact.run_id = proto_run.run_id
@@ -478,11 +490,13 @@ class InterfaceBase:
478
490
  if history_step is not None:
479
491
  log_artifact.history_step = history_step
480
492
  log_artifact.staging_dir = get_staging_dir()
481
- resp = self._deliver_artifact(log_artifact)
493
+ resp = self._communicate_artifact(log_artifact)
482
494
  return resp
483
495
 
484
496
  @abstractmethod
485
- def _deliver_artifact(self, log_artifact: pb.LogArtifactRequest) -> MailboxHandle:
497
+ def _communicate_artifact(
498
+ self, log_artifact: pb.LogArtifactRequest
499
+ ) -> MessageFuture:
486
500
  raise NotImplementedError
487
501
 
488
502
  def deliver_download_artifact(
@@ -753,6 +767,36 @@ class InterfaceBase:
753
767
  run_start.run.CopyFrom(run_pb)
754
768
  return self._deliver_run_start(run_start)
755
769
 
770
+ def publish_launch_wandb_config_parameters(
771
+ self, include_paths: List[List[str]], exclude_paths: List[List[str]]
772
+ ):
773
+ """Tells the internal process to treat wandb.config fields as job inputs.
774
+
775
+ The paths provided as arguments are sequences of dictionary keys that
776
+ specify a path within the wandb.config. If a path is included, the
777
+ corresponding field will be treated as a job input. If a path is
778
+ excluded, the corresponding field will not be treated as a job input.
779
+
780
+ Args:
781
+ include_paths: paths within config to include as job inputs.
782
+ exclude_paths: paths within config to exclude as job inputs.
783
+
784
+ Returns:
785
+ None
786
+ """
787
+ config_parameters = pb.LaunchWandbConfigParametersRecord()
788
+ include_records = [pb.ConfigFilterPath(path=path) for path in include_paths]
789
+ exclude_records = [pb.ConfigFilterPath(path=path) for path in exclude_paths]
790
+ config_parameters.include_paths.extend(include_records)
791
+ config_parameters.exclude_paths.extend(exclude_records)
792
+ return self._publish_launch_wandb_config_parameters(config_parameters)
793
+
794
+ @abstractmethod
795
+ def _publish_launch_wandb_config_parameters(
796
+ self, config_parameters: pb.LaunchWandbConfigParametersRecord
797
+ ) -> None:
798
+ raise NotImplementedError
799
+
756
800
  @abstractmethod
757
801
  def _deliver_run_start(self, run_start: pb.RunStartRequest) -> MailboxHandle:
758
802
  raise NotImplementedError
@@ -868,11 +912,3 @@ class InterfaceBase:
868
912
  self, run_status: pb.RunStatusRequest
869
913
  ) -> MailboxHandle:
870
914
  raise NotImplementedError
871
-
872
- def deliver_request_job_info(self) -> MailboxHandle:
873
- job_info = pb.JobInfoRequest()
874
- return self._deliver_request_job_info(job_info)
875
-
876
- @abstractmethod
877
- def _deliver_request_job_info(self, job_info: pb.JobInfoRequest) -> MailboxHandle:
878
- raise NotImplementedError
@@ -145,7 +145,6 @@ class InterfaceShared(InterfaceBase):
145
145
  cancel: Optional[pb.CancelRequest] = None,
146
146
  summary_record: Optional[pb.SummaryRecordRequest] = None,
147
147
  telemetry_record: Optional[pb.TelemetryRecordRequest] = None,
148
- job_info: Optional[pb.JobInfoRequest] = None,
149
148
  get_system_metrics: Optional[pb.GetSystemMetricsRequest] = None,
150
149
  python_packages: Optional[pb.PythonPackagesRequest] = None,
151
150
  ) -> pb.Record:
@@ -202,8 +201,6 @@ class InterfaceShared(InterfaceBase):
202
201
  request.summary_record.CopyFrom(summary_record)
203
202
  elif telemetry_record:
204
203
  request.telemetry_record.CopyFrom(telemetry_record)
205
- elif job_info:
206
- request.job_info.CopyFrom(job_info)
207
204
  elif get_system_metrics:
208
205
  request.get_system_metrics.CopyFrom(get_system_metrics)
209
206
  elif sync:
@@ -242,6 +239,9 @@ class InterfaceShared(InterfaceBase):
242
239
  use_artifact: Optional[pb.UseArtifactRecord] = None,
243
240
  output: Optional[pb.OutputRecord] = None,
244
241
  output_raw: Optional[pb.OutputRawRecord] = None,
242
+ launch_wandb_config_parameters: Optional[
243
+ pb.LaunchWandbConfigParametersRecord
244
+ ] = None,
245
245
  ) -> pb.Record:
246
246
  record = pb.Record()
247
247
  if run:
@@ -286,6 +286,8 @@ class InterfaceShared(InterfaceBase):
286
286
  record.output.CopyFrom(output)
287
287
  elif output_raw:
288
288
  record.output_raw.CopyFrom(output_raw)
289
+ elif launch_wandb_config_parameters:
290
+ record.wandb_config_parameters.CopyFrom(launch_wandb_config_parameters)
289
291
  else:
290
292
  raise Exception("Invalid record")
291
293
  return record
@@ -389,17 +391,17 @@ class InterfaceShared(InterfaceBase):
389
391
  rec = self._make_record(files=files)
390
392
  self._publish(rec)
391
393
 
392
- def _publish_link_artifact(self, link_artifact: pb.LinkArtifactRecord) -> None:
394
+ def _publish_link_artifact(self, link_artifact: pb.LinkArtifactRecord) -> Any:
393
395
  rec = self._make_record(link_artifact=link_artifact)
394
396
  self._publish(rec)
395
397
 
396
- def _publish_use_artifact(self, use_artifact: pb.UseArtifactRecord) -> None:
398
+ def _publish_use_artifact(self, use_artifact: pb.UseArtifactRecord) -> Any:
397
399
  rec = self._make_record(use_artifact=use_artifact)
398
400
  self._publish(rec)
399
401
 
400
- def _deliver_artifact(self, log_artifact: pb.LogArtifactRequest) -> MailboxHandle:
402
+ def _communicate_artifact(self, log_artifact: pb.LogArtifactRequest) -> Any:
401
403
  rec = self._make_request(log_artifact=log_artifact)
402
- return self._deliver_record(rec)
404
+ return self._communicate_async(rec)
403
405
 
404
406
  def _deliver_download_artifact(
405
407
  self, download_artifact: pb.DownloadArtifactRequest
@@ -415,6 +417,14 @@ class InterfaceShared(InterfaceBase):
415
417
  rec = self._make_record(alert=proto_alert)
416
418
  self._publish(rec)
417
419
 
420
+ def _publish_launch_wandb_config_parameters(
421
+ self, launch_wandb_config_parameters: pb.LaunchWandbConfigParametersRecord
422
+ ) -> None:
423
+ rec = self._make_record(
424
+ launch_wandb_config_parameters=launch_wandb_config_parameters
425
+ )
426
+ self._publish(rec)
427
+
418
428
  def _communicate_status(
419
429
  self, status: pb.StatusRequest
420
430
  ) -> Optional[pb.StatusResponse]:
@@ -523,10 +533,6 @@ class InterfaceShared(InterfaceBase):
523
533
  record = self._make_request(run_status=run_status)
524
534
  return self._deliver_record(record)
525
535
 
526
- def _deliver_request_job_info(self, job_info: pb.JobInfoRequest) -> MailboxHandle:
527
- record = self._make_request(job_info=job_info)
528
- return self._deliver_record(record)
529
-
530
536
  def _transport_keepalive_failed(self, keepalive_interval: int = 5) -> bool:
531
537
  if self._transport_failed:
532
538
  return True
@@ -1,6 +1,7 @@
1
1
  import base64
2
2
  import functools
3
3
  import itertools
4
+ import json
4
5
  import logging
5
6
  import os
6
7
  import queue
@@ -58,6 +59,7 @@ class Chunk(NamedTuple):
58
59
  class DefaultFilePolicy:
59
60
  def __init__(self, start_chunk_id: int = 0) -> None:
60
61
  self._chunk_id = start_chunk_id
62
+ self.has_debug_log = False
61
63
 
62
64
  def process_chunks(
63
65
  self, chunks: List[Chunk]
@@ -66,6 +68,21 @@ class DefaultFilePolicy:
66
68
  self._chunk_id += len(chunks)
67
69
  return {"offset": chunk_id, "content": [c.data for c in chunks]}
68
70
 
71
+ # TODO: this is very inefficient, this is meant for temporary debugging and will be removed in future releases
72
+ def _debug_log(self, data: Any):
73
+ if self.has_debug_log or not os.environ.get("WANDB_DEBUG_FILESTREAM_LOG"):
74
+ return
75
+
76
+ loaded = json.loads(data)
77
+ if not isinstance(loaded, dict):
78
+ return
79
+
80
+ # get key size and convert to MB
81
+ key_sizes = [(k, len(json.dumps(v))) for k, v in loaded.items()]
82
+ key_msg = [f"{k}: {v/1048576:.5f} MB" for k, v in key_sizes]
83
+ wandb.termerror(f"Step: {loaded['_step']} | {key_msg}", repeat=False)
84
+ self.has_debug_log = True
85
+
69
86
 
70
87
  class JsonlFilePolicy(DefaultFilePolicy):
71
88
  def process_chunks(self, chunks: List[Chunk]) -> "ProcessedChunk":
@@ -81,6 +98,7 @@ class JsonlFilePolicy(DefaultFilePolicy):
81
98
  )
82
99
  wandb.termerror(msg, repeat=False)
83
100
  wandb._sentry.message(msg, repeat=False)
101
+ self._debug_log(chunk.data)
84
102
  else:
85
103
  chunk_data.append(chunk.data)
86
104
 
@@ -99,6 +117,7 @@ class SummaryFilePolicy(DefaultFilePolicy):
99
117
  )
100
118
  wandb.termerror(msg, repeat=False)
101
119
  wandb._sentry.message(msg, repeat=False)
120
+ self._debug_log(data)
102
121
  return False
103
122
  return {"offset": 0, "content": [data]}
104
123
 
@@ -274,7 +293,7 @@ class CRDedupeFilePolicy(DefaultFilePolicy):
274
293
  ret = []
275
294
  for a, b in intervals:
276
295
  processed_chunk: ProcessedChunk = {
277
- "offset": a,
296
+ "offset": self._chunk_id + a,
278
297
  "content": [console[i] for i in range(a, b + 1)],
279
298
  }
280
299
  ret.append(processed_chunk)
@@ -689,7 +689,7 @@ class HandleManager:
689
689
  self._settings, interface=self._interface, run_proto=run_start.run
690
690
  )
691
691
 
692
- if run_start.run.resumed:
692
+ if run_start.run.resumed or run_start.run.forked:
693
693
  self._step = run_start.run.starting_step
694
694
  result = proto_util._result_from_record(record)
695
695
  self._respond_result(result)
@@ -862,9 +862,6 @@ class HandleManager:
862
862
  self._respond_result(result)
863
863
  self._stopped.set()
864
864
 
865
- def handle_request_job_info(self, record: Record) -> None:
866
- self._dispatch_record(record, always_send=True)
867
-
868
865
  def finish(self) -> None:
869
866
  logger.info("shutting down handler")
870
867
  if self._system_monitor is not None:
@@ -2150,6 +2150,7 @@ class Api:
2150
2150
  name
2151
2151
  }
2152
2152
  }
2153
+ historyLineCount
2153
2154
  }
2154
2155
  inserted
2155
2156
  _Server_Settings_
@@ -2237,6 +2238,7 @@ class Api:
2237
2238
  .get("serverSettings", {})
2238
2239
  .get("serverMessages", [])
2239
2240
  )
2241
+
2240
2242
  return (
2241
2243
  response["upsertBucket"]["bucket"],
2242
2244
  response["upsertBucket"]["inserted"],
@@ -3720,7 +3722,7 @@ class Api:
3720
3722
  artifact_id: str,
3721
3723
  storage_path: str,
3722
3724
  completed_parts: List[Dict[str, Any]],
3723
- upload_id: str,
3725
+ upload_id: Optional[str],
3724
3726
  complete_multipart_action: str = "Complete",
3725
3727
  ) -> Optional[str]:
3726
3728
  mutation = gql(
@@ -4,7 +4,7 @@ import logging
4
4
  import os
5
5
  import re
6
6
  import sys
7
- from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
7
+ from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union
8
8
 
9
9
  import wandb
10
10
  from wandb.sdk.artifacts.artifact import Artifact
@@ -28,6 +28,8 @@ FROZEN_REQUIREMENTS_FNAME = "requirements.frozen.txt"
28
28
  JOB_FNAME = "wandb-job.json"
29
29
  JOB_ARTIFACT_TYPE = "job"
30
30
 
31
+ LOG_LEVEL = Literal["log", "warn", "error"]
32
+
31
33
 
32
34
  class GitInfo(TypedDict):
33
35
  remote: str
@@ -89,8 +91,9 @@ class JobBuilder:
89
91
  _job_seq_id: Optional[str]
90
92
  _job_version_alias: Optional[str]
91
93
  _is_notebook_run: bool
94
+ _verbose: bool
92
95
 
93
- def __init__(self, settings: SettingsStatic):
96
+ def __init__(self, settings: SettingsStatic, verbose: bool = False):
94
97
  self._settings = settings
95
98
  self._metadatafile_path = None
96
99
  self._requirements_path = None
@@ -106,6 +109,7 @@ class JobBuilder:
106
109
  Literal["repo", "artifact", "image"]
107
110
  ] = settings.job_source # type: ignore[assignment]
108
111
  self._is_notebook_run = self._get_is_notebook_run()
112
+ self._verbose = verbose
109
113
 
110
114
  def set_config(self, config: Dict[str, Any]) -> None:
111
115
  self._config = config
@@ -121,7 +125,9 @@ class JobBuilder:
121
125
  def disable(self, val: bool) -> None:
122
126
  self._disable = val
123
127
 
124
- def _handle_server_artifact(self, res: Dict, artifact: "ArtifactRecord") -> None:
128
+ def _handle_server_artifact(
129
+ self, res: Optional[Dict], artifact: "ArtifactRecord"
130
+ ) -> None:
125
131
  if artifact.type == "job" and res is not None:
126
132
  try:
127
133
  if res["artifactSequence"]["latestArtifact"] is None:
@@ -135,7 +141,7 @@ class JobBuilder:
135
141
  self._job_seq_id = res["artifactSequence"]["id"]
136
142
  except KeyError as e:
137
143
  _logger.info(f"Malformed response from ArtifactSaver.save {e}")
138
- if artifact.type == "code" and "id" in res:
144
+ if artifact.type == "code" and res is not None:
139
145
  self._logged_code_artifact = ArtifactInfoForJob(
140
146
  {
141
147
  "id": res["id"],
@@ -195,6 +201,21 @@ class JobBuilder:
195
201
 
196
202
  return source, name
197
203
 
204
+ def _log_if_verbose(self, message: str, level: LOG_LEVEL) -> None:
205
+ log_func: Optional[Union[Callable[[Any], None], Callable[[Any], None]]] = None
206
+ if level == "log":
207
+ _logger.info(message)
208
+ log_func = wandb.termlog
209
+ elif level == "warn":
210
+ _logger.warning(message)
211
+ log_func = wandb.termwarn
212
+ elif level == "error":
213
+ _logger.error(message)
214
+ log_func = wandb.termerror
215
+
216
+ if self._verbose and log_func is not None:
217
+ log_func(message)
218
+
198
219
  def _build_artifact_job_source(
199
220
  self,
200
221
  program_relpath: str,
@@ -210,8 +231,9 @@ class JobBuilder:
210
231
  # at the directory the notebook is in instead of the jupyter core
211
232
  if not os.path.exists(os.path.basename(program_relpath)):
212
233
  _logger.info("target path does not exist, exiting")
213
- wandb.termwarn(
214
- "No program path found when generating artifact job source for a non-colab notebook run. See https://docs.wandb.ai/guides/launch/create-job"
234
+ self._log_if_verbose(
235
+ "No program path found when generating artifact job source for a non-colab notebook run. See https://docs.wandb.ai/guides/launch/create-job",
236
+ "warn",
215
237
  )
216
238
  return None, None
217
239
  full_program_relpath = os.path.basename(program_relpath)
@@ -297,22 +319,25 @@ class JobBuilder:
297
319
  if not os.path.exists(
298
320
  os.path.join(self._settings.files_dir, REQUIREMENTS_FNAME)
299
321
  ):
300
- wandb.termwarn(
301
- "No requirements.txt found, not creating job artifact. See https://docs.wandb.ai/guides/launch/create-job"
322
+ self._log_if_verbose(
323
+ "No requirements.txt found, not creating job artifact. See https://docs.wandb.ai/guides/launch/create-job",
324
+ "warn",
302
325
  )
303
326
  return None
304
327
  metadata = self._handle_metadata_file()
305
328
  if metadata is None:
306
- wandb.termwarn(
307
- f"Ensure read and write access to run files dir: {self._settings.files_dir}, control this via the WANDB_DIR env var. See https://docs.wandb.ai/guides/track/environment-variables"
329
+ self._log_if_verbose(
330
+ f"Ensure read and write access to run files dir: {self._settings.files_dir}, control this via the WANDB_DIR env var. See https://docs.wandb.ai/guides/track/environment-variables",
331
+ "warn",
308
332
  )
309
333
  return None
310
334
 
311
335
  runtime: Optional[str] = metadata.get("python")
312
336
  # can't build a job without a python version
313
337
  if runtime is None:
314
- wandb.termwarn(
315
- "No python version found in metadata, not creating job artifact. See https://docs.wandb.ai/guides/launch/create-job"
338
+ self._log_if_verbose(
339
+ "No python version found in metadata, not creating job artifact. See https://docs.wandb.ai/guides/launch/create-job",
340
+ "warn",
316
341
  )
317
342
  return None
318
343
 
@@ -343,13 +368,16 @@ class JobBuilder:
343
368
  or self._settings.job_source
344
369
  or self._source_type
345
370
  ):
346
- wandb.termwarn("No source type found, not creating job artifact")
371
+ self._log_if_verbose(
372
+ "No source type found, not creating job artifact", "warn"
373
+ )
347
374
  return None
348
375
 
349
376
  program_relpath = self._get_program_relpath(source_type, metadata)
350
377
  if source_type != "image" and not program_relpath:
351
- wandb.termwarn(
352
- "No program path found, not creating job artifact. See https://docs.wandb.ai/guides/launch/create-job"
378
+ self._log_if_verbose(
379
+ "No program path found, not creating job artifact. See https://docs.wandb.ai/guides/launch/create-job",
380
+ "warn",
353
381
  )
354
382
  return None
355
383
 
@@ -375,10 +403,11 @@ class JobBuilder:
375
403
 
376
404
  if source is None:
377
405
  if source_type:
378
- wandb.termwarn(
406
+ self._log_if_verbose(
379
407
  f"Source type is set to '{source_type}' but some required information is missing "
380
408
  "from the environment. A job will not be created from this run. See "
381
- "https://docs.wandb.ai/guides/launch/create-job"
409
+ "https://docs.wandb.ai/guides/launch/create-job",
410
+ "warn",
382
411
  )
383
412
  return None
384
413
 
@@ -445,8 +474,9 @@ class JobBuilder:
445
474
  program = metadata.get("program")
446
475
 
447
476
  if not program:
448
- wandb.termwarn(
449
- "Notebook 'program' path not found in metadata. See https://docs.wandb.ai/guides/launch/create-job"
477
+ self._log_if_verbose(
478
+ "Notebook 'program' path not found in metadata. See https://docs.wandb.ai/guides/launch/create-job",
479
+ "warn",
450
480
  )
451
481
 
452
482
  return program
@@ -52,7 +52,7 @@ def torch_trace_handler():
52
52
  prof.step()
53
53
  ```
54
54
  """
55
- from pkg_resources import parse_version
55
+ from wandb.util import parse_version
56
56
 
57
57
  torch = wandb.util.get_module(PYTORCH_MODULE, required=True)
58
58
  torch_profiler = wandb.util.get_module(PYTORCH_PROFILER_MODULE, required=True)