wandb 0.16.3__py3-none-any.whl → 0.16.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. wandb/__init__.py +2 -2
  2. wandb/agents/pyagent.py +1 -1
  3. wandb/apis/importers/__init__.py +1 -4
  4. wandb/apis/importers/internals/internal.py +386 -0
  5. wandb/apis/importers/internals/protocols.py +125 -0
  6. wandb/apis/importers/internals/util.py +78 -0
  7. wandb/apis/importers/mlflow.py +125 -88
  8. wandb/apis/importers/validation.py +108 -0
  9. wandb/apis/importers/wandb.py +1604 -0
  10. wandb/apis/public/api.py +7 -10
  11. wandb/apis/public/artifacts.py +38 -0
  12. wandb/apis/public/files.py +11 -2
  13. wandb/apis/reports/v2/__init__.py +0 -19
  14. wandb/apis/reports/v2/expr_parsing.py +0 -1
  15. wandb/apis/reports/v2/interface.py +15 -18
  16. wandb/apis/reports/v2/internal.py +12 -45
  17. wandb/cli/cli.py +52 -55
  18. wandb/integration/gym/__init__.py +2 -1
  19. wandb/integration/keras/callbacks/model_checkpoint.py +1 -1
  20. wandb/integration/keras/keras.py +6 -4
  21. wandb/integration/kfp/kfp_patch.py +2 -2
  22. wandb/integration/openai/fine_tuning.py +1 -2
  23. wandb/integration/ultralytics/callback.py +0 -1
  24. wandb/proto/v3/wandb_internal_pb2.py +332 -312
  25. wandb/proto/v3/wandb_settings_pb2.py +13 -3
  26. wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
  27. wandb/proto/v4/wandb_internal_pb2.py +316 -312
  28. wandb/proto/v4/wandb_settings_pb2.py +5 -3
  29. wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
  30. wandb/sdk/artifacts/artifact.py +75 -31
  31. wandb/sdk/artifacts/artifact_manifest.py +5 -2
  32. wandb/sdk/artifacts/artifact_manifest_entry.py +6 -1
  33. wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +8 -2
  34. wandb/sdk/artifacts/artifact_saver.py +19 -47
  35. wandb/sdk/artifacts/storage_handler.py +2 -1
  36. wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +22 -9
  37. wandb/sdk/artifacts/storage_policy.py +4 -1
  38. wandb/sdk/data_types/base_types/wb_value.py +1 -1
  39. wandb/sdk/data_types/image.py +2 -2
  40. wandb/sdk/interface/interface.py +49 -13
  41. wandb/sdk/interface/interface_shared.py +17 -11
  42. wandb/sdk/internal/file_stream.py +20 -1
  43. wandb/sdk/internal/handler.py +1 -4
  44. wandb/sdk/internal/internal_api.py +3 -1
  45. wandb/sdk/internal/job_builder.py +49 -19
  46. wandb/sdk/internal/profiler.py +1 -1
  47. wandb/sdk/internal/sender.py +96 -124
  48. wandb/sdk/internal/sender_config.py +197 -0
  49. wandb/sdk/internal/settings_static.py +9 -0
  50. wandb/sdk/internal/system/system_info.py +5 -3
  51. wandb/sdk/internal/update.py +1 -1
  52. wandb/sdk/launch/_launch.py +3 -3
  53. wandb/sdk/launch/_launch_add.py +28 -29
  54. wandb/sdk/launch/_project_spec.py +148 -136
  55. wandb/sdk/launch/agent/agent.py +3 -7
  56. wandb/sdk/launch/agent/config.py +0 -27
  57. wandb/sdk/launch/builder/build.py +54 -28
  58. wandb/sdk/launch/builder/docker_builder.py +4 -15
  59. wandb/sdk/launch/builder/kaniko_builder.py +72 -45
  60. wandb/sdk/launch/create_job.py +6 -40
  61. wandb/sdk/launch/loader.py +10 -0
  62. wandb/sdk/launch/registry/anon.py +29 -0
  63. wandb/sdk/launch/registry/local_registry.py +4 -1
  64. wandb/sdk/launch/runner/kubernetes_runner.py +20 -2
  65. wandb/sdk/launch/runner/local_container.py +15 -10
  66. wandb/sdk/launch/runner/sagemaker_runner.py +1 -1
  67. wandb/sdk/launch/sweeps/scheduler.py +11 -3
  68. wandb/sdk/launch/utils.py +14 -0
  69. wandb/sdk/lib/__init__.py +2 -5
  70. wandb/sdk/lib/_settings_toposort_generated.py +4 -1
  71. wandb/sdk/lib/apikey.py +0 -5
  72. wandb/sdk/lib/config_util.py +0 -31
  73. wandb/sdk/lib/filesystem.py +11 -1
  74. wandb/sdk/lib/run_moment.py +72 -0
  75. wandb/sdk/service/service.py +7 -2
  76. wandb/sdk/service/streams.py +1 -6
  77. wandb/sdk/verify/verify.py +2 -1
  78. wandb/sdk/wandb_init.py +12 -1
  79. wandb/sdk/wandb_login.py +43 -26
  80. wandb/sdk/wandb_run.py +164 -110
  81. wandb/sdk/wandb_settings.py +58 -16
  82. wandb/testing/relay.py +5 -6
  83. wandb/util.py +50 -7
  84. {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/METADATA +8 -1
  85. {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/RECORD +89 -82
  86. {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/WHEEL +1 -1
  87. wandb/apis/importers/base.py +0 -400
  88. {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/LICENSE +0 -0
  89. {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/entry_points.txt +0 -0
  90. {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,5 @@
1
1
  """sender."""
2
2
 
3
- import concurrent.futures
4
3
  import json
5
4
  import logging
6
5
  import os
@@ -18,12 +17,10 @@ from typing import (
18
17
  Dict,
19
18
  Generator,
20
19
  List,
21
- NewType,
22
20
  Optional,
23
21
  Tuple,
24
22
  Type,
25
23
  Union,
26
- cast,
27
24
  )
28
25
 
29
26
  import requests
@@ -43,6 +40,7 @@ from wandb.sdk.internal import (
43
40
  file_stream,
44
41
  internal_api,
45
42
  job_builder,
43
+ sender_config,
46
44
  update,
47
45
  )
48
46
  from wandb.sdk.internal.file_pusher import FilePusher
@@ -85,9 +83,6 @@ if TYPE_CHECKING:
85
83
  logger = logging.getLogger(__name__)
86
84
 
87
85
 
88
- DictWithValues = NewType("DictWithValues", Dict[str, Any])
89
- DictNoValues = NewType("DictNoValues", Dict[str, Any])
90
-
91
86
  _OUTPUT_MIN_CALLBACK_INTERVAL = 2 # seconds
92
87
 
93
88
 
@@ -120,6 +115,7 @@ def _manifest_json_from_proto(manifest: "ArtifactManifest") -> Dict:
120
115
  "ref": content.ref if content.ref else None,
121
116
  "size": content.size if content.size is not None else None,
122
117
  "local_path": content.local_path if content.local_path else None,
118
+ "skip_cache": content.skip_cache,
123
119
  "extra": {
124
120
  extra.key: json.loads(extra.value_json) for extra in content.extra
125
121
  },
@@ -263,8 +259,9 @@ class SendManager:
263
259
  self._project = None
264
260
 
265
261
  # keep track of config from key/val updates
266
- self._consolidated_config: DictNoValues = cast(DictNoValues, dict())
267
- self._start_time: float = 0
262
+ self._consolidated_config = sender_config.ConfigState()
263
+
264
+ self._start_time: int = 0
268
265
  self._telemetry_obj = telemetry.TelemetryRecord()
269
266
  self._config_metric_pbdict_list: List[Dict[int, Any]] = []
270
267
  self._metadata_summary: Dict[str, Any] = defaultdict()
@@ -540,7 +537,7 @@ class SendManager:
540
537
  self._maybe_update_config(always=final)
541
538
 
542
539
  def _debounce_config(self) -> None:
543
- config_value_dict = self._config_format(self._consolidated_config)
540
+ config_value_dict = self._config_backend_dict()
544
541
  # TODO(jhr): check result of upsert_run?
545
542
  if self._run:
546
543
  self._api.upsert_run(
@@ -737,18 +734,7 @@ class SendManager:
737
734
  )
738
735
  self._respond_result(result)
739
736
 
740
- def send_request_job_info(self, record: "Record") -> None:
741
- """Respond to a request for a job link."""
742
- result = proto_util._result_from_record(record)
743
- result.response.job_info_response.sequenceId = (
744
- self._job_builder._job_seq_id or ""
745
- )
746
- result.response.job_info_response.version = (
747
- self._job_builder._job_version_alias or ""
748
- )
749
- self._respond_result(result)
750
-
751
- def _maybe_setup_resume(
737
+ def _setup_resume(
752
738
  self, run: "RunRecord"
753
739
  ) -> Optional["wandb_internal_pb2.ErrorInfo"]:
754
740
  """Queries the backend for a run; fail if the settings are incompatible."""
@@ -856,51 +842,20 @@ class SendManager:
856
842
  )
857
843
  return framework
858
844
 
859
- def _config_telemetry_update(self, config_dict: Dict[str, Any]) -> None:
860
- """Add legacy telemetry to config object."""
861
- wandb_key = "_wandb"
862
- config_dict.setdefault(wandb_key, dict())
863
- s: str
864
- b: bool
865
- s = self._telemetry_obj.python_version
866
- if s:
867
- config_dict[wandb_key]["python_version"] = s
868
- s = self._telemetry_obj.cli_version
869
- if s:
870
- config_dict[wandb_key]["cli_version"] = s
871
- s = self._telemetry_get_framework()
872
- if s:
873
- config_dict[wandb_key]["framework"] = s
874
- s = self._telemetry_obj.huggingface_version
875
- if s:
876
- config_dict[wandb_key]["huggingface_version"] = s
877
- b = self._telemetry_obj.env.jupyter
878
- config_dict[wandb_key]["is_jupyter_run"] = b
879
- b = self._telemetry_obj.env.kaggle
880
- config_dict[wandb_key]["is_kaggle_kernel"] = b
881
-
882
- config_dict[wandb_key]["start_time"] = self._start_time
883
-
884
- t: Dict[int, Any] = proto_util.proto_encode_to_dict(self._telemetry_obj)
885
- config_dict[wandb_key]["t"] = t
886
-
887
- def _config_metric_update(self, config_dict: Dict[str, Any]) -> None:
888
- """Add default xaxis to config."""
889
- if not self._config_metric_pbdict_list:
890
- return
891
- wandb_key = "_wandb"
892
- config_dict.setdefault(wandb_key, dict())
893
- config_dict[wandb_key]["m"] = self._config_metric_pbdict_list
894
-
895
- def _config_format(self, config_data: Optional[DictNoValues]) -> DictWithValues:
896
- """Format dict into value dict with telemetry info."""
897
- config_dict: Dict[str, Any] = config_data.copy() if config_data else dict()
898
- self._config_telemetry_update(config_dict)
899
- self._config_metric_update(config_dict)
900
- config_value_dict: DictWithValues = config_util.dict_add_value_dict(config_dict)
901
- return config_value_dict
902
-
903
- def _config_save(self, config_value_dict: DictWithValues) -> None:
845
def _config_backend_dict(self) -> sender_config.BackendConfigDict:
    """Render the consolidated run config in the format sent to the backend.

    The telemetry record, detected framework, run start time and metric
    definitions held by this sender are handed to
    ``ConfigState.to_backend_dict`` together with the stored config.

    Returns:
        The value produced by ``to_backend_dict`` for the current state.
    """
    state = self._consolidated_config
    if not state:
        # Fall back to an empty config state if none is available.
        state = sender_config.ConfigState()

    # NOTE(review): send_run computes _start_time as
    # `ToMicroseconds() // 1e6`, which looks like seconds even though the
    # receiving parameter is called `start_time_millis` -- confirm the unit.
    backend_kwargs = dict(
        telemetry_record=self._telemetry_obj,
        framework=self._telemetry_get_framework(),
        start_time_millis=self._start_time,
        metric_pbdicts=self._config_metric_pbdict_list,
    )
    return state.to_backend_dict(**backend_kwargs)
854
+
855
+ def _config_save(
856
+ self,
857
+ config_value_dict: sender_config.BackendConfigDict,
858
+ ) -> None:
904
859
  config_path = os.path.join(self._settings.files_dir, "config.yaml")
905
860
  config_util.save_config_file_from_dict(config_path, config_value_dict)
906
861
 
@@ -925,13 +880,37 @@ class SendManager:
925
880
  pass
926
881
  # TODO: do something if sync spell is not successful?
927
882
 
883
+ def _setup_fork(self, server_run: dict):
884
+ assert self._settings.fork_from
885
+ assert self._settings.fork_from.metric == "_step"
886
+ assert self._run
887
+ first_step = int(self._settings.fork_from.value) + 1
888
+ self._resume_state.step = first_step
889
+ self._resume_state.history = server_run.get("historyLineCount", 0)
890
+ self._run.forked = True
891
+ self._run.starting_step = first_step
892
+
893
def _handle_error(
    self,
    record: "Record",
    error: "wandb_internal_pb2.ErrorInfo",
    run: "RunRecord",
) -> None:
    """Report a run-setup error to the requester, or log it.

    Args:
        record: The record being processed; its control flags decide
            whether a response is sent.
        error: The error to report.
        run: The run record copied into the response next to the error.
    """
    control = record.control
    wants_response = control.req_resp or control.mailbox_slot

    if not wants_response:
        # Nobody is waiting on a result, so logging is all we can do.
        logger.error("Got error in async mode: %s", error.message)
        return

    result = proto_util._result_from_record(record)
    run_result = result.run_result
    run_result.run.CopyFrom(run)
    run_result.error.CopyFrom(error)
    self._respond_result(result)
906
+
928
907
  def send_run(self, record: "Record", file_dir: Optional[str] = None) -> None:
929
908
  run = record.run
930
909
  error = None
931
910
  is_wandb_init = self._run is None
932
911
 
933
912
  # save start time of a run
934
- self._start_time = run.start_time.ToMicroseconds() / 1e6
913
+ self._start_time = run.start_time.ToMicroseconds() // 1e6
935
914
 
936
915
  # update telemetry
937
916
  if run.telemetry:
@@ -940,61 +919,69 @@ class SendManager:
940
919
  self._telemetry_obj.feature.sync = True
941
920
 
942
921
  # build config dict
943
- config_value_dict: Optional[DictWithValues] = None
922
+ config_value_dict: Optional[sender_config.BackendConfigDict] = None
944
923
  if run.config:
945
- config_util.update_from_proto(self._consolidated_config, run.config)
946
- config_value_dict = self._config_format(self._consolidated_config)
924
+ self._consolidated_config.update_from_proto(run.config)
925
+ config_value_dict = self._config_backend_dict()
947
926
  self._config_save(config_value_dict)
948
927
 
928
+ do_fork = self._settings.fork_from is not None and is_wandb_init
929
+ do_resume = bool(self._settings.resume)
930
+
931
+ if do_fork and do_resume:
932
+ error = wandb_internal_pb2.ErrorInfo()
933
+ error.code = wandb_internal_pb2.ErrorInfo.ErrorCode.USAGE
934
+ error.message = (
935
+ "You cannot use `resume` and `fork_from` together. Please choose one."
936
+ )
937
+ self._handle_error(record, error, run)
938
+
949
939
  if is_wandb_init:
950
940
  # Ensure we have a project to query for status
951
941
  if run.project == "":
952
942
  run.project = util.auto_project_name(self._settings.program)
953
943
  # Only check resume status on `wandb.init`
954
- error = self._maybe_setup_resume(run)
944
+
945
+ if do_resume:
946
+ error = self._setup_resume(run)
955
947
 
956
948
  if error is not None:
957
- if record.control.req_resp or record.control.mailbox_slot:
958
- result = proto_util._result_from_record(record)
959
- result.run_result.run.CopyFrom(run)
960
- result.run_result.error.CopyFrom(error)
961
- self._respond_result(result)
962
- else:
963
- logger.error("Got error in async mode: %s", error.message)
949
+ self._handle_error(record, error, run)
964
950
  return
965
951
 
966
952
  # Save the resumed config
967
953
  if self._resume_state.config is not None:
968
- # TODO: should we merge this with resumed config?
969
- config_override = self._consolidated_config
970
- config_dict = self._resume_state.config
971
- config_dict = config_util.dict_strip_value_dict(config_dict)
972
- config_dict.update(config_override)
973
- self._consolidated_config.update(config_dict)
974
- config_value_dict = self._config_format(self._consolidated_config)
954
+ self._consolidated_config.merge_resumed_config(
955
+ config_util.dict_strip_value_dict(self._resume_state.config)
956
+ )
957
+
958
+ config_value_dict = self._config_backend_dict()
975
959
  self._config_save(config_value_dict)
976
960
 
977
961
  # handle empty config
978
962
  # TODO(jhr): consolidate the 4 ways config is built:
979
963
  # (passed config, empty config, resume config, send_config)
980
964
  if not config_value_dict:
981
- config_value_dict = self._config_format(None)
965
+ config_value_dict = self._config_backend_dict()
982
966
  self._config_save(config_value_dict)
983
967
 
984
968
  try:
985
- self._init_run(run, config_value_dict)
969
+ server_run = self._init_run(run, config_value_dict)
986
970
  except (CommError, UsageError) as e:
987
971
  logger.error(e, exc_info=True)
988
- if record.control.req_resp or record.control.mailbox_slot:
989
- result = proto_util._result_from_record(record)
990
- result.run_result.run.CopyFrom(run)
991
- error = ProtobufErrorHandler.from_exception(e)
992
- result.run_result.error.CopyFrom(error)
993
- self._respond_result(result)
972
+ error = ProtobufErrorHandler.from_exception(e)
973
+ self._handle_error(record, error, run)
994
974
  return
995
975
 
996
976
  assert self._run # self._run is configured in _init_run()
997
977
 
978
+ if do_fork:
979
+ error = self._setup_fork(server_run)
980
+
981
+ if error is not None:
982
+ self._handle_error(record, error, run)
983
+ return
984
+
998
985
  if record.control.req_resp or record.control.mailbox_slot:
999
986
  result = proto_util._result_from_record(record)
1000
987
  # TODO: we could do self._interface.publish_defer(resp) to notify
@@ -1012,8 +999,8 @@ class SendManager:
1012
999
  def _init_run(
1013
1000
  self,
1014
1001
  run: "RunRecord",
1015
- config_dict: Optional[DictWithValues],
1016
- ) -> None:
1002
+ config_dict: Optional[sender_config.BackendConfigDict],
1003
+ ) -> dict:
1017
1004
  # We subtract the previous runs runtime when resuming
1018
1005
  start_time = (
1019
1006
  run.start_time.ToMicroseconds() / 1e6
@@ -1098,6 +1085,7 @@ class SendManager:
1098
1085
  self._run.sweep_id = sweep_id
1099
1086
  if os.getenv("SPELL_RUN_URL"):
1100
1087
  self._sync_spell()
1088
+ return server_run
1101
1089
 
1102
1090
  def _start_run_threads(self, file_dir: Optional[str] = None) -> None:
1103
1091
  assert self._run # self._run is configured by caller
@@ -1344,8 +1332,7 @@ class SendManager:
1344
1332
  self._config_needs_debounce = True
1345
1333
 
1346
1334
  def send_config(self, record: "Record") -> None:
1347
- cfg = record.config
1348
- config_util.update_from_proto(self._consolidated_config, cfg)
1335
+ self._consolidated_config.update_from_proto(record.config)
1349
1336
  self._update_config()
1350
1337
 
1351
1338
  def send_metric(self, record: "Record") -> None:
@@ -1457,40 +1444,27 @@ class SendManager:
1457
1444
  )
1458
1445
 
1459
1446
  def send_request_log_artifact(self, record: "Record") -> None:
1460
- assert record.control.mailbox_slot
1447
+ assert record.control.req_resp
1461
1448
  result = proto_util._result_from_record(record)
1462
1449
  artifact = record.request.log_artifact.artifact
1463
1450
  history_step = record.request.log_artifact.history_step
1464
1451
 
1465
- future = None
1466
1452
  try:
1467
- res, future = self._send_artifact(artifact, history_step)
1453
+ res = self._send_artifact(artifact, history_step)
1468
1454
  assert res, "Unable to send artifact"
1469
- result.response.log_artifact_response.artifact_id = res.get("id", None)
1455
+ result.response.log_artifact_response.artifact_id = res["id"]
1470
1456
  logger.info(f"logged artifact {artifact.name} - {res}")
1471
1457
  except Exception as e:
1472
1458
  result.response.log_artifact_response.error_message = (
1473
1459
  f'error logging artifact "{artifact.type}/{artifact.name}": {e}'
1474
1460
  )
1475
1461
 
1476
- def _respond_result(fut: concurrent.futures.Future):
1477
- if fut.exception() is not None:
1478
- result.response.log_artifact_response.error_message = f'error logging artifact "{artifact.type}/{artifact.name}": {fut.exception()}'
1479
- self._respond_result(result)
1480
-
1481
- if future is not None:
1482
- # respond to the request only after the artifact is fully committed
1483
- future.add_done_callback(_respond_result)
1484
- else:
1485
- self._respond_result(result)
1462
+ self._respond_result(result)
1486
1463
 
1487
1464
  def send_artifact(self, record: "Record") -> None:
1488
1465
  artifact = record.artifact
1489
1466
  try:
1490
- res, future = self._send_artifact(artifact)
1491
- # wait for future to complete in send artifact
1492
- if future is not None:
1493
- future.result()
1467
+ res = self._send_artifact(artifact)
1494
1468
  logger.info(f"sent artifact {artifact.name} - {res}")
1495
1469
  except Exception as e:
1496
1470
  logger.error(
@@ -1501,8 +1475,8 @@ class SendManager:
1501
1475
 
1502
1476
  def _send_artifact(
1503
1477
  self, artifact: "ArtifactRecord", history_step: Optional[int] = None
1504
- ) -> Tuple[Dict, Optional[concurrent.futures.Future]]:
1505
- from pkg_resources import parse_version
1478
+ ) -> Optional[Dict]:
1479
+ from wandb.util import parse_version
1506
1480
 
1507
1481
  assert self._pusher
1508
1482
  saver = ArtifactSaver(
@@ -1522,10 +1496,10 @@ class SendManager:
1522
1496
  "This W&B Server doesn't support distributed artifacts, "
1523
1497
  "have your administrator install wandb/local >= 0.9.37"
1524
1498
  )
1525
- return {}, None
1499
+ return None
1526
1500
 
1527
1501
  metadata = json.loads(artifact.metadata) if artifact.metadata else None
1528
- res, future = saver.save(
1502
+ res = saver.save(
1529
1503
  type=artifact.type,
1530
1504
  name=artifact.name,
1531
1505
  client_id=artifact.client_id,
@@ -1543,10 +1517,10 @@ class SendManager:
1543
1517
  )
1544
1518
 
1545
1519
  self._job_builder._handle_server_artifact(res, artifact)
1546
- return res, future
1520
+ return res
1547
1521
 
1548
1522
  def send_alert(self, record: "Record") -> None:
1549
- from pkg_resources import parse_version
1523
+ from wandb.util import parse_version
1550
1524
 
1551
1525
  alert = record.alert
1552
1526
  max_cli_version = self._max_cli_version()
@@ -1639,9 +1613,7 @@ class SendManager:
1639
1613
  def _flush_job(self) -> None:
1640
1614
  if self._job_builder.disable or self._settings._offline:
1641
1615
  return
1642
- self._job_builder.set_config(
1643
- {k: v for k, v in self._consolidated_config.items() if k != "_wandb"}
1644
- )
1616
+ self._job_builder.set_config(self._consolidated_config.non_internal_config())
1645
1617
  summary_dict = self._cached_summary.copy()
1646
1618
  summary_dict.pop("_wandb", None)
1647
1619
  self._job_builder.set_summary(summary_dict)
@@ -0,0 +1,197 @@
1
+ import json
2
+ from typing import Any, Dict, NewType, Optional, Sequence
3
+
4
+ from wandb.proto import wandb_internal_pb2
5
+ from wandb.sdk.lib import proto_util, telemetry
6
+
7
+ BackendConfigDict = NewType("BackendConfigDict", Dict[str, Any])
8
+ """Run config dictionary in the format used by the backend."""
9
+
10
+ _WANDB_INTERNAL_KEY = "_wandb"
11
+
12
+
13
class ConfigState:
    """The configuration of a run.

    The config is stored as a nested dictionary ("tree"). Updates and
    removals arrive as protobuf config records, a resumed run's config can
    be merged in, and the whole state can be rendered into the dictionary
    format expected by the backend.
    """

    def __init__(self, tree: Optional[Dict[str, Any]] = None) -> None:
        # A tree with string-valued nodes and JSON leaves.
        #
        # Leaves are Python objects that are valid JSON values:
        #
        # * Primitives like strings and numbers
        # * Dictionaries from strings to JSON objects
        # * Lists of JSON objects
        self._tree: Dict[str, Any] = tree or {}

    def non_internal_config(self) -> Dict[str, Any]:
        """Returns the config settings minus "_wandb"."""
        return {k: v for k, v in self._tree.items() if k != _WANDB_INTERNAL_KEY}

    def update_from_proto(
        self,
        config_record: wandb_internal_pb2.ConfigRecord,
    ) -> None:
        """Applies update and remove commands.

        All updates in the record are applied first, then all removals.
        """
        for config_item in config_record.update:
            self._update_at_path(
                _key_path(config_item),
                json.loads(config_item.value_json),
            )

        for config_item in config_record.remove:
            self._delete_at_path(_key_path(config_item))

    def merge_resumed_config(self, old_config_tree: Dict[str, Any]) -> None:
        """Merges the config from a run that's being resumed.

        Values already present in this config win; the old config only
        fills in keys that are not set yet.
        """
        # Add any top-level keys that aren't already set.
        self._add_unset_keys_from_subtree(old_config_tree, [])

        # Unfortunately, when a user logs visualizations, we store them in the
        # run's config. When resuming a run, we want to avoid erasing previously
        # logged visualizations, hence this special handling:
        self._add_unset_keys_from_subtree(
            old_config_tree,
            [_WANDB_INTERNAL_KEY, "visualize"],
        )
        self._add_unset_keys_from_subtree(
            old_config_tree,
            [_WANDB_INTERNAL_KEY, "viz"],
        )

    def _add_unset_keys_from_subtree(
        self,
        old_config_tree: Dict[str, Any],
        path: Sequence[str],
    ) -> None:
        """Uses the given subtree for keys that aren't already set.

        Does nothing when the old tree has no (non-empty) subtree at `path`.
        """
        old_subtree = _subtree(old_config_tree, path, create=False)
        if not old_subtree:
            return

        new_subtree = _subtree(self._tree, path, create=True)
        assert new_subtree is not None

        for key, value in old_subtree.items():
            if key not in new_subtree:
                new_subtree[key] = value

    def to_backend_dict(
        self,
        telemetry_record: telemetry.TelemetryRecord,
        framework: Optional[str],
        start_time_millis: int,
        metric_pbdicts: Sequence[Dict[int, Any]],
    ) -> BackendConfigDict:
        """Returns a dictionary representation expected by the backend.

        The backend expects the configuration in a specific format, and the
        config is also used to store additional metadata about the run.
        The stored config tree is not modified by this call.

        Args:
            telemetry_record: Telemetry information to insert.
            framework: The detected framework used in the run (e.g. TensorFlow).
            start_time_millis: The run's start time, stored unchanged under
                "start_time". NOTE(review): the name says milliseconds, but
                the sender appears to pass `microseconds // 1e6` -- confirm
                the unit the backend expects.
            metric_pbdicts: List of dict representations of metric protobuffers.

        Returns:
            A mapping from every top-level config key (including "_wandb")
            to ``{"desc": None, "value": <value>}``.
        """
        backend_dict = self._tree.copy()

        # `dict.copy()` is shallow, so the "_wandb" section must be copied
        # separately; otherwise the metadata below would be written into the
        # stored config whenever that section already exists.
        wandb_internal = dict(backend_dict.get(_WANDB_INTERNAL_KEY) or {})
        backend_dict[_WANDB_INTERNAL_KEY] = wandb_internal

        ###################################################
        # Telemetry information
        ###################################################
        py_version = telemetry_record.python_version
        if py_version:
            wandb_internal["python_version"] = py_version

        cli_version = telemetry_record.cli_version
        if cli_version:
            wandb_internal["cli_version"] = cli_version

        if framework:
            wandb_internal["framework"] = framework

        huggingface_version = telemetry_record.huggingface_version
        if huggingface_version:
            wandb_internal["huggingface_version"] = huggingface_version

        wandb_internal["is_jupyter_run"] = telemetry_record.env.jupyter
        wandb_internal["is_kaggle_kernel"] = telemetry_record.env.kaggle
        wandb_internal["start_time"] = start_time_millis

        # The full telemetry record.
        wandb_internal["t"] = proto_util.proto_encode_to_dict(telemetry_record)

        ###################################################
        # Metrics
        ###################################################
        if metric_pbdicts:
            wandb_internal["m"] = metric_pbdicts

        # Iterate the assembled dictionary, not the stored tree: the stored
        # tree lacks the metadata added above (and may lack "_wandb" entirely).
        return BackendConfigDict(
            {
                key: {
                    # Configurations can be stored in a hand-written YAML file,
                    # and users can add descriptions to their hyperparameters
                    # there. However, we don't support a way to set descriptions
                    # via code, so this is always None.
                    "desc": None,
                    "value": value,
                }
                for key, value in backend_dict.items()
            }
        )

    def _update_at_path(
        self,
        key_path: Sequence[str],
        value: Any,
    ) -> None:
        """Sets the value at the path in the config tree.

        Missing intermediate dictionaries along the path are created.
        """
        subtree = _subtree(self._tree, key_path[:-1], create=True)
        assert subtree is not None

        subtree[key_path[-1]] = value

    def _delete_at_path(
        self,
        key_path: Sequence[str],
    ) -> None:
        """Removes the subtree at the path in the config tree.

        Removing a path that does not exist is a no-op, whether it is the
        parent or the final key that is missing.
        """
        subtree = _subtree(self._tree, key_path[:-1], create=False)
        if subtree:
            subtree.pop(key_path[-1], None)
164
+
165
+
166
+ def _key_path(config_item: wandb_internal_pb2.ConfigItem) -> Sequence[str]:
167
+ """Returns the key path referenced by the config item."""
168
+ if config_item.nested_key:
169
+ return config_item.nested_key
170
+ elif config_item.key:
171
+ return [config_item.key]
172
+ else:
173
+ raise AssertionError(
174
+ "Invalid ConfigItem: either key or nested_key must be set",
175
+ )
176
+
177
+
178
+ def _subtree(
179
+ tree: Dict[str, Any],
180
+ key_path: Sequence[str],
181
+ *,
182
+ create: bool = False,
183
+ ) -> Optional[Dict[str, Any]]:
184
+ """Returns a subtree at the given path."""
185
+ for key in key_path:
186
+ subtree = tree.get(key)
187
+
188
+ if not subtree:
189
+ if create:
190
+ subtree = {}
191
+ tree[key] = subtree
192
+ else:
193
+ return None
194
+
195
+ tree = subtree
196
+
197
+ return tree
@@ -2,6 +2,7 @@ from dataclasses import fields
2
2
  from typing import Any, Iterable, Sequence, Tuple
3
3
 
4
4
  from wandb.proto import wandb_settings_pb2
5
+ from wandb.sdk.lib import RunMoment
5
6
  from wandb.sdk.wandb_settings import SettingsData
6
7
 
7
8
 
@@ -38,6 +39,14 @@ class SettingsStatic(SettingsData):
38
39
  unpacked_inner[inner_key] = inner_value
39
40
  unpacked_mapping[outer_key] = unpacked_inner
40
41
  value = unpacked_mapping
42
+ elif key == "fork_from":
43
+ value = getattr(proto, key)
44
+ if value.run:
45
+ value = RunMoment(
46
+ run=value.run, value=value.value, metric=value.metric
47
+ )
48
+ else:
49
+ value = None
41
50
  else:
42
51
  if proto.HasField(key): # type: ignore [arg-type]
43
52
  value = getattr(proto, key).value
@@ -14,7 +14,6 @@ from wandb.sdk.internal.settings_static import SettingsStatic
14
14
  from wandb.sdk.lib import filesystem
15
15
  from wandb.sdk.lib.filenames import CONDA_ENVIRONMENTS_FNAME, DIFF_FNAME, METADATA_FNAME
16
16
  from wandb.sdk.lib.gitlib import GitRepo
17
- from wandb.sdk.wandb_settings import _get_program_relpath
18
17
 
19
18
  from .assets.interfaces import Interface
20
19
 
@@ -168,7 +167,7 @@ class SystemInfo:
168
167
  data["program"] = self.settings.program
169
168
  # Used during artifact-job creation, always points to the relpath
170
169
  # of code execution, even when in a git repo
171
- data["codePathLocal"] = _get_program_relpath(self.settings.program)
170
+ data["codePathLocal"] = self.settings._code_path_local
172
171
  if not self.settings.disable_code:
173
172
  if self.settings.program_relpath:
174
173
  data["codePath"] = self.settings.program_relpath
@@ -213,7 +212,10 @@ class SystemInfo:
213
212
  os.path.join(self.settings.files_dir, CONDA_ENVIRONMENTS_FNAME), "w"
214
213
  ) as f:
215
214
  subprocess.call(
216
- ["conda", "env", "export"], stdout=f, stderr=subprocess.DEVNULL
215
+ ["conda", "env", "export"],
216
+ stdout=f,
217
+ stderr=subprocess.DEVNULL,
218
+ timeout=15, # add timeout since conda env export could take a really long time
217
219
  )
218
220
  except Exception as e:
219
221
  logger.exception(f"Error saving conda packages: {e}")
@@ -8,7 +8,7 @@ import wandb
8
8
  def _find_available(
9
9
  current_version: str,
10
10
  ) -> Optional[Tuple[str, bool, bool, bool, Optional[str]]]:
11
- from pkg_resources import parse_version
11
+ from wandb.util import parse_version
12
12
 
13
13
  pypi_url = f"https://pypi.org/pypi/{wandb._wandb_module}/json"
14
14
 
@@ -10,7 +10,7 @@ import wandb
10
10
  from wandb.apis.internal import Api
11
11
 
12
12
  from . import loader
13
- from ._project_spec import create_project_from_spec, fetch_and_validate_project
13
+ from ._project_spec import LaunchProject
14
14
  from .agent import LaunchAgent
15
15
  from .builder.build import construct_agent_configs
16
16
  from .environment.local_environment import LocalEnvironment
@@ -221,8 +221,8 @@ async def _launch(
221
221
  author=None,
222
222
  )
223
223
  validate_launch_spec_source(launch_spec)
224
- launch_project = create_project_from_spec(launch_spec, api)
225
- launch_project = fetch_and_validate_project(launch_project, api)
224
+ launch_project = LaunchProject.from_spec(launch_spec, api)
225
+ launch_project.fetch_and_validate_project()
226
226
  entrypoint = launch_project.get_single_entry_point()
227
227
  image_uri = launch_project.docker_image # Either set by user or None.
228
228