wandb 0.13.10__py3-none-any.whl → 0.14.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (228) hide show
  1. wandb/__init__.py +2 -3
  2. wandb/apis/__init__.py +1 -3
  3. wandb/apis/importers/__init__.py +4 -0
  4. wandb/apis/importers/base.py +312 -0
  5. wandb/apis/importers/mlflow.py +113 -0
  6. wandb/apis/internal.py +29 -2
  7. wandb/apis/normalize.py +6 -5
  8. wandb/apis/public.py +163 -180
  9. wandb/apis/reports/_templates.py +6 -12
  10. wandb/apis/reports/report.py +1 -1
  11. wandb/apis/reports/runset.py +1 -3
  12. wandb/apis/reports/util.py +12 -10
  13. wandb/beta/workflows.py +57 -34
  14. wandb/catboost/__init__.py +1 -2
  15. wandb/cli/cli.py +215 -133
  16. wandb/data_types.py +63 -56
  17. wandb/docker/__init__.py +78 -16
  18. wandb/docker/auth.py +21 -22
  19. wandb/env.py +0 -1
  20. wandb/errors/__init__.py +8 -116
  21. wandb/errors/term.py +1 -1
  22. wandb/fastai/__init__.py +1 -2
  23. wandb/filesync/dir_watcher.py +8 -5
  24. wandb/filesync/step_prepare.py +76 -75
  25. wandb/filesync/step_upload.py +1 -2
  26. wandb/integration/catboost/__init__.py +1 -3
  27. wandb/integration/catboost/catboost.py +8 -14
  28. wandb/integration/fastai/__init__.py +7 -13
  29. wandb/integration/gym/__init__.py +35 -4
  30. wandb/integration/keras/__init__.py +3 -3
  31. wandb/integration/keras/callbacks/metrics_logger.py +9 -8
  32. wandb/integration/keras/callbacks/model_checkpoint.py +9 -9
  33. wandb/integration/keras/callbacks/tables_builder.py +31 -19
  34. wandb/integration/kfp/kfp_patch.py +20 -17
  35. wandb/integration/kfp/wandb_logging.py +1 -2
  36. wandb/integration/lightgbm/__init__.py +21 -19
  37. wandb/integration/prodigy/prodigy.py +6 -7
  38. wandb/integration/sacred/__init__.py +9 -12
  39. wandb/integration/sagemaker/__init__.py +1 -3
  40. wandb/integration/sagemaker/auth.py +0 -1
  41. wandb/integration/sagemaker/config.py +1 -1
  42. wandb/integration/sagemaker/resources.py +1 -1
  43. wandb/integration/sb3/sb3.py +8 -4
  44. wandb/integration/tensorboard/__init__.py +1 -3
  45. wandb/integration/tensorboard/log.py +8 -8
  46. wandb/integration/tensorboard/monkeypatch.py +11 -9
  47. wandb/integration/tensorflow/__init__.py +1 -3
  48. wandb/integration/xgboost/__init__.py +4 -6
  49. wandb/integration/yolov8/__init__.py +7 -0
  50. wandb/integration/yolov8/yolov8.py +250 -0
  51. wandb/jupyter.py +31 -35
  52. wandb/lightgbm/__init__.py +1 -2
  53. wandb/old/settings.py +2 -2
  54. wandb/plot/bar.py +1 -2
  55. wandb/plot/confusion_matrix.py +1 -3
  56. wandb/plot/histogram.py +1 -2
  57. wandb/plot/line.py +1 -2
  58. wandb/plot/line_series.py +4 -4
  59. wandb/plot/pr_curve.py +17 -20
  60. wandb/plot/roc_curve.py +1 -3
  61. wandb/plot/scatter.py +1 -2
  62. wandb/proto/v3/wandb_server_pb2.py +85 -39
  63. wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
  64. wandb/proto/v4/wandb_server_pb2.py +51 -39
  65. wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
  66. wandb/sdk/__init__.py +1 -3
  67. wandb/sdk/backend/backend.py +1 -1
  68. wandb/sdk/data_types/_dtypes.py +38 -30
  69. wandb/sdk/data_types/base_types/json_metadata.py +1 -3
  70. wandb/sdk/data_types/base_types/media.py +17 -17
  71. wandb/sdk/data_types/base_types/wb_value.py +33 -26
  72. wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +91 -125
  73. wandb/sdk/data_types/helper_types/classes.py +1 -1
  74. wandb/sdk/data_types/helper_types/image_mask.py +12 -12
  75. wandb/sdk/data_types/histogram.py +5 -4
  76. wandb/sdk/data_types/html.py +1 -2
  77. wandb/sdk/data_types/image.py +11 -11
  78. wandb/sdk/data_types/molecule.py +3 -6
  79. wandb/sdk/data_types/object_3d.py +1 -2
  80. wandb/sdk/data_types/plotly.py +1 -2
  81. wandb/sdk/data_types/saved_model.py +10 -8
  82. wandb/sdk/data_types/video.py +1 -1
  83. wandb/sdk/integration_utils/data_logging.py +5 -5
  84. wandb/sdk/interface/artifacts.py +288 -266
  85. wandb/sdk/interface/interface.py +2 -3
  86. wandb/sdk/interface/interface_grpc.py +1 -1
  87. wandb/sdk/interface/interface_queue.py +1 -1
  88. wandb/sdk/interface/interface_relay.py +1 -1
  89. wandb/sdk/interface/interface_shared.py +1 -2
  90. wandb/sdk/interface/interface_sock.py +1 -1
  91. wandb/sdk/interface/message_future.py +1 -1
  92. wandb/sdk/interface/message_future_poll.py +1 -1
  93. wandb/sdk/interface/router.py +1 -1
  94. wandb/sdk/interface/router_queue.py +1 -1
  95. wandb/sdk/interface/router_relay.py +1 -1
  96. wandb/sdk/interface/router_sock.py +1 -1
  97. wandb/sdk/interface/summary_record.py +1 -1
  98. wandb/sdk/internal/artifacts.py +1 -1
  99. wandb/sdk/internal/datastore.py +2 -3
  100. wandb/sdk/internal/file_pusher.py +5 -3
  101. wandb/sdk/internal/file_stream.py +22 -19
  102. wandb/sdk/internal/handler.py +5 -4
  103. wandb/sdk/internal/internal.py +1 -1
  104. wandb/sdk/internal/internal_api.py +115 -55
  105. wandb/sdk/internal/job_builder.py +1 -3
  106. wandb/sdk/internal/profiler.py +1 -1
  107. wandb/sdk/internal/progress.py +4 -6
  108. wandb/sdk/internal/sample.py +1 -3
  109. wandb/sdk/internal/sender.py +28 -16
  110. wandb/sdk/internal/settings_static.py +5 -5
  111. wandb/sdk/internal/system/assets/__init__.py +1 -0
  112. wandb/sdk/internal/system/assets/cpu.py +3 -9
  113. wandb/sdk/internal/system/assets/disk.py +2 -4
  114. wandb/sdk/internal/system/assets/gpu.py +6 -18
  115. wandb/sdk/internal/system/assets/gpu_apple.py +2 -4
  116. wandb/sdk/internal/system/assets/interfaces.py +50 -22
  117. wandb/sdk/internal/system/assets/ipu.py +1 -3
  118. wandb/sdk/internal/system/assets/memory.py +7 -13
  119. wandb/sdk/internal/system/assets/network.py +4 -8
  120. wandb/sdk/internal/system/assets/open_metrics.py +283 -0
  121. wandb/sdk/internal/system/assets/tpu.py +1 -4
  122. wandb/sdk/internal/system/assets/trainium.py +26 -14
  123. wandb/sdk/internal/system/system_info.py +2 -3
  124. wandb/sdk/internal/system/system_monitor.py +52 -20
  125. wandb/sdk/internal/tb_watcher.py +12 -13
  126. wandb/sdk/launch/_project_spec.py +54 -65
  127. wandb/sdk/launch/agent/agent.py +374 -90
  128. wandb/sdk/launch/builder/abstract.py +61 -7
  129. wandb/sdk/launch/builder/build.py +81 -110
  130. wandb/sdk/launch/builder/docker_builder.py +181 -0
  131. wandb/sdk/launch/builder/kaniko_builder.py +419 -0
  132. wandb/sdk/launch/builder/noop.py +31 -12
  133. wandb/sdk/launch/builder/templates/_wandb_bootstrap.py +70 -20
  134. wandb/sdk/launch/environment/abstract.py +28 -0
  135. wandb/sdk/launch/environment/aws_environment.py +276 -0
  136. wandb/sdk/launch/environment/gcp_environment.py +271 -0
  137. wandb/sdk/launch/environment/local_environment.py +65 -0
  138. wandb/sdk/launch/github_reference.py +3 -8
  139. wandb/sdk/launch/launch.py +38 -29
  140. wandb/sdk/launch/launch_add.py +6 -8
  141. wandb/sdk/launch/loader.py +230 -0
  142. wandb/sdk/launch/registry/abstract.py +54 -0
  143. wandb/sdk/launch/registry/elastic_container_registry.py +163 -0
  144. wandb/sdk/launch/registry/google_artifact_registry.py +203 -0
  145. wandb/sdk/launch/registry/local_registry.py +62 -0
  146. wandb/sdk/launch/runner/abstract.py +1 -16
  147. wandb/sdk/launch/runner/{kubernetes.py → kubernetes_runner.py} +83 -95
  148. wandb/sdk/launch/runner/local_container.py +46 -22
  149. wandb/sdk/launch/runner/local_process.py +1 -4
  150. wandb/sdk/launch/runner/{aws.py → sagemaker_runner.py} +53 -212
  151. wandb/sdk/launch/runner/{gcp_vertex.py → vertex_runner.py} +38 -55
  152. wandb/sdk/launch/sweeps/__init__.py +3 -2
  153. wandb/sdk/launch/sweeps/scheduler.py +132 -39
  154. wandb/sdk/launch/sweeps/scheduler_sweep.py +80 -89
  155. wandb/sdk/launch/utils.py +101 -30
  156. wandb/sdk/launch/wandb_reference.py +2 -7
  157. wandb/sdk/lib/_settings_toposort_generate.py +166 -0
  158. wandb/sdk/lib/_settings_toposort_generated.py +201 -0
  159. wandb/sdk/lib/apikey.py +2 -4
  160. wandb/sdk/lib/config_util.py +4 -1
  161. wandb/sdk/lib/console.py +1 -3
  162. wandb/sdk/lib/deprecate.py +3 -3
  163. wandb/sdk/lib/file_stream_utils.py +7 -5
  164. wandb/sdk/lib/filenames.py +1 -1
  165. wandb/sdk/lib/filesystem.py +61 -5
  166. wandb/sdk/lib/git.py +1 -3
  167. wandb/sdk/lib/import_hooks.py +4 -7
  168. wandb/sdk/lib/ipython.py +8 -5
  169. wandb/sdk/lib/lazyloader.py +1 -3
  170. wandb/sdk/lib/mailbox.py +14 -4
  171. wandb/sdk/lib/proto_util.py +10 -5
  172. wandb/sdk/lib/redirect.py +15 -22
  173. wandb/sdk/lib/reporting.py +1 -3
  174. wandb/sdk/lib/retry.py +4 -5
  175. wandb/sdk/lib/runid.py +1 -3
  176. wandb/sdk/lib/server.py +15 -9
  177. wandb/sdk/lib/sock_client.py +1 -1
  178. wandb/sdk/lib/sparkline.py +1 -1
  179. wandb/sdk/lib/wburls.py +1 -1
  180. wandb/sdk/service/port_file.py +1 -2
  181. wandb/sdk/service/service.py +36 -13
  182. wandb/sdk/service/service_base.py +12 -1
  183. wandb/sdk/verify/verify.py +5 -7
  184. wandb/sdk/wandb_artifacts.py +142 -177
  185. wandb/sdk/wandb_config.py +5 -8
  186. wandb/sdk/wandb_helper.py +1 -1
  187. wandb/sdk/wandb_init.py +24 -13
  188. wandb/sdk/wandb_login.py +9 -9
  189. wandb/sdk/wandb_manager.py +39 -4
  190. wandb/sdk/wandb_metric.py +2 -6
  191. wandb/sdk/wandb_require.py +4 -15
  192. wandb/sdk/wandb_require_helpers.py +1 -9
  193. wandb/sdk/wandb_run.py +95 -141
  194. wandb/sdk/wandb_save.py +1 -3
  195. wandb/sdk/wandb_settings.py +149 -54
  196. wandb/sdk/wandb_setup.py +66 -46
  197. wandb/sdk/wandb_summary.py +13 -10
  198. wandb/sdk/wandb_sweep.py +6 -7
  199. wandb/sdk/wandb_watch.py +1 -1
  200. wandb/sklearn/calculate/confusion_matrix.py +1 -1
  201. wandb/sklearn/calculate/learning_curve.py +1 -1
  202. wandb/sklearn/calculate/summary_metrics.py +1 -3
  203. wandb/sklearn/plot/__init__.py +1 -1
  204. wandb/sklearn/plot/classifier.py +27 -18
  205. wandb/sklearn/plot/clusterer.py +4 -5
  206. wandb/sklearn/plot/regressor.py +4 -4
  207. wandb/sklearn/plot/shared.py +2 -2
  208. wandb/sync/__init__.py +1 -3
  209. wandb/sync/sync.py +4 -5
  210. wandb/testing/relay.py +11 -10
  211. wandb/trigger.py +1 -1
  212. wandb/util.py +106 -81
  213. wandb/viz.py +4 -4
  214. wandb/wandb_agent.py +50 -50
  215. wandb/wandb_controller.py +2 -3
  216. wandb/wandb_run.py +1 -2
  217. wandb/wandb_torch.py +1 -1
  218. wandb/xgboost/__init__.py +1 -2
  219. {wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/METADATA +6 -2
  220. {wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/RECORD +224 -209
  221. {wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/WHEEL +1 -1
  222. wandb/sdk/launch/builder/docker.py +0 -80
  223. wandb/sdk/launch/builder/kaniko.py +0 -393
  224. wandb/sdk/launch/builder/loader.py +0 -32
  225. wandb/sdk/launch/runner/loader.py +0 -50
  226. {wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/LICENSE +0 -0
  227. {wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/entry_points.txt +0 -0
  228. {wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/top_level.txt +0 -0
wandb/__init__.py CHANGED
@@ -5,13 +5,13 @@ The most commonly used functions/objects are:
5
5
  - wandb.config — track hyperparameters and metadata
6
6
  - wandb.log — log metrics and media over time within your training loop
7
7
 
8
- For guides and examples, see https://docs.wandb.com/guides.
8
+ For guides and examples, see https://docs.wandb.ai.
9
9
 
10
10
  For scripts and interactive notebooks, see https://github.com/wandb/examples.
11
11
 
12
12
  For reference documentation, see https://docs.wandb.com/ref/python.
13
13
  """
14
- __version__ = "0.13.10"
14
+ __version__ = "0.14.0"
15
15
 
16
16
  # Used with pypi checks and other messages related to pip
17
17
  _wandb_module = "wandb"
@@ -193,7 +193,6 @@ if wandb_sdk.lib.ipython.in_jupyter():
193
193
 
194
194
  load_ipython_extension(get_ipython())
195
195
 
196
- wandb.require("service")
197
196
 
198
197
  __all__ = [
199
198
  "__version__",
wandb/apis/__init__.py CHANGED
@@ -1,6 +1,4 @@
1
- """
2
- api.
3
- """
1
+ """api."""
4
2
 
5
3
  from typing import Callable
6
4
 
@@ -0,0 +1,4 @@
1
+ from wandb.util import get_module
2
+
3
+ if get_module("mlflow"):
4
+ from .mlflow import MlflowImporter, MlflowRun # noqa: F401
@@ -0,0 +1,312 @@
1
+ import json
2
+ import platform
3
+ from abc import ABC, abstractmethod
4
+ from concurrent.futures import ProcessPoolExecutor, as_completed
5
+ from contextlib import contextmanager
6
+ from typing import Any, Dict, Iterable, List, Optional, Tuple
7
+
8
+ from tqdm.auto import tqdm
9
+
10
+ import wandb
11
+ from wandb.proto import wandb_internal_pb2 as pb
12
+ from wandb.proto import wandb_telemetry_pb2 as telem_pb
13
+ from wandb.sdk.interface.interface import file_policy_to_enum
14
+ from wandb.sdk.interface.interface_queue import InterfaceQueue
15
+ from wandb.sdk.internal.sender import SendManager
16
+
17
+ Name = str
18
+ Path = str
19
+
20
+
21
+ def coalesce(*arg: Any) -> Any:
22
+ """Return the first non-none value in the list of arguments. Similar to ?? in C#."""
23
+ return next((a for a in arg if a is not None), None)
24
+
25
+
26
+ @contextmanager
27
+ def send_manager(root_dir):
28
+ sm = SendManager.setup(root_dir, resume=False)
29
+ try:
30
+ yield sm
31
+ finally:
32
+ # flush any remaining records
33
+ while sm:
34
+ data = next(sm)
35
+ sm.send(data)
36
+ sm.finish()
37
+
38
+
39
+ class ImporterRun:
40
+ def __init__(self) -> None:
41
+ self.interface = InterfaceQueue()
42
+ self.run_dir = f"./wandb-importer/{self.run_id()}"
43
+
44
+ def run_id(self) -> str:
45
+ _id = wandb.util.generate_id()
46
+ wandb.termwarn(f"`run_id` not specified. Autogenerating id: {_id}")
47
+ return _id
48
+
49
+ def entity(self) -> str:
50
+ _entity = "unspecified-entity"
51
+ wandb.termwarn(f"`entity` not specified. Defaulting to: {_entity}")
52
+ return _entity
53
+
54
+ def project(self) -> str:
55
+ _project = "unspecified-project"
56
+ wandb.termwarn(f"`project` not specified. Defaulting to: {_project}")
57
+ return _project
58
+
59
+ def config(self) -> Dict[str, Any]:
60
+ return {}
61
+
62
+ def summary(self) -> Dict[str, float]:
63
+ return {}
64
+
65
+ def metrics(self) -> List[Dict[str, float]]:
66
+ """Metrics for the run.
67
+
68
+ We expect metrics in this shape:
69
+
70
+ [
71
+ {'metric1': 1, 'metric2': 1, '_step': 0},
72
+ {'metric1': 2, 'metric2': 4, '_step': 1},
73
+ {'metric1': 3, 'metric2': 9, '_step': 2},
74
+ ...
75
+ ]
76
+
77
+ You can also submit metrics in this shape:
78
+ [
79
+ {'metric1': 1, '_step': 0},
80
+ {'metric2': 1, '_step': 0},
81
+ {'metric1': 2, '_step': 1},
82
+ {'metric2': 4, '_step': 1},
83
+ ...
84
+ ]
85
+ """
86
+ return []
87
+
88
+ def run_group(self) -> Optional[str]:
89
+ ...
90
+
91
+ def job_type(self) -> Optional[str]:
92
+ ...
93
+
94
+ def display_name(self) -> str:
95
+ return self.run_id()
96
+
97
+ def notes(self) -> Optional[str]:
98
+ ...
99
+
100
+ def tags(self) -> Optional[List[str]]:
101
+ ...
102
+
103
+ def artifacts(self) -> Optional[Iterable[Tuple[Name, Path]]]:
104
+ ...
105
+
106
+ def os_version(self) -> Optional[str]:
107
+ ...
108
+
109
+ def python_version(self) -> Optional[str]:
110
+ ...
111
+
112
+ def cuda_version(self) -> Optional[str]:
113
+ ...
114
+
115
+ def program(self) -> Optional[str]:
116
+ ...
117
+
118
+ def host(self) -> Optional[str]:
119
+ ...
120
+
121
+ def username(self) -> Optional[str]:
122
+ ...
123
+
124
+ def executable(self) -> Optional[str]:
125
+ ...
126
+
127
+ def gpus_used(self) -> Optional[str]:
128
+ ...
129
+
130
+ def cpus_used(self) -> Optional[int]: # can we get the model?
131
+ ...
132
+
133
+ def memory_used(self) -> Optional[int]:
134
+ ...
135
+
136
+ def runtime(self) -> Optional[int]:
137
+ ...
138
+
139
+ def start_time(self) -> Optional[int]:
140
+ ...
141
+
142
+ def _make_run_record(self) -> pb.Record:
143
+ run = pb.RunRecord()
144
+ run.run_id = self.run_id()
145
+ run.entity = self.entity()
146
+ run.project = self.project()
147
+ run.display_name = coalesce(self.display_name())
148
+ run.notes = coalesce(self.notes(), "")
149
+ run.tags.extend(coalesce(self.tags(), list()))
150
+ # run.start_time.FromMilliseconds(self.start_time())
151
+ # run.runtime = self.runtime()
152
+ run_group = self.run_group()
153
+ if run_group is not None:
154
+ run.run_group = run_group
155
+ self.interface._make_config(
156
+ data=self.config(),
157
+ obj=run.config,
158
+ ) # is there a better way?
159
+ return self.interface._make_record(run=run)
160
+
161
+ def _make_summary_record(self) -> pb.Record:
162
+ d: dict = {
163
+ **self.summary(),
164
+ "_runtime": self.runtime(), # quirk of runtime -- it has to be here!
165
+ # '_timestamp': self.start_time()/1000,
166
+ }
167
+ summary = self.interface._make_summary_from_dict(d)
168
+ return self.interface._make_record(summary=summary)
169
+
170
+ def _make_history_records(self) -> Iterable[pb.Record]:
171
+ for _, metrics in enumerate(self.metrics()):
172
+ history = pb.HistoryRecord()
173
+ for k, v in metrics.items():
174
+ item = history.item.add()
175
+ item.key = k
176
+ item.value_json = json.dumps(v)
177
+ yield self.interface._make_record(history=history)
178
+
179
+ def _make_files_record(self, files_dict) -> pb.Record:
180
+ # when making the metadata file, it captures most things correctly
181
+ # but notably it doesn't capture the start time!
182
+ files_record = pb.FilesRecord()
183
+ for path, policy in files_dict["files"]:
184
+ f = files_record.files.add()
185
+ f.path = path
186
+ f.policy = file_policy_to_enum(policy) # is this always "end"?
187
+ return self.interface._make_record(files=files_record)
188
+
189
+ def _make_metadata_files_record(self) -> pb.Record:
190
+ self._make_metadata_file(self.run_dir)
191
+ return self._make_files_record(
192
+ {"files": [[f"{self.run_dir}/files/wandb-metadata.json", "end"]]}
193
+ )
194
+
195
+ def _make_artifact_record(self) -> pb.Record:
196
+ art = wandb.Artifact(self.display_name(), "imported-artifacts")
197
+ artifacts = self.artifacts()
198
+ if artifacts is not None:
199
+ for name, path in artifacts:
200
+ art.add_file(path, name)
201
+ proto = self.interface._make_artifact(art)
202
+ proto.run_id = self.run_id()
203
+ proto.project = self.project()
204
+ proto.entity = self.entity()
205
+ proto.user_created = False
206
+ proto.use_after_commit = False
207
+ proto.finalize = True
208
+ for tag in ["latest", "imported"]:
209
+ proto.aliases.append(tag)
210
+ return self.interface._make_record(artifact=proto)
211
+
212
+ def _make_telem_record(self) -> pb.Record:
213
+ feature = telem_pb.Feature()
214
+ feature.importer_mlflow = True
215
+
216
+ telem = telem_pb.TelemetryRecord()
217
+ telem.feature.CopyFrom(feature)
218
+ telem.python_version = platform.python_version() # importer's python version
219
+ telem.cli_version = wandb.__version__
220
+ return self.interface._make_record(telemetry=telem)
221
+
222
+ def _make_metadata_file(self, run_dir: str) -> None:
223
+ missing_text = "MLFlow did not capture this info."
224
+
225
+ d = {}
226
+ if self.os_version() is not None:
227
+ d["os"] = self.os_version()
228
+ else:
229
+ d["os"] = missing_text
230
+
231
+ if self.python_version() is not None:
232
+ d["python"] = self.python_version()
233
+ else:
234
+ d["python"] = missing_text
235
+
236
+ if self.program() is not None:
237
+ d["program"] = self.program()
238
+ else:
239
+ d["program"] = missing_text
240
+
241
+ if self.cuda_version() is not None:
242
+ d["cuda"] = self.cuda_version()
243
+ if self.host() is not None:
244
+ d["host"] = self.host()
245
+ if self.username() is not None:
246
+ d["username"] = self.username()
247
+ if self.executable() is not None:
248
+ d["executable"] = self.executable()
249
+ gpus_used = self.gpus_used()
250
+ if gpus_used is not None:
251
+ d["gpu_devices"] = json.dumps(gpus_used)
252
+ d["gpu_count"] = json.dumps(len(gpus_used))
253
+ cpus_used = self.cpus_used()
254
+ if cpus_used is not None:
255
+ d["cpu_count"] = json.dumps(self.cpus_used())
256
+ mem_used = self.memory_used()
257
+ if mem_used is not None:
258
+ d["memory"] = json.dumps({"total": self.memory_used()})
259
+
260
+ with open(f"{run_dir}/files/wandb-metadata.json", "w") as f:
261
+ f.write(json.dumps(d))
262
+
263
+
264
+ class Importer(ABC):
265
+ @abstractmethod
266
+ def download_all_runs(self) -> Iterable[ImporterRun]:
267
+ ...
268
+
269
+ def import_all(self, overrides: Optional[Dict[str, Any]] = None) -> None:
270
+ for run in tqdm(self.download_all_runs(), desc="Sending runs"):
271
+ self.import_one(run, overrides)
272
+
273
+ def import_all_parallel(
274
+ self, overrides: Optional[Dict[str, Any]] = None, **pool_kwargs: Any
275
+ ) -> None:
276
+ runs = list(self.download_all_runs())
277
+ with tqdm(total=len(runs)) as pbar:
278
+ with ProcessPoolExecutor(**pool_kwargs) as exc:
279
+ futures = {
280
+ exc.submit(self.import_one, run, overrides=overrides): run
281
+ for run in runs
282
+ }
283
+ for future in as_completed(futures):
284
+ run = futures[future]
285
+ pbar.update(1)
286
+ pbar.set_description(
287
+ f"Imported Run: {run.run_group()} {run.display_name()}"
288
+ )
289
+
290
+ def import_one(
291
+ self,
292
+ run: ImporterRun,
293
+ overrides: Optional[Dict[str, Any]] = None,
294
+ ) -> None:
295
+ # does this need to be here for pmap?
296
+ if overrides:
297
+ for k, v in overrides.items():
298
+ # `lambda: v` won't work!
299
+ # https://stackoverflow.com/questions/10802002/why-deepcopy-doesnt-create-new-references-to-lambda-function
300
+ setattr(run, k, lambda v=v: v)
301
+ self._import_one(run)
302
+
303
+ def _import_one(self, run: ImporterRun) -> None:
304
+ with send_manager(run.run_dir) as sm:
305
+ sm.send(run._make_run_record())
306
+ sm.send(run._make_summary_record())
307
+ sm.send(run._make_metadata_files_record())
308
+ for history_record in run._make_history_records():
309
+ sm.send(history_record)
310
+ if run.artifacts() is not None:
311
+ sm.send(run._make_artifact_record())
312
+ sm.send(run._make_telem_record())
@@ -0,0 +1,113 @@
1
+ from typing import Any, Dict, Iterable, Optional
2
+
3
+ from wandb.util import get_module
4
+
5
+ from .base import Importer, ImporterRun
6
+
7
+ mlflow = get_module(
8
+ "mlflow",
9
+ required="To use the MlflowImporter, please install mlflow: `pip install mlflow`",
10
+ )
11
+
12
+
13
+ class MlflowRun(ImporterRun):
14
+ def __init__(self, run, mlflow_client):
15
+ self.run = run
16
+ self.mlflow_client = mlflow_client
17
+ super().__init__()
18
+
19
+ def run_id(self):
20
+ return self.run.info.run_id
21
+
22
+ def entity(self):
23
+ return self.run.info.user_id
24
+
25
+ def project(self):
26
+ return "imported-from-mlflow"
27
+
28
+ def config(self):
29
+ return self.run.data.params
30
+
31
+ def summary(self):
32
+ return self.run.data.metrics
33
+
34
+ def metrics(self):
35
+ def wandbify(metrics):
36
+ for step, t in enumerate(metrics):
37
+ d = {m.key: m.value for m in t}
38
+ d["_step"] = step
39
+ yield d
40
+
41
+ metrics = [
42
+ self.mlflow_client.get_metric_history(self.run.info.run_id, k)
43
+ for k in self.run.data.metrics.keys()
44
+ ]
45
+ metrics = zip(*metrics) # transpose
46
+ return wandbify(metrics)
47
+
48
+ # Alternate: Might be slower but use less mem
49
+ # Can't make this a generator. See mlflow get_metric_history internals
50
+ # https://github.com/mlflow/mlflow/blob/master/mlflow/tracking/_tracking_service/client.py#L74-L93
51
+ # for k in self.run.data.metrics.keys():
52
+ # history = self.mlflow_client.get_metric_history(self.run.info.run_id, k)
53
+ # yield wandbify(history)
54
+
55
+ def run_group(self):
56
+ # this is nesting? Parent at `run.info.tags.get("mlflow.parentRunId")`
57
+ return f"Experiment {self.run.info.experiment_id}"
58
+
59
+ def job_type(self):
60
+ # Is this the right approach?
61
+ return f"User {self.run.info.user_id}"
62
+
63
+ def display_name(self):
64
+ return self.run.info.run_name
65
+
66
+ def notes(self):
67
+ return self.run.data.tags.get("mlflow.note.content")
68
+
69
+ def tags(self):
70
+ return {
71
+ k: v for k, v in self.run.data.tags.items() if not k.startswith("mlflow.")
72
+ }
73
+
74
+ def start_time(self):
75
+ return self.run.info.start_time // 1000
76
+
77
+ def runtime(self):
78
+ return self.run.info.end_time // 1_000 - self.start_time()
79
+
80
+ def git(self):
81
+ ...
82
+
83
+ def artifacts(self):
84
+ for f in self.mlflow_client.list_artifacts(self.run.info.run_id):
85
+ dir_path = mlflow.artifacts.download_artifacts(run_id=self.run.info.run_id)
86
+ full_path = dir_path + f.path
87
+ yield (f.path, full_path)
88
+
89
+
90
+ class MlflowImporter(Importer):
91
+ def __init__(
92
+ self, mlflow_tracking_uri, mlflow_registry_uri=None, wandb_base_url=None
93
+ ) -> None:
94
+ super().__init__()
95
+ self.mlflow_tracking_uri = mlflow_tracking_uri
96
+
97
+ mlflow.set_tracking_uri(self.mlflow_tracking_uri)
98
+ if mlflow_registry_uri:
99
+ mlflow.set_registry_uri(mlflow_registry_uri)
100
+ self.mlflow_client = mlflow.tracking.MlflowClient(mlflow_tracking_uri)
101
+
102
+ def import_one(
103
+ self,
104
+ run: ImporterRun,
105
+ overrides: Optional[Dict[str, Any]] = None,
106
+ ) -> None:
107
+ mlflow.set_tracking_uri(self.mlflow_tracking_uri)
108
+ super().import_one(run, overrides)
109
+
110
+ def download_all_runs(self) -> Iterable[MlflowRun]:
111
+ for exp in self.mlflow_client.search_experiments():
112
+ for run in self.mlflow_client.search_runs(exp.experiment_id):
113
+ yield MlflowRun(run, self.mlflow_client)
wandb/apis/internal.py CHANGED
@@ -2,14 +2,32 @@ from wandb.sdk.internal.internal_api import Api as InternalApi
2
2
 
3
3
 
4
4
  class Api:
5
- """Internal proxy to the official internal API. Eventually these methods
6
- should likely be moved to PublicApi"""
5
+ """Internal proxy to the official internal API."""
6
+
7
+ # TODO: Move these methods to PublicApi.
7
8
 
8
9
  def __init__(self, *args, **kwargs):
9
10
  self._api_args = args
10
11
  self._api_kwargs = kwargs
11
12
  self._api = None
12
13
 
14
+ def __getstate__(self):
15
+ """Use for serializing.
16
+
17
+ self._api is not serializable, so it's dropped
18
+ """
19
+ state = self.__dict__.copy()
20
+ del state["_api"]
21
+ return state
22
+
23
+ def __setstate__(self, state):
24
+ """Used for deserializing.
25
+
26
+ Don't need to set self._api because it's constructed when needed.
27
+ """
28
+ self.__dict__.update(state)
29
+ self._api = None
30
+
13
31
  @property
14
32
  def api(self):
15
33
  # This is a property in order to delay construction of Internal API
@@ -143,6 +161,9 @@ class Api:
143
161
  def get_run_state(self, *args, **kwargs):
144
162
  return self.api.get_run_state(*args, **kwargs)
145
163
 
164
+ def entity_is_team(self, *args, **kwargs):
165
+ return self.api.entity_is_team(*args, **kwargs)
166
+
146
167
  def get_project_run_queues(self, *args, **kwargs):
147
168
  return self.api.get_project_run_queues(*args, **kwargs)
148
169
 
@@ -164,6 +185,12 @@ class Api:
164
185
  def launch_agent_introspection(self, *args, **kwargs):
165
186
  return self.api.launch_agent_introspection(*args, **kwargs)
166
187
 
188
+ def fail_run_queue_item_introspection(self, *args, **kwargs):
189
+ return self.api.fail_run_queue_item_introspection(*args, **kwargs)
190
+
191
+ def fail_run_queue_item(self, *args, **kwargs):
192
+ return self.api.fail_run_queue_item(*args, **kwargs)
193
+
167
194
  def get_launch_agent(self, *args, **kwargs):
168
195
  return self.api.get_launch_agent(*args, **kwargs)
169
196
 
wandb/apis/normalize.py CHANGED
@@ -1,6 +1,4 @@
1
- """
2
- normalize.
3
- """
1
+ """normalize."""
4
2
 
5
3
  import ast
6
4
  import sys
@@ -11,13 +9,14 @@ import requests
11
9
  from wandb_gql.client import RetryError
12
10
 
13
11
  from wandb import env
14
- from wandb.errors import CommError, ContextCancelledError
12
+ from wandb.errors import CommError
13
+ from wandb.sdk.lib.mailbox import ContextCancelledError
15
14
 
16
15
  _F = TypeVar("_F", bound=Callable)
17
16
 
18
17
 
19
18
  def normalize_exceptions(func: _F) -> _F:
20
- """Function decorator for catching common errors and re-raising as wandb.Error"""
19
+ """Function decorator for catching common errors and re-raising as wandb.Error."""
21
20
 
22
21
  @wraps(func)
23
22
  def wrapper(*args, **kwargs):
@@ -48,6 +47,8 @@ def normalize_exceptions(func: _F) -> _F:
48
47
  raise CommError(message, err.last_exception).with_traceback(
49
48
  sys.exc_info()[2]
50
49
  )
50
+ except CommError as err:
51
+ raise err
51
52
  except Exception as err:
52
53
  # gql raises server errors with dict's as strings...
53
54
  if len(err.args) > 0: