wandb 0.16.3__py3-none-any.whl → 0.16.5__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (90) hide show
  1. wandb/__init__.py +2 -2
  2. wandb/agents/pyagent.py +1 -1
  3. wandb/apis/importers/__init__.py +1 -4
  4. wandb/apis/importers/internals/internal.py +386 -0
  5. wandb/apis/importers/internals/protocols.py +125 -0
  6. wandb/apis/importers/internals/util.py +78 -0
  7. wandb/apis/importers/mlflow.py +125 -88
  8. wandb/apis/importers/validation.py +108 -0
  9. wandb/apis/importers/wandb.py +1604 -0
  10. wandb/apis/public/api.py +7 -10
  11. wandb/apis/public/artifacts.py +38 -0
  12. wandb/apis/public/files.py +11 -2
  13. wandb/apis/reports/v2/__init__.py +0 -19
  14. wandb/apis/reports/v2/expr_parsing.py +0 -1
  15. wandb/apis/reports/v2/interface.py +15 -18
  16. wandb/apis/reports/v2/internal.py +12 -45
  17. wandb/cli/cli.py +52 -55
  18. wandb/integration/gym/__init__.py +2 -1
  19. wandb/integration/keras/callbacks/model_checkpoint.py +1 -1
  20. wandb/integration/keras/keras.py +6 -4
  21. wandb/integration/kfp/kfp_patch.py +2 -2
  22. wandb/integration/openai/fine_tuning.py +1 -2
  23. wandb/integration/ultralytics/callback.py +0 -1
  24. wandb/proto/v3/wandb_internal_pb2.py +332 -312
  25. wandb/proto/v3/wandb_settings_pb2.py +13 -3
  26. wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
  27. wandb/proto/v4/wandb_internal_pb2.py +316 -312
  28. wandb/proto/v4/wandb_settings_pb2.py +5 -3
  29. wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
  30. wandb/sdk/artifacts/artifact.py +75 -31
  31. wandb/sdk/artifacts/artifact_manifest.py +5 -2
  32. wandb/sdk/artifacts/artifact_manifest_entry.py +6 -1
  33. wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +8 -2
  34. wandb/sdk/artifacts/artifact_saver.py +19 -47
  35. wandb/sdk/artifacts/storage_handler.py +2 -1
  36. wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +22 -9
  37. wandb/sdk/artifacts/storage_policy.py +4 -1
  38. wandb/sdk/data_types/base_types/wb_value.py +1 -1
  39. wandb/sdk/data_types/image.py +2 -2
  40. wandb/sdk/interface/interface.py +49 -13
  41. wandb/sdk/interface/interface_shared.py +17 -11
  42. wandb/sdk/internal/file_stream.py +20 -1
  43. wandb/sdk/internal/handler.py +1 -4
  44. wandb/sdk/internal/internal_api.py +3 -1
  45. wandb/sdk/internal/job_builder.py +49 -19
  46. wandb/sdk/internal/profiler.py +1 -1
  47. wandb/sdk/internal/sender.py +96 -124
  48. wandb/sdk/internal/sender_config.py +197 -0
  49. wandb/sdk/internal/settings_static.py +9 -0
  50. wandb/sdk/internal/system/system_info.py +5 -3
  51. wandb/sdk/internal/update.py +1 -1
  52. wandb/sdk/launch/_launch.py +3 -3
  53. wandb/sdk/launch/_launch_add.py +28 -29
  54. wandb/sdk/launch/_project_spec.py +148 -136
  55. wandb/sdk/launch/agent/agent.py +3 -7
  56. wandb/sdk/launch/agent/config.py +0 -27
  57. wandb/sdk/launch/builder/build.py +54 -28
  58. wandb/sdk/launch/builder/docker_builder.py +4 -15
  59. wandb/sdk/launch/builder/kaniko_builder.py +72 -45
  60. wandb/sdk/launch/create_job.py +6 -40
  61. wandb/sdk/launch/loader.py +10 -0
  62. wandb/sdk/launch/registry/anon.py +29 -0
  63. wandb/sdk/launch/registry/local_registry.py +4 -1
  64. wandb/sdk/launch/runner/kubernetes_runner.py +20 -2
  65. wandb/sdk/launch/runner/local_container.py +15 -10
  66. wandb/sdk/launch/runner/sagemaker_runner.py +1 -1
  67. wandb/sdk/launch/sweeps/scheduler.py +11 -3
  68. wandb/sdk/launch/utils.py +14 -0
  69. wandb/sdk/lib/__init__.py +2 -5
  70. wandb/sdk/lib/_settings_toposort_generated.py +4 -1
  71. wandb/sdk/lib/apikey.py +0 -5
  72. wandb/sdk/lib/config_util.py +0 -31
  73. wandb/sdk/lib/filesystem.py +11 -1
  74. wandb/sdk/lib/run_moment.py +72 -0
  75. wandb/sdk/service/service.py +7 -2
  76. wandb/sdk/service/streams.py +1 -6
  77. wandb/sdk/verify/verify.py +2 -1
  78. wandb/sdk/wandb_init.py +12 -1
  79. wandb/sdk/wandb_login.py +43 -26
  80. wandb/sdk/wandb_run.py +164 -110
  81. wandb/sdk/wandb_settings.py +58 -16
  82. wandb/testing/relay.py +5 -6
  83. wandb/util.py +50 -7
  84. {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/METADATA +8 -1
  85. {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/RECORD +89 -82
  86. {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/WHEEL +1 -1
  87. wandb/apis/importers/base.py +0 -400
  88. {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/LICENSE +0 -0
  89. {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/entry_points.txt +0 -0
  90. {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/top_level.txt +0 -0
@@ -1,400 +0,0 @@
1
- import json
2
- import os
3
- import queue
4
- import sys
5
- from dataclasses import dataclass
6
- from typing import Any, Dict, Iterable, List, Optional, Tuple
7
- from unittest.mock import patch
8
-
9
- from google.protobuf.json_format import ParseDict
10
- from tqdm import tqdm
11
-
12
- import wandb
13
- from wandb.proto import wandb_internal_pb2 as pb
14
- from wandb.proto import wandb_settings_pb2
15
- from wandb.proto import wandb_telemetry_pb2 as telem_pb
16
- from wandb.sdk.interface.interface import file_policy_to_enum
17
- from wandb.sdk.interface.interface_queue import InterfaceQueue
18
- from wandb.sdk.internal import context
19
- from wandb.sdk.internal.sender import SendManager
20
- from wandb.sdk.internal.settings_static import SettingsStatic
21
- from wandb.util import cast_dictlike_to_dict, coalesce
22
-
23
- if sys.version_info >= (3, 8):
24
- from typing import Protocol
25
- else:
26
- from typing_extensions import Protocol
27
-
28
-
29
- with patch("click.echo"):
30
- from wandb.apis.reports import Report
31
-
32
-
33
class ImporterRun(Protocol):
    """Structural interface for a run that the importer can upload.

    Every member is a zero-argument method. ``RecordMaker`` and
    ``send_run_with_send_manager`` in this module call these accessors to
    build the records that get sent.
    """

    def run_id(self) -> str:
        """Return the run id (also used in the importer's local run directory)."""

    def entity(self) -> str:
        """Return the entity the run is recorded under."""

    def project(self) -> str:
        """Return the project the run is recorded under."""

    def config(self) -> Dict[str, Any]:
        """Return the run config; the importer adds a ``_wandb`` section to it."""

    def summary(self) -> Dict[str, float]:
        """Return the summary values; the importer adds ``_runtime`` to them."""

    def metrics(self) -> Iterable[Dict[str, float]]:
        """Return the history rows of the run, one dict per row.

        Rows may carry every metric for a step at once:

            [
                {'metric1': 1, 'metric2': 1, '_step': 0},
                {'metric1': 2, 'metric2': 4, '_step': 1},
                {'metric1': 3, 'metric2': 9, '_step': 2},
                ...
            ]

        or a single metric per row, repeating the step:

            [
                {'metric1': 1, '_step': 0},
                {'metric2': 1, '_step': 0},
                {'metric1': 2, '_step': 1},
                {'metric2': 4, '_step': 1},
                ...
            ]
        """

    def run_group(self) -> Optional[str]:
        """Return the run group, or None to leave it unset on the run record."""

    def job_type(self) -> Optional[str]:
        """Return the job type, if any."""

    def display_name(self) -> str:
        """Return the display name written to the run record."""

    def notes(self) -> Optional[str]:
        """Return the run notes; None is recorded as an empty string."""

    def tags(self) -> Optional[List[str]]:
        """Return the run tags; None is recorded as no tags."""

    def artifacts(self) -> Optional[Iterable[wandb.Artifact]]:  # type: ignore
        """Return the artifacts to send as logged by the run, or None."""

    def used_artifacts(self) -> Optional[Iterable[wandb.Artifact]]:  # type: ignore
        """Return the artifacts to send as used by the run, or None."""

    def os_version(self) -> Optional[str]:
        """Return the value for the ``os`` entry of the metadata file."""

    def python_version(self) -> Optional[str]:
        """Return the Python version (metadata file, config and telemetry)."""

    def cuda_version(self) -> Optional[str]:
        """Return the value for the ``cuda`` entry of the metadata file."""

    def program(self) -> Optional[str]:
        """Return the value for the ``program`` entry of the metadata file."""

    def host(self) -> Optional[str]:
        """Return the host (run record and metadata file)."""

    def username(self) -> Optional[str]:
        """Return the value for the ``username`` entry of the metadata file."""

    def executable(self) -> Optional[str]:
        """Return the value for the ``executable`` entry of the metadata file."""

    def gpus_used(self) -> Optional[str]:
        """Return the GPU description; its ``len()`` is recorded as the GPU count."""

    def cpus_used(self) -> Optional[int]:  # can we get the model?
        """Return the value for the ``cpu_count`` entry of the metadata file."""

    def memory_used(self) -> Optional[int]:
        """Return the value for the ``memory.total`` entry of the metadata file."""

    def runtime(self) -> Optional[int]:
        """Return the runtime (run record and the ``_runtime`` summary key)."""

    def start_time(self) -> Optional[int]:
        """Return the start time in milliseconds."""

    def code_path(self) -> Optional[str]:
        """Return the code path stored under ``config['_wandb']``."""

    def cli_version(self) -> Optional[str]:
        """Return the CLI version (config and telemetry)."""

    def files(self) -> Optional[Iterable[Tuple[str, str]]]:
        """Return ``(path, policy)`` pairs to upload.

        When this is None the importer writes and uploads a generated
        ``wandb-metadata.json`` instead.
        """

    def logs(self) -> Optional[Iterable[str]]:
        """Return the output lines to send as stdout, or None."""
140
-
141
-
142
class Importer(Protocol):
    """Structural interface for an object that collects and imports data."""

    def collect_runs(self, *args, **kwargs) -> Iterable[ImporterRun]:
        """Return the runs that are candidates for import."""

    def collect_reports(self, *args, **kwargs) -> Iterable[Report]:
        """Return the reports that are candidates for import."""

    def import_run(self, run: ImporterRun) -> None:
        """Import a single run."""

    def import_report(self, report: Report) -> None:
        """Import a single report."""
154
-
155
-
156
@dataclass
class RecordMaker:
    """Build the protobuf records that describe an ``ImporterRun``.

    Each ``_make_*`` method reads values from ``self.run`` and wraps them in
    a ``pb.Record`` through ``self.interface``; nothing is sent from here.
    """

    run: ImporterRun
    # NOTE(review): this default is evaluated once, when the class is defined,
    # so every RecordMaker built without an explicit ``interface`` shares the
    # same InterfaceQueue instance. Consider
    # ``field(default_factory=InterfaceQueue)`` if it ever holds per-run state.
    interface: InterfaceQueue = InterfaceQueue()

    @property
    def run_dir(self) -> str:
        """Relative directory that holds this run's importer files."""
        return f"./wandb-importer/{self.run.run_id()}"

    def _make_run_record(self) -> pb.Record:
        """Build the run record (identity, notes, tags, timing and config)."""
        run = pb.RunRecord()
        run.run_id = self.run.run_id()
        run.entity = self.run.entity()
        run.project = self.run.project()
        run.display_name = coalesce(self.run.display_name())
        run.notes = coalesce(self.run.notes(), "")
        run.tags.extend(coalesce(self.run.tags(), []))

        # start_time() is declared Optional, so guard it like the other
        # optional fields instead of passing None to FromMilliseconds.
        start_time = self.run.start_time()
        if start_time is not None:
            run.start_time.FromMilliseconds(start_time)

        host = self.run.host()
        if host is not None:
            run.host = host

        runtime = self.run.runtime()
        if runtime is not None:
            run.runtime = runtime

        run_group = self.run.run_group()
        if run_group is not None:
            run.run_group = run_group

        # Work on copies so the dict handed back by the run (and any
        # "_wandb" section already in it) is not mutated as a side effect.
        config = dict(self.run.config())
        config["_wandb"] = dict(config.get("_wandb", {}))

        # how do I get this automatically?
        config["_wandb"]["code_path"] = self.run.code_path()
        config["_wandb"]["python_version"] = self.run.python_version()
        config["_wandb"]["cli_version"] = self.run.cli_version()

        self.interface._make_config(
            data=config,
            obj=run.config,
        )  # is there a better way?
        return self.interface._make_record(run=run)

    def _make_output_record(self, line) -> pb.Record:
        """Build a raw-output record carrying ``line`` as stdout."""
        output_raw = pb.OutputRawRecord()
        output_raw.output_type = pb.OutputRawRecord.OutputType.STDOUT
        output_raw.line = line
        return self.interface._make_record(output_raw=output_raw)

    def _make_summary_record(self) -> pb.Record:
        """Build the summary record from the run summary plus ``_runtime``."""
        d: dict = {
            **self.run.summary(),
            "_runtime": self.run.runtime(),  # quirk of runtime -- it has to be here!
            # '_timestamp': self.run.start_time()/1000,
        }
        d = cast_dictlike_to_dict(d)
        summary = self.interface._make_summary_from_dict(d)
        return self.interface._make_record(summary=summary)

    def _make_history_records(self) -> Iterable[pb.Record]:
        """Yield one history record per row returned by ``run.metrics()``."""
        for metrics in self.run.metrics():
            history = pb.HistoryRecord()
            for k, v in metrics.items():
                item = history.item.add()
                item.key = k
                item.value_json = json.dumps(v)
            yield self.interface._make_record(history=history)

    def _make_files_record(
        self,
        files_dict,
    ) -> pb.Record:
        """Build a files record from ``files_dict["files"]``.

        Args:
            files_dict: mapping whose ``"files"`` entry is an iterable of
                ``(path, policy)`` pairs.
        """
        files_record = pb.FilesRecord()
        for path, policy in files_dict["files"]:
            f = files_record.files.add()
            f.path = path
            f.policy = file_policy_to_enum(policy)  # is this always "end"?
        return self.interface._make_record(files=files_record)

    def _make_metadata_files_record(self) -> pb.Record:
        """Build the files record for the run.

        Uses ``run.files()`` when it returns something; otherwise writes a
        ``wandb-metadata.json`` file and uploads that with policy ``"end"``.
        """
        files = self.run.files()
        if files is None:
            metadata_fname = self._make_metadata_file()
            files = [(metadata_fname, "end")]

        files_dict = {"files": files}
        return self._make_files_record(files_dict)

    def _make_artifact_record(self, artifact, use_artifact=False) -> pb.Record:
        """Build an artifact record tagged with the run's identity.

        Args:
            artifact: artifact passed to ``interface._make_artifact``.
            use_artifact: value copied into the proto's ``user_created`` and
                ``use_after_commit`` flags.
        """
        proto = self.interface._make_artifact(artifact)
        proto.run_id = self.run.run_id()
        proto.project = self.run.project()
        proto.entity = self.run.entity()
        proto.user_created = use_artifact
        proto.use_after_commit = use_artifact
        proto.finalize = True
        for tag in ["latest", "imported"]:
            proto.aliases.append(tag)
        return self.interface._make_record(artifact=proto)

    def _make_telem_record(self) -> pb.Record:
        """Build a telemetry record flagging the MLflow importer feature."""
        telem = telem_pb.TelemetryRecord()

        feature = telem_pb.Feature()
        feature.importer_mlflow = True
        telem.feature.CopyFrom(feature)

        cli_version = self.run.cli_version()
        if cli_version:
            telem.cli_version = cli_version

        python_version = self.run.python_version()
        if python_version:
            telem.python_version = python_version

        return self.interface._make_record(telemetry=telem)

    def _make_metadata_file(self) -> str:
        """Write ``wandb-metadata.json`` under ``run_dir`` and return its path.

        Values the run does not provide are replaced with a placeholder
        string; GPU, CPU and memory entries are only written when present.
        """
        missing_text = "This data was not captured"

        d = {}
        d["os"] = coalesce(self.run.os_version(), missing_text)
        d["python"] = coalesce(self.run.python_version(), missing_text)
        d["program"] = coalesce(self.run.program(), missing_text)
        d["cuda"] = coalesce(self.run.cuda_version(), missing_text)
        d["host"] = coalesce(self.run.host(), missing_text)
        d["username"] = coalesce(self.run.username(), missing_text)
        d["executable"] = coalesce(self.run.executable(), missing_text)

        gpus_used = self.run.gpus_used()
        if gpus_used is not None:
            d["gpu_devices"] = json.dumps(gpus_used)
            # NOTE(review): gpus_used() is annotated Optional[str], so this is
            # a character count unless implementations return a sequence.
            d["gpu_count"] = json.dumps(len(gpus_used))

        # Reuse the values already fetched rather than calling the run again.
        cpus_used = self.run.cpus_used()
        if cpus_used is not None:
            d["cpu_count"] = json.dumps(cpus_used)

        mem_used = self.run.memory_used()
        if mem_used is not None:
            d["memory"] = json.dumps({"total": mem_used})

        fname = f"{self.run_dir}/files/wandb-metadata.json"
        # Nothing in this class creates the directory, so make sure it
        # exists before opening the file for writing.
        os.makedirs(os.path.dirname(fname), exist_ok=True)
        with open(fname, "w", encoding="utf-8") as f:
            f.write(json.dumps(d))
        return fname
305
-
306
-
307
def send_run_with_send_manager(
    run: ImporterRun,
    overrides: Optional[Dict[str, Any]] = None,
    settings_override: Optional[SettingsStatic] = None,
) -> None:
    """Send every record for ``run`` through a ``SendManager``.

    Records go out in this order: run, used artifacts, logged artifacts,
    files/metadata, history, summary, output lines, telemetry.

    Args:
        run: the run to import.
        overrides: attribute name -> value; each entry replaces the
            same-named accessor on ``run`` with a callable returning the value.
        settings_override: extra settings merged over the defaults built here.
            NOTE(review): annotated as SettingsStatic but unpacked with ``**``
            like a mapping; confirm what callers actually pass.
    """
    # does this need to be here for pmap?
    for attr, value in (overrides or {}).items():
        # `lambda: value` won't work! Bind the value as a default argument.
        # https://stackoverflow.com/questions/10802002/why-deepcopy-doesnt-create-new-references-to-lambda-function
        setattr(run, attr, lambda v=value: v)

    extra_settings = coalesce(settings_override, {})
    maker = RecordMaker(run)
    base_dir = maker.run_dir

    # Settings that default to None, listed separately from those with values.
    unset_keys = (
        "git_remote",
        "program",
        "run_id",
        "entity",
        "project",
        "run_group",
        "job_type",
        "run_tags",
        "run_name",
        "run_notes",
        "save_code",
        "email",
        "silent",
        "_offline",
        "_live_policy_rate_limit",
        "_live_policy_wait_time",
        "_async_upload_concurrency_limit",
    )
    settings_dict = {
        **dict.fromkeys(unset_keys),
        "files_dir": os.path.join(base_dir, "files"),
        "root_dir": base_dir,
        "_start_time": 0,
        "resume": False,
        "ignore_globs": (),
        "_sync": True,
        "disable_job_creation": False,
        **extra_settings,
    }
    settings_proto = wandb_settings_pb2.Settings()
    ParseDict(settings_dict, settings_proto)
    settings = SettingsStatic(settings_proto)

    records: queue.Queue = queue.Queue()
    results: queue.Queue = queue.Queue()
    interface = InterfaceQueue(record_q=records)
    keeper = context.ContextKeeper()

    with SendManager(settings, records, results, interface, keeper) as sender:

        def send_each(items, to_record, desc, unit):
            # Convert and send one item at a time behind a progress bar.
            for item in tqdm(items, desc=desc, unit=unit, leave=False):
                sender.send(to_record(item))

        wandb.termlog(">> Make run record")
        sender.send(maker._make_run_record())

        wandb.termlog(">> Use Artifacts")
        used = run.used_artifacts()
        if used is not None:
            send_each(
                used,
                lambda art: maker._make_artifact_record(art, use_artifact=True),
                "Used artifacts",
                "artifacts",
            )

        wandb.termlog(">> Log Artifacts")
        logged = run.artifacts()
        if logged is not None:
            send_each(
                logged, maker._make_artifact_record, "Logged artifacts", "artifacts"
            )

        wandb.termlog(">> Log Metadata")
        sender.send(maker._make_metadata_files_record())

        wandb.termlog(">> Log History")
        send_each(
            maker._make_history_records(), lambda record: record, "History", "steps"
        )

        wandb.termlog(">> Log Summary")
        sender.send(maker._make_summary_record())

        wandb.termlog(">> Log Output")
        output_lines = run.logs()
        if output_lines is not None:
            send_each(output_lines, maker._make_output_record, "Stdout", "lines")

        wandb.termlog(">> Log Telem")
        sender.send(maker._make_telem_record())