wandb 0.16.3__py3-none-any.whl → 0.16.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wandb/__init__.py +2 -2
- wandb/agents/pyagent.py +1 -1
- wandb/apis/importers/__init__.py +1 -4
- wandb/apis/importers/internals/internal.py +386 -0
- wandb/apis/importers/internals/protocols.py +125 -0
- wandb/apis/importers/internals/util.py +78 -0
- wandb/apis/importers/mlflow.py +125 -88
- wandb/apis/importers/validation.py +108 -0
- wandb/apis/importers/wandb.py +1604 -0
- wandb/apis/public/api.py +7 -10
- wandb/apis/public/artifacts.py +38 -0
- wandb/apis/public/files.py +11 -2
- wandb/apis/reports/v2/__init__.py +0 -19
- wandb/apis/reports/v2/expr_parsing.py +0 -1
- wandb/apis/reports/v2/interface.py +15 -18
- wandb/apis/reports/v2/internal.py +12 -45
- wandb/cli/cli.py +52 -55
- wandb/integration/gym/__init__.py +2 -1
- wandb/integration/keras/callbacks/model_checkpoint.py +1 -1
- wandb/integration/keras/keras.py +6 -4
- wandb/integration/kfp/kfp_patch.py +2 -2
- wandb/integration/openai/fine_tuning.py +1 -2
- wandb/integration/ultralytics/callback.py +0 -1
- wandb/proto/v3/wandb_internal_pb2.py +332 -312
- wandb/proto/v3/wandb_settings_pb2.py +13 -3
- wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
- wandb/proto/v4/wandb_internal_pb2.py +316 -312
- wandb/proto/v4/wandb_settings_pb2.py +5 -3
- wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
- wandb/sdk/artifacts/artifact.py +75 -31
- wandb/sdk/artifacts/artifact_manifest.py +5 -2
- wandb/sdk/artifacts/artifact_manifest_entry.py +6 -1
- wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +8 -2
- wandb/sdk/artifacts/artifact_saver.py +19 -47
- wandb/sdk/artifacts/storage_handler.py +2 -1
- wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +22 -9
- wandb/sdk/artifacts/storage_policy.py +4 -1
- wandb/sdk/data_types/base_types/wb_value.py +1 -1
- wandb/sdk/data_types/image.py +2 -2
- wandb/sdk/interface/interface.py +49 -13
- wandb/sdk/interface/interface_shared.py +17 -11
- wandb/sdk/internal/file_stream.py +20 -1
- wandb/sdk/internal/handler.py +1 -4
- wandb/sdk/internal/internal_api.py +3 -1
- wandb/sdk/internal/job_builder.py +49 -19
- wandb/sdk/internal/profiler.py +1 -1
- wandb/sdk/internal/sender.py +96 -124
- wandb/sdk/internal/sender_config.py +197 -0
- wandb/sdk/internal/settings_static.py +9 -0
- wandb/sdk/internal/system/system_info.py +5 -3
- wandb/sdk/internal/update.py +1 -1
- wandb/sdk/launch/_launch.py +3 -3
- wandb/sdk/launch/_launch_add.py +28 -29
- wandb/sdk/launch/_project_spec.py +148 -136
- wandb/sdk/launch/agent/agent.py +3 -7
- wandb/sdk/launch/agent/config.py +0 -27
- wandb/sdk/launch/builder/build.py +54 -28
- wandb/sdk/launch/builder/docker_builder.py +4 -15
- wandb/sdk/launch/builder/kaniko_builder.py +72 -45
- wandb/sdk/launch/create_job.py +6 -40
- wandb/sdk/launch/loader.py +10 -0
- wandb/sdk/launch/registry/anon.py +29 -0
- wandb/sdk/launch/registry/local_registry.py +4 -1
- wandb/sdk/launch/runner/kubernetes_runner.py +20 -2
- wandb/sdk/launch/runner/local_container.py +15 -10
- wandb/sdk/launch/runner/sagemaker_runner.py +1 -1
- wandb/sdk/launch/sweeps/scheduler.py +11 -3
- wandb/sdk/launch/utils.py +14 -0
- wandb/sdk/lib/__init__.py +2 -5
- wandb/sdk/lib/_settings_toposort_generated.py +4 -1
- wandb/sdk/lib/apikey.py +0 -5
- wandb/sdk/lib/config_util.py +0 -31
- wandb/sdk/lib/filesystem.py +11 -1
- wandb/sdk/lib/run_moment.py +72 -0
- wandb/sdk/service/service.py +7 -2
- wandb/sdk/service/streams.py +1 -6
- wandb/sdk/verify/verify.py +2 -1
- wandb/sdk/wandb_init.py +12 -1
- wandb/sdk/wandb_login.py +43 -26
- wandb/sdk/wandb_run.py +164 -110
- wandb/sdk/wandb_settings.py +58 -16
- wandb/testing/relay.py +5 -6
- wandb/util.py +50 -7
- {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/METADATA +8 -1
- {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/RECORD +89 -82
- {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/WHEEL +1 -1
- wandb/apis/importers/base.py +0 -400
- {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/LICENSE +0 -0
- {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/entry_points.txt +0 -0
- {wandb-0.16.3.dist-info → wandb-0.16.5.dist-info}/top_level.txt +0 -0
wandb/__init__.py
CHANGED
@@ -11,8 +11,8 @@ For scripts and interactive notebooks, see https://github.com/wandb/examples.
 
 For reference documentation, see https://docs.wandb.com/ref/python.
 """
-__version__ = "0.16.3"
-_minimum_core_version = "0.17.
+__version__ = "0.16.5"
+_minimum_core_version = "0.17.0b10"
 
 # Used with pypi checks and other messages related to pip
 _wandb_module = "wandb"
wandb/agents/pyagent.py
CHANGED
@@ -347,7 +347,7 @@ def pyagent(sweep_id, function, entity=None, project=None, count=None):
         count (int, optional): the number of trials to run.
     """
     if not callable(function):
-        raise Exception("function
+        raise Exception("function parameter must be callable!")
     agent = Agent(
         sweep_id,
         function=function,
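The only change here is the wording of the error raised when the function argument is not callable. A minimal usage sketch, assuming a sweep already exists; the sweep id, entity, and project values are placeholders:

import wandb
from wandb.agents.pyagent import pyagent

def train():
    run = wandb.init()
    run.log({"loss": 0.5})
    run.finish()

# "function" must be callable; passing anything else raises the Exception shown above.
pyagent("SWEEP_ID", train, entity="my-entity", project="my-project", count=3)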
wandb/apis/importers/internals/internal.py
ADDED
@@ -0,0 +1,386 @@
import json
import logging
import math
import os
import queue
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Iterable, Optional

import numpy as np
from google.protobuf.json_format import ParseDict
from tenacity import retry, stop_after_attempt, wait_random_exponential

from wandb import Artifact
from wandb.proto import wandb_internal_pb2 as pb
from wandb.proto import wandb_settings_pb2
from wandb.proto import wandb_telemetry_pb2 as telem_pb
from wandb.sdk.interface.interface import file_policy_to_enum
from wandb.sdk.interface.interface_queue import InterfaceQueue
from wandb.sdk.internal import context
from wandb.sdk.internal.sender import SendManager
from wandb.sdk.internal.settings_static import SettingsStatic
from wandb.util import coalesce, recursive_cast_dictlike_to_dict

from .protocols import ImporterRun

ROOT_DIR = "./wandb-importer"


logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

if os.getenv("WANDB_IMPORTER_ENABLE_RICH_LOGGING"):
    from rich.logging import RichHandler

    logger.addHandler(RichHandler(rich_tracebacks=True, tracebacks_show_locals=True))
else:
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)

    formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
    console_handler.setFormatter(formatter)

    logger.addHandler(console_handler)


exp_retry = retry(
    wait=wait_random_exponential(multiplier=1, max=10), stop=stop_after_attempt(3)
)


class AlternateSendManager(SendManager):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._send_artifact = exp_retry(self._send_artifact)


@dataclass(frozen=True)
class SendManagerConfig:
    """Configure which parts of SendManager tooling to use."""

    use_artifacts: bool = False
    log_artifacts: bool = False
    metadata: bool = False
    files: bool = False
    media: bool = False
    code: bool = False
    history: bool = False
    summary: bool = False
    terminal_output: bool = False


@dataclass
class RecordMaker:
    run: ImporterRun
    interface: InterfaceQueue = InterfaceQueue()

    @property
    def run_dir(self) -> str:
        p = Path(f"{ROOT_DIR}/{self.run.run_id()}/wandb")
        p.mkdir(parents=True, exist_ok=True)
        return f"{ROOT_DIR}/{self.run.run_id()}"

    def make_artifacts_only_records(
        self,
        artifacts: Optional[Iterable[Artifact]] = None,
        used_artifacts: Optional[Iterable[Artifact]] = None,
    ) -> Iterable[pb.Record]:
        """Only make records required to upload artifacts.

        Escape hatch for adding extra artifacts to a run.
        """
        yield self._make_run_record()

        if used_artifacts:
            for art in used_artifacts:
                yield self._make_artifact_record(art, use_artifact=True)

        if artifacts:
            for art in artifacts:
                yield self._make_artifact_record(art)

    def make_records(
        self,
        config: SendManagerConfig,
    ) -> Iterable[pb.Record]:
        """Make all the records that constitute a run."""
        yield self._make_run_record()
        yield self._make_telem_record()

        include_artifacts = config.log_artifacts or config.use_artifacts
        yield self._make_files_record(
            include_artifacts, config.files, config.media, config.code
        )

        if config.use_artifacts:
            if (used_artifacts := self.run.used_artifacts()) is not None:
                for artifact in used_artifacts:
                    yield self._make_artifact_record(artifact, use_artifact=True)

        if config.log_artifacts:
            if (artifacts := self.run.artifacts()) is not None:
                for artifact in artifacts:
                    yield self._make_artifact_record(artifact)

        if config.history:
            yield from self._make_history_records()

        if config.summary:
            yield self._make_summary_record()

        if config.terminal_output:
            if (lines := self.run.logs()) is not None:
                for line in lines:
                    yield self._make_output_record(line)

    def _make_run_record(self) -> pb.Record:
        run = pb.RunRecord()
        run.run_id = self.run.run_id()
        run.entity = self.run.entity()
        run.project = self.run.project()
        run.display_name = coalesce(self.run.display_name())
        run.notes = coalesce(self.run.notes(), "")
        run.tags.extend(coalesce(self.run.tags(), []))
        run.start_time.FromMilliseconds(self.run.start_time())

        host = self.run.host()
        if host is not None:
            run.host = host

        runtime = self.run.runtime()
        if runtime is not None:
            run.runtime = runtime

        run_group = self.run.run_group()
        if run_group is not None:
            run.run_group = run_group

        config = self.run.config()
        if "_wandb" not in config:
            config["_wandb"] = {}

        # how do I get this automatically?
        config["_wandb"]["code_path"] = self.run.code_path()
        config["_wandb"]["python_version"] = self.run.python_version()
        config["_wandb"]["cli_version"] = self.run.cli_version()

        self.interface._make_config(
            data=config,
            obj=run.config,
        )  # is there a better way?
        return self.interface._make_record(run=run)

    def _make_output_record(self, line) -> pb.Record:
        output_raw = pb.OutputRawRecord()
        output_raw.output_type = pb.OutputRawRecord.OutputType.STDOUT
        output_raw.line = line
        return self.interface._make_record(output_raw=output_raw)

    def _make_summary_record(self) -> pb.Record:
        d: dict = {
            **self.run.summary(),
            "_runtime": self.run.runtime(),  # quirk of runtime -- it has to be here!
            # '_timestamp': self.run.start_time()/1000,
        }
        d = recursive_cast_dictlike_to_dict(d)
        summary = self.interface._make_summary_from_dict(d)
        return self.interface._make_record(summary=summary)

    def _make_history_records(self) -> Iterable[pb.Record]:
        for metrics in self.run.metrics():
            history = pb.HistoryRecord()
            for k, v in metrics.items():
                item = history.item.add()
                item.key = k
                # There seems to be some conversion issue to breaks when we try to re-upload.
                # np.NaN gets converted to float("nan"), which is not expected by our system.
                # If this cast to string (!) is not done, the row will be dropped.
                if (isinstance(v, float) and math.isnan(v)) or v == "NaN":
                    v = np.NaN

                if isinstance(v, bytes):
                    # it's a json string encoded as bytes
                    v = v.decode("utf-8")
                else:
                    v = json.dumps(v)

                item.value_json = v
            rec = self.interface._make_record(history=history)
            yield rec

    def _make_files_record(
        self, artifacts: bool, files: bool, media: bool, code: bool
    ) -> pb.Record:
        run_files = self.run.files()
        metadata_fname = f"{self.run_dir}/files/wandb-metadata.json"
        if not files or run_files is None:
            # We'll always need a metadata file even if there are no other files to upload
            metadata_fname = self._make_metadata_file()
            run_files = [(metadata_fname, "end")]
        files_record = pb.FilesRecord()
        for path, policy in run_files:
            if not artifacts and path.startswith("artifact/"):
                continue
            if not media and path.startswith("media/"):
                continue
            if not code and path.startswith("code/"):
                continue

            # DirWatcher requires the path to start with media/ instead of the full path
            if "media" in path:
                p = Path(path)
                path = str(p.relative_to(f"{self.run_dir}/files"))
            f = files_record.files.add()
            f.path = path
            f.policy = file_policy_to_enum(policy)

        return self.interface._make_record(files=files_record)

    def _make_artifact_record(
        self, artifact: Artifact, use_artifact=False
    ) -> pb.Record:
        proto = self.interface._make_artifact(artifact)
        proto.run_id = str(self.run.run_id())
        proto.project = str(self.run.project())
        proto.entity = str(self.run.entity())
        proto.user_created = use_artifact
        proto.use_after_commit = use_artifact
        proto.finalize = True

        aliases = artifact._aliases
        aliases += ["latest", "imported"]

        for alias in aliases:
            proto.aliases.append(alias)
        return self.interface._make_record(artifact=proto)

    def _make_telem_record(self) -> pb.Record:
        telem = telem_pb.TelemetryRecord()

        feature = telem_pb.Feature()
        feature.importer_mlflow = True
        telem.feature.CopyFrom(feature)

        cli_version = self.run.cli_version()
        if cli_version:
            telem.cli_version = cli_version

        python_version = self.run.python_version()
        if python_version:
            telem.python_version = python_version

        return self.interface._make_record(telemetry=telem)

    def _make_metadata_file(self) -> str:
        missing_text = "This data was not captured"
        files_dir = f"{self.run_dir}/files"
        os.makedirs(files_dir, exist_ok=True)

        d = {}
        d["os"] = coalesce(self.run.os_version(), missing_text)
        d["python"] = coalesce(self.run.python_version(), missing_text)
        d["program"] = coalesce(self.run.program(), missing_text)
        d["cuda"] = coalesce(self.run.cuda_version(), missing_text)
        d["host"] = coalesce(self.run.host(), missing_text)
        d["username"] = coalesce(self.run.username(), missing_text)
        d["executable"] = coalesce(self.run.executable(), missing_text)

        gpus_used = self.run.gpus_used()
        if gpus_used is not None:
            d["gpu_devices"] = json.dumps(gpus_used)
            d["gpu_count"] = json.dumps(len(gpus_used))

        cpus_used = self.run.cpus_used()
        if cpus_used is not None:
            d["cpu_count"] = json.dumps(self.run.cpus_used())

        mem_used = self.run.memory_used()
        if mem_used is not None:
            d["memory"] = json.dumps({"total": self.run.memory_used()})

        fname = f"{files_dir}/wandb-metadata.json"
        with open(fname, "w") as f:
            f.write(json.dumps(d))
        return fname


def _make_settings(
    root_dir: str, settings_override: Optional[Dict[str, Any]] = None
) -> SettingsStatic:
    _settings_override = coalesce(settings_override, {})

    default_settings: Dict[str, Any] = {
        "files_dir": os.path.join(root_dir, "files"),
        "root_dir": root_dir,
        "sync_file": os.path.join(root_dir, "txlog.wandb"),
        "resume": "false",
        "program": None,
        "ignore_globs": [],
        "disable_job_creation": True,
        "_start_time": 0,
        "_offline": None,
        "_sync": True,
        "_live_policy_rate_limit": 15,  # matches dir_watcher
        "_live_policy_wait_time": 600,  # matches dir_watcher
        "_async_upload_concurrency_limit": None,
        "_file_stream_timeout_seconds": 60,
    }

    combined_settings = {**default_settings, **_settings_override}
    settings_message = wandb_settings_pb2.Settings()
    ParseDict(combined_settings, settings_message)

    return SettingsStatic(settings_message)


def send_run(
    run: ImporterRun,
    *,
    extra_arts: Optional[Iterable[Artifact]] = None,
    extra_used_arts: Optional[Iterable[Artifact]] = None,
    config: Optional[SendManagerConfig] = None,
    overrides: Optional[Dict[str, Any]] = None,
    settings_override: Optional[Dict[str, Any]] = None,
) -> None:
    if config is None:
        config = SendManagerConfig()

    # does this need to be here for pmap?
    if overrides:
        for k, v in overrides.items():
            # `lambda: v` won't work!
            # https://stackoverflow.com/questions/10802002/why-deepcopy-doesnt-create-new-references-to-lambda-function
            setattr(run, k, lambda v=v: v)

    rm = RecordMaker(run)
    root_dir = rm.run_dir

    settings = _make_settings(root_dir, settings_override)
    sm_record_q = queue.Queue()
    # wm_record_q = queue.Queue()
    result_q = queue.Queue()
    interface = InterfaceQueue(record_q=sm_record_q)
    context_keeper = context.ContextKeeper()
    sm = AlternateSendManager(
        settings, sm_record_q, result_q, interface, context_keeper
    )
    # wm = WriteManager(
    #     settings, wm_record_q, result_q, sm_record_q, interface, context_keeper
    # )

    if extra_arts or extra_used_arts:
        records = rm.make_artifacts_only_records(extra_arts, extra_used_arts)
    else:
        records = rm.make_records(config)

    for r in records:
        logger.debug(f"Sending {r=}")
        # In a future update, it might be good to write to a transaction log and have
        # incremental uploads only send the missing records.
        # wm.write(r)

        sm.send(r)

    sm.finish()
    # wm.finish()
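send_run is the new low-level entry point: it builds protobuf records with RecordMaker and replays them through AlternateSendManager, which wraps SendManager with retrying artifact uploads. A hedged sketch of how it might be driven, assuming my_run is some object satisfying the ImporterRun protocol (hypothetical) and that the module imports from the new file path:

from wandb.apis.importers.internals.internal import SendManagerConfig, send_run

config = SendManagerConfig(history=True, summary=True, files=True, terminal_output=True)

# "overrides" replaces the named ImporterRun accessors with zero-argument lambdas,
# which is how Namespace.send_manager_overrides (see util.py below) redirects a run
# to a different entity/project on the destination server.
send_run(
    my_run,
    config=config,
    overrides={"entity": "target-entity", "project": "target-project"},
)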
wandb/apis/importers/internals/protocols.py
ADDED
@@ -0,0 +1,125 @@
import logging
import sys
from typing import Any, Dict, Iterable, List, Literal, Optional, Tuple

from wandb.sdk.artifacts.artifact import Artifact

if sys.version_info >= (3, 8):
    from typing import Protocol, runtime_checkable
else:
    from typing_extensions import Protocol, runtime_checkable

logger = logging.getLogger("import_logger")

PathStr = str
Policy = Literal["now", "end", "live"]


@runtime_checkable
class ImporterRun(Protocol):
    def run_id(self) -> str:
        ...  # pragma: no cover

    def entity(self) -> str:
        ...  # pragma: no cover

    def project(self) -> str:
        ...  # pragma: no cover

    def config(self) -> Dict[str, Any]:
        ...  # pragma: no cover

    def summary(self) -> Dict[str, float]:
        ...  # pragma: no cover

    def metrics(self) -> Iterable[Dict[str, float]]:
        """Metrics for the run.

        We expect metrics in this shape:

        [
            {'metric1': 1, 'metric2': 1, '_step': 0},
            {'metric1': 2, 'metric2': 4, '_step': 1},
            {'metric1': 3, 'metric2': 9, '_step': 2},
            ...
        ]

        You can also submit metrics in this shape:
        [
            {'metric1': 1, '_step': 0},
            {'metric2': 1, '_step': 0},
            {'metric1': 2, '_step': 1},
            {'metric2': 4, '_step': 1},
            ...
        ]
        """
        ...  # pragma: no cover

    def run_group(self) -> Optional[str]:
        ...  # pragma: no cover

    def job_type(self) -> Optional[str]:
        ...  # pragma: no cover

    def display_name(self) -> str:
        ...  # pragma: no cover

    def notes(self) -> Optional[str]:
        ...  # pragma: no cover

    def tags(self) -> Optional[List[str]]:
        ...  # pragma: no cover

    def artifacts(self) -> Optional[Iterable[Artifact]]:
        ...  # pragma: no cover

    def used_artifacts(self) -> Optional[Iterable[Artifact]]:
        ...  # pragma: no cover

    def os_version(self) -> Optional[str]:
        ...  # pragma: no cover

    def python_version(self) -> Optional[str]:
        ...  # pragma: no cover

    def cuda_version(self) -> Optional[str]:
        ...  # pragma: no cover

    def program(self) -> Optional[str]:
        ...  # pragma: no cover

    def host(self) -> Optional[str]:
        ...  # pragma: no cover

    def username(self) -> Optional[str]:
        ...  # pragma: no cover

    def executable(self) -> Optional[str]:
        ...  # pragma: no cover

    def gpus_used(self) -> Optional[str]:
        ...  # pragma: no cover

    def cpus_used(self) -> Optional[int]:
        ...  # pragma: no cover

    def memory_used(self) -> Optional[int]:
        ...  # pragma: no cover

    def runtime(self) -> Optional[int]:
        ...  # pragma: no cover

    def start_time(self) -> Optional[int]:
        ...  # pragma: no cover

    def code_path(self) -> Optional[str]:
        ...  # pragma: no cover

    def cli_version(self) -> Optional[str]:
        ...  # pragma: no cover

    def files(self) -> Optional[Iterable[Tuple[PathStr, Policy]]]:
        ...  # pragma: no cover

    def logs(self) -> Optional[Iterable[str]]:
        ...  # pragma: no cover
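Because ImporterRun is a runtime_checkable Protocol, isinstance(obj, ImporterRun) only verifies that the listed methods exist; signatures and return types are not checked. A minimal sketch (not part of the package) of a source object that begins to satisfy the protocol; a usable importer source would implement every method above:

from typing import Any, Dict, Iterable, List

class InMemoryRun:  # hypothetical example source
    def __init__(self, rows: List[Dict[str, float]]):
        self._rows = rows

    def run_id(self) -> str:
        return "in-memory-run-1"

    def entity(self) -> str:
        return "my-entity"

    def project(self) -> str:
        return "my-project"

    def config(self) -> Dict[str, Any]:
        return {"source": "in-memory"}

    def summary(self) -> Dict[str, float]:
        return {"loss": self._rows[-1]["loss"]}

    def metrics(self) -> Iterable[Dict[str, float]]:
        # the first shape described in the metrics() docstring: one dict per step
        return [{"loss": r["loss"], "_step": i} for i, r in enumerate(self._rows)]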
wandb/apis/importers/internals/util.py
ADDED
@@ -0,0 +1,78 @@
import logging
import sys
import traceback
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass
from typing import Iterable, Optional


@dataclass(frozen=True)
class Namespace:
    """Configure an alternate entity/project at the dst server your data will end up in."""

    entity: str
    project: str

    @classmethod
    def from_path(cls, path: str):
        entity, project = path.split("/")
        return cls(entity, project)

    @property
    def path(self):
        return f"{self.entity}/{self.project}"

    @property
    def send_manager_overrides(self):
        overrides = {}
        if self.entity:
            overrides["entity"] = self.entity
        if self.project:
            overrides["project"] = self.project
        return overrides


logger = logging.getLogger("import_logger")


def parallelize(
    func,
    iterable: Iterable,
    *args,
    max_workers: Optional[int] = None,
    raise_on_error: bool = False,
    **kwargs,
):
    def safe_func(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            _, _, exc_traceback = sys.exc_info()
            traceback_details = traceback.extract_tb(exc_traceback)
            filename = traceback_details[-1].filename
            lineno = traceback_details[-1].lineno
            logger.debug(
                f"Exception: {func=} {args=} {kwargs=} {e=} {filename=} {lineno=}. {traceback_details=}"
            )
            if raise_on_error:
                raise e

    results = []
    with ThreadPoolExecutor(max_workers) as exc:
        futures = {exc.submit(safe_func, x, *args, **kwargs): x for x in iterable}
        for future in as_completed(futures):
            results.append(future.result())
    return results


def for_each(
    func, iterable: Iterable, parallel: bool = True, max_workers: Optional[int] = None
):
    if parallel:
        return parallelize(
            func,
            iterable,
            max_workers=max_workers,
        )

    return [func(x) for x in iterable]