wandb 0.16.5__py3-none-any.whl → 0.16.6__py3-none-any.whl
- wandb/__init__.py +1 -1
- wandb/cli/cli.py +5 -2
- wandb/integration/openai/fine_tuning.py +74 -37
- wandb/proto/v3/wandb_internal_pb2.py +192 -192
- wandb/proto/v3/wandb_settings_pb2.py +2 -2
- wandb/proto/v4/wandb_internal_pb2.py +192 -192
- wandb/proto/v4/wandb_settings_pb2.py +2 -2
- wandb/sdk/artifacts/artifact.py +25 -9
- wandb/sdk/artifacts/artifact_saver.py +16 -19
- wandb/sdk/interface/interface.py +18 -6
- wandb/sdk/launch/_launch.py +5 -0
- wandb/sdk/launch/_project_spec.py +5 -20
- wandb/sdk/launch/agent/agent.py +80 -37
- wandb/sdk/launch/agent/config.py +8 -0
- wandb/sdk/launch/builder/kaniko_builder.py +149 -134
- wandb/sdk/launch/create_job.py +43 -48
- wandb/sdk/launch/runner/kubernetes_monitor.py +3 -1
- wandb/sdk/launch/sweeps/scheduler.py +3 -1
- wandb/sdk/launch/utils.py +18 -0
- wandb/sdk/lib/_settings_toposort_generated.py +1 -0
- wandb/sdk/lib/run_moment.py +7 -1
- wandb/sdk/wandb_init.py +2 -8
- wandb/sdk/wandb_run.py +50 -34
- wandb/sdk/wandb_settings.py +2 -0
- {wandb-0.16.5.dist-info → wandb-0.16.6.dist-info}/METADATA +1 -1
- {wandb-0.16.5.dist-info → wandb-0.16.6.dist-info}/RECORD +30 -30
- {wandb-0.16.5.dist-info → wandb-0.16.6.dist-info}/LICENSE +0 -0
- {wandb-0.16.5.dist-info → wandb-0.16.6.dist-info}/WHEEL +0 -0
- {wandb-0.16.5.dist-info → wandb-0.16.6.dist-info}/entry_points.txt +0 -0
- {wandb-0.16.5.dist-info → wandb-0.16.6.dist-info}/top_level.txt +0 -0
wandb/__init__.py
CHANGED
@@ -11,7 +11,7 @@ For scripts and interactive notebooks, see https://github.com/wandb/examples.
 
 For reference documentation, see https://docs.wandb.com/ref/python.
 """
-__version__ = "0.16.5"
+__version__ = "0.16.6"
 _minimum_core_version = "0.17.0b10"
 
 # Used with pypi checks and other messages related to pip
wandb/cli/cli.py
CHANGED
@@ -1680,6 +1680,7 @@ def launch(
     hidden=True,
     help="a wandb client registration URL, this is generated in the UI",
 )
+@click.option("--verbose", "-v", count=True, help="Display verbose output")
 @display_error
 def launch_agent(
     ctx,
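For context on the new flag: with `count=True`, click accumulates repeated `-v` flags into an integer. A minimal, self-contained sketch of that behavior (an illustration only, not wandb's actual CLI):

import click

@click.command()
@click.option("--verbose", "-v", count=True, help="Display verbose output")
def agent(verbose):
    # no flag -> 0, `-v` -> 1, `-vv` -> 2, and so on
    click.echo(f"verbosity level: {verbose}")

if __name__ == "__main__":
    agent()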
@@ -1690,6 +1691,7 @@ def launch_agent(
     config=None,
     url=None,
     log_file=None,
+    verbose=0,
 ):
     logger.info(
         f"=== Launch-agent called with kwargs {locals()} CLI Version: {wandb.__version__} ==="
@@ -1707,7 +1709,7 @@ def launch_agent(
     api = _get_cling_api()
     wandb._sentry.configure_scope(process_context="launch_agent")
     agent_config, api = _launch.resolve_agent_config(
-        entity, project, max_jobs, queues, config
+        entity, project, max_jobs, queues, config, verbose
     )
 
     if len(agent_config.get("queues")) == 0:
@@ -1905,7 +1907,7 @@ def describe(job):
     "--entry-point",
     "-E",
     "entrypoint",
-    help="
+    help="Entrypoint to the script, including an executable and an entrypoint file. Required for code or repo jobs",
 )
 @click.option(
     "--git-hash",
@@ -2354,6 +2356,7 @@ def artifact():
 @click.option(
     "--policy",
     default="mutable",
+    type=click.Choice(["mutable", "immutable"]),
     help="Set the storage policy while uploading artifact files.",
 )
 @display_error
wandb/integration/openai/fine_tuning.py
CHANGED
@@ -1,9 +1,11 @@
 import datetime
 import io
 import json
+import os
 import re
+import tempfile
 import time
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple, Union
 
 import wandb
 from wandb import util
@@ -26,7 +28,10 @@ if parse_version(openai.__version__) < parse_version("1.0.1"):
 
 from openai import OpenAI  # noqa: E402
 from openai.types.fine_tuning import FineTuningJob  # noqa: E402
-from openai.types.fine_tuning.fine_tuning_job import
+from openai.types.fine_tuning.fine_tuning_job import (  # noqa: E402
+    Error,
+    Hyperparameters,
+)
 
 np = util.get_module(
     name="numpy",
@@ -59,6 +64,7 @@ class WandbLogger:
         entity: Optional[str] = None,
         overwrite: bool = False,
         wait_for_job_success: bool = True,
+        log_datasets: bool = True,
         **kwargs_wandb_init: Dict[str, Any],
     ) -> str:
         """Sync fine-tunes to Weights & Biases.
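A caller-side sketch of how the updated `sync` signature might be used with the new `log_datasets` flag. The `fine_tune_job_id` and `openai_client` keyword names and the project name are assumptions for illustration, not confirmed by this diff:

from openai import OpenAI
from wandb.integration.openai.fine_tuning import WandbLogger

client = OpenAI()

# Hypothetical invocation: skip uploading training/validation files and
# return immediately instead of polling until the OpenAI job finishes.
WandbLogger.sync(
    fine_tune_job_id="ftjob-abc123",   # placeholder job id
    openai_client=client,
    log_datasets=False,
    wait_for_job_success=False,
    project="openai-fine-tune-demo",   # assumed project name
)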
@@ -150,6 +156,7 @@ class WandbLogger:
             entity,
             overwrite,
             show_individual_warnings,
+            log_datasets,
             **kwargs_wandb_init,
         )
 
@@ -160,11 +167,14 @@ class WandbLogger:
 
     @classmethod
     def _wait_for_job_success(cls, fine_tune: FineTuningJob) -> FineTuningJob:
-        wandb.termlog("Waiting for the OpenAI fine-tuning job to
+        wandb.termlog("Waiting for the OpenAI fine-tuning job to finish training...")
+        wandb.termlog(
+            "To avoid blocking, you can call `WandbLogger.sync` with `wait_for_job_success=False` after OpenAI training completes."
+        )
         while True:
             if fine_tune.status == "succeeded":
                 wandb.termlog(
-                    "Fine-tuning finished, logging metrics, model metadata, and
+                    "Fine-tuning finished, logging metrics, model metadata, and run metadata to Weights & Biases"
                 )
                 return fine_tune
             if fine_tune.status == "failed":
@@ -190,6 +200,7 @@ class WandbLogger:
         entity: Optional[str],
         overwrite: bool,
         show_individual_warnings: bool,
+        log_datasets: bool,
         **kwargs_wandb_init: Dict[str, Any],
     ):
         fine_tune_id = fine_tune.id
@@ -209,7 +220,7 @@ class WandbLogger:
         # check results are present
         try:
             results_id = fine_tune.result_files[0]
-            results = cls.openai_client.files.
+            results = cls.openai_client.files.content(file_id=results_id).text
         except openai.NotFoundError:
             if show_individual_warnings:
                 wandb.termwarn(
@@ -233,7 +244,7 @@ class WandbLogger:
         cls._run.summary["fine_tuned_model"] = fine_tuned_model
 
         # training/validation files and fine-tune details
-        cls._log_artifacts(fine_tune, project, entity)
+        cls._log_artifacts(fine_tune, project, entity, log_datasets, overwrite)
 
         # mark run as complete
         cls._run.summary["status"] = "succeeded"
@@ -249,7 +260,7 @@ class WandbLogger:
         else:
             raise Exception(
                 "It appears you are not currently logged in to Weights & Biases. "
-                "Please run `wandb login` in your terminal. "
+                "Please run `wandb login` in your terminal or `wandb.login()` in a notebook."
                 "When prompted, you can obtain your API key by visiting wandb.ai/authorize."
             )
 
@@ -286,15 +297,9 @@ class WandbLogger:
                 config["finished_at"]
             ).strftime("%Y-%m-%d %H:%M:%S")
         if config.get("hyperparameters"):
-            hyperparameters =
-
-
-                # If unpacking fails, log the object which will render as string
-                config["hyperparameters"] = hyperparameters
-        else:
-            # nested rendering on hyperparameters
-            config["hyperparameters"] = hyperparams
-
+            config["hyperparameters"] = cls.sanitize(config["hyperparameters"])
+        if config.get("error"):
+            config["error"] = cls.sanitize(config["error"])
         return config
 
     @classmethod
@@ -314,21 +319,44 @@ class WandbLogger:
 
         return hyperparams
 
+    @staticmethod
+    def sanitize(input: Any) -> Union[Dict, List, str]:
+        valid_types = [bool, int, float, str]
+        if isinstance(input, (Hyperparameters, Error)):
+            return dict(input)
+        if isinstance(input, dict):
+            return {
+                k: v if type(v) in valid_types else str(v) for k, v in input.items()
+            }
+        elif isinstance(input, list):
+            return [v if type(v) in valid_types else str(v) for v in input]
+        else:
+            return str(input)
+
     @classmethod
     def _log_artifacts(
-        cls,
+        cls,
+        fine_tune: FineTuningJob,
+        project: str,
+        entity: Optional[str],
+        log_datasets: bool,
+        overwrite: bool,
     ) -> None:
-
-
-
-        fine_tune.
-
-
-
-
-
-
+        if log_datasets:
+            wandb.termlog("Logging training/validation files...")
+            # training/validation files
+            training_file = fine_tune.training_file if fine_tune.training_file else None
+            validation_file = (
+                fine_tune.validation_file if fine_tune.validation_file else None
+            )
+            for file, prefix, artifact_type in (
+                (training_file, "train", "training_files"),
+                (validation_file, "valid", "validation_files"),
+            ):
+                if file is not None:
+                    cls._log_artifact_inputs(
+                        file, prefix, artifact_type, project, entity, overwrite
+                    )
 
         # fine-tune details
         fine_tune_id = fine_tune.id
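A quick illustration of what the new `sanitize` helper does with a plain dict (illustrative call only, mirroring the logic added above): values that are not bool/int/float/str are stringified, while containers keep their shape.

raw = {"n_epochs": 3, "learning_rate_multiplier": "auto", "batch_size": None}
print(WandbLogger.sanitize(raw))
# -> {'n_epochs': 3, 'learning_rate_multiplier': 'auto', 'batch_size': 'None'}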
@@ -337,9 +365,14 @@ class WandbLogger:
             type="model",
             metadata=dict(fine_tune),
         )
+
         with artifact.new_file("model_metadata.json", mode="w", encoding="utf-8") as f:
             dict_fine_tune = dict(fine_tune)
-            dict_fine_tune["hyperparameters"] =
+            dict_fine_tune["hyperparameters"] = cls.sanitize(
+                dict_fine_tune["hyperparameters"]
+            )
+            dict_fine_tune["error"] = cls.sanitize(dict_fine_tune["error"])
+            dict_fine_tune = cls.sanitize(dict_fine_tune)
             json.dump(dict_fine_tune, f, indent=2)
         cls._run.log_artifact(
             artifact,
@@ -354,6 +387,7 @@ class WandbLogger:
         artifact_type: str,
         project: str,
         entity: Optional[str],
+        overwrite: bool,
     ) -> None:
         # get input artifact
         artifact_name = f"{prefix}-{file_id}"
@@ -366,23 +400,26 @@ class WandbLogger:
         artifact = cls._get_wandb_artifact(artifact_path)
 
         # create artifact if file not already logged previously
-        if artifact is None:
+        if artifact is None or overwrite:
             # get file content
             try:
-                file_content = cls.openai_client.files.
+                file_content = cls.openai_client.files.content(file_id=file_id)
             except openai.NotFoundError:
                 wandb.termerror(
-                    f"File {file_id} could not be retrieved. Make sure you
+                    f"File {file_id} could not be retrieved. Make sure you have OpenAI permissions to download training/validation files"
                 )
                 return
 
             artifact = wandb.Artifact(artifact_name, type=artifact_type)
-            with
-
+            with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
+                tmp_file.write(file_content.content)
+                tmp_file_path = tmp_file.name
+            artifact.add_file(tmp_file_path, file_id)
+            os.unlink(tmp_file_path)
 
             # create a Table
             try:
-                table, n_items = cls._make_table(file_content)
+                table, n_items = cls._make_table(file_content.text)
                 # Add table to the artifact.
                 artifact.add(table, file_id)
                 # Add the same table to the workspace.
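The temp-file dance above (write with `delete=False`, add the closed file by path, then `os.unlink`) keeps the downloaded bytes on disk just long enough for `artifact.add_file` to read them. A standalone sketch of the same pattern, independent of wandb:

import os
import tempfile

payload = b'{"prompt": "hello", "completion": "world"}'  # stand-in for file_content.content
with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
    tmp_file.write(payload)
    tmp_file_path = tmp_file.name
# The file is closed but still on disk, so another API can read it by path.
with open(tmp_file_path, "rb") as f:
    assert f.read() == payload
os.unlink(tmp_file_path)  # clean up once the consumer is done with it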
@@ -390,9 +427,9 @@ class WandbLogger:
                 # Update the run config and artifact metadata
                 cls._run.config.update({f"n_{prefix}": n_items})
                 artifact.metadata["items"] = n_items
-            except Exception:
+            except Exception as e:
                 wandb.termerror(
-                    f"
+                    f"Issue saving {file_id} as a Table to Artifacts, exception:\n '{e}'"
                 )
             else:
                 # log number of items