wandb 0.16.5__py3-none-any.whl → 0.17.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- package_readme.md +95 -0
- wandb/__init__.py +2 -3
- wandb/agents/pyagent.py +0 -1
- wandb/analytics/sentry.py +2 -1
- wandb/apis/importers/internals/internal.py +0 -1
- wandb/apis/importers/internals/protocols.py +30 -56
- wandb/apis/importers/mlflow.py +13 -26
- wandb/apis/importers/wandb.py +8 -14
- wandb/apis/internal.py +0 -3
- wandb/apis/public/api.py +55 -3
- wandb/apis/public/artifacts.py +1 -0
- wandb/apis/public/files.py +1 -0
- wandb/apis/public/history.py +1 -0
- wandb/apis/public/jobs.py +17 -4
- wandb/apis/public/projects.py +1 -0
- wandb/apis/public/reports.py +1 -0
- wandb/apis/public/runs.py +15 -17
- wandb/apis/public/sweeps.py +1 -0
- wandb/apis/public/teams.py +1 -0
- wandb/apis/public/users.py +1 -0
- wandb/apis/reports/v1/_blocks.py +3 -7
- wandb/apis/reports/v2/gql.py +1 -0
- wandb/apis/reports/v2/interface.py +3 -4
- wandb/apis/reports/v2/internal.py +5 -8
- wandb/cli/cli.py +95 -22
- wandb/data_types.py +9 -6
- wandb/docker/__init__.py +1 -1
- wandb/env.py +38 -8
- wandb/errors/__init__.py +5 -0
- wandb/errors/term.py +10 -2
- wandb/filesync/step_checksum.py +1 -4
- wandb/filesync/step_prepare.py +4 -24
- wandb/filesync/step_upload.py +4 -106
- wandb/filesync/upload_job.py +0 -76
- wandb/integration/catboost/catboost.py +1 -1
- wandb/integration/fastai/__init__.py +1 -0
- wandb/integration/huggingface/resolver.py +2 -2
- wandb/integration/keras/__init__.py +1 -0
- wandb/integration/keras/callbacks/metrics_logger.py +1 -1
- wandb/integration/keras/keras.py +7 -7
- wandb/integration/langchain/wandb_tracer.py +1 -0
- wandb/integration/lightning/fabric/logger.py +1 -3
- wandb/integration/metaflow/metaflow.py +41 -6
- wandb/integration/openai/fine_tuning.py +77 -40
- wandb/integration/prodigy/prodigy.py +1 -1
- wandb/old/summary.py +1 -1
- wandb/plot/confusion_matrix.py +1 -1
- wandb/plot/pr_curve.py +2 -1
- wandb/plot/roc_curve.py +2 -1
- wandb/{plots → plot}/utils.py +13 -25
- wandb/proto/v3/wandb_internal_pb2.py +364 -332
- wandb/proto/v3/wandb_settings_pb2.py +2 -2
- wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
- wandb/proto/v4/wandb_internal_pb2.py +322 -316
- wandb/proto/v4/wandb_settings_pb2.py +2 -2
- wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
- wandb/proto/wandb_deprecated.py +7 -1
- wandb/proto/wandb_internal_codegen.py +3 -29
- wandb/sdk/artifacts/artifact.py +51 -20
- wandb/sdk/artifacts/artifact_download_logger.py +1 -0
- wandb/sdk/artifacts/artifact_file_cache.py +18 -4
- wandb/sdk/artifacts/artifact_instance_cache.py +1 -0
- wandb/sdk/artifacts/artifact_manifest.py +1 -0
- wandb/sdk/artifacts/artifact_manifest_entry.py +7 -3
- wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +1 -0
- wandb/sdk/artifacts/artifact_saver.py +18 -27
- wandb/sdk/artifacts/artifact_state.py +1 -0
- wandb/sdk/artifacts/artifact_ttl.py +1 -0
- wandb/sdk/artifacts/exceptions.py +1 -0
- wandb/sdk/artifacts/storage_handlers/azure_handler.py +1 -0
- wandb/sdk/artifacts/storage_handlers/gcs_handler.py +13 -18
- wandb/sdk/artifacts/storage_handlers/http_handler.py +1 -0
- wandb/sdk/artifacts/storage_handlers/local_file_handler.py +1 -0
- wandb/sdk/artifacts/storage_handlers/multi_handler.py +1 -0
- wandb/sdk/artifacts/storage_handlers/s3_handler.py +5 -3
- wandb/sdk/artifacts/storage_handlers/tracking_handler.py +1 -0
- wandb/sdk/artifacts/storage_handlers/wb_artifact_handler.py +1 -0
- wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +1 -0
- wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +3 -42
- wandb/sdk/artifacts/storage_policy.py +2 -12
- wandb/sdk/data_types/_dtypes.py +8 -8
- wandb/sdk/data_types/base_types/media.py +3 -6
- wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +3 -1
- wandb/sdk/data_types/image.py +1 -1
- wandb/sdk/data_types/video.py +1 -1
- wandb/sdk/integration_utils/auto_logging.py +5 -6
- wandb/sdk/integration_utils/data_logging.py +10 -6
- wandb/sdk/interface/interface.py +86 -38
- wandb/sdk/interface/interface_shared.py +7 -13
- wandb/sdk/internal/datastore.py +1 -1
- wandb/sdk/internal/file_pusher.py +2 -5
- wandb/sdk/internal/file_stream.py +5 -18
- wandb/sdk/internal/handler.py +18 -2
- wandb/sdk/internal/internal.py +0 -1
- wandb/sdk/internal/internal_api.py +1 -129
- wandb/sdk/internal/internal_util.py +0 -1
- wandb/sdk/internal/job_builder.py +159 -45
- wandb/sdk/internal/profiler.py +1 -0
- wandb/sdk/internal/progress.py +0 -28
- wandb/sdk/internal/run.py +1 -0
- wandb/sdk/internal/sender.py +1 -2
- wandb/sdk/internal/system/assets/gpu_amd.py +44 -44
- wandb/sdk/internal/system/assets/gpu_apple.py +56 -11
- wandb/sdk/internal/system/assets/interfaces.py +6 -8
- wandb/sdk/internal/system/assets/open_metrics.py +2 -2
- wandb/sdk/internal/system/assets/trainium.py +1 -3
- wandb/sdk/launch/__init__.py +9 -1
- wandb/sdk/launch/_launch.py +9 -24
- wandb/sdk/launch/_launch_add.py +1 -3
- wandb/sdk/launch/_project_spec.py +188 -241
- wandb/sdk/launch/agent/agent.py +115 -48
- wandb/sdk/launch/agent/config.py +80 -14
- wandb/sdk/launch/builder/abstract.py +69 -1
- wandb/sdk/launch/builder/build.py +156 -555
- wandb/sdk/launch/builder/context_manager.py +235 -0
- wandb/sdk/launch/builder/docker_builder.py +8 -23
- wandb/sdk/launch/builder/kaniko_builder.py +161 -159
- wandb/sdk/launch/builder/noop.py +1 -0
- wandb/sdk/launch/builder/templates/dockerfile.py +92 -0
- wandb/sdk/launch/create_job.py +68 -63
- wandb/sdk/launch/environment/abstract.py +1 -0
- wandb/sdk/launch/environment/gcp_environment.py +1 -0
- wandb/sdk/launch/environment/local_environment.py +1 -0
- wandb/sdk/launch/inputs/files.py +148 -0
- wandb/sdk/launch/inputs/internal.py +217 -0
- wandb/sdk/launch/inputs/manage.py +95 -0
- wandb/sdk/launch/loader.py +1 -0
- wandb/sdk/launch/registry/abstract.py +1 -0
- wandb/sdk/launch/registry/azure_container_registry.py +1 -0
- wandb/sdk/launch/registry/elastic_container_registry.py +1 -0
- wandb/sdk/launch/registry/google_artifact_registry.py +2 -1
- wandb/sdk/launch/registry/local_registry.py +1 -0
- wandb/sdk/launch/runner/abstract.py +1 -0
- wandb/sdk/launch/runner/kubernetes_monitor.py +4 -1
- wandb/sdk/launch/runner/kubernetes_runner.py +9 -10
- wandb/sdk/launch/runner/local_container.py +2 -3
- wandb/sdk/launch/runner/local_process.py +8 -29
- wandb/sdk/launch/runner/sagemaker_runner.py +21 -20
- wandb/sdk/launch/runner/vertex_runner.py +8 -7
- wandb/sdk/launch/sweeps/scheduler.py +7 -4
- wandb/sdk/launch/sweeps/scheduler_sweep.py +2 -1
- wandb/sdk/launch/sweeps/utils.py +3 -3
- wandb/sdk/launch/utils.py +33 -140
- wandb/sdk/lib/_settings_toposort_generated.py +1 -5
- wandb/sdk/lib/fsm.py +8 -12
- wandb/sdk/lib/gitlib.py +4 -4
- wandb/sdk/lib/import_hooks.py +1 -1
- wandb/sdk/lib/lazyloader.py +0 -1
- wandb/sdk/lib/proto_util.py +23 -2
- wandb/sdk/lib/redirect.py +19 -14
- wandb/sdk/lib/retry.py +3 -2
- wandb/sdk/lib/run_moment.py +7 -1
- wandb/sdk/lib/tracelog.py +1 -1
- wandb/sdk/service/service.py +19 -16
- wandb/sdk/verify/verify.py +2 -1
- wandb/sdk/wandb_init.py +16 -63
- wandb/sdk/wandb_manager.py +2 -2
- wandb/sdk/wandb_require.py +5 -0
- wandb/sdk/wandb_run.py +164 -90
- wandb/sdk/wandb_settings.py +2 -48
- wandb/sdk/wandb_setup.py +1 -1
- wandb/sklearn/__init__.py +1 -0
- wandb/sklearn/plot/__init__.py +1 -0
- wandb/sklearn/plot/classifier.py +11 -12
- wandb/sklearn/plot/clusterer.py +2 -1
- wandb/sklearn/plot/regressor.py +1 -0
- wandb/sklearn/plot/shared.py +1 -0
- wandb/sklearn/utils.py +1 -0
- wandb/testing/relay.py +4 -4
- wandb/trigger.py +1 -0
- wandb/util.py +67 -54
- wandb/wandb_controller.py +2 -3
- wandb/wandb_torch.py +1 -2
- {wandb-0.16.5.dist-info → wandb-0.17.0.dist-info}/METADATA +67 -70
- {wandb-0.16.5.dist-info → wandb-0.17.0.dist-info}/RECORD +178 -188
- {wandb-0.16.5.dist-info → wandb-0.17.0.dist-info}/WHEEL +1 -2
- wandb/bin/apple_gpu_stats +0 -0
- wandb/catboost/__init__.py +0 -9
- wandb/fastai/__init__.py +0 -9
- wandb/keras/__init__.py +0 -18
- wandb/lightgbm/__init__.py +0 -9
- wandb/plots/__init__.py +0 -6
- wandb/plots/explain_text.py +0 -36
- wandb/plots/heatmap.py +0 -81
- wandb/plots/named_entity.py +0 -43
- wandb/plots/part_of_speech.py +0 -50
- wandb/plots/plot_definitions.py +0 -768
- wandb/plots/precision_recall.py +0 -121
- wandb/plots/roc.py +0 -103
- wandb/sacred/__init__.py +0 -3
- wandb/xgboost/__init__.py +0 -9
- wandb-0.16.5.dist-info/top_level.txt +0 -1
- {wandb-0.16.5.dist-info → wandb-0.17.0.dist-info}/entry_points.txt +0 -0
- {wandb-0.16.5.dist-info → wandb-0.17.0.dist-info/licenses}/LICENSE +0 -0
@@ -100,6 +100,10 @@ class InterfaceShared(InterfaceBase):
|
|
100
100
|
rec = self._make_record(telemetry=telem)
|
101
101
|
self._publish(rec)
|
102
102
|
|
103
|
+
def _publish_job_input(self, job_input: pb.JobInputRequest) -> MailboxHandle:
|
104
|
+
record = self._make_request(job_input=job_input)
|
105
|
+
return self._deliver_record(record)
|
106
|
+
|
103
107
|
def _make_stats(self, stats_dict: dict) -> pb.StatsRecord:
|
104
108
|
stats = pb.StatsRecord()
|
105
109
|
stats.stats_type = pb.StatsRecord.StatsType.SYSTEM
|
@@ -147,6 +151,7 @@ class InterfaceShared(InterfaceBase):
|
|
147
151
|
telemetry_record: Optional[pb.TelemetryRecordRequest] = None,
|
148
152
|
get_system_metrics: Optional[pb.GetSystemMetricsRequest] = None,
|
149
153
|
python_packages: Optional[pb.PythonPackagesRequest] = None,
|
154
|
+
job_input: Optional[pb.JobInputRequest] = None,
|
150
155
|
) -> pb.Record:
|
151
156
|
request = pb.Request()
|
152
157
|
if login:
|
@@ -207,6 +212,8 @@ class InterfaceShared(InterfaceBase):
|
|
207
212
|
request.sync.CopyFrom(sync)
|
208
213
|
elif python_packages:
|
209
214
|
request.python_packages.CopyFrom(python_packages)
|
215
|
+
elif job_input:
|
216
|
+
request.job_input.CopyFrom(job_input)
|
210
217
|
else:
|
211
218
|
raise Exception("Invalid request")
|
212
219
|
record = self._make_record(request=request)
|
@@ -239,9 +246,6 @@ class InterfaceShared(InterfaceBase):
|
|
239
246
|
use_artifact: Optional[pb.UseArtifactRecord] = None,
|
240
247
|
output: Optional[pb.OutputRecord] = None,
|
241
248
|
output_raw: Optional[pb.OutputRawRecord] = None,
|
242
|
-
launch_wandb_config_parameters: Optional[
|
243
|
-
pb.LaunchWandbConfigParametersRecord
|
244
|
-
] = None,
|
245
249
|
) -> pb.Record:
|
246
250
|
record = pb.Record()
|
247
251
|
if run:
|
@@ -286,8 +290,6 @@ class InterfaceShared(InterfaceBase):
|
|
286
290
|
record.output.CopyFrom(output)
|
287
291
|
elif output_raw:
|
288
292
|
record.output_raw.CopyFrom(output_raw)
|
289
|
-
elif launch_wandb_config_parameters:
|
290
|
-
record.wandb_config_parameters.CopyFrom(launch_wandb_config_parameters)
|
291
293
|
else:
|
292
294
|
raise Exception("Invalid record")
|
293
295
|
return record
|
@@ -417,14 +419,6 @@ class InterfaceShared(InterfaceBase):
|
|
417
419
|
rec = self._make_record(alert=proto_alert)
|
418
420
|
self._publish(rec)
|
419
421
|
|
420
|
-
def _publish_launch_wandb_config_parameters(
|
421
|
-
self, launch_wandb_config_parameters: pb.LaunchWandbConfigParametersRecord
|
422
|
-
) -> None:
|
423
|
-
rec = self._make_record(
|
424
|
-
launch_wandb_config_parameters=launch_wandb_config_parameters
|
425
|
-
)
|
426
|
-
self._publish(rec)
|
427
|
-
|
428
422
|
def _communicate_status(
|
429
423
|
self, status: pb.StatusRequest
|
430
424
|
) -> Optional[pb.StatusResponse]:
|
wandb/sdk/internal/datastore.py
CHANGED
@@ -14,7 +14,7 @@ from wandb.sdk.lib.paths import LogicalPath
|
|
14
14
|
|
15
15
|
if TYPE_CHECKING:
|
16
16
|
from wandb.sdk.artifacts.artifact_manifest import ArtifactManifest
|
17
|
-
from wandb.sdk.artifacts.artifact_saver import SaveFn
|
17
|
+
from wandb.sdk.artifacts.artifact_saver import SaveFn
|
18
18
|
from wandb.sdk.internal import file_stream, internal_api
|
19
19
|
from wandb.sdk.internal.settings_static import SettingsStatic
|
20
20
|
|
@@ -148,11 +148,8 @@ class FilePusher:
|
|
148
148
|
manifest: "ArtifactManifest",
|
149
149
|
artifact_id: str,
|
150
150
|
save_fn: "SaveFn",
|
151
|
-
save_fn_async: "SaveFnAsync",
|
152
151
|
) -> None:
|
153
|
-
event = step_checksum.RequestStoreManifestFiles(
|
154
|
-
manifest, artifact_id, save_fn, save_fn_async
|
155
|
-
)
|
152
|
+
event = step_checksum.RequestStoreManifestFiles(manifest, artifact_id, save_fn)
|
156
153
|
self._incoming_queue.put(event)
|
157
154
|
|
158
155
|
def commit_artifact(
|
@@ -1,4 +1,3 @@
|
|
1
|
-
import base64
|
2
1
|
import functools
|
3
2
|
import itertools
|
4
3
|
import json
|
@@ -53,7 +52,7 @@ logger = logging.getLogger(__name__)
|
|
53
52
|
|
54
53
|
class Chunk(NamedTuple):
|
55
54
|
filename: str
|
56
|
-
data: Any
|
55
|
+
data: str
|
57
56
|
|
58
57
|
|
59
58
|
class DefaultFilePolicy:
|
@@ -227,7 +226,7 @@ class CRDedupeFilePolicy(DefaultFilePolicy):
|
|
227
226
|
prefix += token + " "
|
228
227
|
return prefix, rest
|
229
228
|
|
230
|
-
def process_chunks(self, chunks: List) -> List["ProcessedChunk"]:
|
229
|
+
def process_chunks(self, chunks: List[Chunk]) -> List["ProcessedChunk"]:
|
231
230
|
r"""Process chunks.
|
232
231
|
|
233
232
|
Args:
|
@@ -300,18 +299,6 @@ class CRDedupeFilePolicy(DefaultFilePolicy):
|
|
300
299
|
return ret
|
301
300
|
|
302
301
|
|
303
|
-
class BinaryFilePolicy(DefaultFilePolicy):
|
304
|
-
def __init__(self) -> None:
|
305
|
-
super().__init__()
|
306
|
-
self._offset: int = 0
|
307
|
-
|
308
|
-
def process_chunks(self, chunks: List[Chunk]) -> "ProcessedBinaryChunk":
|
309
|
-
data = b"".join([c.data for c in chunks])
|
310
|
-
enc = base64.b64encode(data).decode("ascii")
|
311
|
-
self._offset += len(data)
|
312
|
-
return {"offset": self._offset, "content": enc, "encoding": "base64"}
|
313
|
-
|
314
|
-
|
315
302
|
class FileStreamApi:
|
316
303
|
"""Pushes chunks of files to our streaming endpoint.
|
317
304
|
|
@@ -585,12 +572,12 @@ class FileStreamApi:
|
|
585
572
|
def enqueue_preempting(self) -> None:
|
586
573
|
self._queue.put(self.Preempting())
|
587
574
|
|
588
|
-
def push(self, filename: str, data: Any) -> None:
|
575
|
+
def push(self, filename: str, data: str) -> None:
|
589
576
|
"""Push a chunk of a file to the streaming endpoint.
|
590
577
|
|
591
578
|
Arguments:
|
592
|
-
filename: Name of file
|
593
|
-
data:
|
579
|
+
filename: Name of file to append to.
|
580
|
+
data: Text to append to the file.
|
594
581
|
"""
|
595
582
|
self._queue.put(Chunk(filename, data))
|
596
583
|
|
wandb/sdk/internal/handler.py
CHANGED
@@ -50,6 +50,18 @@ SummaryDict = Dict[str, Any]
|
|
50
50
|
|
51
51
|
logger = logging.getLogger(__name__)
|
52
52
|
|
53
|
+
# Update (March 5, 2024): Since ~2020/2021, when constructing the summary
|
54
|
+
# object, we had replaced the artifact path for media types with the latest
|
55
|
+
# artifact path. The primary purpose of this was to support live updating of
|
56
|
+
# media objects in the UI (since the default artifact path was fully qualified
|
57
|
+
# and would not update). However, in March of 2024, a bug was discovered with
|
58
|
+
# this approach which causes this path to be incorrect in cases where the media
|
59
|
+
# object is logged to another artifact before being logged to the run. Setting
|
60
|
+
# this to `False` disables this copy behavior. The impact is that users will
|
61
|
+
# need to refresh to see updates. Ironically, this updating behavior is not
|
62
|
+
# currently supported in the UI, so the impact of this change is minimal.
|
63
|
+
REPLACE_SUMMARY_ART_PATH_WITH_LATEST = False
|
64
|
+
|
53
65
|
|
54
66
|
def _dict_nested_set(target: Dict[str, Any], key_list: Sequence[str], v: Any) -> None:
|
55
67
|
# recurse down the dictionary structure:
|
@@ -371,7 +383,11 @@ class HandleManager:
|
|
371
383
|
updated = True
|
372
384
|
return updated
|
373
385
|
# If the dict is a media object, update the pointer to the latest alias
|
374
|
-
elif isinstance(v, dict) and handler_util.metric_is_wandb_dict(v):
|
386
|
+
elif (
|
387
|
+
REPLACE_SUMMARY_ART_PATH_WITH_LATEST
|
388
|
+
and isinstance(v, dict)
|
389
|
+
and handler_util.metric_is_wandb_dict(v)
|
390
|
+
):
|
375
391
|
if "_latest_artifact_path" in v and "artifact_path" in v:
|
376
392
|
# TODO: Make non-destructive?
|
377
393
|
v["artifact_path"] = v["_latest_artifact_path"]
|
@@ -381,7 +397,7 @@ class HandleManager:
|
|
381
397
|
def _update_summary_media_objects(self, v: Dict[str, Any]) -> Dict[str, Any]:
|
382
398
|
# For now, non-recursive - just top level
|
383
399
|
for nk, nv in v.items():
|
384
|
-
if (
|
400
|
+
if REPLACE_SUMMARY_ART_PATH_WITH_LATEST and (
|
385
401
|
isinstance(nv, dict)
|
386
402
|
and handler_util.metric_is_wandb_dict(nv)
|
387
403
|
and "_latest_artifact_path" in nv
|
wandb/sdk/internal/internal.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
import ast
|
2
|
-
import asyncio
|
3
2
|
import base64
|
4
3
|
import datetime
|
5
4
|
import functools
|
@@ -49,7 +48,7 @@ from ..lib import retry
|
|
49
48
|
from ..lib.filenames import DIFF_FNAME, METADATA_FNAME
|
50
49
|
from ..lib.gitlib import GitRepo
|
51
50
|
from . import context
|
52
|
-
from .progress import AsyncProgress, Progress
|
51
|
+
from .progress import Progress
|
53
52
|
|
54
53
|
logger = logging.getLogger(__name__)
|
55
54
|
|
@@ -121,13 +120,6 @@ if TYPE_CHECKING:
|
|
121
120
|
SweepState = Literal["RUNNING", "PAUSED", "CANCELED", "FINISHED"]
|
122
121
|
Number = Union[int, float]
|
123
122
|
|
124
|
-
# This funny if/else construction is the simplest thing I've found that
|
125
|
-
# works at runtime, satisfies Mypy, and gives autocomplete in VSCode:
|
126
|
-
if TYPE_CHECKING:
|
127
|
-
import httpx
|
128
|
-
else:
|
129
|
-
httpx = util.get_module("httpx")
|
130
|
-
|
131
123
|
# class _MappingSupportsCopy(Protocol):
|
132
124
|
# def copy(self) -> "_MappingSupportsCopy": ...
|
133
125
|
# def keys(self) -> Iterable: ...
|
@@ -161,23 +153,6 @@ def check_httpclient_logger_handler() -> None:
|
|
161
153
|
httpclient_logger.addHandler(root_logger.handlers[0])
|
162
154
|
|
163
155
|
|
164
|
-
def check_httpx_exc_retriable(exc: Exception) -> bool:
|
165
|
-
retriable_codes = (308, 408, 409, 429, 500, 502, 503, 504)
|
166
|
-
return (
|
167
|
-
isinstance(exc, (httpx.TimeoutException, httpx.NetworkError))
|
168
|
-
or (
|
169
|
-
isinstance(exc, httpx.HTTPStatusError)
|
170
|
-
and exc.response.status_code in retriable_codes
|
171
|
-
)
|
172
|
-
or (
|
173
|
-
isinstance(exc, httpx.HTTPStatusError)
|
174
|
-
and exc.response.status_code == 400
|
175
|
-
and "x-amz-meta-md5" in exc.request.headers
|
176
|
-
and "RequestTimeout" in str(exc.response.content)
|
177
|
-
)
|
178
|
-
)
|
179
|
-
|
180
|
-
|
181
156
|
class _ThreadLocalData(threading.local):
|
182
157
|
context: Optional[context.Context]
|
183
158
|
|
@@ -286,10 +261,6 @@ class Api:
|
|
286
261
|
)
|
287
262
|
)
|
288
263
|
|
289
|
-
# httpx is an optional dependency, so we lazily instantiate the client
|
290
|
-
# only when we need it
|
291
|
-
self._async_httpx_client: Optional[httpx.AsyncClient] = None
|
292
|
-
|
293
264
|
self.retry_callback = retry_callback
|
294
265
|
self._retry_gql = retry.Retry(
|
295
266
|
self.execute,
|
@@ -2794,105 +2765,6 @@ class Api:
|
|
2794
2765
|
|
2795
2766
|
return response
|
2796
2767
|
|
2797
|
-
async def upload_file_async(
|
2798
|
-
self,
|
2799
|
-
url: str,
|
2800
|
-
file: IO[bytes],
|
2801
|
-
callback: Optional["ProgressFn"] = None,
|
2802
|
-
extra_headers: Optional[Dict[str, str]] = None,
|
2803
|
-
) -> None:
|
2804
|
-
"""An async not-quite-equivalent version of `upload_file`.
|
2805
|
-
|
2806
|
-
Differences from `upload_file`:
|
2807
|
-
- This method doesn't implement Azure uploads. (The Azure SDK supports
|
2808
|
-
async, but it's nontrivial to use it here.) If the upload looks like
|
2809
|
-
it's destined for Azure, this method will delegate to the sync impl.
|
2810
|
-
- Consequently, this method doesn't return the response object.
|
2811
|
-
(Because it might fall back to the sync impl, it would sometimes
|
2812
|
-
return a `requests.Response` and sometimes an `httpx.Response`.)
|
2813
|
-
- This method doesn't wrap retryable errors in `TransientError`.
|
2814
|
-
It leaves that determination to the caller.
|
2815
|
-
"""
|
2816
|
-
check_httpclient_logger_handler()
|
2817
|
-
must_delegate = False
|
2818
|
-
|
2819
|
-
if httpx is None:
|
2820
|
-
wandb.termwarn( # type: ignore[unreachable]
|
2821
|
-
"async file-uploads require `pip install wandb[async]`; falling back to sync implementation",
|
2822
|
-
repeat=False,
|
2823
|
-
)
|
2824
|
-
must_delegate = True
|
2825
|
-
|
2826
|
-
if extra_headers is not None and "x-ms-blob-type" in extra_headers:
|
2827
|
-
wandb.termwarn(
|
2828
|
-
"async file-uploads don't support Azure; falling back to sync implementation",
|
2829
|
-
repeat=False,
|
2830
|
-
)
|
2831
|
-
must_delegate = True
|
2832
|
-
|
2833
|
-
if must_delegate:
|
2834
|
-
await asyncio.get_event_loop().run_in_executor(
|
2835
|
-
None,
|
2836
|
-
lambda: self.upload_file_retry(
|
2837
|
-
url=url,
|
2838
|
-
file=file,
|
2839
|
-
callback=callback,
|
2840
|
-
extra_headers=extra_headers,
|
2841
|
-
),
|
2842
|
-
)
|
2843
|
-
return
|
2844
|
-
|
2845
|
-
if self._async_httpx_client is None:
|
2846
|
-
self._async_httpx_client = httpx.AsyncClient()
|
2847
|
-
|
2848
|
-
progress = AsyncProgress(Progress(file, callback=callback))
|
2849
|
-
|
2850
|
-
try:
|
2851
|
-
response = await self._async_httpx_client.put(
|
2852
|
-
url=url,
|
2853
|
-
content=progress,
|
2854
|
-
headers={
|
2855
|
-
"Content-Length": str(len(progress)),
|
2856
|
-
**(extra_headers if extra_headers is not None else {}),
|
2857
|
-
},
|
2858
|
-
)
|
2859
|
-
response.raise_for_status()
|
2860
|
-
except Exception as e:
|
2861
|
-
progress.rewind()
|
2862
|
-
logger.error(f"upload_file_async exception {url}: {e}")
|
2863
|
-
if isinstance(e, httpx.RequestError):
|
2864
|
-
logger.error(f"upload_file_async request headers: {e.request.headers}")
|
2865
|
-
if isinstance(e, httpx.HTTPStatusError):
|
2866
|
-
logger.error(f"upload_file_async response body: {e.response.content!r}")
|
2867
|
-
raise
|
2868
|
-
|
2869
|
-
async def upload_file_retry_async(
|
2870
|
-
self,
|
2871
|
-
url: str,
|
2872
|
-
file: IO[bytes],
|
2873
|
-
callback: Optional["ProgressFn"] = None,
|
2874
|
-
extra_headers: Optional[Dict[str, str]] = None,
|
2875
|
-
num_retries: int = 100,
|
2876
|
-
) -> None:
|
2877
|
-
backoff = retry.FilteredBackoff(
|
2878
|
-
filter=check_httpx_exc_retriable,
|
2879
|
-
wrapped=retry.ExponentialBackoff(
|
2880
|
-
initial_sleep=datetime.timedelta(seconds=1),
|
2881
|
-
max_sleep=datetime.timedelta(seconds=60),
|
2882
|
-
max_retries=num_retries,
|
2883
|
-
timeout_at=datetime.datetime.now() + datetime.timedelta(days=7),
|
2884
|
-
),
|
2885
|
-
)
|
2886
|
-
|
2887
|
-
await retry.retry_async(
|
2888
|
-
backoff=backoff,
|
2889
|
-
fn=self.upload_file_async,
|
2890
|
-
url=url,
|
2891
|
-
file=file,
|
2892
|
-
callback=callback,
|
2893
|
-
extra_headers=extra_headers,
|
2894
|
-
)
|
2895
|
-
|
2896
2768
|
@normalize_exceptions
|
2897
2769
|
def register_agent(
|
2898
2770
|
self,
|
@@ -1,4 +1,5 @@
|
|
1
1
|
"""job builder."""
|
2
|
+
|
2
3
|
import json
|
3
4
|
import logging
|
4
5
|
import os
|
@@ -31,6 +32,43 @@ JOB_ARTIFACT_TYPE = "job"
|
|
31
32
|
LOG_LEVEL = Literal["log", "warn", "error"]
|
32
33
|
|
33
34
|
|
35
|
+
class Version:
|
36
|
+
def __init__(self, major: int, minor: int, patch: int):
|
37
|
+
self._major = major
|
38
|
+
self._minor = minor
|
39
|
+
self._patch = patch
|
40
|
+
|
41
|
+
def __repr__(self) -> str:
|
42
|
+
return f"{self._major}.{self._minor}.{self._patch}"
|
43
|
+
|
44
|
+
def __lt__(self, other: "Version") -> bool:
|
45
|
+
if self._major < other._major:
|
46
|
+
return True
|
47
|
+
elif self._major == other._major:
|
48
|
+
if self._minor < other._minor:
|
49
|
+
return True
|
50
|
+
elif self._minor == other._minor:
|
51
|
+
if self._patch < other._patch:
|
52
|
+
return True
|
53
|
+
return False
|
54
|
+
|
55
|
+
def __eq__(self, other: object) -> bool:
|
56
|
+
if not isinstance(other, Version):
|
57
|
+
return NotImplemented
|
58
|
+
return (
|
59
|
+
self._major == other._major
|
60
|
+
and self._minor == other._minor
|
61
|
+
and self._patch == other._patch
|
62
|
+
)
|
63
|
+
|
64
|
+
|
65
|
+
# Minimum supported wandb version for keys in the source dict of wandb-job.json
|
66
|
+
SOURCE_KEYS_MIN_SUPPORTED_VERSION = {
|
67
|
+
"dockerfile": Version(0, 17, 0),
|
68
|
+
"build_context": Version(0, 17, 0),
|
69
|
+
}
|
70
|
+
|
71
|
+
|
34
72
|
class GitInfo(TypedDict):
|
35
73
|
remote: str
|
36
74
|
commit: str
|
@@ -40,12 +78,16 @@ class GitSourceDict(TypedDict):
|
|
40
78
|
git: GitInfo
|
41
79
|
entrypoint: List[str]
|
42
80
|
notebook: bool
|
81
|
+
build_context: Optional[str]
|
82
|
+
dockerfile: Optional[str]
|
43
83
|
|
44
84
|
|
45
85
|
class ArtifactSourceDict(TypedDict):
|
46
86
|
artifact: str
|
47
87
|
entrypoint: List[str]
|
48
88
|
notebook: bool
|
89
|
+
build_context: Optional[str]
|
90
|
+
dockerfile: Optional[str]
|
49
91
|
|
50
92
|
|
51
93
|
class ImageSourceDict(TypedDict):
|
@@ -72,6 +114,19 @@ class ArtifactInfoForJob(TypedDict):
|
|
72
114
|
name: str
|
73
115
|
|
74
116
|
|
117
|
+
def get_min_supported_for_source_dict(
|
118
|
+
source: Union[GitSourceDict, ArtifactSourceDict, ImageSourceDict],
|
119
|
+
) -> Optional[Version]:
|
120
|
+
"""Get the minimum supported wandb version the source dict of wandb-job.json."""
|
121
|
+
min_seen = None
|
122
|
+
for key in source:
|
123
|
+
new_ver = SOURCE_KEYS_MIN_SUPPORTED_VERSION.get(key)
|
124
|
+
if new_ver:
|
125
|
+
if min_seen is None or new_ver < min_seen:
|
126
|
+
min_seen = new_ver
|
127
|
+
return min_seen
|
128
|
+
|
129
|
+
|
75
130
|
class JobArtifact(Artifact):
|
76
131
|
def __init__(self, name: str, *args: Any, **kwargs: Any):
|
77
132
|
super().__init__(name, "placeholder", *args, **kwargs)
|
@@ -105,9 +160,9 @@ class JobBuilder:
|
|
105
160
|
self._disable = settings.disable_job_creation
|
106
161
|
self._partial_source = None
|
107
162
|
self._aliases = []
|
108
|
-
self._source_type: Optional[
|
109
|
-
Literal["repo", "artifact", "image"]
|
110
|
-
] = settings.job_source  # type: ignore[assignment]
|
163
|
+
self._source_type: Optional[Literal["repo", "artifact", "image"]] = (
|
164
|
+
settings.job_source # type: ignore[assignment]
|
165
|
+
)
|
111
166
|
self._is_notebook_run = self._get_is_notebook_run()
|
112
167
|
self._verbose = verbose
|
113
168
|
|
@@ -196,6 +251,8 @@ class JobBuilder:
|
|
196
251
|
"git": {"remote": remote, "commit": commit},
|
197
252
|
"entrypoint": entrypoint,
|
198
253
|
"notebook": self._is_notebook_run,
|
254
|
+
"build_context": metadata.get("build_context"),
|
255
|
+
"dockerfile": metadata.get("dockerfile"),
|
199
256
|
}
|
200
257
|
name = self._make_job_name(f"{remote}_{program_relpath}")
|
201
258
|
|
@@ -246,6 +303,8 @@ class JobBuilder:
|
|
246
303
|
"entrypoint": entrypoint,
|
247
304
|
"notebook": self._is_notebook_run,
|
248
305
|
"artifact": f"wandb-artifact://_id/{self._logged_code_artifact['id']}",
|
306
|
+
"build_context": metadata.get("build_context"),
|
307
|
+
"dockerfile": metadata.get("dockerfile"),
|
249
308
|
}
|
250
309
|
name = self._make_job_name(self._logged_code_artifact["name"])
|
251
310
|
|
@@ -275,7 +334,7 @@ class JobBuilder:
|
|
275
334
|
return source, name
|
276
335
|
|
277
336
|
def _make_job_name(self, input_str: str) -> str:
|
278
|
-
"""Use job name from settings if provided, else use
|
337
|
+
"""Use job name from settings if provided, else use programmatic name."""
|
279
338
|
if self._settings.job_name:
|
280
339
|
return self._settings.job_name
|
281
340
|
|
@@ -292,17 +351,6 @@ class JobBuilder:
|
|
292
351
|
if metadata.get("entrypoint"):
|
293
352
|
entrypoint: List[str] = metadata["entrypoint"]
|
294
353
|
return entrypoint
|
295
|
-
|
296
|
-
# if entrypoint is not in metadata, then construct from python
|
297
|
-
assert metadata.get("python")
|
298
|
-
|
299
|
-
python = metadata["python"]
|
300
|
-
if python.count(".") > 1:
|
301
|
-
python = ".".join(python.split(".")[:2])
|
302
|
-
|
303
|
-
entrypoint = [f"python{python}", program_relpath]
|
304
|
-
return entrypoint
|
305
|
-
|
306
354
|
# job is being built from a run
|
307
355
|
entrypoint = [os.path.basename(sys.executable), program_relpath]
|
308
356
|
|
@@ -314,7 +362,73 @@ class JobBuilder:
|
|
314
362
|
def _is_colab_run(self) -> bool:
|
315
363
|
return hasattr(self._settings, "_colab") and bool(self._settings._colab)
|
316
364
|
|
317
|
-
def build(self) -> Optional[Artifact]:
|
365
|
+
def _build_job_source(
|
366
|
+
self,
|
367
|
+
source_type: str,
|
368
|
+
program_relpath: Optional[str],
|
369
|
+
metadata: Dict[str, Any],
|
370
|
+
) -> Tuple[
|
371
|
+
Union[GitSourceDict, ArtifactSourceDict, ImageSourceDict, None],
|
372
|
+
Optional[str],
|
373
|
+
]:
|
374
|
+
"""Construct a job source dict and name from the current run.
|
375
|
+
|
376
|
+
Arguments:
|
377
|
+
source_type (str): The type of source to build the job from. One of
|
378
|
+
"repo", "artifact", or "image".
|
379
|
+
"""
|
380
|
+
source: Union[
|
381
|
+
GitSourceDict,
|
382
|
+
ArtifactSourceDict,
|
383
|
+
ImageSourceDict,
|
384
|
+
None,
|
385
|
+
] = None
|
386
|
+
|
387
|
+
if source_type == "repo":
|
388
|
+
source, name = self._build_repo_job_source(
|
389
|
+
program_relpath or "",
|
390
|
+
metadata,
|
391
|
+
)
|
392
|
+
elif source_type == "artifact":
|
393
|
+
source, name = self._build_artifact_job_source(
|
394
|
+
program_relpath or "",
|
395
|
+
metadata,
|
396
|
+
)
|
397
|
+
elif source_type == "image" and self._has_image_job_ingredients(metadata):
|
398
|
+
source, name = self._build_image_job_source(metadata)
|
399
|
+
else:
|
400
|
+
source = None
|
401
|
+
|
402
|
+
if source is None:
|
403
|
+
if source_type:
|
404
|
+
self._log_if_verbose(
|
405
|
+
f"Source type is set to '{source_type}' but some required information is missing "
|
406
|
+
"from the environment. A job will not be created from this run. See "
|
407
|
+
"https://docs.wandb.ai/guides/launch/create-job",
|
408
|
+
"warn",
|
409
|
+
)
|
410
|
+
return None, None
|
411
|
+
|
412
|
+
return source, name
|
413
|
+
|
414
|
+
def build(
|
415
|
+
self,
|
416
|
+
build_context: Optional[str] = None,
|
417
|
+
dockerfile: Optional[str] = None,
|
418
|
+
) -> Optional[Artifact]:
|
419
|
+
"""Build a job artifact from the current run.
|
420
|
+
|
421
|
+
Arguments:
|
422
|
+
build_context (Optional[str]): Path within the job source code to
|
423
|
+
the image build context. Saved as part of the job for future
|
424
|
+
builds.
|
425
|
+
dockerfile (Optional[str]): Path within the build context the
|
426
|
+
Dockerfile. Saved as part of the job for future builds.
|
427
|
+
|
428
|
+
Returns:
|
429
|
+
Optional[Artifact]: The job artifact if it was successfully built,
|
430
|
+
otherwise None.
|
431
|
+
"""
|
318
432
|
_logger.info("Attempting to build job artifact")
|
319
433
|
if not os.path.exists(
|
320
434
|
os.path.join(self._settings.files_dir, REQUIREMENTS_FNAME)
|
@@ -331,6 +445,8 @@ class JobBuilder:
|
|
331
445
|
"warn",
|
332
446
|
)
|
333
447
|
return None
|
448
|
+
metadata["dockerfile"] = dockerfile
|
449
|
+
metadata["build_context"] = build_context
|
334
450
|
|
335
451
|
runtime: Optional[str] = metadata.get("python")
|
336
452
|
# can't build a job without a python version
|
@@ -374,45 +490,39 @@ class JobBuilder:
|
|
374
490
|
return None
|
375
491
|
|
376
492
|
program_relpath = self._get_program_relpath(source_type, metadata)
|
377
|
-
if source_type != "image" and not program_relpath:
|
493
|
+
if (
|
494
|
+
not metadata.get("_partial")
|
495
|
+
and source_type != "image"
|
496
|
+
and not program_relpath
|
497
|
+
):
|
378
498
|
self._log_if_verbose(
|
379
499
|
"No program path found, not creating job artifact. See https://docs.wandb.ai/guides/launch/create-job",
|
380
500
|
"warn",
|
381
501
|
)
|
382
502
|
return None
|
383
503
|
|
384
|
-
source
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
# make source dict
|
391
|
-
if source_type == "repo":
|
392
|
-
assert program_relpath
|
393
|
-
source, name = self._build_repo_job_source(program_relpath, metadata)
|
394
|
-
elif source_type == "artifact":
|
395
|
-
assert program_relpath
|
396
|
-
source, name = self._build_artifact_job_source(
|
397
|
-
program_relpath, metadata
|
398
|
-
)
|
399
|
-
elif source_type == "image" and self._has_image_job_ingredients(metadata):
|
400
|
-
source, name = self._build_image_job_source(metadata)
|
401
|
-
else:
|
402
|
-
source = None
|
403
|
-
|
504
|
+
source, name = self._build_job_source(
|
505
|
+
source_type,
|
506
|
+
program_relpath,
|
507
|
+
metadata,
|
508
|
+
)
|
404
509
|
if source is None:
|
405
|
-
if source_type:
|
406
|
-
self._log_if_verbose(
|
407
|
-
f"Source type is set to '{source_type}' but some required information is missing "
|
408
|
-
"from the environment. A job will not be created from this run. See "
|
409
|
-
"https://docs.wandb.ai/guides/launch/create-job",
|
410
|
-
"warn",
|
411
|
-
)
|
412
510
|
return None
|
413
511
|
|
512
|
+
if build_context:
|
513
|
+
source["build_context"] = build_context # type: ignore[typeddict-item]
|
514
|
+
if dockerfile:
|
515
|
+
source["dockerfile"] = dockerfile # type: ignore[typeddict-item]
|
516
|
+
|
517
|
+
# Pop any keys that are initialized to None. The current TypedDict
|
518
|
+
# system for source dicts requires all keys to be present, but we
|
519
|
+
# don't want to include keys that are None in the final dict.
|
520
|
+
for key in list(source.keys()):
|
521
|
+
if source[key] is None: # type: ignore[literal-required]
|
522
|
+
source.pop(key) # type: ignore[literal-require,misc]
|
523
|
+
|
414
524
|
source_info = {
|
415
|
-
"_version": "v0",
|
525
|
+
"_version": str(get_min_supported_for_source_dict(source) or "v0"),
|
416
526
|
"source_type": source_type,
|
417
527
|
"source": source,
|
418
528
|
"input_types": input_types,
|
@@ -530,6 +640,8 @@ def convert_use_artifact_to_job_source(
|
|
530
640
|
},
|
531
641
|
"entrypoint": entrypoint,
|
532
642
|
"notebook": source_info.source.git.notebook,
|
643
|
+
"build_context": None,
|
644
|
+
"dockerfile": None,
|
533
645
|
}
|
534
646
|
source_info_dict.update({"source": git_source})
|
535
647
|
elif source_info.source_type == "artifact":
|
@@ -538,6 +650,8 @@ def convert_use_artifact_to_job_source(
|
|
538
650
|
"artifact": source_info.source.artifact.artifact,
|
539
651
|
"entrypoint": entrypoint,
|
540
652
|
"notebook": source_info.source.artifact.notebook,
|
653
|
+
"build_context": None,
|
654
|
+
"dockerfile": None,
|
541
655
|
}
|
542
656
|
source_info_dict.update({"source": artifact_source})
|
543
657
|
elif source_info.source_type == "image":
|