wandb 0.17.0rc2__py3-none-win32.whl → 0.17.2__py3-none-win32.whl
Sign up to get free protection for your applications and to get access to all the features.
- wandb/__init__.py +4 -2
- wandb/apis/importers/internals/internal.py +0 -1
- wandb/apis/importers/wandb.py +12 -7
- wandb/apis/internal.py +0 -3
- wandb/apis/public/api.py +213 -79
- wandb/apis/public/artifacts.py +335 -100
- wandb/apis/public/files.py +9 -9
- wandb/apis/public/jobs.py +16 -4
- wandb/apis/public/projects.py +26 -28
- wandb/apis/public/query_generator.py +1 -1
- wandb/apis/public/runs.py +163 -65
- wandb/apis/public/sweeps.py +2 -2
- wandb/apis/reports/__init__.py +1 -7
- wandb/apis/reports/v1/__init__.py +5 -27
- wandb/apis/reports/v2/__init__.py +7 -19
- wandb/apis/workspaces/__init__.py +8 -0
- wandb/beta/workflows.py +8 -3
- wandb/bin/wandb-core +0 -0
- wandb/cli/cli.py +151 -59
- wandb/docker/__init__.py +1 -1
- wandb/errors/term.py +10 -2
- wandb/filesync/step_checksum.py +1 -4
- wandb/filesync/step_prepare.py +4 -24
- wandb/filesync/step_upload.py +5 -107
- wandb/filesync/upload_job.py +0 -76
- wandb/integration/gym/__init__.py +35 -15
- wandb/integration/openai/fine_tuning.py +21 -3
- wandb/integration/prodigy/prodigy.py +1 -1
- wandb/jupyter.py +16 -17
- wandb/old/summary.py +5 -0
- wandb/plot/pr_curve.py +2 -1
- wandb/plot/roc_curve.py +2 -1
- wandb/{plots → plot}/utils.py +13 -25
- wandb/proto/v3/wandb_internal_pb2.py +54 -54
- wandb/proto/v3/wandb_settings_pb2.py +2 -2
- wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
- wandb/proto/v4/wandb_internal_pb2.py +54 -54
- wandb/proto/v4/wandb_settings_pb2.py +2 -2
- wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
- wandb/proto/v5/wandb_base_pb2.py +30 -0
- wandb/proto/v5/wandb_internal_pb2.py +355 -0
- wandb/proto/v5/wandb_server_pb2.py +63 -0
- wandb/proto/v5/wandb_settings_pb2.py +45 -0
- wandb/proto/v5/wandb_telemetry_pb2.py +41 -0
- wandb/proto/wandb_base_pb2.py +2 -0
- wandb/proto/wandb_deprecated.py +9 -1
- wandb/proto/wandb_generate_deprecated.py +34 -0
- wandb/proto/{wandb_internal_codegen.py → wandb_generate_proto.py} +1 -35
- wandb/proto/wandb_internal_pb2.py +2 -0
- wandb/proto/wandb_server_pb2.py +2 -0
- wandb/proto/wandb_settings_pb2.py +2 -0
- wandb/proto/wandb_telemetry_pb2.py +2 -0
- wandb/sdk/artifacts/artifact.py +76 -23
- wandb/sdk/artifacts/artifact_manifest.py +1 -1
- wandb/sdk/artifacts/artifact_manifest_entry.py +6 -3
- wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +1 -1
- wandb/sdk/artifacts/artifact_saver.py +1 -10
- wandb/sdk/artifacts/storage_handlers/local_file_handler.py +6 -2
- wandb/sdk/artifacts/storage_handlers/multi_handler.py +1 -1
- wandb/sdk/artifacts/storage_handlers/tracking_handler.py +6 -4
- wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +2 -42
- wandb/sdk/artifacts/storage_policy.py +1 -12
- wandb/sdk/data_types/_dtypes.py +5 -2
- wandb/sdk/data_types/html.py +1 -1
- wandb/sdk/data_types/image.py +1 -1
- wandb/sdk/data_types/object_3d.py +1 -1
- wandb/sdk/data_types/video.py +4 -2
- wandb/sdk/interface/interface.py +13 -0
- wandb/sdk/interface/interface_shared.py +1 -1
- wandb/sdk/internal/file_pusher.py +2 -5
- wandb/sdk/internal/file_stream.py +6 -19
- wandb/sdk/internal/internal_api.py +160 -138
- wandb/sdk/internal/job_builder.py +207 -135
- wandb/sdk/internal/progress.py +0 -28
- wandb/sdk/internal/sender.py +105 -42
- wandb/sdk/internal/settings_static.py +8 -1
- wandb/sdk/internal/system/assets/gpu.py +2 -0
- wandb/sdk/internal/system/assets/trainium.py +3 -3
- wandb/sdk/internal/system/system_info.py +4 -2
- wandb/sdk/internal/update.py +1 -1
- wandb/sdk/launch/__init__.py +9 -1
- wandb/sdk/launch/_launch.py +4 -24
- wandb/sdk/launch/_launch_add.py +1 -3
- wandb/sdk/launch/_project_spec.py +184 -224
- wandb/sdk/launch/agent/agent.py +58 -18
- wandb/sdk/launch/agent/config.py +0 -3
- wandb/sdk/launch/builder/abstract.py +67 -0
- wandb/sdk/launch/builder/build.py +165 -576
- wandb/sdk/launch/builder/context_manager.py +235 -0
- wandb/sdk/launch/builder/docker_builder.py +7 -23
- wandb/sdk/launch/builder/kaniko_builder.py +10 -23
- wandb/sdk/launch/builder/templates/dockerfile.py +92 -0
- wandb/sdk/launch/create_job.py +51 -45
- wandb/sdk/launch/environment/aws_environment.py +26 -1
- wandb/sdk/launch/inputs/files.py +148 -0
- wandb/sdk/launch/inputs/internal.py +224 -0
- wandb/sdk/launch/inputs/manage.py +95 -0
- wandb/sdk/launch/runner/abstract.py +2 -2
- wandb/sdk/launch/runner/kubernetes_monitor.py +45 -12
- wandb/sdk/launch/runner/kubernetes_runner.py +6 -8
- wandb/sdk/launch/runner/local_container.py +2 -3
- wandb/sdk/launch/runner/local_process.py +8 -29
- wandb/sdk/launch/runner/sagemaker_runner.py +20 -14
- wandb/sdk/launch/runner/vertex_runner.py +8 -7
- wandb/sdk/launch/sweeps/scheduler.py +2 -0
- wandb/sdk/launch/sweeps/utils.py +2 -2
- wandb/sdk/launch/utils.py +16 -138
- wandb/sdk/lib/_settings_toposort_generated.py +2 -5
- wandb/sdk/lib/apikey.py +4 -2
- wandb/sdk/lib/config_util.py +3 -3
- wandb/sdk/lib/proto_util.py +22 -1
- wandb/sdk/lib/redirect.py +1 -1
- wandb/sdk/service/service.py +2 -1
- wandb/sdk/service/streams.py +5 -5
- wandb/sdk/wandb_init.py +25 -59
- wandb/sdk/wandb_login.py +28 -25
- wandb/sdk/wandb_run.py +135 -70
- wandb/sdk/wandb_settings.py +33 -64
- wandb/sdk/wandb_watch.py +1 -1
- wandb/sklearn/plot/classifier.py +4 -6
- wandb/sync/sync.py +2 -2
- wandb/testing/relay.py +32 -17
- wandb/util.py +39 -37
- wandb/wandb_agent.py +3 -3
- wandb/wandb_controller.py +3 -2
- {wandb-0.17.0rc2.dist-info → wandb-0.17.2.dist-info}/METADATA +7 -9
- {wandb-0.17.0rc2.dist-info → wandb-0.17.2.dist-info}/RECORD +130 -152
- wandb/apis/reports/v1/_blocks.py +0 -1406
- wandb/apis/reports/v1/_helpers.py +0 -70
- wandb/apis/reports/v1/_panels.py +0 -1282
- wandb/apis/reports/v1/_templates.py +0 -478
- wandb/apis/reports/v1/blocks.py +0 -27
- wandb/apis/reports/v1/helpers.py +0 -2
- wandb/apis/reports/v1/mutations.py +0 -66
- wandb/apis/reports/v1/panels.py +0 -17
- wandb/apis/reports/v1/report.py +0 -268
- wandb/apis/reports/v1/runset.py +0 -144
- wandb/apis/reports/v1/templates.py +0 -7
- wandb/apis/reports/v1/util.py +0 -406
- wandb/apis/reports/v1/validators.py +0 -131
- wandb/apis/reports/v2/blocks.py +0 -25
- wandb/apis/reports/v2/expr_parsing.py +0 -257
- wandb/apis/reports/v2/gql.py +0 -68
- wandb/apis/reports/v2/interface.py +0 -1911
- wandb/apis/reports/v2/internal.py +0 -867
- wandb/apis/reports/v2/metrics.py +0 -6
- wandb/apis/reports/v2/panels.py +0 -15
- wandb/catboost/__init__.py +0 -9
- wandb/fastai/__init__.py +0 -9
- wandb/keras/__init__.py +0 -19
- wandb/lightgbm/__init__.py +0 -9
- wandb/plots/__init__.py +0 -6
- wandb/plots/explain_text.py +0 -36
- wandb/plots/heatmap.py +0 -81
- wandb/plots/named_entity.py +0 -43
- wandb/plots/part_of_speech.py +0 -50
- wandb/plots/plot_definitions.py +0 -768
- wandb/plots/precision_recall.py +0 -121
- wandb/plots/roc.py +0 -103
- wandb/sacred/__init__.py +0 -3
- wandb/xgboost/__init__.py +0 -9
- {wandb-0.17.0rc2.dist-info → wandb-0.17.2.dist-info}/WHEEL +0 -0
- {wandb-0.17.0rc2.dist-info → wandb-0.17.2.dist-info}/entry_points.txt +0 -0
- {wandb-0.17.0rc2.dist-info → wandb-0.17.2.dist-info}/licenses/LICENSE +0 -0
wandb/sdk/launch/agent/agent.py
CHANGED
@@ -9,7 +9,9 @@ import time
|
|
9
9
|
import traceback
|
10
10
|
from dataclasses import dataclass
|
11
11
|
from multiprocessing import Event
|
12
|
-
from typing import Any, Dict, List, Optional, Union
|
12
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
13
|
+
|
14
|
+
import yaml
|
13
15
|
|
14
16
|
import wandb
|
15
17
|
from wandb.apis.internal import Api
|
@@ -18,11 +20,11 @@ from wandb.sdk.launch._launch_add import launch_add
|
|
18
20
|
from wandb.sdk.launch.runner.local_container import LocalSubmittedRun
|
19
21
|
from wandb.sdk.launch.runner.local_process import LocalProcessRunner
|
20
22
|
from wandb.sdk.launch.sweeps.scheduler import Scheduler
|
23
|
+
from wandb.sdk.launch.utils import LAUNCH_CONFIG_FILE, resolve_build_and_registry_config
|
21
24
|
from wandb.sdk.lib import runid
|
22
25
|
|
23
26
|
from .. import loader
|
24
27
|
from .._project_spec import LaunchProject
|
25
|
-
from ..builder.build import construct_agent_configs
|
26
28
|
from ..errors import LaunchDockerError, LaunchError
|
27
29
|
from ..utils import (
|
28
30
|
LAUNCH_DEFAULT_PROJECT,
|
@@ -134,6 +136,31 @@ class InternalAgentLogger:
|
|
134
136
|
_logger.debug(f"{LOG_PREFIX}{message}")
|
135
137
|
|
136
138
|
|
139
|
+
def construct_agent_configs(
    launch_config: Optional[Dict] = None,
    build_config: Optional[Dict] = None,
) -> Tuple[Optional[Dict[str, Any]], Dict[str, Any], Dict[str, Any]]:
    """Assemble the environment, builder and registry configs for an agent.

    Arguments:
        launch_config: Optional launch config dictionary. When it is given,
            its ``"builder"`` and ``"registry"`` entries are used and the
            ``build_config`` argument is discarded (even if the
            ``"builder"`` entry is missing).
        build_config: Builder config used only when ``launch_config`` is
            ``None``.

    Returns:
        A ``(environment_config, build_config, registry_config)`` tuple.
        ``environment_config`` is the ``"environment"`` entry of the config
        file at ``LAUNCH_CONFIG_FILE``, or ``None`` when that file does not
        exist. The other two values are whatever
        ``resolve_build_and_registry_config`` returns.
    """
    registry_config = None
    if launch_config is not None:
        # Values from the launch config replace the build_config argument.
        build_config = launch_config.get("builder")
        registry_config = launch_config.get("registry")

    config_path = os.path.expanduser(LAUNCH_CONFIG_FILE)
    default_launch_config = None
    environment_config = None
    if os.path.exists(config_path):
        with open(config_path) as config_file:
            loaded = yaml.safe_load(config_file)
        # An empty YAML file parses to None; normalise it to {} so the
        # lookup below (and the resolver) always receive a mapping.
        default_launch_config = loaded or {}
        environment_config = default_launch_config.get("environment")

    # NOTE(review): presumably merges the file defaults with the explicit
    # builder/registry values -- confirm against wandb.sdk.launch.utils.
    build_config, registry_config = resolve_build_and_registry_config(
        default_launch_config, build_config, registry_config
    )
    return environment_config, build_config, registry_config
|
162
|
+
|
163
|
+
|
137
164
|
class LaunchAgent:
|
138
165
|
"""Launch agent class which polls run given run queues and launches runs for wandb launch."""
|
139
166
|
|
@@ -173,7 +200,7 @@ class LaunchAgent:
|
|
173
200
|
config: Config dictionary for the agent.
|
174
201
|
"""
|
175
202
|
self._entity = config["entity"]
|
176
|
-
self._project =
|
203
|
+
self._project = LAUNCH_DEFAULT_PROJECT
|
177
204
|
self._api = api
|
178
205
|
self._base_url = self._api.settings().get("base_url")
|
179
206
|
self._ticks = 0
|
@@ -194,6 +221,7 @@ class LaunchAgent:
|
|
194
221
|
self._stopped_run_timeout = config.get(
|
195
222
|
"stopped_run_timeout", DEFAULT_STOPPED_RUN_TIMEOUT
|
196
223
|
)
|
224
|
+
self._known_warnings: List[str] = []
|
197
225
|
|
198
226
|
# Get agent version from env var if present, otherwise wandb version
|
199
227
|
self.version: str = "wandb@" + wandb.__version__
|
@@ -277,6 +305,8 @@ class LaunchAgent:
|
|
277
305
|
|
278
306
|
def _init_agent_run(self) -> None:
|
279
307
|
# TODO: has it been long enough that all backends support agents?
|
308
|
+
self._wandb_run = None
|
309
|
+
|
280
310
|
if self.gorilla_supports_agents:
|
281
311
|
settings = wandb.Settings(silent=True, disable_git=True)
|
282
312
|
self._wandb_run = wandb.init(
|
@@ -286,8 +316,6 @@ class LaunchAgent:
|
|
286
316
|
id=self._name,
|
287
317
|
job_type=HIDDEN_AGENT_RUN_TYPE,
|
288
318
|
)
|
289
|
-
else:
|
290
|
-
self._wandb_run = None
|
291
319
|
|
292
320
|
@property
|
293
321
|
def thread_ids(self) -> List[int]:
|
@@ -339,10 +367,7 @@ class LaunchAgent:
|
|
339
367
|
if self._name:
|
340
368
|
output_str += f"{self._name} "
|
341
369
|
if self.num_running_jobs < self._max_jobs:
|
342
|
-
output_str += "polling on "
|
343
|
-
if self._project != LAUNCH_DEFAULT_PROJECT:
|
344
|
-
output_str += f"project {self._project}, "
|
345
|
-
output_str += f"queues {','.join(self._queues)}, "
|
370
|
+
output_str += f"polling on queues {','.join(self._queues)}, "
|
346
371
|
output_str += (
|
347
372
|
f"running {self.num_running_jobs} out of a maximum of {self._max_jobs} jobs"
|
348
373
|
)
|
@@ -434,7 +459,6 @@ class LaunchAgent:
|
|
434
459
|
# We retry for 60 seconds with an exponential backoff in case
|
435
460
|
# upsert run is taking a while.
|
436
461
|
logs = None
|
437
|
-
start_time = time.time()
|
438
462
|
interval = 1
|
439
463
|
while True:
|
440
464
|
called_init = self._check_run_exists_and_inited(
|
@@ -443,7 +467,7 @@ class LaunchAgent:
|
|
443
467
|
job_and_run_status.run_id,
|
444
468
|
job_and_run_status.run_queue_item_id,
|
445
469
|
)
|
446
|
-
if called_init or
|
470
|
+
if called_init or interval > RUN_INFO_GRACE_PERIOD:
|
447
471
|
break
|
448
472
|
if not called_init:
|
449
473
|
# Fetch the logs now if we don't get run info on the
|
@@ -692,7 +716,7 @@ class LaunchAgent:
|
|
692
716
|
default_config, override_build_config
|
693
717
|
)
|
694
718
|
image_uri = project.docker_image
|
695
|
-
entrypoint = project.
|
719
|
+
entrypoint = project.get_job_entry_point()
|
696
720
|
environment = loader.environment_from_config(
|
697
721
|
default_config.get("environment", {})
|
698
722
|
)
|
@@ -790,14 +814,30 @@ class LaunchAgent:
|
|
790
814
|
known_error = False
|
791
815
|
try:
|
792
816
|
run = job_tracker.run
|
793
|
-
status =
|
817
|
+
status = await run.get_status()
|
818
|
+
state = status.state
|
819
|
+
|
820
|
+
for warning in status.messages:
|
821
|
+
if warning not in self._known_warnings:
|
822
|
+
self._known_warnings.append(warning)
|
823
|
+
success = self._api.update_run_queue_item_warning(
|
824
|
+
job_tracker.run_queue_item_id,
|
825
|
+
warning,
|
826
|
+
"Kubernetes",
|
827
|
+
[],
|
828
|
+
)
|
829
|
+
if not success:
|
830
|
+
_logger.warning(
|
831
|
+
f"Error adding warning {warning} to run queue item {job_tracker.run_queue_item_id}"
|
832
|
+
)
|
833
|
+
self._known_warnings.remove(warning)
|
794
834
|
|
795
|
-
if
|
835
|
+
if state == "preempted" and job_tracker.entity == self._entity:
|
796
836
|
config = launch_spec.copy()
|
797
837
|
config["run_id"] = job_tracker.run_id
|
798
838
|
config["_resume_count"] = config.get("_resume_count", 0) + 1
|
799
839
|
with self._jobs_lock:
|
800
|
-
job_tracker.completed_status =
|
840
|
+
job_tracker.completed_status = state
|
801
841
|
if config["_resume_count"] > MAX_RESUME_COUNT:
|
802
842
|
wandb.termlog(
|
803
843
|
f"{LOG_PREFIX}Run {job_tracker.run_id} has already resumed {MAX_RESUME_COUNT} times."
|
@@ -819,10 +859,10 @@ class LaunchAgent:
|
|
819
859
|
)
|
820
860
|
return True
|
821
861
|
# TODO change these statuses to an enum
|
822
|
-
if
|
862
|
+
if state in ["stopped", "failed", "finished", "preempted"]:
|
823
863
|
if job_tracker.is_scheduler:
|
824
864
|
wandb.termlog(f"{LOG_PREFIX}Scheduler finished with ID: {run.id}")
|
825
|
-
if
|
865
|
+
if state == "failed":
|
826
866
|
# on fail, update sweep state. scheduler run_id should == sweep_id
|
827
867
|
try:
|
828
868
|
self._api.set_sweep_state(
|
@@ -836,7 +876,7 @@ class LaunchAgent:
|
|
836
876
|
else:
|
837
877
|
wandb.termlog(f"{LOG_PREFIX}Job finished with ID: {run.id}")
|
838
878
|
with self._jobs_lock:
|
839
|
-
job_tracker.completed_status =
|
879
|
+
job_tracker.completed_status = state
|
840
880
|
return True
|
841
881
|
|
842
882
|
return False
|
wandb/sdk/launch/agent/config.py
CHANGED
@@ -192,9 +192,6 @@ class AgentConfig(BaseModel):
|
|
192
192
|
default=[],
|
193
193
|
description="The queues to use for this agent.",
|
194
194
|
)
|
195
|
-
project: Optional[str] = Field(
|
196
|
-
description="The W&B project to use for this agent.",
|
197
|
-
)
|
198
195
|
entity: Optional[str] = Field(
|
199
196
|
description="The W&B entity to use for this agent.",
|
200
197
|
)
|
wandb/sdk/launch/builder/abstract.py
CHANGED
@@ -7,6 +7,12 @@ from wandb.sdk.launch.environment.abstract import AbstractEnvironment
|
|
7
7
|
from wandb.sdk.launch.registry.abstract import AbstractRegistry
|
8
8
|
|
9
9
|
from .._project_spec import EntryPoint, LaunchProject
|
10
|
+
from ..registry.anon import AnonynmousRegistry
|
11
|
+
from ..utils import (
|
12
|
+
AZURE_CONTAINER_REGISTRY_URI_REGEX,
|
13
|
+
ELASTIC_CONTAINER_REGISTRY_URI_REGEX,
|
14
|
+
GCP_ARTIFACT_REGISTRY_URI_REGEX,
|
15
|
+
)
|
10
16
|
|
11
17
|
if TYPE_CHECKING:
|
12
18
|
from wandb.sdk.launch.agent.job_status_tracker import JobAndRunStatusTracker
|
@@ -87,3 +93,64 @@ class AbstractBuilder(ABC):
|
|
87
93
|
LaunchError: If the builder cannot be used to build images.
|
88
94
|
"""
|
89
95
|
raise NotImplementedError
|
96
|
+
|
97
|
+
|
98
|
+
def registry_from_uri(uri: str) -> AbstractRegistry:
    """Create a registry helper object from a uri.

    A leading ``https://`` is stripped, then the uri is matched against the
    regexes for each supported remote registry, in this order:

    - Azure Container Registry (``AZURE_CONTAINER_REGISTRY_URI_REGEX``)
    - Google Artifact Registry (``GCP_ARTIFACT_REGISTRY_URI_REGEX``)
    - AWS Elastic Container Registry (``ELASTIC_CONTAINER_REGISTRY_URI_REGEX``)

    The expected uri formats are:

    - Azure Container Registry: <registry-name>.azurecr.io/<repo-name>/<image-name>
    - Google Artifact Registry: <location>-docker.pkg.dev/<project-id>/<repo-name>/<image-name>
    - AWS Elastic Container Registry: <account-id>.dkr.ecr.<region>.amazonaws.com/<repo-name>/<image-name>

    The helper class for the matching registry is imported only when its
    regex matches, so cloud-specific modules are not loaded unless needed.
    A uri that matches none of the regexes yields an ``AnonynmousRegistry``.

    The `https://` prefix is optional for all of the above.

    Arguments:
        uri: The uri to create a registry from.

    Returns:
        The registry helper for ``uri`` (with any ``https://`` prefix removed).

    Raises:
        LaunchError: Presumably raised by a registry helper's constructor if
            it cannot be loaded for the given URI -- nothing in this function
            raises it directly.
    """
    scheme = "https://"
    if uri.startswith(scheme):
        uri = uri[len(scheme) :]

    if AZURE_CONTAINER_REGISTRY_URI_REGEX.match(uri) is not None:
        from wandb.sdk.launch.registry.azure_container_registry import (
            AzureContainerRegistry,
        )

        return AzureContainerRegistry(uri=uri)

    if GCP_ARTIFACT_REGISTRY_URI_REGEX.match(uri) is not None:
        from wandb.sdk.launch.registry.google_artifact_registry import (
            GoogleArtifactRegistry,
        )

        return GoogleArtifactRegistry(uri=uri)

    if ELASTIC_CONTAINER_REGISTRY_URI_REGEX.match(uri) is not None:
        from wandb.sdk.launch.registry.elastic_container_registry import (
            ElasticContainerRegistry,
        )

        return ElasticContainerRegistry(uri=uri)

    # No known cloud registry matched: fall back to the anonymous helper.
    return AnonynmousRegistry(uri=uri)
|