wandb 0.19.1__py3-none-win32.whl → 0.19.2__py3-none-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wandb/__init__.py +1 -1
- wandb/__init__.pyi +3 -5
- wandb/agents/pyagent.py +1 -1
- wandb/apis/importers/wandb.py +1 -1
- wandb/apis/public/files.py +1 -1
- wandb/apis/public/jobs.py +1 -1
- wandb/apis/public/runs.py +2 -7
- wandb/apis/reports/v1/__init__.py +1 -1
- wandb/apis/reports/v2/__init__.py +1 -1
- wandb/apis/workspaces/__init__.py +1 -1
- wandb/bin/gpu_stats.exe +0 -0
- wandb/bin/wandb-core +0 -0
- wandb/cli/beta.py +7 -4
- wandb/cli/cli.py +5 -7
- wandb/docker/__init__.py +4 -4
- wandb/integration/fastai/__init__.py +4 -6
- wandb/integration/keras/keras.py +5 -3
- wandb/integration/metaflow/metaflow.py +7 -7
- wandb/integration/prodigy/prodigy.py +3 -11
- wandb/integration/sagemaker/__init__.py +5 -3
- wandb/integration/sagemaker/config.py +17 -8
- wandb/integration/sagemaker/files.py +0 -1
- wandb/integration/sagemaker/resources.py +47 -18
- wandb/integration/torch/wandb_torch.py +1 -1
- wandb/proto/v3/wandb_internal_pb2.py +273 -235
- wandb/proto/v4/wandb_internal_pb2.py +222 -214
- wandb/proto/v5/wandb_internal_pb2.py +222 -214
- wandb/sdk/artifacts/artifact.py +3 -9
- wandb/sdk/backend/backend.py +1 -1
- wandb/sdk/data_types/base_types/wb_value.py +1 -1
- wandb/sdk/data_types/graph.py +2 -2
- wandb/sdk/data_types/saved_model.py +1 -1
- wandb/sdk/data_types/video.py +1 -1
- wandb/sdk/interface/interface.py +25 -25
- wandb/sdk/interface/interface_shared.py +21 -5
- wandb/sdk/internal/handler.py +19 -1
- wandb/sdk/internal/internal.py +1 -1
- wandb/sdk/internal/internal_api.py +4 -5
- wandb/sdk/internal/sample.py +2 -2
- wandb/sdk/internal/sender.py +1 -2
- wandb/sdk/internal/settings_static.py +3 -1
- wandb/sdk/internal/system/assets/disk.py +4 -4
- wandb/sdk/internal/system/assets/gpu.py +1 -1
- wandb/sdk/internal/system/assets/memory.py +1 -1
- wandb/sdk/internal/system/system_info.py +1 -1
- wandb/sdk/internal/system/system_monitor.py +3 -1
- wandb/sdk/internal/tb_watcher.py +1 -1
- wandb/sdk/launch/_project_spec.py +3 -3
- wandb/sdk/launch/builder/abstract.py +1 -1
- wandb/sdk/lib/apikey.py +2 -3
- wandb/sdk/lib/fsm.py +1 -1
- wandb/sdk/lib/gitlib.py +1 -1
- wandb/sdk/lib/gql_request.py +1 -1
- wandb/sdk/lib/interrupt.py +37 -0
- wandb/sdk/lib/lazyloader.py +1 -1
- wandb/sdk/lib/service_connection.py +1 -1
- wandb/sdk/lib/telemetry.py +1 -1
- wandb/sdk/service/_startup_debug.py +1 -1
- wandb/sdk/service/server_sock.py +3 -2
- wandb/sdk/service/service.py +1 -1
- wandb/sdk/service/streams.py +19 -17
- wandb/sdk/verify/verify.py +13 -13
- wandb/sdk/wandb_init.py +95 -104
- wandb/sdk/wandb_login.py +1 -1
- wandb/sdk/wandb_metadata.py +547 -0
- wandb/sdk/wandb_run.py +127 -35
- wandb/sdk/wandb_settings.py +5 -36
- wandb/sdk/wandb_setup.py +83 -82
- wandb/sdk/wandb_sweep.py +2 -2
- wandb/sdk/wandb_sync.py +15 -18
- wandb/sync/sync.py +10 -10
- wandb/util.py +11 -3
- wandb/wandb_agent.py +11 -16
- wandb/wandb_controller.py +7 -7
- {wandb-0.19.1.dist-info → wandb-0.19.2.dist-info}/METADATA +3 -2
- {wandb-0.19.1.dist-info → wandb-0.19.2.dist-info}/RECORD +79 -77
- {wandb-0.19.1.dist-info → wandb-0.19.2.dist-info}/WHEEL +0 -0
- {wandb-0.19.1.dist-info → wandb-0.19.2.dist-info}/entry_points.txt +0 -0
- {wandb-0.19.1.dist-info → wandb-0.19.2.dist-info}/licenses/LICENSE +0 -0
wandb/sdk/artifacts/artifact.py
CHANGED
@@ -1685,11 +1685,7 @@ class Artifact:
|
|
1685
1685
|
from wandb.sdk.backend.backend import Backend
|
1686
1686
|
|
1687
1687
|
if wandb.run is None:
|
1688
|
-
|
1689
|
-
from wandb.sdk import wandb_setup
|
1690
|
-
|
1691
|
-
wl = wandb_setup.setup()
|
1692
|
-
assert wl is not None
|
1688
|
+
wl = wandb.setup()
|
1693
1689
|
|
1694
1690
|
stream_id = generate_id()
|
1695
1691
|
|
@@ -1702,9 +1698,7 @@ class Artifact:
|
|
1702
1698
|
settings.files_dir.value = str(tmp_dir / "files")
|
1703
1699
|
settings.run_id.value = stream_id
|
1704
1700
|
|
1705
|
-
service = wl.
|
1706
|
-
assert service
|
1707
|
-
|
1701
|
+
service = wl.ensure_service()
|
1708
1702
|
service.inform_init(settings=settings, run_id=stream_id)
|
1709
1703
|
|
1710
1704
|
mailbox = Mailbox()
|
@@ -1941,7 +1935,7 @@ class Artifact:
|
|
1941
1935
|
else:
|
1942
1936
|
ref_count += 1
|
1943
1937
|
if ref_count > 0:
|
1944
|
-
|
1938
|
+
termwarn(f"skipped verification of {ref_count} refs")
|
1945
1939
|
|
1946
1940
|
@ensure_logged
|
1947
1941
|
def file(self, root: str | None = None) -> StrPath:
|
wandb/sdk/backend/backend.py
CHANGED
@@ -135,7 +135,7 @@ class WBValue:
|
|
135
135
|
def init_from_json(
|
136
136
|
json_obj: dict, source_artifact: "Artifact"
|
137
137
|
) -> Optional["WBValue"]:
|
138
|
-
"""Initialize a `WBValue` from a JSON blob based on the class that
|
138
|
+
"""Initialize a `WBValue` from a JSON blob based on the class that created it.
|
139
139
|
|
140
140
|
Looks through all subclasses and tries to match the json obj with the class
|
141
141
|
which created it. It will then call that subclass' `from_json` method.
|
wandb/sdk/data_types/graph.py
CHANGED
@@ -311,9 +311,9 @@ class Graph(Media):
|
|
311
311
|
|
312
312
|
def pprint(self):
|
313
313
|
for edge in self.edges:
|
314
|
-
pprint.pprint(edge.attributes)
|
314
|
+
pprint.pprint(edge.attributes) # noqa: T203
|
315
315
|
for node in self.nodes:
|
316
|
-
pprint.pprint(node.attributes)
|
316
|
+
pprint.pprint(node.attributes) # noqa: T203
|
317
317
|
|
318
318
|
def add_node(self, node=None, **node_kwargs):
|
319
319
|
if node is None:
|
@@ -231,7 +231,7 @@ class _SavedModel(WBValue, Generic[SavedModelObjType]):
|
|
231
231
|
return cls(obj_or_path, **kwargs)
|
232
232
|
except Exception as e:
|
233
233
|
if DEBUG_MODE:
|
234
|
-
print(f"{cls}._maybe_init({obj_or_path}) failed: {e}")
|
234
|
+
print(f"{cls}._maybe_init({obj_or_path}) failed: {e}") # noqa: T201
|
235
235
|
|
236
236
|
for child_cls in cls.__subclasses__():
|
237
237
|
maybe_instance = child_cls._maybe_init(obj_or_path, **kwargs)
|
wandb/sdk/data_types/video.py
CHANGED
@@ -212,7 +212,7 @@ class Video(BatchableMedia):
|
|
212
212
|
)
|
213
213
|
if video.ndim < 4:
|
214
214
|
raise ValueError(
|
215
|
-
"Video must be
|
215
|
+
"Video must be at least 4 dimensions: time, channels, height, width"
|
216
216
|
)
|
217
217
|
if video.ndim == 4:
|
218
218
|
video = video.reshape(1, *video.shape)
|
wandb/sdk/interface/interface.py
CHANGED
@@ -216,6 +216,13 @@ class InterfaceBase:
|
|
216
216
|
def _publish_config(self, cfg: pb.ConfigRecord) -> None:
|
217
217
|
raise NotImplementedError
|
218
218
|
|
219
|
+
def publish_metadata(self, metadata: pb.MetadataRequest) -> None:
|
220
|
+
self._publish_metadata(metadata)
|
221
|
+
|
222
|
+
@abstractmethod
|
223
|
+
def _publish_metadata(self, metadata: pb.MetadataRequest) -> None:
|
224
|
+
raise NotImplementedError
|
225
|
+
|
219
226
|
@abstractmethod
|
220
227
|
def _publish_metric(self, metric: pb.MetricRecord) -> None:
|
221
228
|
raise NotImplementedError
|
@@ -722,7 +729,7 @@ class InterfaceBase:
|
|
722
729
|
otype = pb.OutputRecord.OutputType.STDERR
|
723
730
|
else:
|
724
731
|
# TODO(jhr): throw error?
|
725
|
-
|
732
|
+
termwarn("unknown type")
|
726
733
|
o = pb.OutputRecord(output_type=otype, line=data)
|
727
734
|
o.timestamp.GetCurrentTime()
|
728
735
|
self._publish_output(o)
|
@@ -742,7 +749,7 @@ class InterfaceBase:
|
|
742
749
|
otype = pb.OutputRawRecord.OutputType.STDERR
|
743
750
|
else:
|
744
751
|
# TODO(jhr): throw error?
|
745
|
-
|
752
|
+
termwarn("unknown type")
|
746
753
|
o = pb.OutputRawRecord(output_type=otype, line=data)
|
747
754
|
o.timestamp.GetCurrentTime()
|
748
755
|
self._publish_output_raw(o)
|
@@ -872,31 +879,14 @@ class InterfaceBase:
|
|
872
879
|
run_record = self._make_run(run)
|
873
880
|
return self._deliver_run(run_record)
|
874
881
|
|
875
|
-
def
|
882
|
+
def deliver_finish_sync(
|
876
883
|
self,
|
877
|
-
start_offset: int,
|
878
|
-
final_offset: int,
|
879
|
-
entity: Optional[str] = None,
|
880
|
-
project: Optional[str] = None,
|
881
|
-
run_id: Optional[str] = None,
|
882
|
-
skip_output_raw: Optional[bool] = None,
|
883
884
|
) -> MailboxHandle:
|
884
|
-
sync = pb.
|
885
|
-
|
886
|
-
final_offset=final_offset,
|
887
|
-
)
|
888
|
-
if entity:
|
889
|
-
sync.overwrite.entity = entity
|
890
|
-
if project:
|
891
|
-
sync.overwrite.project = project
|
892
|
-
if run_id:
|
893
|
-
sync.overwrite.run_id = run_id
|
894
|
-
if skip_output_raw:
|
895
|
-
sync.skip.output_raw = skip_output_raw
|
896
|
-
return self._deliver_sync(sync)
|
885
|
+
sync = pb.SyncFinishRequest()
|
886
|
+
return self._deliver_finish_sync(sync)
|
897
887
|
|
898
888
|
@abstractmethod
|
899
|
-
def
|
889
|
+
def _deliver_finish_sync(self, sync: pb.SyncFinishRequest) -> MailboxHandle:
|
900
890
|
raise NotImplementedError
|
901
891
|
|
902
892
|
@abstractmethod
|
@@ -954,8 +944,8 @@ class InterfaceBase:
|
|
954
944
|
raise NotImplementedError
|
955
945
|
|
956
946
|
def deliver_get_system_metrics(self) -> MailboxHandle:
|
957
|
-
|
958
|
-
return self._deliver_get_system_metrics(
|
947
|
+
get_system_metrics = pb.GetSystemMetricsRequest()
|
948
|
+
return self._deliver_get_system_metrics(get_system_metrics)
|
959
949
|
|
960
950
|
@abstractmethod
|
961
951
|
def _deliver_get_system_metrics(
|
@@ -963,6 +953,16 @@ class InterfaceBase:
|
|
963
953
|
) -> MailboxHandle:
|
964
954
|
raise NotImplementedError
|
965
955
|
|
956
|
+
def deliver_get_system_metadata(self) -> MailboxHandle:
|
957
|
+
get_system_metadata = pb.GetSystemMetadataRequest()
|
958
|
+
return self._deliver_get_system_metadata(get_system_metadata)
|
959
|
+
|
960
|
+
@abstractmethod
|
961
|
+
def _deliver_get_system_metadata(
|
962
|
+
self, get_system_metadata: pb.GetSystemMetadataRequest
|
963
|
+
) -> MailboxHandle:
|
964
|
+
raise NotImplementedError
|
965
|
+
|
966
966
|
def deliver_exit(self, exit_code: Optional[int]) -> MailboxHandle:
|
967
967
|
exit_data = self._make_exit(exit_code)
|
968
968
|
return self._deliver_exit(exit_data)
|
@@ -145,15 +145,17 @@ class InterfaceShared(InterfaceBase):
|
|
145
145
|
run_status: Optional[pb.RunStatusRequest] = None,
|
146
146
|
sender_mark: Optional[pb.SenderMarkRequest] = None,
|
147
147
|
sender_read: Optional[pb.SenderReadRequest] = None,
|
148
|
-
|
148
|
+
sync_finish: Optional[pb.SyncFinishRequest] = None,
|
149
149
|
status_report: Optional[pb.StatusReportRequest] = None,
|
150
150
|
cancel: Optional[pb.CancelRequest] = None,
|
151
151
|
summary_record: Optional[pb.SummaryRecordRequest] = None,
|
152
152
|
telemetry_record: Optional[pb.TelemetryRecordRequest] = None,
|
153
153
|
get_system_metrics: Optional[pb.GetSystemMetricsRequest] = None,
|
154
|
+
get_system_metadata: Optional[pb.GetSystemMetadataRequest] = None,
|
154
155
|
python_packages: Optional[pb.PythonPackagesRequest] = None,
|
155
156
|
job_input: Optional[pb.JobInputRequest] = None,
|
156
157
|
run_finish_without_exit: Optional[pb.RunFinishWithoutExitRequest] = None,
|
158
|
+
metadata: Optional[pb.MetadataRequest] = None,
|
157
159
|
) -> pb.Record:
|
158
160
|
request = pb.Request()
|
159
161
|
if login:
|
@@ -212,14 +214,18 @@ class InterfaceShared(InterfaceBase):
|
|
212
214
|
request.telemetry_record.CopyFrom(telemetry_record)
|
213
215
|
elif get_system_metrics:
|
214
216
|
request.get_system_metrics.CopyFrom(get_system_metrics)
|
215
|
-
elif
|
216
|
-
request.
|
217
|
+
elif get_system_metadata:
|
218
|
+
request.get_system_metadata.CopyFrom(get_system_metadata)
|
219
|
+
elif sync_finish:
|
220
|
+
request.sync_finish.CopyFrom(sync_finish)
|
217
221
|
elif python_packages:
|
218
222
|
request.python_packages.CopyFrom(python_packages)
|
219
223
|
elif job_input:
|
220
224
|
request.job_input.CopyFrom(job_input)
|
221
225
|
elif run_finish_without_exit:
|
222
226
|
request.run_finish_without_exit.CopyFrom(run_finish_without_exit)
|
227
|
+
elif metadata:
|
228
|
+
request.metadata.CopyFrom(metadata)
|
223
229
|
else:
|
224
230
|
raise Exception("Invalid request")
|
225
231
|
record = self._make_record(request=request)
|
@@ -377,6 +383,10 @@ class InterfaceShared(InterfaceBase):
|
|
377
383
|
rec = self._make_record(summary=summary)
|
378
384
|
self._publish(rec)
|
379
385
|
|
386
|
+
def _publish_metadata(self, metadata: pb.MetadataRequest) -> None:
|
387
|
+
rec = self._make_request(metadata=metadata)
|
388
|
+
self._publish(rec)
|
389
|
+
|
380
390
|
def _publish_metric(self, metric: pb.MetricRecord) -> None:
|
381
391
|
rec = self._make_record(metric=metric)
|
382
392
|
self._publish(rec)
|
@@ -459,8 +469,8 @@ class InterfaceShared(InterfaceBase):
|
|
459
469
|
record = self._make_record(run=run)
|
460
470
|
return self._deliver_record(record)
|
461
471
|
|
462
|
-
def
|
463
|
-
record = self._make_request(
|
472
|
+
def _deliver_finish_sync(self, sync_finish: pb.SyncFinishRequest) -> MailboxHandle:
|
473
|
+
record = self._make_request(sync_finish=sync_finish)
|
464
474
|
return self._deliver_record(record)
|
465
475
|
|
466
476
|
def _deliver_run_start(self, run_start: pb.RunStartRequest) -> MailboxHandle:
|
@@ -477,6 +487,12 @@ class InterfaceShared(InterfaceBase):
|
|
477
487
|
record = self._make_request(get_system_metrics=get_system_metrics)
|
478
488
|
return self._deliver_record(record)
|
479
489
|
|
490
|
+
def _deliver_get_system_metadata(
|
491
|
+
self, get_system_metadata: pb.GetSystemMetadataRequest
|
492
|
+
) -> MailboxHandle:
|
493
|
+
record = self._make_request(get_system_metadata=get_system_metadata)
|
494
|
+
return self._deliver_record(record)
|
495
|
+
|
480
496
|
def _deliver_exit(self, exit_data: pb.RunExitRecord) -> MailboxHandle:
|
481
497
|
record = self._make_record(exit=exit_data)
|
482
498
|
return self._deliver_record(record)
|
wandb/sdk/internal/handler.py
CHANGED
@@ -39,6 +39,7 @@ from wandb.proto.wandb_internal_pb2 import (
|
|
39
39
|
|
40
40
|
from ..interface.interface_queue import InterfaceQueue
|
41
41
|
from ..lib import handler_util, proto_util
|
42
|
+
from ..wandb_metadata import Metadata
|
42
43
|
from . import context, sample, tb_watcher
|
43
44
|
from .settings_static import SettingsStatic
|
44
45
|
from .system.system_monitor import SystemMonitor
|
@@ -119,6 +120,7 @@ class HandleManager:
|
|
119
120
|
|
120
121
|
self._tb_watcher = None
|
121
122
|
self._system_monitor = None
|
123
|
+
self._metadata: Optional[Metadata] = None
|
122
124
|
self._step = 0
|
123
125
|
|
124
126
|
self._track_time = None
|
@@ -176,6 +178,9 @@ class HandleManager:
|
|
176
178
|
def handle_request_cancel(self, record: Record) -> None:
|
177
179
|
self._dispatch_record(record)
|
178
180
|
|
181
|
+
def handle_request_metadata(self, record: Record) -> None:
|
182
|
+
logger.warning("Metadata updates are ignored when using the legacy service.")
|
183
|
+
|
179
184
|
def handle_request_defer(self, record: Record) -> None:
|
180
185
|
defer = record.request.defer
|
181
186
|
state = defer.state
|
@@ -700,7 +705,10 @@ class HandleManager:
|
|
700
705
|
not (self._settings.x_disable_meta or self._settings.x_disable_machine_info)
|
701
706
|
and not run_start.run.resumed
|
702
707
|
):
|
703
|
-
|
708
|
+
try:
|
709
|
+
self._metadata = Metadata(**self._system_monitor.probe(publish=True))
|
710
|
+
except Exception as e:
|
711
|
+
logger.error("Error probing system metadata: %s", e)
|
704
712
|
|
705
713
|
self._tb_watcher = tb_watcher.TBWatcher(
|
706
714
|
self._settings, interface=self._interface, run_proto=run_start.run
|
@@ -778,6 +786,16 @@ class HandleManager:
|
|
778
786
|
|
779
787
|
self._respond_result(result)
|
780
788
|
|
789
|
+
def handle_request_get_system_metadata(self, record: Record) -> None:
|
790
|
+
result = proto_util._result_from_record(record)
|
791
|
+
if self._system_monitor is None or self._metadata is None:
|
792
|
+
return
|
793
|
+
|
794
|
+
result.response.get_system_metadata_response.metadata.CopyFrom(
|
795
|
+
self._metadata.to_proto()
|
796
|
+
)
|
797
|
+
self._respond_result(result)
|
798
|
+
|
781
799
|
def handle_tbrecord(self, record: Record) -> None:
|
782
800
|
logger.info("handling tbrecord: %s", record)
|
783
801
|
if self._tb_watcher:
|
wandb/sdk/internal/internal.py
CHANGED
@@ -165,7 +165,7 @@ def wandb_internal(
|
|
165
165
|
exc_info = thread.get_exception()
|
166
166
|
if exc_info:
|
167
167
|
logger.error(f"Thread {thread.name}:", exc_info=exc_info)
|
168
|
-
print(f"Thread {thread.name}:", file=sys.stderr)
|
168
|
+
print(f"Thread {thread.name}:", file=sys.stderr) # noqa: T201
|
169
169
|
traceback.print_exception(*exc_info)
|
170
170
|
wandb._sentry.exception(exc_info)
|
171
171
|
wandb.termerror("Internal wandb error: file data was not synced")
|
@@ -400,9 +400,6 @@ class Api:
|
|
400
400
|
wandb.termerror(f"Error while calling W&B API: {error} ({response})")
|
401
401
|
raise
|
402
402
|
|
403
|
-
def disabled(self) -> Union[str, bool]:
|
404
|
-
return self._settings.get(Settings.DEFAULT_SECTION, "disabled", fallback=False) # type: ignore
|
405
|
-
|
406
403
|
def set_current_run_id(self, run_id: str) -> None:
|
407
404
|
self._current_run_id = run_id
|
408
405
|
|
@@ -2321,7 +2318,9 @@ class Api:
|
|
2321
2318
|
"commit": commit,
|
2322
2319
|
"displayName": display_name,
|
2323
2320
|
"notes": notes,
|
2324
|
-
"host": None
|
2321
|
+
"host": None
|
2322
|
+
if self.settings().get("anonymous") in ["allow", "must"]
|
2323
|
+
else host,
|
2325
2324
|
"debug": env.is_debug(env=self._environ),
|
2326
2325
|
"repo": repo,
|
2327
2326
|
"program": program_path,
|
@@ -3468,7 +3467,7 @@ class Api:
|
|
3468
3467
|
else open(normal_name, "rb")
|
3469
3468
|
)
|
3470
3469
|
except OSError:
|
3471
|
-
print(f"{file_name} does not exist")
|
3470
|
+
print(f"{file_name} does not exist") # noqa: T201
|
3472
3471
|
continue
|
3473
3472
|
if progress is False:
|
3474
3473
|
responses.append(
|
wandb/sdk/internal/sample.py
CHANGED
@@ -30,11 +30,11 @@ class UniformSampleAccumulator:
|
|
30
30
|
self._log2 += [int(math.log(i, 2)) for i in range(1, 2**self._buckets + 1)]
|
31
31
|
|
32
32
|
def _show(self):
|
33
|
-
print("=" * 20)
|
33
|
+
print("=" * 20) # noqa: T201
|
34
34
|
for b in range(self._buckets):
|
35
35
|
b = (b + self._buckets_index) % self._buckets
|
36
36
|
vals = [self._bucket[b][i] for i in range(self._index[b])]
|
37
|
-
print(f"{b}: {vals}")
|
37
|
+
print(f"{b}: {vals}") # noqa: T201
|
38
38
|
|
39
39
|
def add(self, val):
|
40
40
|
self._count += 1
|
wandb/sdk/internal/sender.py
CHANGED
@@ -323,7 +323,6 @@ class SendManager:
|
|
323
323
|
|
324
324
|
Exclusively used in `sync.py`.
|
325
325
|
"""
|
326
|
-
print(root_dir)
|
327
326
|
files_dir = os.path.join(root_dir, "files")
|
328
327
|
settings = wandb.Settings(
|
329
328
|
x_files_dir=files_dir,
|
@@ -1339,7 +1338,7 @@ class SendManager:
|
|
1339
1338
|
if not line.endswith("\n"):
|
1340
1339
|
self._partial_output.setdefault(stream, "")
|
1341
1340
|
if line.startswith("\r"):
|
1342
|
-
# TODO: maybe we
|
1341
|
+
# TODO: maybe we shouldn't just drop this, what if there was some \ns in the partial
|
1343
1342
|
# that should probably be the check instead of not line.endswith(\n")
|
1344
1343
|
# logger.info(f"Dropping data {self._partial_output[stream]}")
|
1345
1344
|
self._partial_output[stream] = ""
|
@@ -1,3 +1,5 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
1
3
|
from typing import Any, Iterable
|
2
4
|
|
3
5
|
from wandb.proto import wandb_settings_pb2
|
@@ -78,7 +80,7 @@ class SettingsStatic(Settings):
|
|
78
80
|
def __setitem__(self, key: str, val: object) -> None:
|
79
81
|
raise AttributeError("Error: SettingsStatic is a readonly object")
|
80
82
|
|
81
|
-
def keys(self) ->
|
83
|
+
def keys(self) -> Iterable[str]:
|
82
84
|
return self.__dict__.keys()
|
83
85
|
|
84
86
|
def __getitem__(self, key: str) -> Any:
|
@@ -190,10 +190,10 @@ class Disk:
|
|
190
190
|
disk_metrics = {}
|
191
191
|
for disk_path in disk_paths:
|
192
192
|
try:
|
193
|
-
# total disk space in
|
194
|
-
total = psutil.disk_usage(disk_path).total
|
195
|
-
# total disk space used in
|
196
|
-
used = psutil.disk_usage(disk_path).used
|
193
|
+
# total disk space in Bytes:
|
194
|
+
total = psutil.disk_usage(disk_path).total
|
195
|
+
# total disk space used in Bytes:
|
196
|
+
used = psutil.disk_usage(disk_path).used
|
197
197
|
disk_metrics[disk_path] = {
|
198
198
|
"total": total,
|
199
199
|
"used": used,
|
@@ -32,7 +32,7 @@ def gpu_in_use_by_this_process(gpu_handle: "GPUHandle", pid: int) -> bool:
|
|
32
32
|
try:
|
33
33
|
base_process = psutil.Process(pid=pid)
|
34
34
|
except psutil.NoSuchProcess:
|
35
|
-
# do not report any gpu metrics if the base process
|
35
|
+
# do not report any gpu metrics if the base process can't be found
|
36
36
|
return False
|
37
37
|
|
38
38
|
our_processes = base_process.children(recursive=True)
|
@@ -190,7 +190,7 @@ class SystemInfo:
|
|
190
190
|
# get the git repo info
|
191
191
|
data = self._probe_git(data)
|
192
192
|
|
193
|
-
if self.settings.anonymous
|
193
|
+
if self.settings.anonymous not in ["allow", "must"]:
|
194
194
|
data["host"] = self.settings.host
|
195
195
|
data["username"] = self.settings.username
|
196
196
|
data["executable"] = sys.executable
|
@@ -203,7 +203,7 @@ class SystemMonitor:
|
|
203
203
|
logger.error(f"Error joining system monitor process: {e}")
|
204
204
|
self._process = None
|
205
205
|
|
206
|
-
def probe(self, publish: bool = True) ->
|
206
|
+
def probe(self, publish: bool = True) -> dict:
|
207
207
|
logger.info("Collecting system info")
|
208
208
|
# collect static info about the hardware from registered assets
|
209
209
|
hardware_info: dict = {
|
@@ -220,3 +220,5 @@ class SystemMonitor:
|
|
220
220
|
logger.info("Publishing system info")
|
221
221
|
self.system_info.publish(system_info)
|
222
222
|
logger.info("Finished publishing system info")
|
223
|
+
|
224
|
+
return system_info
|
wandb/sdk/internal/tb_watcher.py
CHANGED
@@ -492,7 +492,7 @@ class TBHistory:
|
|
492
492
|
self._step, len(dropped_keys)
|
493
493
|
)
|
494
494
|
)
|
495
|
-
print("\t" + ("\n\t".join(dropped_keys)))
|
495
|
+
print("\t" + ("\n\t".join(dropped_keys))) # noqa: T201
|
496
496
|
self._data["_step"] = self._step
|
497
497
|
self._added.append(self._data)
|
498
498
|
self._step += 1
|
@@ -116,7 +116,7 @@ def registry_from_uri(uri: str) -> AbstractRegistry:
|
|
116
116
|
it as an AWS Elastic Container Registry. If the uri contains
|
117
117
|
`-docker.pkg.dev`, we classify it as a Google Artifact Registry.
|
118
118
|
|
119
|
-
This function will attempt to load the
|
119
|
+
This function will attempt to load the appropriate cloud helpers for the
|
120
120
|
|
121
121
|
`https://` prefix is optional for all of the above.
|
122
122
|
|
wandb/sdk/lib/apikey.py
CHANGED
@@ -250,7 +250,7 @@ def write_key(
|
|
250
250
|
)
|
251
251
|
|
252
252
|
if anonymous:
|
253
|
-
api.set_setting("anonymous", "
|
253
|
+
api.set_setting("anonymous", "must", globally=True, persist=True)
|
254
254
|
else:
|
255
255
|
api.clear_setting("anonymous", globally=True, persist=True)
|
256
256
|
|
@@ -259,8 +259,7 @@ def write_key(
|
|
259
259
|
|
260
260
|
def api_key(settings: Optional["Settings"] = None) -> Optional[str]:
|
261
261
|
if settings is None:
|
262
|
-
settings = wandb.setup().settings
|
263
|
-
assert settings is not None
|
262
|
+
settings = wandb.setup().settings
|
264
263
|
if settings.api_key:
|
265
264
|
return settings.api_key
|
266
265
|
auth = get_netrc_auth(settings.base_url)
|
wandb/sdk/lib/fsm.py
CHANGED
@@ -93,7 +93,7 @@ class FsmStateExit(Protocol[T_FsmInputs, T_FsmContext_cov]):
|
|
93
93
|
def on_exit(self, inputs: T_FsmInputs) -> T_FsmContext_cov: ... # pragma: no cover
|
94
94
|
|
95
95
|
|
96
|
-
# It would be nice if python provided optional protocol members, but it
|
96
|
+
# It would be nice if python provided optional protocol members, but it does not as described here:
|
97
97
|
# https://peps.python.org/pep-0544/#support-optional-protocol-members
|
98
98
|
# Until then, we can only enforce that a state at least supports one protocol interface. This
|
99
99
|
# unfortunately will not check the signature of other potential protocols.
|
wandb/sdk/lib/gitlib.py
CHANGED
@@ -226,7 +226,7 @@ class GitRepo:
|
|
226
226
|
try:
|
227
227
|
return self.repo.create_tag(f"wandb/{name}", message=message, force=True)
|
228
228
|
except GitCommandError:
|
229
|
-
|
229
|
+
logger.debug("Failed to tag repository.")
|
230
230
|
return None
|
231
231
|
|
232
232
|
def push(self, name: str) -> Any:
|
wandb/sdk/lib/gql_request.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
"""A simple GraphQL client for sending queries and mutations.
|
2
2
|
|
3
3
|
Note: This was originally wandb/vendor/gql-0.2.0/wandb_gql/transport/requests.py
|
4
|
-
The only substantial change is to
|
4
|
+
The only substantial change is to reuse a requests.Session object.
|
5
5
|
"""
|
6
6
|
|
7
7
|
from typing import Any, Callable, Dict, Optional, Tuple, Union
|
@@ -0,0 +1,37 @@
|
|
1
|
+
"""Utility to send an interrupt (Ctrl+C) signal to the main thread.
|
2
|
+
|
3
|
+
This is necessary because Windows and POSIX use different models for Ctrl+C
|
4
|
+
interrupts.
|
5
|
+
"""
|
6
|
+
|
7
|
+
import platform
|
8
|
+
import signal
|
9
|
+
import threading
|
10
|
+
|
11
|
+
|
12
|
+
def interrupt_main():
|
13
|
+
"""Interrupt the main Python thread with a SIGINT signal.
|
14
|
+
|
15
|
+
In POSIX, signal.pthread_kill() is the most reliable way to send a signal
|
16
|
+
to the main thread.
|
17
|
+
|
18
|
+
os.kill() is often recommended, but it isn't guaranteed to deliver the
|
19
|
+
signal to the main OS thread. Likewise, signal.raise_signal() delivers
|
20
|
+
the signal to the current thread in POSIX. The issue is that if any other
|
21
|
+
thread receives the signal, Python will set an internal flag and process it
|
22
|
+
on the main thread at the next opportunity. If the main thread is executing
|
23
|
+
C code or is blocked on a syscall (e.g. time.sleep(999999)) the signal
|
24
|
+
handler won't execute until that's done---i.e. Python won't preempt the OS
|
25
|
+
thread on its own.
|
26
|
+
|
27
|
+
On Windows, pthread_kill is not available and os.kill() ignores its
|
28
|
+
second argument and always kills the process. However,
|
29
|
+
signal.raise_signal() does the right thing.
|
30
|
+
"""
|
31
|
+
if platform.system() == "Windows":
|
32
|
+
signal.raise_signal(signal.SIGINT)
|
33
|
+
else:
|
34
|
+
signal.pthread_kill(
|
35
|
+
threading.main_thread().ident,
|
36
|
+
signal.SIGINT,
|
37
|
+
)
|
wandb/sdk/lib/lazyloader.py
CHANGED
@@ -41,7 +41,7 @@ def connect_to_service(
|
|
41
41
|
|
42
42
|
|
43
43
|
def _try_connect_to_existing_service() -> ServiceConnection | None:
|
44
|
-
"""
|
44
|
+
"""Attempts to connect to an existing service process."""
|
45
45
|
token = service_token.get_service_token()
|
46
46
|
if not token:
|
47
47
|
return None
|
wandb/sdk/lib/telemetry.py
CHANGED
@@ -65,7 +65,7 @@ def _parse_label_lines(lines: List[str]) -> Dict[str, str]:
|
|
65
65
|
label_str = line[idx + len(_LABEL_TOKEN) :]
|
66
66
|
|
67
67
|
# match identifier (first token without key=value syntax (optional)
|
68
|
-
# Note: Parse is fairly permissive as it
|
68
|
+
# Note: Parse is fairly permissive as it does not enforce strict syntax
|
69
69
|
r = MATCH_RE.match(label_str)
|
70
70
|
if r:
|
71
71
|
ret["code"] = r.group("code").replace("-", "_")
|
wandb/sdk/service/server_sock.py
CHANGED
@@ -4,6 +4,7 @@ import threading
|
|
4
4
|
import time
|
5
5
|
from typing import TYPE_CHECKING, Any, Callable, Dict, Optional
|
6
6
|
|
7
|
+
import wandb
|
7
8
|
from wandb.proto import wandb_server_pb2 as spb
|
8
9
|
from wandb.sdk.internal.settings_static import SettingsStatic
|
9
10
|
|
@@ -225,7 +226,7 @@ class DebugThread(threading.Thread):
|
|
225
226
|
while True:
|
226
227
|
time.sleep(30)
|
227
228
|
for thread in threading.enumerate():
|
228
|
-
|
229
|
+
wandb.termwarn(f"DEBUG: {thread.name}")
|
229
230
|
|
230
231
|
|
231
232
|
class SocketServer:
|
@@ -266,7 +267,7 @@ class SocketServer:
|
|
266
267
|
# socket.shutdown() is a more heavy handed approach to interrupting socket.accept()
|
267
268
|
# in the future we might want to consider a more graceful shutdown which would involve setting
|
268
269
|
# a threading Event and then initiating one last connection just to close down the thread
|
269
|
-
# The advantage of the heavy handed approach is that it
|
270
|
+
# The advantage of the heavy handed approach is that it does not depend on the threads functioning
|
270
271
|
# properly, that is, if something has gone wrong, we probably want to use this hammer to shut things down
|
271
272
|
self._sock.shutdown(socket.SHUT_RDWR)
|
272
273
|
except OSError:
|