wandb 0.19.12rc1__py3-none-win32.whl → 0.20.1__py3-none-win32.whl
This diff compares the contents of two publicly released package versions as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
- wandb/__init__.py +1 -2
- wandb/__init__.pyi +3 -6
- wandb/_iterutils.py +26 -7
- wandb/_pydantic/__init__.py +2 -1
- wandb/_pydantic/utils.py +7 -0
- wandb/agents/pyagent.py +9 -15
- wandb/analytics/sentry.py +1 -2
- wandb/apis/attrs.py +3 -4
- wandb/apis/importers/internals/util.py +1 -1
- wandb/apis/importers/validation.py +2 -2
- wandb/apis/importers/wandb.py +30 -25
- wandb/apis/normalize.py +2 -2
- wandb/apis/public/__init__.py +1 -0
- wandb/apis/public/api.py +37 -33
- wandb/apis/public/artifacts.py +103 -72
- wandb/apis/public/jobs.py +3 -2
- wandb/apis/public/registries/registries_search.py +4 -2
- wandb/apis/public/registries/registry.py +1 -1
- wandb/apis/public/registries/utils.py +9 -9
- wandb/apis/public/runs.py +18 -6
- wandb/automations/_filters/expressions.py +1 -1
- wandb/automations/_filters/operators.py +1 -1
- wandb/automations/_filters/run_metrics.py +1 -1
- wandb/beta/workflows.py +6 -5
- wandb/bin/gpu_stats.exe +0 -0
- wandb/bin/wandb-core +0 -0
- wandb/cli/cli.py +54 -73
- wandb/docker/__init__.py +21 -74
- wandb/docker/names.py +40 -0
- wandb/env.py +0 -1
- wandb/errors/util.py +1 -1
- wandb/filesync/step_checksum.py +1 -1
- wandb/filesync/step_upload.py +1 -1
- wandb/integration/diffusers/resolvers/multimodal.py +1 -2
- wandb/integration/gym/__init__.py +5 -6
- wandb/integration/keras/callbacks/model_checkpoint.py +2 -2
- wandb/integration/keras/keras.py +13 -19
- wandb/integration/kfp/kfp_patch.py +2 -3
- wandb/integration/langchain/wandb_tracer.py +1 -1
- wandb/integration/metaflow/metaflow.py +13 -13
- wandb/integration/openai/fine_tuning.py +3 -2
- wandb/integration/sagemaker/auth.py +2 -1
- wandb/integration/sklearn/utils.py +2 -1
- wandb/integration/tensorboard/__init__.py +1 -1
- wandb/integration/tensorboard/log.py +2 -5
- wandb/integration/tensorflow/__init__.py +2 -2
- wandb/jupyter.py +20 -17
- wandb/plot/confusion_matrix.py +1 -1
- wandb/plot/utils.py +8 -7
- wandb/proto/v3/wandb_internal_pb2.py +355 -335
- wandb/proto/v3/wandb_settings_pb2.py +2 -2
- wandb/proto/v3/wandb_telemetry_pb2.py +12 -12
- wandb/proto/v4/wandb_internal_pb2.py +339 -335
- wandb/proto/v4/wandb_settings_pb2.py +2 -2
- wandb/proto/v4/wandb_telemetry_pb2.py +12 -12
- wandb/proto/v5/wandb_internal_pb2.py +339 -335
- wandb/proto/v5/wandb_settings_pb2.py +2 -2
- wandb/proto/v5/wandb_telemetry_pb2.py +12 -12
- wandb/proto/v6/wandb_internal_pb2.py +339 -335
- wandb/proto/v6/wandb_settings_pb2.py +2 -2
- wandb/proto/v6/wandb_telemetry_pb2.py +12 -12
- wandb/proto/wandb_deprecated.py +6 -8
- wandb/sdk/artifacts/_internal_artifact.py +43 -0
- wandb/sdk/artifacts/_validators.py +55 -35
- wandb/sdk/artifacts/artifact.py +117 -115
- wandb/sdk/artifacts/artifact_download_logger.py +2 -0
- wandb/sdk/artifacts/artifact_saver.py +1 -3
- wandb/sdk/artifacts/artifact_state.py +2 -0
- wandb/sdk/artifacts/artifact_ttl.py +2 -0
- wandb/sdk/artifacts/exceptions.py +14 -0
- wandb/sdk/artifacts/staging.py +2 -0
- wandb/sdk/artifacts/storage_handlers/local_file_handler.py +2 -6
- wandb/sdk/artifacts/storage_handlers/multi_handler.py +1 -1
- wandb/sdk/artifacts/storage_handlers/tracking_handler.py +2 -6
- wandb/sdk/artifacts/storage_handlers/wb_artifact_handler.py +1 -5
- wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +1 -1
- wandb/sdk/artifacts/storage_layout.py +2 -0
- wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +3 -3
- wandb/sdk/backend/backend.py +11 -182
- wandb/sdk/data_types/_dtypes.py +2 -6
- wandb/sdk/data_types/audio.py +20 -3
- wandb/sdk/data_types/base_types/media.py +12 -7
- wandb/sdk/data_types/base_types/wb_value.py +8 -18
- wandb/sdk/data_types/bokeh.py +19 -2
- wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +17 -1
- wandb/sdk/data_types/helper_types/image_mask.py +7 -1
- wandb/sdk/data_types/html.py +4 -4
- wandb/sdk/data_types/image.py +178 -103
- wandb/sdk/data_types/molecule.py +6 -6
- wandb/sdk/data_types/object_3d.py +10 -5
- wandb/sdk/data_types/saved_model.py +11 -6
- wandb/sdk/data_types/table.py +313 -83
- wandb/sdk/data_types/table_decorators.py +108 -0
- wandb/sdk/data_types/utils.py +43 -7
- wandb/sdk/data_types/video.py +21 -3
- wandb/sdk/interface/interface.py +10 -0
- wandb/sdk/internal/datastore.py +2 -6
- wandb/sdk/internal/file_pusher.py +1 -5
- wandb/sdk/internal/file_stream.py +8 -17
- wandb/sdk/internal/handler.py +2 -2
- wandb/sdk/internal/incremental_table_util.py +53 -0
- wandb/sdk/internal/internal.py +3 -5
- wandb/sdk/internal/internal_api.py +66 -89
- wandb/sdk/internal/job_builder.py +2 -7
- wandb/sdk/internal/profiler.py +2 -2
- wandb/sdk/internal/progress.py +1 -3
- wandb/sdk/internal/run.py +1 -6
- wandb/sdk/internal/sender.py +24 -36
- wandb/sdk/internal/system/assets/aggregators.py +1 -7
- wandb/sdk/internal/system/assets/disk.py +3 -3
- wandb/sdk/internal/system/assets/gpu.py +4 -4
- wandb/sdk/internal/system/assets/gpu_amd.py +4 -4
- wandb/sdk/internal/system/assets/interfaces.py +6 -6
- wandb/sdk/internal/system/assets/tpu.py +1 -1
- wandb/sdk/internal/system/assets/trainium.py +6 -6
- wandb/sdk/internal/system/system_info.py +5 -7
- wandb/sdk/internal/system/system_monitor.py +4 -4
- wandb/sdk/internal/tb_watcher.py +5 -7
- wandb/sdk/launch/_launch.py +1 -1
- wandb/sdk/launch/_project_spec.py +19 -20
- wandb/sdk/launch/agent/agent.py +3 -3
- wandb/sdk/launch/agent/config.py +1 -1
- wandb/sdk/launch/agent/job_status_tracker.py +2 -2
- wandb/sdk/launch/builder/build.py +2 -3
- wandb/sdk/launch/builder/kaniko_builder.py +5 -4
- wandb/sdk/launch/environment/gcp_environment.py +1 -2
- wandb/sdk/launch/registry/azure_container_registry.py +2 -2
- wandb/sdk/launch/registry/elastic_container_registry.py +2 -2
- wandb/sdk/launch/registry/google_artifact_registry.py +3 -3
- wandb/sdk/launch/runner/abstract.py +5 -5
- wandb/sdk/launch/runner/kubernetes_monitor.py +2 -2
- wandb/sdk/launch/runner/kubernetes_runner.py +1 -1
- wandb/sdk/launch/runner/sagemaker_runner.py +2 -4
- wandb/sdk/launch/runner/vertex_runner.py +2 -7
- wandb/sdk/launch/sweeps/__init__.py +1 -1
- wandb/sdk/launch/sweeps/scheduler.py +2 -2
- wandb/sdk/launch/sweeps/utils.py +3 -3
- wandb/sdk/launch/utils.py +3 -4
- wandb/sdk/lib/apikey.py +5 -8
- wandb/sdk/lib/config_util.py +3 -3
- wandb/sdk/lib/fsm.py +3 -18
- wandb/sdk/lib/gitlib.py +6 -5
- wandb/sdk/lib/ipython.py +2 -2
- wandb/sdk/lib/json_util.py +9 -14
- wandb/sdk/lib/printer.py +3 -8
- wandb/sdk/lib/redirect.py +1 -1
- wandb/sdk/lib/retry.py +3 -7
- wandb/sdk/lib/run_moment.py +2 -2
- wandb/sdk/lib/service_connection.py +3 -1
- wandb/sdk/lib/service_token.py +1 -2
- wandb/sdk/mailbox/mailbox_handle.py +3 -7
- wandb/sdk/mailbox/response_handle.py +2 -6
- wandb/sdk/service/streams.py +3 -7
- wandb/sdk/verify/verify.py +5 -6
- wandb/sdk/wandb_config.py +1 -1
- wandb/sdk/wandb_init.py +38 -106
- wandb/sdk/wandb_login.py +7 -6
- wandb/sdk/wandb_run.py +52 -240
- wandb/sdk/wandb_settings.py +71 -60
- wandb/sdk/wandb_setup.py +40 -14
- wandb/sdk/wandb_watch.py +5 -7
- wandb/sync/__init__.py +1 -1
- wandb/sync/sync.py +13 -13
- wandb/util.py +17 -35
- wandb/wandb_agent.py +8 -11
- {wandb-0.19.12rc1.dist-info → wandb-0.20.1.dist-info}/METADATA +5 -5
- {wandb-0.19.12rc1.dist-info → wandb-0.20.1.dist-info}/RECORD +170 -168
- wandb/docker/auth.py +0 -435
- wandb/docker/www_authenticate.py +0 -94
- {wandb-0.19.12rc1.dist-info → wandb-0.20.1.dist-info}/WHEEL +0 -0
- {wandb-0.19.12rc1.dist-info → wandb-0.20.1.dist-info}/entry_points.txt +0 -0
- {wandb-0.19.12rc1.dist-info → wandb-0.20.1.dist-info}/licenses/LICENSE +0 -0
wandb/sdk/data_types/table.py
CHANGED
@@ -2,8 +2,10 @@ import base64
 import binascii
 import codecs
 import datetime
+import json
 import logging
 import os
+from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Set, Tuple
 
 import wandb
 from wandb import util
@@ -13,8 +15,18 @@ from . import _dtypes
 from ._private import MEDIA_TMP
 from .base_types.media import Media, _numpy_arrays_to_lists
 from .base_types.wb_value import WBValue
+from .table_decorators import (
+    allow_incremental_logging_after_append,
+    allow_relogging_after_mutation,
+    ensure_not_incremental,
+)
 from .utils import _json_helper
 
+if TYPE_CHECKING:
+    from wandb.sdk.artifacts import artifact
+
+    from ...wandb_run import Run as LocalRun
+
 
 class _TableLinkMixin:
     def set_table(self, table):
@@ -180,6 +192,9 @@ class _ForeignIndexType(_dtypes.Type):
         return cls(table)
 
 
+_SUPPORTED_LOGGING_MODES = ["IMMUTABLE", "MUTABLE", "INCREMENTAL"]
+
+
 class Table(Media):
     """The Table class used to display and analyze tabular data.
 
@@ -190,21 +205,6 @@ class Table(Media):
 
     This class is the primary class used to generate the Table Visualizer
     in the UI: https://docs.wandb.ai/guides/data-vis/tables.
-
-    Args:
-        columns: (List[str]) Names of the columns in the table.
-            Defaults to ["Input", "Output", "Expected"].
-        data: (List[List[any]]) 2D row-oriented array of values.
-        dataframe: (pandas.DataFrame) DataFrame object used to create the table.
-            When set, `data` and `columns` arguments are ignored.
-        optional: (Union[bool,List[bool]]) Determines if `None` values are allowed. Default to True
-            - If a singular bool value, then the optionality is enforced for all
-            columns specified at construction time
-            - If a list of bool values, then the optionality is applied to each
-            column - should be the same length as `columns`
-            applies to all columns. A list of bool values applies to each respective column.
-        allow_mixed_types: (bool) Determines if columns are allowed to have mixed types
-            (disables type validation). Defaults to False
     """
 
     MAX_ROWS = 10000
@@ -221,15 +221,48 @@ class Table(Media):
         dtype=None,
         optional=True,
         allow_mixed_types=False,
+        log_mode: Optional[
+            Literal["IMMUTABLE", "MUTABLE", "INCREMENTAL"]
+        ] = "IMMUTABLE",
     ):
         """Initializes a Table object.
 
         The rows is available for legacy reasons and should not be used.
         The Table class uses data to mimic the Pandas API.
+
+        Args:
+            columns: (List[str]) Names of the columns in the table.
+                Defaults to ["Input", "Output", "Expected"].
+            data: (List[List[any]]) 2D row-oriented array of values.
+            dataframe: (pandas.DataFrame) DataFrame object used to create the table.
+                When set, `data` and `columns` arguments are ignored.
+            optional: (Union[bool,List[bool]]) Determines if `None` values are allowed. Default to True
+                - If a singular bool value, then the optionality is enforced for all
+                columns specified at construction time
+                - If a list of bool values, then the optionality is applied to each
+                column - should be the same length as `columns`
+                applies to all columns. A list of bool values applies to each respective column.
+            allow_mixed_types: (bool) Determines if columns are allowed to have mixed types
+                (disables type validation). Defaults to False
+            log_mode: Optional[str] Controls how the Table is logged when mutations occur.
+                Options:
+                - "IMMUTABLE" (default): Table can only be logged once; subsequent
+                logging attempts after the table has been mutated will be no-ops.
+                - "MUTABLE": Table can be re-logged after mutations, creating
+                a new artifact version each time it's logged.
+                - "INCREMENTAL": Table data is logged incrementally, with each log creating
+                a new artifact entry containing the new data since the last log.
         """
         super().__init__()
+        self._validate_log_mode(log_mode)
+        self.log_mode = log_mode
+        if self.log_mode == "INCREMENTAL":
+            self._increment_num: int | None = None
+            self._last_logged_idx: int | None = None
+            self._previous_increments_paths: list[str] | None = None
+            self._run_target_for_increments: LocalRun | None = None
         self._pk_col = None
-        self._fk_cols = set()
+        self._fk_cols: set[str] = set()
         if allow_mixed_types:
             dtype = _dtypes.AnyType
 
@@ -258,6 +291,11 @@ class Table(Media):
         else:
             self._init_from_list([], columns, optional, dtype)
 
+    def _validate_log_mode(self, log_mode):
+        assert (
+            log_mode in _SUPPORTED_LOGGING_MODES
+        ), f"Invalid log_mode: {log_mode}. Must be one of {_SUPPORTED_LOGGING_MODES}"
+
     @staticmethod
     def _assert_valid_columns(columns):
         valid_col_types = [str, int]
@@ -312,6 +350,63 @@ class Table(Media):
             for col_name, opt, dt in zip(self.columns, optional, dtype):
                 self.cast(col_name, dt, opt)
 
+    def _load_incremental_table_state_from_resumed_run(self, run: "LocalRun", key: str):
+        """Handle updating incremental table state for resumed runs.
+
+        This method is called when a run is resumed and there are previous
+        increments of this table that need to be preserved. It updates the
+        table's internal state to track previous increments and the current
+        increment number.
+        """
+        if (
+            self._previous_increments_paths is not None
+            or self._increment_num is not None
+        ):
+            raise AssertionError(
+                "The table has been initialized for a resumed run already"
+            )
+
+        self._set_incremental_table_run_target(run)
+
+        summary_from_key = run.summary.get(key)
+
+        if (
+            summary_from_key is None
+            or not isinstance(summary_from_key, dict)
+            or summary_from_key.get("_type") != "incremental-table-file"
+        ):
+            # The key was never logged to the run or its last logged
+            # value was not an incrementally logged table.
+            return
+
+        previous_increments_paths = summary_from_key.get(
+            "previous_increments_paths", []
+        )
+
+        # add the artifact path of the last logged increment
+        last_artifact_path = summary_from_key.get("artifact_path")
+
+        if last_artifact_path:
+            previous_increments_paths.append(last_artifact_path)
+
+        # add 1 because a new increment is being logged
+        last_increment_num = summary_from_key.get("increment_num", 0)
+
+        self._increment_num = last_increment_num + 1
+        self._previous_increments_paths = previous_increments_paths
+
+    def _set_incremental_table_run_target(self, run: "LocalRun") -> None:
+        """Associate a Run object with this incremental Table.
+
+        A Table object in incremental mode can only be logged to a single Run.
+        Raises an error if the table is already associated to a different run.
+        """
+        if self._run_target_for_increments is None:
+            self._run_target_for_increments = run
+        elif self._run_target_for_increments is not run:
+            raise AssertionError("An incremental Table can only be logged to one Run.")
+
+    @allow_relogging_after_mutation
     def cast(self, col_name, dtype, optional=False):
         """Casts a column to a specific data type.
 
@@ -336,11 +431,7 @@ class Table(Media):
                     result_type = wbtype.assign(row[col_ndx])
                     if isinstance(result_type, _dtypes.InvalidType):
                         raise TypeError(
-                            "Existing data {}, of type {} cannot be cast to {}".format(
-                                row[col_ndx],
-                                _dtypes.TypeRegistry.type_of(row[col_ndx]),
-                                wbtype,
-                            )
+                            f"Existing data {row[col_ndx]}, of type {_dtypes.TypeRegistry.type_of(row[col_ndx])} cannot be cast to {wbtype}"
                         )
                     wbtype = result_type
 
@@ -359,9 +450,7 @@ class Table(Media):
             if is_pk:
                 assert (
                     self._pk_col is None
-                ), "Cannot have multiple primary keys - {} is already set as the primary key.".format(
-                    self._pk_col
-                )
+                ), f"Cannot have multiple primary keys - {self._pk_col} is already set as the primary key."
 
             # Update the column type
             self._column_types.params["type_map"][col_name] = wbtype
@@ -375,23 +464,21 @@ class Table(Media):
 
     def _eq_debug(self, other, should_assert=False):
         eq = isinstance(other, Table)
-        assert not should_assert or eq, "Found type {}, expected {}".format(
-            other.__class__, Table
-        )
+        assert (
+            not should_assert or eq
+        ), f"Found type {other.__class__}, expected {Table}"
         eq = eq and len(self.data) == len(other.data)
-        assert not should_assert or eq, "Found {} rows, expected {}".format(
-            len(other.data), len(self.data)
-        )
+        assert (
+            not should_assert or eq
+        ), f"Found {len(other.data)} rows, expected {len(self.data)}"
         eq = eq and self.columns == other.columns
-        assert not should_assert or eq, "Found columns {}, expected {}".format(
-            other.columns, self.columns
-        )
+        assert (
+            not should_assert or eq
+        ), f"Found columns {other.columns}, expected {self.columns}"
        eq = eq and self._column_types == other._column_types
         assert (
             not should_assert or eq
-        ), "Found column type {}, expected column type {}".format(
-            other._column_types, self._column_types
-        )
+        ), f"Found column type {other._column_types}, expected column type {self._column_types}"
         if eq:
             for row_ndx in range(len(self.data)):
                 for col_ndx in range(len(self.data[row_ndx])):
@@ -402,12 +489,7 @@ class Table(Media):
                     eq = eq and _eq
                     assert (
                         not should_assert or eq
-                    ), "Unequal data at row_ndx {} col_ndx {}: found {}, expected {}".format(
-                        row_ndx,
-                        col_ndx,
-                        other.data[row_ndx][col_ndx],
-                        self.data[row_ndx][col_ndx],
-                    )
+                    ), f"Unequal data at row_ndx {row_ndx} col_ndx {col_ndx}: found {other.data[row_ndx][col_ndx]}, expected {self.data[row_ndx][col_ndx]}"
         if not eq:
             return eq
         return eq
@@ -415,11 +497,14 @@ class Table(Media):
     def __eq__(self, other):
         return self._eq_debug(other)
 
+    @allow_relogging_after_mutation
     def add_row(self, *row):
         """Deprecated; use add_data instead."""
         logging.warning("add_row is deprecated, use add_data")
         self.add_data(*row)
 
+    @allow_relogging_after_mutation
+    @allow_incremental_logging_after_append
     def add_data(self, *data):
         """Adds a new row of data to the table. The maximum amount of rows in a table is determined by `wandb.Table.MAX_ARTIFACT_ROWS`.
 
@@ -427,9 +512,7 @@ class Table(Media):
         """
         if len(data) != len(self.columns):
             raise ValueError(
-                "This table expects {} columns: {}, found {}".format(
-                    len(self.columns), self.columns, len(data)
-                )
+                f"This table expects {len(self.columns)} columns: {self.columns}, found {len(data)}"
             )
 
         # Special case to pre-emptively cast a column as a key.
@@ -468,9 +551,7 @@ class Table(Media):
         result_type = current_type.assign(incoming_row_dict)
         if isinstance(result_type, _dtypes.InvalidType):
             raise TypeError(
-                "Data row contained incompatible types:\n{}".format(
-                    current_type.explain(incoming_row_dict)
-                )
+                f"Data row contained incompatible types:\n{current_type.explain(incoming_row_dict)}"
             )
         return result_type
 
@@ -496,7 +577,16 @@ class Table(Media):
                 f"this may cause slower queries in the W&B UI."
             )
             logging.warning(f"Truncating wandb.Table object to {max_rows} rows.")
-        return {"columns": self.columns, "data": self.data[:max_rows]}
+
+        if self.log_mode == "INCREMENTAL" and self._last_logged_idx is not None:
+            return {
+                "columns": self.columns,
+                "data": self.data[
+                    self._last_logged_idx + 1 : self._last_logged_idx + 1 + max_rows
+                ],
+            }
+        else:
+            return {"columns": self.columns, "data": self.data[:max_rows]}
 
     def bind_to_run(self, *args, **kwargs):
         # We set `warn=False` since Tables will now always be logged to both
@@ -516,11 +606,12 @@ class Table(Media):
         return os.path.join("media", "table")
 
     @classmethod
-    def from_json(cls, json_obj, source_artifact):
+    def from_json(cls, json_obj, source_artifact: "artifact.Artifact"):
         data = []
         column_types = None
         np_deserialized_columns = {}
         timestamp_column_indices = set()
+        log_mode = json_obj.get("log_mode", "IMMUTABLE")
         if json_obj.get("column_types") is not None:
             column_types = _dtypes.TypeRegistry.type_from_dict(
                 json_obj["column_types"], source_artifact
@@ -547,6 +638,10 @@ class Table(Media):
                     and ndarray_type._get_serialization_path() is not None
                 ):
                     serialization_path = ndarray_type._get_serialization_path()
+
+                    if serialization_path is None:
+                        continue
+
                     np = util.get_module(
                         "numpy",
                         required="Deserializing NumPy columns requires NumPy to be installed.",
@@ -559,22 +654,23 @@ class Table(Media):
                     )
                     ndarray_type._clear_serialization_path()
 
-        for r_ndx, row in enumerate(json_obj["data"]):
-            row_data = []
-            for c_ndx, item in enumerate(row):
-                cell = item
-                if c_ndx in timestamp_column_indices and isinstance(item, (int, float)):
-                    cell = datetime.datetime.fromtimestamp(
-                        item / 1000, tz=datetime.timezone.utc
-                    )
-                elif c_ndx in np_deserialized_columns:
-                    cell = np_deserialized_columns[c_ndx][r_ndx]
-                elif isinstance(item, dict) and "_type" in item:
-                    obj = WBValue.init_from_json(item, source_artifact)
-                    if obj is not None:
-                        cell = obj
-                row_data.append(cell)
-            data.append(row_data)
+        if log_mode == "INCREMENTAL":
+            unprocessed_table_data = _get_data_from_increments(
+                json_obj, source_artifact
+            )
+        else:
+            unprocessed_table_data = json_obj["data"]
+
+        for r_ndx, row in enumerate(unprocessed_table_data):
+            data.append(
+                _process_table_row(
+                    row,
+                    timestamp_column_indices,
+                    np_deserialized_columns,
+                    source_artifact,
+                    r_ndx,
+                )
+            )
 
         # construct Table with dtypes for each column if type information exists
         dtypes = None
@@ -583,7 +679,9 @@ class Table(Media):
                 column_types.params["type_map"][str(col)] for col in json_obj["columns"]
             ]
 
-        new_obj = cls(columns=json_obj["columns"], data=data, dtype=dtypes)
+        new_obj = cls(
+            columns=json_obj["columns"], data=data, dtype=dtypes, log_mode=log_mode
+        )
 
         if column_types is not None:
             new_obj._column_types = column_types
@@ -594,12 +692,31 @@ class Table(Media):
     def to_json(self, run_or_artifact):
         json_dict = super().to_json(run_or_artifact)
 
+        if self.log_mode == "INCREMENTAL":
+            if self._previous_increments_paths is None:
+                self._previous_increments_paths = []
+            if self._increment_num is None:
+                self._increment_num = 0
+
+            json_dict.update(
+                {
+                    "increment_num": self._increment_num,
+                    "previous_increments_paths": self._previous_increments_paths,
+                }
+            )
+
         if isinstance(run_or_artifact, wandb.wandb_sdk.wandb_run.Run):
+            if self.log_mode == "INCREMENTAL":
+                wbvalue_type = "incremental-table-file"
+            else:
+                wbvalue_type = "table-file"
+
             json_dict.update(
                 {
-                    "_type": "table-file",
+                    "_type": wbvalue_type,
                     "ncols": len(self.columns),
                     "nrows": len(self.data),
+                    "log_mode": self.log_mode,
                 }
             )
 
@@ -669,10 +786,11 @@ class Table(Media):
                     "ncols": len(self.columns),
                     "nrows": len(mapped_data),
                     "column_types": self._column_types.to_json(artifact),
+                    "log_mode": self.log_mode,
                 }
             )
         else:
-            raise ValueError("to_json accepts wandb_run.Run or wandb_artifact.Artifact")
+            raise TypeError("to_json accepts wandb_run.Run or wandb_artifact.Artifact")
 
         return json_dict
 
@@ -692,11 +810,13 @@ class Table(Media):
             index.set_table(self)
             yield index, self.data[ndx]
 
+    @allow_relogging_after_mutation
     def set_pk(self, col_name):
         # TODO: Docs
         assert col_name in self.columns
         self.cast(col_name, _PrimaryKeyType())
 
+    @allow_relogging_after_mutation
     def set_fk(self, col_name, table, table_col):
         # TODO: Docs
         assert col_name in self.columns
@@ -737,9 +857,7 @@ class Table(Media):
         # If there is a removed FK
        if len(self._fk_cols - _fk_cols) > 0:
             raise AssertionError(
-                "Cannot unset foreign key. Attempted to unset ({})".format(
-                    self._fk_cols - _fk_cols
-                )
+                f"Cannot unset foreign key. Attempted to unset ({self._fk_cols - _fk_cols})"
             )
 
         self._pk_col = _pk_col
@@ -799,6 +917,8 @@ class Table(Media):
         for row_ndx in range(len(self.data)):
             update_row(row_ndx)
 
+    @ensure_not_incremental
+    @allow_relogging_after_mutation
     def add_column(self, name, data, optional=False):
         """Adds a column of data to the table.
 
@@ -829,7 +949,7 @@ class Table(Media):
 
         try:
             self.cast(name, _dtypes.UnknownType(), optional=optional)
-        except TypeError as err:
+        except TypeError:
             # Undo the changes
             if is_first_col:
                 self.data = []
@@ -838,7 +958,7 @@ class Table(Media):
                 for ndx in range(len(self.data)):
                     self.data[ndx] = self.data[ndx][:-1]
                 self.columns = self.columns[:-1]
-            raise err
+            raise
 
     def get_column(self, name, convert_to=None):
         """Retrieves a column from the table and optionally converts it to a NumPy object.
@@ -889,6 +1009,8 @@ class Table(Media):
             _index.set_table(self)
             return _index
 
+    @ensure_not_incremental
+    @allow_relogging_after_mutation
     def add_computed_columns(self, fn):
         """Adds one or more computed columns based on existing data.
 
@@ -992,9 +1114,7 @@ class PartitionedTable(Media):
                 columns = part.columns
             elif columns != part.columns:
                 raise ValueError(
-                    "Table parts have non-matching columns. {} != {}".format(
-                        columns, part.columns
-                    )
+                    f"Table parts have non-matching columns. {columns} != {part.columns}"
                 )
             for _, row in part.iterrows():
                 yield ndx, row
@@ -1137,13 +1257,13 @@ class JoinedTable(Media):
 
     def _eq_debug(self, other, should_assert=False):
         eq = isinstance(other, JoinedTable)
-        assert not should_assert or eq, "Found type {}, expected {}".format(
-            other.__class__, JoinedTable
-        )
+        assert (
+            not should_assert or eq
+        ), f"Found type {other.__class__}, expected {JoinedTable}"
         eq = eq and self._join_key == other._join_key
-        assert not should_assert or eq, "Found {} join key, expected {}".format(
-            other._join_key, self._join_key
-        )
+        assert (
+            not should_assert or eq
+        ), f"Found {other._join_key} join key, expected {self._join_key}"
         eq = eq and self._table1._eq_debug(other._table1, should_assert)
         eq = eq and self._table2._eq_debug(other._table2, should_assert)
         return eq
@@ -1207,3 +1327,113 @@ _dtypes.TypeRegistry.add(_PartitionedTableType)
 _dtypes.TypeRegistry.add(_ForeignKeyType)
 _dtypes.TypeRegistry.add(_PrimaryKeyType)
 _dtypes.TypeRegistry.add(_ForeignIndexType)
+
+
+def _get_data_from_increments(
+    json_obj: Dict[str, Any], source_artifact: "artifact.Artifact"
+) -> List[Any]:
+    """Get data from incremental table artifacts.
+
+    Args:
+        json_obj: The JSON object containing table metadata.
+        source_artifact: The source artifact containing the table data.
+
+    Returns:
+        List of table rows from all increments.
+    """
+    if "latest" not in source_artifact.aliases:
+        wandb.termwarn(
+            (
+                "It is recommended to use the latest version of the "
+                "incremental table artifact for ordering guarantees."
+            ),
+            repeat=False,
+        )
+    data: List[Any] = []
+    increment_num = json_obj.get("increment_num", None)
+    if increment_num is None:
+        return data
+
+    # Sort by increment number first, then by timestamp if present
+    # Format of name is: "{incr_num}-{timestamp_ms}.{key}.table.json"
+    def get_sort_key(key: str) -> Tuple[int, int]:
+        try:
+            parts = key.split(".")
+            increment_parts = parts[0].split("-")
+            increment_num = int(increment_parts[0])
+            # If there's a timestamp part, use it for secondary sorting
+            timestamp = int(increment_parts[1]) if len(increment_parts) > 1 else 0
+        except (ValueError, IndexError):
+            wandb.termwarn(
+                (
+                    f"Could not parse artifact entry for increment {key}."
+                    " The entry name does not follow the naming convention"
+                    " <increment_number>-<timestamp>.<key>.table.json"
+                    " The data in the table will be out of order."
+                ),
+                repeat=False,
+            )
+            return (0, 0)
+
+        return (increment_num, timestamp)
+
+    sorted_increment_keys = []
+    for entry_key in source_artifact.manifest.entries:
+        if entry_key.endswith(".table.json"):
+            sorted_increment_keys.append(entry_key)
+
+    sorted_increment_keys.sort(key=get_sort_key)
+
+    for entry_key in sorted_increment_keys:
+        try:
+            with open(source_artifact.manifest.entries[entry_key].download()) as f:
+                table_data = json.load(f)
+                data.extend(table_data["data"])
+        except (json.JSONDecodeError, KeyError) as e:
+            raise wandb.Error(f"Invalid table file {entry_key}") from e
+    return data
+
+
+def _process_table_row(
+    row: List[Any],
+    timestamp_column_indices: Set[_dtypes.TimestampType],
+    np_deserialized_columns: Dict[int, Any],
+    source_artifact: "artifact.Artifact",
+    row_idx: int,
+) -> List[Any]:
+    """Convert special columns in a table row to Python types.
+
+    Processes a single row of table data by converting timestamp values to
+    datetime objects, replacing np typed cells with numpy array data,
+    and initializing media objects from their json value.
+
+    Args:
+        row: The row data to process.
+        timestamp_column_indices: Set of column indices containing timestamps.
+        np_deserialized_columns: Dictionary mapping column indices to numpy arrays.
+        source_artifact: The source artifact containing the table data.
+        row_idx: The index of the current row.
+
+    Returns:
+        Processed row data.
+    """
+    row_data = []
+    for c_ndx, item in enumerate(row):
+        cell: Any
+        if c_ndx in timestamp_column_indices and isinstance(item, (int, float)):
+            cell = datetime.datetime.fromtimestamp(
+                item / 1000, tz=datetime.timezone.utc
+            )
+        elif c_ndx in np_deserialized_columns:
+            cell = np_deserialized_columns[c_ndx][row_idx]
+        elif (
+            isinstance(item, dict)
+            and "_type" in item
+            and (obj := WBValue.init_from_json(item, source_artifact))
+        ):
+            cell = obj
+        else:
+            cell = item
+        row_data.append(cell)
+    return row_data