wandb 0.19.12rc1__py3-none-win32.whl → 0.20.1__py3-none-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172)
  1. wandb/__init__.py +1 -2
  2. wandb/__init__.pyi +3 -6
  3. wandb/_iterutils.py +26 -7
  4. wandb/_pydantic/__init__.py +2 -1
  5. wandb/_pydantic/utils.py +7 -0
  6. wandb/agents/pyagent.py +9 -15
  7. wandb/analytics/sentry.py +1 -2
  8. wandb/apis/attrs.py +3 -4
  9. wandb/apis/importers/internals/util.py +1 -1
  10. wandb/apis/importers/validation.py +2 -2
  11. wandb/apis/importers/wandb.py +30 -25
  12. wandb/apis/normalize.py +2 -2
  13. wandb/apis/public/__init__.py +1 -0
  14. wandb/apis/public/api.py +37 -33
  15. wandb/apis/public/artifacts.py +103 -72
  16. wandb/apis/public/jobs.py +3 -2
  17. wandb/apis/public/registries/registries_search.py +4 -2
  18. wandb/apis/public/registries/registry.py +1 -1
  19. wandb/apis/public/registries/utils.py +9 -9
  20. wandb/apis/public/runs.py +18 -6
  21. wandb/automations/_filters/expressions.py +1 -1
  22. wandb/automations/_filters/operators.py +1 -1
  23. wandb/automations/_filters/run_metrics.py +1 -1
  24. wandb/beta/workflows.py +6 -5
  25. wandb/bin/gpu_stats.exe +0 -0
  26. wandb/bin/wandb-core +0 -0
  27. wandb/cli/cli.py +54 -73
  28. wandb/docker/__init__.py +21 -74
  29. wandb/docker/names.py +40 -0
  30. wandb/env.py +0 -1
  31. wandb/errors/util.py +1 -1
  32. wandb/filesync/step_checksum.py +1 -1
  33. wandb/filesync/step_upload.py +1 -1
  34. wandb/integration/diffusers/resolvers/multimodal.py +1 -2
  35. wandb/integration/gym/__init__.py +5 -6
  36. wandb/integration/keras/callbacks/model_checkpoint.py +2 -2
  37. wandb/integration/keras/keras.py +13 -19
  38. wandb/integration/kfp/kfp_patch.py +2 -3
  39. wandb/integration/langchain/wandb_tracer.py +1 -1
  40. wandb/integration/metaflow/metaflow.py +13 -13
  41. wandb/integration/openai/fine_tuning.py +3 -2
  42. wandb/integration/sagemaker/auth.py +2 -1
  43. wandb/integration/sklearn/utils.py +2 -1
  44. wandb/integration/tensorboard/__init__.py +1 -1
  45. wandb/integration/tensorboard/log.py +2 -5
  46. wandb/integration/tensorflow/__init__.py +2 -2
  47. wandb/jupyter.py +20 -17
  48. wandb/plot/confusion_matrix.py +1 -1
  49. wandb/plot/utils.py +8 -7
  50. wandb/proto/v3/wandb_internal_pb2.py +355 -335
  51. wandb/proto/v3/wandb_settings_pb2.py +2 -2
  52. wandb/proto/v3/wandb_telemetry_pb2.py +12 -12
  53. wandb/proto/v4/wandb_internal_pb2.py +339 -335
  54. wandb/proto/v4/wandb_settings_pb2.py +2 -2
  55. wandb/proto/v4/wandb_telemetry_pb2.py +12 -12
  56. wandb/proto/v5/wandb_internal_pb2.py +339 -335
  57. wandb/proto/v5/wandb_settings_pb2.py +2 -2
  58. wandb/proto/v5/wandb_telemetry_pb2.py +12 -12
  59. wandb/proto/v6/wandb_internal_pb2.py +339 -335
  60. wandb/proto/v6/wandb_settings_pb2.py +2 -2
  61. wandb/proto/v6/wandb_telemetry_pb2.py +12 -12
  62. wandb/proto/wandb_deprecated.py +6 -8
  63. wandb/sdk/artifacts/_internal_artifact.py +43 -0
  64. wandb/sdk/artifacts/_validators.py +55 -35
  65. wandb/sdk/artifacts/artifact.py +117 -115
  66. wandb/sdk/artifacts/artifact_download_logger.py +2 -0
  67. wandb/sdk/artifacts/artifact_saver.py +1 -3
  68. wandb/sdk/artifacts/artifact_state.py +2 -0
  69. wandb/sdk/artifacts/artifact_ttl.py +2 -0
  70. wandb/sdk/artifacts/exceptions.py +14 -0
  71. wandb/sdk/artifacts/staging.py +2 -0
  72. wandb/sdk/artifacts/storage_handlers/local_file_handler.py +2 -6
  73. wandb/sdk/artifacts/storage_handlers/multi_handler.py +1 -1
  74. wandb/sdk/artifacts/storage_handlers/tracking_handler.py +2 -6
  75. wandb/sdk/artifacts/storage_handlers/wb_artifact_handler.py +1 -5
  76. wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +1 -1
  77. wandb/sdk/artifacts/storage_layout.py +2 -0
  78. wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +3 -3
  79. wandb/sdk/backend/backend.py +11 -182
  80. wandb/sdk/data_types/_dtypes.py +2 -6
  81. wandb/sdk/data_types/audio.py +20 -3
  82. wandb/sdk/data_types/base_types/media.py +12 -7
  83. wandb/sdk/data_types/base_types/wb_value.py +8 -18
  84. wandb/sdk/data_types/bokeh.py +19 -2
  85. wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +17 -1
  86. wandb/sdk/data_types/helper_types/image_mask.py +7 -1
  87. wandb/sdk/data_types/html.py +4 -4
  88. wandb/sdk/data_types/image.py +178 -103
  89. wandb/sdk/data_types/molecule.py +6 -6
  90. wandb/sdk/data_types/object_3d.py +10 -5
  91. wandb/sdk/data_types/saved_model.py +11 -6
  92. wandb/sdk/data_types/table.py +313 -83
  93. wandb/sdk/data_types/table_decorators.py +108 -0
  94. wandb/sdk/data_types/utils.py +43 -7
  95. wandb/sdk/data_types/video.py +21 -3
  96. wandb/sdk/interface/interface.py +10 -0
  97. wandb/sdk/internal/datastore.py +2 -6
  98. wandb/sdk/internal/file_pusher.py +1 -5
  99. wandb/sdk/internal/file_stream.py +8 -17
  100. wandb/sdk/internal/handler.py +2 -2
  101. wandb/sdk/internal/incremental_table_util.py +53 -0
  102. wandb/sdk/internal/internal.py +3 -5
  103. wandb/sdk/internal/internal_api.py +66 -89
  104. wandb/sdk/internal/job_builder.py +2 -7
  105. wandb/sdk/internal/profiler.py +2 -2
  106. wandb/sdk/internal/progress.py +1 -3
  107. wandb/sdk/internal/run.py +1 -6
  108. wandb/sdk/internal/sender.py +24 -36
  109. wandb/sdk/internal/system/assets/aggregators.py +1 -7
  110. wandb/sdk/internal/system/assets/disk.py +3 -3
  111. wandb/sdk/internal/system/assets/gpu.py +4 -4
  112. wandb/sdk/internal/system/assets/gpu_amd.py +4 -4
  113. wandb/sdk/internal/system/assets/interfaces.py +6 -6
  114. wandb/sdk/internal/system/assets/tpu.py +1 -1
  115. wandb/sdk/internal/system/assets/trainium.py +6 -6
  116. wandb/sdk/internal/system/system_info.py +5 -7
  117. wandb/sdk/internal/system/system_monitor.py +4 -4
  118. wandb/sdk/internal/tb_watcher.py +5 -7
  119. wandb/sdk/launch/_launch.py +1 -1
  120. wandb/sdk/launch/_project_spec.py +19 -20
  121. wandb/sdk/launch/agent/agent.py +3 -3
  122. wandb/sdk/launch/agent/config.py +1 -1
  123. wandb/sdk/launch/agent/job_status_tracker.py +2 -2
  124. wandb/sdk/launch/builder/build.py +2 -3
  125. wandb/sdk/launch/builder/kaniko_builder.py +5 -4
  126. wandb/sdk/launch/environment/gcp_environment.py +1 -2
  127. wandb/sdk/launch/registry/azure_container_registry.py +2 -2
  128. wandb/sdk/launch/registry/elastic_container_registry.py +2 -2
  129. wandb/sdk/launch/registry/google_artifact_registry.py +3 -3
  130. wandb/sdk/launch/runner/abstract.py +5 -5
  131. wandb/sdk/launch/runner/kubernetes_monitor.py +2 -2
  132. wandb/sdk/launch/runner/kubernetes_runner.py +1 -1
  133. wandb/sdk/launch/runner/sagemaker_runner.py +2 -4
  134. wandb/sdk/launch/runner/vertex_runner.py +2 -7
  135. wandb/sdk/launch/sweeps/__init__.py +1 -1
  136. wandb/sdk/launch/sweeps/scheduler.py +2 -2
  137. wandb/sdk/launch/sweeps/utils.py +3 -3
  138. wandb/sdk/launch/utils.py +3 -4
  139. wandb/sdk/lib/apikey.py +5 -8
  140. wandb/sdk/lib/config_util.py +3 -3
  141. wandb/sdk/lib/fsm.py +3 -18
  142. wandb/sdk/lib/gitlib.py +6 -5
  143. wandb/sdk/lib/ipython.py +2 -2
  144. wandb/sdk/lib/json_util.py +9 -14
  145. wandb/sdk/lib/printer.py +3 -8
  146. wandb/sdk/lib/redirect.py +1 -1
  147. wandb/sdk/lib/retry.py +3 -7
  148. wandb/sdk/lib/run_moment.py +2 -2
  149. wandb/sdk/lib/service_connection.py +3 -1
  150. wandb/sdk/lib/service_token.py +1 -2
  151. wandb/sdk/mailbox/mailbox_handle.py +3 -7
  152. wandb/sdk/mailbox/response_handle.py +2 -6
  153. wandb/sdk/service/streams.py +3 -7
  154. wandb/sdk/verify/verify.py +5 -6
  155. wandb/sdk/wandb_config.py +1 -1
  156. wandb/sdk/wandb_init.py +38 -106
  157. wandb/sdk/wandb_login.py +7 -6
  158. wandb/sdk/wandb_run.py +52 -240
  159. wandb/sdk/wandb_settings.py +71 -60
  160. wandb/sdk/wandb_setup.py +40 -14
  161. wandb/sdk/wandb_watch.py +5 -7
  162. wandb/sync/__init__.py +1 -1
  163. wandb/sync/sync.py +13 -13
  164. wandb/util.py +17 -35
  165. wandb/wandb_agent.py +8 -11
  166. {wandb-0.19.12rc1.dist-info → wandb-0.20.1.dist-info}/METADATA +5 -5
  167. {wandb-0.19.12rc1.dist-info → wandb-0.20.1.dist-info}/RECORD +170 -168
  168. wandb/docker/auth.py +0 -435
  169. wandb/docker/www_authenticate.py +0 -94
  170. {wandb-0.19.12rc1.dist-info → wandb-0.20.1.dist-info}/WHEEL +0 -0
  171. {wandb-0.19.12rc1.dist-info → wandb-0.20.1.dist-info}/entry_points.txt +0 -0
  172. {wandb-0.19.12rc1.dist-info → wandb-0.20.1.dist-info}/licenses/LICENSE +0 -0
wandb/sdk/data_types/table.py
@@ -2,8 +2,10 @@ import base64
 import binascii
 import codecs
 import datetime
+import json
 import logging
 import os
+from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Set, Tuple
 
 import wandb
 from wandb import util
@@ -13,8 +15,18 @@ from . import _dtypes
 from ._private import MEDIA_TMP
 from .base_types.media import Media, _numpy_arrays_to_lists
 from .base_types.wb_value import WBValue
+from .table_decorators import (
+    allow_incremental_logging_after_append,
+    allow_relogging_after_mutation,
+    ensure_not_incremental,
+)
 from .utils import _json_helper
 
+if TYPE_CHECKING:
+    from wandb.sdk.artifacts import artifact
+
+    from ...wandb_run import Run as LocalRun
+
 
 class _TableLinkMixin:
     def set_table(self, table):
@@ -180,6 +192,9 @@ class _ForeignIndexType(_dtypes.Type):
         return cls(table)
 
 
+_SUPPORTED_LOGGING_MODES = ["IMMUTABLE", "MUTABLE", "INCREMENTAL"]
+
+
 class Table(Media):
     """The Table class used to display and analyze tabular data.
 
@@ -190,21 +205,6 @@ class Table(Media):
 
     This class is the primary class used to generate the Table Visualizer
     in the UI: https://docs.wandb.ai/guides/data-vis/tables.
-
-    Args:
-        columns: (List[str]) Names of the columns in the table.
-            Defaults to ["Input", "Output", "Expected"].
-        data: (List[List[any]]) 2D row-oriented array of values.
-        dataframe: (pandas.DataFrame) DataFrame object used to create the table.
-            When set, `data` and `columns` arguments are ignored.
-        optional: (Union[bool,List[bool]]) Determines if `None` values are allowed. Default to True
-            - If a singular bool value, then the optionality is enforced for all
-              columns specified at construction time
-            - If a list of bool values, then the optionality is applied to each
-              column - should be the same length as `columns`
-              applies to all columns. A list of bool values applies to each respective column.
-        allow_mixed_types: (bool) Determines if columns are allowed to have mixed types
-            (disables type validation). Defaults to False
     """
 
     MAX_ROWS = 10000
@@ -221,15 +221,48 @@
         dtype=None,
         optional=True,
         allow_mixed_types=False,
+        log_mode: Optional[
+            Literal["IMMUTABLE", "MUTABLE", "INCREMENTAL"]
+        ] = "IMMUTABLE",
     ):
         """Initializes a Table object.
 
         The rows is available for legacy reasons and should not be used.
         The Table class uses data to mimic the Pandas API.
+
+        Args:
+            columns: (List[str]) Names of the columns in the table.
+                Defaults to ["Input", "Output", "Expected"].
+            data: (List[List[any]]) 2D row-oriented array of values.
+            dataframe: (pandas.DataFrame) DataFrame object used to create the table.
+                When set, `data` and `columns` arguments are ignored.
+            optional: (Union[bool,List[bool]]) Determines if `None` values are allowed. Default to True
+                - If a singular bool value, then the optionality is enforced for all
+                  columns specified at construction time
+                - If a list of bool values, then the optionality is applied to each
+                  column - should be the same length as `columns`
+                  applies to all columns. A list of bool values applies to each respective column.
+            allow_mixed_types: (bool) Determines if columns are allowed to have mixed types
+                (disables type validation). Defaults to False
+            log_mode: Optional[str] Controls how the Table is logged when mutations occur.
+                Options:
+                - "IMMUTABLE" (default): Table can only be logged once; subsequent
+                  logging attempts after the table has been mutated will be no-ops.
+                - "MUTABLE": Table can be re-logged after mutations, creating
+                  a new artifact version each time it's logged.
+                - "INCREMENTAL": Table data is logged incrementally, with each log creating
+                  a new artifact entry containing the new data since the last log.
        """
         super().__init__()
+        self._validate_log_mode(log_mode)
+        self.log_mode = log_mode
+        if self.log_mode == "INCREMENTAL":
+            self._increment_num: int | None = None
+            self._last_logged_idx: int | None = None
+            self._previous_increments_paths: list[str] | None = None
+            self._run_target_for_increments: LocalRun | None = None
         self._pk_col = None
-        self._fk_cols = set()
+        self._fk_cols: set[str] = set()
         if allow_mixed_types:
             dtype = _dtypes.AnyType
 
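The three log modes documented above can be exercised end to end. A minimal sketch against the public wandb API (the project name and table key are illustrative, not taken from this diff):

    import wandb

    # IMMUTABLE (default): log once; re-logging after a mutation is a no-op.
    # MUTABLE: each log after a mutation creates a new artifact version.
    # INCREMENTAL: each log uploads only the rows added since the previous log.
    run = wandb.init(project="tables-demo")  # hypothetical project

    table = wandb.Table(columns=["step", "loss"], log_mode="INCREMENTAL")
    for step in range(3):
        table.add_data(step, 1.0 / (step + 1))
        run.log({"metrics_table": table})  # one increment per call

    run.finish()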
@@ -258,6 +291,11 @@
         else:
             self._init_from_list([], columns, optional, dtype)
 
+    def _validate_log_mode(self, log_mode):
+        assert (
+            log_mode in _SUPPORTED_LOGGING_MODES
+        ), f"Invalid log_mode: {log_mode}. Must be one of {_SUPPORTED_LOGGING_MODES}"
+
     @staticmethod
     def _assert_valid_columns(columns):
         valid_col_types = [str, int]
@@ -312,6 +350,63 @@
         for col_name, opt, dt in zip(self.columns, optional, dtype):
             self.cast(col_name, dt, opt)
 
+    def _load_incremental_table_state_from_resumed_run(self, run: "LocalRun", key: str):
+        """Handle updating incremental table state for resumed runs.
+
+        This method is called when a run is resumed and there are previous
+        increments of this table that need to be preserved. It updates the
+        table's internal state to track previous increments and the current
+        increment number.
+        """
+        if (
+            self._previous_increments_paths is not None
+            or self._increment_num is not None
+        ):
+            raise AssertionError(
+                "The table has been initialized for a resumed run already"
+            )
+
+        self._set_incremental_table_run_target(run)
+
+        summary_from_key = run.summary.get(key)
+
+        if (
+            summary_from_key is None
+            or not isinstance(summary_from_key, dict)
+            or summary_from_key.get("_type") != "incremental-table-file"
+        ):
+            # The key was never logged to the run or its last logged
+            # value was not an incrementally logged table.
+            return
+
+        previous_increments_paths = summary_from_key.get(
+            "previous_increments_paths", []
+        )
+
+        # add the artifact path of the last logged increment
+        last_artifact_path = summary_from_key.get("artifact_path")
+
+        if last_artifact_path:
+            previous_increments_paths.append(last_artifact_path)
+
+        # add 1 because a new increment is being logged
+        last_increment_num = summary_from_key.get("increment_num", 0)
+
+        self._increment_num = last_increment_num + 1
+        self._previous_increments_paths = previous_increments_paths
+
+    def _set_incremental_table_run_target(self, run: "LocalRun") -> None:
+        """Associate a Run object with this incremental Table.
+
+        A Table object in incremental mode can only be logged to a single Run.
+        Raises an error if the table is already associated to a different run.
+        """
+        if self._run_target_for_increments is None:
+            self._run_target_for_increments = run
+        elif self._run_target_for_increments is not run:
+            raise AssertionError("An incremental Table can only be logged to one Run.")
+
+    @allow_relogging_after_mutation
     def cast(self, col_name, dtype, optional=False):
         """Casts a column to a specific data type.
 
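Because `_load_incremental_table_state_from_resumed_run` restores the increment counter and previous increment paths from the run summary, continuing an incremental table across a resumed run should look roughly like this (a sketch; the run id and key are hypothetical, and the state restore happens internally on the first log):

    import wandb

    run = wandb.init(project="tables-demo", id="abc123", resume="must")  # hypothetical id

    table = wandb.Table(columns=["step", "loss"], log_mode="INCREMENTAL")
    table.add_data(100, 0.01)
    run.log({"metrics_table": table})  # increment_num resumes from run.summary["metrics_table"]
    run.finish()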
@@ -336,11 +431,7 @@
             result_type = wbtype.assign(row[col_ndx])
             if isinstance(result_type, _dtypes.InvalidType):
                 raise TypeError(
-                    "Existing data {}, of type {} cannot be cast to {}".format(
-                        row[col_ndx],
-                        _dtypes.TypeRegistry.type_of(row[col_ndx]),
-                        wbtype,
-                    )
+                    f"Existing data {row[col_ndx]}, of type {_dtypes.TypeRegistry.type_of(row[col_ndx])} cannot be cast to {wbtype}"
                 )
             wbtype = result_type
 
@@ -359,9 +450,7 @@
         if is_pk:
             assert (
                 self._pk_col is None
-            ), "Cannot have multiple primary keys - {} is already set as the primary key.".format(
-                self._pk_col
-            )
+            ), f"Cannot have multiple primary keys - {self._pk_col} is already set as the primary key."
 
         # Update the column type
         self._column_types.params["type_map"][col_name] = wbtype
@@ -375,23 +464,21 @@
 
     def _eq_debug(self, other, should_assert=False):
         eq = isinstance(other, Table)
-        assert not should_assert or eq, "Found type {}, expected {}".format(
-            other.__class__, Table
-        )
+        assert (
+            not should_assert or eq
+        ), f"Found type {other.__class__}, expected {Table}"
         eq = eq and len(self.data) == len(other.data)
-        assert not should_assert or eq, "Found {} rows, expected {}".format(
-            len(other.data), len(self.data)
-        )
+        assert (
+            not should_assert or eq
+        ), f"Found {len(other.data)} rows, expected {len(self.data)}"
         eq = eq and self.columns == other.columns
-        assert not should_assert or eq, "Found columns {}, expected {}".format(
-            other.columns, self.columns
-        )
+        assert (
+            not should_assert or eq
+        ), f"Found columns {other.columns}, expected {self.columns}"
         eq = eq and self._column_types == other._column_types
         assert (
             not should_assert or eq
-        ), "Found column type {}, expected column type {}".format(
-            other._column_types, self._column_types
-        )
+        ), f"Found column type {other._column_types}, expected column type {self._column_types}"
         if eq:
             for row_ndx in range(len(self.data)):
                 for col_ndx in range(len(self.data[row_ndx])):
@@ -402,12 +489,7 @@
                     eq = eq and _eq
                     assert (
                         not should_assert or eq
-                    ), "Unequal data at row_ndx {} col_ndx {}: found {}, expected {}".format(
-                        row_ndx,
-                        col_ndx,
-                        other.data[row_ndx][col_ndx],
-                        self.data[row_ndx][col_ndx],
-                    )
+                    ), f"Unequal data at row_ndx {row_ndx} col_ndx {col_ndx}: found {other.data[row_ndx][col_ndx]}, expected {self.data[row_ndx][col_ndx]}"
         if not eq:
             return eq
         return eq
@@ -415,11 +497,14 @@
     def __eq__(self, other):
         return self._eq_debug(other)
 
+    @allow_relogging_after_mutation
     def add_row(self, *row):
         """Deprecated; use add_data instead."""
         logging.warning("add_row is deprecated, use add_data")
         self.add_data(*row)
 
+    @allow_relogging_after_mutation
+    @allow_incremental_logging_after_append
     def add_data(self, *data):
         """Adds a new row of data to the table. The maximum amount of rows in a table is determined by `wandb.Table.MAX_ARTIFACT_ROWS`.
 
@@ -427,9 +512,7 @@
         """
         if len(data) != len(self.columns):
             raise ValueError(
-                "This table expects {} columns: {}, found {}".format(
-                    len(self.columns), self.columns, len(data)
-                )
+                f"This table expects {len(self.columns)} columns: {self.columns}, found {len(data)}"
             )
 
         # Special case to pre-emptively cast a column as a key.
@@ -468,9 +551,7 @@
         result_type = current_type.assign(incoming_row_dict)
         if isinstance(result_type, _dtypes.InvalidType):
             raise TypeError(
-                "Data row contained incompatible types:\n{}".format(
-                    current_type.explain(incoming_row_dict)
-                )
+                f"Data row contained incompatible types:\n{current_type.explain(incoming_row_dict)}"
             )
         return result_type
 
@@ -496,7 +577,16 @@
                 f"this may cause slower queries in the W&B UI."
             )
             logging.warning(f"Truncating wandb.Table object to {max_rows} rows.")
-        return {"columns": self.columns, "data": self.data[:max_rows]}
+
+        if self.log_mode == "INCREMENTAL" and self._last_logged_idx is not None:
+            return {
+                "columns": self.columns,
+                "data": self.data[
+                    self._last_logged_idx + 1 : self._last_logged_idx + 1 + max_rows
+                ],
+            }
+        else:
+            return {"columns": self.columns, "data": self.data[:max_rows]}
 
     def bind_to_run(self, *args, **kwargs):
         # We set `warn=False` since Tables will now always be logged to both
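The incremental branch above serializes only the unsent tail of the data, still capped at max_rows. The slice is equivalent to this standalone sketch (all names hypothetical):

    data = [[0], [1], [2], [3], [4]]
    last_logged_idx = 2  # rows 0..2 went out in earlier increments
    max_rows = 10
    tail = data[last_logged_idx + 1 : last_logged_idx + 1 + max_rows]
    assert tail == [[3], [4]]  # only the new rows are serialized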
@@ -516,11 +606,12 @@
         return os.path.join("media", "table")
 
     @classmethod
-    def from_json(cls, json_obj, source_artifact):
+    def from_json(cls, json_obj, source_artifact: "artifact.Artifact"):
         data = []
         column_types = None
         np_deserialized_columns = {}
         timestamp_column_indices = set()
+        log_mode = json_obj.get("log_mode", "IMMUTABLE")
         if json_obj.get("column_types") is not None:
             column_types = _dtypes.TypeRegistry.type_from_dict(
                 json_obj["column_types"], source_artifact
@@ -547,6 +638,10 @@
                     and ndarray_type._get_serialization_path() is not None
                 ):
                     serialization_path = ndarray_type._get_serialization_path()
+
+                    if serialization_path is None:
+                        continue
+
                     np = util.get_module(
                         "numpy",
                         required="Deserializing NumPy columns requires NumPy to be installed.",
@@ -559,22 +654,23 @@
                     )
                     ndarray_type._clear_serialization_path()
 
-        for r_ndx, row in enumerate(json_obj["data"]):
-            row_data = []
-            for c_ndx, item in enumerate(row):
-                cell = item
-                if c_ndx in timestamp_column_indices and isinstance(item, (int, float)):
-                    cell = datetime.datetime.fromtimestamp(
-                        item / 1000, tz=datetime.timezone.utc
-                    )
-                elif c_ndx in np_deserialized_columns:
-                    cell = np_deserialized_columns[c_ndx][r_ndx]
-                elif isinstance(item, dict) and "_type" in item:
-                    obj = WBValue.init_from_json(item, source_artifact)
-                    if obj is not None:
-                        cell = obj
-                row_data.append(cell)
-            data.append(row_data)
+        if log_mode == "INCREMENTAL":
+            unprocessed_table_data = _get_data_from_increments(
+                json_obj, source_artifact
+            )
+        else:
+            unprocessed_table_data = json_obj["data"]
+
+        for r_ndx, row in enumerate(unprocessed_table_data):
+            data.append(
+                _process_table_row(
+                    row,
+                    timestamp_column_indices,
+                    np_deserialized_columns,
+                    source_artifact,
+                    r_ndx,
+                )
+            )
 
         # construct Table with dtypes for each column if type information exists
         dtypes = None
@@ -583,7 +679,9 @@
                 column_types.params["type_map"][str(col)] for col in json_obj["columns"]
             ]
 
-        new_obj = cls(columns=json_obj["columns"], data=data, dtype=dtypes)
+        new_obj = cls(
+            columns=json_obj["columns"], data=data, dtype=dtypes, log_mode=log_mode
+        )
 
         if column_types is not None:
             new_obj._column_types = column_types
@@ -594,12 +692,31 @@
     def to_json(self, run_or_artifact):
         json_dict = super().to_json(run_or_artifact)
 
+        if self.log_mode == "INCREMENTAL":
+            if self._previous_increments_paths is None:
+                self._previous_increments_paths = []
+            if self._increment_num is None:
+                self._increment_num = 0
+
+            json_dict.update(
+                {
+                    "increment_num": self._increment_num,
+                    "previous_increments_paths": self._previous_increments_paths,
+                }
+            )
+
         if isinstance(run_or_artifact, wandb.wandb_sdk.wandb_run.Run):
+            if self.log_mode == "INCREMENTAL":
+                wbvalue_type = "incremental-table-file"
+            else:
+                wbvalue_type = "table-file"
+
             json_dict.update(
                 {
-                    "_type": "table-file",
+                    "_type": wbvalue_type,
                     "ncols": len(self.columns),
                     "nrows": len(self.data),
+                    "log_mode": self.log_mode,
                 }
             )
 
@@ -669,10 +786,11 @@
                     "ncols": len(self.columns),
                     "nrows": len(mapped_data),
                     "column_types": self._column_types.to_json(artifact),
+                    "log_mode": self.log_mode,
                 }
             )
         else:
-            raise ValueError("to_json accepts wandb_run.Run or wandb_artifact.Artifact")
+            raise TypeError("to_json accepts wandb_run.Run or wandb_artifact.Artifact")
 
         return json_dict
 
@@ -692,11 +810,13 @@
             index.set_table(self)
             yield index, self.data[ndx]
 
+    @allow_relogging_after_mutation
     def set_pk(self, col_name):
         # TODO: Docs
         assert col_name in self.columns
         self.cast(col_name, _PrimaryKeyType())
 
+    @allow_relogging_after_mutation
     def set_fk(self, col_name, table, table_col):
         # TODO: Docs
         assert col_name in self.columns
@@ -737,9 +857,7 @@
         # If there is a removed FK
         if len(self._fk_cols - _fk_cols) > 0:
             raise AssertionError(
-                "Cannot unset foreign key. Attempted to unset ({})".format(
-                    self._fk_cols - _fk_cols
-                )
+                f"Cannot unset foreign key. Attempted to unset ({self._fk_cols - _fk_cols})"
             )
 
         self._pk_col = _pk_col
@@ -799,6 +917,8 @@
         for row_ndx in range(len(self.data)):
             update_row(row_ndx)
 
+    @ensure_not_incremental
+    @allow_relogging_after_mutation
     def add_column(self, name, data, optional=False):
         """Adds a column of data to the table.
 
@@ -829,7 +949,7 @@
 
         try:
             self.cast(name, _dtypes.UnknownType(), optional=optional)
-        except TypeError as err:
+        except TypeError:
             # Undo the changes
             if is_first_col:
                 self.data = []
@@ -838,7 +958,7 @@
             for ndx in range(len(self.data)):
                 self.data[ndx] = self.data[ndx][:-1]
             self.columns = self.columns[:-1]
-            raise err
+            raise
 
     def get_column(self, name, convert_to=None):
         """Retrieves a column from the table and optionally converts it to a NumPy object.
@@ -889,6 +1009,8 @@
         _index.set_table(self)
         return _index
 
+    @ensure_not_incremental
+    @allow_relogging_after_mutation
    def add_computed_columns(self, fn):
         """Adds one or more computed columns based on existing data.
 
@@ -992,9 +1114,7 @@
                 columns = part.columns
             elif columns != part.columns:
                 raise ValueError(
-                    "Table parts have non-matching columns. {} != {}".format(
-                        columns, part.columns
-                    )
+                    f"Table parts have non-matching columns. {columns} != {part.columns}"
                 )
             for _, row in part.iterrows():
                 yield ndx, row
@@ -1137,13 +1257,13 @@
 
     def _eq_debug(self, other, should_assert=False):
         eq = isinstance(other, JoinedTable)
-        assert not should_assert or eq, "Found type {}, expected {}".format(
-            other.__class__, JoinedTable
-        )
+        assert (
+            not should_assert or eq
+        ), f"Found type {other.__class__}, expected {JoinedTable}"
         eq = eq and self._join_key == other._join_key
-        assert not should_assert or eq, "Found {} join key, expected {}".format(
-            other._join_key, self._join_key
-        )
+        assert (
+            not should_assert or eq
+        ), f"Found {other._join_key} join key, expected {self._join_key}"
         eq = eq and self._table1._eq_debug(other._table1, should_assert)
         eq = eq and self._table2._eq_debug(other._table2, should_assert)
         return eq
@@ -1207,3 +1327,113 @@ _dtypes.TypeRegistry.add(_PartitionedTableType)
 _dtypes.TypeRegistry.add(_ForeignKeyType)
 _dtypes.TypeRegistry.add(_PrimaryKeyType)
 _dtypes.TypeRegistry.add(_ForeignIndexType)
+
+
+def _get_data_from_increments(
+    json_obj: Dict[str, Any], source_artifact: "artifact.Artifact"
+) -> List[Any]:
+    """Get data from incremental table artifacts.
+
+    Args:
+        json_obj: The JSON object containing table metadata.
+        source_artifact: The source artifact containing the table data.
+
+    Returns:
+        List of table rows from all increments.
+    """
+    if "latest" not in source_artifact.aliases:
+        wandb.termwarn(
+            (
+                "It is recommended to use the latest version of the "
+                "incremental table artifact for ordering guarantees."
+            ),
+            repeat=False,
+        )
+    data: List[Any] = []
+    increment_num = json_obj.get("increment_num", None)
+    if increment_num is None:
+        return data
+
+    # Sort by increment number first, then by timestamp if present
+    # Format of name is: "{incr_num}-{timestamp_ms}.{key}.table.json"
+    def get_sort_key(key: str) -> Tuple[int, int]:
+        try:
+            parts = key.split(".")
+            increment_parts = parts[0].split("-")
+            increment_num = int(increment_parts[0])
+            # If there's a timestamp part, use it for secondary sorting
+            timestamp = int(increment_parts[1]) if len(increment_parts) > 1 else 0
+        except (ValueError, IndexError):
+            wandb.termwarn(
+                (
+                    f"Could not parse artifact entry for increment {key}."
+                    " The entry name does not follow the naming convention"
+                    " <increment_number>-<timestamp>.<key>.table.json"
+                    " The data in the table will be out of order."
+                ),
+                repeat=False,
+            )
+            return (0, 0)
+
+        return (increment_num, timestamp)
+
+    sorted_increment_keys = []
+    for entry_key in source_artifact.manifest.entries:
+        if entry_key.endswith(".table.json"):
+            sorted_increment_keys.append(entry_key)
+
+    sorted_increment_keys.sort(key=get_sort_key)
+
+    for entry_key in sorted_increment_keys:
+        try:
+            with open(source_artifact.manifest.entries[entry_key].download()) as f:
+                table_data = json.load(f)
+                data.extend(table_data["data"])
+        except (json.JSONDecodeError, KeyError) as e:
+            raise wandb.Error(f"Invalid table file {entry_key}") from e
+    return data
+
+
+
1397
+ def _process_table_row(
1398
+ row: List[Any],
1399
+ timestamp_column_indices: Set[_dtypes.TimestampType],
1400
+ np_deserialized_columns: Dict[int, Any],
1401
+ source_artifact: "artifact.Artifact",
1402
+ row_idx: int,
1403
+ ) -> List[Any]:
1404
+ """Convert special columns in a table row to Python types.
1405
+
1406
+ Processes a single row of table data by converting timestamp values to
1407
+ datetime objects, replacing np typed cells with numpy array data,
1408
+ and initializing media objects from their json value.
1409
+
1410
+
1411
+ Args:
1412
+ row: The row data to process.
1413
+ timestamp_column_indices: Set of column indices containing timestamps.
1414
+ np_deserialized_columns: Dictionary mapping column indices to numpy arrays.
1415
+ source_artifact: The source artifact containing the table data.
1416
+ row_idx: The index of the current row.
1417
+
1418
+ Returns:
1419
+ Processed row data.
1420
+ """
1421
+ row_data = []
1422
+ for c_ndx, item in enumerate(row):
1423
+ cell: Any
1424
+ if c_ndx in timestamp_column_indices and isinstance(item, (int, float)):
1425
+ cell = datetime.datetime.fromtimestamp(
1426
+ item / 1000, tz=datetime.timezone.utc
1427
+ )
1428
+ elif c_ndx in np_deserialized_columns:
1429
+ cell = np_deserialized_columns[c_ndx][row_idx]
1430
+ elif (
1431
+ isinstance(item, dict)
1432
+ and "_type" in item
1433
+ and (obj := WBValue.init_from_json(item, source_artifact))
1434
+ ):
1435
+ cell = obj
1436
+ else:
1437
+ cell = item
1438
+ row_data.append(cell)
1439
+ return row_data
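On the read side, `from_json` routes incremental tables through `_get_data_from_increments`, so fetching the table artifact yields the merged rows. A hedged sketch of typical retrieval (the artifact path and key are hypothetical; run-table artifacts are commonly named `run-<run_id>-<key>`):

    import wandb

    api = wandb.Api()
    artifact = api.artifact("entity/tables-demo/run-abc123-metrics_table:latest")  # hypothetical path
    table = artifact.get("metrics_table")  # a wandb.Table with all increments stitched in order
    print(len(table.data))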