wandb 0.19.12rc1__py3-none-win32.whl → 0.20.1__py3-none-win32.whl
This diff compares the contents of two publicly released package versions as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
- wandb/__init__.py +1 -2
- wandb/__init__.pyi +3 -6
- wandb/_iterutils.py +26 -7
- wandb/_pydantic/__init__.py +2 -1
- wandb/_pydantic/utils.py +7 -0
- wandb/agents/pyagent.py +9 -15
- wandb/analytics/sentry.py +1 -2
- wandb/apis/attrs.py +3 -4
- wandb/apis/importers/internals/util.py +1 -1
- wandb/apis/importers/validation.py +2 -2
- wandb/apis/importers/wandb.py +30 -25
- wandb/apis/normalize.py +2 -2
- wandb/apis/public/__init__.py +1 -0
- wandb/apis/public/api.py +37 -33
- wandb/apis/public/artifacts.py +103 -72
- wandb/apis/public/jobs.py +3 -2
- wandb/apis/public/registries/registries_search.py +4 -2
- wandb/apis/public/registries/registry.py +1 -1
- wandb/apis/public/registries/utils.py +9 -9
- wandb/apis/public/runs.py +18 -6
- wandb/automations/_filters/expressions.py +1 -1
- wandb/automations/_filters/operators.py +1 -1
- wandb/automations/_filters/run_metrics.py +1 -1
- wandb/beta/workflows.py +6 -5
- wandb/bin/gpu_stats.exe +0 -0
- wandb/bin/wandb-core +0 -0
- wandb/cli/cli.py +54 -73
- wandb/docker/__init__.py +21 -74
- wandb/docker/names.py +40 -0
- wandb/env.py +0 -1
- wandb/errors/util.py +1 -1
- wandb/filesync/step_checksum.py +1 -1
- wandb/filesync/step_upload.py +1 -1
- wandb/integration/diffusers/resolvers/multimodal.py +1 -2
- wandb/integration/gym/__init__.py +5 -6
- wandb/integration/keras/callbacks/model_checkpoint.py +2 -2
- wandb/integration/keras/keras.py +13 -19
- wandb/integration/kfp/kfp_patch.py +2 -3
- wandb/integration/langchain/wandb_tracer.py +1 -1
- wandb/integration/metaflow/metaflow.py +13 -13
- wandb/integration/openai/fine_tuning.py +3 -2
- wandb/integration/sagemaker/auth.py +2 -1
- wandb/integration/sklearn/utils.py +2 -1
- wandb/integration/tensorboard/__init__.py +1 -1
- wandb/integration/tensorboard/log.py +2 -5
- wandb/integration/tensorflow/__init__.py +2 -2
- wandb/jupyter.py +20 -17
- wandb/plot/confusion_matrix.py +1 -1
- wandb/plot/utils.py +8 -7
- wandb/proto/v3/wandb_internal_pb2.py +355 -335
- wandb/proto/v3/wandb_settings_pb2.py +2 -2
- wandb/proto/v3/wandb_telemetry_pb2.py +12 -12
- wandb/proto/v4/wandb_internal_pb2.py +339 -335
- wandb/proto/v4/wandb_settings_pb2.py +2 -2
- wandb/proto/v4/wandb_telemetry_pb2.py +12 -12
- wandb/proto/v5/wandb_internal_pb2.py +339 -335
- wandb/proto/v5/wandb_settings_pb2.py +2 -2
- wandb/proto/v5/wandb_telemetry_pb2.py +12 -12
- wandb/proto/v6/wandb_internal_pb2.py +339 -335
- wandb/proto/v6/wandb_settings_pb2.py +2 -2
- wandb/proto/v6/wandb_telemetry_pb2.py +12 -12
- wandb/proto/wandb_deprecated.py +6 -8
- wandb/sdk/artifacts/_internal_artifact.py +43 -0
- wandb/sdk/artifacts/_validators.py +55 -35
- wandb/sdk/artifacts/artifact.py +117 -115
- wandb/sdk/artifacts/artifact_download_logger.py +2 -0
- wandb/sdk/artifacts/artifact_saver.py +1 -3
- wandb/sdk/artifacts/artifact_state.py +2 -0
- wandb/sdk/artifacts/artifact_ttl.py +2 -0
- wandb/sdk/artifacts/exceptions.py +14 -0
- wandb/sdk/artifacts/staging.py +2 -0
- wandb/sdk/artifacts/storage_handlers/local_file_handler.py +2 -6
- wandb/sdk/artifacts/storage_handlers/multi_handler.py +1 -1
- wandb/sdk/artifacts/storage_handlers/tracking_handler.py +2 -6
- wandb/sdk/artifacts/storage_handlers/wb_artifact_handler.py +1 -5
- wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +1 -1
- wandb/sdk/artifacts/storage_layout.py +2 -0
- wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +3 -3
- wandb/sdk/backend/backend.py +11 -182
- wandb/sdk/data_types/_dtypes.py +2 -6
- wandb/sdk/data_types/audio.py +20 -3
- wandb/sdk/data_types/base_types/media.py +12 -7
- wandb/sdk/data_types/base_types/wb_value.py +8 -18
- wandb/sdk/data_types/bokeh.py +19 -2
- wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +17 -1
- wandb/sdk/data_types/helper_types/image_mask.py +7 -1
- wandb/sdk/data_types/html.py +4 -4
- wandb/sdk/data_types/image.py +178 -103
- wandb/sdk/data_types/molecule.py +6 -6
- wandb/sdk/data_types/object_3d.py +10 -5
- wandb/sdk/data_types/saved_model.py +11 -6
- wandb/sdk/data_types/table.py +313 -83
- wandb/sdk/data_types/table_decorators.py +108 -0
- wandb/sdk/data_types/utils.py +43 -7
- wandb/sdk/data_types/video.py +21 -3
- wandb/sdk/interface/interface.py +10 -0
- wandb/sdk/internal/datastore.py +2 -6
- wandb/sdk/internal/file_pusher.py +1 -5
- wandb/sdk/internal/file_stream.py +8 -17
- wandb/sdk/internal/handler.py +2 -2
- wandb/sdk/internal/incremental_table_util.py +53 -0
- wandb/sdk/internal/internal.py +3 -5
- wandb/sdk/internal/internal_api.py +66 -89
- wandb/sdk/internal/job_builder.py +2 -7
- wandb/sdk/internal/profiler.py +2 -2
- wandb/sdk/internal/progress.py +1 -3
- wandb/sdk/internal/run.py +1 -6
- wandb/sdk/internal/sender.py +24 -36
- wandb/sdk/internal/system/assets/aggregators.py +1 -7
- wandb/sdk/internal/system/assets/disk.py +3 -3
- wandb/sdk/internal/system/assets/gpu.py +4 -4
- wandb/sdk/internal/system/assets/gpu_amd.py +4 -4
- wandb/sdk/internal/system/assets/interfaces.py +6 -6
- wandb/sdk/internal/system/assets/tpu.py +1 -1
- wandb/sdk/internal/system/assets/trainium.py +6 -6
- wandb/sdk/internal/system/system_info.py +5 -7
- wandb/sdk/internal/system/system_monitor.py +4 -4
- wandb/sdk/internal/tb_watcher.py +5 -7
- wandb/sdk/launch/_launch.py +1 -1
- wandb/sdk/launch/_project_spec.py +19 -20
- wandb/sdk/launch/agent/agent.py +3 -3
- wandb/sdk/launch/agent/config.py +1 -1
- wandb/sdk/launch/agent/job_status_tracker.py +2 -2
- wandb/sdk/launch/builder/build.py +2 -3
- wandb/sdk/launch/builder/kaniko_builder.py +5 -4
- wandb/sdk/launch/environment/gcp_environment.py +1 -2
- wandb/sdk/launch/registry/azure_container_registry.py +2 -2
- wandb/sdk/launch/registry/elastic_container_registry.py +2 -2
- wandb/sdk/launch/registry/google_artifact_registry.py +3 -3
- wandb/sdk/launch/runner/abstract.py +5 -5
- wandb/sdk/launch/runner/kubernetes_monitor.py +2 -2
- wandb/sdk/launch/runner/kubernetes_runner.py +1 -1
- wandb/sdk/launch/runner/sagemaker_runner.py +2 -4
- wandb/sdk/launch/runner/vertex_runner.py +2 -7
- wandb/sdk/launch/sweeps/__init__.py +1 -1
- wandb/sdk/launch/sweeps/scheduler.py +2 -2
- wandb/sdk/launch/sweeps/utils.py +3 -3
- wandb/sdk/launch/utils.py +3 -4
- wandb/sdk/lib/apikey.py +5 -8
- wandb/sdk/lib/config_util.py +3 -3
- wandb/sdk/lib/fsm.py +3 -18
- wandb/sdk/lib/gitlib.py +6 -5
- wandb/sdk/lib/ipython.py +2 -2
- wandb/sdk/lib/json_util.py +9 -14
- wandb/sdk/lib/printer.py +3 -8
- wandb/sdk/lib/redirect.py +1 -1
- wandb/sdk/lib/retry.py +3 -7
- wandb/sdk/lib/run_moment.py +2 -2
- wandb/sdk/lib/service_connection.py +3 -1
- wandb/sdk/lib/service_token.py +1 -2
- wandb/sdk/mailbox/mailbox_handle.py +3 -7
- wandb/sdk/mailbox/response_handle.py +2 -6
- wandb/sdk/service/streams.py +3 -7
- wandb/sdk/verify/verify.py +5 -6
- wandb/sdk/wandb_config.py +1 -1
- wandb/sdk/wandb_init.py +38 -106
- wandb/sdk/wandb_login.py +7 -6
- wandb/sdk/wandb_run.py +52 -240
- wandb/sdk/wandb_settings.py +71 -60
- wandb/sdk/wandb_setup.py +40 -14
- wandb/sdk/wandb_watch.py +5 -7
- wandb/sync/__init__.py +1 -1
- wandb/sync/sync.py +13 -13
- wandb/util.py +17 -35
- wandb/wandb_agent.py +8 -11
- {wandb-0.19.12rc1.dist-info → wandb-0.20.1.dist-info}/METADATA +5 -5
- {wandb-0.19.12rc1.dist-info → wandb-0.20.1.dist-info}/RECORD +170 -168
- wandb/docker/auth.py +0 -435
- wandb/docker/www_authenticate.py +0 -94
- {wandb-0.19.12rc1.dist-info → wandb-0.20.1.dist-info}/WHEEL +0 -0
- {wandb-0.19.12rc1.dist-info → wandb-0.20.1.dist-info}/entry_points.txt +0 -0
- {wandb-0.19.12rc1.dist-info → wandb-0.20.1.dist-info}/licenses/LICENSE +0 -0
wandb/sdk/data_types/table.py
CHANGED
@@ -2,8 +2,10 @@ import base64
 import binascii
 import codecs
 import datetime
+import json
 import logging
 import os
+from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Set, Tuple
 
 import wandb
 from wandb import util
@@ -13,8 +15,18 @@ from . import _dtypes
 from ._private import MEDIA_TMP
 from .base_types.media import Media, _numpy_arrays_to_lists
 from .base_types.wb_value import WBValue
+from .table_decorators import (
+    allow_incremental_logging_after_append,
+    allow_relogging_after_mutation,
+    ensure_not_incremental,
+)
 from .utils import _json_helper
 
+if TYPE_CHECKING:
+    from wandb.sdk.artifacts import artifact
+
+    from ...wandb_run import Run as LocalRun
+
 
 class _TableLinkMixin:
     def set_table(self, table):
@@ -180,6 +192,9 @@ class _ForeignIndexType(_dtypes.Type):
         return cls(table)
 
 
+_SUPPORTED_LOGGING_MODES = ["IMMUTABLE", "MUTABLE", "INCREMENTAL"]
+
+
 class Table(Media):
     """The Table class used to display and analyze tabular data.
 
@@ -190,21 +205,6 @@ class Table(Media):
 
     This class is the primary class used to generate the Table Visualizer
     in the UI: https://docs.wandb.ai/guides/data-vis/tables.
-
-    Args:
-        columns: (List[str]) Names of the columns in the table.
-            Defaults to ["Input", "Output", "Expected"].
-        data: (List[List[any]]) 2D row-oriented array of values.
-        dataframe: (pandas.DataFrame) DataFrame object used to create the table.
-            When set, `data` and `columns` arguments are ignored.
-        optional: (Union[bool,List[bool]]) Determines if `None` values are allowed. Default to True
-            - If a singular bool value, then the optionality is enforced for all
-            columns specified at construction time
-            - If a list of bool values, then the optionality is applied to each
-            column - should be the same length as `columns`
-            applies to all columns. A list of bool values applies to each respective column.
-        allow_mixed_types: (bool) Determines if columns are allowed to have mixed types
-            (disables type validation). Defaults to False
     """
 
     MAX_ROWS = 10000
@@ -221,15 +221,48 @@ class Table(Media):
         dtype=None,
         optional=True,
         allow_mixed_types=False,
+        log_mode: Optional[
+            Literal["IMMUTABLE", "MUTABLE", "INCREMENTAL"]
+        ] = "IMMUTABLE",
     ):
         """Initializes a Table object.
 
         The rows is available for legacy reasons and should not be used.
         The Table class uses data to mimic the Pandas API.
+
+        Args:
+            columns: (List[str]) Names of the columns in the table.
+                Defaults to ["Input", "Output", "Expected"].
+            data: (List[List[any]]) 2D row-oriented array of values.
+            dataframe: (pandas.DataFrame) DataFrame object used to create the table.
+                When set, `data` and `columns` arguments are ignored.
+            optional: (Union[bool,List[bool]]) Determines if `None` values are allowed. Default to True
+                - If a singular bool value, then the optionality is enforced for all
+                columns specified at construction time
+                - If a list of bool values, then the optionality is applied to each
+                column - should be the same length as `columns`
+                applies to all columns. A list of bool values applies to each respective column.
+            allow_mixed_types: (bool) Determines if columns are allowed to have mixed types
+                (disables type validation). Defaults to False
+            log_mode: Optional[str] Controls how the Table is logged when mutations occur.
+                Options:
+                - "IMMUTABLE" (default): Table can only be logged once; subsequent
+                logging attempts after the table has been mutated will be no-ops.
+                - "MUTABLE": Table can be re-logged after mutations, creating
+                a new artifact version each time it's logged.
+                - "INCREMENTAL": Table data is logged incrementally, with each log creating
+                a new artifact entry containing the new data since the last log.
         """
         super().__init__()
+        self._validate_log_mode(log_mode)
+        self.log_mode = log_mode
+        if self.log_mode == "INCREMENTAL":
+            self._increment_num: int | None = None
+            self._last_logged_idx: int | None = None
+            self._previous_increments_paths: list[str] | None = None
+            self._run_target_for_increments: LocalRun | None = None
         self._pk_col = None
-        self._fk_cols = set()
+        self._fk_cols: set[str] = set()
         if allow_mixed_types:
             dtype = _dtypes.AnyType
 
@@ -258,6 +291,11 @@ class Table(Media):
         else:
             self._init_from_list([], columns, optional, dtype)
 
+    def _validate_log_mode(self, log_mode):
+        assert (
+            log_mode in _SUPPORTED_LOGGING_MODES
+        ), f"Invalid log_mode: {log_mode}. Must be one of {_SUPPORTED_LOGGING_MODES}"
+
     @staticmethod
     def _assert_valid_columns(columns):
         valid_col_types = [str, int]
@@ -312,6 +350,63 @@ class Table(Media):
             for col_name, opt, dt in zip(self.columns, optional, dtype):
                 self.cast(col_name, dt, opt)
 
+    def _load_incremental_table_state_from_resumed_run(self, run: "LocalRun", key: str):
+        """Handle updating incremental table state for resumed runs.
+
+        This method is called when a run is resumed and there are previous
+        increments of this table that need to be preserved. It updates the
+        table's internal state to track previous increments and the current
+        increment number.
+        """
+        if (
+            self._previous_increments_paths is not None
+            or self._increment_num is not None
+        ):
+            raise AssertionError(
+                "The table has been initialized for a resumed run already"
+            )
+
+        self._set_incremental_table_run_target(run)
+
+        summary_from_key = run.summary.get(key)
+
+        if (
+            summary_from_key is None
+            or not isinstance(summary_from_key, dict)
+            or summary_from_key.get("_type") != "incremental-table-file"
+        ):
+            # The key was never logged to the run or its last logged
+            # value was not an incrementally logged table.
+            return
+
+        previous_increments_paths = summary_from_key.get(
+            "previous_increments_paths", []
+        )
+
+        # add the artifact path of the last logged increment
+        last_artifact_path = summary_from_key.get("artifact_path")
+
+        if last_artifact_path:
+            previous_increments_paths.append(last_artifact_path)
+
+        # add 1 because a new increment is being logged
+        last_increment_num = summary_from_key.get("increment_num", 0)
+
+        self._increment_num = last_increment_num + 1
+        self._previous_increments_paths = previous_increments_paths
+
+    def _set_incremental_table_run_target(self, run: "LocalRun") -> None:
+        """Associate a Run object with this incremental Table.
+
+        A Table object in incremental mode can only be logged to a single Run.
+        Raises an error if the table is already associated to a different run.
+        """
+        if self._run_target_for_increments is None:
+            self._run_target_for_increments = run
+        elif self._run_target_for_increments is not run:
+            raise AssertionError("An incremental Table can only be logged to one Run.")
+
+    @allow_relogging_after_mutation
     def cast(self, col_name, dtype, optional=False):
         """Casts a column to a specific data type.
 
@@ -336,11 +431,7 @@ class Table(Media):
                     result_type = wbtype.assign(row[col_ndx])
                     if isinstance(result_type, _dtypes.InvalidType):
                         raise TypeError(
-                            "Existing data {}, of type {} cannot be cast to {}".format(
-                                row[col_ndx],
-                                _dtypes.TypeRegistry.type_of(row[col_ndx]),
-                                wbtype,
-                            )
+                            f"Existing data {row[col_ndx]}, of type {_dtypes.TypeRegistry.type_of(row[col_ndx])} cannot be cast to {wbtype}"
                         )
                     wbtype = result_type
 
@@ -359,9 +450,7 @@ class Table(Media):
             if is_pk:
                 assert (
                     self._pk_col is None
-                ), "Cannot have multiple primary keys - {} is already set as the primary key.".format(
-                    self._pk_col
-                )
+                ), f"Cannot have multiple primary keys - {self._pk_col} is already set as the primary key."
 
             # Update the column type
             self._column_types.params["type_map"][col_name] = wbtype
@@ -375,23 +464,21 @@ class Table(Media):
 
     def _eq_debug(self, other, should_assert=False):
         eq = isinstance(other, Table)
-        assert not should_assert or eq, "Found type {}, expected {}".format(
-            other.__class__, Table
-        )
+        assert (
+            not should_assert or eq
+        ), f"Found type {other.__class__}, expected {Table}"
         eq = eq and len(self.data) == len(other.data)
-        assert not should_assert or eq, "Found {} rows, expected {}".format(
-            len(other.data), len(self.data)
-        )
+        assert (
+            not should_assert or eq
+        ), f"Found {len(other.data)} rows, expected {len(self.data)}"
         eq = eq and self.columns == other.columns
-        assert not should_assert or eq, "Found columns {}, expected {}".format(
-            other.columns, self.columns
-        )
+        assert (
+            not should_assert or eq
+        ), f"Found columns {other.columns}, expected {self.columns}"
        eq = eq and self._column_types == other._column_types
         assert (
             not should_assert or eq
-        ), "Found column type {}, expected column type {}".format(
-            other._column_types, self._column_types
-        )
+        ), f"Found column type {other._column_types}, expected column type {self._column_types}"
         if eq:
             for row_ndx in range(len(self.data)):
                 for col_ndx in range(len(self.data[row_ndx])):
@@ -402,12 +489,7 @@ class Table(Media):
                     eq = eq and _eq
                     assert (
                         not should_assert or eq
-                    ), "Unequal data at row_ndx {} col_ndx {}: found {}, expected {}".format(
-                        row_ndx,
-                        col_ndx,
-                        other.data[row_ndx][col_ndx],
-                        self.data[row_ndx][col_ndx],
-                    )
+                    ), f"Unequal data at row_ndx {row_ndx} col_ndx {col_ndx}: found {other.data[row_ndx][col_ndx]}, expected {self.data[row_ndx][col_ndx]}"
         if not eq:
             return eq
         return eq
@@ -415,11 +497,14 @@ class Table(Media):
     def __eq__(self, other):
         return self._eq_debug(other)
 
+    @allow_relogging_after_mutation
     def add_row(self, *row):
         """Deprecated; use add_data instead."""
         logging.warning("add_row is deprecated, use add_data")
         self.add_data(*row)
 
+    @allow_relogging_after_mutation
+    @allow_incremental_logging_after_append
     def add_data(self, *data):
         """Adds a new row of data to the table. The maximum amount of rows in a table is determined by `wandb.Table.MAX_ARTIFACT_ROWS`.
 
@@ -427,9 +512,7 @@ class Table(Media):
         """
         if len(data) != len(self.columns):
             raise ValueError(
-                "This table expects {} columns: {}, found {}".format(
-                    len(self.columns), self.columns, len(data)
-                )
+                f"This table expects {len(self.columns)} columns: {self.columns}, found {len(data)}"
             )
 
         # Special case to pre-emptively cast a column as a key.
@@ -468,9 +551,7 @@ class Table(Media):
         result_type = current_type.assign(incoming_row_dict)
         if isinstance(result_type, _dtypes.InvalidType):
             raise TypeError(
-                "Data row contained incompatible types:\n{}".format(
-                    current_type.explain(incoming_row_dict)
-                )
+                f"Data row contained incompatible types:\n{current_type.explain(incoming_row_dict)}"
             )
         return result_type
 
@@ -496,7 +577,16 @@ class Table(Media):
                 f"this may cause slower queries in the W&B UI."
             )
             logging.warning(f"Truncating wandb.Table object to {max_rows} rows.")
-        return {"columns": self.columns, "data": self.data[:max_rows]}
+
+        if self.log_mode == "INCREMENTAL" and self._last_logged_idx is not None:
+            return {
+                "columns": self.columns,
+                "data": self.data[
+                    self._last_logged_idx + 1 : self._last_logged_idx + 1 + max_rows
+                ],
+            }
+        else:
+            return {"columns": self.columns, "data": self.data[:max_rows]}
 
     def bind_to_run(self, *args, **kwargs):
         # We set `warn=False` since Tables will now always be logged to both
@@ -516,11 +606,12 @@ class Table(Media):
         return os.path.join("media", "table")
 
     @classmethod
-    def from_json(cls, json_obj, source_artifact):
+    def from_json(cls, json_obj, source_artifact: "artifact.Artifact"):
         data = []
         column_types = None
         np_deserialized_columns = {}
         timestamp_column_indices = set()
+        log_mode = json_obj.get("log_mode", "IMMUTABLE")
         if json_obj.get("column_types") is not None:
             column_types = _dtypes.TypeRegistry.type_from_dict(
                 json_obj["column_types"], source_artifact
@@ -547,6 +638,10 @@ class Table(Media):
                     and ndarray_type._get_serialization_path() is not None
                 ):
                     serialization_path = ndarray_type._get_serialization_path()
+
+                    if serialization_path is None:
+                        continue
+
                     np = util.get_module(
                         "numpy",
                         required="Deserializing NumPy columns requires NumPy to be installed.",
@@ -559,22 +654,23 @@ class Table(Media):
                     )
                     ndarray_type._clear_serialization_path()
 
-        for r_ndx, row in enumerate(json_obj["data"]):
-            row_data = []
-            for c_ndx, item in enumerate(row):
-                cell = item
-                if c_ndx in timestamp_column_indices and isinstance(item, (int, float)):
-                    cell = datetime.datetime.fromtimestamp(
-                        item / 1000, tz=datetime.timezone.utc
-                    )
-                elif c_ndx in np_deserialized_columns:
-                    cell = np_deserialized_columns[c_ndx][r_ndx]
-                elif isinstance(item, dict) and "_type" in item:
-                    obj = WBValue.init_from_json(item, source_artifact)
-                    if obj is not None:
-                        cell = obj
-                row_data.append(cell)
-            data.append(row_data)
+        if log_mode == "INCREMENTAL":
+            unprocessed_table_data = _get_data_from_increments(
+                json_obj, source_artifact
+            )
+        else:
+            unprocessed_table_data = json_obj["data"]
+
+        for r_ndx, row in enumerate(unprocessed_table_data):
+            data.append(
+                _process_table_row(
+                    row,
+                    timestamp_column_indices,
+                    np_deserialized_columns,
+                    source_artifact,
+                    r_ndx,
+                )
+            )
 
         # construct Table with dtypes for each column if type information exists
         dtypes = None
@@ -583,7 +679,9 @@ class Table(Media):
                 column_types.params["type_map"][str(col)] for col in json_obj["columns"]
             ]
 
-        new_obj = cls(columns=json_obj["columns"], data=data, dtype=dtypes)
+        new_obj = cls(
+            columns=json_obj["columns"], data=data, dtype=dtypes, log_mode=log_mode
+        )
 
         if column_types is not None:
             new_obj._column_types = column_types
@@ -594,12 +692,31 @@ class Table(Media):
     def to_json(self, run_or_artifact):
         json_dict = super().to_json(run_or_artifact)
 
+        if self.log_mode == "INCREMENTAL":
+            if self._previous_increments_paths is None:
+                self._previous_increments_paths = []
+            if self._increment_num is None:
+                self._increment_num = 0
+
+            json_dict.update(
+                {
+                    "increment_num": self._increment_num,
+                    "previous_increments_paths": self._previous_increments_paths,
+                }
+            )
+
         if isinstance(run_or_artifact, wandb.wandb_sdk.wandb_run.Run):
+            if self.log_mode == "INCREMENTAL":
+                wbvalue_type = "incremental-table-file"
+            else:
+                wbvalue_type = "table-file"
+
             json_dict.update(
                 {
-                    "_type": "table-file",
+                    "_type": wbvalue_type,
                     "ncols": len(self.columns),
                     "nrows": len(self.data),
+                    "log_mode": self.log_mode,
                 }
             )
 
@@ -669,10 +786,11 @@ class Table(Media):
                     "ncols": len(self.columns),
                     "nrows": len(mapped_data),
                     "column_types": self._column_types.to_json(artifact),
+                    "log_mode": self.log_mode,
                 }
             )
         else:
-            raise ValueError("to_json accepts wandb_run.Run or wandb_artifact.Artifact")
+            raise TypeError("to_json accepts wandb_run.Run or wandb_artifact.Artifact")
 
         return json_dict
 
@@ -692,11 +810,13 @@ class Table(Media):
             index.set_table(self)
             yield index, self.data[ndx]
 
+    @allow_relogging_after_mutation
     def set_pk(self, col_name):
         # TODO: Docs
         assert col_name in self.columns
         self.cast(col_name, _PrimaryKeyType())
 
+    @allow_relogging_after_mutation
     def set_fk(self, col_name, table, table_col):
         # TODO: Docs
         assert col_name in self.columns
@@ -737,9 +857,7 @@ class Table(Media):
         # If there is a removed FK
        if len(self._fk_cols - _fk_cols) > 0:
             raise AssertionError(
-                "Cannot unset foreign key. Attempted to unset ({})".format(
-                    self._fk_cols - _fk_cols
-                )
+                f"Cannot unset foreign key. Attempted to unset ({self._fk_cols - _fk_cols})"
             )
 
         self._pk_col = _pk_col
@@ -799,6 +917,8 @@ class Table(Media):
         for row_ndx in range(len(self.data)):
             update_row(row_ndx)
 
+    @ensure_not_incremental
+    @allow_relogging_after_mutation
     def add_column(self, name, data, optional=False):
         """Adds a column of data to the table.
 
@@ -829,7 +949,7 @@ class Table(Media):
 
         try:
             self.cast(name, _dtypes.UnknownType(), optional=optional)
-        except TypeError as err:
+        except TypeError:
             # Undo the changes
             if is_first_col:
                 self.data = []
@@ -838,7 +958,7 @@ class Table(Media):
                 for ndx in range(len(self.data)):
                     self.data[ndx] = self.data[ndx][:-1]
                 self.columns = self.columns[:-1]
-            raise err
+            raise
 
     def get_column(self, name, convert_to=None):
         """Retrieves a column from the table and optionally converts it to a NumPy object.
@@ -889,6 +1009,8 @@ class Table(Media):
             _index.set_table(self)
             return _index
 
+    @ensure_not_incremental
+    @allow_relogging_after_mutation
     def add_computed_columns(self, fn):
         """Adds one or more computed columns based on existing data.
 
@@ -992,9 +1114,7 @@ class PartitionedTable(Media):
                 columns = part.columns
             elif columns != part.columns:
                 raise ValueError(
-                    "Table parts have non-matching columns. {} != {}".format(
-                        columns, part.columns
-                    )
+                    f"Table parts have non-matching columns. {columns} != {part.columns}"
                 )
             for _, row in part.iterrows():
                 yield ndx, row
@@ -1137,13 +1257,13 @@ class JoinedTable(Media):
 
     def _eq_debug(self, other, should_assert=False):
         eq = isinstance(other, JoinedTable)
-        assert not should_assert or eq, "Found type {}, expected {}".format(
-            other.__class__, JoinedTable
-        )
+        assert (
+            not should_assert or eq
+        ), f"Found type {other.__class__}, expected {JoinedTable}"
         eq = eq and self._join_key == other._join_key
-        assert not should_assert or eq, "Found {} join key, expected {}".format(
-            other._join_key, self._join_key
-        )
+        assert (
+            not should_assert or eq
+        ), f"Found {other._join_key} join key, expected {self._join_key}"
         eq = eq and self._table1._eq_debug(other._table1, should_assert)
         eq = eq and self._table2._eq_debug(other._table2, should_assert)
         return eq
@@ -1207,3 +1327,113 @@ _dtypes.TypeRegistry.add(_PartitionedTableType)
 _dtypes.TypeRegistry.add(_ForeignKeyType)
 _dtypes.TypeRegistry.add(_PrimaryKeyType)
 _dtypes.TypeRegistry.add(_ForeignIndexType)
+
+
+def _get_data_from_increments(
+    json_obj: Dict[str, Any], source_artifact: "artifact.Artifact"
+) -> List[Any]:
+    """Get data from incremental table artifacts.
+
+    Args:
+        json_obj: The JSON object containing table metadata.
+        source_artifact: The source artifact containing the table data.
+
+    Returns:
+        List of table rows from all increments.
+    """
+    if "latest" not in source_artifact.aliases:
+        wandb.termwarn(
+            (
+                "It is recommended to use the latest version of the "
+                "incremental table artifact for ordering guarantees."
+            ),
+            repeat=False,
+        )
+    data: List[Any] = []
+    increment_num = json_obj.get("increment_num", None)
+    if increment_num is None:
+        return data
+
+    # Sort by increment number first, then by timestamp if present
+    # Format of name is: "{incr_num}-{timestamp_ms}.{key}.table.json"
+    def get_sort_key(key: str) -> Tuple[int, int]:
+        try:
+            parts = key.split(".")
+            increment_parts = parts[0].split("-")
+            increment_num = int(increment_parts[0])
+            # If there's a timestamp part, use it for secondary sorting
+            timestamp = int(increment_parts[1]) if len(increment_parts) > 1 else 0
+        except (ValueError, IndexError):
+            wandb.termwarn(
+                (
+                    f"Could not parse artifact entry for increment {key}."
+                    " The entry name does not follow the naming convention"
+                    " <increment_number>-<timestamp>.<key>.table.json"
+                    " The data in the table will be out of order."
+                ),
+                repeat=False,
+            )
+            return (0, 0)
+
+        return (increment_num, timestamp)
+
+    sorted_increment_keys = []
+    for entry_key in source_artifact.manifest.entries:
+        if entry_key.endswith(".table.json"):
+            sorted_increment_keys.append(entry_key)
+
+    sorted_increment_keys.sort(key=get_sort_key)
+
+    for entry_key in sorted_increment_keys:
+        try:
+            with open(source_artifact.manifest.entries[entry_key].download()) as f:
+                table_data = json.load(f)
+                data.extend(table_data["data"])
+        except (json.JSONDecodeError, KeyError) as e:
+            raise wandb.Error(f"Invalid table file {entry_key}") from e
+    return data
+
+
+def _process_table_row(
+    row: List[Any],
+    timestamp_column_indices: Set[_dtypes.TimestampType],
+    np_deserialized_columns: Dict[int, Any],
+    source_artifact: "artifact.Artifact",
+    row_idx: int,
+) -> List[Any]:
+    """Convert special columns in a table row to Python types.
+
+    Processes a single row of table data by converting timestamp values to
+    datetime objects, replacing np typed cells with numpy array data,
+    and initializing media objects from their json value.
+
+    Args:
+        row: The row data to process.
+        timestamp_column_indices: Set of column indices containing timestamps.
+        np_deserialized_columns: Dictionary mapping column indices to numpy arrays.
+        source_artifact: The source artifact containing the table data.
+        row_idx: The index of the current row.
+
+    Returns:
+        Processed row data.
+    """
+    row_data = []
+    for c_ndx, item in enumerate(row):
+        cell: Any
+        if c_ndx in timestamp_column_indices and isinstance(item, (int, float)):
+            cell = datetime.datetime.fromtimestamp(
+                item / 1000, tz=datetime.timezone.utc
+            )
+        elif c_ndx in np_deserialized_columns:
+            cell = np_deserialized_columns[c_ndx][row_idx]
+        elif (
+            isinstance(item, dict)
+            and "_type" in item
+            and (obj := WBValue.init_from_json(item, source_artifact))
+        ):
+            cell = obj
+        else:
+            cell = item
+        row_data.append(cell)
+    return row_data