vastdb 1.3.11__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vastdb/_ibis_support.py +28 -0
- vastdb/_internal.py +167 -180
- vastdb/_table_interface.py +136 -0
- vastdb/bench/perf_bench/orchestrate/results_helpers.py +1 -1
- vastdb/bucket.py +1 -1
- vastdb/conftest.py +42 -19
- vastdb/schema.py +15 -3
- vastdb/session.py +3 -1
- vastdb/table.py +599 -339
- vastdb/table_metadata.py +221 -0
- vastdb/tests/test_duckdb.py +30 -30
- vastdb/tests/test_fixed_list.py +56 -6
- vastdb/tests/test_imports.py +2 -1
- vastdb/tests/test_nested.py +0 -5
- vastdb/tests/test_table_in_tx.py +249 -0
- vastdb/tests/test_tables.py +63 -16
- vastdb/tests/util.py +109 -2
- vastdb/transaction.py +27 -0
- {vastdb-1.3.11.dist-info → vastdb-2.0.0.dist-info}/METADATA +21 -6
- {vastdb-1.3.11.dist-info → vastdb-2.0.0.dist-info}/RECORD +23 -19
- {vastdb-1.3.11.dist-info → vastdb-2.0.0.dist-info}/WHEEL +1 -1
- {vastdb-1.3.11.dist-info → vastdb-2.0.0.dist-info/licenses}/LICENSE +0 -0
- {vastdb-1.3.11.dist-info → vastdb-2.0.0.dist-info}/top_level.txt +0 -0
vastdb/table_metadata.py
ADDED
@@ -0,0 +1,221 @@
+"""VAST Database table metadata."""
+
+import logging
+from copy import deepcopy
+from dataclasses import dataclass
+from enum import Enum
+from typing import TYPE_CHECKING, Optional, Tuple
+
+import ibis
+import pyarrow as pa
+
+from vastdb import errors
+from vastdb._ibis_support import validate_ibis_support_schema
+
+if TYPE_CHECKING:
+    from .transaction import Transaction
+
+log = logging.getLogger(__name__)
+
+
+class TableType(Enum):
+    """Table Type."""
+
+    Regular = 1
+    Elysium = 2
+    TableImports = 3
+
+
+@dataclass
+class TableRef:
+    """Represents a table ref (table's full path)."""
+
+    bucket: str
+    schema: str
+    table: str
+
+    @property
+    def full_path(self) -> str:
+        """Table full path."""
+        return f"{self.bucket}/{self.schema}/{self.table}"
+
+    def __str__(self) -> str:
+        """Table full path."""
+        return self.full_path
+
+
+@dataclass
+class TableStats:
+    """Table-related information."""
+
+    num_rows: int
+    size_in_bytes: int
+    sorting_score: int
+    write_amplification: int
+    acummulative_row_inserition_count: int
+    is_external_rowid_alloc: bool = False
+    sorting_key_enabled: bool = False
+    sorting_done: bool = False
+    endpoints: Tuple[str, ...] = ()
+
+
+class TableMetadata:
+    """Table Metadata."""
+
+    _ref: TableRef
+    _arrow_schema: Optional[pa.Schema]
+    _sorted_columns: Optional[list[str]]
+    _ibis_table: ibis.Table
+    _stats: Optional[TableStats]
+
+    def __init__(self,
+                 ref: TableRef,
+                 arrow_schema: Optional[pa.Schema] = None,
+                 table_type: Optional[TableType] = None):
+        """Table Metadata."""
+        self._ref = deepcopy(ref)
+        self._table_type = table_type
+        self.arrow_schema = deepcopy(arrow_schema)
+        self._sorted_columns = None
+        self._stats = None
+
+    def __eq__(self, other: object) -> bool:
+        """TableMetadata Equal."""
+        if not isinstance(other, TableMetadata):
+            return False
+
+        return (self._ref == other._ref and
+                self._table_type == other._table_type)
+
+    def rename_table(self, name: str) -> None:
+        """Rename table metadata's table name."""
+        self._ref.table = name
+
+    def load(self, tx: "Transaction") -> None:
+        """Load/Reload table metadata."""
+        self.load_stats(tx)
+        self.load_schema(tx)
+
+        if self._table_type is TableType.Elysium:
+            self.load_sorted_columns(tx)
+
+    def load_schema(self, tx: "Transaction") -> None:
+        """Load/Reload table schema."""
+        fields = []
+        next_key = 0
+        while True:
+            cur_columns, next_key, is_truncated, _count = tx._rpc.api.list_columns(
+                bucket=self.ref.bucket,
+                schema=self.ref.schema,
+                table=self.ref.table,
+                next_key=next_key,
+                txid=tx.active_txid,
+                list_imports_table=self.is_imports_table)
+            fields.extend(cur_columns)
+            if not is_truncated:
+                break
+
+        self.arrow_schema = pa.schema(fields)
+
+    def load_sorted_columns(self, tx: "Transaction") -> None:
+        """Return sorted columns' metadata."""
+        fields = []
+        try:
+            next_key = 0
+            while True:
+                cur_columns, next_key, is_truncated, _count = tx._rpc.api.list_sorted_columns(
+                    bucket=self.ref.bucket, schema=self.ref.schema, table=self.ref.table,
+                    next_key=next_key, txid=tx.active_txid, list_imports_table=self.is_imports_table)
+                fields.extend(cur_columns)
+                if not is_truncated:
+                    break
+        except errors.BadRequest:
+            raise
+        except errors.InternalServerError as ise:
+            log.warning(
+                "Failed to get the sorted columns, Elysium might not be supported: %s", ise)
+            raise
+        except errors.NotSupportedVersion:
+            log.warning("Failed to get the sorted columns, Elysium not supported")
+            raise
+        finally:
+            self._sorted_columns = fields
+
+    def load_stats(self, tx: "Transaction") -> None:
+        """Load/Reload table stats."""
+        stats_tuple = tx._rpc.api.get_table_stats(
+            bucket=self.ref.bucket, schema=self.ref.schema, name=self.ref.table, txid=tx.active_txid,
+            imports_table_stats=self.is_imports_table)
+        self._stats = TableStats(**stats_tuple._asdict())
+
+        is_elysium_table = self._stats.sorting_key_enabled
+
+        if self._table_type is None:
+            if is_elysium_table:
+                self._set_sorted_table(tx)
+            else:
+                self._set_regular_table()
+        else:
+            if is_elysium_table and self.table_type is not TableType.Elysium:
+                raise ValueError(
+                    "Actual table is sorted (TableType.Elysium), was not inited as TableType.Elysium"
+                )
+
+    def _set_sorted_table(self, tx: "Transaction"):
+        self._table_type = TableType.Elysium
+        tx._rpc.features.check_elysium()
+
+    def _set_regular_table(self):
+        self._table_type = TableType.Regular
+
+    @property
+    def stats(self) -> Optional[TableStats]:
+        """Get table's stats."""
+        return self._stats
+
+    @property
+    def arrow_schema(self) -> pa.Schema:
+        """Table's arrow schema."""
+        return self._arrow_schema
+
+    @arrow_schema.setter
+    def arrow_schema(self, arrow_schema: Optional[pa.Schema]):
+        """Set arrow schema."""
+        if arrow_schema:
+            validate_ibis_support_schema(arrow_schema)
+            self._arrow_schema = arrow_schema
+            self._ibis_table = ibis.table(ibis.Schema.from_pyarrow(arrow_schema), self._ref.full_path)
+        else:
+            self._arrow_schema = None
+            self._ibis_table = None
+
+    @property
+    def sorted_columns(self) -> list:
+        """Sorted columns."""
+        if self._sorted_columns is None:
+            raise ValueError("sorted columns not loaded")
+        return self._sorted_columns
+
+    @property
+    def ibis_table(self) -> ibis.Table:
+        """Ibis table."""
+        return self._ibis_table
+
+    @property
+    def ref(self) -> TableRef:
+        """Table's reference."""
+        return self._ref
+
+    @property
+    def table_type(self) -> TableType:
+        """Table's type."""
+        if self._table_type is None:
+            raise ValueError(
+                "TableType was not loaded. load using TableMetadata.load_stats")
+
+        return self._table_type
+
+    @property
+    def is_imports_table(self) -> bool:
+        """Is table an imports table."""
+        return self._table_type is TableType.TableImports
vastdb/tests/test_duckdb.py
CHANGED
@@ -3,9 +3,7 @@ import logging
 import duckdb
 import pyarrow as pa
 import pyarrow.compute as pc
-import pytest
 
-from ..table import QueryConfig
 from .util import prepare_data
 
 log = logging.getLogger(__name__)
@@ -31,31 +29,33 @@ def test_duckdb(session, clean_bucket_name):
     assert actual == expected
 
 
-def test_closed_tx(session, clean_bucket_name):
-    columns = pa.schema([
-        ('a', pa.int64()),
-    ])
-    data = pa.table(schema=columns, data=[
-        list(range(10000)),
-    ])
-
-    with session.transaction() as tx:
-        t = tx.bucket(clean_bucket_name).create_schema("s1").create_table("t1", columns)
-        t.insert(data)
-
-        config = QueryConfig(
-            num_sub_splits=1,
-            num_splits=1,
-            num_row_groups_per_sub_split=1,
-            limit_rows_per_sub_split=100)
-        batches = t.select(config=config)  # noqa: F841
-        first = next(batches)  # make sure that HTTP response processing has started
-        assert first['a'].to_pylist() == list(range(100))
-
-        conn = duckdb.connect()
-        res = conn.execute('SELECT a FROM batches')
-        log.debug("closing tx=%s after first batch=%s", t.tx, first)
-
-    # transaction is closed, collecting the result should fail internally in DuckDB
-    with pytest.raises(duckdb.InvalidInputException):
-        res.arrow()
+# def test_closed_tx(session, clean_bucket_name):
+#     assert duckdb.__version__ == "1.0.0", "doesn't reproduce with newer duckdb versions, when updating duckdb in tests/when relevant need to update this test accordingly."
+
+#     columns = pa.schema([
+#         ('a', pa.int64()),
+#     ])
+#     data = pa.table(schema=columns, data=[
+#         list(range(10000)),
+#     ])
+
+#     with session.transaction() as tx:
+#         t = tx.bucket(clean_bucket_name).create_schema("s1").create_table("t1", columns)
+#         t.insert(data)
+
+#         config = QueryConfig(
+#             num_sub_splits=1,
+#             num_splits=1,
+#             num_row_groups_per_sub_split=1,
+#             limit_rows_per_sub_split=100)
+#         batches = t.select(config=config)  # noqa: F841
+#         first = next(batches)  # make sure that HTTP response processing has started
+#         assert first['a'].to_pylist() == list(range(100))
+
+#         conn = duckdb.connect()
+#         res = conn.execute('SELECT a FROM batches')
+#         log.debug("closing tx=%s after first batch=%s", t.tx, first)
+
+#     # transaction is closed, collecting the result should fail internally in DuckDB
+#     with pytest.raises(duckdb.InvalidInputException):
+#         res.arrow()
vastdb/tests/test_fixed_list.py
CHANGED
@@ -11,7 +11,11 @@ import pytest
 
 import vastdb.errors
 
-from .util import
+from .util import (
+    assert_pandas_df_equal,
+    convert_pandas_df_to_hashable_values,
+    prepare_data,
+)
 
 supported_fixed_list_element_types = [
     pa.uint8(),
@@ -85,7 +89,7 @@ def test_vectors(session, clean_bucket_name):
     columns = pa.schema(
         [("id", pa.int64()), ("vec", pa.list_(pa.field(name="item", type=element_type, nullable=False), dimension),)]
     )
-    ids = range(num_rows)
+    ids = list(range(num_rows))
     expected = pa.table(
         schema=columns,
         data=[
@@ -102,7 +106,7 @@ def test_vectors(session, clean_bucket_name):
     assert actual == expected
 
     # Select by id.
-    select_id = random.
+    select_id = random.choice(ids)
     actual = t.select(predicate=(t["id"] == select_id)).read_all()
     assert actual.to_pydict()["vec"] == [[select_id] * dimension]
     assert actual == expected.filter(pc.field("id") == select_id)
@@ -221,24 +225,70 @@ def generate_random_pyarrow_value(
 
 @pytest.mark.parametrize("element_field", supported_fixed_list_element_fields)
 def test_fixed_list_type_values(session, clean_bucket_name, element_field):
-    list_size =
-    num_rows =
+    list_size = 250
+    num_rows = 100
 
     vec_type = pa.list_(element_field, list_size)
     schema = pa.schema(
         {"id": pa.int64(), "vec": vec_type, "random_int": pa.int64()})
+    ids = list(range(num_rows))
     expected = pa.table(
         schema=schema,
-        data=[
+        data=[ids] + [[generate_random_pyarrow_value(schema.field(col_name)) for _ in range(num_rows)]
                      for col_name in
                      schema.names[1:]],
     )
+    # Convert the list to tuple in order to support comparison as a whole.
+    pd_expected = convert_pandas_df_to_hashable_values(expected.to_pandas())
 
     with prepare_data(session, clean_bucket_name, "s", "t", expected) as table:
         assert table.arrow_schema == schema
         actual = table.select().read_all()
         assert actual == expected
 
+        # Select by id.
+        id_to_select = random.choice(ids)
+        select_by_id = table.select(predicate=(table["id"] == id_to_select)).read_all()
+        assert len(select_by_id) == 1  # ID is unique.
+        assert select_by_id == expected.filter(pc.field("id") == id_to_select)
+
+        # Choose a random vector which is not null. Nulls should not be selected using ==, != operators, but by isnull.
+        # In addition, nulls are discarded unless isnull is used (meaning != 1 will return both not nulls and not 1).
+        vector_to_select = random.choice(expected.filter(~pc.field('vec').is_null())['vec'].to_numpy())
+
+        # TODO VSDK-36: Remove this workaround when the issue with negative decimals in predicate is fixed.
+        if pa.types.is_decimal(element_field.type):
+            vector_to_select = abs(vector_to_select)
+
+        # Dtype is not asserted since pandas converts the dtype of integer to float when there are (or could be)
+        # NaN/None values.
+        # Select by vector value.
+        select_by_vector = table.select(predicate=(table["vec"] == vector_to_select)).read_all()
+        assert_pandas_df_equal(select_by_vector.to_pandas(),
+                               pd_expected.loc[pd_expected['vec'] == tuple(vector_to_select)], check_dtype=False)
+
+        # Not equal to vector value.
+        select_by_vector = table.select(predicate=(table["vec"] != vector_to_select)).read_all()
+        assert_pandas_df_equal(select_by_vector.to_pandas(),
+                               pd_expected.loc[(pd_expected['vec'] != tuple(vector_to_select)) &
+                                               pd_expected['vec'].notnull()], check_dtype=False)
+
+        # Not equal to vector value or null.
+        select_by_vector = table.select(
+            predicate=((table["vec"] != vector_to_select) | (table['vec'].isnull()))).read_all()
+        assert_pandas_df_equal(select_by_vector.to_pandas(),
+                               pd_expected.loc[pd_expected['vec'] != tuple(vector_to_select)], check_dtype=False)
+
+        # Lexicographically greater than vector.
+        select_by_vector = table.select(predicate=(table["vec"] > vector_to_select)).read_all()
+        assert_pandas_df_equal(select_by_vector.to_pandas(), pd_expected.loc[
+            pd_expected['vec'].notnull() & (pd_expected['vec'] > tuple(vector_to_select))], check_dtype=False)
+
+        # Lexicographically less than vector.
+        select_by_vector = table.select(predicate=(table["vec"] < vector_to_select)).read_all()
+        assert_pandas_df_equal(select_by_vector.to_pandas(), pd_expected.loc[
+            pd_expected['vec'].notnull() & (pd_expected['vec'] < tuple(vector_to_select))], check_dtype=False)
+
 
 @pytest.mark.parametrize("list_type", unsupported_fixed_list_types)
 def test_unsupported_fixed_list_types(session, clean_bucket_name, list_type):
vastdb/tests/test_imports.py
CHANGED
@@ -14,6 +14,7 @@ from vastdb.errors import (
     InvalidArgument,
     NotSupportedVersion,
 )
+from vastdb.session import Session
 
 log = logging.getLogger(__name__)
 
@@ -28,7 +29,7 @@ def zip_import_session(session):
         pytest.skip("Skipped because this test requires version 5.3.1")
 
 
-def test_parallel_imports(session, clean_bucket_name, s3):
+def test_parallel_imports(session: Session, clean_bucket_name: str, s3):
     num_rows = 1000
     num_files = 53
     ds = {'num': [i for i in range(num_rows)]}
vastdb/tests/test_nested.py
CHANGED
@@ -79,13 +79,11 @@ def test_nested_filter(session, clean_bucket_name):
 def test_nested_unsupported_filter(session, clean_bucket_name):
     columns = pa.schema([
         ('l', pa.list_(pa.int8())),
-        ('fl', pa.list_(pa.field(name='item', type=pa.int64(), nullable=False), 2)),
         ('m', pa.map_(pa.utf8(), pa.float64())),
         ('s', pa.struct([('x', pa.int16()), ('y', pa.int32())])),
     ])
     expected = pa.table(schema=columns, data=[
         [[1], [], [2, 3], None],
-        [[1, 2], None, [3, 4], None],
         [None, {'a': 2.5}, {'b': 0.25, 'c': 0.025}, {}],
         [{'x': 1, 'y': None}, None, {'x': 2, 'y': 3}, {'x': None, 'y': 4}],
     ])
@@ -95,9 +93,6 @@ def test_nested_unsupported_filter(session, clean_bucket_name):
     with pytest.raises(NotImplementedError):
         list(t.select(predicate=(t['l'].isnull())))
 
-    with pytest.raises(NotImplementedError):
-        list(t.select(predicate=(t['fl'].isnull())))
-
     with pytest.raises(NotImplementedError):
         list(t.select(predicate=(t['m'].isnull())))
 
vastdb/tests/test_table_in_tx.py
ADDED
@@ -0,0 +1,249 @@
+from dataclasses import dataclass
+from typing import Generator, Optional
+
+import ibis
+import pyarrow as pa
+import pytest
+
+from vastdb.session import Session
+from vastdb.table import INTERNAL_ROW_ID, ITable
+from vastdb.table_metadata import TableMetadata, TableRef, TableType
+from vastdb.transaction import Transaction
+
+from .util import compare_pyarrow_tables, prepare_data_get_tx
+
+
+def test_sanity(session: Session, clean_bucket_name):
+    columns = pa.schema([
+        ('a', pa.int64()),
+        ('b', pa.float32()),
+        ('s', pa.utf8()),
+    ])
+    expected = pa.table(schema=columns, data=[
+        [111, 222, 333],
+        [0.5, 1.5, 2.5],
+        ['a', 'bb', 'ccc'],
+    ])
+    with prepare_data_get_tx(session, clean_bucket_name, 's', 't', expected) as tx:
+        ref = TableRef(clean_bucket_name, 's', 't')
+        table_md = TableMetadata(ref, columns, TableType.Regular)
+
+        table_md.load_stats(tx)
+
+        t = tx.table_from_metadata(table_md)
+
+        actual = t.select(columns=['a', 'b', 's']).read_all()
+        assert actual == expected
+
+
+@dataclass
+class SimpleDbSetup:
+    tx: Transaction
+    ref: TableRef
+    table_type: TableType
+    arrow_schema: Optional[pa.Schema] = None
+
+
+@pytest.fixture(scope="function")
+def simple_db_setup(session: Session, clean_bucket_name: str) -> Generator[SimpleDbSetup, None, None]:
+    arrow_schema = pa.schema([
+        ('a', pa.int64()),
+        ('b', pa.float32()),
+        ('s', pa.utf8()),
+    ])
+    expected = pa.table(schema=arrow_schema, data=[
+        [111, 222, 333],
+        [0.5, 1.5, 2.5],
+        ['a', 'bb', 'ccc'],
+    ])
+    with prepare_data_get_tx(session, clean_bucket_name, 's', 't', expected) as tx:
+        yield SimpleDbSetup(tx=tx,
+                            arrow_schema=arrow_schema,
+                            ref=TableRef(clean_bucket_name, 's', 't'),
+                            table_type=TableType.Regular)
+
+
+def test_schema_load_through_metadata(simple_db_setup: SimpleDbSetup):
+    table_md = TableMetadata(simple_db_setup.ref,
+                             table_type=simple_db_setup.table_type)
+
+    table = simple_db_setup.tx.table_from_metadata(table_md)
+    assert table.arrow_schema is None
+    table.reload_schema()
+    assert table.arrow_schema is not None
+
+
+def test_metadata_init_with_schema(simple_db_setup: SimpleDbSetup):
+    table_md = TableMetadata(ref=simple_db_setup.ref,
+                             arrow_schema=simple_db_setup.arrow_schema,
+                             table_type=simple_db_setup.table_type)
+
+    table = simple_db_setup.tx.table_from_metadata(table_md)
+    assert table.arrow_schema is not None
+
+
+def test_path(simple_db_setup: SimpleDbSetup):
+    table_md = TableMetadata(ref=simple_db_setup.ref,
+                             arrow_schema=simple_db_setup.arrow_schema,
+                             table_type=simple_db_setup.table_type)
+
+    table = simple_db_setup.tx.table_from_metadata(table_md)
+    assert table.path == simple_db_setup.ref.full_path
+
+
+def test_name(simple_db_setup: SimpleDbSetup):
+    table_md = TableMetadata(ref=simple_db_setup.ref,
+                             arrow_schema=simple_db_setup.arrow_schema,
+                             table_type=simple_db_setup.table_type)
+    table = simple_db_setup.tx.table_from_metadata(table_md)
+    assert table.name == simple_db_setup.ref.table
+
+
+def test_arrow_schema(simple_db_setup: SimpleDbSetup):
+    table_md = TableMetadata(ref=simple_db_setup.ref,
+                             arrow_schema=simple_db_setup.arrow_schema,
+                             table_type=simple_db_setup.table_type)
+    table = simple_db_setup.tx.table_from_metadata(table_md)
+    assert table.arrow_schema == simple_db_setup.arrow_schema
+
+
+def test_eq(simple_db_setup: SimpleDbSetup):
+    table_md1 = TableMetadata(ref=simple_db_setup.ref, table_type=simple_db_setup.table_type)
+    table1 = simple_db_setup.tx.table_from_metadata(table_md1)
+
+    table_md2 = TableMetadata(ref=simple_db_setup.ref, table_type=simple_db_setup.table_type)
+    table2 = simple_db_setup.tx.table_from_metadata(table_md2)
+
+    assert table1 == table2
+
+    other_ref = TableRef(simple_db_setup.ref.bucket, simple_db_setup.ref.schema, "other_table")
+    table_md3 = TableMetadata(ref=other_ref, table_type=simple_db_setup.table_type)
+    table3 = simple_db_setup.tx.table_from_metadata(table_md3)
+    assert table1 != table3
+
+
+def test_insert_and_select(simple_db_setup: SimpleDbSetup):
+    table_md = TableMetadata(ref=simple_db_setup.ref,
+                             arrow_schema=simple_db_setup.arrow_schema,
+                             table_type=simple_db_setup.table_type)
+    table_md.load_stats(simple_db_setup.tx)  # the next select requires stats loaded
+
+    table = simple_db_setup.tx.table_from_metadata(table_md)
+
+    initial_data = table.select().read_all()
+
+    assert initial_data.num_rows == 3
+
+    new_rows = pa.table(schema=simple_db_setup.arrow_schema, data=[[444], [4.5], ["dddd"]])
+    table.insert(new_rows)
+
+    all_data = table.select().read_all()
+    assert all_data.num_rows == 4
+
+    t = ibis.table(table.arrow_schema, name=table.name)
+    reader = table.select(predicate=t.a > 300)
+    filtered_data = reader.read_all()
+    assert filtered_data.num_rows == 2
+
+
+def test_sorting_status(simple_db_setup: SimpleDbSetup):
+    table_md = TableMetadata(ref=simple_db_setup.ref, table_type=simple_db_setup.table_type)
+    table = simple_db_setup.tx.table_from_metadata(table_md)
+
+    is_done = table.sorting_done()
+    assert isinstance(is_done, bool)
+
+    score = table.sorting_score()
+    assert isinstance(score, int)
+
+
+def test_projections(simple_db_setup: SimpleDbSetup):
+    table_md = TableMetadata(ref=simple_db_setup.ref, table_type=simple_db_setup.table_type)
+    table: ITable = simple_db_setup.tx.table_from_metadata(table_md)
+
+    ref = simple_db_setup.ref
+    legacy_table = simple_db_setup.tx.bucket(ref.bucket).schema(ref.schema).table(ref.table)
+
+    initial_projections = list(table.projections())
+    proj_name = "my_proj"
+    proj = legacy_table.create_projection(
+        projection_name=proj_name, sorted_columns=["a"], unsorted_columns=["s"]
+    )
+    assert proj.name == proj_name
+
+    retrieved_proj = table.projection(proj_name)
+    assert retrieved_proj == proj
+
+    all_projections = list(table.projections())
+    assert len(all_projections) == len(initial_projections) + 1
+
+
+def test_update(simple_db_setup: SimpleDbSetup):
+    table_md = TableMetadata(ref=simple_db_setup.ref,
+                             arrow_schema=simple_db_setup.arrow_schema,
+                             table_type=simple_db_setup.table_type)
+    table_md.load_stats(simple_db_setup.tx)
+    table = simple_db_setup.tx.table_from_metadata(table_md)
+
+    # 1. Select a row to update
+    row_to_update = table.select(predicate=table['a'] == 222, internal_row_id=True).read_all()
+    assert row_to_update.num_rows == 1
+
+    # 2. Create a modified version in a new RecordBatch
+    update_data = pa.table({
+        INTERNAL_ROW_ID: row_to_update[INTERNAL_ROW_ID],
+        's': ['updated_bb']
+    })
+
+    # 3. Call table.update()
+    table.update(update_data)
+
+    # 4. Select the row again and verify changes
+    updated_row = table.select(predicate=table['a'] == 222).read_all()
+    assert updated_row.to_pydict()['s'] == ['updated_bb']
+
+    remaining_rows = table.select(predicate=table['a'] != 222).read_all()
+    expected_remaining = pa.table({
+        'a': pa.array([111, 333], type=pa.int64()),
+        'b': pa.array([0.5, 2.5], type=pa.float32()),
+        's': pa.array(['a', 'ccc'], type=pa.utf8()),
+    })
+    assert compare_pyarrow_tables(remaining_rows, expected_remaining)
+
+
+def test_delete(simple_db_setup: SimpleDbSetup):
+    table_md = TableMetadata(ref=simple_db_setup.ref,
+                             arrow_schema=simple_db_setup.arrow_schema,
+                             table_type=simple_db_setup.table_type)
+    table_md.load_stats(simple_db_setup.tx)
+    table = simple_db_setup.tx.table_from_metadata(table_md)
+
+    # 1. Identify a row to delete
+    row_to_delete = table.select(predicate=table['a'] == 333, internal_row_id=True).read_all()
+    assert row_to_delete.num_rows == 1
+
+    # 2. Create a RecordBatch with the key of the row
+    delete_data = pa.table({
+        INTERNAL_ROW_ID: row_to_delete[INTERNAL_ROW_ID]
+    })
+
+    # 3. Call table.delete()
+    table.delete(delete_data)
+
+    # 4. Select to verify the row is gone
+    all_data = table.select().read_all()
+    assert all_data.num_rows == 2
+    assert 333 not in all_data.to_pydict()['a']
+    expected_remaining = pa.table({
+        'a': pa.array([111, 222], type=pa.int64()),
+        'b': pa.array([0.5, 1.5], type=pa.float32()),
+        's': pa.array(['a', 'bb'], type=pa.utf8()),
+    })
+    assert compare_pyarrow_tables(all_data, expected_remaining)
+
+
+def test_sanity_load(simple_db_setup: SimpleDbSetup):
+    table_md = TableMetadata(TableRef(simple_db_setup.ref.bucket,
+                                      simple_db_setup.ref.schema,
+                                      simple_db_setup.ref.table))
+    table_md.load(simple_db_setup.tx)