vastdb 2.0.2__py3-none-any.whl → 2.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vastdb/_adbc.py +205 -0
- vastdb/_internal.py +106 -17
- vastdb/_table_interface.py +20 -3
- vastdb/conftest.py +23 -1
- vastdb/errors.py +5 -0
- vastdb/schema.py +19 -2
- vastdb/session.py +14 -5
- vastdb/table.py +57 -22
- vastdb/table_metadata.py +58 -34
- vastdb/tests/test_adbc_integration.py +129 -0
- vastdb/tests/test_tables.py +35 -1
- vastdb/tests/test_vector_index.py +162 -0
- vastdb/tests/test_vector_search.py +211 -0
- vastdb/tests/util.py +3 -2
- vastdb/transaction.py +32 -0
- vastdb/vast_flatbuf/tabular/GetTableStatsResponse.py +51 -59
- vastdb/vast_flatbuf/tabular/ObjectDetails.py +36 -59
- vastdb/vast_flatbuf/tabular/VectorIndexMetadata.py +67 -0
- vastdb/vast_flatbuf/tabular/VipRange.py +19 -12
- {vastdb-2.0.2.dist-info → vastdb-2.0.5.dist-info}/METADATA +2 -1
- {vastdb-2.0.2.dist-info → vastdb-2.0.5.dist-info}/RECORD +24 -19
- {vastdb-2.0.2.dist-info → vastdb-2.0.5.dist-info}/WHEEL +0 -0
- {vastdb-2.0.2.dist-info → vastdb-2.0.5.dist-info}/licenses/LICENSE +0 -0
- {vastdb-2.0.2.dist-info → vastdb-2.0.5.dist-info}/top_level.txt +0 -0
vastdb/table.py
CHANGED
```diff
@@ -22,11 +22,12 @@ import ibis
 import pyarrow as pa
 import urllib3
 
-from vastdb._table_interface import ITable
+from vastdb._table_interface import IbisPredicate, ITable
 from vastdb.table_metadata import TableMetadata, TableRef, TableStats, TableType
 
 from . import _internal, errors, util
 from ._ibis_support import validate_ibis_support_schema
+from ._internal import VectorIndex
 from .config import ImportConfig, QueryConfig
 
 if TYPE_CHECKING:
@@ -213,6 +214,11 @@ class TableInTransaction(ITable):
         """Reload Sorted Columns."""
         self._metadata.load_sorted_columns(self._tx)
 
+    @property
+    def vector_index(self) -> Optional[VectorIndex]:
+        """Table's Vector Index if exists."""
+        return self._metadata._vector_index
+
     @property
     def path(self) -> str:
         """Return table's path."""
@@ -222,7 +228,7 @@ class TableInTransaction(ITable):
     def _internal_rowid_field(self) -> pa.Field:
         return INTERNAL_ROW_ID_SORTED_FIELD if self._is_sorted_table else INTERNAL_ROW_ID_FIELD
 
-    def sorted_columns(self) -> list[
+    def sorted_columns(self) -> list[pa.Field]:
         """Return sorted columns' metadata."""
         return self._metadata.sorted_columns
 
@@ -620,46 +626,51 @@ class TableInTransaction(ITable):
                 log.debug(
                     "one worker thread finished, remaining: %d", tasks_running)
 
-                # all host threads ended - wait for all futures to complete
-                propagate_first_exception(futures, block=True)
             finally:
                 stop_event.set()
                 while tasks_running > 0:
                     if record_batches_queue.get() is None:
                         tasks_running -= 1
+                propagate_first_exception(futures, block=True)
 
         return pa.RecordBatchReader.from_batches(query_data_request.response_schema, batches_iterator())
 
     def insert_in_column_batches(self, rows: pa.RecordBatch) -> pa.ChunkedArray:
-        """Split the RecordBatch into
+        """Split the RecordBatch into an insert + updates.
 
+        This is both to support rows that won't fit into an RPC and for performance for wide rows.
         Insert first MAX_COLUMN_IN_BATCH columns and get the row_ids. Then loop on the rest of the columns and
         update in groups of MAX_COLUMN_IN_BATCH.
         """
-        column_record_batch = pa.RecordBatch.from_arrays([_combine_chunks(rows.column(i)) for i in range(0, MAX_COLUMN_IN_BATCH)],
-                                                          schema=pa.schema([rows.schema.field(i) for i in range(0, MAX_COLUMN_IN_BATCH)]))
-        row_ids = self.insert(rows=column_record_batch)  # type: ignore
-
         columns_names = [field.name for field in rows.schema]
-        columns
-
-
-
+        # Sorted columns must be in the first insert as those can't be updated later.
+        if self._is_sorted_table:
+            sorted_columns_names = [field.name for field in self.sorted_columns()]
+            columns_names = sorted_columns_names + [column_name for column_name in columns_names if column_name not in sorted_columns_names]
+        columns = [rows.schema.field(column_name) for column_name in columns_names]
+
+        arrays = [_combine_chunks(rows.column(column_name)) for column_name in columns_names]
+        for start in range(0, len(rows.schema), MAX_COLUMN_IN_BATCH):
             end = start + MAX_COLUMN_IN_BATCH if start + \
                 MAX_COLUMN_IN_BATCH < len(rows.schema) else len(rows.schema)
             columns_name_chunk = columns_names[start:end]
             columns_chunks = columns[start:end]
             arrays_chunks = arrays[start:end]
-
-
-
-
-
+            if start == 0:
+                column_record_batch = pa.RecordBatch.from_arrays(
+                    arrays_chunks, schema=pa.schema(columns_chunks))
+                row_ids = self.insert(rows=column_record_batch, by_columns=False)  # type: ignore
+            else:
+                columns_chunks.append(self._internal_rowid_field)
+                arrays_chunks.append(row_ids.to_pylist())
+                column_record_batch = pa.RecordBatch.from_arrays(
+                    arrays_chunks, schema=pa.schema(columns_chunks))
+                self.update(rows=column_record_batch, columns=columns_name_chunk)
         return row_ids
 
     def insert(self,
                rows: Union[pa.RecordBatch, pa.Table],
-               by_columns: bool =
+               by_columns: bool = True) -> pa.ChunkedArray:
         """Insert a RecordBatch into this table."""
         self._assert_not_imports_table()
 
@@ -667,9 +678,14 @@ class TableInTransaction(ITable):
             log.debug("Ignoring empty insert into %s", self.ref)
             return pa.chunked_array([], type=self._internal_rowid_field.type)
 
-
-
-
+        # inserting by columns is faster, so default to doing that
+        # if the cluster supports it and there are actually columns in the rows
+        if by_columns and len(rows.schema):
+            try:
+                self._tx._rpc.features.check_return_row_ids()
+                return self.insert_in_column_batches(rows)
+            except errors.NotSupportedVersion:
+                pass
 
         try:
             row_ids = []
@@ -802,6 +818,25 @@ class TableInTransaction(ITable):
     def _is_sorted_table(self) -> bool:
         return self._metadata.table_type is TableType.Elysium
 
+    def vector_search(
+        self,
+        vec: list[float],
+        columns: list[str],
+        limit: int,
+        predicate: Optional[IbisPredicate] = None,
+    ) -> pa.RecordBatchReader:
+        """Vector Search over vector indexed columns."""
+        assert self.vector_index is not None, "Table is either not vector indexed. (maybe try reloading the TableMetadata)"
+
+        return self._tx.adbc_conn.vector_search(
+            vec,
+            self.vector_index,
+            self.ref,
+            columns,
+            limit,
+            predicate=predicate,
+        )
+
 
 class Table(TableInTransaction):
     """Vast Interactive Table."""
```
vastdb/table_metadata.py
CHANGED
```diff
@@ -4,17 +4,19 @@ import logging
 from copy import deepcopy
 from dataclasses import dataclass
 from enum import Enum
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING, Optional
 
 import ibis
 import pyarrow as pa
 
 from vastdb import errors
 from vastdb._ibis_support import validate_ibis_support_schema
+from vastdb._internal import TableStats, VectorIndex
 
 if TYPE_CHECKING:
     from .transaction import Transaction
 
+
 log = logging.getLogger(__name__)
 
 
@@ -39,26 +41,16 @@ class TableRef:
         """Table full path."""
         return f"{self.bucket}/{self.schema}/{self.table}"
 
+    @property
+    def query_engine_full_path(self) -> str:
+        """Table full path for VastDB Query Engine."""
+        return f'"{self.bucket}/{self.schema}".{self.table}'
+
     def __str__(self) -> str:
         """Table full path."""
         return self.full_path
 
 
-@dataclass
-class TableStats:
-    """Table-related information."""
-
-    num_rows: int
-    size_in_bytes: int
-    sorting_score: int
-    write_amplification: int
-    acummulative_row_inserition_count: int
-    is_external_rowid_alloc: bool = False
-    sorting_key_enabled: bool = False
-    sorting_done: bool = False
-    endpoints: Tuple[str, ...] = ()
-
-
 class TableMetadata:
     """Table Metadata."""
 
@@ -67,25 +59,29 @@ class TableMetadata:
     _sorted_columns: Optional[list[str]]
     _ibis_table: ibis.Table
     _stats: Optional[TableStats]
-
-
-
-
-
+    _vector_index: Optional[VectorIndex]
+
+    def __init__(
+        self,
+        ref: TableRef,
+        arrow_schema: Optional[pa.Schema] = None,
+        table_type: Optional[TableType] = None,
+        vector_index: Optional[VectorIndex] = None,
+    ):
         """Table Metadata."""
         self._ref = deepcopy(ref)
         self._table_type = table_type
         self.arrow_schema = deepcopy(arrow_schema)
         self._sorted_columns = None
         self._stats = None
+        self._vector_index = vector_index
 
     def __eq__(self, other: object) -> bool:
         """TableMetadata Equal."""
         if not isinstance(other, TableMetadata):
             return False
 
-        return
-            self._table_type == other._table_type)
+        return self._ref == other._ref and self._table_type == other._table_type
 
     def rename_table(self, name: str) -> None:
         """Rename table metadata's table name."""
@@ -110,7 +106,8 @@ class TableMetadata:
                 table=self.ref.table,
                 next_key=next_key,
                 txid=tx.active_txid,
-                list_imports_table=self.is_imports_table
+                list_imports_table=self.is_imports_table,
+            )
             fields.extend(cur_columns)
             if not is_truncated:
                 break
@@ -123,9 +120,16 @@ class TableMetadata:
         try:
             next_key = 0
             while True:
-                cur_columns, next_key, is_truncated, _count =
-
-
+                cur_columns, next_key, is_truncated, _count = (
+                    tx._rpc.api.list_sorted_columns(
+                        bucket=self.ref.bucket,
+                        schema=self.ref.schema,
+                        table=self.ref.table,
+                        next_key=next_key,
+                        txid=tx.active_txid,
+                        list_imports_table=self.is_imports_table,
+                    )
+                )
                 fields.extend(cur_columns)
                 if not is_truncated:
                     break
@@ -133,7 +137,9 @@ class TableMetadata:
             raise
         except errors.InternalServerError as ise:
             log.warning(
-                "Failed to get the sorted columns Elysium might not be supported: %s",
+                "Failed to get the sorted columns Elysium might not be supported: %s",
+                ise,
+            )
             raise
         except errors.NotSupportedVersion:
             log.warning("Failed to get the sorted columns, Elysium not supported")
@@ -143,10 +149,13 @@ class TableMetadata:
 
     def load_stats(self, tx: "Transaction") -> None:
         """Load/Reload table stats."""
-
-            bucket=self.ref.bucket,
-
-
+        self._stats = tx._rpc.api.get_table_stats(
+            bucket=self.ref.bucket,
+            schema=self.ref.schema,
+            name=self.ref.table,
+            txid=tx.active_txid,
+            imports_table_stats=self.is_imports_table,
+        )
 
         is_elysium_table = self._stats.sorting_key_enabled
 
@@ -161,6 +170,18 @@ class TableMetadata:
                 "Actual table is sorted (TableType.Elysium), was not inited as TableType.Elysium"
             )
 
+        self._parse_stats_vector_index()
+
+    def _parse_stats_vector_index(self):
+        vector_index_is_set = self._vector_index is not None
+
+        if vector_index_is_set and self._stats.vector_index != self._vector_index:
+            raise ValueError(
+                f"Table has index {self._stats.vector_index}, but was initialized as {self._vector_index}"
+            )
+        else:
+            self._vector_index = self._stats.vector_index
+
     def _set_sorted_table(self, tx: "Transaction"):
         self._table_type = TableType.Elysium
         tx._rpc.features.check_elysium()
@@ -184,7 +205,9 @@ class TableMetadata:
         if arrow_schema:
             validate_ibis_support_schema(arrow_schema)
             self._arrow_schema = arrow_schema
-            self._ibis_table = ibis.table(
+            self._ibis_table = ibis.table(
+                ibis.Schema.from_pyarrow(arrow_schema), self._ref.full_path
+            )
         else:
             self._arrow_schema = None
             self._ibis_table = None
@@ -211,7 +234,8 @@ class TableMetadata:
         """Table's type."""
        if self._table_type is None:
             raise ValueError(
-                "TableType was not loaded. load using TableMetadata.load_stats"
+                "TableType was not loaded. load using TableMetadata.load_stats"
+            )
 
         return self._table_type
 
```
vastdb/tests/test_adbc_integration.py
ADDED
```diff
@@ -0,0 +1,129 @@
+from typing import Optional
+from unittest.mock import MagicMock, patch
+
+import pyarrow as pa
+import pytest
+
+from vastdb._adbc import END_USER_PROPERTY, AdbcDriver
+from vastdb.session import Session
+from vastdb.table_metadata import TableRef
+from vastdb.transaction import NoAdbcConnectionError
+
+
+def test_sanity(session_factory, clean_bucket_name: str):
+    session = session_factory(with_adbc=True)
+
+    arrow_schema = pa.schema([("n", pa.int32())])
+
+    ref = TableRef(clean_bucket_name, "s", "t")
+    data_table = pa.table(schema=arrow_schema, data=[[1, 2, 3, 4, 5]])
+
+    with session.transaction() as tx:
+        table = (
+            tx.bucket(clean_bucket_name)
+            .create_schema("s")
+            .create_table("t", arrow_schema)
+        )
+        table.insert(data_table)
+
+    with session.transaction() as tx:
+        tx.adbc_conn.cursor.execute(f"SELECT * FROM {ref.query_engine_full_path}")
+        res = tx.adbc_conn.cursor.fetchall()
+
+        assert res == [(1,), (2,), (3,), (4,), (5,)]
+
+
+def test_adbc_shares_tx(session_factory, clean_bucket_name: str):
+    session = session_factory(with_adbc=True)
+
+    arrow_schema = pa.schema([("n", pa.int32())])
+
+    data_table = pa.table(schema=arrow_schema, data=[[1, 2, 3, 4, 5]])
+
+    with session.transaction() as tx:
+        table = (
+            tx.bucket(clean_bucket_name)
+            .create_schema("s")
+            .create_table("t", arrow_schema)
+        )
+        table.insert(data_table)
+
+        # expecting adbc execute to "see" table if it shares the transaction with the pysdk
+        tx.adbc_conn.cursor.execute(f"SELECT * FROM {table.ref.query_engine_full_path}")
+        assert tx.adbc_conn.cursor.fetchall() == [(1,), (2,), (3,), (4,), (5,)]
+
+
+def test_adbc_conn_unreachable_tx_close(session_factory):
+    session = session_factory(with_adbc=True)
+
+    with session.transaction() as tx:
+        assert tx.adbc_conn is not None
+
+    # adbc conn should not be reachable after tx close
+    with pytest.raises(NoAdbcConnectionError):
+        tx.adbc_conn
+
+
+def test_two_simulatnious_txs_with_adbc(session_factory, clean_bucket_name: str):
+    session = session_factory(with_adbc=True)
+
+    arrow_schema = pa.schema([("n", pa.int32())])
+
+    data_table = pa.table(schema=arrow_schema, data=[[1, 2, 3, 4, 5]])
+
+    with session.transaction() as tx:
+        table = (
+            tx.bucket(clean_bucket_name)
+            .create_schema("s")
+            .create_table("t1", arrow_schema)
+        )
+        table.insert(data_table)
+
+        # expecting adbc execute to "see" table if it shares the transaction with the pysdk
+        tx.adbc_conn.cursor.execute(f"SELECT * FROM {table.ref.query_engine_full_path}")
+        assert tx.adbc_conn.cursor.fetchall() == [(1,), (2,), (3,), (4,), (5,)]
+
+    with session.transaction() as tx:
+        table = (
+            tx.bucket(clean_bucket_name).schema("s").create_table("t2", arrow_schema)
+        )
+        table.insert(data_table)
+
+        # expecting adbc execute to "see" table if it shares the transaction with the pysdk
+        tx.adbc_conn.cursor.execute(f"SELECT * FROM {table.ref.query_engine_full_path}")
+        assert tx.adbc_conn.cursor.fetchall() == [(1,), (2,), (3,), (4,), (5,)]
+
+
+@pytest.mark.parametrize("end_user", [("mock-end-user",), (None,)])
+def test_end_user_passed_to_adbc_connect(end_user: Optional[str]):
+    mock_driver = MagicMock(spec=AdbcDriver)
+    mock_driver.local_path = "/mock/driver/path"
+
+    with (
+        patch("vastdb._adbc.connect") as mock_connect,
+        patch("vastdb._internal.VastdbApi") as MockVastdbApi,
+    ):
+        mock_api_instance = MockVastdbApi.return_value
+        mock_api_instance.begin_transaction.return_value.headers = {
+            "tabular-txid": "12345"
+        }
+        # A version that supports everything needed.
+        mock_api_instance.vast_version = (5, 4, 0, 0)
+
+        session = Session(
+            access="test_access",
+            secret="test_secret",
+            endpoint="http://localhost:9090",
+            adbc_driver=mock_driver,
+            end_user=end_user,
+        )
+
+        with session.transaction():
+            # The ADBC connection is established when the transaction starts
+            pass
+
+        mock_connect.assert_called_once()
+        call_kwargs = mock_connect.call_args.kwargs
+        conn_kwargs = call_kwargs.get("conn_kwargs", {})
+        assert (end_user is None) ^ (END_USER_PROPERTY in conn_kwargs)
+        assert (end_user is None) ^ (conn_kwargs.get(END_USER_PROPERTY) == end_user)
```
vastdb/tests/test_tables.py
CHANGED
```diff
@@ -1,5 +1,6 @@
 import datetime as dt
 import decimal
+import itertools
 import logging
 import random
 import threading
@@ -17,7 +18,7 @@ from requests.exceptions import HTTPError
 
 from vastdb import errors
 from vastdb.session import Session
-from vastdb.table import INTERNAL_ROW_ID, QueryConfig
+from vastdb.table import INTERNAL_ROW_ID, MAX_COLUMN_IN_BATCH, QueryConfig
 
 from .util import assert_row_ids_ascending_on_first_insertion_to_table, prepare_data
 
@@ -95,6 +96,39 @@ def test_insert_wide_row(session, clean_bucket_name):
         assert actual == expected
 
 
+@pytest.mark.parametrize("num_columns,insert_by_columns", itertools.product([
+    MAX_COLUMN_IN_BATCH // 2,
+    MAX_COLUMN_IN_BATCH - 1,
+    MAX_COLUMN_IN_BATCH,
+    MAX_COLUMN_IN_BATCH + 1,
+    MAX_COLUMN_IN_BATCH * 2,
+    MAX_COLUMN_IN_BATCH * 10,
+],
+    [False, True]
+)
+)
+def test_insert_by_columns_variations(session, clean_bucket_name, num_columns, insert_by_columns):
+    columns = pa.schema([pa.field(f'i{i}', pa.int64()) for i in range(num_columns)])
+    data = [[i] for i in range(num_columns)]
+    expected = pa.table(schema=columns, data=data)
+
+    with prepare_data(session, clean_bucket_name, 's', 't', expected, insert_by_columns=insert_by_columns) as t:
+        actual = t.select().read_all()
+        assert actual == expected
+
+
+@pytest.mark.parametrize("sorting_key", [0, 40, 80, 120])
+def test_insert_by_columns_sorted(session, clean_bucket_name, sorting_key):
+    num_columns = 160
+    columns = pa.schema([pa.field(f'i{i}', pa.int64()) for i in range(num_columns)])
+    data = [[i] for i in range(num_columns)]
+    expected = pa.table(schema=columns, data=data)
+
+    with prepare_data(session, clean_bucket_name, 's', 't', expected, sorting_key=[sorting_key], insert_by_columns=True) as t:
+        actual = t.select().read_all()
+        assert actual == expected
+
+
 def test_multi_batch_table(session, clean_bucket_name):
     columns = pa.schema([pa.field('s', pa.utf8())])
     expected = pa.Table.from_batches([
```
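
The new tests exercise column counts on both sides of `MAX_COLUMN_IN_BATCH`. The chunk boundaries they implicitly cover follow the loop in `insert_in_column_batches`; the helper below is a hypothetical standalone sketch of that arithmetic, not part of the package:

```python
from vastdb.table import MAX_COLUMN_IN_BATCH


def column_chunks(num_columns: int) -> list[tuple[int, int]]:
    """Hypothetical helper: (start, end) column ranges of each insert/update RPC."""
    chunks = []
    for start in range(0, num_columns, MAX_COLUMN_IN_BATCH):
        end = min(start + MAX_COLUMN_IN_BATCH, num_columns)
        chunks.append((start, end))
    return chunks


# The first chunk is inserted (yielding row ids); each later chunk is applied
# as an update keyed by those row ids, with sorted columns forced into the
# first chunk on Elysium (sorted) tables.
```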
vastdb/tests/test_vector_index.py
ADDED
```diff
@@ -0,0 +1,162 @@
+"""Tests for vector index functionality."""
+
+import logging
+from typing import Optional
+
+import pyarrow as pa
+import pytest
+
+from vastdb import errors
+from vastdb._internal import VectorIndexSpec
+from vastdb.session import Session
+
+log = logging.getLogger(__name__)
+
+
+@pytest.mark.parametrize("table_name,vector_index", [
+    # Test 1: Table without vector index
+    ("table_without_index", None),
+    # Test 2: Table with L2 vector index
+    ("table_with_l2_index", VectorIndexSpec("embedding", "l2sq")),
+    # Test 3: Table with inner product vector index
+    ("table_with_ip_index", VectorIndexSpec("embedding", "ip")),
+])
+def test_create_table_with_vector_index_metadata(session: Session,
+                                                 clean_bucket_name: str,
+                                                 table_name: str,
+                                                 vector_index: Optional[VectorIndexSpec]):
+    """Test that table creation and stats retrieval work correctly with vector index metadata."""
+    schema_name = "schema1"
+
+    with session.transaction() as tx:
+        log.info(f"Testing table '{table_name}' with {vector_index}")
+
+        # Create schema
+        bucket = tx.bucket(clean_bucket_name)
+        schema = bucket.create_schema(schema_name)
+
+        # Create the appropriate schema based on whether vector index is needed
+        if vector_index is None:
+            # Simple table without vector index
+            arrow_schema = pa.schema([
+                ('id', pa.int64()),
+                ('data', pa.string())
+            ])
+        else:
+            # Table with vector column
+            vector_dimension = 128  # Fixed-size vector dimension
+            vec_type = pa.list_(pa.field('', pa.float32(), False), vector_dimension)
+            arrow_schema = pa.schema([
+                ('id', pa.int64()),
+                ('embedding', vec_type)  # Fixed-size vector column
+            ])
+
+        # Create table with or without vector index
+        log.info(f"Creating table: {table_name}")
+        table = schema.create_table(
+            table_name=table_name,
+            columns=arrow_schema,
+            vector_index=vector_index
+        )
+
+        # Reload stats to ensure we get the vector index metadata
+        table.reload_stats()
+
+        # Get vector index metadata
+        result_vector_index = table._metadata._vector_index
+
+        log.info(f"Vector index metadata: {result_vector_index}")
+
+        # Assert expected values (should match input parameters)
+        result_vector_index_spec = (
+            None
+            if result_vector_index is None
+            else result_vector_index.to_vector_index_spec()
+        )
+        assert result_vector_index_spec == vector_index
+
+        log.info(f"✓ Test passed for table '{table_name}'")
+
+
+@pytest.mark.parametrize("table_name,vector_index,expected_error", [
+    # Test 1: Invalid column name (column doesn't exist in schema)
+    ("table_invalid_column", VectorIndexSpec("nonexistent_column", "l2sq"), "invalid vector indexed column name nonexistent_column"),
+    # Test 2: Invalid distance metric
+    ("table_invalid_metric", VectorIndexSpec("embedding", "invalid_metric"), "invalid vector index distance metric invalid_metric, supported metrics: 'l2sq', 'ip'"),
+])
+def test_create_table_with_invalid_vector_index(session: Session,
+                                                clean_bucket_name: str,
+                                                table_name: str,
+                                                vector_index: VectorIndexSpec,
+                                                expected_error: str):
+    """Test that table creation fails with appropriate error messages for invalid vector index parameters."""
+    schema_name = "schema1"
+
+    with session.transaction() as tx:
+        log.info(f"Testing invalid table '{table_name}' with vector_index={vector_index}, expected_error={expected_error}")
+
+        # Create schema
+        bucket = tx.bucket(clean_bucket_name)
+        schema = bucket.create_schema(schema_name)
+
+        # Table with vector column
+        vector_dimension = 128  # Fixed-size vector dimension
+        vec_type = pa.list_(pa.field('', pa.float32(), False), vector_dimension)
+        arrow_schema = pa.schema([
+            ('id', pa.int64()),
+            ('embedding', vec_type)  # Fixed-size vector column
+        ])
+
+        # Attempt to create table with invalid parameters - should raise an error
+        log.info(f"Attempting to create invalid table: {table_name}")
+        with pytest.raises((errors.BadRequest)) as exc_info:
+            schema.create_table(
+                table_name=table_name,
+                columns=arrow_schema,
+                vector_index=vector_index
+            )
+
+        # Verify the error message contains the expected error text
+        assert expected_error in str(exc_info.value), \
+            f"Expected error message to contain '{expected_error}', got '{str(exc_info.value)}'"
+
+        log.info(f"✓ Test passed for invalid table '{table_name}'")
+
+
+def test_vector_index_metadata_from_stats(session: Session, clean_bucket_name: str):
+    """Test that vector index metadata is correctly retrieved from table stats."""
+    schema_name = "schema1"
+    table_name = "vector_table"
+
+    with session.transaction() as tx:
+        # Create schema
+        bucket = tx.bucket(clean_bucket_name)
+        schema = bucket.create_schema(schema_name)
+
+        # Create table with vector index
+        vector_dimension = 128
+        vec_type = pa.list_(pa.field('', pa.float32(), False), vector_dimension)
+        arrow_schema = pa.schema([
+            ('id', pa.int64()),
+            ('embedding', vec_type)
+        ])
+
+        table = schema.create_table(
+            table_name=table_name,
+            columns=arrow_schema,
+            vector_index=VectorIndexSpec("embedding", "l2sq")
+        )
+
+        # Check stats object directly
+        stats = table.stats
+        assert stats is not None
+        assert stats.vector_index is not None
+        assert stats.vector_index.column == "embedding"
+        assert stats.vector_index.distance_metric == "l2sq"
+
+        # Check via the table methods
+        assert table._metadata._vector_index is not None
+        assert table._metadata._vector_index.column == "embedding"
+        assert table._metadata._vector_index.distance_metric == "l2sq"
+
+        log.info("✓ Vector index metadata correctly retrieved from stats")
```