vastdb 2.0.5__py3-none-any.whl → 2.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vastdb/_internal.py CHANGED
@@ -47,6 +47,7 @@ import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.DecimalLiteral as
 import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Expression as fb_expression
 import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.FieldIndex as fb_field_index
 import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.FieldRef as fb_field_ref
+import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Float16Literal as fb_float16_lit
 import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Float32Literal as fb_float32_lit
 import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Float64Literal as fb_float64_lit
 import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Int8Literal as fb_int8_lit
@@ -411,15 +412,19 @@ class Predicate:

 if pa.types.is_floating(pa_type):
     impl_type, impl_class = None, None
-    value = float(value)

-    if pa.types.is_float32(pa_type):
-        impl_type, impl_class = LiteralImpl.Float32Literal, fb_float32_lit
-    elif pa.types.is_float64(pa_type):
-        impl_type, impl_class = LiteralImpl.Float64Literal, fb_float64_lit
+    if pa.types.is_float16(pa_type):
+        import numpy as np
+        value = np.float16(value).view(np.uint16).item()
+        impl_type, impl_class = LiteralImpl.Float16Literal, fb_float16_lit
     else:
-        # Float16 is not supported by Vast.
-        raise ValueError(f'unsupported floating point predicate type: {pa_type}, value={value}')
+        value = float(value)
+        if pa.types.is_float32(pa_type):
+            impl_type, impl_class = LiteralImpl.Float32Literal, fb_float32_lit
+        elif pa.types.is_float64(pa_type):
+            impl_type, impl_class = LiteralImpl.Float64Literal, fb_float64_lit
+        else:
+            raise ValueError(f'unsupported floating point predicate type: {pa_type}, value={value}')

 impl_class.Start(self.builder)
 impl_class.AddValue(self.builder, value)
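The new branch encodes a half-precision predicate literal as its raw IEEE-754 bit pattern (a 16-bit unsigned integer) rather than as a Python float, presumably because the Float16Literal flatbuffer stores its value as a 16-bit integer. A minimal sketch of that encoding step and its inverse, using only numpy (the value here is illustrative):

    import numpy as np

    # Encode 1.5 the same way the added Predicate branch above does.
    value = 1.5
    bits = np.float16(value).view(np.uint16).item()   # 0x3E00 == 15872 for 1.5

    # Reversing the view recovers the half-precision value.
    decoded = np.uint16(bits).view(np.float16).item()
    assert decoded == np.float16(value)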
@@ -783,7 +788,7 @@ def _decode_table_props(s):
     return {y: _prop_coding[x][1](z) for x, y, z in triplets if z != ''}


-TableInfo = namedtuple('TableInfo', 'name properties handle num_rows size_in_bytes num_partitions sorting_key_enabled sorting_score write_amplification acummulative_row_insertion_count sorting_done')
+TableInfo = namedtuple('TableInfo', 'name properties handle num_rows size_in_bytes num_partitions sorting_key_enabled sorting_score write_amplification acummulative_row_insertion_count sorting_done vector_index_enabled vector_index_column_name vector_index_distance_metric vector_index_sql_function_name')


 def _parse_table_info(obj, parse_properties):
@@ -801,8 +806,15 @@ def _parse_table_info(obj, parse_properties):

     sorting_score = sorting_score_raw & ((1 << 63) - 1)
     sorting_done = bool(sorting_score_raw >> 63)
+
+    vector_index_enabled = obj.VectorIndexEnabled() if hasattr(obj, 'VectorIndexEnabled') else False
+    vector_index_column_name = obj.VectorIndexColumnName().decode() if hasattr(obj, 'VectorIndexColumnName') and obj.VectorIndexColumnName() else ""
+    vector_index_distance_metric = obj.VectorIndexDistanceMetric().decode() if hasattr(obj, 'VectorIndexDistanceMetric') and obj.VectorIndexDistanceMetric() else ""
+    vector_index_sql_function_name = obj.VectorIndexSqlFunctionName().decode() if hasattr(obj, 'VectorIndexSqlFunctionName') and obj.VectorIndexSqlFunctionName() else ""
+
     return TableInfo(name, properties, handle, num_rows, used_bytes, num_partitions, sorting_key_enabled,
-                     sorting_score, write_amplification, acummulative_row_insertion_count, sorting_done)
+                     sorting_score, write_amplification, acummulative_row_insertion_count, sorting_done,
+                     vector_index_enabled, vector_index_column_name, vector_index_distance_metric, vector_index_sql_function_name)

  @dataclass
@@ -1384,20 +1396,24 @@ class VastdbApi:
             parse_properties=_decode_table_props, max_keys=max_keys,
             next_key=next_key, name_prefix=name_prefix, exact_match=exact_match,
             expected_retvals=expected_retvals,
-            include_list_stats=include_list_stats, count_only=count_only)
+            include_list_stats=include_list_stats, count_only=count_only,
+            include_vector_index_metadata=False)

     def list_tables(self, bucket, schema, txid=0, client_tags=[], max_keys=1000, next_key=0, name_prefix="",
-                    exact_match=False, expected_retvals=[], include_list_stats=False, count_only=False):
+                    exact_match=False, expected_retvals=[], include_list_stats=False, count_only=False,
+                    include_vector_index_metadata=False):
         def parse_properties(x):
             return x
         return self._list_tables_internal(bucket=bucket, schema=schema, txid=txid, client_tags=client_tags,
                                           parse_properties=parse_properties, max_keys=max_keys, next_key=next_key,
                                           name_prefix=name_prefix, exact_match=exact_match,
                                           expected_retvals=expected_retvals,
-                                          include_list_stats=include_list_stats, count_only=count_only)
+                                          include_list_stats=include_list_stats, count_only=count_only,
+                                          include_vector_index_metadata=include_vector_index_metadata)

     def _list_tables_raw(self, bucket, schema, txid=0, client_tags=[], max_keys=1000, next_key=0, name_prefix="",
-                         exact_match=False, expected_retvals=[], include_list_stats=False, count_only=False):
+                         exact_match=False, expected_retvals=[], include_list_stats=False, count_only=False,
+                         include_vector_index_metadata=False):
        """
        GET /mybucket/schema_path?table HTTP/1.1
        tabular-txid: TransactionId
@@ -1416,6 +1432,7 @@ class VastdbApi:

         headers['tabular-list-count-only'] = str(count_only)
         headers['tabular-include-list-stats'] = str(include_list_stats)
+        headers['tabular-include-vector-index-meta-data'] = str(include_vector_index_metadata).lower()

         res = self._request(
             method="GET",
@@ -1431,11 +1448,13 @@ class VastdbApi:
         return lists, next_key, is_truncated, count

     def _list_tables_internal(self, bucket, schema, parse_properties, txid=0, client_tags=[], max_keys=1000, next_key=0, name_prefix="",
-                              exact_match=False, expected_retvals=[], include_list_stats=False, count_only=False):
+                              exact_match=False, expected_retvals=[], include_list_stats=False, count_only=False,
+                              include_vector_index_metadata=False):
         tables = []
         lists, next_key, is_truncated, count = self._list_tables_raw(bucket, schema, txid=txid, client_tags=client_tags, max_keys=max_keys,
                                                                      next_key=next_key, name_prefix=name_prefix, exact_match=exact_match, expected_retvals=expected_retvals,
-                                                                     include_list_stats=include_list_stats, count_only=count_only)
+                                                                     include_list_stats=include_list_stats, count_only=count_only,
+                                                                     include_vector_index_metadata=include_vector_index_metadata)
         bucket_name = lists.BucketName().decode()
         schema_name = lists.SchemaName().decode()
         if not bucket.startswith(bucket_name):  # ignore snapshot name
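Taken together, these changes thread a single opt-in flag from `list_tables` down to the `tabular-include-vector-index-meta-data` request header and surface the result through the widened `TableInfo` tuple. A hedged usage sketch, with the bucket/schema names, `api` handle and `txid` as illustrative placeholders (obtained the same way `schema.py` does further down):

    # Illustrative only: `api` is the internal RPC client (tx._rpc.api), `txid` an open transaction id.
    _bucket, _schema, tables, next_key, is_truncated, _count = api.list_tables(
        bucket="my-bucket", schema="my-schema", txid=txid,
        include_vector_index_metadata=True,   # opt-in flag added in this diff; defaults to False
    )

    for t in tables:
        if t.vector_index_enabled:
            # The three string fields stay "" unless the flag above is True.
            print(t.name, t.vector_index_column_name,
                  t.vector_index_distance_metric, t.vector_index_sql_function_name)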
@@ -2283,6 +2302,12 @@ def get_field_type(builder: flatbuffers.Builder, field: pa.Field):
         fb_int.AddIsSigned(builder, False)
         field_type = fb_int.End(builder)

+    elif field.type.equals(pa.float16()):
+        field_type_type = Type.FloatingPoint
+        fb_floating_point.Start(builder)
+        fb_floating_point.AddPrecision(builder, 0)  # half
+        field_type = fb_floating_point.End(builder)
+
     elif field.type.equals(pa.float32()):
         field_type_type = Type.FloatingPoint
         fb_floating_point.Start(builder)
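The 0 passed to AddPrecision selects half precision: in the Arrow flatbuffer schema the FloatingPoint precision enum is HALF = 0, SINGLE = 1, DOUBLE = 2, so the existing float32/float64 branches presumably pass 1 and 2. A small sketch of that mapping (pyarrow types used purely for illustration):

    import pyarrow as pa

    # Arrow flatbuffer FloatingPoint precision values; the new branch above uses 0 for pa.float16().
    ARROW_PRECISION = {
        pa.float16(): 0,  # HALF
        pa.float32(): 1,  # SINGLE
        pa.float64(): 2,  # DOUBLE
    }
    assert ARROW_PRECISION[pa.float16()] == 0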
vastdb/schema.py CHANGED
@@ -14,7 +14,7 @@ from vastdb.table_metadata import TableMetadata, TableRef, TableType

 from . import bucket, errors, schema, table
 from ._ibis_support import validate_ibis_support_schema
-from ._internal import VectorIndexSpec
+from ._internal import VectorIndex, VectorIndexSpec

 if TYPE_CHECKING:
     from .table import Table
@@ -130,7 +130,7 @@ class Schema:
             log.debug("Found table: %s", t[0])
             return t[0]

-    def _iter_tables(self, table_name=None, page_size=1000):
+    def _iter_tables(self, table_name=None, page_size=1000, include_vector_index_metadata=False):
        next_key = 0
        name_prefix = table_name if table_name else ""
        exact_match = bool(table_name)
@@ -138,7 +138,8 @@ class Schema:
             _bucket_name, _schema_name, curr_tables, next_key, is_truncated, _ = \
                 self.tx._rpc.api.list_tables(
                     bucket=self.bucket.name, schema=self.name, next_key=next_key, max_keys=page_size, txid=self.tx.active_txid,
-                    exact_match=exact_match, name_prefix=name_prefix, include_list_stats=exact_match)
+                    exact_match=exact_match, name_prefix=name_prefix, include_list_stats=exact_match,
+                    include_vector_index_metadata=include_vector_index_metadata)
             if not curr_tables:
                 break
             yield from curr_tables
@@ -177,7 +178,17 @@ def _parse_table_info(table_info, schema: "schema.Schema"):
                    table=table_info.name)

     table_type = TableType.Elysium if table_info.sorting_key_enabled else TableType.Regular
-    table_metadata = TableMetadata(ref, table_type=table_type)
+
+    # populate vector_index from list_tables if vector index is enabled
+    vector_index = None
+    if table_info.vector_index_enabled:
+        vector_index = VectorIndex(
+            column=table_info.vector_index_column_name,
+            distance_metric=table_info.vector_index_distance_metric,
+            sql_distance_function=table_info.vector_index_sql_function_name
+        )
+
+    table_metadata = TableMetadata(ref, table_type=table_type, vector_index=vector_index)

     return table.Table(handle=int(table_info.handle),
                        metadata=table_metadata,
vastdb/table.py CHANGED
@@ -226,7 +226,7 @@ class TableInTransaction(ITable):

     @property
     def _internal_rowid_field(self) -> pa.Field:
-        return INTERNAL_ROW_ID_SORTED_FIELD if self._is_sorted_table else INTERNAL_ROW_ID_FIELD
+        return INTERNAL_ROW_ID_SORTED_FIELD if self._uses_global_row_ids else INTERNAL_ROW_ID_FIELD

     def sorted_columns(self) -> list[pa.Field]:
         """Return sorted columns' metadata."""
@@ -818,6 +818,16 @@ class TableInTransaction(ITable):
     def _is_sorted_table(self) -> bool:
         return self._metadata.table_type is TableType.Elysium

+    @property
+    def _uses_global_row_ids(self) -> bool:
+        """Check if table uses global row IDs (decimal128: ehandle + row_id).
+
+        Both Elysium and Vector Index tables use global row IDs.
+        """
+        # _vector_index is set from list_tables or synced from stats.vector_index by _parse_stats_vector_index()
+        has_vector_index = self._metadata._vector_index is not None
+        return self._is_sorted_table or has_vector_index
+
     def vector_search(
         self,
         vec: list[float],
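In effect, `_internal_rowid_field` now switches to the wider row-id column for any table that is either sorted (Elysium) or carries a vector index. A hedged sketch of the selection logic in isolation; the two field definitions below are stand-ins for illustration only (the real constants live elsewhere in vastdb/table.py):

    import pyarrow as pa

    # Stand-in definitions; the docstring above only tells us the global form is decimal128.
    INTERNAL_ROW_ID_FIELD = pa.field("$row_id", pa.uint64())
    INTERNAL_ROW_ID_SORTED_FIELD = pa.field("$row_id", pa.decimal128(38, 0))

    def internal_rowid_field(is_sorted_table: bool, has_vector_index: bool) -> pa.Field:
        # Mirrors TableInTransaction._internal_rowid_field after this change:
        # global row IDs for Elysium tables *and* vector-indexed tables.
        uses_global_row_ids = is_sorted_table or has_vector_index
        return INTERNAL_ROW_ID_SORTED_FIELD if uses_global_row_ids else INTERNAL_ROW_ID_FIELD

    assert internal_rowid_field(False, True) == INTERNAL_ROW_ID_SORTED_FIELD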
vastdb/table_metadata.py CHANGED
@@ -173,14 +173,14 @@ class TableMetadata:
         self._parse_stats_vector_index()

     def _parse_stats_vector_index(self):
-        vector_index_is_set = self._vector_index is not None
+        if self._vector_index is not None and self._stats.vector_index != self._vector_index:
+            is_empty_placeholder = not self._vector_index.column and not self._vector_index.distance_metric
+            if not is_empty_placeholder:
+                raise ValueError(
+                    f"Table has index {self._stats.vector_index}, but was initialized as {self._vector_index}"
+                )

-        if vector_index_is_set and self._stats.vector_index != self._vector_index:
-            raise ValueError(
-                f"Table has index {self._stats.vector_index}, but was initialized as {self._vector_index}"
-            )
-        else:
-            self._vector_index = self._stats.vector_index
+        self._vector_index = self._stats.vector_index

     def _set_sorted_table(self, tx: "Transaction"):
         self._table_type = TableType.Elysium
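The reworked check tolerates the empty placeholder that listing code can now attach (a VectorIndex with empty `column` and `distance_metric`), silently replacing it with the server-reported index instead of raising. A minimal sketch of the three cases, using a hypothetical dataclass stand-in for `VectorIndex`:

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class VectorIndex:              # stand-in mirroring the fields used above
        column: str = ""
        distance_metric: str = ""
        sql_distance_function: str = ""

    def reconcile(local: Optional[VectorIndex], from_stats: Optional[VectorIndex]) -> Optional[VectorIndex]:
        """Mirror _parse_stats_vector_index: keep stats unless a real, conflicting index was set locally."""
        if local is not None and from_stats != local:
            is_empty_placeholder = not local.column and not local.distance_metric
            if not is_empty_placeholder:
                raise ValueError(f"Table has index {from_stats}, but was initialized as {local}")
        return from_stats

    stats_idx = VectorIndex("embedding", "l2sq", "array_distance")
    assert reconcile(None, stats_idx) == stats_idx               # nothing set locally
    assert reconcile(VectorIndex(), stats_idx) == stats_idx      # empty placeholder is tolerated
    # reconcile(VectorIndex("other", "ip"), stats_idx) would raise ValueError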
@@ -9,6 +9,7 @@ from contextlib import closing
 from tempfile import NamedTemporaryFile

 import ibis
+import numpy as np
 import pandas as pd
 import pyarrow as pa
 import pyarrow.compute as pc
@@ -390,6 +391,7 @@ def test_types(session, clean_bucket_name):
         ('a2', pa.int16()),
         ('a4', pa.int64()),
         ('b', pa.float32()),
+        ('f16', pa.float16()),
         ('s', pa.string()),
         ('d', pa.decimal128(7, 3)),
         ('bin', pa.binary()),
@@ -410,6 +412,7 @@ def test_types(session, clean_bucket_name):
         [1999, 2000, 2001],
         [11122221, 222111122, 333333],
         [0.5, 1.5, 2.5],
+        [np.float16(0.5), np.float16(1.5), np.float16(2.5)],
         ["a", "v", "s"],
         [decimal.Decimal('110.52'), decimal.Decimal('231.15'), decimal.Decimal('3332.44')],
         [b"\x01\x02", b"\x01\x05", b"\x01\x07"],
@@ -436,6 +439,13 @@ def test_types(session, clean_bucket_name):
     assert select(t['a2'] == 2000) == expected.filter(pc.field('a2') == 2000)
     assert select(t['a4'] == 222111122) == expected.filter(pc.field('a4') == 222111122)
     assert select(t['b'] == 1.5) == expected.filter(pc.field('b') == 1.5)
+
+    # Test float16 predicate (PyArrow compute doesn't support float16, so validate manually)
+    f16_literal = np.float16(1.5)
+    result = select(t['f16'] == f16_literal)
+    assert len(result) == 1, f"Expected 1 row for f16==1.5, got {len(result)}"
+    assert np.float16(result.column('f16')[0].as_py()) == f16_literal
+
     assert select(t['s'] == "v") == expected.filter(pc.field('s') == "v")
     assert select(t['d'] == 231.15) == expected.filter(pc.field('d') == 231.15)
     assert select(t['bin'] == b"\x01\x02") == expected.filter(pc.field('bin') == b"\x01\x02")
@@ -468,6 +478,59 @@ def test_types(session, clean_bucket_name):
     assert select(t['ts9'] == ts_literal) == expected.filter(pc.field('ts9') == ts_literal)


+@pytest.mark.parametrize("element_type,test_name", [
+    (pa.float32(), "float32"),
+    (pa.float16(), "float16"),
+])
+def test_vector_types(session, clean_bucket_name, element_type, test_name):
+    """Test vector (fixed-size list) columns with different element types."""
+    vector_dim = 3
+    vec_type = pa.list_(pa.field('', element_type, False), vector_dim)
+
+    columns = pa.schema([
+        ('id', pa.int32()),
+        ('vector', vec_type),
+    ])
+
+    # Create test data based on element type
+    if element_type == pa.float16():
+        test_vectors = [
+            [np.float16(1.0), np.float16(2.0), np.float16(3.0)],
+            [np.float16(4.5), np.float16(5.5), np.float16(6.5)],
+            [np.float16(-1.0), np.float16(0.0), np.float16(1.0)],
+        ]
+    else: # float32
+        test_vectors = [
+            [1.0, 2.0, 3.0],
+            [4.5, 5.5, 6.5],
+            [-1.0, 0.0, 1.0],
+        ]
+
+    expected = pa.table(schema=columns, data=[
+        [0, 1, 2],
+        test_vectors,
+    ])
+
+    with prepare_data(session, clean_bucket_name, 's', 't', expected) as table:
+        # Read back and verify
+        actual = table.select().read_all()
+        assert actual.schema == columns
+        assert len(actual) == 3
+
+        # Verify vector data
+        actual_vectors = actual.column('vector').to_pylist()
+        for i, (actual_vec, expected_vec) in enumerate(zip(actual_vectors, test_vectors)):
+            assert len(actual_vec) == vector_dim, f"Wrong vector dimension at row {i}"
+            for j, (act, exp) in enumerate(zip(actual_vec, expected_vec)):
+                if element_type == pa.float16():
+                    assert np.float16(act) == np.float16(exp), \
+                        f"Mismatch at row {i}, element {j}: {act} != {exp}"
+                else:
+                    assert act == exp, f"Mismatch at row {i}, element {j}: {act} != {exp}"
+
+        log.info(f"Vector type test ({test_name}) passed successfully")
+
+
 @pytest.mark.parametrize("arrow_type,internal_support", [
     # Types not supported by Vast.
     (pa.null(), False),
@@ -160,3 +160,165 @@ def test_vector_index_metadata_from_stats(session: Session, clean_bucket_name: s
         assert table._metadata._vector_index.distance_metric == "l2sq"

     log.info("✓ Vector index metadata correctly retrieved from stats")
+
+
+@pytest.fixture
+def vector_index_test_tables(session: Session, clean_bucket_name: str):
+    """
+    Fixture that creates a schema with 4 test tables:
+    - regular_table: no vector index
+    - vector_table_l2: l2sq distance metric
+    - vector_table_ip: ip distance metric
+    - vector_table_cosine: cosine distance metric
+
+    Returns a tuple of (schema, table_names_dict, expected_metrics) where:
+    - table_names_dict contains the names of all created tables
+    - expected_metrics is a list of (metric_key, column, distance_metric, sql_function) tuples
+    """
+    schema_name = "vector_list_schema"
+    vector_dimension = 5
+
+    with session.transaction() as tx:
+        bucket = tx.bucket(clean_bucket_name)
+        schema = bucket.create_schema(schema_name)
+
+        # Create test tables
+        vec_type = pa.list_(pa.field('', pa.float32(), False), vector_dimension)
+        arrow_schema = pa.schema([
+            ('id', pa.int64()),
+            ('embedding', vec_type)
+        ])
+
+        table_names = {}
+        expected_metrics = []
+
+        # Regular table without vector index
+        table_name = "regular_table"
+        schema.create_table(table_name, arrow_schema)
+        table_names["regular"] = table_name
+        log.info(f"Created regular table: {table_name}")
+
+        # Vector index table with l2sq metric
+        table_name = "vector_table_l2"
+        schema.create_table(table_name, arrow_schema, vector_index=VectorIndexSpec("embedding", "l2sq"))
+        table_names["l2"] = table_name
+        expected_metrics.append(("l2", "embedding", "l2sq", "array_distance"))
+        log.info(f"Created vector index table (l2sq): {table_name}")
+
+        # Vector index table with ip metric
+        table_name = "vector_table_ip"
+        schema.create_table(table_name, arrow_schema, vector_index=VectorIndexSpec("embedding", "ip"))
+        table_names["ip"] = table_name
+        expected_metrics.append(("ip", "embedding", "ip", "array_inner_product"))
+        log.info(f"Created vector index table (ip): {table_name}")
+
+        # Vector index table with cosine metric
+        table_name = "vector_table_cosine"
+        schema.create_table(table_name, arrow_schema, vector_index=VectorIndexSpec("embedding", "cosine"))
+        table_names["cosine"] = table_name
+        expected_metrics.append(("cosine", "embedding", "cosine", "array_cosine_distance"))
+        log.info(f"Created vector index table (cosine): {table_name}")
+
+        yield schema, table_names, expected_metrics
+
+
+def _check_vector_index_metadata(actual_column, actual_metric, actual_sql_func,
+                                 expected_column, expected_metric, expected_sql_func,
+                                 expect_full_metadata: bool,
+                                 table_name: str = "table"):
+    """Pure checker helper to validate vector index metadata values.
+
+    Args:
+        actual_column: Actual column name from result
+        actual_metric: Actual distance metric from result
+        actual_sql_func: Actual SQL function name from result
+        expected_column: Expected column name (when full metadata is present)
+        expected_metric: Expected distance metric (when full metadata is present)
+        expected_sql_func: Expected SQL function name (when full metadata is present)
+        expect_full_metadata: Whether full metadata should be present
+        table_name: Name of table for error messages
+    """
+    # Determine actual expected values based on metadata presence
+    if expect_full_metadata:
+        expected_col = expected_column
+        expected_met = expected_metric
+        expected_sql = expected_sql_func
+    else:
+        # When metadata is not loaded, both Table objects and TableInfo use empty strings
+        expected_col = ""
+        expected_met = ""
+        expected_sql = ""
+
+    assert actual_column == expected_col, \
+        f"Expected column='{expected_col}', got '{actual_column}' for {table_name}"
+    assert actual_metric == expected_met, \
+        f"Expected metric='{expected_met}', got '{actual_metric}' for {table_name}"
+    assert actual_sql_func == expected_sql, \
+        f"Expected sql_func='{expected_sql}', got '{actual_sql_func}' for {table_name}"
+
+
+@pytest.mark.parametrize("test_case,use_public_api,include_metadata,expect_full_metadata", [
+    # Test 1: Public API schema.tables() - returns Table objects with empty metadata
+    ("schema.tables() public API", True, None, False),
+    # Test 2: Internal API with full metadata enabled
+    ("_iter_tables(include_vector_index_metadata=True)", False, True, True),
+    # Test 3: Internal API with metadata disabled - returns flag only
+    ("_iter_tables(include_vector_index_metadata=False)", False, False, False),
+])
+def test_list_tables_vector_index_metadata(session: Session, vector_index_test_tables,
+                                           test_case: str, use_public_api: bool,
+                                           include_metadata: Optional[bool], expect_full_metadata: bool):
+    """
+    Test that list_tables APIs return correct vector index metadata based on parameters.
+
+    Tests three scenarios:
+    1. schema.tables() - public API returns Table objects with vector_index placeholder (empty strings)
+    2. _iter_tables(include_vector_index_metadata=True) - returns full metadata
+    3. _iter_tables(include_vector_index_metadata=False) - returns flag only, no expensive metadata
+    """
+    schema, table_names, expected_metrics = vector_index_test_tables
+
+    log.info(f"Testing: {test_case}")
+
+    # Call the appropriate API and extract vector index info into uniform structure
+    if use_public_api:
+        # Public API returns Table objects with _vector_index
+        results = schema.tables()
+        results_by_name = {t.name: (t, t._metadata._vector_index) for t in results}
+    else:
+        # Internal API returns table info objects with individual vector_index fields
+        # Create a simple object to match _vector_index interface
+        from types import SimpleNamespace
+        results = list(schema._iter_tables(include_vector_index_metadata=include_metadata))
+        results_by_name = {}
+        for t in results:
+            # Create object with same attributes as VectorIndex for uniform access
+            vi = SimpleNamespace(
+                column=t.vector_index_column_name,
+                distance_metric=t.vector_index_distance_metric,
+                sql_distance_function=t.vector_index_sql_function_name,
+            ) if t.vector_index_enabled else None
+            results_by_name[t.name] = (t, vi)
+
+    assert len(results) == 4, f"Expected 4 tables, got {len(results)}"
+
+    # Validate regular table (no vector index)
+    _, regular_vi = results_by_name[table_names["regular"]]
+    assert regular_vi is None, \
+        "Expected vector_index=None for regular table"
+
+    # Validate vector index tables
+    for metric_key, expected_column, expected_metric, expected_sql_func in expected_metrics:
+        _, vi = results_by_name[table_names[metric_key]]

+        assert vi is not None, \
+            f"Expected vector_index present for {metric_key}"
+
+        _check_vector_index_metadata(
+            vi.column, vi.distance_metric, vi.sql_distance_function,
+            expected_column, expected_metric, expected_sql_func,
+            expect_full_metadata,
+            table_name=metric_key
+        )
+
+    log.info(f"{test_case} validated successfully")
@@ -95,7 +95,42 @@ class ObjectDetails(object):
             return self._tab.Get(flatbuffers.number_types.Int64Flags, o + self._tab.Pos)
         return 0

-def Start(builder): builder.StartObject(10)
+    # ObjectDetails
+    def PartitioningKeyEnabled(self):
+        o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(24))
+        if o != 0:
+            return bool(self._tab.Get(flatbuffers.number_types.BoolFlags, o + self._tab.Pos))
+        return False
+
+    # ObjectDetails
+    def VectorIndexEnabled(self):
+        o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(26))
+        if o != 0:
+            return bool(self._tab.Get(flatbuffers.number_types.BoolFlags, o + self._tab.Pos))
+        return False
+
+    # ObjectDetails
+    def VectorIndexColumnName(self):
+        o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(28))
+        if o != 0:
+            return self._tab.String(o + self._tab.Pos)
+        return None
+
+    # ObjectDetails
+    def VectorIndexDistanceMetric(self):
+        o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(30))
+        if o != 0:
+            return self._tab.String(o + self._tab.Pos)
+        return None
+
+    # ObjectDetails
+    def VectorIndexSqlFunctionName(self):
+        o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(32))
+        if o != 0:
+            return self._tab.String(o + self._tab.Pos)
+        return None
+
+def Start(builder): builder.StartObject(15)
 def ObjectDetailsStart(builder):
     """This method is deprecated. Please switch to Start."""
     return Start(builder)
@@ -139,6 +174,26 @@ def AddAcummulativeRowInseritionCount(builder, acummulativeRowInseritionCount):
 def ObjectDetailsAddAcummulativeRowInseritionCount(builder, acummulativeRowInseritionCount):
     """This method is deprecated. Please switch to AddAcummulativeRowInseritionCount."""
     return AddAcummulativeRowInseritionCount(builder, acummulativeRowInseritionCount)
+def AddPartitioningKeyEnabled(builder, partitioningKeyEnabled): builder.PrependBoolSlot(10, partitioningKeyEnabled, 0)
+def ObjectDetailsAddPartitioningKeyEnabled(builder, partitioningKeyEnabled):
+    """This method is deprecated. Please switch to AddPartitioningKeyEnabled."""
+    return AddPartitioningKeyEnabled(builder, partitioningKeyEnabled)
+def AddVectorIndexEnabled(builder, vectorIndexEnabled): builder.PrependBoolSlot(11, vectorIndexEnabled, 0)
+def ObjectDetailsAddVectorIndexEnabled(builder, vectorIndexEnabled):
+    """This method is deprecated. Please switch to AddVectorIndexEnabled."""
+    return AddVectorIndexEnabled(builder, vectorIndexEnabled)
+def AddVectorIndexColumnName(builder, vectorIndexColumnName): builder.PrependUOffsetTRelativeSlot(12, flatbuffers.number_types.UOffsetTFlags.py_type(vectorIndexColumnName), 0)
+def ObjectDetailsAddVectorIndexColumnName(builder, vectorIndexColumnName):
+    """This method is deprecated. Please switch to AddVectorIndexColumnName."""
+    return AddVectorIndexColumnName(builder, vectorIndexColumnName)
+def AddVectorIndexDistanceMetric(builder, vectorIndexDistanceMetric): builder.PrependUOffsetTRelativeSlot(13, flatbuffers.number_types.UOffsetTFlags.py_type(vectorIndexDistanceMetric), 0)
+def ObjectDetailsAddVectorIndexDistanceMetric(builder, vectorIndexDistanceMetric):
+    """This method is deprecated. Please switch to AddVectorIndexDistanceMetric."""
+    return AddVectorIndexDistanceMetric(builder, vectorIndexDistanceMetric)
+def AddVectorIndexSqlFunctionName(builder, vectorIndexSqlFunctionName): builder.PrependUOffsetTRelativeSlot(14, flatbuffers.number_types.UOffsetTFlags.py_type(vectorIndexSqlFunctionName), 0)
+def ObjectDetailsAddVectorIndexSqlFunctionName(builder, vectorIndexSqlFunctionName):
+    """This method is deprecated. Please switch to AddVectorIndexSqlFunctionName."""
+    return AddVectorIndexSqlFunctionName(builder, vectorIndexSqlFunctionName)
 def End(builder): return builder.EndObject()
 def ObjectDetailsEnd(builder):
     """This method is deprecated. Please switch to End."""
@@ -4,6 +4,7 @@ import time
 from concurrent.futures import ThreadPoolExecutor

 import pyarrow as pa
+import pytest

 from vastdb.table import QueryConfig

@@ -34,6 +35,7 @@ def test_concurrent_query(session, test_bucket_name, schema_name, table_name):
     logger.info(f"finished running {amount_of_queries_in_parallel} queries")


+@pytest.mark.skip(reason="see https://vastdata.atlassian.net/browse/ORION-319356")
 def test_table_stats(session, test_bucket_name, schema_name, table_name):
     """
     Testing stats integrity while altering table
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: vastdb
-Version: 2.0.5
+Version: 2.0.8
 Summary: VAST Data SDK
 Home-page: https://github.com/vast-data/vastdb_sdk
 Author: VAST DATA
@@ -20,6 +20,7 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: aws-requests-auth
 Requires-Dist: ibis-framework~=10.1
+Requires-Dist: sqlglot<28.7,>=23.4
 Requires-Dist: pyarrow~=18.0
 Requires-Dist: pyarrow-hotfix==0.7
 Requires-Dist: flatbuffers
@@ -1,17 +1,17 @@
 vastdb/__init__.py,sha256=uf-AXdzsD4nPxFP7WxkcAXGG0whv8BHLrrXCJtsPGaQ,436
 vastdb/_adbc.py,sha256=aUjn0hC92U5Jh2jQYeMFzTHYnVP086dVHlMoIoxhvh0,5985
 vastdb/_ibis_support.py,sha256=sJieOMvDWpsciPKh1mJzS56jxLtCRVlvK41hW84vexM,866
-vastdb/_internal.py,sha256=7c6tySuEacwZ5jqdW-HSAtbIIk8ep-_jA4uGrEU9gwc,112767
+vastdb/_internal.py,sha256=-xN2v4nVn8CERbKT2Ub_xDqqy337YjK_d2Dllb8yhEY,114713
 vastdb/_table_interface.py,sha256=dRcXzC7j1SSpBQ4jCOPF_Zgg66LSt0tBeILM8W_A71c,4130
 vastdb/bucket.py,sha256=ulkTI_7xw5FYVzcrTFC7G2ijmTTVSmvJZUdgzycGHR0,2588
 vastdb/config.py,sha256=OehnsWrjzv0-SUouEXmkrKBugiWyhXOn4XiSLV3s9yk,2342
 vastdb/conftest.py,sha256=vDNqhYMsF630_ueXofnT2zhuSEpjcc4hxcXAjPqlxDI,5525
 vastdb/errors.py,sha256=GVmiwyrfHrag8GL55ivcF5Kwc1Ro4wvvaJAcuIWvmO0,5764
 vastdb/features.py,sha256=ivYbvhiGA858B00vhs_CNzlVV9QDUe53yW6V3J5EoxM,1874
-vastdb/schema.py,sha256=7k6Su8xjXzeLZeqNzsXIk06q9sIWMG9PBBlwy1aEAG0,7782
+vastdb/schema.py,sha256=jYEjddYnWxp0-3UsN3B7Kqthagrfy4NtULRwreaMM2c,8325
 vastdb/session.py,sha256=oUiTx8x_5XOs6EnNvFjon9dSVTk89XhOZ-SfLR8sITQ,2529
-vastdb/table.py,sha256=X0uOMlxA9R2ER1-uEyB1lBv_nn-c_kM_jjaM2ai2IrQ,46241
-vastdb/table_metadata.py,sha256=PSwx1A25AmcCs4ig-Sx_xyXr7xqyGaNIk2LMnIvrwmM,7358
+vastdb/table.py,sha256=1fdzD2eNYBTh9PjGrGamHZ9pjtF5aZULzxhyXSD4z04,46698
+vastdb/table_metadata.py,sha256=HWs3s0LP2gx9EemMb5N7OYnd3zdAletX_DubhvtIOEs,7451
 vastdb/transaction.py,sha256=JaTvUw86HZ-qFiqjR1fmHndF5bSgMtVWj549MtGsZ5Q,4734
 vastdb/util.py,sha256=8CUnVRsJukC3uNHNoB5D0qPf0FxS8OSdVB84nNoLJKc,6290
 vastdb/bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -53,9 +53,9 @@ vastdb/tests/test_projections.py,sha256=v43WrIdz_Ru4j3VeZ7medRPafzuw1eKMZfbAE56x
 vastdb/tests/test_sanity.py,sha256=bv1ypGDzvOgmMvGbucDYiLQu8krQLlE6NB3M__q87x8,3303
 vastdb/tests/test_schemas.py,sha256=l70YQMlx2UL1KRQhApriiG2ZM7GJF-IzWU31H3Yqn1U,3312
 vastdb/tests/test_table_in_tx.py,sha256=-OFGaZMZQc78HOmR23xHIsH7d0VBneFG3tGdDlogceM,9105
-vastdb/tests/test_tables.py,sha256=yU5fBeHentNe22Rc1e73mj33M0nl63Y_acOy6VTzF-4,54496
+vastdb/tests/test_tables.py,sha256=HZBulgWhLh74MSSkOzOnWYiG4cQOi8T7-N8E3IMnIPo,56927
 vastdb/tests/test_util.py,sha256=n7gvT5Wg6b6bxgqkFXkYqvFd_W1GlUdVfmPv66XYXyA,1956
-vastdb/tests/test_vector_index.py,sha256=qgppSQG77PUgmdXzfprX1mxUkkqq7tbxCA3MRaYFFgQ,6460
+vastdb/tests/test_vector_index.py,sha256=3JEyL2JqTWOACxJ0nihmOfLxX-ICVisCZMK1EfOOu-w,13871
 vastdb/tests/test_vector_search.py,sha256=50mnBFuw0PAApMLkYwDUMV5VxQorswzD_f_XdpaEXTo,7103
 vastdb/tests/util.py,sha256=cv8Qcvy92vpBbiiuqXr7pAfbh2K8ziBVZ_j1EQ4vT6o,5994
 vastdb/vast_flatbuf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -211,16 +211,16 @@ vastdb/vast_flatbuf/tabular/ListSchemasResponse.py,sha256=WyofRuUc66g3sPCKM53BDp
 vastdb/vast_flatbuf/tabular/ListTablesResponse.py,sha256=ysNiDeEKy4ayYrw8DCJxrt-Xf6OIauUBL1Dgj9WnSMI,3557
 vastdb/vast_flatbuf/tabular/ListViewsResponse.py,sha256=pDpWMbrxhFZ8Rs1keRkXN4bsI9PNDOeV3RqdyE2dcQ0,3528
 vastdb/vast_flatbuf/tabular/NameString.py,sha256=TkCp4wt61gzqoyaue9wf91SPEQ2W1FqqNbRBSMzK5y0,1501
-vastdb/vast_flatbuf/tabular/ObjectDetails.py,sha256=inPIj4uSoMBcxeceK5cxWciXiClfKbT4ToPo5CYzoHU,6481
+vastdb/vast_flatbuf/tabular/ObjectDetails.py,sha256=xHZKBrQBXi4hQPQMbTJ2BmeVwvJfKuPEk2v4uW5Dxqw,9664
 vastdb/vast_flatbuf/tabular/S3File.py,sha256=KC9c2oS5-JXwTTriUVFdjOvRG0B54Cq9kviSDZY3NI0,4450
 vastdb/vast_flatbuf/tabular/VectorIndexMetadata.py,sha256=EX7wQnfHZH4GArvGS2jUqcPDS6gvJfELRtne4rWwsb4,2794
 vastdb/vast_flatbuf/tabular/VipRange.py,sha256=8zb-fzDM61YY5tdhpK3n-ySdTu_69suXMZj68YMpJH8,1842
 vastdb/vast_flatbuf/tabular/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vastdb/vast_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vastdb/vast_tests/test_ha.py,sha256=744P4G6VJ09RIkHhMQL4wlipCBJWQVMhyvUrSc4k1HQ,975
-vastdb/vast_tests/test_scale.py,sha256=5jGwOdZH6Tv5tPdZYPWoqcxOceI2jA5i2D1zNKZHER4,3958
-vastdb-2.0.5.dist-info/licenses/LICENSE,sha256=obffan7LYrq7hLHNrY7vHcn2pKUTBUYXMKu-VOAvDxU,11333
-vastdb-2.0.5.dist-info/METADATA,sha256=4wi1lvs2ENDLmNaGUK4cFq_AS_IpWtYBIQJdNvrV7Nc,1721
-vastdb-2.0.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-vastdb-2.0.5.dist-info/top_level.txt,sha256=nnKAaZaQa8GFbYpWAexr_B9HrhonZbUlX6hL6AC--yA,7
-vastdb-2.0.5.dist-info/RECORD,,
+vastdb/vast_tests/test_scale.py,sha256=RZ4K4DQB1N4XOBIfNqp3-aIDeXHIqXN115gQpjSyIXA,4055
+vastdb-2.0.8.dist-info/licenses/LICENSE,sha256=obffan7LYrq7hLHNrY7vHcn2pKUTBUYXMKu-VOAvDxU,11333
+vastdb-2.0.8.dist-info/METADATA,sha256=YCzUbEjArA0qr6cY1xAuZ-I6BXcShcrrqY6VzMscqIs,1756
+vastdb-2.0.8.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+vastdb-2.0.8.dist-info/top_level.txt,sha256=nnKAaZaQa8GFbYpWAexr_B9HrhonZbUlX6hL6AC--yA,7
+vastdb-2.0.8.dist-info/RECORD,,
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.9.0)
+Generator: setuptools (80.10.2)
 Root-Is-Purelib: true
 Tag: py3-none-any