vastdb 1.3.10__py3-none-any.whl → 1.3.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vastdb/_internal.py CHANGED
@@ -69,6 +69,7 @@ import vastdb.vast_flatbuf.org.apache.arrow.flatbuf.Date as fb_date
 import vastdb.vast_flatbuf.org.apache.arrow.flatbuf.Decimal as fb_decimal
 import vastdb.vast_flatbuf.org.apache.arrow.flatbuf.Field as fb_field
 import vastdb.vast_flatbuf.org.apache.arrow.flatbuf.FixedSizeBinary as fb_fixed_size_binary
+import vastdb.vast_flatbuf.org.apache.arrow.flatbuf.FixedSizeList as fb_fixed_size_list
 import vastdb.vast_flatbuf.org.apache.arrow.flatbuf.FloatingPoint as fb_floating_point
 import vastdb.vast_flatbuf.org.apache.arrow.flatbuf.Int as fb_int
 import vastdb.vast_flatbuf.org.apache.arrow.flatbuf.List as fb_list
@@ -497,7 +498,13 @@ class Predicate:
             fb_bool.Start(self.builder)
             field_type = fb_bool.End(self.builder)

-            value = True if value == 'true' else False  # not cover all cases
+            # Handle both boolean values and string representations
+            if isinstance(value, bool):
+                value = value
+            elif isinstance(value, str):
+                value = value.lower() == 'true'
+            else:
+                value = bool(value)
         elif isinstance(field.type, pa.Decimal128Type):
             literal_type = fb_decimal_lit
             literal_impl = LiteralImpl.DecimalLiteral
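The replacement accepts real booleans, strings, and arbitrary truthy values instead of only the string 'true'. A minimal standalone sketch of the same normalization (the helper name is illustrative, not part of the SDK):

```python
def normalize_bool_literal(value):
    """Illustrative copy of the predicate's new boolean-literal handling."""
    if isinstance(value, bool):
        return value                    # already a bool: keep as-is
    if isinstance(value, str):
        return value.lower() == 'true'  # case-insensitive string form
    return bool(value)                  # anything else: Python truthiness

assert normalize_bool_literal('True') is True
assert normalize_bool_literal('false') is False
assert normalize_bool_literal(1) is True
```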
@@ -608,7 +615,7 @@ class FieldNode:
         self.debug = debug
         if isinstance(self.type, pa.StructType):
             self.children = [FieldNode(field, index_iter, parent=self) for field in self.type]
-        elif isinstance(self.type, pa.ListType):
+        elif pa.types.is_list(self.type) or pa.types.is_fixed_size_list(self.type):
             self.children = [FieldNode(self.type.value_field, index_iter, parent=self)]
         elif isinstance(self.type, pa.MapType):
             # Map is represented as List<Struct<K, V>> in Arrow
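This isinstance-to-`pa.types` switch recurs throughout the file: `pa.FixedSizeListType` is not a subclass of `pa.ListType`, so the predicate helpers are needed to match both list flavors. A quick standalone illustration (standard PyArrow API):

```python
import pyarrow as pa

fixed = pa.list_(pa.field('item', pa.int32(), nullable=False), 4)
print(isinstance(fixed, pa.ListType))          # False: FixedSizeListType is a distinct class
print(pa.types.is_fixed_size_list(fixed))      # True
print(pa.types.is_list(pa.list_(pa.int32())))  # True only for variable-size lists
```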
@@ -752,7 +759,7 @@ def _iter_nested_arrays(column: pa.Array) -> Iterator[pa.Array]:
         if not column.type.num_fields == 1:  # Note: VAST serializes only a single struct field at a time
             raise ValueError(f'column.type.num_fields: {column.type.num_fields} not eq to 1')
         yield from _iter_nested_arrays(column.field(0))
-    elif isinstance(column.type, pa.ListType):
+    elif pa.types.is_list(column.type) or pa.types.is_fixed_size_list(column.type):
         yield from _iter_nested_arrays(column.values)  # Note: Map is serialized in VAST as a List<Struct<K, V>>


@@ -853,10 +860,11 @@ class VastdbApi:
     VAST_VERSION_REGEX = re.compile(r'^vast (\d+\.\d+\.\d+\.\d+)$')

     def __init__(self, endpoint, access_key, secret_key,
-                 *,
-                 ssl_verify=True,
-                 timeout=None,
-                 backoff_config: Optional[BackoffConfig] = None):
+                 *,
+                 ssl_verify=True,
+                 timeout=None,
+                 backoff_config: Optional[BackoffConfig] = None,
+                 version_check=True):

         from . import version  # import lazily here (to avoid circular dependencies)
         self.client_sdk_version = f"VAST Database Python SDK {version()} - 2024 (c)"
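The new `version_check` flag gates the server probe below. A hedged usage sketch of the internal constructor (endpoint and credentials are placeholders; `VastdbApi` lives in the internal `_internal` module, so the import path is an assumption):

```python
from vastdb._internal import VastdbApi  # internal module; path is an assumption

# Default: probe the cluster and record self.vast_version.
api = VastdbApi(endpoint='http://vip-pool.example:80',
                access_key='<access-key>', secret_key='<secret-key>')

# Skip the extra round-trip, as done below when the session clones itself.
api_clone = VastdbApi(endpoint='http://vip-pool.example:80',
                      access_key='<access-key>', secret_key='<secret-key>',
                      version_check=False)
```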
@@ -896,29 +904,30 @@ class VastdbApi:
             aws_region='',
             aws_service='s3')

-        # probe the cluster for its version
-        res = self._request(method="GET", url=self._url(command="transaction"), skip_status_check=True)  # used only for the response headers
-        _logger.debug("headers=%s code=%s content=%s", res.headers, res.status_code, res.content)
-        server_header = res.headers.get("Server")
-        if server_header is None:
-            _logger.error("Response doesn't contain 'Server' header")
-        else:
-            if not server_header.startswith(self.VAST_SERVER_PREFIX):
-                raise UnsupportedServer(f'{self.url} is not a VAST DB server endpoint ("{server_header}")')
-
-            if m := self.VAST_VERSION_REGEX.match(server_header):
-                self.vast_version: Tuple[int, ...] = tuple(int(v) for v in m.group(1).split("."))
-                return
+        if version_check:
+            # probe the cluster for its version
+            res = self._request(method="GET", url=self._url(command="transaction"), skip_status_check=True)  # used only for the response headers
+            _logger.debug("headers=%s code=%s content=%s", res.headers, res.status_code, res.content)
+            server_header = res.headers.get("Server")
+            if server_header is None:
+                _logger.error("Response doesn't contain 'Server' header")
             else:
-                _logger.error("'Server' header '%s' doesn't match the expected pattern", server_header)
+                if not server_header.startswith(self.VAST_SERVER_PREFIX):
+                    raise UnsupportedServer(f'{self.url} is not a VAST DB server endpoint ("{server_header}")')

-        msg = (
-            f'Please use `vastdb` <= 0.0.5.x with current VAST cluster version ("{server_header or "N/A"}"). '
-            'To use the latest SDK, please upgrade your cluster to the latest service pack. '
-            'Please contact customer.support@vastdata.com for more details.'
-        )
-        _logger.critical(msg)
-        raise NotImplementedError(msg)
+                if m := self.VAST_VERSION_REGEX.match(server_header):
+                    self.vast_version: Tuple[int, ...] = tuple(int(v) for v in m.group(1).split("."))
+                    return
+                else:
+                    _logger.error("'Server' header '%s' doesn't match the expected pattern", server_header)
+
+            msg = (
+                f'Please use `vastdb` <= 0.0.5.x with current VAST cluster version ("{server_header or "N/A"}"). '
+                'To use the latest SDK, please upgrade your cluster to the latest service pack. '
+                'Please contact customer.support@vastdata.com for more details.'
+            )
+            _logger.critical(msg)
+            raise NotImplementedError(msg)

     def __enter__(self):
         """Allow using this session as a context manager."""
@@ -935,7 +944,8 @@ class VastdbApi:
                          secret_key=self.secret_key,
                          ssl_verify=self._session.verify,
                          timeout=self.timeout,
-                         backoff_config=self.backoff_config)
+                         backoff_config=self.backoff_config,
+                         version_check=False)

     def _single_request(self, *, method, url, skip_status_check=False, **kwargs):
         _logger.debug("Sending request: %s %s %s timeout=%s", method, url, kwargs, self.timeout)
@@ -1349,12 +1359,12 @@ class VastdbApi:
         lists = list_tables.GetRootAs(res.content)
         tables_length = lists.TablesLength()
         count = int(res_headers['tabular-list-count']) if 'tabular-list-count' in res_headers else tables_length
-        return lists, is_truncated, count
+        return lists, next_key, is_truncated, count

     def _list_tables_internal(self, bucket, schema, parse_properties, txid=0, client_tags=[], max_keys=1000, next_key=0, name_prefix="",
                               exact_match=False, expected_retvals=[], include_list_stats=False, count_only=False):
         tables = []
-        lists, is_truncated, count = self._list_tables_raw(bucket, schema, txid=txid, client_tags=client_tags, max_keys=max_keys,
+        lists, next_key, is_truncated, count = self._list_tables_raw(bucket, schema, txid=txid, client_tags=client_tags, max_keys=max_keys,
                                                            next_key=next_key, name_prefix=name_prefix, exact_match=exact_match, expected_retvals=expected_retvals,
                                                            include_list_stats=include_list_stats, count_only=count_only)
         bucket_name = lists.BucketName().decode()
@@ -1368,7 +1378,7 @@ class VastdbApi:
         return bucket_name, schema_name, tables, next_key, is_truncated, count

     def raw_sorting_score(self, bucket, schema, txid, name):
-        lists, _, _ = self._list_tables_raw(bucket, schema, txid=txid, exact_match=True, name_prefix=name, include_list_stats=True)
+        lists, _, _, _ = self._list_tables_raw(bucket, schema, txid=txid, exact_match=True, name_prefix=name, include_list_stats=True)
         bucket_name = lists.BucketName().decode()
         if not bucket.startswith(bucket_name):  # ignore snapshot name
             raise ValueError(f'bucket: {bucket} did not start from {bucket_name}')
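Both callers now unpack the continuation key returned by `_list_tables_raw`. A hedged sketch of a pagination loop over the extended 4-tuple (the flatbuffers accessors `Tables(i)`/`Name()` are assumptions based on the generated API; only `TablesLength()` appears in this diff):

```python
def iter_table_names(api, bucket, schema, page_size=1000):
    """Hypothetical pagination loop over the internal list-tables RPC."""
    next_key = 0
    while True:
        lists, next_key, is_truncated, count = api._list_tables_raw(
            bucket, schema, max_keys=page_size, next_key=next_key)
        for i in range(lists.TablesLength()):
            yield lists.Tables(i).Name().decode()  # accessor names assumed
        if not is_truncated:
            break
```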
@@ -2267,11 +2277,17 @@ def get_field_type(builder: flatbuffers.Builder, field: pa.Field):
        fb_struct.Start(builder)
        field_type = fb_struct.End(builder)

-    elif isinstance(field.type, pa.ListType):
+    elif pa.types.is_list(field.type):
        field_type_type = Type.List
        fb_list.Start(builder)
        field_type = fb_list.End(builder)

+    elif pa.types.is_fixed_size_list(field.type):
+        field_type_type = Type.FixedSizeList
+        fb_fixed_size_list.Start(builder)
+        fb_fixed_size_list.AddListSize(builder, field.type.list_size)
+        field_type = fb_fixed_size_list.End(builder)
+
    elif isinstance(field.type, pa.MapType):
        field_type_type = Type.Map
        fb_map.Start(builder)
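FixedSizeList carries its element count in the type itself, which is what `AddListSize` serializes. For reference, the corresponding type on the PyArrow side (standard PyArrow API, as also used by the new tests below):

```python
import pyarrow as pa

# A 128-dimensional float32 vector; VAST requires the element field to be non-nullable.
vec_type = pa.list_(pa.field('item', pa.float32(), nullable=False), 128)
assert pa.types.is_fixed_size_list(vec_type)
print(vec_type.list_size)    # 128 -- the value passed to fb_fixed_size_list.AddListSize()
print(vec_type.value_field)  # item: float not null
```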
@@ -2293,7 +2309,7 @@ def build_field(builder: flatbuffers.Builder, f: pa.Field, name: str):
    children = None
    if isinstance(f.type, pa.StructType):
        children = [build_field(builder, child, child.name) for child in list(f.type)]
-    if isinstance(f.type, pa.ListType):
+    if pa.types.is_list(f.type) or pa.types.is_fixed_size_list(f.type):
        children = [build_field(builder, f.type.value_field, "item")]
    if isinstance(f.type, pa.MapType):
        children = [
vastdb/conftest.py CHANGED
@@ -6,6 +6,7 @@ import boto3
 import pytest

 import vastdb
+import vastdb.errors


 def pytest_addoption(parser):
@@ -65,8 +66,14 @@ def clean_bucket_name(request, test_bucket_name, session):
        b = tx.bucket(test_bucket_name)
        for top_schema in b.schemas():
            for s in iter_schemas(top_schema):
-                for t in s.tables():
-                    t.drop()
+                for t_name in s.tablenames():
+                    try:
+                        t = s.table(t_name)
+                        t.drop()
+                    except vastdb.errors.NotSupportedSchema:
+                        # Use internal API to drop the table in case unsupported schema prevents creating a table
+                        # object.
+                        tx._rpc.api.drop_table(b.name, s.name, t_name, txid=tx.txid)
                s.drop()
    return test_bucket_name

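The fixture now falls back to the low-level RPC when `Schema.table()` refuses to construct a `Table` over an unsupported schema. The same pattern in user code might look like this (a sketch; bucket/schema names are placeholders, `session` is an assumed connected session, and `tx._rpc` is internal API):

```python
import vastdb.errors

with session.transaction() as tx:
    s = tx.bucket('my-bucket').schema('my-schema')
    for name in s.tablenames():
        try:
            s.table(name).drop()
        except vastdb.errors.NotSupportedSchema:
            # Table construction validates the schema via Ibis, so drop
            # through the internal RPC instead (as the fixture does).
            tx._rpc.api.drop_table('my-bucket', 'my-schema', name, txid=tx.txid)
```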
vastdb/errors.py CHANGED
@@ -2,7 +2,9 @@ import logging
 import xml.etree.ElementTree
 from dataclasses import dataclass
 from enum import Enum
+from typing import Optional

+import pyarrow as pa
 import requests


@@ -89,6 +91,9 @@ class ImportFilesError(Exception):
     message: str
     error_dict: dict

+    def __post_init__(self):
+        self.args = [vars(self)]
+

 class InvalidArgument(Exception):
     pass
@@ -122,18 +127,27 @@ class NotSupported(Exception):
 class MissingBucket(Missing):
     bucket: str

+    def __post_init__(self):
+        self.args = [vars(self)]
+

 @dataclass
 class MissingSnapshot(Missing):
     bucket: str
     snapshot: str

+    def __post_init__(self):
+        self.args = [vars(self)]
+

 @dataclass
 class MissingSchema(Missing):
     bucket: str
     schema: str

+    def __post_init__(self):
+        self.args = [vars(self)]
+

 @dataclass
 class MissingTable(Missing):
@@ -141,6 +155,9 @@ class MissingTable(Missing):
     schema: str
     table: str

+    def __post_init__(self):
+        self.args = [vars(self)]
+

 @dataclass
 class MissingProjection(Missing):
@@ -149,6 +166,9 @@ class MissingProjection(Missing):
     table: str
     projection: str

+    def __post_init__(self):
+        self.args = [vars(self)]
+

 class Exists(Exception):
     pass
@@ -159,6 +179,9 @@ class SchemaExists(Exists):
     bucket: str
     schema: str

+    def __post_init__(self):
+        self.args = [vars(self)]
+

 @dataclass
 class TableExists(Exists):
@@ -166,6 +189,9 @@ class TableExists(Exists):
     schema: str
     table: str

+    def __post_init__(self):
+        self.args = [vars(self)]
+

 @dataclass
 class NotSupportedCommand(NotSupported):
@@ -173,18 +199,37 @@ class NotSupportedCommand(NotSupported):
     schema: str
     table: str

+    def __post_init__(self):
+        self.args = [vars(self)]
+

 @dataclass
 class NotSupportedVersion(NotSupported):
     err_msg: str
     version: str

+    def __post_init__(self):
+        self.args = [vars(self)]
+
+
+@dataclass
+class NotSupportedSchema(NotSupported):
+    message: Optional[str] = None
+    schema: Optional[pa.Schema] = None
+    cause: Optional[Exception] = None
+
+    def __post_init__(self):
+        self.args = [vars(self)]
+

 @dataclass
 class ConnectionError(Exception):
     cause: Exception
     may_retry: bool

+    def __post_init__(self):
+        self.args = [vars(self)]
+

 def handle_unavailable(**kwargs):
     if kwargs['code'] == 'SlowDown':
@@ -192,7 +237,7 @@ def handle_unavailable(**kwargs):
         raise ServiceUnavailable(**kwargs)


-ERROR_TYPES_MAP = {
+HTTP_ERROR_TYPES_MAP = {
     HttpStatus.BAD_REQUEST: BadRequest,
     HttpStatus.FOBIDDEN: Forbidden,
     HttpStatus.NOT_FOUND: NotFound,
@@ -205,6 +250,10 @@ ERROR_TYPES_MAP = {
     HttpStatus.INSUFFICIENT_CAPACITY: InsufficientCapacity,
 }

+SPECIFIC_ERROR_TYPES_MAP = {
+    'TabularUnsupportedColumnType': NotSupportedSchema,
+}
+

 def from_response(res: requests.Response):
     if res.status_code == HttpStatus.SUCCESS.value:
@@ -234,5 +283,10 @@ def from_response(res: requests.Response):
     )
     log.warning("RPC failed: %s", kwargs)
     status = HttpStatus(res.status_code)
-    error_type = ERROR_TYPES_MAP.get(status, UnexpectedError)
-    return error_type(**kwargs)  # type: ignore
+    http_error_type = HTTP_ERROR_TYPES_MAP.get(status, UnexpectedError)
+    http_error = http_error_type(**kwargs)  # type: ignore
+    # Wrap specific error types if applicable
+    if code_str in SPECIFIC_ERROR_TYPES_MAP:
+        error_type = SPECIFIC_ERROR_TYPES_MAP[code_str]
+        return error_type(message=message_str, cause=http_error)
+    return http_error
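With this mapping, a `TabularUnsupportedColumnType` RPC failure surfaces as `NotSupportedSchema`, and the HTTP-level error is preserved in `cause`. A hedged handling sketch (`s` is an assumed `Schema` handle; `arrow_schema` is any PyArrow schema):

```python
import vastdb.errors

try:
    s.create_table('t', arrow_schema)  # any call whose RPC may fail this way
except vastdb.errors.NotSupportedSchema as e:
    print(e.message)  # client- or server-side description
    print(e.cause)    # underlying HTTP-level error (e.g. BadRequest), if any
```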
vastdb/schema.py CHANGED
@@ -91,6 +91,7 @@ class Schema:
         if use_external_row_ids_allocation:
             self.tx._rpc.features.check_external_row_ids_allocation()

+        table.Table.validate_ibis_support_schema(columns)
         self.tx._rpc.api.create_table(self.bucket.name, self.name, table_name, columns, txid=self.tx.txid,
                                       use_external_row_ids_allocation=use_external_row_ids_allocation,
                                       sorting_key=sorting_key)
@@ -109,14 +110,14 @@ class Schema:
         log.debug("Found table: %s", t[0])
         return t[0]

-    def _iter_tables(self, table_name=None):
+    def _iter_tables(self, table_name=None, page_size=1000):
         next_key = 0
         name_prefix = table_name if table_name else ""
         exact_match = bool(table_name)
         while True:
             _bucket_name, _schema_name, curr_tables, next_key, is_truncated, _ = \
                 self.tx._rpc.api.list_tables(
-                    bucket=self.bucket.name, schema=self.name, next_key=next_key, txid=self.tx.txid,
+                    bucket=self.bucket.name, schema=self.name, next_key=next_key, max_keys=page_size, txid=self.tx.txid,
                     exact_match=exact_match, name_prefix=name_prefix, include_list_stats=exact_match)
             if not curr_tables:
                 break
@@ -124,19 +125,19 @@ class Schema:
             if not is_truncated:
                 break

-    def tables(self, table_name: str = "") -> List["Table"]:
+    def tables(self, table_name: str = "", page_size=1000) -> List["Table"]:
         """List all tables under this schema if `table_name` is empty.

         Otherwise, list only the specific table (if exists).
         """
         return [
             _parse_table_info(table_info, self)
-            for table_info in self._iter_tables(table_name=table_name)
+            for table_info in self._iter_tables(table_name=table_name, page_size=page_size)
         ]

-    def tablenames(self) -> List[str]:
+    def tablenames(self, page_size=1000) -> List[str]:
         """List all table names under this schema."""
-        return [table_info.name for table_info in self._iter_tables()]
+        return [table_info.name for table_info in self._iter_tables(page_size=page_size)]

     def drop(self) -> None:
         """Delete this schema."""
vastdb/table.py CHANGED
@@ -126,11 +126,35 @@ class Table:
     _imports_table: bool
     sorted_table: bool

+    @staticmethod
+    def validate_ibis_support_schema(arrow_schema: pa.Schema):
+        """Validate that the provided Arrow schema is compatible with Ibis.
+
+        Raises NotSupportedSchema if the schema contains unsupported fields.
+        """
+        unsupported_fields = []
+        first_exception = None
+        for f in arrow_schema:
+            try:
+                ibis.Schema.from_pyarrow(pa.schema([f]))
+            except Exception as e:
+                if first_exception is None:
+                    first_exception = e
+                unsupported_fields.append(f)
+
+        if unsupported_fields:
+            raise errors.NotSupportedSchema(
+                message=f"Ibis does not support the schema {unsupported_fields=}",
+                schema=arrow_schema,
+                cause=first_exception
+            )
+
     def __post_init__(self):
         """Also, load columns' metadata."""
         self.arrow_schema = self.columns()

         self._table_path = f'{self.schema.bucket.name}/{self.schema.name}/{self.name}'
+        self.validate_ibis_support_schema(self.arrow_schema)
         self._ibis_table = ibis.table(ibis.Schema.from_pyarrow(self.arrow_schema), self._table_path)

     @property
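The validator can also be called directly. A sketch (the `pa.null()` case mirrors the new `test_unsupported_types` test further below, which expects exactly this rejection):

```python
import pyarrow as pa
from vastdb.table import Table

Table.validate_ibis_support_schema(pa.schema([('x', pa.int64())]))  # passes

try:
    Table.validate_ibis_support_schema(pa.schema([('u', pa.null())]))
except Exception as e:  # vastdb.errors.NotSupportedSchema
    print(e)
```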
@@ -350,12 +374,8 @@ class Table:
         if limit_rows:
             config.limit_rows_per_sub_split = limit_rows

-        stats = None
-        # Retrieve snapshots only if needed
         if config.data_endpoints is None:
-            stats = self.get_stats()
-            log.debug("stats: %s", stats)
-            endpoints = stats.endpoints
+            endpoints = tuple([self.tx._rpc.api.url])
         else:
             endpoints = tuple(config.data_endpoints)
         log.debug("endpoints: %s", endpoints)
@@ -385,8 +405,7 @@ class Table:
             num_rows = self._get_row_estimate(columns, predicate, query_schema)
             log.debug(f'sorted estimate: {num_rows}')
             if num_rows == 0:
-                if stats is None:
-                    stats = self.get_stats()
+                stats = self.get_stats()
                 num_rows = stats.num_rows

             config.num_splits = max(1, num_rows // config.rows_per_split)
@@ -465,7 +484,7 @@ class Table:

         total_num_rows = limit_rows if limit_rows else sys.maxsize
         with concurrent.futures.ThreadPoolExecutor(max_workers=len(endpoints), thread_name_prefix=threads_prefix) as tp:  # TODO: concurrency == enpoints is just a heuristic
-            futures = [tp.submit(single_endpoint_worker, endpoint) for endpoint in endpoints]
+            futures = [tp.submit(single_endpoint_worker, endpoint) for endpoint in endpoints[:config.num_splits]]
             tasks_running = len(futures)
             try:
                 while tasks_running > 0:
@@ -523,6 +542,9 @@ class Table:
         """Insert a RecordBatch into this table."""
         if self._imports_table:
             raise errors.NotSupportedCommand(self.bucket.name, self.schema.name, self.name)
+        if 0 == rows.num_rows:
+            log.debug("Ignoring empty insert into %s", self.name)
+            return pa.chunked_array([], type=INTERNAL_ROW_ID_FIELD.type)
         try:
             row_ids = []
             serialized_slices = util.iter_serialized_slices(rows, MAX_INSERT_ROWS_PER_PATCH)
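Empty inserts now short-circuit before any RPC and still return a well-typed (empty) row-id array, matching the new `test_insert_empty` assertions below. A sketch (`table` is an assumed open `Table` handle):

```python
import pyarrow as pa

schema = pa.schema([('x', pa.int64())])
empty = pa.record_batch(data=[[]], schema=schema)
row_ids = table.insert(empty)      # returns immediately, no RPC
assert row_ids.to_pylist() == []   # empty, but typed as the internal row-id column
```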
@@ -535,7 +557,7 @@ class Table:
                 self.tx._rpc.features.check_return_row_ids()
             except errors.NotSupportedVersion:
                 return  # type: ignore
-            return pa.chunked_array(row_ids)
+            return pa.chunked_array(row_ids, type=INTERNAL_ROW_ID_FIELD.type)
         except errors.TooWideRow:
             self.tx._rpc.features.check_return_row_ids()
             return self.insert_in_column_batches(rows)
@@ -619,6 +641,7 @@ class Table:
         """Add a new column."""
         if self._imports_table:
             raise errors.NotSupportedCommand(self.bucket.name, self.schema.name, self.name)
+        self.validate_ibis_support_schema(new_column)
         self.tx._rpc.api.add_columns(self.bucket.name, self.schema.name, self.name, new_column, txid=self.tx.txid)
         log.info("Added column(s): %s", new_column)
         self.arrow_schema = self.columns()
vastdb/tests/test_fixed_list.py ADDED
@@ -0,0 +1,294 @@
+import datetime
+import decimal
+import itertools
+import random
+from typing import Any, Union, cast
+
+import numpy as np
+import pyarrow as pa
+import pyarrow.compute as pc
+import pytest
+
+import vastdb.errors
+
+from .util import prepare_data
+
+supported_fixed_list_element_types = [
+    pa.uint8(),
+    pa.uint16(),
+    pa.uint32(),
+    pa.uint64(),
+    pa.int8(),
+    pa.int16(),
+    pa.int32(),
+    pa.int64(),
+    pa.float32(),
+    pa.float64(),
+    pa.decimal128(10),
+    pa.date32(),
+    pa.timestamp("s"),
+    pa.time32("ms"),
+    pa.time64("us"),
+]
+
+# All the supported element types are supported as non-nullable.
+supported_fixed_list_element_fields = [
+    pa.field(name="item", type=element_type, nullable=False)
+    for element_type in supported_fixed_list_element_types
+]
+
+unsupported_fixed_list_element_types = [
+    pa.string(),
+    pa.list_(pa.int64()),
+    pa.list_(pa.int64(), 1),
+    pa.map_(pa.utf8(), pa.float64()),
+    pa.struct([("x", pa.int16())]),
+    pa.bool_(),
+    pa.binary(),
+]
+
+unsupported_fixed_list_element_fields = [  # Nullable types are not supported.
+    pa.field(name="item", type=element_type, nullable=True)
+    for element_type in itertools.chain(
+        supported_fixed_list_element_types, unsupported_fixed_list_element_types
+    )
+] + [  # Not nullable unsupported type are unsupported.
+    pa.field(name="item", type=element_type, nullable=False)
+    for element_type in unsupported_fixed_list_element_types
+]
+
+unsupported_fixed_list_types = (
+    [
+        pa.list_(element_field, 1)
+        for element_field in unsupported_fixed_list_element_fields
+    ] +
+    # Fixed list with amount of elements exceeding the supported limit.
+    [pa.list_(
+        pa.field("item", pa.int64(), nullable=False), np.iinfo(np.int32).max
+    )]
+)
+
+invalid_fixed_list_types = [
+    # Fixed list 0 elements.
+    pa.list_(pa.field("item", pa.int64(), nullable=False), 0),
+]
+
+
+def test_vectors(session, clean_bucket_name):
+    """
+    Test table with efficient vector type - pa.FixedSizeListArray[not nullable numeric].
+    """
+    dimension = 100
+    element_type = pa.float32()
+    num_rows = 50
+
+    columns = pa.schema(
+        [("id", pa.int64()), ("vec", pa.list_(pa.field(name="item", type=element_type, nullable=False), dimension),)]
+    )
+    ids = range(num_rows)
+    expected = pa.table(
+        schema=columns,
+        data=[
+            ids,
+            [[i] * dimension for i in ids],
+        ],
+    )
+
+    with prepare_data(session, clean_bucket_name, "s", "t", expected) as t:
+        assert t.arrow_schema == columns
+
+        # Full scan.
+        actual = t.select().read_all()
+        assert actual == expected
+
+        # Select by id.
+        select_id = random.randint(0, num_rows)
+        actual = t.select(predicate=(t["id"] == select_id)).read_all()
+        assert actual.to_pydict()["vec"] == [[select_id] * dimension]
+        assert actual == expected.filter(pc.field("id") == select_id)
+
+
+def convert_scalar_type_pyarrow_to_numpy(arrow_type: pa.DataType):
+    return pa.array([], type=arrow_type).to_numpy().dtype.type
+
+
+def generate_random_pyarrow_value(
+    element: Union[pa.DataType, pa.Field], nulls_prob: float = 0.2
+) -> Any:
+    """
+    Generates a random value compatible with the provided PyArrow type.
+
+    Args:
+        element: The pyarrow field/type to generate values for.
+        nulls_prob: Probability of creating nulls.
+    """
+    assert 0 <= nulls_prob <= 1
+
+    nullable = True
+
+    # Convert Field to DataType.
+    if isinstance(element, pa.DataType):
+        pa_type = element
+    elif isinstance(element, pa.Field):
+        pa_type = element.type
+        nullable = element.nullable
+    else:
+        raise TypeError(
+            f"Expected pyarrow.DataType or pyarrow.Field, got {type(element)}"
+        )
+
+    if nullable and random.random() < nulls_prob:
+        return None
+
+    if pa.types.is_boolean(pa_type):
+        return random.choice([True, False])
+    if pa.types.is_integer(pa_type):
+        np_type = convert_scalar_type_pyarrow_to_numpy(pa_type)
+        iinfo = np.iinfo(np_type)
+        return np.random.randint(iinfo.min, iinfo.max, dtype=np_type)
+    if pa.types.is_floating(pa_type):
+        np_type = convert_scalar_type_pyarrow_to_numpy(pa_type)
+        finfo = np.finfo(np_type)
+        return np_type(random.uniform(float(finfo.min), float(finfo.max)))
+    if pa.types.is_string(pa_type) or pa.types.is_large_string(pa_type):
+        return "".join(
+            random.choices("abcdefghijklmnopqrstuvwxyz ", k=random.randint(5, 20))
+        )
+    if pa.types.is_binary(pa_type) or pa.types.is_large_binary(pa_type):
+        return random.randbytes(random.randint(5, 20))
+    if pa.types.is_timestamp(pa_type):
+        # Generate a random timestamp within a range (e.g., last 10 years)
+        start_datetime = datetime.datetime(2015, 1, 1, tzinfo=datetime.timezone.utc)
+        end_datetime = datetime.datetime(2025, 1, 1, tzinfo=datetime.timezone.utc)
+        random_seconds = random.uniform(
+            0, (end_datetime - start_datetime).total_seconds()
+        )
+        return start_datetime + datetime.timedelta(seconds=random_seconds)
+    if pa.types.is_date(pa_type):
+        start_date = datetime.date(2000, 1, 1)
+        end_date = datetime.date(2025, 1, 1)
+        random_days = random.randint(0, (end_date - start_date).days)
+        return start_date + datetime.timedelta(days=random_days)
+    if pa.types.is_time(pa_type):
+        return datetime.time(
+            random.randint(0, 23), random.randint(0, 59), random.randint(0, 59)
+        )
+    if pa.types.is_decimal(pa_type):
+        pa_type = cast(pa.Decimal128Type, pa_type)
+        decimal_value = decimal.Decimal(
+            round(random.uniform(-1000.0, 1000.0), pa_type.precision)
+        )
+        quantize_template = decimal.Decimal("1e-%d" % pa_type.scale)
+        return decimal_value.quantize(quantize_template)
+    if pa.types.is_null(pa_type):  # Explicit NullType
+        return None
+    if pa.types.is_list(pa_type) or pa.types.is_fixed_size_list(pa_type):
+        # For ListType, recursively generate elements for the value_type
+        pa_type = (
+            cast(pa.FixedSizeListType, pa_type)
+            if pa.types.is_fixed_size_list(pa_type)
+            else cast(pa.ListType, pa_type)
+        )
+        list_size = (
+            pa_type.list_size
+            if pa.types.is_fixed_size_list(pa_type)
+            else random.randint(0, 5)
+        )
+        list_elements = [
+            generate_random_pyarrow_value(pa_type.value_field, nulls_prob)
+            for _ in range(list_size)
+        ]
+        return list_elements
+    if pa.types.is_struct(pa_type):
+        struct_dict = {}
+        for field in cast(pa.StructType, pa_type):
+            # Recursively generate value for each field in the struct
+            struct_dict[field.name] = generate_random_pyarrow_value(field, nulls_prob)
+        return struct_dict
+    if pa.types.is_map(pa_type):
+        num_entries = random.randint(0, 3)  # Random number of map entries
+        pa_type = cast(pa.MapType, pa_type)
+        return {
+            generate_random_pyarrow_value(pa_type.key_field, nulls_prob): generate_random_pyarrow_value(
+                pa_type.item_field, nulls_prob)
+            for _ in range(num_entries)
+        }
+
+    raise NotImplementedError(
+        f"Generation for PyArrow type {pa_type} not implemented yet."
+    )
+
+
+@pytest.mark.parametrize("element_field", supported_fixed_list_element_fields)
+def test_fixed_list_type_values(session, clean_bucket_name, element_field):
+    list_size = random.randint(1, 1000)
+    num_rows = random.randint(1, 100)
+
+    vec_type = pa.list_(element_field, list_size)
+    schema = pa.schema(
+        {"id": pa.int64(), "vec": vec_type, "random_int": pa.int64()})
+    expected = pa.table(
+        schema=schema,
+        data=[list(range(num_rows))] + [[generate_random_pyarrow_value(schema.field(col_name)) for _ in range(num_rows)]
+                                        for col_name in
+                                        schema.names[1:]],
+    )
+
+    with prepare_data(session, clean_bucket_name, "s", "t", expected) as table:
+        assert table.arrow_schema == schema
+        actual = table.select().read_all()
+        assert actual == expected
+
+
+@pytest.mark.parametrize("list_type", unsupported_fixed_list_types)
+def test_unsupported_fixed_list_types(session, clean_bucket_name, list_type):
+    schema = pa.schema({"fixed_list": list_type})
+    empty_table = pa.table(schema=schema, data=[[]])
+
+    with pytest.raises((vastdb.errors.BadRequest, vastdb.errors.NotSupported), match=r'TabularUnsupportedColumnType'):
+        with prepare_data(session, clean_bucket_name, "s", "t", empty_table):
+            pass
+
+
+@pytest.mark.parametrize("list_type", invalid_fixed_list_types)
+def test_invalid_fixed_list_types(session, clean_bucket_name, list_type):
+    schema = pa.schema({"fixed_list": list_type})
+    empty_table = pa.table(schema=schema, data=[[]])
+
+    with pytest.raises(vastdb.errors.BadRequest, match=r'TabularInvalidColumnTypeParam'):
+        with prepare_data(session, clean_bucket_name, "s", "t", empty_table):
+            pass
+
+
+def test_invalid_values_fixed_list(session, clean_bucket_name):
+    dimension = 10
+    element_type = pa.float32()
+
+    col_name = "vec"
+    schema = pa.schema([(col_name, pa.list_(pa.field(name="item", type=element_type, nullable=False), dimension))])
+    empty_table = pa.table(schema=schema, data=[[]])
+
+    with prepare_data(session, clean_bucket_name, "s", "t", empty_table) as table:
+        invalid_fields = [
+            pa.field(col_name, pa.list_(pa.field(name="item", type=element_type, nullable=False), dimension - 1)),
+            pa.field(col_name, pa.list_(pa.field(name="item", type=element_type, nullable=False), dimension + 1)),
+            pa.field(col_name, pa.list_(pa.field(name="item", type=element_type, nullable=True), dimension)),
+            schema.field(0).with_nullable(False),
+        ]
+        for field in invalid_fields:
+            # Everything that could be null should be in order to be invalid regarding the values and not just the type.
+            rb = pa.record_batch(
+                schema=pa.schema([field]),
+                data=[[[1] * field.type.list_size]]
+            )
+            with pytest.raises((vastdb.errors.BadRequest, vastdb.errors.NotFound, vastdb.errors.NotSupported),
+                               match=r'(TabularInvalidColumnTypeParam)|(TabularUnsupportedColumnType)|(TabularMismatchColumnType)'):
+                table.insert(rb)
+
+        # Amount of elements in fixed list is not equal to the list size is enforced by Arrow.
+        with pytest.raises(pa.ArrowInvalid):
+            # Insert with empty list.
+            pa.record_batch(
+                schema=schema,
+                data=[[[generate_random_pyarrow_value(element_type, 0) for _ in range(dimension + 1)]]],
+            )
vastdb/tests/test_imports.py CHANGED
@@ -222,6 +222,45 @@ def test_zip_imports(zip_import_session, clean_bucket_name, s3):
     # Step 3: Import files into the table
     attempt_import(zip_import_session, clean_bucket_name, 's1', 't1', files, key_names=['id', 'symbol'])

+    # Step 4: Construct expected rows
+    expected_rows = []
+    for i in range(num_rows):
+        row = {
+            'vastdb_rowid': 10 + i,  # Initial vastdb_rowid values (10-19)
+            'id': i,  # ID values (0-9)
+            'symbol': chr(ord('a') + i),  # Symbol values ('a' to 'j')
+            'feature0': 0 * 10 + i,  # Values from file 1 (0-9)
+            'feature1': 1 * 10 + i,  # Values from file 2 (10-19)
+            'feature2': 2 * 10 + i,  # Values from file 3 (20-29)
+            'feature3': 3 * 10 + i,  # Values from file 4 (30-39)
+            'feature4': 4 * 10 + i,  # Values from file 5 (40-49)
+        }
+        expected_rows.append(row)
+
+    # Step 5: Query the actual data from the table
+    with zip_import_session.transaction() as tx:
+        t = tx.bucket(clean_bucket_name).schema('s1').table('t1')
+        arrow_table = t.select().read_all()
+        actual_data = arrow_table.to_pydict()
+
+    # Step 6: Compare expected and actual data
+    num_actual_rows = len(next(iter(actual_data.values()), []))
+    assert num_actual_rows == len(expected_rows), f"Expected {len(expected_rows)} rows but got {num_actual_rows}"
+
+    # Convert expected_rows to a comparable format (pydict format)
+    expected_data = {k: [] for k in expected_rows[0].keys()}
+    for row in expected_rows:
+        for k, v in row.items():
+            expected_data[k].append(v)
+
+    # Check that all expected columns exist in actual data
+    for col in expected_data:
+        assert col in actual_data, f"Expected column {col} not found in actual data"
+
+    # Compare column values
+    for col in expected_data:
+        assert actual_data[col] == expected_data[col], f"Values in column {col} don't match expected values"
+

 def test_zip_imports_scale(zip_import_session, clean_bucket_name, s3):
     """Verify that many key names, and large amounts of data of different kind work as expected."""
@@ -12,11 +12,15 @@ from .util import prepare_data
 def test_nested_select(session, clean_bucket_name):
     columns = pa.schema([
         ('l', pa.list_(pa.int8())),
+        ('fl', pa.list_(pa.field(name='item', type=pa.int64(), nullable=False), 2)),
+        ('lfl', pa.list_(pa.list_(pa.field(name='item', type=pa.int64(), nullable=False), 2))),
         ('m', pa.map_(pa.utf8(), pa.float64())),
         ('s', pa.struct([('x', pa.int16()), ('y', pa.int32())])),
     ])
     expected = pa.table(schema=columns, data=[
         [[1], [], [2, 3], None],
+        [[1, 2], None, [3, 4], None],
+        [[[1, 2], [3, 4], [4, 5]], None, [[5, 6], [7, 8]], [None, None]],
         [None, {'a': 2.5}, {'b': 0.25, 'c': 0.025}, {}],
         [{'x': 1, 'y': None}, None, {'x': 2, 'y': 3}, {'x': None, 'y': 4}],
     ])
@@ -36,6 +40,7 @@ def test_nested_filter(session, clean_bucket_name):
     columns = pa.schema([
         ('x', pa.int64()),
         ('l', pa.list_(pa.int8())),
+        ('fl', pa.list_(pa.field(name='item', type=pa.int64(), nullable=False), 2)),
         ('y', pa.int64()),
         ('m', pa.map_(pa.utf8(), pa.float64())),
         ('z', pa.int64()),
@@ -45,6 +50,7 @@ def test_nested_filter(session, clean_bucket_name):
     expected = pa.table(schema=columns, data=[
         [1, 2, 3, None],
         [[1], [], [2, 3], None],
+        [[1, 2], None, [3, 4], None],
         [1, 2, None, 3],
         [None, {'a': 2.5}, {'b': 0.25, 'c': 0.025}, {}],
         [1, None, 2, 3],
@@ -72,22 +78,16 @@

 def test_nested_unsupported_filter(session, clean_bucket_name):
     columns = pa.schema([
-        ('x', pa.int64()),
         ('l', pa.list_(pa.int8())),
-        ('y', pa.int64()),
+        ('fl', pa.list_(pa.field(name='item', type=pa.int64(), nullable=False), 2)),
         ('m', pa.map_(pa.utf8(), pa.float64())),
-        ('z', pa.int64()),
         ('s', pa.struct([('x', pa.int16()), ('y', pa.int32())])),
-        ('w', pa.int64()),
     ])
     expected = pa.table(schema=columns, data=[
-        [1, 2, 3, None],
         [[1], [], [2, 3], None],
-        [1, 2, None, 3],
+        [[1, 2], None, [3, 4], None],
         [None, {'a': 2.5}, {'b': 0.25, 'c': 0.025}, {}],
-        [1, None, 2, 3],
         [{'x': 1, 'y': None}, None, {'x': 2, 'y': 3}, {'x': None, 'y': 4}],
-        [None, 1, 2, 3],
     ])

     with prepare_data(session, clean_bucket_name, 's', 't', expected) as t:
@@ -95,6 +95,9 @@ def test_nested_unsupported_filter(session, clean_bucket_name):
         with pytest.raises(NotImplementedError):
             list(t.select(predicate=(t['l'].isnull())))

+        with pytest.raises(NotImplementedError):
+            list(t.select(predicate=(t['fl'].isnull())))
+
         with pytest.raises(NotImplementedError):
             list(t.select(predicate=(t['m'].isnull())))

@@ -106,6 +109,7 @@ def test_nested_subfields_predicate_pushdown(session, clean_bucket_name):
     columns = pa.schema([
         ('x', pa.int64()),
         ('l', pa.list_(pa.int8())),
+        ('fl', pa.list_(pa.field(name='item', type=pa.int64(), nullable=False), 2)),
         ('y', pa.int64()),
         ('m', pa.map_(pa.utf8(), pa.float64())),
         ('z', pa.int64()),
@@ -122,6 +126,7 @@ def test_nested_subfields_predicate_pushdown(session, clean_bucket_name):
     expected = pa.table(schema=columns, data=[
         [1, 2, 3, None],
         [[1], [], [2, 3], None],
+        [[1, 2], None, [3, 4], None],
         [1, 2, None, 3],
         [None, {'a': 2.5}, {'b': 0.25, 'c': 0.025}, {}],
         [1, None, 2, 3],
vastdb/tests/test_tables.py CHANGED
@@ -112,13 +112,18 @@ def test_insert_empty(session, clean_bucket_name):
     data = [[None] * 5, [None] * 5]
     all_nulls = pa.table(schema=columns, data=data)
     no_columns = all_nulls.select([])
+    no_rows = pa.table(schema=columns, data=[[] for _ in columns])

     with session.transaction() as tx:
         t = tx.bucket(clean_bucket_name).create_schema('s').create_table('t', columns)
         t.insert(all_nulls)
+
         with pytest.raises(errors.NotImplemented):
             t.insert(no_columns)

+        row_ids = t.insert(no_rows).to_pylist()
+        assert row_ids == []
+


 def test_exists(session, clean_bucket_name):
@@ -140,7 +145,8 @@
         assert s.tables() == [t]


-def test_list_tables(session, clean_bucket_name):
+@pytest.mark.parametrize("num_tables,page_size", [(10, 3)])
+def test_list_tables(session, clean_bucket_name, num_tables, page_size):
     with session.transaction() as tx:
         s = tx.bucket(clean_bucket_name).create_schema('s1')
         assert s.tables() == []
@@ -148,12 +154,14 @@

         tables = [
             s.create_table(f't{i}', pa.schema([(f'x{i}', pa.int64())]))
-            for i in range(10)
+            for i in range(num_tables)
         ]
         assert tables == s.tables()
         tablenames = [t.name for t in tables]
         assert s.tablenames() == tablenames

+        assert s.tablenames(page_size=page_size) == tablenames
+


 def test_update_table(session, clean_bucket_name):
     columns = pa.schema([
@@ -388,6 +396,7 @@ def test_types(session, clean_bucket_name):
     assert select(None) == expected
     for t in [table, ibis._]:
         assert select(t['tb'] == False) == expected.filter(pc.field('tb') == False)  # noqa: E712
+        assert select(t['tb'] == True) == expected.filter(pc.field('tb') == True)  # noqa: E712
         assert select(t['a1'] == 2) == expected.filter(pc.field('a1') == 2)
         assert select(t['a2'] == 2000) == expected.filter(pc.field('a2') == 2000)
         assert select(t['a4'] == 222111122) == expected.filter(pc.field('a4') == 222111122)
@@ -424,6 +433,58 @@ def test_types(session, clean_bucket_name):
         assert select(t['ts9'] == ts_literal) == expected.filter(pc.field('ts9') == ts_literal)


+@pytest.mark.parametrize("arrow_type,internal_support", [
+    # Types not supported by Vast.
+    (pa.null(), False),
+    (pa.dictionary(pa.int64(), pa.int64()), False),
+    (pa.dense_union([pa.field('1', pa.int32()), pa.field('2', pa.int64())]), False),
+    # Arrow.FixedSizeBinaryType is not supported by Ibis, but Vast supports it internally.
+    (pa.binary(1), True)
+])
+def test_unsupported_types(session, clean_bucket_name, arrow_type, internal_support):
+    """ Test that unsupported types cannot be used in table creation or modification."""
+    unsupported_field = pa.field('u', arrow_type)
+    schema_name = 's'
+    table_name = 't'
+
+    # Create the schema
+    with session.transaction() as tx:
+        tx.bucket(clean_bucket_name).create_schema(schema_name)
+
+    # Creation of a table with unsupported types should fail
+    with session.transaction() as tx:
+        s = tx.bucket(clean_bucket_name).schema(schema_name)
+        with pytest.raises(errors.NotSupportedSchema):
+            s.create_table(table_name, pa.schema([unsupported_field]))
+
+    with session.transaction() as tx:
+        tx.bucket(clean_bucket_name).schema(schema_name).create_table(table_name,
+                                                                      pa.schema([pa.field('a', pa.int32())]))
+
+    # Adding unsupported types to an existing table should fail
+    with session.transaction() as tx:
+        t = tx.bucket(clean_bucket_name).schema(schema_name).table(table_name)
+        with pytest.raises(errors.NotSupportedSchema):
+            t.add_column(pa.schema([unsupported_field]))
+
+    if internal_support:
+        # Using internal API to add unsupported types
+        with session.transaction() as tx:
+            tx._rpc.api.add_columns(clean_bucket_name, schema_name, table_name, pa.schema([unsupported_field]),
+                                    txid=tx.txid)
+
+        # Attempt to open a table with unsupported types should fail
+        with session.transaction() as tx:
+            s = tx.bucket(clean_bucket_name).schema(schema_name)
+            with pytest.raises(errors.NotSupportedSchema):
+                s.table(table_name)
+
+        # Even though the table is with unsupported types, it should still be listed
+        with session.transaction() as tx:
+            s = tx.bucket(clean_bucket_name).schema(schema_name)
+            assert [table_name] == s.tablenames()
+
+
 def test_unsigned_filters(session, clean_bucket_name):
     columns = pa.schema([
         ('a', pa.uint8()),
vastdb-1.3.10.dist-info/METADATA → vastdb-1.3.11.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vastdb
-Version: 1.3.10
+Version: 1.3.11
 Summary: VAST Data SDK
 Home-page: https://github.com/vast-data/vastdb_sdk
 Author: VAST DATA
vastdb-1.3.10.dist-info/RECORD → vastdb-1.3.11.dist-info/RECORD RENAMED
@@ -1,13 +1,13 @@
 vastdb/__init__.py,sha256=uf-AXdzsD4nPxFP7WxkcAXGG0whv8BHLrrXCJtsPGaQ,436
-vastdb/_internal.py,sha256=tGNU-9wOtRoK7OXFmX1-uEgQRjpKQXPA0H4rZy86-JM,107257
+vastdb/_internal.py,sha256=YoZGgliQfNYNKuKG8M9ziZpu4R1pmp3PLdLcyOgo9Hc,108129
 vastdb/bucket.py,sha256=aomUbrfK5Oa6FdGPVsoBXgRW39IzYnmsorF8642r990,2549
 vastdb/config.py,sha256=OehnsWrjzv0-SUouEXmkrKBugiWyhXOn4XiSLV3s9yk,2342
-vastdb/conftest.py,sha256=X2kVveySPQYZlVBXUMoo7Oea5IsvmJzjdqq3fpH2kVw,3469
-vastdb/errors.py,sha256=B_FNFONDE8apoTRL8wkMNjUJWAjXu36mO0HI4cGSBgY,4328
+vastdb/conftest.py,sha256=Cl98Hg4kkLmx83F5dFMbVb-sTnn0zHxruE5B1hYXbMk,3866
+vastdb/errors.py,sha256=NiKdwbfVsWJIixP2Tf3JgiBoEt8rRaZ0VeCyD9mXnoM,5645
 vastdb/features.py,sha256=ivYbvhiGA858B00vhs_CNzlVV9QDUe53yW6V3J5EoxM,1874
-vastdb/schema.py,sha256=UR1WzQvfAdnpDaNsEaGZLYGC65Blri5MYOWinCcl8Hc,6552
+vastdb/schema.py,sha256=5BZ0f3b_c-fGRKAaBBL6B3avHel5EDwwxte7t17WeTw,6718
 vastdb/session.py,sha256=toMR0BXwTaECdWDKnIZky1F3MA1SmelRBiqCrqQ3GCM,2067
-vastdb/table.py,sha256=NGImmz_KltU80B0u-CYDgEdGOMHSppf7mmVs72WD8wM,35937
+vastdb/table.py,sha256=fUAmOaVdMSzas1XHldRgT0UbHEG-hYJjuWkDIz3hchs,36936
 vastdb/transaction.py,sha256=NlVkEowJ_pmtffjWBBDaKExYDKPekjSZyj_fK_bZPJE,3026
 vastdb/util.py,sha256=8CUnVRsJukC3uNHNoB5D0qPf0FxS8OSdVB84nNoLJKc,6290
 vastdb/bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -41,12 +41,13 @@ vastdb/bench/perf_bench/query/query_vastdb.py,sha256=SZYem_EmsaynEftAa_VFobjSJZD
 vastdb/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vastdb/tests/metrics.py,sha256=ZCSeBYFSPMG3yI0JrAHs2CrY6wFjx_5GwRTYHVAwLKA,1026
 vastdb/tests/test_duckdb.py,sha256=STw_1PwTQR8Naz6s0p6lQTV1ZTKKhe3LPBUbhqzTCu0,1880
-vastdb/tests/test_imports.py,sha256=R-ExC6IYaf4REGQw0v7iVAz7TPY9vd8S3S892vy86R0,20011
-vastdb/tests/test_nested.py,sha256=LPU6uV3Ri23dBzAEMFQqRPbqapV5LfmiHSHkhILPIY0,6332
+vastdb/tests/test_fixed_list.py,sha256=qwtFNvw5fdMkOsYAcFfqPN3JOnJn31XGYtFWVe0vuOQ,11187
+vastdb/tests/test_imports.py,sha256=1Xi5s0qWxuoVunW5iMQGzofTNOXxXP8eOARs9HWOiGE,21734
+vastdb/tests/test_nested.py,sha256=c7q9a3MsyDymqAtShPC4cMHlzjCr18kbu_Db3u_c4IQ,6893
 vastdb/tests/test_projections.py,sha256=3y1kubwVrzO-xoR0hyps7zrjOJI8niCYspaFTN16Q9w,4540
 vastdb/tests/test_sanity.py,sha256=bv1ypGDzvOgmMvGbucDYiLQu8krQLlE6NB3M__q87x8,3303
 vastdb/tests/test_schemas.py,sha256=l70YQMlx2UL1KRQhApriiG2ZM7GJF-IzWU31H3Yqn1U,3312
-vastdb/tests/test_tables.py,sha256=wBPUewfJVEJNyDHwO49qld3lMVjVjUiAzP7ngX07fFA,48478
+vastdb/tests/test_tables.py,sha256=Bl0ldJApPCFEL94jRNycsCi8M_2jTHWDXiA-7JHVlzk,51260
 vastdb/tests/test_util.py,sha256=n7gvT5Wg6b6bxgqkFXkYqvFd_W1GlUdVfmPv66XYXyA,1956
 vastdb/tests/util.py,sha256=YsCBCcx7n1QOH-IPDpCsl6KEaUQQJRZwGPeayijHNb4,1307
 vastdb/vast_flatbuf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -209,8 +210,8 @@ vastdb/vast_flatbuf/tabular/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMp
 vastdb/vast_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vastdb/vast_tests/test_ha.py,sha256=744P4G6VJ09RIkHhMQL4wlipCBJWQVMhyvUrSc4k1HQ,975
 vastdb/vast_tests/test_scale.py,sha256=5jGwOdZH6Tv5tPdZYPWoqcxOceI2jA5i2D1zNKZHER4,3958
-vastdb-1.3.10.dist-info/LICENSE,sha256=obffan7LYrq7hLHNrY7vHcn2pKUTBUYXMKu-VOAvDxU,11333
-vastdb-1.3.10.dist-info/METADATA,sha256=BFeEhZ0mgwoCyAKM_EkijrPcI5RWTME4tDtdq-fcWwc,1341
-vastdb-1.3.10.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
-vastdb-1.3.10.dist-info/top_level.txt,sha256=nnKAaZaQa8GFbYpWAexr_B9HrhonZbUlX6hL6AC--yA,7
-vastdb-1.3.10.dist-info/RECORD,,
+vastdb-1.3.11.dist-info/LICENSE,sha256=obffan7LYrq7hLHNrY7vHcn2pKUTBUYXMKu-VOAvDxU,11333
+vastdb-1.3.11.dist-info/METADATA,sha256=KwME3q5diBEN8GTagURPqGDDBv41TS7xFN348qr26lw,1341
+vastdb-1.3.11.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
+vastdb-1.3.11.dist-info/top_level.txt,sha256=nnKAaZaQa8GFbYpWAexr_B9HrhonZbUlX6hL6AC--yA,7
+vastdb-1.3.11.dist-info/RECORD,,