vastdb 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -130,46 +130,13 @@ def get_unit_to_flatbuff_time_unit(type):
  class Predicate:
  def __init__(self, schema: 'pa.Schema', expr: ibis.expr.types.BooleanColumn):
  self.schema = schema
+ index = itertools.count() # used to generate leaf column positions for VAST QueryData RPC
+ # Arrow schema contains the top-level columns, where each column may include multiple subfields
+ # We use DFS to enumerate all the sub-columns, using `index` as an ID allocator
+ nodes = [FieldNode(field, index) for field in schema]
+ self.nodes_map = {node.field.name: node for node in nodes}
  self.expr = expr

- def get_field_indexes(self, field: 'pa.Field', field_name_per_index: list) -> None:
- field_name_per_index.append(field.name)
-
- if isinstance(field.type, pa.StructType):
- flat_fields = field.flatten()
- elif isinstance(field.type, pa.MapType):
- flat_fields = [pa.field(f'{field.name}.entries', pa.struct([field.type.key_field, field.type.item_field]))]
- elif isinstance(field.type, pa.ListType):
- flat_fields = [pa.field(f'{field.name}.{field.type.value_field.name}', field.type.value_field.type)]
- else:
- return
-
- for flat_field in flat_fields:
- self.get_field_indexes(flat_field, field_name_per_index)
-
- @property
- def field_name_per_index(self):
- if self._field_name_per_index is None:
- _field_name_per_index = []
- for field in self.schema:
- self.get_field_indexes(field, _field_name_per_index)
- self._field_name_per_index = {field: index for index, field in enumerate(_field_name_per_index)}
- return self._field_name_per_index
-
- def get_projections(self, builder: 'flatbuffers.builder.Builder', field_names: Optional[List[str]] = None):
- if field_names is None:
- field_names = self.field_name_per_index.keys()
- projection_fields = []
- for field_name in field_names:
- fb_field_index.Start(builder)
- fb_field_index.AddPosition(builder, self.field_name_per_index[field_name])
- offset = fb_field_index.End(builder)
- projection_fields.append(offset)
- fb_source.StartProjectionVector(builder, len(projection_fields))
- for offset in reversed(projection_fields):
- builder.PrependUOffsetTRelative(offset)
- return builder.EndVector()
-
  def serialize(self, builder: 'flatbuffers.builder.Builder'):
  from ibis.expr.operations.generic import (
  IsNull,
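Aside: the new `__init__` replaces the flat `field_name_per_index` mapping with a tree of `FieldNode` objects, handing out column positions from a shared `itertools.count()` in DFS order. `FieldNode` itself is defined elsewhere in internal_commands.py and is not part of this hunk; the sketch below only illustrates the idea, and its details (constructor signature, whether inner nodes consume an index) are assumptions.

    import itertools
    import pyarrow as pa

    class FieldNode:
        """Sketch: assign DFS positions to an Arrow field and its sub-fields."""
        def __init__(self, field: pa.Field, index):
            self.field = field
            self.index = next(index)  # position allocated in DFS (pre-order) traversal
            if pa.types.is_struct(field.type):
                children = [field.type.field(i) for i in range(field.type.num_fields)]
            elif pa.types.is_map(field.type):
                children = [pa.field('entries', pa.struct([field.type.key_field, field.type.item_field]))]
            elif pa.types.is_list(field.type):
                children = [field.type.value_field]
            else:
                children = []
            self.children = [FieldNode(child, index) for child in children]

    schema = pa.schema([
        ('x', pa.int64()),
        ('s', pa.struct([('a', pa.int16()), ('b', pa.int32())])),
    ])
    index = itertools.count()
    nodes = [FieldNode(f, index) for f in schema]
    print([(n.field.name, n.index) for n in nodes])  # 'x' -> 0, 's' -> 1 ('s.a' and 's.b' follow)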
@@ -204,8 +171,6 @@ class Predicate:
  Between: self.build_between,
  }

- positions_map = dict((f.name, index) for index, f in enumerate(self.schema)) # TODO: BFS
-
  self.builder = builder

  offsets = []
@@ -261,7 +226,11 @@ class Predicate:
  elif prev_field_name != field_name:
  raise NotImplementedError(self.expr)

- column_offset = self.build_column(position=positions_map[field_name])
+ node = self.nodes_map[field_name]
+ # TODO: support predicate pushdown for leaf nodes (ORION-160338)
+ if node.children:
+ raise NotImplementedError(node.field) # no predicate pushdown for nested columns
+ column_offset = self.build_column(position=node.index)
  field = self.schema.field(field_name)
  for literal in literals:
  args_offsets = [column_offset]
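Aside: the lookup above rejects predicate pushdown for any column that has child nodes (lists, maps, structs). Independently of the SDK code, pyarrow can distinguish nested types from flat ones; a small illustration:

    import pyarrow as pa

    schema = pa.schema([
        ('x', pa.int64()),
        ('l', pa.list_(pa.int8())),
        ('m', pa.map_(pa.utf8(), pa.float64())),
        ('s', pa.struct([('a', pa.int16())])),
    ])
    for field in schema:
        # pa.types.is_nested is True for list/map/struct types, False for flat ones
        print(field.name, pa.types.is_nested(field.type))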
@@ -839,12 +808,13 @@ class VastdbApi:
  return prefix

  def _fill_common_headers(self, txid=0, client_tags=[], version_id=1):
- common_headers = {'tabular-txid': str(txid), 'tabular-api-version-id': str(version_id),
- 'tabular-client-name': 'tabular-api'}
- for tag in client_tags:
- common_headers['tabular-client-tags-%d' % client_tags.index(tag)] = tag
+ common_headers = {
+ 'tabular-txid': str(txid),
+ 'tabular-api-version-id': str(version_id),
+ 'tabular-client-name': 'tabular-api'
+ }

- return common_headers
+ return common_headers | {f'tabular-client-tags-{index}': tag for index, tag in enumerate(client_tags)}

  def _check_res(self, res, cmd="", expected_retvals=[]):
  if exc := errors.from_response(res):
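Aside: the rewritten helper builds the base headers and merges in one tag header per client tag via the dict union operator (Python 3.9+), which also fixes the old `client_tags.index(tag)` lookup that misnumbered duplicate tags. A standalone sketch of the same pattern:

    def fill_common_headers(txid=0, client_tags=(), version_id=1):
        common_headers = {
            'tabular-txid': str(txid),
            'tabular-api-version-id': str(version_id),
            'tabular-client-name': 'tabular-api',
        }
        # '|' merges the two dicts into a new one (right-hand side wins on key conflicts)
        return common_headers | {f'tabular-client-tags-{i}': tag for i, tag in enumerate(client_tags)}

    print(fill_common_headers(txid=7, client_tags=['alpha', 'beta']))
    # {'tabular-txid': '7', 'tabular-api-version-id': '1', 'tabular-client-name': 'tabular-api',
    #  'tabular-client-tags-0': 'alpha', 'tabular-client-tags-1': 'beta'}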
@@ -952,8 +922,7 @@ class VastdbApi:
  res_headers = res.headers
  next_key = int(res_headers['tabular-next-key'])
  is_truncated = res_headers['tabular-is-truncated'] == 'true'
- flatbuf = b''.join(res.iter_content(chunk_size=128))
- lists = list_schemas.GetRootAs(flatbuf)
+ lists = list_schemas.GetRootAs(res.content)
  bucket_name = lists.BucketName().decode()
  if not bucket.startswith(bucket_name):
  raise ValueError(f'bucket: {bucket} did not start from {bucket_name}')
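Aside: several hunks below make the same simplification. `b''.join(res.iter_content(chunk_size=128))` buffers the entire body anyway, and `requests` already exposes that buffered body as `res.content`, so the explicit join is redundant. A quick illustration of the equivalence (assumes network access; the URL is just a placeholder):

    import requests

    res = requests.get('https://example.com')
    joined = b''.join(res.iter_content(chunk_size=128))
    assert joined == res.content  # .content is the fully buffered response body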
@@ -976,8 +945,7 @@ class VastdbApi:
  res = self.session.get(self._api_prefix(bucket=bucket, command="list", url_params=url_params), headers={}, stream=True)
  self._check_res(res, "list_snapshots")

- out = b''.join(res.iter_content(chunk_size=128))
- xml_str = out.decode()
+ xml_str = res.content.decode()
  xml_dict = xmltodict.parse(xml_str)
  list_res = xml_dict['ListBucketResult']
  is_truncated = list_res['IsTruncated'] == 'true'
@@ -1059,8 +1027,7 @@ class VastdbApi:
  res = self.session.get(self._api_prefix(bucket=bucket, schema=schema, table=name, command="stats", url_params=url_params), headers=headers)
  self._check_res(res, "get_table_stats", expected_retvals)

- flatbuf = b''.join(res.iter_content(chunk_size=128))
- stats = get_table_stats.GetRootAs(flatbuf)
+ stats = get_table_stats.GetRootAs(res.content)
  num_rows = stats.NumRows()
  size_in_bytes = stats.SizeInBytes()
  is_external_rowid_alloc = stats.IsExternalRowidAlloc()
@@ -1159,8 +1126,7 @@ class VastdbApi:
  res_headers = res.headers
  next_key = int(res_headers['tabular-next-key'])
  is_truncated = res_headers['tabular-is-truncated'] == 'true'
- flatbuf = b''.join(res.iter_content(chunk_size=128))
- lists = list_tables.GetRootAs(flatbuf)
+ lists = list_tables.GetRootAs(res.content)
  bucket_name = lists.BucketName().decode()
  schema_name = lists.SchemaName().decode()
  if not bucket.startswith(bucket_name): # ignore snapshot name
@@ -1288,11 +1254,7 @@ class VastdbApi:
  next_key = int(res_headers['tabular-next-key'])
  is_truncated = res_headers['tabular-is-truncated'] == 'true'
  count = int(res_headers['tabular-list-count'])
- columns = []
- if not count_only:
- schema_buf = b''.join(res.iter_content(chunk_size=128))
- schema_out = pa.ipc.open_stream(schema_buf).schema
- columns = schema_out
+ columns = [] if count_only else pa.ipc.open_stream(res.content).schema

  return columns, next_key, is_truncated, count
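Aside: `pa.ipc.open_stream()` accepts a bytes-like buffer directly, so the response body can be handed to it without an intermediate join; the reader's `.schema` attribute is the schema of the serialized stream. A self-contained round trip:

    import pyarrow as pa

    schema = pa.schema([('name', pa.utf8()), ('value', pa.float64())])
    sink = pa.BufferOutputStream()
    with pa.ipc.new_stream(sink, schema) as writer:
        writer.write_batch(pa.record_batch([['a', 'b'], [1.0, 2.0]], schema=schema))
    buf = sink.getvalue()  # stands in for res.content

    reader = pa.ipc.open_stream(buf)
    assert reader.schema == schema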
 
@@ -1692,8 +1654,7 @@ class VastdbApi:
  res = self.session.get(self._api_prefix(bucket=bucket, schema=schema, table=table, command="projection-stats", url_params=url_params),
  headers=headers)
  if res.status_code == 200:
- flatbuf = b''.join(res.iter_content(chunk_size=128))
- stats = get_projection_table_stats.GetRootAs(flatbuf)
+ stats = get_projection_table_stats.GetRootAs(res.content)
  num_rows = stats.NumRows()
  size_in_bytes = stats.SizeInBytes()
  dirty_blocks_percentage = stats.DirtyBlocksPercentage()
@@ -1779,8 +1740,7 @@ class VastdbApi:
  next_key = int(res_headers['tabular-next-key'])
  is_truncated = res_headers['tabular-is-truncated'] == 'true'
  count = int(res_headers['tabular-list-count'])
- flatbuf = b''.join(res.iter_content(chunk_size=128))
- lists = list_projections.GetRootAs(flatbuf)
+ lists = list_projections.GetRootAs(res.content)
  bucket_name = lists.BucketName().decode()
  schema_name = lists.SchemaName().decode()
  table_name = lists.TableName().decode()
@@ -1827,13 +1787,8 @@ class VastdbApi:
  next_key = int(res_headers['tabular-next-key'])
  is_truncated = res_headers['tabular-is-truncated'] == 'true'
  count = int(res_headers['tabular-list-count'])
- columns = []
- if not count_only:
- schema_buf = b''.join(res.iter_content(chunk_size=128))
- schema_out = pa.ipc.open_stream(schema_buf).schema
- for f in schema_out:
- columns.append([f.name, f.type, f.metadata])
- # sort_type = f.metadata[b'VAST:sort_type'].decode()
+ columns = [] if count_only else [[f.name, f.type, f.metadata] for f in
+ pa.ipc.open_stream(res.content).schema]

  return columns, next_key, is_truncated, count

vastdb/session.py CHANGED
@@ -35,7 +35,7 @@ class Features:
  class Session:
  """VAST database session."""

- def __init__(self, access=None, secret=None, endpoint=None):
+ def __init__(self, access=None, secret=None, endpoint=None, ssl_verify=True):
  """Connect to a VAST Database endpoint, using specified credentials."""
  if access is None:
  access = os.environ['AWS_ACCESS_KEY_ID']
@@ -44,7 +44,7 @@ class Session:
  if endpoint is None:
  endpoint = os.environ['AWS_S3_ENDPOINT_URL']

- self.api = internal_commands.VastdbApi(endpoint, access, secret)
+ self.api = internal_commands.VastdbApi(endpoint, access, secret, ssl_verify=ssl_verify)
  version_tuple = tuple(int(part) for part in self.api.vast_version.split('.'))
  self.features = Features(version_tuple)
  self.s3 = boto3.client('s3',
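Aside: the new `ssl_verify` flag is threaded down to `VastdbApi`, so TLS verification can be relaxed for self-signed test clusters. A hedged usage sketch (endpoint and credentials are placeholders):

    from vastdb.session import Session

    # ssl_verify=True remains the default; pass False only for trusted test setups
    session = Session(
        access='AKIA...',                              # placeholder credentials
        secret='SECRET...',
        endpoint='https://vip-pool.example.internal',  # placeholder endpoint
        ssl_verify=False,
    )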
vastdb/table.py CHANGED
@@ -12,6 +12,7 @@ from typing import Any, Dict, List, Optional, Tuple, Union
  import backoff
  import ibis
  import pyarrow as pa
+ import requests

  from . import errors, internal_commands, schema, util

@@ -39,20 +40,44 @@ class TableStats:
  endpoints: Tuple[str, ...] = ()


+ RETRIABLE_ERRORS = (
+ errors.Slowdown,
+ requests.exceptions.ConnectionError,
+ )
+
+
  @dataclass
  class QueryConfig:
  """Query execution configuration."""

+ # allows server-side parallel processing by issuing multiple reads concurrently for a single RPC
  num_sub_splits: int = 4
+
+ # used to split the table into disjoint subsets of rows, to be processed concurrently using multiple RPCs
  num_splits: int = 1
+
+ # each endpoint will be handled by a separate worker thread
+ # a single endpoint can be specified more than once to benefit from multithreaded execution
  data_endpoints: Optional[List[str]] = None
+
+ # a subsplit fiber will finish after sending this number of rows back to the client
  limit_rows_per_sub_split: int = 128 * 1024
+
+ # each fiber will read the following number of rowgroups continuously before skipping
+ # in order to use semi-sorted projections this value must be 8
  num_row_groups_per_sub_split: int = 8
+
+ # can be disabled for benchmarking purposes
  use_semi_sorted_projections: bool = True
+
+ # used to estimate the number of splits, given the table's row count
  rows_per_split: int = 4000000
+
+ # used for worker threads' naming
  query_id: str = ""
- max_slowdown_retry: int = 10
- backoff_func: Any = field(default=backoff.on_exception(backoff.expo, errors.Slowdown, max_tries=max_slowdown_retry))
+
+ # allows retrying QueryData when the server is overloaded
+ backoff_func: Any = field(default=backoff.on_exception(backoff.expo, RETRIABLE_ERRORS, max_tries=10))


  @dataclass
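Aside: the config now retries on both server slowdown errors and transport-level connection errors, using the `backoff` library's exponential policy. A minimal standalone sketch of the same decorator pattern (with a stand-in exception instead of the SDK's error types):

    import backoff
    import requests

    class Slowdown(Exception):
        """Stand-in for errors.Slowdown."""

    RETRIABLE_ERRORS = (Slowdown, requests.exceptions.ConnectionError)

    # retry with exponential backoff (sleeping between attempts), up to 10 tries in total
    retry = backoff.on_exception(backoff.expo, RETRIABLE_ERRORS, max_tries=10)

    attempts = []

    @retry
    def flaky_rpc():
        attempts.append(None)
        if len(attempts) < 3:
            raise Slowdown('server overloaded')
        return 'ok'

    assert flaky_rpc() == 'ok'
    assert len(attempts) == 3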
@@ -271,7 +296,7 @@ class Table:
  return TableStats(**stats_tuple._asdict())

  def select(self, columns: Optional[List[str]] = None,
- predicate: ibis.expr.types.BooleanColumn = None,
+ predicate: Union[ibis.expr.types.BooleanColumn, ibis.common.deferred.Deferred] = None,
  config: Optional[QueryConfig] = None,
  *,
  internal_row_id: bool = False) -> pa.RecordBatchReader:
@@ -310,6 +335,9 @@ class Table:
  response_schema = internal_commands.get_response_schema(schema=query_schema, field_names=columns)
  return pa.RecordBatchReader.from_batches(response_schema, [])

+ if isinstance(predicate, ibis.common.deferred.Deferred):
+ predicate = predicate.resolve(self._ibis_table) # may raise if the predicate is invalid (e.g. wrong types / missing column)
+
  query_data_request = internal_commands.build_query_data_request(
  schema=query_schema,
  predicate=predicate,
@@ -1,11 +1,15 @@
+ import functools
  import itertools
+ import operator

  import pyarrow as pa
+ import pyarrow.compute as pc
+ import pytest

  from .util import prepare_data


- def test_nested(session, clean_bucket_name):
+ def test_nested_select(session, clean_bucket_name):
  columns = pa.schema([
  ('l', pa.list_(pa.int8())),
  ('m', pa.map_(pa.utf8(), pa.float64())),
@@ -26,3 +30,73 @@ def test_nested(session, clean_bucket_name):
  for cols in itertools.permutations(names, n):
  actual = pa.Table.from_batches(t.select(columns=cols))
  assert actual == expected.select(cols)
+
+
+ def test_nested_filter(session, clean_bucket_name):
+ columns = pa.schema([
+ ('x', pa.int64()),
+ ('l', pa.list_(pa.int8())),
+ ('y', pa.int64()),
+ ('m', pa.map_(pa.utf8(), pa.float64())),
+ ('z', pa.int64()),
+ ('s', pa.struct([('x', pa.int16()), ('y', pa.int32())])),
+ ('w', pa.int64()),
+ ])
+ expected = pa.table(schema=columns, data=[
+ [1, 2, 3, None],
+ [[1], [], [2, 3], None],
+ [1, 2, None, 3],
+ [None, {'a': 2.5}, {'b': 0.25, 'c': 0.025}, {}],
+ [1, None, 2, 3],
+ [{'x': 1, 'y': None}, None, {'x': 2, 'y': 3}, {'x': None, 'y': 4}],
+ [None, 1, 2, 3],
+ ])
+
+ with prepare_data(session, clean_bucket_name, 's', 't', expected) as t:
+ actual = pa.Table.from_batches(t.select())
+ assert actual == expected
+
+ names = list('xyzw')
+ for n in range(1, len(names) + 1):
+ for cols in itertools.permutations(names, n):
+ ibis_predicate = functools.reduce(
+ operator.and_,
+ (t[col] > 2 for col in cols))
+ actual = pa.Table.from_batches(t.select(predicate=ibis_predicate), t.arrow_schema)
+
+ arrow_predicate = functools.reduce(
+ operator.and_,
+ (pc.field(col) > 2 for col in cols))
+ assert actual == expected.filter(arrow_predicate)
+
+
+ def test_nested_unsupported_filter(session, clean_bucket_name):
+ columns = pa.schema([
+ ('x', pa.int64()),
+ ('l', pa.list_(pa.int8())),
+ ('y', pa.int64()),
+ ('m', pa.map_(pa.utf8(), pa.float64())),
+ ('z', pa.int64()),
+ ('s', pa.struct([('x', pa.int16()), ('y', pa.int32())])),
+ ('w', pa.int64()),
+ ])
+ expected = pa.table(schema=columns, data=[
+ [1, 2, 3, None],
+ [[1], [], [2, 3], None],
+ [1, 2, None, 3],
+ [None, {'a': 2.5}, {'b': 0.25, 'c': 0.025}, {}],
+ [1, None, 2, 3],
+ [{'x': 1, 'y': None}, None, {'x': 2, 'y': 3}, {'x': None, 'y': 4}],
+ [None, 1, 2, 3],
+ ])
+
+ with prepare_data(session, clean_bucket_name, 's', 't', expected) as t:
+
+ with pytest.raises(NotImplementedError):
+ list(t.select(predicate=(t['l'].isnull())))
+
+ with pytest.raises(NotImplementedError):
+ list(t.select(predicate=(t['m'].isnull())))
+
+ with pytest.raises(NotImplementedError):
+ list(t.select(predicate=(t['s'].isnull())))
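Aside: the new test combines per-column comparisons into one conjunction with `functools.reduce(operator.and_, ...)`; the same pattern works on the pyarrow compute expressions used for the expected result. A small self-contained run:

    import functools
    import operator

    import pyarrow as pa
    import pyarrow.compute as pc

    table = pa.table({'x': [1, 3, 5], 'y': [4, 1, 6]})

    cols = ['x', 'y']
    predicate = functools.reduce(operator.and_, (pc.field(col) > 2 for col in cols))
    print(table.filter(predicate).to_pydict())  # {'x': [5], 'y': [6]}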
@@ -7,6 +7,7 @@ import time
  from contextlib import closing
  from tempfile import NamedTemporaryFile

+ import ibis
  import pyarrow as pa
  import pyarrow.compute as pc
  import pyarrow.parquet as pq
@@ -215,46 +216,47 @@ def test_types(session, clean_bucket_name):
  [dt.datetime(2024, 4, 10, 12, 34, 56, 789789), dt.datetime(2025, 4, 10, 12, 34, 56, 789789), dt.datetime(2026, 4, 10, 12, 34, 56, 789789)],
  ])

- with prepare_data(session, clean_bucket_name, 's', 't', expected) as t:
+ with prepare_data(session, clean_bucket_name, 's', 't', expected) as table:
  def select(predicate):
- return pa.Table.from_batches(t.select(predicate=predicate))
+ return pa.Table.from_batches(table.select(predicate=predicate))

  assert select(None) == expected
- assert select(t['tb'] == False) == expected.filter(pc.field('tb') == False) # noqa: E712
- assert select(t['a1'] == 2) == expected.filter(pc.field('a1') == 2)
- assert select(t['a2'] == 2000) == expected.filter(pc.field('a2') == 2000)
- assert select(t['a4'] == 222111122) == expected.filter(pc.field('a4') == 222111122)
- assert select(t['b'] == 1.5) == expected.filter(pc.field('b') == 1.5)
- assert select(t['s'] == "v") == expected.filter(pc.field('s') == "v")
- assert select(t['d'] == 231.15) == expected.filter(pc.field('d') == 231.15)
- assert select(t['bin'] == b"\x01\x02") == expected.filter(pc.field('bin') == b"\x01\x02")
+ for t in [table, ibis._]:
+ assert select(t['tb'] == False) == expected.filter(pc.field('tb') == False) # noqa: E712
+ assert select(t['a1'] == 2) == expected.filter(pc.field('a1') == 2)
+ assert select(t['a2'] == 2000) == expected.filter(pc.field('a2') == 2000)
+ assert select(t['a4'] == 222111122) == expected.filter(pc.field('a4') == 222111122)
+ assert select(t['b'] == 1.5) == expected.filter(pc.field('b') == 1.5)
+ assert select(t['s'] == "v") == expected.filter(pc.field('s') == "v")
+ assert select(t['d'] == 231.15) == expected.filter(pc.field('d') == 231.15)
+ assert select(t['bin'] == b"\x01\x02") == expected.filter(pc.field('bin') == b"\x01\x02")

- date_literal = dt.date(2024, 4, 10)
- assert select(t['date'] == date_literal) == expected.filter(pc.field('date') == date_literal)
+ date_literal = dt.date(2024, 4, 10)
+ assert select(t['date'] == date_literal) == expected.filter(pc.field('date') == date_literal)

- time_literal = dt.time(12, 34, 56)
- assert select(t['t0'] == time_literal) == expected.filter(pc.field('t0') == time_literal)
+ time_literal = dt.time(12, 34, 56)
+ assert select(t['t0'] == time_literal) == expected.filter(pc.field('t0') == time_literal)

- time_literal = dt.time(12, 34, 56, 789000)
- assert select(t['t3'] == time_literal) == expected.filter(pc.field('t3') == time_literal)
+ time_literal = dt.time(12, 34, 56, 789000)
+ assert select(t['t3'] == time_literal) == expected.filter(pc.field('t3') == time_literal)

- time_literal = dt.time(12, 34, 56, 789789)
- assert select(t['t6'] == time_literal) == expected.filter(pc.field('t6') == time_literal)
+ time_literal = dt.time(12, 34, 56, 789789)
+ assert select(t['t6'] == time_literal) == expected.filter(pc.field('t6') == time_literal)

- time_literal = dt.time(12, 34, 56, 789789)
- assert select(t['t9'] == time_literal) == expected.filter(pc.field('t9') == time_literal)
+ time_literal = dt.time(12, 34, 56, 789789)
+ assert select(t['t9'] == time_literal) == expected.filter(pc.field('t9') == time_literal)

- ts_literal = dt.datetime(2024, 4, 10, 12, 34, 56)
- assert select(t['ts0'] == ts_literal) == expected.filter(pc.field('ts0') == ts_literal)
+ ts_literal = dt.datetime(2024, 4, 10, 12, 34, 56)
+ assert select(t['ts0'] == ts_literal) == expected.filter(pc.field('ts0') == ts_literal)

- ts_literal = dt.datetime(2024, 4, 10, 12, 34, 56, 789000)
- assert select(t['ts3'] == ts_literal) == expected.filter(pc.field('ts3') == ts_literal)
+ ts_literal = dt.datetime(2024, 4, 10, 12, 34, 56, 789000)
+ assert select(t['ts3'] == ts_literal) == expected.filter(pc.field('ts3') == ts_literal)

- ts_literal = dt.datetime(2024, 4, 10, 12, 34, 56, 789789)
- assert select(t['ts6'] == ts_literal) == expected.filter(pc.field('ts6') == ts_literal)
+ ts_literal = dt.datetime(2024, 4, 10, 12, 34, 56, 789789)
+ assert select(t['ts6'] == ts_literal) == expected.filter(pc.field('ts6') == ts_literal)

- ts_literal = dt.datetime(2024, 4, 10, 12, 34, 56, 789789)
- assert select(t['ts9'] == ts_literal) == expected.filter(pc.field('ts9') == ts_literal)
+ ts_literal = dt.datetime(2024, 4, 10, 12, 34, 56, 789789)
+ assert select(t['ts9'] == ts_literal) == expected.filter(pc.field('ts9') == ts_literal)


  def test_filters(session, clean_bucket_name):
@@ -270,62 +272,63 @@ def test_filters(session, clean_bucket_name):
  ['a', 'bb', 'ccc', None, 'xyz'],
  ])

- with prepare_data(session, clean_bucket_name, 's', 't', expected) as t:
+ with prepare_data(session, clean_bucket_name, 's', 't', expected) as table:
  def select(predicate):
- return pa.Table.from_batches(t.select(predicate=predicate), t.arrow_schema)
+ return pa.Table.from_batches(table.select(predicate=predicate), table.arrow_schema)

  assert select(None) == expected
  assert select(True) == expected
  assert select(False) == pa.Table.from_batches([], schema=columns)

- assert select(t['a'].between(222, 444)) == expected.filter((pc.field('a') >= 222) & (pc.field('a') <= 444))
- assert select((t['a'].between(222, 444)) & (t['b'] > 2.5)) == expected.filter((pc.field('a') >= 222) & (pc.field('a') <= 444) & (pc.field('b') > 2.5))
-
- assert select(t['a'] > 222) == expected.filter(pc.field('a') > 222)
- assert select(t['a'] < 222) == expected.filter(pc.field('a') < 222)
- assert select(t['a'] == 222) == expected.filter(pc.field('a') == 222)
- assert select(t['a'] != 222) == expected.filter(pc.field('a') != 222)
- assert select(t['a'] <= 222) == expected.filter(pc.field('a') <= 222)
- assert select(t['a'] >= 222) == expected.filter(pc.field('a') >= 222)
-
- assert select(t['b'] > 1.5) == expected.filter(pc.field('b') > 1.5)
- assert select(t['b'] < 1.5) == expected.filter(pc.field('b') < 1.5)
- assert select(t['b'] == 1.5) == expected.filter(pc.field('b') == 1.5)
- assert select(t['b'] != 1.5) == expected.filter(pc.field('b') != 1.5)
- assert select(t['b'] <= 1.5) == expected.filter(pc.field('b') <= 1.5)
- assert select(t['b'] >= 1.5) == expected.filter(pc.field('b') >= 1.5)
-
- assert select(t['s'] > 'bb') == expected.filter(pc.field('s') > 'bb')
- assert select(t['s'] < 'bb') == expected.filter(pc.field('s') < 'bb')
- assert select(t['s'] == 'bb') == expected.filter(pc.field('s') == 'bb')
- assert select(t['s'] != 'bb') == expected.filter(pc.field('s') != 'bb')
- assert select(t['s'] <= 'bb') == expected.filter(pc.field('s') <= 'bb')
- assert select(t['s'] >= 'bb') == expected.filter(pc.field('s') >= 'bb')
-
- assert select((t['a'] > 111) & (t['b'] > 0) & (t['s'] < 'ccc')) == expected.filter((pc.field('a') > 111) & (pc.field('b') > 0) & (pc.field('s') < 'ccc'))
- assert select((t['a'] > 111) & (t['b'] < 2.5)) == expected.filter((pc.field('a') > 111) & (pc.field('b') < 2.5))
- assert select((t['a'] > 111) & (t['a'] < 333)) == expected.filter((pc.field('a') > 111) & (pc.field('a') < 333))
-
- assert select((t['a'] > 111) | (t['a'] < 333)) == expected.filter((pc.field('a') > 111) | (pc.field('a') < 333))
- assert select(((t['a'] > 111) | (t['a'] < 333)) & (t['b'] < 2.5)) == expected.filter(((pc.field('a') > 111) | (pc.field('a') < 333)) & (pc.field('b') < 2.5))
- with pytest.raises(NotImplementedError):
- assert select((t['a'] > 111) | (t['b'] > 0) | (t['s'] < 'ccc')) == expected.filter((pc.field('a') > 111) | (pc.field('b') > 0) | (pc.field('s') < 'ccc'))
- assert select((t['a'] > 111) | (t['a'] < 333) | (t['a'] == 777)) == expected.filter((pc.field('a') > 111) | (pc.field('a') < 333) | (pc.field('a') == 777))
-
- assert select(t['s'].isnull()) == expected.filter(pc.field('s').is_null())
- assert select((t['s'].isnull()) | (t['s'] == 'bb')) == expected.filter((pc.field('s').is_null()) | (pc.field('s') == 'bb'))
- assert select((t['s'].isnull()) & (t['b'] == 3.5)) == expected.filter((pc.field('s').is_null()) & (pc.field('b') == 3.5))
-
- assert select(~t['s'].isnull()) == expected.filter(~pc.field('s').is_null())
- assert select(t['s'].contains('b')) == expected.filter(pc.field('s') == 'bb')
- assert select(t['s'].contains('y')) == expected.filter(pc.field('s') == 'xyz')
-
- assert select(t['a'].isin([555])) == expected.filter(pc.field('a').isin([555]))
- assert select(t['a'].isin([111, 222, 999])) == expected.filter(pc.field('a').isin([111, 222, 999]))
- assert select((t['a'] == 111) | t['a'].isin([333, 444]) | (t['a'] > 600)) == expected.filter((pc.field('a') == 111) | pc.field('a').isin([333, 444]) | (pc.field('a') > 600))
-
- with pytest.raises(NotImplementedError):
- select(t['a'].isin([]))
+ for t in [table, ibis._]:
+ assert select(t['a'].between(222, 444)) == expected.filter((pc.field('a') >= 222) & (pc.field('a') <= 444))
+ assert select((t['a'].between(222, 444)) & (t['b'] > 2.5)) == expected.filter((pc.field('a') >= 222) & (pc.field('a') <= 444) & (pc.field('b') > 2.5))
+
+ assert select(t['a'] > 222) == expected.filter(pc.field('a') > 222)
+ assert select(t['a'] < 222) == expected.filter(pc.field('a') < 222)
+ assert select(t['a'] == 222) == expected.filter(pc.field('a') == 222)
+ assert select(t['a'] != 222) == expected.filter(pc.field('a') != 222)
+ assert select(t['a'] <= 222) == expected.filter(pc.field('a') <= 222)
+ assert select(t['a'] >= 222) == expected.filter(pc.field('a') >= 222)
+
+ assert select(t['b'] > 1.5) == expected.filter(pc.field('b') > 1.5)
+ assert select(t['b'] < 1.5) == expected.filter(pc.field('b') < 1.5)
+ assert select(t['b'] == 1.5) == expected.filter(pc.field('b') == 1.5)
+ assert select(t['b'] != 1.5) == expected.filter(pc.field('b') != 1.5)
+ assert select(t['b'] <= 1.5) == expected.filter(pc.field('b') <= 1.5)
+ assert select(t['b'] >= 1.5) == expected.filter(pc.field('b') >= 1.5)
+
+ assert select(t['s'] > 'bb') == expected.filter(pc.field('s') > 'bb')
+ assert select(t['s'] < 'bb') == expected.filter(pc.field('s') < 'bb')
+ assert select(t['s'] == 'bb') == expected.filter(pc.field('s') == 'bb')
+ assert select(t['s'] != 'bb') == expected.filter(pc.field('s') != 'bb')
+ assert select(t['s'] <= 'bb') == expected.filter(pc.field('s') <= 'bb')
+ assert select(t['s'] >= 'bb') == expected.filter(pc.field('s') >= 'bb')
+
+ assert select((t['a'] > 111) & (t['b'] > 0) & (t['s'] < 'ccc')) == expected.filter((pc.field('a') > 111) & (pc.field('b') > 0) & (pc.field('s') < 'ccc'))
+ assert select((t['a'] > 111) & (t['b'] < 2.5)) == expected.filter((pc.field('a') > 111) & (pc.field('b') < 2.5))
+ assert select((t['a'] > 111) & (t['a'] < 333)) == expected.filter((pc.field('a') > 111) & (pc.field('a') < 333))
+
+ assert select((t['a'] > 111) | (t['a'] < 333)) == expected.filter((pc.field('a') > 111) | (pc.field('a') < 333))
+ assert select(((t['a'] > 111) | (t['a'] < 333)) & (t['b'] < 2.5)) == expected.filter(((pc.field('a') > 111) | (pc.field('a') < 333)) & (pc.field('b') < 2.5))
+ with pytest.raises(NotImplementedError):
+ assert select((t['a'] > 111) | (t['b'] > 0) | (t['s'] < 'ccc')) == expected.filter((pc.field('a') > 111) | (pc.field('b') > 0) | (pc.field('s') < 'ccc'))
+ assert select((t['a'] > 111) | (t['a'] < 333) | (t['a'] == 777)) == expected.filter((pc.field('a') > 111) | (pc.field('a') < 333) | (pc.field('a') == 777))
+
+ assert select(t['s'].isnull()) == expected.filter(pc.field('s').is_null())
+ assert select((t['s'].isnull()) | (t['s'] == 'bb')) == expected.filter((pc.field('s').is_null()) | (pc.field('s') == 'bb'))
+ assert select((t['s'].isnull()) & (t['b'] == 3.5)) == expected.filter((pc.field('s').is_null()) & (pc.field('b') == 3.5))
+
+ assert select(~t['s'].isnull()) == expected.filter(~pc.field('s').is_null())
+ assert select(t['s'].contains('b')) == expected.filter(pc.field('s') == 'bb')
+ assert select(t['s'].contains('y')) == expected.filter(pc.field('s') == 'xyz')
+
+ assert select(t['a'].isin([555])) == expected.filter(pc.field('a').isin([555]))
+ assert select(t['a'].isin([111, 222, 999])) == expected.filter(pc.field('a').isin([111, 222, 999]))
+ assert select((t['a'] == 111) | t['a'].isin([333, 444]) | (t['a'] > 600)) == expected.filter((pc.field('a') == 111) | pc.field('a').isin([333, 444]) | (pc.field('a') > 600))
+
+ with pytest.raises(NotImplementedError):
+ select(t['a'].isin([]))


  def test_parquet_export(session, clean_bucket_name):
vastdb/transaction.py CHANGED
@@ -63,7 +63,7 @@ class Transaction:
  except botocore.exceptions.ClientError as e:
  log.warning("res: %s", e.response)
  if e.response['Error']['Code'] == '404':
- raise errors.MissingBucket(name)
+ raise errors.MissingBucket(name) from e
  raise
  return bucket.Bucket(name, self)
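Aside: `raise ... from e` preserves the original botocore error as `__cause__`, so the traceback shows both the SDK error and the underlying client error. The mechanism in isolation:

    class MissingBucket(Exception):
        pass

    def head_bucket(name):
        try:
            raise KeyError(name)               # stand-in for botocore's ClientError
        except KeyError as e:
            raise MissingBucket(name) from e   # keeps `e` attached as __cause__

    try:
        head_bucket('nope')
    except MissingBucket as err:
        assert isinstance(err.__cause__, KeyError)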
 
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: vastdb
- Version: 0.1.4
+ Version: 0.1.5
  Summary: VAST Data SDK
  Home-page: https://github.com/vast-data/vastdb_sdk
  Author: VAST DATA
@@ -152,26 +152,26 @@ vastdb/__init__.py,sha256=cMJtZuJ0IL9aKyM3DUWqTCzuP1H1MXXVivKKE1-q0DY,292
  vastdb/bucket.py,sha256=4rPEm9qlPTg7ccWO6VGmd4LKb8w-BDhJYwzXGjn03sc,3566
  vastdb/conftest.py,sha256=pKpo_46Vq4QHzTDQAFxasrVhnZ2V2L-y6IMLxojxaFM,2132
  vastdb/errors.py,sha256=fj8IlPnGi1lbJWIl1-8MSjLavL9bYQ-YUoboWbXCo54,4047
- vastdb/internal_commands.py,sha256=yS6ylyuJjaAwAm4OqVGX4tq-Un5cvM-LXp7F4eYOUDw,100414
+ vastdb/internal_commands.py,sha256=kIdkLHabW8r4-GSygGl1Gdrr4puxD79WPO8Jkx8aszg,98490
  vastdb/schema.py,sha256=ql4TPB1W_FQ_BHov3CKHI8JX3krXMlcKWz7dTrjpQ1w,3346
- vastdb/session.py,sha256=2tu5cp7xG28ynyQfEl9_HM2dtNcLM2AoJmm3bfNLC0o,2563
- vastdb/table.py,sha256=apRXCrglg6_glozJXu8D7q6du5seP7NMi42PNjyGcTM,28891
- vastdb/transaction.py,sha256=g8YTcYnsNPIhB2udbHyT5RIFB5kHnBLJcvV2CWRICwI,2845
+ vastdb/session.py,sha256=UTaz1Fh3u71Bnay2r6IyCHNMDrAszbzjnwylPURzhsk,2603
+ vastdb/table.py,sha256=1ikj6toITImFowI2WHiimmqSiObmTfAohCdWC89q71Y,30031
+ vastdb/transaction.py,sha256=u4pJBLooZQ_YGjsRgEWVL6RPAlt3lgm5oOpPHzPcayM,2852
  vastdb/util.py,sha256=rs7nLL2Qz-OVEZDSVIqAvS-uETMq-zxQs5jBksB5-JA,4276
  vastdb/bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  vastdb/bench/test_perf.py,sha256=iHE3E60fvyU5SBDHPi4h03Dj6QcY6VI9l9mMhgNMtPc,1117
  vastdb/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  vastdb/tests/test_duckdb.py,sha256=KDuv4PrjGEwChCGHG36xNT2JiFlBOt6K3DQ3L06Kq-A,1913
  vastdb/tests/test_imports.py,sha256=48kbJKsa_MrEXcBYQUbUDr1e9wzjG4FHQ7C3wUEQfXA,5705
- vastdb/tests/test_nested.py,sha256=3kejEvtSqV0LrUgb1QglRjrlxnKI4_AXTFw2nE7Q520,951
+ vastdb/tests/test_nested.py,sha256=FHYMmaKYvqVh0NvsocUFLr2LDVlSfXZYgqUSopWOSM0,3512
  vastdb/tests/test_projections.py,sha256=_cDNfD5zTwbCXLk6uGpPUWGN0P-4HElu5OjubWu-Jg0,1255
  vastdb/tests/test_sanity.py,sha256=ixx0QPo73hLHjAa7bByFXjS1XST0WvmSwLEpgnHh_JY,2960
  vastdb/tests/test_schemas.py,sha256=qoHTLX51D-0S4bMxdCpRh9gaYQd-BkZdT_agGOwFwTM,1739
- vastdb/tests/test_tables.py,sha256=pfQx0OZm6oVJj1-CziPWUoEn3l2-OET4Bpl8M9Z4mws,27499
+ vastdb/tests/test_tables.py,sha256=Q3N5P-7mOPVcfAFEfpAzomqkyCJ5gKZmfE4SUW5jehk,27859
  vastdb/tests/test_util.py,sha256=owRAU3TCKMq-kz54NRdA5wX2O_bZIHqG5ucUR77jm5k,1046
  vastdb/tests/util.py,sha256=dpRJYbboDnlqL4qIdvScpp8--5fxRUBIcIYitrfcj9o,555
- vastdb-0.1.4.dist-info/LICENSE,sha256=obffan7LYrq7hLHNrY7vHcn2pKUTBUYXMKu-VOAvDxU,11333
- vastdb-0.1.4.dist-info/METADATA,sha256=SyZkyjQSwklzsq3oub8m8w9lY-HuI4XOG72y8trKvf4,1350
- vastdb-0.1.4.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
- vastdb-0.1.4.dist-info/top_level.txt,sha256=Vsj2MKtlhPg0J4so64slQtnwjhgoPmJgcG-6YcVAwVc,20
- vastdb-0.1.4.dist-info/RECORD,,
+ vastdb-0.1.5.dist-info/LICENSE,sha256=obffan7LYrq7hLHNrY7vHcn2pKUTBUYXMKu-VOAvDxU,11333
+ vastdb-0.1.5.dist-info/METADATA,sha256=NJzrnkyfPs4lliFamaEdJy2elLYLzYJtlCxEMRSiLtg,1350
+ vastdb-0.1.5.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+ vastdb-0.1.5.dist-info/top_level.txt,sha256=Vsj2MKtlhPg0J4so64slQtnwjhgoPmJgcG-6YcVAwVc,20
+ vastdb-0.1.5.dist-info/RECORD,,