vastdb 1.2.0__py3-none-any.whl → 1.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vastdb/_internal.py CHANGED
@@ -34,7 +34,7 @@ from ibis.expr.operations.logical import (
34
34
  Or,
35
35
  )
36
36
  from ibis.expr.operations.relations import Field
37
- from ibis.expr.operations.strings import StringContains
37
+ from ibis.expr.operations.strings import StartsWith, StringContains
38
38
  from ibis.expr.operations.structs import StructField
39
39
 
40
40
  import vast_flatbuf.org.apache.arrow.computeir.flatbuf.BinaryLiteral as fb_binary_lit
@@ -103,7 +103,7 @@ from vast_flatbuf.tabular.ListProjectionsResponse import (
103
103
  from vast_flatbuf.tabular.ListSchemasResponse import ListSchemasResponse as list_schemas
104
104
  from vast_flatbuf.tabular.ListTablesResponse import ListTablesResponse as list_tables
105
105
 
106
- from . import errors
106
+ from . import errors, util
107
107
  from .config import BackoffConfig
108
108
 
109
109
  UINT64_MAX = 18446744073709551615
@@ -168,6 +168,7 @@ class Predicate:
168
168
  IsNull: self.build_is_null,
169
169
  Not: self.build_is_not_null,
170
170
  StringContains: self.build_match_substring,
171
+ StartsWith: self.build_starts_with,
171
172
  Between: self.build_between,
172
173
  }
173
174
 
@@ -207,6 +208,14 @@ class Predicate:
207
208
  elif builder_func == self.build_between:
208
209
  column, lower, upper = inner_op.args
209
210
  literals = (None,)
211
+ elif builder_func == self.build_starts_with:
212
+ column, prefix = inner_op.args
213
+ literals = (None,)
214
+ if prefix.value:
215
+ lower_bytes, upper_bytes = util.prefix_to_range(prefix.value)
216
+ else:
217
+ # `col.starts_with('')` is equivalent to `col IS NOT NULL`
218
+ builder_func = self.build_is_not_null
210
219
  else:
211
220
  column, arg = inner_op.args
212
221
  if isinstance(arg, tuple):
@@ -249,6 +258,9 @@ class Predicate:
249
258
  if builder_func == self.build_between:
250
259
  args_offsets.append(self.build_literal(field=node.field, value=lower.value))
251
260
  args_offsets.append(self.build_literal(field=node.field, value=upper.value))
261
+ if builder_func == self.build_starts_with:
262
+ args_offsets.append(self.build_literal(field=node.field, value=lower_bytes))
263
+ args_offsets.append(self.build_literal(field=node.field, value=upper_bytes))
252
264
 
253
265
  inner_offsets.append(builder_func(*args_offsets))
254
266
 
@@ -550,6 +562,13 @@ class Predicate:
550
562
  ]
551
563
  return self.build_and(offsets)
552
564
 
565
+ def build_starts_with(self, column: int, lower: int, upper: int):
566
+ offsets = [
567
+ self.build_greater_equal(column, lower),
568
+ self.build_less(column, upper),
569
+ ]
570
+ return self.build_and(offsets)
571
+
553
572
 
554
573
  class FieldNodesState:
555
574
  def __init__(self) -> None:
@@ -847,7 +866,7 @@ class VastdbApi:
847
866
  res = self._session.request(method=method, url=url, timeout=self.timeout, **kwargs)
848
867
  except requests.exceptions.ConnectionError as err:
849
868
  # low-level connection issue, it is safe to retry only read-only requests
850
- may_retry = (method == "GET")
869
+ may_retry = (method in {"HEAD", "GET"})
851
870
  raise errors.ConnectionError(cause=err, may_retry=may_retry) from err
852
871
 
853
872
  if not skip_status_check:
@@ -837,3 +837,48 @@ def test_catalog_snapshots_select(session, clean_bucket_name):
837
837
  rows = t.select().read_all()
838
838
  if not rows:
839
839
  raise NotReady
840
+
841
+
842
+ def test_starts_with(session, clean_bucket_name):
843
+ columns = pa.schema([
844
+ ('s', pa.utf8()),
845
+ ('i', pa.int16()),
846
+ ])
847
+
848
+ expected = pa.table(schema=columns, data=[
849
+ ['a', 'ab', 'abc', None, 'abd', 'α', '', 'b'],
850
+ [0, 1, 2, 3, 4, 5, 6, 7],
851
+ ])
852
+
853
+ with prepare_data(session, clean_bucket_name, 's', 't', expected) as table:
854
+ def select(prefix):
855
+ res = table.select(predicate=table['s'].startswith(prefix)).read_all()
856
+ return res.to_pydict()
857
+
858
+ assert select('')['s'] == ['a', 'ab', 'abc', 'abd', 'α', '', 'b']
859
+ assert select('a')['s'] == ['a', 'ab', 'abc', 'abd']
860
+ assert select('b')['s'] == ['b']
861
+ assert select('ab')['s'] == ['ab', 'abc', 'abd']
862
+ assert select('abc')['s'] == ['abc']
863
+ assert select('α')['s'] == ['α']
864
+
865
+ res = table.select(predicate=(table['s'].startswith('ab') | (table['s'].isnull()))).read_all()
866
+ assert res.to_pydict()['s'] == ['ab', 'abc', None, 'abd']
867
+
868
+ res = table.select(predicate=(table['s'].startswith('ab') | (table['s'] == 'b'))).read_all()
869
+ assert res.to_pydict()['s'] == ['ab', 'abc', 'abd', 'b']
870
+
871
+ res = table.select(predicate=((table['s'] == 'b') | table['s'].startswith('ab'))).read_all()
872
+ assert res.to_pydict()['s'] == ['ab', 'abc', 'abd', 'b']
873
+
874
+ res = table.select(predicate=(table['s'].startswith('ab') & (table['s'] != 'abc'))).read_all()
875
+ assert res.to_pydict()['s'] == ['ab', 'abd']
876
+
877
+ res = table.select(predicate=((table['s'] != 'abc') & table['s'].startswith('ab'))).read_all()
878
+ assert res.to_pydict()['s'] == ['ab', 'abd']
879
+
880
+ res = table.select(predicate=((table['i'] > 3) & table['s'].startswith('ab'))).read_all()
881
+ assert res.to_pydict() == {'i': [4], 's': ['abd']}
882
+
883
+ res = table.select(predicate=(table['s'].startswith('ab')) & (table['i'] > 3)).read_all()
884
+ assert res.to_pydict() == {'i': [4], 's': ['abd']}
vastdb/tests/test_util.py CHANGED
@@ -43,3 +43,16 @@ def _parse(bufs):
43
43
  for buf in bufs:
44
44
  with pa.ipc.open_stream(buf) as reader:
45
45
  yield from reader
46
+
47
+
48
+ def test_prefix():
49
+ assert util.prefix_to_range('a') == (b'a', b'b')
50
+ assert util.prefix_to_range('abc') == (b'abc', b'abd')
51
+ assert util.prefix_to_range('abc\x00') == (b'abc\x00', b'abc\x01')
52
+ assert util.prefix_to_range('abc\x7f') == (b'abc\x7f', b'abc\x80')
53
+ assert util.prefix_to_range('/a/b/c') == (b'/a/b/c', b'/a/b/d')
54
+ assert util.prefix_to_range('/123α') == (b'/123\xce\xb1', b'/123\xce\xb2')
55
+ assert util.prefix_to_range('/123αA') == (b'/123\xce\xb1A', b'/123\xce\xb1B')
56
+ assert util.prefix_to_range('\U0010ffff') == (b'\xf4\x8f\xbf\xbf', b'\xf4\x8f\xbf\xc0') # max unicode codepoint
57
+ with pytest.raises(AssertionError):
58
+ util.prefix_to_range('')
vastdb/util.py CHANGED
@@ -157,3 +157,13 @@ def sort_record_batch_if_needed(record_batch, sort_column):
157
157
  return record_batch.sort_by(sort_column)
158
158
  else:
159
159
  return record_batch
160
+
161
+
162
+ def prefix_to_range(prefix: str):
163
+ """Compute (L, U) such that `s.starts_with(prefix)` is equivalent to `L <= s.encode() < U`."""
164
+ assert prefix, "Empty prefix is not convertible to range predicate"
165
+ lower = prefix.encode()
166
+ upper = bytearray(lower)
167
+ # UTF-8 (https://en.wikipedia.org/wiki/UTF-8#Encoding) never emits the byte 0xFF, so incrementing the last byte cannot overflow
168
+ upper[-1] = upper[-1] + 1
169
+ return (lower, bytes(upper))
@@ -66,3 +66,26 @@ def test_table_stats(session, test_bucket_name, schema_name, table_name):
66
66
  logger.info("inserted to table")
67
67
  assert new_stat.size_in_bytes != initial_stat.size_in_bytes
68
68
  assert new_stat.num_rows - NUM_TIMES_TO_INSERT == initial_stat.num_rows
69
+
70
+
71
+ def test_ndu_while_querying(session, test_bucket_name, schema_name, table_name):
72
+ """
73
+ Executing queries while an NDU takes place.
74
+ """
75
+ # TODO: Before merging run mypy and print query result
76
+
77
+ config = QueryConfig(num_splits=1, num_sub_splits=1)
78
+
79
+ logger.info(f'{test_bucket_name=}, {schema_name=}, {table_name=}')
80
+
81
+ for query in range(300):
82
+ with session.transaction() as tx:
83
+ t = tx.bucket(test_bucket_name).schema(schema_name).table(table_name)
84
+ s = time.time()
85
+ if query == 0:
86
+ res = t.select(config=config).read_all()
87
+ logger.info(f'{res=}')
88
+ else:
89
+ assert res == t.select(config=config).read_all()
90
+ e = time.time()
91
+ logger.info(f'{query=} took {e - s}')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vastdb
3
- Version: 1.2.0
3
+ Version: 1.3.1
4
4
  Summary: VAST Data SDK
5
5
  Home-page: https://github.com/vast-data/vastdb_sdk
6
6
  Author: VAST DATA
@@ -149,7 +149,7 @@ vast_flatbuf/tabular/S3File.py,sha256=KC9c2oS5-JXwTTriUVFdjOvRG0B54Cq9kviSDZY3NI
149
149
  vast_flatbuf/tabular/VipRange.py,sha256=_BJd1RRZAcK76T9vlsHzXKYVsPVaz6WTEAqStMQCAUQ,2069
150
150
  vast_flatbuf/tabular/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
151
151
  vastdb/__init__.py,sha256=J1JjKiFkKC95BHowfh9kJfQFTjRce-QMsc6zF_FfxC0,432
152
- vastdb/_internal.py,sha256=KRFNlB25Ckj4nowVUneC5tBSYXpnIv-MDdw4Vn6KYnc,91255
152
+ vastdb/_internal.py,sha256=LmiiNmUqJFfNGUoe362sHwU8M3aKbN1VWYIvobrgeK8,92322
153
153
  vastdb/bucket.py,sha256=5KuKhPjZOevznZqWHDVVocejvAy7dcwobPuV6BJCfPc,2544
154
154
  vastdb/config.py,sha256=1tMYtzKXerGcIUjH4tIGEvZNWvO4fviCEdcNCnELJZo,2269
155
155
  vastdb/conftest.py,sha256=X2kVveySPQYZlVBXUMoo7Oea5IsvmJzjdqq3fpH2kVw,3469
@@ -159,7 +159,7 @@ vastdb/schema.py,sha256=IaZDJsx0ms_dJVXeyCcSD8Dt3TNJkqR3739XOnDBM_E,6177
159
159
  vastdb/session.py,sha256=toMR0BXwTaECdWDKnIZky1F3MA1SmelRBiqCrqQ3GCM,2067
160
160
  vastdb/table.py,sha256=C6Zz0zolRRRbf5EQBvDRAofl3kGvfz4PjZwGeQongTI,31106
161
161
  vastdb/transaction.py,sha256=NlVkEowJ_pmtffjWBBDaKExYDKPekjSZyj_fK_bZPJE,3026
162
- vastdb/util.py,sha256=eunfTuqbCrqQEFZEO9T15N-Bu8Fqpw7Zlqp2TAGfYaY,5870
162
+ vastdb/util.py,sha256=8CUnVRsJukC3uNHNoB5D0qPf0FxS8OSdVB84nNoLJKc,6290
163
163
  vastdb/bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
164
164
  vastdb/bench/test_perf.py,sha256=gZIqfHva6lNFpD-9bHAe7M8COBjUyrPkHu3E7F8J2L0,1072
165
165
  vastdb/bench/test_sample.py,sha256=LgF4syzij09sH3Noiv1EyCAJ9pvrUE5bxR4RJTVEYag,7881
@@ -196,14 +196,14 @@ vastdb/tests/test_nested.py,sha256=LPU6uV3Ri23dBzAEMFQqRPbqapV5LfmiHSHkhILPIY0,6
196
196
  vastdb/tests/test_projections.py,sha256=3y1kubwVrzO-xoR0hyps7zrjOJI8niCYspaFTN16Q9w,4540
197
197
  vastdb/tests/test_sanity.py,sha256=oiV2gb05aPyG5RMNUQZlyjNlg3T7Fig1_8OJzpAgcsk,3038
198
198
  vastdb/tests/test_schemas.py,sha256=l70YQMlx2UL1KRQhApriiG2ZM7GJF-IzWU31H3Yqn1U,3312
199
- vastdb/tests/test_tables.py,sha256=H5BK3Zm0Ocug8kZ2cJeGTC7o8YgZgBDSqkOgwW01hAo,33056
200
- vastdb/tests/test_util.py,sha256=Ok_sAEBJsRGF5Voa_v5eu3eAd52GWu8jMjjQbadwW-s,1260
199
+ vastdb/tests/test_tables.py,sha256=17-t9VkEJRIW43Yf-lwEI7jHn8teOJvv-eZgANcvTkM,35023
200
+ vastdb/tests/test_util.py,sha256=n7gvT5Wg6b6bxgqkFXkYqvFd_W1GlUdVfmPv66XYXyA,1956
201
201
  vastdb/tests/util.py,sha256=dpRJYbboDnlqL4qIdvScpp8--5fxRUBIcIYitrfcj9o,555
202
202
  vastdb/vast_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
203
203
  vastdb/vast_tests/test_ha.py,sha256=744P4G6VJ09RIkHhMQL4wlipCBJWQVMhyvUrSc4k1HQ,975
204
- vastdb/vast_tests/test_scale.py,sha256=EpjCJmVAQrNBxVnHGJ-KHCoxevhqOcyqYFPMIIY9s60,2714
205
- vastdb-1.2.0.dist-info/LICENSE,sha256=obffan7LYrq7hLHNrY7vHcn2pKUTBUYXMKu-VOAvDxU,11333
206
- vastdb-1.2.0.dist-info/METADATA,sha256=O4U-OWABQccLaEF7kOdwqoLO55YG3FkZ3SG5IQs6PS4,1340
207
- vastdb-1.2.0.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
208
- vastdb-1.2.0.dist-info/top_level.txt,sha256=Vsj2MKtlhPg0J4so64slQtnwjhgoPmJgcG-6YcVAwVc,20
209
- vastdb-1.2.0.dist-info/RECORD,,
204
+ vastdb/vast_tests/test_scale.py,sha256=yPF5sL2X7fiP_QooV3OnZAzsW38ANfkHFXq8B1_Uh-s,3515
205
+ vastdb-1.3.1.dist-info/LICENSE,sha256=obffan7LYrq7hLHNrY7vHcn2pKUTBUYXMKu-VOAvDxU,11333
206
+ vastdb-1.3.1.dist-info/METADATA,sha256=oyVkT5bnbEKiuHPfiYuQKtu0C1N7yfeQTSOnhRWpF8c,1340
207
+ vastdb-1.3.1.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
208
+ vastdb-1.3.1.dist-info/top_level.txt,sha256=Vsj2MKtlhPg0J4so64slQtnwjhgoPmJgcG-6YcVAwVc,20
209
+ vastdb-1.3.1.dist-info/RECORD,,
File without changes