vastdb 1.1.2__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vastdb/_internal.py CHANGED
@@ -34,7 +34,7 @@ from ibis.expr.operations.logical import (
34
34
  Or,
35
35
  )
36
36
  from ibis.expr.operations.relations import Field
37
- from ibis.expr.operations.strings import StringContains
37
+ from ibis.expr.operations.strings import StartsWith, StringContains
38
38
  from ibis.expr.operations.structs import StructField
39
39
 
40
40
  import vast_flatbuf.org.apache.arrow.computeir.flatbuf.BinaryLiteral as fb_binary_lit
@@ -103,7 +103,7 @@ from vast_flatbuf.tabular.ListProjectionsResponse import (
103
103
  from vast_flatbuf.tabular.ListSchemasResponse import ListSchemasResponse as list_schemas
104
104
  from vast_flatbuf.tabular.ListTablesResponse import ListTablesResponse as list_tables
105
105
 
106
- from . import errors
106
+ from . import errors, util
107
107
  from .config import BackoffConfig
108
108
 
109
109
  UINT64_MAX = 18446744073709551615
@@ -168,6 +168,7 @@ class Predicate:
168
168
  IsNull: self.build_is_null,
169
169
  Not: self.build_is_not_null,
170
170
  StringContains: self.build_match_substring,
171
+ StartsWith: self.build_starts_with,
171
172
  Between: self.build_between,
172
173
  }
173
174
 
@@ -207,6 +208,14 @@ class Predicate:
207
208
  elif builder_func == self.build_between:
208
209
  column, lower, upper = inner_op.args
209
210
  literals = (None,)
211
+ elif builder_func == self.build_starts_with:
212
+ column, prefix = inner_op.args
213
+ literals = (None,)
214
+ if prefix.value:
215
+ lower_bytes, upper_bytes = util.prefix_to_range(prefix.value)
216
+ else:
217
+ # `col.starts_with('')` is equivalent to `col IS NOT NULL`
218
+ builder_func = self.build_is_not_null
210
219
  else:
211
220
  column, arg = inner_op.args
212
221
  if isinstance(arg, tuple):
@@ -249,6 +258,9 @@ class Predicate:
249
258
  if builder_func == self.build_between:
250
259
  args_offsets.append(self.build_literal(field=node.field, value=lower.value))
251
260
  args_offsets.append(self.build_literal(field=node.field, value=upper.value))
261
+ if builder_func == self.build_starts_with:
262
+ args_offsets.append(self.build_literal(field=node.field, value=lower_bytes))
263
+ args_offsets.append(self.build_literal(field=node.field, value=upper_bytes))
252
264
 
253
265
  inner_offsets.append(builder_func(*args_offsets))
254
266
 
@@ -550,6 +562,13 @@ class Predicate:
550
562
  ]
551
563
  return self.build_and(offsets)
552
564
 
565
def build_starts_with(self, column: int, lower: int, upper: int):
    """Build the range predicate `lower <= column < upper` used for prefix matching.

    The three arguments are passed straight through to the comparison
    builders; the two comparisons are combined with `build_and`.
    """
    # Keep the original build order: lower-bound comparison first, then upper-bound.
    at_or_above_lower = self.build_greater_equal(column, lower)
    below_upper = self.build_less(column, upper)
    return self.build_and([at_or_above_lower, below_upper])
571
+
553
572
 
554
573
  class FieldNodesState:
555
574
  def __init__(self) -> None:
vastdb/schema.py CHANGED
@@ -103,9 +103,7 @@ class Schema:
103
103
  log.debug("Found table: %s", t[0])
104
104
  return t[0]
105
105
 
106
- def tables(self, table_name=None) -> List["Table"]:
107
- """List all tables under this schema."""
108
- tables = []
106
+ def _iter_tables(self, table_name=None):
109
107
  next_key = 0
110
108
  name_prefix = table_name if table_name else ""
111
109
  exact_match = bool(table_name)
@@ -116,11 +114,20 @@ class Schema:
116
114
  exact_match=exact_match, name_prefix=name_prefix, include_list_stats=exact_match)
117
115
  if not curr_tables:
118
116
  break
119
- tables.extend(curr_tables)
117
+ yield from curr_tables
120
118
  if not is_truncated:
121
119
  break
122
120
 
123
- return [_parse_table_info(table, self) for table in tables]
121
def tables(self, table_name=None) -> List["Table"]:
    """Return the tables under this schema as parsed `Table` objects.

    `table_name` is forwarded unchanged to `_iter_tables`; every raw entry
    it yields is converted with `_parse_table_info`.
    """
    raw_infos = self._iter_tables(table_name=table_name)
    return [_parse_table_info(raw_info, self) for raw_info in raw_infos]
127
+
128
def tablenames(self) -> List[str]:
    """Return the names of all tables under this schema.

    Only the `name` attribute of each raw entry from `_iter_tables` is read;
    the entries are not parsed into `Table` objects.
    """
    raw_infos = self._iter_tables()
    return [raw_info.name for raw_info in raw_infos]
124
131
 
125
132
  def drop(self) -> None:
126
133
  """Delete this schema."""
@@ -127,6 +127,21 @@ def test_exists(session, clean_bucket_name):
127
127
  assert s.tables() == [t]
128
128
 
129
129
 
130
def test_list_tables(session, clean_bucket_name):
    """Check that `tables()` and `tablenames()` reflect the tables created in a schema."""
    with session.transaction() as tx:
        schema = tx.bucket(clean_bucket_name).create_schema('s1')
        # A freshly created schema lists nothing.
        assert schema.tables() == []
        assert schema.tablenames() == []

        created = []
        for i in range(10):
            columns = pa.schema([(f'x{i}', pa.int64())])
            created.append(schema.create_table(f't{i}', columns))

        # Listing is expected to return the tables in creation order.
        assert created == schema.tables()
        expected_names = [table.name for table in created]
        assert schema.tablenames() == expected_names
143
+
144
+
130
145
  def test_update_table(session, clean_bucket_name):
131
146
  columns = pa.schema([
132
147
  ('a', pa.int64()),
@@ -822,3 +837,48 @@ def test_catalog_snapshots_select(session, clean_bucket_name):
822
837
  rows = t.select().read_all()
823
838
  if not rows:
824
839
  raise NotReady
840
+
841
+
842
def test_starts_with(session, clean_bucket_name):
    """Exercise `startswith` predicates in `table.select`, alone and combined with other filters."""
    schema = pa.schema([
        ('s', pa.utf8()),
        ('i', pa.int16()),
    ])
    strings = ['a', 'ab', 'abc', None, 'abd', 'α', '', 'b']
    indices = [0, 1, 2, 3, 4, 5, 6, 7]
    expected = pa.table(schema=schema, data=[strings, indices])

    with prepare_data(session, clean_bucket_name, 's', 't', expected) as table:
        def fetch(predicate):
            # Run the query and return the result as a plain dict of columns.
            return table.select(predicate=predicate).read_all().to_pydict()

        # Prefix-only filters; the NULL row is never returned, even for the empty prefix.
        matches_by_prefix = {
            '': ['a', 'ab', 'abc', 'abd', 'α', '', 'b'],
            'a': ['a', 'ab', 'abc', 'abd'],
            'b': ['b'],
            'ab': ['ab', 'abc', 'abd'],
            'abc': ['abc'],
            'α': ['α'],
        }
        for prefix, matches in matches_by_prefix.items():
            assert fetch(table['s'].startswith(prefix))['s'] == matches

        # Disjunctions, with the prefix filter on either side.
        rows = fetch(table['s'].startswith('ab') | (table['s'].isnull()))
        assert rows['s'] == ['ab', 'abc', None, 'abd']

        rows = fetch(table['s'].startswith('ab') | (table['s'] == 'b'))
        assert rows['s'] == ['ab', 'abc', 'abd', 'b']

        rows = fetch((table['s'] == 'b') | table['s'].startswith('ab'))
        assert rows['s'] == ['ab', 'abc', 'abd', 'b']

        # Conjunctions on the same column, in both operand orders.
        rows = fetch(table['s'].startswith('ab') & (table['s'] != 'abc'))
        assert rows['s'] == ['ab', 'abd']

        rows = fetch((table['s'] != 'abc') & table['s'].startswith('ab'))
        assert rows['s'] == ['ab', 'abd']

        # Conjunctions with a filter on the integer column, in both operand orders.
        rows = fetch((table['i'] > 3) & table['s'].startswith('ab'))
        assert rows == {'i': [4], 's': ['abd']}

        rows = fetch((table['s'].startswith('ab')) & (table['i'] > 3))
        assert rows == {'i': [4], 's': ['abd']}
vastdb/tests/test_util.py CHANGED
@@ -43,3 +43,16 @@ def _parse(bufs):
43
43
  for buf in bufs:
44
44
  with pa.ipc.open_stream(buf) as reader:
45
45
  yield from reader
46
+
47
+
48
def test_prefix():
    """Check `util.prefix_to_range` on ASCII and multi-byte prefixes, and that it rejects ''."""
    cases = [
        ('a', (b'a', b'b')),
        ('abc', (b'abc', b'abd')),
        ('abc\x00', (b'abc\x00', b'abc\x01')),
        ('abc\x7f', (b'abc\x7f', b'abc\x80')),
        ('/a/b/c', (b'/a/b/c', b'/a/b/d')),
        ('/123α', (b'/123\xce\xb1', b'/123\xce\xb2')),
        ('/123αA', (b'/123\xce\xb1A', b'/123\xce\xb1B')),
        ('\U0010ffff', (b'\xf4\x8f\xbf\xbf', b'\xf4\x8f\xbf\xc0')),  # max unicode codepoint
    ]
    for prefix, bounds in cases:
        assert util.prefix_to_range(prefix) == bounds

    # An empty prefix has no range form and must be rejected.
    with pytest.raises(AssertionError):
        util.prefix_to_range('')
vastdb/util.py CHANGED
@@ -157,3 +157,13 @@ def sort_record_batch_if_needed(record_batch, sort_column):
157
157
  return record_batch.sort_by(sort_column)
158
158
  else:
159
159
  return record_batch
160
+
161
+
162
def prefix_to_range(prefix: str):
    """Compute `(lower, upper)` such that `s.startswith(prefix)` is equivalent to `lower <= s.encode() < upper`.

    Both bounds are byte strings intended for bytewise comparison. `lower`
    is the UTF-8 encoding of `prefix`; `upper` is `lower` with its last byte
    incremented, so `upper` is not necessarily valid UTF-8 itself.

    Raises:
        AssertionError: if `prefix` is empty, since an empty prefix matches
            every string and has no finite upper bound.
    """
    if not prefix:
        # Raised explicitly instead of via `assert` so the check is not stripped
        # under `python -O`; the exception type is kept for existing callers.
        raise AssertionError("Empty prefix is not convertible to range predicate")
    lower = prefix.encode()
    upper = bytearray(lower)
    # https://en.wikipedia.org/wiki/UTF-8#Encoding guarantees that the last byte is not 0xFF,
    # so the increment below cannot overflow a byte.
    upper[-1] += 1
    return (lower, bytes(upper))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vastdb
3
- Version: 1.1.2
3
+ Version: 1.3.0
4
4
  Summary: VAST Data SDK
5
5
  Home-page: https://github.com/vast-data/vastdb_sdk
6
6
  Author: VAST DATA
@@ -149,17 +149,17 @@ vast_flatbuf/tabular/S3File.py,sha256=KC9c2oS5-JXwTTriUVFdjOvRG0B54Cq9kviSDZY3NI
149
149
  vast_flatbuf/tabular/VipRange.py,sha256=_BJd1RRZAcK76T9vlsHzXKYVsPVaz6WTEAqStMQCAUQ,2069
150
150
  vast_flatbuf/tabular/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
151
151
  vastdb/__init__.py,sha256=J1JjKiFkKC95BHowfh9kJfQFTjRce-QMsc6zF_FfxC0,432
152
- vastdb/_internal.py,sha256=KRFNlB25Ckj4nowVUneC5tBSYXpnIv-MDdw4Vn6KYnc,91255
152
+ vastdb/_internal.py,sha256=N_vCPwyPxyvRtN8Gvhjc1_tNmvUsfn-rdz_27-h-gaY,92312
153
153
  vastdb/bucket.py,sha256=5KuKhPjZOevznZqWHDVVocejvAy7dcwobPuV6BJCfPc,2544
154
154
  vastdb/config.py,sha256=1tMYtzKXerGcIUjH4tIGEvZNWvO4fviCEdcNCnELJZo,2269
155
155
  vastdb/conftest.py,sha256=X2kVveySPQYZlVBXUMoo7Oea5IsvmJzjdqq3fpH2kVw,3469
156
156
  vastdb/errors.py,sha256=2XR1ko7J5nkfiHSAgwuVAADw0SsyqxOwSeFaGgKZEXM,4186
157
157
  vastdb/features.py,sha256=DxV746LSkORwVSD6MP2hdXRfnyoLkJwtOwGmp1dnquo,1322
158
- vastdb/schema.py,sha256=X7IRrogXH7Z0kes-DsDh1bRqIhvjH6owlFigGBXy7XQ,5913
158
+ vastdb/schema.py,sha256=IaZDJsx0ms_dJVXeyCcSD8Dt3TNJkqR3739XOnDBM_E,6177
159
159
  vastdb/session.py,sha256=toMR0BXwTaECdWDKnIZky1F3MA1SmelRBiqCrqQ3GCM,2067
160
160
  vastdb/table.py,sha256=C6Zz0zolRRRbf5EQBvDRAofl3kGvfz4PjZwGeQongTI,31106
161
161
  vastdb/transaction.py,sha256=NlVkEowJ_pmtffjWBBDaKExYDKPekjSZyj_fK_bZPJE,3026
162
- vastdb/util.py,sha256=eunfTuqbCrqQEFZEO9T15N-Bu8Fqpw7Zlqp2TAGfYaY,5870
162
+ vastdb/util.py,sha256=8CUnVRsJukC3uNHNoB5D0qPf0FxS8OSdVB84nNoLJKc,6290
163
163
  vastdb/bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
164
164
  vastdb/bench/test_perf.py,sha256=gZIqfHva6lNFpD-9bHAe7M8COBjUyrPkHu3E7F8J2L0,1072
165
165
  vastdb/bench/test_sample.py,sha256=LgF4syzij09sH3Noiv1EyCAJ9pvrUE5bxR4RJTVEYag,7881
@@ -196,14 +196,14 @@ vastdb/tests/test_nested.py,sha256=LPU6uV3Ri23dBzAEMFQqRPbqapV5LfmiHSHkhILPIY0,6
196
196
  vastdb/tests/test_projections.py,sha256=3y1kubwVrzO-xoR0hyps7zrjOJI8niCYspaFTN16Q9w,4540
197
197
  vastdb/tests/test_sanity.py,sha256=oiV2gb05aPyG5RMNUQZlyjNlg3T7Fig1_8OJzpAgcsk,3038
198
198
  vastdb/tests/test_schemas.py,sha256=l70YQMlx2UL1KRQhApriiG2ZM7GJF-IzWU31H3Yqn1U,3312
199
- vastdb/tests/test_tables.py,sha256=URfd0_rh5sleCFGbNsJBdIGdXOvIiiLGyhKTNtswU98,32578
200
- vastdb/tests/test_util.py,sha256=Ok_sAEBJsRGF5Voa_v5eu3eAd52GWu8jMjjQbadwW-s,1260
199
+ vastdb/tests/test_tables.py,sha256=17-t9VkEJRIW43Yf-lwEI7jHn8teOJvv-eZgANcvTkM,35023
200
+ vastdb/tests/test_util.py,sha256=n7gvT5Wg6b6bxgqkFXkYqvFd_W1GlUdVfmPv66XYXyA,1956
201
201
  vastdb/tests/util.py,sha256=dpRJYbboDnlqL4qIdvScpp8--5fxRUBIcIYitrfcj9o,555
202
202
  vastdb/vast_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
203
203
  vastdb/vast_tests/test_ha.py,sha256=744P4G6VJ09RIkHhMQL4wlipCBJWQVMhyvUrSc4k1HQ,975
204
204
  vastdb/vast_tests/test_scale.py,sha256=EpjCJmVAQrNBxVnHGJ-KHCoxevhqOcyqYFPMIIY9s60,2714
205
- vastdb-1.1.2.dist-info/LICENSE,sha256=obffan7LYrq7hLHNrY7vHcn2pKUTBUYXMKu-VOAvDxU,11333
206
- vastdb-1.1.2.dist-info/METADATA,sha256=n2QV0OrTzVR9pQKUmVhsH3c5dIS29Vl5C1h0AyDEJDg,1340
207
- vastdb-1.1.2.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
208
- vastdb-1.1.2.dist-info/top_level.txt,sha256=Vsj2MKtlhPg0J4so64slQtnwjhgoPmJgcG-6YcVAwVc,20
209
- vastdb-1.1.2.dist-info/RECORD,,
205
+ vastdb-1.3.0.dist-info/LICENSE,sha256=obffan7LYrq7hLHNrY7vHcn2pKUTBUYXMKu-VOAvDxU,11333
206
+ vastdb-1.3.0.dist-info/METADATA,sha256=Idrpl3by0yNRJPAYlVgcqZEiLPSsA3vvte2KneNonEA,1340
207
+ vastdb-1.3.0.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
208
+ vastdb-1.3.0.dist-info/top_level.txt,sha256=Vsj2MKtlhPg0J4so64slQtnwjhgoPmJgcG-6YcVAwVc,20
209
+ vastdb-1.3.0.dist-info/RECORD,,
File without changes