vastdb 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vastdb/__init__.py +3 -0
- vastdb/{internal_commands.py → _internal.py} +267 -359
- vastdb/bench/test_perf.py +1 -2
- vastdb/bucket.py +14 -39
- vastdb/conftest.py +13 -4
- vastdb/errors.py +5 -1
- vastdb/schema.py +52 -3
- vastdb/session.py +42 -13
- vastdb/table.py +44 -23
- vastdb/tests/test_duckdb.py +2 -2
- vastdb/tests/test_imports.py +3 -3
- vastdb/tests/test_nested.py +4 -4
- vastdb/tests/test_projections.py +78 -0
- vastdb/tests/test_sanity.py +3 -2
- vastdb/tests/test_schemas.py +49 -0
- vastdb/tests/test_tables.py +107 -29
- vastdb/tests/test_util.py +6 -0
- vastdb/transaction.py +23 -14
- vastdb/util.py +41 -1
- {vastdb-0.1.5.dist-info → vastdb-0.1.7.dist-info}/METADATA +2 -2
- {vastdb-0.1.5.dist-info → vastdb-0.1.7.dist-info}/RECORD +24 -24
- {vastdb-0.1.5.dist-info → vastdb-0.1.7.dist-info}/LICENSE +0 -0
- {vastdb-0.1.5.dist-info → vastdb-0.1.7.dist-info}/WHEEL +0 -0
- {vastdb-0.1.5.dist-info → vastdb-0.1.7.dist-info}/top_level.txt +0 -0
vastdb/tests/test_sanity.py
CHANGED
|
@@ -25,8 +25,9 @@ def test_bad_credentials(session):
|
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
def test_bad_endpoint(session):
|
|
28
|
+
backoff_config = vastdb.session.BackoffConfig(max_tries=3)
|
|
28
29
|
with pytest.raises(requests.exceptions.ConnectionError):
|
|
29
|
-
vastdb.connect(access='BAD', secret='BAD', endpoint='http://invalid-host-name-for-tests:12345')
|
|
30
|
+
vastdb.connect(access='BAD', secret='BAD', endpoint='http://invalid-host-name-for-tests:12345', backoff_config=backoff_config)
|
|
30
31
|
|
|
31
32
|
|
|
32
33
|
def test_version_extraction():
|
|
@@ -36,7 +37,7 @@ def test_version_extraction():
|
|
|
36
37
|
("5", None), # major
|
|
37
38
|
("5.2", None), # major.minor
|
|
38
39
|
("5.2.0", None), # major.minor.patch
|
|
39
|
-
("5.2.0.10",
|
|
40
|
+
("5.2.0.10", (5, 2, 0, 10)), # major.minor.patch.protocol
|
|
40
41
|
("5.2.0.10 some other things", None), # suffix
|
|
41
42
|
("5.2.0.10.20", None), # extra version
|
|
42
43
|
]
|
vastdb/tests/test_schemas.py
CHANGED
|
@@ -61,3 +61,52 @@ def test_list_snapshots(session, clean_bucket_name):
|
|
|
61
61
|
with session.transaction() as tx:
|
|
62
62
|
b = tx.bucket(clean_bucket_name)
|
|
63
63
|
b.snapshots() # VAST Catalog may create some snapshots
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def test_nested_schemas(session, clean_bucket_name):
|
|
67
|
+
with session.transaction() as tx:
|
|
68
|
+
b = tx.bucket(clean_bucket_name)
|
|
69
|
+
s1 = b.create_schema('s1')
|
|
70
|
+
s1_s2 = s1.create_schema('s2')
|
|
71
|
+
s1_s3 = s1.create_schema('s3')
|
|
72
|
+
s1_s3_s4 = s1_s3.create_schema('s4')
|
|
73
|
+
s5 = b.create_schema('s5')
|
|
74
|
+
|
|
75
|
+
assert b.schema('s1') == s1
|
|
76
|
+
assert s1.schema('s2') == s1_s2
|
|
77
|
+
assert s1.schema('s3') == s1_s3
|
|
78
|
+
assert s1_s3.schema('s4') == s1_s3_s4
|
|
79
|
+
assert b.schema('s5') == s5
|
|
80
|
+
|
|
81
|
+
assert b.schemas() == [s1, s5]
|
|
82
|
+
assert s1.schemas() == [s1_s2, s1_s3]
|
|
83
|
+
assert s1_s2.schemas() == []
|
|
84
|
+
assert s1_s3.schemas() == [s1_s3_s4]
|
|
85
|
+
assert s1_s3_s4.schemas() == []
|
|
86
|
+
assert s5.schemas() == []
|
|
87
|
+
|
|
88
|
+
s1_s3_s4.drop()
|
|
89
|
+
assert s1_s3.schemas() == []
|
|
90
|
+
s1_s3.drop()
|
|
91
|
+
assert s1.schemas() == [s1_s2]
|
|
92
|
+
s1_s2.drop()
|
|
93
|
+
assert s1.schemas() == []
|
|
94
|
+
|
|
95
|
+
assert b.schemas() == [s1, s5]
|
|
96
|
+
s1.drop()
|
|
97
|
+
assert b.schemas() == [s5]
|
|
98
|
+
s5.drop()
|
|
99
|
+
assert b.schemas() == []
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def test_schema_pagination(session, clean_bucket_name):
|
|
103
|
+
with session.transaction() as tx:
|
|
104
|
+
b = tx.bucket(clean_bucket_name)
|
|
105
|
+
names = [f's{i}' for i in range(10)]
|
|
106
|
+
schemas = [b.create_schema(name) for name in names]
|
|
107
|
+
assert b.schemas(batch_size=3) == schemas
|
|
108
|
+
|
|
109
|
+
s0 = b.schema('s0')
|
|
110
|
+
names = [f'q{i}' for i in range(10)]
|
|
111
|
+
subschemas = [s0.create_schema(name) for name in names]
|
|
112
|
+
assert s0.schemas(batch_size=3) == subschemas
|
vastdb/tests/test_tables.py
CHANGED
|
@@ -3,7 +3,6 @@ import decimal
|
|
|
3
3
|
import logging
|
|
4
4
|
import random
|
|
5
5
|
import threading
|
|
6
|
-
import time
|
|
7
6
|
from contextlib import closing
|
|
8
7
|
from tempfile import NamedTemporaryFile
|
|
9
8
|
|
|
@@ -33,25 +32,25 @@ def test_tables(session, clean_bucket_name):
|
|
|
33
32
|
['a', 'bb', 'ccc'],
|
|
34
33
|
])
|
|
35
34
|
with prepare_data(session, clean_bucket_name, 's', 't', expected) as t:
|
|
36
|
-
actual =
|
|
35
|
+
actual = t.select(columns=['a', 'b', 's']).read_all()
|
|
37
36
|
assert actual == expected
|
|
38
37
|
|
|
39
|
-
actual =
|
|
38
|
+
actual = t.select().read_all()
|
|
40
39
|
assert actual == expected
|
|
41
40
|
|
|
42
|
-
actual =
|
|
41
|
+
actual = t.select(columns=['a', 'b']).read_all()
|
|
43
42
|
assert actual == expected.select(['a', 'b'])
|
|
44
43
|
|
|
45
|
-
actual =
|
|
44
|
+
actual = t.select(columns=['b', 's', 'a']).read_all()
|
|
46
45
|
assert actual == expected.select(['b', 's', 'a'])
|
|
47
46
|
|
|
48
|
-
actual =
|
|
47
|
+
actual = t.select(columns=['s']).read_all()
|
|
49
48
|
assert actual == expected.select(['s'])
|
|
50
49
|
|
|
51
|
-
actual =
|
|
50
|
+
actual = t.select(columns=[]).read_all()
|
|
52
51
|
assert actual == expected.select([])
|
|
53
52
|
|
|
54
|
-
actual =
|
|
53
|
+
actual = t.select(columns=['s'], internal_row_id=True).read_all()
|
|
55
54
|
log.debug("actual=%s", actual)
|
|
56
55
|
assert actual.to_pydict() == {
|
|
57
56
|
's': ['a', 'bb', 'ccc'],
|
|
@@ -59,12 +58,12 @@ def test_tables(session, clean_bucket_name):
|
|
|
59
58
|
}
|
|
60
59
|
|
|
61
60
|
columns_to_delete = pa.schema([(INTERNAL_ROW_ID, pa.uint64())])
|
|
62
|
-
rb = pa.record_batch(schema=columns_to_delete, data=[[0]]) # delete
|
|
61
|
+
rb = pa.record_batch(schema=columns_to_delete, data=[[0]]) # delete row 0
|
|
63
62
|
t.delete(rb)
|
|
64
63
|
|
|
65
|
-
selected_rows =
|
|
64
|
+
selected_rows = t.select(columns=['b'], predicate=(t['a'] == 222), internal_row_id=True).read_all()
|
|
66
65
|
t.delete(selected_rows)
|
|
67
|
-
actual =
|
|
66
|
+
actual = t.select(columns=['a', 'b', 's']).read_all()
|
|
68
67
|
assert actual.to_pydict() == {
|
|
69
68
|
'a': [333],
|
|
70
69
|
'b': [2.5],
|
|
@@ -78,10 +77,23 @@ def test_insert_wide_row(session, clean_bucket_name):
|
|
|
78
77
|
expected = pa.table(schema=columns, data=data)
|
|
79
78
|
|
|
80
79
|
with prepare_data(session, clean_bucket_name, 's', 't', expected) as t:
|
|
81
|
-
actual =
|
|
80
|
+
actual = t.select().read_all()
|
|
82
81
|
assert actual == expected
|
|
83
82
|
|
|
84
83
|
|
|
84
|
+
def test_insert_empty(session, clean_bucket_name):
|
|
85
|
+
columns = pa.schema([('a', pa.int8()), ('b', pa.float32())])
|
|
86
|
+
data = [[None] * 5, [None] * 5]
|
|
87
|
+
all_nulls = pa.table(schema=columns, data=data)
|
|
88
|
+
no_columns = all_nulls.select([])
|
|
89
|
+
|
|
90
|
+
with session.transaction() as tx:
|
|
91
|
+
t = tx.bucket(clean_bucket_name).create_schema('s').create_table('t', columns)
|
|
92
|
+
t.insert(all_nulls)
|
|
93
|
+
with pytest.raises(errors.NotImplemented):
|
|
94
|
+
t.insert(no_columns)
|
|
95
|
+
|
|
96
|
+
|
|
85
97
|
def test_exists(session, clean_bucket_name):
|
|
86
98
|
with session.transaction() as tx:
|
|
87
99
|
s = tx.bucket(clean_bucket_name).create_schema('s1')
|
|
@@ -125,38 +137,59 @@ def test_update_table(session, clean_bucket_name):
|
|
|
125
137
|
])
|
|
126
138
|
|
|
127
139
|
t.update(rb)
|
|
128
|
-
actual =
|
|
140
|
+
actual = t.select(columns=['a', 'b']).read_all()
|
|
129
141
|
assert actual.to_pydict() == {
|
|
130
142
|
'a': [1110, 222, 3330],
|
|
131
143
|
'b': [0.5, 1.5, 2.5]
|
|
132
144
|
}
|
|
133
145
|
|
|
134
|
-
actual =
|
|
146
|
+
actual = t.select(columns=['a', 'b'], predicate=(t['a'] < 1000), internal_row_id=True).read_all()
|
|
135
147
|
column_index = actual.column_names.index('a')
|
|
136
148
|
column_field = actual.field(column_index)
|
|
137
149
|
new_data = pc.add(actual.column('a'), 2000)
|
|
138
150
|
update_table = actual.set_column(column_index, column_field, new_data)
|
|
139
151
|
|
|
140
152
|
t.update(update_table, columns=['a'])
|
|
141
|
-
actual =
|
|
153
|
+
actual = t.select(columns=['a', 'b']).read_all()
|
|
142
154
|
assert actual.to_pydict() == {
|
|
143
155
|
'a': [1110, 2222, 3330],
|
|
144
156
|
'b': [0.5, 1.5, 2.5]
|
|
145
157
|
}
|
|
146
158
|
|
|
147
|
-
actual =
|
|
159
|
+
actual = t.select(columns=['a', 'b'], predicate=(t['a'] != 2222), internal_row_id=True).read_all()
|
|
148
160
|
column_index = actual.column_names.index('a')
|
|
149
161
|
column_field = actual.field(column_index)
|
|
150
162
|
new_data = pc.divide(actual.column('a'), 10)
|
|
151
163
|
update_table = actual.set_column(column_index, column_field, new_data)
|
|
152
164
|
|
|
153
165
|
t.update(update_table.to_batches()[0], columns=['a'])
|
|
154
|
-
actual =
|
|
166
|
+
actual = t.select(columns=['a', 'b']).read_all()
|
|
155
167
|
assert actual.to_pydict() == {
|
|
156
168
|
'a': [111, 2222, 333],
|
|
157
169
|
'b': [0.5, 1.5, 2.5]
|
|
158
170
|
}
|
|
159
171
|
|
|
172
|
+
# test update for not sorted rows:
|
|
173
|
+
rb = pa.record_batch(schema=columns_to_update, data=[
|
|
174
|
+
[2, 0], # update rows 0,2
|
|
175
|
+
[231, 235]
|
|
176
|
+
])
|
|
177
|
+
t.update(rb)
|
|
178
|
+
actual = t.select(columns=['a', 'b']).read_all()
|
|
179
|
+
assert actual.to_pydict() == {
|
|
180
|
+
'a': [235, 2222, 231],
|
|
181
|
+
'b': [0.5, 1.5, 2.5]
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
# test delete for not sorted rows:
|
|
185
|
+
rb = pa.record_batch(schema=pa.schema([(INTERNAL_ROW_ID, pa.uint64())]), data=[[2, 0]])
|
|
186
|
+
t.delete(rb)
|
|
187
|
+
actual = t.select(columns=['a', 'b']).read_all()
|
|
188
|
+
assert actual.to_pydict() == {
|
|
189
|
+
'a': [2222],
|
|
190
|
+
'b': [1.5]
|
|
191
|
+
}
|
|
192
|
+
|
|
160
193
|
|
|
161
194
|
def test_select_with_multisplits(session, clean_bucket_name):
|
|
162
195
|
columns = pa.schema([
|
|
@@ -171,10 +204,29 @@ def test_select_with_multisplits(session, clean_bucket_name):
|
|
|
171
204
|
config.rows_per_split = 1000
|
|
172
205
|
|
|
173
206
|
with prepare_data(session, clean_bucket_name, 's', 't', expected) as t:
|
|
174
|
-
actual =
|
|
207
|
+
actual = t.select(columns=['a'], config=config).read_all()
|
|
175
208
|
assert actual == expected
|
|
176
209
|
|
|
177
210
|
|
|
211
|
+
def test_select_with_priority(session, clean_bucket_name):
|
|
212
|
+
columns = pa.schema([
|
|
213
|
+
('a', pa.int32())
|
|
214
|
+
])
|
|
215
|
+
expected = pa.table(schema=columns, data=[range(100)])
|
|
216
|
+
with prepare_data(session, clean_bucket_name, 's', 't', expected) as t:
|
|
217
|
+
config = QueryConfig()
|
|
218
|
+
|
|
219
|
+
config.queue_priority = 0
|
|
220
|
+
assert t.select(config=config).read_all() == expected
|
|
221
|
+
|
|
222
|
+
config.queue_priority = 12345
|
|
223
|
+
assert t.select(config=config).read_all() == expected
|
|
224
|
+
|
|
225
|
+
config.queue_priority = -1
|
|
226
|
+
with pytest.raises(errors.BadRequest):
|
|
227
|
+
t.select(config=config).read_all()
|
|
228
|
+
|
|
229
|
+
|
|
178
230
|
def test_types(session, clean_bucket_name):
|
|
179
231
|
columns = pa.schema([
|
|
180
232
|
('tb', pa.bool_()),
|
|
@@ -218,7 +270,7 @@ def test_types(session, clean_bucket_name):
|
|
|
218
270
|
|
|
219
271
|
with prepare_data(session, clean_bucket_name, 's', 't', expected) as table:
|
|
220
272
|
def select(predicate):
|
|
221
|
-
return
|
|
273
|
+
return table.select(predicate=predicate).read_all()
|
|
222
274
|
|
|
223
275
|
assert select(None) == expected
|
|
224
276
|
for t in [table, ibis._]:
|
|
@@ -274,13 +326,20 @@ def test_filters(session, clean_bucket_name):
|
|
|
274
326
|
|
|
275
327
|
with prepare_data(session, clean_bucket_name, 's', 't', expected) as table:
|
|
276
328
|
def select(predicate):
|
|
277
|
-
return
|
|
329
|
+
return table.select(predicate=predicate).read_all()
|
|
278
330
|
|
|
279
331
|
assert select(None) == expected
|
|
280
332
|
assert select(True) == expected
|
|
281
333
|
assert select(False) == pa.Table.from_batches([], schema=columns)
|
|
282
334
|
|
|
283
335
|
for t in [table, ibis._]:
|
|
336
|
+
|
|
337
|
+
select(t['a'].isin(list(range(100))))
|
|
338
|
+
select(t['a'].isin(list(range(1000))))
|
|
339
|
+
select(t['a'].isin(list(range(10000))))
|
|
340
|
+
with pytest.raises(errors.TooLargeRequest):
|
|
341
|
+
select(t['a'].isin(list(range(100000))))
|
|
342
|
+
|
|
284
343
|
assert select(t['a'].between(222, 444)) == expected.filter((pc.field('a') >= 222) & (pc.field('a') <= 444))
|
|
285
344
|
assert select((t['a'].between(222, 444)) & (t['b'] > 2.5)) == expected.filter((pc.field('a') >= 222) & (pc.field('a') <= 444) & (pc.field('b') > 2.5))
|
|
286
345
|
|
|
@@ -351,7 +410,7 @@ def test_parquet_export(session, clean_bucket_name):
|
|
|
351
410
|
expected = pa.Table.from_batches([rb])
|
|
352
411
|
rb = t.insert(rb)
|
|
353
412
|
assert rb.to_pylist() == [0, 1]
|
|
354
|
-
actual =
|
|
413
|
+
actual = t.select().read_all()
|
|
355
414
|
assert actual == expected
|
|
356
415
|
|
|
357
416
|
table_batches = t.select()
|
|
@@ -667,18 +726,37 @@ def test_select_stop(session, clean_bucket_name):
|
|
|
667
726
|
assert active_threads() == 0
|
|
668
727
|
|
|
669
728
|
|
|
670
|
-
def
|
|
729
|
+
def test_catalog_select(session, clean_bucket_name):
|
|
671
730
|
with session.transaction() as tx:
|
|
672
731
|
bc = tx.catalog()
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
732
|
+
assert bc.columns()
|
|
733
|
+
rows = bc.select(['name']).read_all()
|
|
734
|
+
assert len(rows) > 0, rows
|
|
735
|
+
|
|
736
|
+
|
|
737
|
+
class NotReady(Exception):
|
|
738
|
+
pass
|
|
676
739
|
|
|
677
740
|
|
|
741
|
+
@pytest.mark.flaky(retries=30, delay=1, only_on=[NotReady])
|
|
678
742
|
def test_audit_log_select(session, clean_bucket_name):
|
|
679
743
|
with session.transaction() as tx:
|
|
680
744
|
a = tx.audit_log()
|
|
681
|
-
a.columns()
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
745
|
+
assert a.columns()
|
|
746
|
+
rows = a.select().read_all()
|
|
747
|
+
if len(rows) == 0:
|
|
748
|
+
raise NotReady
|
|
749
|
+
|
|
750
|
+
|
|
751
|
+
@pytest.mark.flaky(retries=30, delay=1, only_on=[NotReady])
|
|
752
|
+
def test_catalog_snapshots_select(session, clean_bucket_name):
|
|
753
|
+
with session.transaction() as tx:
|
|
754
|
+
snaps = tx.catalog_snapshots()
|
|
755
|
+
if not snaps:
|
|
756
|
+
raise NotReady
|
|
757
|
+
latest = snaps[-1]
|
|
758
|
+
t = tx.catalog(latest)
|
|
759
|
+
assert t.columns()
|
|
760
|
+
rows = t.select().read_all()
|
|
761
|
+
if not rows:
|
|
762
|
+
raise NotReady
|
vastdb/tests/test_util.py
CHANGED
|
@@ -33,6 +33,12 @@ def test_wide_row():
|
|
|
33
33
|
list(util.iter_serialized_slices(t))
|
|
34
34
|
|
|
35
35
|
|
|
36
|
+
def test_expand_ip_ranges():
|
|
37
|
+
endpoints = ["http://172.19.101.1-3"]
|
|
38
|
+
expected = ["http://172.19.101.1", "http://172.19.101.2", "http://172.19.101.3"]
|
|
39
|
+
assert util.expand_ip_ranges(endpoints) == expected
|
|
40
|
+
|
|
41
|
+
|
|
36
42
|
def _parse(bufs):
|
|
37
43
|
for buf in bufs:
|
|
38
44
|
with pa.ipc.open_stream(buf) as reader:
|
vastdb/transaction.py
CHANGED
|
@@ -8,21 +8,26 @@ A transcation is used as a context manager, since every Database-related operati
|
|
|
8
8
|
|
|
9
9
|
import logging
|
|
10
10
|
from dataclasses import dataclass
|
|
11
|
-
from typing import Optional
|
|
11
|
+
from typing import TYPE_CHECKING, Iterable, Optional
|
|
12
12
|
|
|
13
13
|
import botocore
|
|
14
14
|
|
|
15
|
-
from . import bucket, errors, schema, session
|
|
15
|
+
from . import bucket, errors, schema, session
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from bucket import Bucket
|
|
19
|
+
from table import Table
|
|
20
|
+
|
|
16
21
|
|
|
17
22
|
log = logging.getLogger(__name__)
|
|
18
23
|
|
|
19
|
-
|
|
24
|
+
VAST_CATALOG_BUCKET_NAME = "vast-big-catalog-bucket"
|
|
20
25
|
VAST_CATALOG_SCHEMA_NAME = 'vast_big_catalog_schema'
|
|
21
26
|
VAST_CATALOG_TABLE_NAME = 'vast_big_catalog_table'
|
|
22
27
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
28
|
+
AUDIT_LOG_BUCKET_NAME = "vast-audit-log-bucket"
|
|
29
|
+
AUDIT_LOG_SCHEMA_NAME = 'vast_audit_log_schema'
|
|
30
|
+
AUDIT_LOG_TABLE_NAME = 'vast_audit_log_table'
|
|
26
31
|
|
|
27
32
|
|
|
28
33
|
@dataclass
|
|
@@ -56,7 +61,7 @@ class Transaction:
|
|
|
56
61
|
return 'InvalidTransaction'
|
|
57
62
|
return f'Transaction(id=0x{self.txid:016x})'
|
|
58
63
|
|
|
59
|
-
def bucket(self, name: str) -> "
|
|
64
|
+
def bucket(self, name: str) -> "Bucket":
|
|
60
65
|
"""Return a VAST Bucket, if exists."""
|
|
61
66
|
try:
|
|
62
67
|
self._rpc.s3.head_bucket(Bucket=name)
|
|
@@ -67,14 +72,18 @@ class Transaction:
|
|
|
67
72
|
raise
|
|
68
73
|
return bucket.Bucket(name, self)
|
|
69
74
|
|
|
70
|
-
def
|
|
75
|
+
def catalog_snapshots(self) -> Iterable["Bucket"]:
|
|
76
|
+
"""Return VAST Catalog bucket snapshots."""
|
|
77
|
+
return bucket.Bucket(VAST_CATALOG_BUCKET_NAME, self).snapshots()
|
|
78
|
+
|
|
79
|
+
def catalog(self, snapshot: Optional["Bucket"] = None, fail_if_missing=True) -> Optional["Table"]:
|
|
71
80
|
"""Return VAST Catalog table."""
|
|
72
|
-
b = bucket.Bucket(
|
|
81
|
+
b = snapshot or bucket.Bucket(VAST_CATALOG_BUCKET_NAME, self)
|
|
73
82
|
s = schema.Schema(VAST_CATALOG_SCHEMA_NAME, b)
|
|
74
83
|
return s.table(name=VAST_CATALOG_TABLE_NAME, fail_if_missing=fail_if_missing)
|
|
75
84
|
|
|
76
|
-
def audit_log(self, fail_if_missing=True) -> Optional["
|
|
77
|
-
"""Return VAST
|
|
78
|
-
b = bucket.Bucket(
|
|
79
|
-
s = schema.Schema(
|
|
80
|
-
return s.table(name=
|
|
85
|
+
def audit_log(self, fail_if_missing=True) -> Optional["Table"]:
|
|
86
|
+
"""Return VAST Audit Log table."""
|
|
87
|
+
b = bucket.Bucket(AUDIT_LOG_BUCKET_NAME, self)
|
|
88
|
+
s = schema.Schema(AUDIT_LOG_SCHEMA_NAME, b)
|
|
89
|
+
return s.table(name=AUDIT_LOG_TABLE_NAME, fail_if_missing=fail_if_missing)
|
vastdb/util.py
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
import re
|
|
2
3
|
from typing import TYPE_CHECKING, Callable, List, Optional, Union
|
|
3
4
|
|
|
4
5
|
import pyarrow as pa
|
|
6
|
+
import pyarrow.compute as pc
|
|
5
7
|
import pyarrow.parquet as pq
|
|
6
8
|
|
|
7
9
|
from .errors import InvalidArgument, TooWideRow
|
|
@@ -83,12 +85,16 @@ def union_schema_merge(current_schema: pa.Schema, new_schema: pa.Schema) -> pa.S
|
|
|
83
85
|
|
|
84
86
|
MAX_TABULAR_REQUEST_SIZE = 5 << 20 # in bytes
|
|
85
87
|
MAX_RECORD_BATCH_SLICE_SIZE = int(0.9 * MAX_TABULAR_REQUEST_SIZE)
|
|
88
|
+
MAX_QUERY_DATA_REQUEST_SIZE = int(0.9 * MAX_TABULAR_REQUEST_SIZE)
|
|
86
89
|
|
|
87
90
|
|
|
88
91
|
def iter_serialized_slices(batch: Union[pa.RecordBatch, pa.Table], max_rows_per_slice=None):
|
|
89
92
|
"""Iterate over a list of record batch slices."""
|
|
93
|
+
if batch.nbytes:
|
|
94
|
+
rows_per_slice = int(0.9 * len(batch) * MAX_RECORD_BATCH_SLICE_SIZE / batch.nbytes)
|
|
95
|
+
else:
|
|
96
|
+
rows_per_slice = len(batch) # if the batch has no buffers (no rows/columns)
|
|
90
97
|
|
|
91
|
-
rows_per_slice = int(0.9 * len(batch) * MAX_RECORD_BATCH_SLICE_SIZE / batch.nbytes)
|
|
92
98
|
if max_rows_per_slice is not None:
|
|
93
99
|
rows_per_slice = min(rows_per_slice, max_rows_per_slice)
|
|
94
100
|
|
|
@@ -112,3 +118,37 @@ def serialize_record_batch(batch: Union[pa.RecordBatch, pa.Table]):
|
|
|
112
118
|
with pa.ipc.new_stream(sink, batch.schema) as writer:
|
|
113
119
|
writer.write(batch)
|
|
114
120
|
return sink.getvalue()
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def expand_ip_ranges(endpoints):
|
|
124
|
+
"""Expands endpoint strings that include an IP range in the format 'http://172.19.101.1-16'."""
|
|
125
|
+
expanded_endpoints = []
|
|
126
|
+
pattern = re.compile(r"(http://\d+\.\d+\.\d+)\.(\d+)-(\d+)")
|
|
127
|
+
|
|
128
|
+
for endpoint in endpoints:
|
|
129
|
+
match = pattern.match(endpoint)
|
|
130
|
+
if match:
|
|
131
|
+
base_url = match.group(1)
|
|
132
|
+
start_ip = int(match.group(2))
|
|
133
|
+
end_ip = int(match.group(3))
|
|
134
|
+
if start_ip > end_ip:
|
|
135
|
+
raise ValueError("Start IP cannot be greater than end IP in the range.")
|
|
136
|
+
expanded_endpoints.extend(f"{base_url}.{ip}" for ip in range(start_ip, end_ip + 1))
|
|
137
|
+
else:
|
|
138
|
+
expanded_endpoints.append(endpoint)
|
|
139
|
+
return expanded_endpoints
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def is_sorted(arr):
|
|
143
|
+
"""Check if the array is sorted."""
|
|
144
|
+
return pc.all(pc.greater(arr[1:], arr[:-1])).as_py()
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def sort_record_batch_if_needed(record_batch, sort_column):
|
|
148
|
+
"""Sort the RecordBatch by the specified column if it is not already sorted."""
|
|
149
|
+
column_data = record_batch[sort_column]
|
|
150
|
+
|
|
151
|
+
if not is_sorted(column_data):
|
|
152
|
+
return record_batch.sort_by(sort_column)
|
|
153
|
+
else:
|
|
154
|
+
return record_batch
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: vastdb
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.7
|
|
4
4
|
Summary: VAST Data SDK
|
|
5
5
|
Home-page: https://github.com/vast-data/vastdb_sdk
|
|
6
6
|
Author: VAST DATA
|
|
@@ -21,7 +21,7 @@ License-File: LICENSE
|
|
|
21
21
|
Requires-Dist: aws-requests-auth
|
|
22
22
|
Requires-Dist: boto3
|
|
23
23
|
Requires-Dist: flatbuffers
|
|
24
|
-
Requires-Dist: ibis-framework ==
|
|
24
|
+
Requires-Dist: ibis-framework ==9.0.0
|
|
25
25
|
Requires-Dist: pyarrow
|
|
26
26
|
Requires-Dist: requests
|
|
27
27
|
Requires-Dist: xmltodict
|
|
@@ -148,30 +148,30 @@ vast_flatbuf/tabular/ObjectDetails.py,sha256=qW0WtbkCYYE_L-Kw6VNRDCLYaRm5lKvTbLN
|
|
|
148
148
|
vast_flatbuf/tabular/S3File.py,sha256=KC9c2oS5-JXwTTriUVFdjOvRG0B54Cq9kviSDZY3NI0,4450
|
|
149
149
|
vast_flatbuf/tabular/VipRange.py,sha256=_BJd1RRZAcK76T9vlsHzXKYVsPVaz6WTEAqStMQCAUQ,2069
|
|
150
150
|
vast_flatbuf/tabular/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
151
|
-
vastdb/__init__.py,sha256=
|
|
152
|
-
vastdb/
|
|
153
|
-
vastdb/
|
|
154
|
-
vastdb/
|
|
155
|
-
vastdb/
|
|
156
|
-
vastdb/schema.py,sha256=
|
|
157
|
-
vastdb/session.py,sha256=
|
|
158
|
-
vastdb/table.py,sha256=
|
|
159
|
-
vastdb/transaction.py,sha256=
|
|
160
|
-
vastdb/util.py,sha256=
|
|
151
|
+
vastdb/__init__.py,sha256=8PLcZowy_vM0zuiYSQPXuxIEMcwHD7IRFpgcPK-03bk,386
|
|
152
|
+
vastdb/_internal.py,sha256=FB0pHOc79tAgMLzZcLSeeHIzZogt81S8FHhzdZiKEuI,89095
|
|
153
|
+
vastdb/bucket.py,sha256=5KuKhPjZOevznZqWHDVVocejvAy7dcwobPuV6BJCfPc,2544
|
|
154
|
+
vastdb/conftest.py,sha256=D4RvOhGvMQy-JliKY-uyzcB-_mFBwI6aMF__xwHiwOM,2359
|
|
155
|
+
vastdb/errors.py,sha256=nC7d05xwe0WxMFyM3cEEqIvA09OXNqcxiUGsKov822I,4098
|
|
156
|
+
vastdb/schema.py,sha256=X7IRrogXH7Z0kes-DsDh1bRqIhvjH6owlFigGBXy7XQ,5913
|
|
157
|
+
vastdb/session.py,sha256=3YHhG7IamFOKuy-Fkq_IVtPNriSfI6IN_4z4arBFbDU,3349
|
|
158
|
+
vastdb/table.py,sha256=a0ZARfBdptxlm-zwiqdZ1ALud54-IkfX_ZI_ZD5hcsw,31047
|
|
159
|
+
vastdb/transaction.py,sha256=qu2rOlR7AS1ojMOzgWapQMpcorrutelZZLH1mLmTHxk,3186
|
|
160
|
+
vastdb/util.py,sha256=4LTYBBR13na376AmDm5lQILJzLcfelIKdkNPy0IqI0o,5684
|
|
161
161
|
vastdb/bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
162
|
-
vastdb/bench/test_perf.py,sha256=
|
|
162
|
+
vastdb/bench/test_perf.py,sha256=yn5gE7t_nzmJHBl9bCs1hxQOgzhvFphuYElsWGko8ts,1084
|
|
163
163
|
vastdb/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
164
|
-
vastdb/tests/test_duckdb.py,sha256=
|
|
165
|
-
vastdb/tests/test_imports.py,sha256=
|
|
166
|
-
vastdb/tests/test_nested.py,sha256=
|
|
167
|
-
vastdb/tests/test_projections.py,sha256=
|
|
168
|
-
vastdb/tests/test_sanity.py,sha256=
|
|
169
|
-
vastdb/tests/test_schemas.py,sha256=
|
|
170
|
-
vastdb/tests/test_tables.py,sha256=
|
|
171
|
-
vastdb/tests/test_util.py,sha256=
|
|
164
|
+
vastdb/tests/test_duckdb.py,sha256=STw_1PwTQR8Naz6s0p6lQTV1ZTKKhe3LPBUbhqzTCu0,1880
|
|
165
|
+
vastdb/tests/test_imports.py,sha256=xKub3-bisFjH0BsZM8COfiUWuMrtoOoQKprF6VQT9RI,5669
|
|
166
|
+
vastdb/tests/test_nested.py,sha256=22NAxBTm7Aq-Vn6AIYbi5Cb1ET8W0XeLK3pp4D8BYWI,3448
|
|
167
|
+
vastdb/tests/test_projections.py,sha256=11a-55VbJcqaFPkOKaKDEdM5nkeI0xtUhh6cQc1upSA,4223
|
|
168
|
+
vastdb/tests/test_sanity.py,sha256=xD-XBmmuFxALj5r8eirtPG9fghxm8h4srIN9X6LEOX4,3054
|
|
169
|
+
vastdb/tests/test_schemas.py,sha256=l70YQMlx2UL1KRQhApriiG2ZM7GJF-IzWU31H3Yqn1U,3312
|
|
170
|
+
vastdb/tests/test_tables.py,sha256=YhkeeTHq8aW1RgU86GolJl1dG3KGTlVG97Bny9RzyrM,30124
|
|
171
|
+
vastdb/tests/test_util.py,sha256=Ok_sAEBJsRGF5Voa_v5eu3eAd52GWu8jMjjQbadwW-s,1260
|
|
172
172
|
vastdb/tests/util.py,sha256=dpRJYbboDnlqL4qIdvScpp8--5fxRUBIcIYitrfcj9o,555
|
|
173
|
-
vastdb-0.1.
|
|
174
|
-
vastdb-0.1.
|
|
175
|
-
vastdb-0.1.
|
|
176
|
-
vastdb-0.1.
|
|
177
|
-
vastdb-0.1.
|
|
173
|
+
vastdb-0.1.7.dist-info/LICENSE,sha256=obffan7LYrq7hLHNrY7vHcn2pKUTBUYXMKu-VOAvDxU,11333
|
|
174
|
+
vastdb-0.1.7.dist-info/METADATA,sha256=gwlUIInf2mlaAT2GsJ0bztYteRXpE4kqukDPKrNNJfk,1350
|
|
175
|
+
vastdb-0.1.7.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
176
|
+
vastdb-0.1.7.dist-info/top_level.txt,sha256=Vsj2MKtlhPg0J4so64slQtnwjhgoPmJgcG-6YcVAwVc,20
|
|
177
|
+
vastdb-0.1.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|