vastdb 0.1.6__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vastdb/__init__.py +3 -0
- vastdb/{internal_commands.py → _internal.py} +266 -339
- vastdb/bucket.py +2 -2
- vastdb/schema.py +8 -4
- vastdb/session.py +18 -5
- vastdb/table.py +26 -17
- vastdb/tests/test_duckdb.py +2 -2
- vastdb/tests/test_sanity.py +3 -2
- vastdb/tests/test_tables.py +54 -1
- vastdb/tests/test_util.py +6 -0
- vastdb/transaction.py +2 -2
- vastdb/util.py +40 -1
- {vastdb-0.1.6.dist-info → vastdb-0.1.7.dist-info}/METADATA +2 -2
- {vastdb-0.1.6.dist-info → vastdb-0.1.7.dist-info}/RECORD +17 -17
- {vastdb-0.1.6.dist-info → vastdb-0.1.7.dist-info}/LICENSE +0 -0
- {vastdb-0.1.6.dist-info → vastdb-0.1.7.dist-info}/WHEEL +0 -0
- {vastdb-0.1.6.dist-info → vastdb-0.1.7.dist-info}/top_level.txt +0 -0
vastdb/bucket.py
CHANGED

```diff
@@ -6,7 +6,7 @@ It is possible to list and access VAST snapshots generated over a bucket.
 
 import logging
 from dataclasses import dataclass, field
-from typing import TYPE_CHECKING,
+from typing import TYPE_CHECKING, Iterable, Optional
 
 from . import errors, schema, transaction
 
@@ -55,7 +55,7 @@ class Bucket:
 
         return Bucket(name=f'{self.name}/{expected_name}', tx=self.tx)
 
-    def snapshots(self) ->
+    def snapshots(self) -> Iterable["Bucket"]:
         """List bucket's snapshots."""
        snapshots = []
        next_key = 0
```
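`snapshots()` is now annotated to return `Iterable["Bucket"]`. A minimal usage sketch (the credentials, endpoint, and bucket name are placeholders, not taken from this diff):

```python
import vastdb

session = vastdb.connect(access='...', secret='...', endpoint='http://vast.example.com')
with session.transaction() as tx:
    bucket = tx.bucket('my-bucket')       # placeholder bucket name
    for snapshot in bucket.snapshots():   # each snapshot is exposed as a Bucket
        print(snapshot.name)
```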
vastdb/schema.py
CHANGED

```diff
@@ -6,7 +6,7 @@ It is possible to list and access VAST snapshots generated over a bucket.
 
 import logging
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, List, Optional
+from typing import TYPE_CHECKING, Iterable, List, Optional
 
 import pyarrow as pa
 
@@ -62,7 +62,7 @@ class Schema:
         assert len(names) == 1, f"Expected to receive only a single schema, but got {len(schemas)}: ({schemas})"
         return schema.Schema(name=self._subschema_full_name(names[0]), bucket=self.bucket)
 
-    def schemas(self, batch_size=None) ->
+    def schemas(self, batch_size=None) -> Iterable["Schema"]:
         """List child schemas."""
         next_key = 0
         if not batch_size:
@@ -76,14 +76,18 @@ class Schema:
                 break
         return result
 
-    def create_table(self, table_name: str, columns: pa.Schema, fail_if_exists=True) -> "Table":
+    def create_table(self, table_name: str, columns: pa.Schema, fail_if_exists=True, use_external_row_ids_allocation=False) -> "Table":
         """Create a new table under this schema."""
         if current := self.table(table_name, fail_if_missing=False):
             if fail_if_exists:
                 raise errors.TableExists(self.bucket.name, self.name, table_name)
             else:
                 return current
-
+        if use_external_row_ids_allocation:
+            self.tx._rpc.features.check_external_row_ids_allocation()
+
+        self.tx._rpc.api.create_table(self.bucket.name, self.name, table_name, columns, txid=self.tx.txid,
+                                      use_external_row_ids_allocation=use_external_row_ids_allocation)
         log.info("Created table: %s", table_name)
         return self.table(table_name)  # type: ignore[return-value]
```
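`create_table()` now accepts `use_external_row_ids_allocation`, gated on VAST 5.1+ by the new feature check. A minimal sketch of the new flag (credentials, bucket, schema, and table names are placeholders):

```python
import pyarrow as pa
import vastdb

session = vastdb.connect(access='...', secret='...', endpoint='http://vast.example.com')
columns = pa.schema([('a', pa.int8()), ('b', pa.float32())])

with session.transaction() as tx:
    s = tx.bucket('my-bucket').create_schema('s')
    # Raises on clusters older than VAST 5.1 (check_external_row_ids_allocation)
    t = s.create_table('t', columns, use_external_row_ids_allocation=True)
```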
vastdb/session.py
CHANGED

```diff
@@ -9,10 +9,12 @@ For more details see:
 
 import logging
 import os
+from typing import Optional
 
 import boto3
 
-from . import
+from . import _internal, errors, transaction
+from ._internal import BackoffConfig
 
 log = logging.getLogger()
 
@@ -36,6 +38,10 @@ class Features:
             "Semi-sorted projection enforcement requires 5.1+ VAST release",
             vast_version >= (5, 1))
 
+        self.check_external_row_ids_allocation = self._check(
+            "External row IDs allocation requires 5.1+ VAST release",
+            vast_version >= (5, 1))
+
     def _check(self, msg, supported):
         log.debug("%s (current version is %s): supported=%s", msg, self.vast_version, supported)
         if not supported:
@@ -51,7 +57,10 @@ class Features:
 class Session:
     """VAST database session."""
 
-    def __init__(self, access=None, secret=None, endpoint=None,
+    def __init__(self, access=None, secret=None, endpoint=None,
+                 *,
+                 ssl_verify=True,
+                 backoff_config: Optional[BackoffConfig] = None):
         """Connect to a VAST Database endpoint, using specified credentials."""
         if access is None:
             access = os.environ['AWS_ACCESS_KEY_ID']
@@ -60,9 +69,13 @@ class Session:
         if endpoint is None:
             endpoint = os.environ['AWS_S3_ENDPOINT_URL']
 
-        self.api =
-
-
+        self.api = _internal.VastdbApi(
+            endpoint=endpoint,
+            access_key=access,
+            secret_key=secret,
+            ssl_verify=ssl_verify,
+            backoff_config=backoff_config)
+        self.features = Features(self.api.vast_version)
         self.s3 = boto3.client('s3',
                                aws_access_key_id=access,
                                aws_secret_access_key=secret,
```
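`Session.__init__` now takes keyword-only `ssl_verify` and `backoff_config` arguments, with `BackoffConfig` re-exported from the renamed `_internal` module. A sketch of the new signature; `max_tries` is the only `BackoffConfig` parameter this diff confirms (see test_sanity.py below), and the credentials and endpoint are placeholders:

```python
from vastdb.session import BackoffConfig, Session

session = Session(
    access='...',                    # falls back to AWS_ACCESS_KEY_ID when None
    secret='...',
    endpoint='http://vast.example.com',  # falls back to AWS_S3_ENDPOINT_URL when None
    ssl_verify=True,                 # keyword-only as of 0.1.7
    backoff_config=BackoffConfig(max_tries=3))  # None keeps the default retry policy
```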
vastdb/table.py
CHANGED

```diff
@@ -7,14 +7,14 @@ import queue
 from dataclasses import dataclass, field
 from math import ceil
 from threading import Event
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
 
 import backoff
 import ibis
 import pyarrow as pa
 import requests
 
-from . import
+from . import _internal, errors, schema, util
 
 log = logging.getLogger(__name__)
 
@@ -80,7 +80,12 @@ class QueryConfig:
     # used for worker threads' naming
     query_id: str = ""
 
-    #
+    # non-negative integer, used for server-side prioritization of queued requests:
+    # - requests with lower values will be served before requests with higher values.
+    # - if unset, the request will be added to the queue's end.
+    queue_priority: Optional[int] = None
+
+    # DEPRECATED: will be removed in a future release
     backoff_func: Any = field(default=backoff.on_exception(backoff.expo, RETRIABLE_ERRORS, max_tries=10))
 
 
@@ -102,14 +107,13 @@ class SelectSplitState:
         self.query_data_request = query_data_request
         self.table = table
 
-    def batches(self, api:
+    def batches(self, api: _internal.VastdbApi):
         """Execute QueryData request, and yield parsed RecordBatch objects.
 
         Can be called repeatedly, to allow pagination.
         """
         while not self.done:
-
-            response = query_with_backoff(
+            response = api.query_data(
                 bucket=self.table.bucket.name,
                 schema=self.table.schema.name,
                 table=self.table.name,
@@ -120,10 +124,11 @@ class SelectSplitState:
                 txid=self.table.tx.txid,
                 limit_rows=self.config.limit_rows_per_sub_split,
                 sub_split_start_row_ids=self.subsplits_state.items(),
+                schedule_id=self.config.queue_priority,
                 enable_sorted_projections=self.config.use_semi_sorted_projections,
                 query_imports_table=self.table._imports_table,
                 projection=self.config.semi_sorted_projection_name)
-            pages_iter =
+            pages_iter = _internal.parse_query_data_response(
                 conn=response.raw,
                 schema=self.query_data_request.response_schema,
                 start_row_ids=self.subsplits_state,
@@ -137,7 +142,7 @@ class SelectSplitState:
     @property
     def done(self):
         """Returns true iff the pagination over."""
-        return all(row_id ==
+        return all(row_id == _internal.TABULAR_INVALID_ROW_ID for row_id in self.subsplits_state.values())
 
 
 @dataclass
@@ -187,14 +192,14 @@ class Table:
         """Get a specific semi-sorted projection of this table."""
         if self._imports_table:
             raise errors.NotSupportedCommand(self.bucket.name, self.schema.name, self.name)
-        projs = self.projections(projection_name=name)
+        projs = tuple(self.projections(projection_name=name))
         if not projs:
             raise errors.MissingProjection(self.bucket.name, self.schema.name, self.name, name)
         assert len(projs) == 1, f"Expected to receive only a single projection, but got: {len(projs)}. projections: {projs}"
         log.debug("Found projection: %s", projs[0])
         return projs[0]
 
-    def projections(self, projection_name=None) ->
+    def projections(self, projection_name=None) -> Iterable["Projection"]:
         """List all semi-sorted projections of this table."""
         if self._imports_table:
             raise errors.NotSupportedCommand(self.bucket.name, self.schema.name, self.name)
@@ -214,7 +219,7 @@ class Table:
                 break
         return [_parse_projection_info(projection, self) for projection in projections]
 
-    def import_files(self, files_to_import:
+    def import_files(self, files_to_import: Iterable[str], config: Optional[ImportConfig] = None) -> None:
         """Import a list of Parquet files into this table.
 
         The files must be on VAST S3 server and be accessible using current credentials.
@@ -283,7 +288,7 @@ class Table:
                 max_workers=config.import_concurrency, thread_name_prefix='import_thread') as pool:
             try:
                 for endpoint in endpoints:
-                    session =
+                    session = _internal.VastdbApi(endpoint, self.tx._rpc.api.access_key, self.tx._rpc.api.secret_key)
                     futures.append(pool.submit(import_worker, files_queue, session))
 
                 log.debug("Waiting for import workers to finish")
@@ -342,13 +347,13 @@ class Table:
         if predicate is True:
             predicate = None
         if predicate is False:
-            response_schema =
+            response_schema = _internal.get_response_schema(schema=query_schema, field_names=columns)
             return pa.RecordBatchReader.from_batches(response_schema, [])
 
         if isinstance(predicate, ibis.common.deferred.Deferred):
             predicate = predicate.resolve(self._ibis_table)  # may raise if the predicate is invalid (e.g. wrong types / missing column)
 
-        query_data_request =
+        query_data_request = _internal.build_query_data_request(
             schema=query_schema,
             predicate=predicate,
             field_names=columns)
@@ -376,7 +381,7 @@ class Table:
 
         def single_endpoint_worker(endpoint: str):
             try:
-                host_api =
+                host_api = _internal.VastdbApi(endpoint=endpoint, access_key=self.tx._rpc.api.access_key, secret_key=self.tx._rpc.api.secret_key)
                 while True:
                     check_stop()
                     try:
@@ -473,7 +478,7 @@ class Table:
         for slice in serialized_slices:
             res = self.tx._rpc.api.insert_rows(self.bucket.name, self.schema.name, self.name, record_batch=slice,
                                                txid=self.tx.txid)
-            (batch,) = pa.RecordBatchStreamReader(res.
+            (batch,) = pa.RecordBatchStreamReader(res.content)
             row_ids.append(batch[INTERNAL_ROW_ID])
         try:
             self.tx._rpc.features.check_return_row_ids()
@@ -509,6 +514,8 @@ class Table:
         else:
             update_rows_rb = rows
 
+        update_rows_rb = util.sort_record_batch_if_needed(update_rows_rb, INTERNAL_ROW_ID)
+
         serialized_slices = util.iter_serialized_slices(update_rows_rb, MAX_ROWS_PER_BATCH)
         for slice in serialized_slices:
             self.tx._rpc.api.update_rows(self.bucket.name, self.schema.name, self.name, record_batch=slice,
@@ -528,6 +535,8 @@ class Table:
         delete_rows_rb = pa.record_batch(schema=pa.schema([(INTERNAL_ROW_ID, pa.uint64())]),
                                          data=[_combine_chunks(rows_chunk)])
 
+        delete_rows_rb = util.sort_record_batch_if_needed(delete_rows_rb, INTERNAL_ROW_ID)
+
         serialized_slices = util.iter_serialized_slices(delete_rows_rb, MAX_ROWS_PER_BATCH)
         for slice in serialized_slices:
             self.tx._rpc.api.delete_rows(self.bucket.name, self.schema.name, self.name, record_batch=slice,
@@ -593,7 +602,7 @@ class Table:
             return self.imports_table()  # type: ignore[return-value]
 
     def imports_table(self) -> Optional["Table"]:
-        """Get the imports table
+        """Get the imports table of this table."""
         self.tx._rpc.features.check_imports_table()
         return Table(name=self.name, schema=self.schema, handle=int(self.handle), stats=self.stats, _imports_table=True)
 
```
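`QueryConfig` gains a `queue_priority` knob, forwarded to the server as `schedule_id`. A short sketch mirroring `test_select_with_priority` below; `table` is assumed to be a `Table` obtained inside an open transaction:

```python
from vastdb.table import QueryConfig

config = QueryConfig()
config.queue_priority = 0  # lower values are served first; None appends to the queue's end

reader = table.select(columns=['a'], config=config)
result = reader.read_all()  # negative priorities are rejected with errors.BadRequest
```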
vastdb/tests/test_duckdb.py
CHANGED

```diff
@@ -56,6 +56,6 @@ def test_closed_tx(session, clean_bucket_name):
         res = conn.execute('SELECT a FROM batches')
         log.debug("closing tx=%s after first batch=%s", t.tx, first)
 
-    # transaction is closed, collecting the result should fail
-    with pytest.raises(duckdb.InvalidInputException
+    # transaction is closed, collecting the result should fail internally in DuckDB
+    with pytest.raises(duckdb.InvalidInputException):
         res.arrow()
```
vastdb/tests/test_sanity.py
CHANGED

```diff
@@ -25,8 +25,9 @@ def test_bad_credentials(session):
 
 
 def test_bad_endpoint(session):
+    backoff_config = vastdb.session.BackoffConfig(max_tries=3)
     with pytest.raises(requests.exceptions.ConnectionError):
-        vastdb.connect(access='BAD', secret='BAD', endpoint='http://invalid-host-name-for-tests:12345')
+        vastdb.connect(access='BAD', secret='BAD', endpoint='http://invalid-host-name-for-tests:12345', backoff_config=backoff_config)
 
 
 def test_version_extraction():
@@ -36,7 +37,7 @@ def test_version_extraction():
         ("5", None),  # major
         ("5.2", None),  # major.minor
         ("5.2.0", None),  # major.minor.patch
-        ("5.2.0.10",
+        ("5.2.0.10", (5, 2, 0, 10)),  # major.minor.patch.protocol
         ("5.2.0.10 some other things", None),  # suffix
         ("5.2.0.10.20", None),  # extra version
     ]
```
vastdb/tests/test_tables.py
CHANGED

```diff
@@ -58,7 +58,7 @@ def test_tables(session, clean_bucket_name):
     }
 
     columns_to_delete = pa.schema([(INTERNAL_ROW_ID, pa.uint64())])
-    rb = pa.record_batch(schema=columns_to_delete, data=[[0]])  # delete
+    rb = pa.record_batch(schema=columns_to_delete, data=[[0]])  # delete row 0
     t.delete(rb)
 
     selected_rows = t.select(columns=['b'], predicate=(t['a'] == 222), internal_row_id=True).read_all()
@@ -81,6 +81,19 @@ def test_insert_wide_row(session, clean_bucket_name):
     assert actual == expected
 
 
+def test_insert_empty(session, clean_bucket_name):
+    columns = pa.schema([('a', pa.int8()), ('b', pa.float32())])
+    data = [[None] * 5, [None] * 5]
+    all_nulls = pa.table(schema=columns, data=data)
+    no_columns = all_nulls.select([])
+
+    with session.transaction() as tx:
+        t = tx.bucket(clean_bucket_name).create_schema('s').create_table('t', columns)
+        t.insert(all_nulls)
+        with pytest.raises(errors.NotImplemented):
+            t.insert(no_columns)
+
+
 def test_exists(session, clean_bucket_name):
     with session.transaction() as tx:
         s = tx.bucket(clean_bucket_name).create_schema('s1')
@@ -156,6 +169,27 @@ def test_update_table(session, clean_bucket_name):
         'b': [0.5, 1.5, 2.5]
     }
 
+    # test update for not sorted rows:
+    rb = pa.record_batch(schema=columns_to_update, data=[
+        [2, 0],  # update rows 0,2
+        [231, 235]
+    ])
+    t.update(rb)
+    actual = t.select(columns=['a', 'b']).read_all()
+    assert actual.to_pydict() == {
+        'a': [235, 2222, 231],
+        'b': [0.5, 1.5, 2.5]
+    }
+
+    # test delete for not sorted rows:
+    rb = pa.record_batch(schema=pa.schema([(INTERNAL_ROW_ID, pa.uint64())]), data=[[2, 0]])
+    t.delete(rb)
+    actual = t.select(columns=['a', 'b']).read_all()
+    assert actual.to_pydict() == {
+        'a': [2222],
+        'b': [1.5]
+    }
+
 
 def test_select_with_multisplits(session, clean_bucket_name):
     columns = pa.schema([
@@ -174,6 +208,25 @@ def test_select_with_multisplits(session, clean_bucket_name):
     assert actual == expected
 
 
+def test_select_with_priority(session, clean_bucket_name):
+    columns = pa.schema([
+        ('a', pa.int32())
+    ])
+    expected = pa.table(schema=columns, data=[range(100)])
+    with prepare_data(session, clean_bucket_name, 's', 't', expected) as t:
+        config = QueryConfig()
+
+        config.queue_priority = 0
+        assert t.select(config=config).read_all() == expected
+
+        config.queue_priority = 12345
+        assert t.select(config=config).read_all() == expected
+
+        config.queue_priority = -1
+        with pytest.raises(errors.BadRequest):
+            t.select(config=config).read_all()
+
+
 def test_types(session, clean_bucket_name):
     columns = pa.schema([
         ('tb', pa.bool_()),
```
vastdb/tests/test_util.py
CHANGED

```diff
@@ -33,6 +33,12 @@ def test_wide_row():
         list(util.iter_serialized_slices(t))
 
 
+def test_expand_ip_ranges():
+    endpoints = ["http://172.19.101.1-3"]
+    expected = ["http://172.19.101.1", "http://172.19.101.2", "http://172.19.101.3"]
+    assert util.expand_ip_ranges(endpoints) == expected
+
+
 def _parse(bufs):
     for buf in bufs:
         with pa.ipc.open_stream(buf) as reader:
```
vastdb/transaction.py
CHANGED

```diff
@@ -8,7 +8,7 @@ A transaction is used as a context manager, since every Database-related operation
 
 import logging
 from dataclasses import dataclass
-from typing import TYPE_CHECKING,
+from typing import TYPE_CHECKING, Iterable, Optional
 
 import botocore
 
@@ -72,7 +72,7 @@ class Transaction:
             raise
         return bucket.Bucket(name, self)
 
-    def catalog_snapshots(self) ->
+    def catalog_snapshots(self) -> Iterable["Bucket"]:
         """Return VAST Catalog bucket snapshots."""
         return bucket.Bucket(VAST_CATALOG_BUCKET_NAME, self).snapshots()
 
```
vastdb/util.py
CHANGED

```diff
@@ -1,7 +1,9 @@
 import logging
+import re
 from typing import TYPE_CHECKING, Callable, List, Optional, Union
 
 import pyarrow as pa
+import pyarrow.compute as pc
 import pyarrow.parquet as pq
 
 from .errors import InvalidArgument, TooWideRow
@@ -88,8 +90,11 @@ MAX_QUERY_DATA_REQUEST_SIZE = int(0.9 * MAX_TABULAR_REQUEST_SIZE)
 
 def iter_serialized_slices(batch: Union[pa.RecordBatch, pa.Table], max_rows_per_slice=None):
     """Iterate over a list of record batch slices."""
+    if batch.nbytes:
+        rows_per_slice = int(0.9 * len(batch) * MAX_RECORD_BATCH_SLICE_SIZE / batch.nbytes)
+    else:
+        rows_per_slice = len(batch)  # if the batch has no buffers (no rows/columns)
 
-    rows_per_slice = int(0.9 * len(batch) * MAX_RECORD_BATCH_SLICE_SIZE / batch.nbytes)
     if max_rows_per_slice is not None:
         rows_per_slice = min(rows_per_slice, max_rows_per_slice)
 
@@ -113,3 +118,37 @@ def serialize_record_batch(batch: Union[pa.RecordBatch, pa.Table]):
     with pa.ipc.new_stream(sink, batch.schema) as writer:
         writer.write(batch)
     return sink.getvalue()
+
+
+def expand_ip_ranges(endpoints):
+    """Expands endpoint strings that include an IP range in the format 'http://172.19.101.1-16'."""
+    expanded_endpoints = []
+    pattern = re.compile(r"(http://\d+\.\d+\.\d+)\.(\d+)-(\d+)")
+
+    for endpoint in endpoints:
+        match = pattern.match(endpoint)
+        if match:
+            base_url = match.group(1)
+            start_ip = int(match.group(2))
+            end_ip = int(match.group(3))
+            if start_ip > end_ip:
+                raise ValueError("Start IP cannot be greater than end IP in the range.")
+            expanded_endpoints.extend(f"{base_url}.{ip}" for ip in range(start_ip, end_ip + 1))
+        else:
+            expanded_endpoints.append(endpoint)
+    return expanded_endpoints
+
+
+def is_sorted(arr):
+    """Check if the array is sorted."""
+    return pc.all(pc.greater(arr[1:], arr[:-1])).as_py()
+
+
+def sort_record_batch_if_needed(record_batch, sort_column):
+    """Sort the RecordBatch by the specified column if it is not already sorted."""
+    column_data = record_batch[sort_column]
+
+    if not is_sorted(column_data):
+        return record_batch.sort_by(sort_column)
+    else:
+        return record_batch
```
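The new helpers can be exercised directly; note that `is_sorted()` checks for strictly increasing values, so a batch with duplicate row IDs is also re-sorted. A small sketch (the column name `x` and the example endpoints are arbitrary):

```python
import pyarrow as pa
from vastdb import util

# Endpoints that don't match the 'http://A.B.C.X-Y' pattern pass through unchanged
assert util.expand_ip_ranges(["http://172.19.101.1-3", "http://example.com"]) == [
    "http://172.19.101.1", "http://172.19.101.2", "http://172.19.101.3",
    "http://example.com"]

# sort_record_batch_if_needed() only sorts when the column is out of order
rb = pa.record_batch(schema=pa.schema([('x', pa.uint64())]), data=[[2, 0]])
assert util.sort_record_batch_if_needed(rb, 'x')['x'].to_pylist() == [0, 2]
```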
{vastdb-0.1.6.dist-info → vastdb-0.1.7.dist-info}/METADATA
CHANGED

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vastdb
-Version: 0.1.6
+Version: 0.1.7
 Summary: VAST Data SDK
 Home-page: https://github.com/vast-data/vastdb_sdk
 Author: VAST DATA
@@ -21,7 +21,7 @@ License-File: LICENSE
 Requires-Dist: aws-requests-auth
 Requires-Dist: boto3
 Requires-Dist: flatbuffers
-Requires-Dist: ibis-framework ==
+Requires-Dist: ibis-framework ==9.0.0
 Requires-Dist: pyarrow
 Requires-Dist: requests
 Requires-Dist: xmltodict
```
{vastdb-0.1.6.dist-info → vastdb-0.1.7.dist-info}/RECORD
CHANGED

```diff
@@ -148,30 +148,30 @@ vast_flatbuf/tabular/ObjectDetails.py,sha256=qW0WtbkCYYE_L-Kw6VNRDCLYaRm5lKvTbLN
 vast_flatbuf/tabular/S3File.py,sha256=KC9c2oS5-JXwTTriUVFdjOvRG0B54Cq9kviSDZY3NI0,4450
 vast_flatbuf/tabular/VipRange.py,sha256=_BJd1RRZAcK76T9vlsHzXKYVsPVaz6WTEAqStMQCAUQ,2069
 vast_flatbuf/tabular/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-vastdb/__init__.py,sha256=
-vastdb/
+vastdb/__init__.py,sha256=8PLcZowy_vM0zuiYSQPXuxIEMcwHD7IRFpgcPK-03bk,386
+vastdb/_internal.py,sha256=FB0pHOc79tAgMLzZcLSeeHIzZogt81S8FHhzdZiKEuI,89095
+vastdb/bucket.py,sha256=5KuKhPjZOevznZqWHDVVocejvAy7dcwobPuV6BJCfPc,2544
 vastdb/conftest.py,sha256=D4RvOhGvMQy-JliKY-uyzcB-_mFBwI6aMF__xwHiwOM,2359
 vastdb/errors.py,sha256=nC7d05xwe0WxMFyM3cEEqIvA09OXNqcxiUGsKov822I,4098
-vastdb/
-vastdb/
-vastdb/
-vastdb/
-vastdb/
-vastdb/util.py,sha256=vt4LWROOFdZieJXLpQMlcnF7YWQFpPqQTVaRbmQ241o,4342
+vastdb/schema.py,sha256=X7IRrogXH7Z0kes-DsDh1bRqIhvjH6owlFigGBXy7XQ,5913
+vastdb/session.py,sha256=3YHhG7IamFOKuy-Fkq_IVtPNriSfI6IN_4z4arBFbDU,3349
+vastdb/table.py,sha256=a0ZARfBdptxlm-zwiqdZ1ALud54-IkfX_ZI_ZD5hcsw,31047
+vastdb/transaction.py,sha256=qu2rOlR7AS1ojMOzgWapQMpcorrutelZZLH1mLmTHxk,3186
+vastdb/util.py,sha256=4LTYBBR13na376AmDm5lQILJzLcfelIKdkNPy0IqI0o,5684
 vastdb/bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vastdb/bench/test_perf.py,sha256=yn5gE7t_nzmJHBl9bCs1hxQOgzhvFphuYElsWGko8ts,1084
 vastdb/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-vastdb/tests/test_duckdb.py,sha256=
+vastdb/tests/test_duckdb.py,sha256=STw_1PwTQR8Naz6s0p6lQTV1ZTKKhe3LPBUbhqzTCu0,1880
 vastdb/tests/test_imports.py,sha256=xKub3-bisFjH0BsZM8COfiUWuMrtoOoQKprF6VQT9RI,5669
 vastdb/tests/test_nested.py,sha256=22NAxBTm7Aq-Vn6AIYbi5Cb1ET8W0XeLK3pp4D8BYWI,3448
 vastdb/tests/test_projections.py,sha256=11a-55VbJcqaFPkOKaKDEdM5nkeI0xtUhh6cQc1upSA,4223
-vastdb/tests/test_sanity.py,sha256=
+vastdb/tests/test_sanity.py,sha256=xD-XBmmuFxALj5r8eirtPG9fghxm8h4srIN9X6LEOX4,3054
 vastdb/tests/test_schemas.py,sha256=l70YQMlx2UL1KRQhApriiG2ZM7GJF-IzWU31H3Yqn1U,3312
-vastdb/tests/test_tables.py,sha256=
-vastdb/tests/test_util.py,sha256=
+vastdb/tests/test_tables.py,sha256=YhkeeTHq8aW1RgU86GolJl1dG3KGTlVG97Bny9RzyrM,30124
+vastdb/tests/test_util.py,sha256=Ok_sAEBJsRGF5Voa_v5eu3eAd52GWu8jMjjQbadwW-s,1260
 vastdb/tests/util.py,sha256=dpRJYbboDnlqL4qIdvScpp8--5fxRUBIcIYitrfcj9o,555
-vastdb-0.1.6.dist-info/LICENSE,sha256=
-vastdb-0.1.6.dist-info/METADATA,sha256=
-vastdb-0.1.6.dist-info/WHEEL,sha256=
-vastdb-0.1.6.dist-info/top_level.txt,sha256=
-vastdb-0.1.6.dist-info/RECORD,,
+vastdb-0.1.7.dist-info/LICENSE,sha256=obffan7LYrq7hLHNrY7vHcn2pKUTBUYXMKu-VOAvDxU,11333
+vastdb-0.1.7.dist-info/METADATA,sha256=gwlUIInf2mlaAT2GsJ0bztYteRXpE4kqukDPKrNNJfk,1350
+vastdb-0.1.7.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+vastdb-0.1.7.dist-info/top_level.txt,sha256=Vsj2MKtlhPg0J4so64slQtnwjhgoPmJgcG-6YcVAwVc,20
+vastdb-0.1.7.dist-info/RECORD,,
```
{vastdb-0.1.6.dist-info → vastdb-0.1.7.dist-info}/LICENSE
File without changes

{vastdb-0.1.6.dist-info → vastdb-0.1.7.dist-info}/WHEEL
File without changes

{vastdb-0.1.6.dist-info → vastdb-0.1.7.dist-info}/top_level.txt
File without changes