vastdb-1.3.8-py3-none-any.whl → vastdb-1.3.9-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vastdb/__init__.py +2 -2
- vastdb/_internal.py +41 -8
- vastdb/features.py +1 -1
- vastdb/table.py +25 -4
- {vastdb-1.3.8.dist-info → vastdb-1.3.9.dist-info}/METADATA +1 -1
- {vastdb-1.3.8.dist-info → vastdb-1.3.9.dist-info}/RECORD +9 -9
- {vastdb-1.3.8.dist-info → vastdb-1.3.9.dist-info}/LICENSE +0 -0
- {vastdb-1.3.8.dist-info → vastdb-1.3.9.dist-info}/WHEEL +0 -0
- {vastdb-1.3.8.dist-info → vastdb-1.3.9.dist-info}/top_level.txt +0 -0
vastdb/__init__.py
CHANGED
|
@@ -13,5 +13,5 @@ def connect(*args, **kwargs): # noqa: D103
|
|
|
13
13
|
|
|
14
14
|
def version():
|
|
15
15
|
"""Return VAST DB SDK version."""
|
|
16
|
-
import
|
|
17
|
-
return
|
|
16
|
+
from importlib import metadata
|
|
17
|
+
return metadata.distribution(__package__).version
|
vastdb/_internal.py
CHANGED
|
@@ -794,7 +794,7 @@ def _decode_table_props(s):
|
|
|
794
794
|
return {y: _prop_coding[x][1](z) for x, y, z in triplets if z != ''}
|
|
795
795
|
|
|
796
796
|
|
|
797
|
-
TableInfo = namedtuple('TableInfo', 'name properties handle num_rows size_in_bytes num_partitions sorting_key_enabled')
|
|
797
|
+
TableInfo = namedtuple('TableInfo', 'name properties handle num_rows size_in_bytes num_partitions sorting_key_enabled sorting_score write_amplification acummulative_row_insertion_count sorting_done')
|
|
798
798
|
|
|
799
799
|
|
|
800
800
|
def _parse_table_info(obj, parse_properties):
|
|
@@ -806,13 +806,20 @@ def _parse_table_info(obj, parse_properties):
|
|
|
806
806
|
num_partitions = obj.NumPartitions()
|
|
807
807
|
properties = parse_properties(properties)
|
|
808
808
|
sorting_key_enabled = obj.SortingKeyEnabled()
|
|
809
|
-
|
|
809
|
+
sorting_score_raw = obj.SortingScore()
|
|
810
|
+
write_amplification = obj.WriteAmplification()
|
|
811
|
+
acummulative_row_insertion_count = obj.AcummulativeRowInseritionCount()
|
|
812
|
+
|
|
813
|
+
sorting_score = sorting_score_raw & ((1 << 63) - 1)
|
|
814
|
+
sorting_done = bool(sorting_score_raw >> 63)
|
|
815
|
+
return TableInfo(name, properties, handle, num_rows, used_bytes, num_partitions, sorting_key_enabled,
|
|
816
|
+
sorting_score, write_amplification, acummulative_row_insertion_count, sorting_done)
|
|
810
817
|
|
|
811
818
|
|
|
812
819
|
# Results that returns from tablestats
|
|
813
820
|
|
|
814
821
|
|
|
815
|
-
TableStatsResult = namedtuple("TableStatsResult",
|
|
822
|
+
TableStatsResult = namedtuple("TableStatsResult", 'num_rows size_in_bytes is_external_rowid_alloc sorting_key_enabled sorting_score write_amplification acummulative_row_inserition_count sorting_done endpoints')
|
|
816
823
|
|
|
817
824
|
|
|
818
825
|
_RETRIABLE_EXCEPTIONS = (
|
|
@@ -1213,8 +1220,16 @@ class VastdbApi:
|
|
|
1213
1220
|
num_rows = stats.NumRows()
|
|
1214
1221
|
size_in_bytes = stats.SizeInBytes()
|
|
1215
1222
|
is_external_rowid_alloc = stats.IsExternalRowidAlloc()
|
|
1223
|
+
sorting_key_enabled = stats.SortingKeyEnabled()
|
|
1224
|
+
sorting_score_raw = stats.SortingScore()
|
|
1225
|
+
write_amplification = stats.WriteAmplification()
|
|
1226
|
+
acummulative_row_inserition_count = stats.AcummulativeRowInseritionCount()
|
|
1227
|
+
|
|
1228
|
+
sorting_score = sorting_score_raw & ((1 << 63) - 1)
|
|
1229
|
+
sorting_done = bool(sorting_score_raw >> 63)
|
|
1230
|
+
|
|
1216
1231
|
endpoints = [self.url] # we cannot replace the host by a VIP address in HTTPS-based URLs
|
|
1217
|
-
return TableStatsResult(num_rows, size_in_bytes, is_external_rowid_alloc, tuple(endpoints))
|
|
1232
|
+
return TableStatsResult(num_rows, size_in_bytes, is_external_rowid_alloc, sorting_key_enabled, sorting_score, write_amplification, acummulative_row_inserition_count, sorting_done, tuple(endpoints))
|
|
1218
1233
|
|
|
1219
1234
|
def alter_topic(self, bucket, name,
|
|
1220
1235
|
new_name="", expected_retvals=[],
|
|
@@ -1302,8 +1317,8 @@ class VastdbApi:
|
|
|
1302
1317
|
expected_retvals=expected_retvals,
|
|
1303
1318
|
include_list_stats=include_list_stats, count_only=count_only)
|
|
1304
1319
|
|
|
1305
|
-
def
|
|
1306
|
-
|
|
1320
|
+
def _list_tables_raw(self, bucket, schema, txid=0, client_tags=[], max_keys=1000, next_key=0, name_prefix="",
|
|
1321
|
+
exact_match=False, expected_retvals=[], include_list_stats=False, count_only=False):
|
|
1307
1322
|
"""
|
|
1308
1323
|
GET /mybucket/schema_path?table HTTP/1.1
|
|
1309
1324
|
tabular-txid: TransactionId
|
|
@@ -1323,7 +1338,6 @@ class VastdbApi:
|
|
|
1323
1338
|
headers['tabular-list-count-only'] = str(count_only)
|
|
1324
1339
|
headers['tabular-include-list-stats'] = str(include_list_stats)
|
|
1325
1340
|
|
|
1326
|
-
tables = []
|
|
1327
1341
|
res = self._request(
|
|
1328
1342
|
method="GET",
|
|
1329
1343
|
url=self._url(bucket=bucket, schema=schema, command="table"),
|
|
@@ -1333,17 +1347,36 @@ class VastdbApi:
|
|
|
1333
1347
|
next_key = int(res_headers['tabular-next-key'])
|
|
1334
1348
|
is_truncated = res_headers['tabular-is-truncated'] == 'true'
|
|
1335
1349
|
lists = list_tables.GetRootAs(res.content)
|
|
1350
|
+
tables_length = lists.TablesLength()
|
|
1351
|
+
count = int(res_headers['tabular-list-count']) if 'tabular-list-count' in res_headers else tables_length
|
|
1352
|
+
return lists, is_truncated, count
|
|
1353
|
+
|
|
1354
|
+
def _list_tables_internal(self, bucket, schema, parse_properties, txid=0, client_tags=[], max_keys=1000, next_key=0, name_prefix="",
|
|
1355
|
+
exact_match=False, expected_retvals=[], include_list_stats=False, count_only=False):
|
|
1356
|
+
tables = []
|
|
1357
|
+
lists, is_truncated, count = self._list_tables_raw(bucket, schema, txid=txid, client_tags=client_tags, max_keys=max_keys,
|
|
1358
|
+
next_key=next_key, name_prefix=name_prefix, exact_match=exact_match, expected_retvals=expected_retvals,
|
|
1359
|
+
include_list_stats=include_list_stats, count_only=count_only)
|
|
1336
1360
|
bucket_name = lists.BucketName().decode()
|
|
1337
1361
|
schema_name = lists.SchemaName().decode()
|
|
1338
1362
|
if not bucket.startswith(bucket_name): # ignore snapshot name
|
|
1339
1363
|
raise ValueError(f'bucket: {bucket} did not start from {bucket_name}')
|
|
1340
1364
|
tables_length = lists.TablesLength()
|
|
1341
|
-
count = int(res_headers['tabular-list-count']) if 'tabular-list-count' in res_headers else tables_length
|
|
1342
1365
|
for i in range(tables_length):
|
|
1343
1366
|
tables.append(_parse_table_info(lists.Tables(i), parse_properties))
|
|
1344
1367
|
|
|
1345
1368
|
return bucket_name, schema_name, tables, next_key, is_truncated, count
|
|
1346
1369
|
|
|
1370
|
+
def raw_sorting_score(self, bucket, schema, txid, name):
|
|
1371
|
+
lists, _, _ = self._list_tables_raw(bucket, schema, txid=txid, exact_match=True, name_prefix=name, include_list_stats=True)
|
|
1372
|
+
bucket_name = lists.BucketName().decode()
|
|
1373
|
+
if not bucket.startswith(bucket_name): # ignore snapshot name
|
|
1374
|
+
raise ValueError(f'bucket: {bucket} did not start from {bucket_name}')
|
|
1375
|
+
tables_length = lists.TablesLength()
|
|
1376
|
+
if tables_length != 1:
|
|
1377
|
+
raise ValueError(f'table: {name} received {tables_length} response')
|
|
1378
|
+
return lists.Tables(0).SortingScore()
|
|
1379
|
+
|
|
1347
1380
|
def add_columns(self, bucket, schema, name, arrow_schema, txid=0, client_tags=[], expected_retvals=[]):
|
|
1348
1381
|
"""
|
|
1349
1382
|
Add a column to table, use the following request
|
vastdb/features.py
CHANGED
|
@@ -33,7 +33,7 @@ class Features:
|
|
|
33
33
|
|
|
34
34
|
self.check_elysium = self._check(
|
|
35
35
|
"Elysium requires 5.3.5+ VAST release",
|
|
36
|
-
vast_version >= (5, 3
|
|
36
|
+
vast_version >= (5, 3)) # TODO: make this validation stricter for v5.4 (beta/poc version is 5.3.0.x)
|
|
37
37
|
|
|
38
38
|
self.check_zip_import = self._check(
|
|
39
39
|
"Zip import requires 5.3.1+ VAST release",
|
vastdb/table.py
CHANGED
|
@@ -29,6 +29,7 @@ MAX_ROWS_PER_BATCH = 512 * 1024
|
|
|
29
29
|
MAX_INSERT_ROWS_PER_PATCH = 512 * 1024
|
|
30
30
|
# in case insert has TooWideRow - need to insert in smaller batches - each cell could contain up to 128K, and our wire is limited to 5MB
|
|
31
31
|
MAX_COLUMN_IN_BATCH = int(5 * 1024 / 128)
|
|
32
|
+
SORTING_SCORE_BITS = 63
|
|
32
33
|
|
|
33
34
|
|
|
34
35
|
@dataclass
|
|
@@ -37,7 +38,12 @@ class TableStats:
|
|
|
37
38
|
|
|
38
39
|
num_rows: int
|
|
39
40
|
size_in_bytes: int
|
|
41
|
+
sorting_score: int
|
|
42
|
+
write_amplification: int
|
|
43
|
+
acummulative_row_inserition_count: int
|
|
40
44
|
is_external_rowid_alloc: bool = False
|
|
45
|
+
sorting_key_enabled: bool = False
|
|
46
|
+
sorting_done: bool = False
|
|
41
47
|
endpoints: Tuple[str, ...] = ()
|
|
42
48
|
|
|
43
49
|
|
|
@@ -277,8 +283,8 @@ class Table:
|
|
|
277
283
|
except queue.Empty:
|
|
278
284
|
pass
|
|
279
285
|
if files_batch:
|
|
280
|
-
log.
|
|
281
|
-
log.
|
|
286
|
+
log.info("Starting import batch of %s files", len(files_batch))
|
|
287
|
+
log.debug(f"starting import of {files_batch}")
|
|
282
288
|
session.import_data(
|
|
283
289
|
self.bucket.name, self.schema.name, self.name, files_batch, txid=self.tx.txid,
|
|
284
290
|
key_names=key_names)
|
|
@@ -372,7 +378,7 @@ class Table:
|
|
|
372
378
|
num_rows = 0
|
|
373
379
|
if self.sorted_table:
|
|
374
380
|
num_rows = self._get_row_estimate(columns, predicate, query_schema)
|
|
375
|
-
log.
|
|
381
|
+
log.debug(f'sorted estimate: {num_rows}')
|
|
376
382
|
if num_rows == 0:
|
|
377
383
|
if stats is None:
|
|
378
384
|
stats = self.get_stats()
|
|
@@ -653,6 +659,20 @@ class Table:
|
|
|
653
659
|
"""
|
|
654
660
|
return self._ibis_table[col_name]
|
|
655
661
|
|
|
662
|
+
def sorting_done(self) -> int:
|
|
663
|
+
"""Sorting done indicator for the table. Always False for unsorted tables."""
|
|
664
|
+
if not self.sorted_table:
|
|
665
|
+
return False
|
|
666
|
+
raw_sorting_score = self.tx._rpc.api.raw_sorting_score(self.schema.bucket.name, self.schema.name, self.schema.tx.txid, self.name)
|
|
667
|
+
return bool(raw_sorting_score >> SORTING_SCORE_BITS)
|
|
668
|
+
|
|
669
|
+
def sorting_score(self) -> int:
|
|
670
|
+
"""Sorting score for the table. Always 0 for unsorted tables."""
|
|
671
|
+
if not self.sorted_table:
|
|
672
|
+
return 0
|
|
673
|
+
raw_sorting_score = self.tx._rpc.api.raw_sorting_score(self.schema.bucket.name, self.schema.name, self.schema.tx.txid, self.name)
|
|
674
|
+
return raw_sorting_score & ((1 << SORTING_SCORE_BITS) - 1)
|
|
675
|
+
|
|
656
676
|
|
|
657
677
|
@dataclass
|
|
658
678
|
class Projection:
|
|
@@ -710,7 +730,8 @@ class Projection:
|
|
|
710
730
|
|
|
711
731
|
def _parse_projection_info(projection_info, table: "Table"):
|
|
712
732
|
log.info("Projection info %s", str(projection_info))
|
|
713
|
-
stats = TableStats(num_rows=projection_info.num_rows, size_in_bytes=projection_info.size_in_bytes
|
|
733
|
+
stats = TableStats(num_rows=projection_info.num_rows, size_in_bytes=projection_info.size_in_bytes,
|
|
734
|
+
sorting_score=0, write_amplification=0, acummulative_row_inserition_count=0)
|
|
714
735
|
return Projection(name=projection_info.name, table=table, stats=stats, handle=int(projection_info.handle))
|
|
715
736
|
|
|
716
737
|
|
|
{vastdb-1.3.8.dist-info → vastdb-1.3.9.dist-info}/RECORD
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
|
-
vastdb/__init__.py,sha256=
|
|
2
|
-
vastdb/_internal.py,sha256
|
|
1
|
+
vastdb/__init__.py,sha256=uf-AXdzsD4nPxFP7WxkcAXGG0whv8BHLrrXCJtsPGaQ,436
|
|
2
|
+
vastdb/_internal.py,sha256=tGNU-9wOtRoK7OXFmX1-uEgQRjpKQXPA0H4rZy86-JM,107257
|
|
3
3
|
vastdb/bucket.py,sha256=aomUbrfK5Oa6FdGPVsoBXgRW39IzYnmsorF8642r990,2549
|
|
4
4
|
vastdb/config.py,sha256=OehnsWrjzv0-SUouEXmkrKBugiWyhXOn4XiSLV3s9yk,2342
|
|
5
5
|
vastdb/conftest.py,sha256=X2kVveySPQYZlVBXUMoo7Oea5IsvmJzjdqq3fpH2kVw,3469
|
|
6
6
|
vastdb/errors.py,sha256=B_FNFONDE8apoTRL8wkMNjUJWAjXu36mO0HI4cGSBgY,4328
|
|
7
|
-
vastdb/features.py,sha256=
|
|
7
|
+
vastdb/features.py,sha256=6OAyTGxpOlMYqkcX2IfuG_ihJC8qrmraKdnef_B3xuo,1727
|
|
8
8
|
vastdb/schema.py,sha256=UR1WzQvfAdnpDaNsEaGZLYGC65Blri5MYOWinCcl8Hc,6552
|
|
9
9
|
vastdb/session.py,sha256=toMR0BXwTaECdWDKnIZky1F3MA1SmelRBiqCrqQ3GCM,2067
|
|
10
|
-
vastdb/table.py,sha256=
|
|
10
|
+
vastdb/table.py,sha256=V7LymaLfirOiAbBj68M_29ijOeSZKD0_gYU44OGkkac,35278
|
|
11
11
|
vastdb/transaction.py,sha256=NlVkEowJ_pmtffjWBBDaKExYDKPekjSZyj_fK_bZPJE,3026
|
|
12
12
|
vastdb/util.py,sha256=8CUnVRsJukC3uNHNoB5D0qPf0FxS8OSdVB84nNoLJKc,6290
|
|
13
13
|
vastdb/bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -209,8 +209,8 @@ vastdb/vast_flatbuf/tabular/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMp
|
|
|
209
209
|
vastdb/vast_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
210
210
|
vastdb/vast_tests/test_ha.py,sha256=744P4G6VJ09RIkHhMQL4wlipCBJWQVMhyvUrSc4k1HQ,975
|
|
211
211
|
vastdb/vast_tests/test_scale.py,sha256=5jGwOdZH6Tv5tPdZYPWoqcxOceI2jA5i2D1zNKZHER4,3958
|
|
212
|
-
vastdb-1.3.
|
|
213
|
-
vastdb-1.3.
|
|
214
|
-
vastdb-1.3.
|
|
215
|
-
vastdb-1.3.
|
|
216
|
-
vastdb-1.3.
|
|
212
|
+
vastdb-1.3.9.dist-info/LICENSE,sha256=obffan7LYrq7hLHNrY7vHcn2pKUTBUYXMKu-VOAvDxU,11333
|
|
213
|
+
vastdb-1.3.9.dist-info/METADATA,sha256=szfHdpcb7zy4H49PgDYqVnn2J5UCAu0azRHkfXJkEpY,1340
|
|
214
|
+
vastdb-1.3.9.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
|
215
|
+
vastdb-1.3.9.dist-info/top_level.txt,sha256=nnKAaZaQa8GFbYpWAexr_B9HrhonZbUlX6hL6AC--yA,7
|
|
216
|
+
vastdb-1.3.9.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|