vastdb 1.3.8__py3-none-any.whl → 1.3.9__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
vastdb/__init__.py CHANGED
@@ -13,5 +13,5 @@ def connect(*args, **kwargs): # noqa: D103
13
13
 
14
14
  def version():
15
15
  """Return VAST DB SDK version."""
16
- import importlib
17
- return importlib.metadata.distribution(__package__).version
16
+ from importlib import metadata
17
+ return metadata.distribution(__package__).version
vastdb/_internal.py CHANGED
@@ -794,7 +794,7 @@ def _decode_table_props(s):
794
794
  return {y: _prop_coding[x][1](z) for x, y, z in triplets if z != ''}
795
795
 
796
796
 
797
- TableInfo = namedtuple('TableInfo', 'name properties handle num_rows size_in_bytes num_partitions sorting_key_enabled')
797
+ TableInfo = namedtuple('TableInfo', 'name properties handle num_rows size_in_bytes num_partitions sorting_key_enabled sorting_score write_amplification acummulative_row_insertion_count sorting_done')
798
798
 
799
799
 
800
800
  def _parse_table_info(obj, parse_properties):
@@ -806,13 +806,20 @@ def _parse_table_info(obj, parse_properties):
806
806
  num_partitions = obj.NumPartitions()
807
807
  properties = parse_properties(properties)
808
808
  sorting_key_enabled = obj.SortingKeyEnabled()
809
- return TableInfo(name, properties, handle, num_rows, used_bytes, num_partitions, sorting_key_enabled)
809
+ sorting_score_raw = obj.SortingScore()
810
+ write_amplification = obj.WriteAmplification()
811
+ acummulative_row_insertion_count = obj.AcummulativeRowInseritionCount()
812
+
813
+ sorting_score = sorting_score_raw & ((1 << 63) - 1)
814
+ sorting_done = bool(sorting_score_raw >> 63)
815
+ return TableInfo(name, properties, handle, num_rows, used_bytes, num_partitions, sorting_key_enabled,
816
+ sorting_score, write_amplification, acummulative_row_insertion_count, sorting_done)
810
817
 
811
818
 
812
819
  # Results that returns from tablestats
813
820
 
814
821
 
815
- TableStatsResult = namedtuple("TableStatsResult", ["num_rows", "size_in_bytes", "is_external_rowid_alloc", "endpoints"])
822
+ TableStatsResult = namedtuple("TableStatsResult", 'num_rows size_in_bytes is_external_rowid_alloc sorting_key_enabled sorting_score write_amplification acummulative_row_inserition_count sorting_done endpoints')
816
823
 
817
824
 
818
825
  _RETRIABLE_EXCEPTIONS = (
@@ -1213,8 +1220,16 @@ class VastdbApi:
1213
1220
  num_rows = stats.NumRows()
1214
1221
  size_in_bytes = stats.SizeInBytes()
1215
1222
  is_external_rowid_alloc = stats.IsExternalRowidAlloc()
1223
+ sorting_key_enabled = stats.SortingKeyEnabled()
1224
+ sorting_score_raw = stats.SortingScore()
1225
+ write_amplification = stats.WriteAmplification()
1226
+ acummulative_row_inserition_count = stats.AcummulativeRowInseritionCount()
1227
+
1228
+ sorting_score = sorting_score_raw & ((1 << 63) - 1)
1229
+ sorting_done = bool(sorting_score_raw >> 63)
1230
+
1216
1231
  endpoints = [self.url] # we cannot replace the host by a VIP address in HTTPS-based URLs
1217
- return TableStatsResult(num_rows, size_in_bytes, is_external_rowid_alloc, tuple(endpoints))
1232
+ return TableStatsResult(num_rows, size_in_bytes, is_external_rowid_alloc, sorting_key_enabled, sorting_score, write_amplification, acummulative_row_inserition_count, sorting_done, tuple(endpoints))
1218
1233
 
1219
1234
  def alter_topic(self, bucket, name,
1220
1235
  new_name="", expected_retvals=[],
@@ -1302,8 +1317,8 @@ class VastdbApi:
1302
1317
  expected_retvals=expected_retvals,
1303
1318
  include_list_stats=include_list_stats, count_only=count_only)
1304
1319
 
1305
- def _list_tables_internal(self, bucket, schema, parse_properties, txid=0, client_tags=[], max_keys=1000, next_key=0, name_prefix="",
1306
- exact_match=False, expected_retvals=[], include_list_stats=False, count_only=False):
1320
+ def _list_tables_raw(self, bucket, schema, txid=0, client_tags=[], max_keys=1000, next_key=0, name_prefix="",
1321
+ exact_match=False, expected_retvals=[], include_list_stats=False, count_only=False):
1307
1322
  """
1308
1323
  GET /mybucket/schema_path?table HTTP/1.1
1309
1324
  tabular-txid: TransactionId
@@ -1323,7 +1338,6 @@ class VastdbApi:
1323
1338
  headers['tabular-list-count-only'] = str(count_only)
1324
1339
  headers['tabular-include-list-stats'] = str(include_list_stats)
1325
1340
 
1326
- tables = []
1327
1341
  res = self._request(
1328
1342
  method="GET",
1329
1343
  url=self._url(bucket=bucket, schema=schema, command="table"),
@@ -1333,17 +1347,36 @@ class VastdbApi:
1333
1347
  next_key = int(res_headers['tabular-next-key'])
1334
1348
  is_truncated = res_headers['tabular-is-truncated'] == 'true'
1335
1349
  lists = list_tables.GetRootAs(res.content)
1350
+ tables_length = lists.TablesLength()
1351
+ count = int(res_headers['tabular-list-count']) if 'tabular-list-count' in res_headers else tables_length
1352
+ return lists, is_truncated, count
1353
+
1354
+ def _list_tables_internal(self, bucket, schema, parse_properties, txid=0, client_tags=[], max_keys=1000, next_key=0, name_prefix="",
1355
+ exact_match=False, expected_retvals=[], include_list_stats=False, count_only=False):
1356
+ tables = []
1357
+ lists, is_truncated, count = self._list_tables_raw(bucket, schema, txid=txid, client_tags=client_tags, max_keys=max_keys,
1358
+ next_key=next_key, name_prefix=name_prefix, exact_match=exact_match, expected_retvals=expected_retvals,
1359
+ include_list_stats=include_list_stats, count_only=count_only)
1336
1360
  bucket_name = lists.BucketName().decode()
1337
1361
  schema_name = lists.SchemaName().decode()
1338
1362
  if not bucket.startswith(bucket_name): # ignore snapshot name
1339
1363
  raise ValueError(f'bucket: {bucket} did not start from {bucket_name}')
1340
1364
  tables_length = lists.TablesLength()
1341
- count = int(res_headers['tabular-list-count']) if 'tabular-list-count' in res_headers else tables_length
1342
1365
  for i in range(tables_length):
1343
1366
  tables.append(_parse_table_info(lists.Tables(i), parse_properties))
1344
1367
 
1345
1368
  return bucket_name, schema_name, tables, next_key, is_truncated, count
1346
1369
 
1370
+ def raw_sorting_score(self, bucket, schema, txid, name):
1371
+ lists, _, _ = self._list_tables_raw(bucket, schema, txid=txid, exact_match=True, name_prefix=name, include_list_stats=True)
1372
+ bucket_name = lists.BucketName().decode()
1373
+ if not bucket.startswith(bucket_name): # ignore snapshot name
1374
+ raise ValueError(f'bucket: {bucket} did not start from {bucket_name}')
1375
+ tables_length = lists.TablesLength()
1376
+ if tables_length != 1:
1377
+ raise ValueError(f'table: {name} received {tables_length} response')
1378
+ return lists.Tables(0).SortingScore()
1379
+
1347
1380
  def add_columns(self, bucket, schema, name, arrow_schema, txid=0, client_tags=[], expected_retvals=[]):
1348
1381
  """
1349
1382
  Add a column to table, use the following request
vastdb/features.py CHANGED
@@ -33,7 +33,7 @@ class Features:
33
33
 
34
34
  self.check_elysium = self._check(
35
35
  "Elysium requires 5.3.5+ VAST release",
36
- vast_version >= (5, 3, 5))
36
+ vast_version >= (5, 3)) # TODO: make this validation stricter for v5.4 (beta/poc version is 5.3.0.x)
37
37
 
38
38
  self.check_zip_import = self._check(
39
39
  "Zip import requires 5.3.1+ VAST release",
vastdb/table.py CHANGED
@@ -29,6 +29,7 @@ MAX_ROWS_PER_BATCH = 512 * 1024
29
29
  MAX_INSERT_ROWS_PER_PATCH = 512 * 1024
30
30
  # in case insert has TooWideRow - need to insert in smaller batches - each cell could contain up to 128K, and our wire is limited to 5MB
31
31
  MAX_COLUMN_IN_BATCH = int(5 * 1024 / 128)
32
+ SORTING_SCORE_BITS = 63
32
33
 
33
34
 
34
35
  @dataclass
@@ -37,7 +38,12 @@ class TableStats:
37
38
 
38
39
  num_rows: int
39
40
  size_in_bytes: int
41
+ sorting_score: int
42
+ write_amplification: int
43
+ acummulative_row_inserition_count: int
40
44
  is_external_rowid_alloc: bool = False
45
+ sorting_key_enabled: bool = False
46
+ sorting_done: bool = False
41
47
  endpoints: Tuple[str, ...] = ()
42
48
 
43
49
 
@@ -277,8 +283,8 @@ class Table:
277
283
  except queue.Empty:
278
284
  pass
279
285
  if files_batch:
280
- log.debug("Starting import batch of %s files", len(files_batch))
281
- log.info(f"starting import of {files_batch}")
286
+ log.info("Starting import batch of %s files", len(files_batch))
287
+ log.debug(f"starting import of {files_batch}")
282
288
  session.import_data(
283
289
  self.bucket.name, self.schema.name, self.name, files_batch, txid=self.tx.txid,
284
290
  key_names=key_names)
@@ -372,7 +378,7 @@ class Table:
372
378
  num_rows = 0
373
379
  if self.sorted_table:
374
380
  num_rows = self._get_row_estimate(columns, predicate, query_schema)
375
- log.info(f'sorted estimate: {num_rows}')
381
+ log.debug(f'sorted estimate: {num_rows}')
376
382
  if num_rows == 0:
377
383
  if stats is None:
378
384
  stats = self.get_stats()
@@ -653,6 +659,20 @@ class Table:
653
659
  """
654
660
  return self._ibis_table[col_name]
655
661
 
662
+ def sorting_done(self) -> int:
663
+ """Sorting done indicator for the table. Always False for unsorted tables."""
664
+ if not self.sorted_table:
665
+ return False
666
+ raw_sorting_score = self.tx._rpc.api.raw_sorting_score(self.schema.bucket.name, self.schema.name, self.schema.tx.txid, self.name)
667
+ return bool(raw_sorting_score >> SORTING_SCORE_BITS)
668
+
669
+ def sorting_score(self) -> int:
670
+ """Sorting score for the table. Always 0 for unsorted tables."""
671
+ if not self.sorted_table:
672
+ return 0
673
+ raw_sorting_score = self.tx._rpc.api.raw_sorting_score(self.schema.bucket.name, self.schema.name, self.schema.tx.txid, self.name)
674
+ return raw_sorting_score & ((1 << SORTING_SCORE_BITS) - 1)
675
+
656
676
 
657
677
  @dataclass
658
678
  class Projection:
@@ -710,7 +730,8 @@ class Projection:
710
730
 
711
731
  def _parse_projection_info(projection_info, table: "Table"):
712
732
  log.info("Projection info %s", str(projection_info))
713
- stats = TableStats(num_rows=projection_info.num_rows, size_in_bytes=projection_info.size_in_bytes)
733
+ stats = TableStats(num_rows=projection_info.num_rows, size_in_bytes=projection_info.size_in_bytes,
734
+ sorting_score=0, write_amplification=0, acummulative_row_inserition_count=0)
714
735
  return Projection(name=projection_info.name, table=table, stats=stats, handle=int(projection_info.handle))
715
736
 
716
737
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vastdb
3
- Version: 1.3.8
3
+ Version: 1.3.9
4
4
  Summary: VAST Data SDK
5
5
  Home-page: https://github.com/vast-data/vastdb_sdk
6
6
  Author: VAST DATA
@@ -1,13 +1,13 @@
1
- vastdb/__init__.py,sha256=J1JjKiFkKC95BHowfh9kJfQFTjRce-QMsc6zF_FfxC0,432
2
- vastdb/_internal.py,sha256=-TSpq5nYQPrPdB2nsnt7DWDDaD6HC8iOnI5yXRiM3Ao,104965
1
+ vastdb/__init__.py,sha256=uf-AXdzsD4nPxFP7WxkcAXGG0whv8BHLrrXCJtsPGaQ,436
2
+ vastdb/_internal.py,sha256=tGNU-9wOtRoK7OXFmX1-uEgQRjpKQXPA0H4rZy86-JM,107257
3
3
  vastdb/bucket.py,sha256=aomUbrfK5Oa6FdGPVsoBXgRW39IzYnmsorF8642r990,2549
4
4
  vastdb/config.py,sha256=OehnsWrjzv0-SUouEXmkrKBugiWyhXOn4XiSLV3s9yk,2342
5
5
  vastdb/conftest.py,sha256=X2kVveySPQYZlVBXUMoo7Oea5IsvmJzjdqq3fpH2kVw,3469
6
6
  vastdb/errors.py,sha256=B_FNFONDE8apoTRL8wkMNjUJWAjXu36mO0HI4cGSBgY,4328
7
- vastdb/features.py,sha256=3QRyIMUDovLcOTDVM_4qYFHmKtzCDtlkdlbhbK1a8rY,1652
7
+ vastdb/features.py,sha256=6OAyTGxpOlMYqkcX2IfuG_ihJC8qrmraKdnef_B3xuo,1727
8
8
  vastdb/schema.py,sha256=UR1WzQvfAdnpDaNsEaGZLYGC65Blri5MYOWinCcl8Hc,6552
9
9
  vastdb/session.py,sha256=toMR0BXwTaECdWDKnIZky1F3MA1SmelRBiqCrqQ3GCM,2067
10
- vastdb/table.py,sha256=1QSvZDhpaOjRsEu_FU8di3STUrbsRmGW4VFx4g4FYFs,34237
10
+ vastdb/table.py,sha256=V7LymaLfirOiAbBj68M_29ijOeSZKD0_gYU44OGkkac,35278
11
11
  vastdb/transaction.py,sha256=NlVkEowJ_pmtffjWBBDaKExYDKPekjSZyj_fK_bZPJE,3026
12
12
  vastdb/util.py,sha256=8CUnVRsJukC3uNHNoB5D0qPf0FxS8OSdVB84nNoLJKc,6290
13
13
  vastdb/bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -209,8 +209,8 @@ vastdb/vast_flatbuf/tabular/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMp
209
209
  vastdb/vast_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
210
210
  vastdb/vast_tests/test_ha.py,sha256=744P4G6VJ09RIkHhMQL4wlipCBJWQVMhyvUrSc4k1HQ,975
211
211
  vastdb/vast_tests/test_scale.py,sha256=5jGwOdZH6Tv5tPdZYPWoqcxOceI2jA5i2D1zNKZHER4,3958
212
- vastdb-1.3.8.dist-info/LICENSE,sha256=obffan7LYrq7hLHNrY7vHcn2pKUTBUYXMKu-VOAvDxU,11333
213
- vastdb-1.3.8.dist-info/METADATA,sha256=JO1YYjtkqWE7VLusG8OkWUNHw4Osq6hduCUlg6xIU7g,1340
214
- vastdb-1.3.8.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
215
- vastdb-1.3.8.dist-info/top_level.txt,sha256=nnKAaZaQa8GFbYpWAexr_B9HrhonZbUlX6hL6AC--yA,7
216
- vastdb-1.3.8.dist-info/RECORD,,
212
+ vastdb-1.3.9.dist-info/LICENSE,sha256=obffan7LYrq7hLHNrY7vHcn2pKUTBUYXMKu-VOAvDxU,11333
213
+ vastdb-1.3.9.dist-info/METADATA,sha256=szfHdpcb7zy4H49PgDYqVnn2J5UCAu0azRHkfXJkEpY,1340
214
+ vastdb-1.3.9.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
215
+ vastdb-1.3.9.dist-info/top_level.txt,sha256=nnKAaZaQa8GFbYpWAexr_B9HrhonZbUlX6hL6AC--yA,7
216
+ vastdb-1.3.9.dist-info/RECORD,,
File without changes