vastdb 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vastdb/transaction.py CHANGED
@@ -16,6 +16,14 @@ from . import bucket, errors, schema, session, table
16
16
 
17
17
  log = logging.getLogger(__name__)
18
18
 
19
+ TABULAR_BC_BUCKET = "vast-big-catalog-bucket"
20
+ VAST_CATALOG_SCHEMA_NAME = 'vast_big_catalog_schema'
21
+ VAST_CATALOG_TABLE_NAME = 'vast_big_catalog_table'
22
+
23
+ TABULAR_AUDERY_BUCKET = "vast-audit-log-bucket"
24
+ AUDERY_SCHEMA_NAME = 'vast_audit_log_schema'
25
+ AUDERY_TABLE_NAME = 'vast_audit_log_table'
26
+
19
27
 
20
28
  @dataclass
21
29
  class Transaction:
@@ -44,6 +52,8 @@ class Transaction:
44
52
 
45
53
  def __repr__(self):
46
54
  """Don't show the session details."""
55
+ if self.txid is None:
56
+ return 'InvalidTransaction'
47
57
  return f'Transaction(id=0x{self.txid:016x})'
48
58
 
49
59
  def bucket(self, name: str) -> "bucket.Bucket":
@@ -59,6 +69,12 @@ class Transaction:
59
69
 
60
70
  def catalog(self, fail_if_missing=True) -> Optional["table.Table"]:
61
71
  """Return VAST Catalog table."""
62
- b = bucket.Bucket("vast-big-catalog-bucket", self)
63
- s = schema.Schema("vast_big_catalog_schema", b)
64
- return s.table(name="vast_big_catalog_table", fail_if_missing=fail_if_missing)
72
+ b = bucket.Bucket(TABULAR_BC_BUCKET, self)
73
+ s = schema.Schema(VAST_CATALOG_SCHEMA_NAME, b)
74
+ return s.table(name=VAST_CATALOG_TABLE_NAME, fail_if_missing=fail_if_missing)
75
+
76
+ def audit_log(self, fail_if_missing=True) -> Optional["table.Table"]:
77
+ """Return VAST AuditLog table."""
78
+ b = bucket.Bucket(TABULAR_AUDERY_BUCKET, self)
79
+ s = schema.Schema(AUDERY_SCHEMA_NAME, b)
80
+ return s.table(name=AUDERY_TABLE_NAME, fail_if_missing=fail_if_missing)
vastdb/util.py CHANGED
@@ -1,20 +1,22 @@
1
1
  import logging
2
- from typing import Callable, List, Optional
2
+ from typing import TYPE_CHECKING, Callable, List, Optional, Union
3
3
 
4
4
  import pyarrow as pa
5
5
  import pyarrow.parquet as pq
6
6
 
7
- from .errors import InvalidArgument
8
- from .schema import Schema
9
- from .table import ImportConfig, Table
7
+ from .errors import InvalidArgument, TooWideRow
10
8
 
11
9
  log = logging.getLogger(__name__)
12
10
 
11
+ if TYPE_CHECKING:
12
+ from .schema import Schema
13
+ from .table import ImportConfig, Table
14
+
13
15
 
14
16
  def create_table_from_files(
15
- schema: Schema, table_name: str, parquet_files: List[str],
17
+ schema: "Schema", table_name: str, parquet_files: List[str],
16
18
  schema_merge_func: Optional[Callable] = None,
17
- config: Optional[ImportConfig] = None) -> Table:
19
+ config: Optional["ImportConfig"] = None) -> "Table":
18
20
  if not schema_merge_func:
19
21
  schema_merge_func = default_schema_merge
20
22
  else:
@@ -77,3 +79,36 @@ def union_schema_merge(current_schema: pa.Schema, new_schema: pa.Schema) -> pa.S
77
79
  This function returns a unified schema from potentially two different schemas.
78
80
  """
79
81
  return pa.unify_schemas([current_schema, new_schema])
82
+
83
+
84
+ MAX_TABULAR_REQUEST_SIZE = 5 << 20 # in bytes
85
+ MAX_RECORD_BATCH_SLICE_SIZE = int(0.9 * MAX_TABULAR_REQUEST_SIZE)
86
+
87
+
88
+ def iter_serialized_slices(batch: Union[pa.RecordBatch, pa.Table], max_rows_per_slice=None):
89
+ """Iterate over a list of record batch slices."""
90
+
91
+ rows_per_slice = int(0.9 * len(batch) * MAX_RECORD_BATCH_SLICE_SIZE / batch.nbytes)
92
+ if max_rows_per_slice is not None:
93
+ rows_per_slice = min(rows_per_slice, max_rows_per_slice)
94
+
95
+ offset = 0
96
+ while offset < len(batch):
97
+ if rows_per_slice < 1:
98
+ raise TooWideRow(batch)
99
+
100
+ batch_slice = batch.slice(offset, rows_per_slice)
101
+ serialized_slice_batch = serialize_record_batch(batch_slice)
102
+ if len(serialized_slice_batch) <= MAX_RECORD_BATCH_SLICE_SIZE:
103
+ yield serialized_slice_batch
104
+ offset += rows_per_slice
105
+ else:
106
+ rows_per_slice = rows_per_slice // 2
107
+
108
+
109
+ def serialize_record_batch(batch: Union[pa.RecordBatch, pa.Table]):
110
+ """Serialize a RecordBatch using Arrow IPC format."""
111
+ sink = pa.BufferOutputStream()
112
+ with pa.ipc.new_stream(sink, batch.schema) as writer:
113
+ writer.write(batch)
114
+ return sink.getvalue()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vastdb
3
- Version: 0.1.2
3
+ Version: 0.1.4
4
4
  Summary: VAST Data SDK
5
5
  Home-page: https://github.com/vast-data/vastdb_sdk
6
6
  Author: VAST DATA
@@ -21,10 +21,11 @@ License-File: LICENSE
21
21
  Requires-Dist: aws-requests-auth
22
22
  Requires-Dist: boto3
23
23
  Requires-Dist: flatbuffers
24
- Requires-Dist: ibis-framework
24
+ Requires-Dist: ibis-framework ==8.0.0
25
25
  Requires-Dist: pyarrow
26
26
  Requires-Dist: requests
27
27
  Requires-Dist: xmltodict
28
+ Requires-Dist: backoff ==2.2.1
28
29
 
29
30
 
30
31
  `vastdb` is a Python-based SDK designed for interacting
@@ -149,28 +149,29 @@ vast_flatbuf/tabular/S3File.py,sha256=KC9c2oS5-JXwTTriUVFdjOvRG0B54Cq9kviSDZY3NI
149
149
  vast_flatbuf/tabular/VipRange.py,sha256=_BJd1RRZAcK76T9vlsHzXKYVsPVaz6WTEAqStMQCAUQ,2069
150
150
  vast_flatbuf/tabular/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
151
151
  vastdb/__init__.py,sha256=cMJtZuJ0IL9aKyM3DUWqTCzuP1H1MXXVivKKE1-q0DY,292
152
- vastdb/bucket.py,sha256=xtKs7S4w0jmI4MujDWH3HDI-iEgbq5Xqqsod-tw4zSo,2991
152
+ vastdb/bucket.py,sha256=4rPEm9qlPTg7ccWO6VGmd4LKb8w-BDhJYwzXGjn03sc,3566
153
153
  vastdb/conftest.py,sha256=pKpo_46Vq4QHzTDQAFxasrVhnZ2V2L-y6IMLxojxaFM,2132
154
- vastdb/errors.py,sha256=fxpKSxjEgoJZuBtEGWzTW9lpDlEjuzgpgXwAQc1W6BQ,3436
155
- vastdb/internal_commands.py,sha256=3F6FiYu-Ama1zBO7hENPxCaQYJT8mcZP6rSQvtI7Sks,101273
156
- vastdb/schema.py,sha256=MrQr-WIrES8KcQ0V6cJkRRp_-9jj9FboyrBnkNBsw-8,3324
157
- vastdb/session.py,sha256=VZOFGZbAdr5Tl4cp88VRQYnR4Q16UNuYjSmX_QPW1II,1718
158
- vastdb/table.py,sha256=bdx3C1iWiFivKmtifH7MyG7TMqnVVIU91as-_hMn1rE,20532
159
- vastdb/transaction.py,sha256=1uCSHXqWcwsMJv6DuNx4WyQMGUm8P-RCCqYdBdUGusI,2196
160
- vastdb/util.py,sha256=Tjj6p4gqabK5G21uWuCiuYM9FaaR04_Zk5X8NWtcdj8,3022
154
+ vastdb/errors.py,sha256=fj8IlPnGi1lbJWIl1-8MSjLavL9bYQ-YUoboWbXCo54,4047
155
+ vastdb/internal_commands.py,sha256=yS6ylyuJjaAwAm4OqVGX4tq-Un5cvM-LXp7F4eYOUDw,100414
156
+ vastdb/schema.py,sha256=ql4TPB1W_FQ_BHov3CKHI8JX3krXMlcKWz7dTrjpQ1w,3346
157
+ vastdb/session.py,sha256=2tu5cp7xG28ynyQfEl9_HM2dtNcLM2AoJmm3bfNLC0o,2563
158
+ vastdb/table.py,sha256=apRXCrglg6_glozJXu8D7q6du5seP7NMi42PNjyGcTM,28891
159
+ vastdb/transaction.py,sha256=g8YTcYnsNPIhB2udbHyT5RIFB5kHnBLJcvV2CWRICwI,2845
160
+ vastdb/util.py,sha256=rs7nLL2Qz-OVEZDSVIqAvS-uETMq-zxQs5jBksB5-JA,4276
161
161
  vastdb/bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
162
162
  vastdb/bench/test_perf.py,sha256=iHE3E60fvyU5SBDHPi4h03Dj6QcY6VI9l9mMhgNMtPc,1117
163
163
  vastdb/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
164
164
  vastdb/tests/test_duckdb.py,sha256=KDuv4PrjGEwChCGHG36xNT2JiFlBOt6K3DQ3L06Kq-A,1913
165
- vastdb/tests/test_imports.py,sha256=fDUjO5U-5i4QTIMoNnSSW4X_ZnOStLbx0mJkNq2pj9Q,5033
165
+ vastdb/tests/test_imports.py,sha256=48kbJKsa_MrEXcBYQUbUDr1e9wzjG4FHQ7C3wUEQfXA,5705
166
166
  vastdb/tests/test_nested.py,sha256=3kejEvtSqV0LrUgb1QglRjrlxnKI4_AXTFw2nE7Q520,951
167
167
  vastdb/tests/test_projections.py,sha256=_cDNfD5zTwbCXLk6uGpPUWGN0P-4HElu5OjubWu-Jg0,1255
168
168
  vastdb/tests/test_sanity.py,sha256=ixx0QPo73hLHjAa7bByFXjS1XST0WvmSwLEpgnHh_JY,2960
169
- vastdb/tests/test_schemas.py,sha256=b-JpYHOFYVTdE570_La7O2RWf8BGN-q8KDXNXeC8CSg,1724
170
- vastdb/tests/test_tables.py,sha256=TXM4LSBvPb3EEu7XScZ5iEiu_zhHClq61W18EQodxw8,25667
171
- vastdb/tests/util.py,sha256=NaCzKymEGy1xuiyMxyt2_0frKVfVk9iGrFwLf3GHjTI,435
172
- vastdb-0.1.2.dist-info/LICENSE,sha256=obffan7LYrq7hLHNrY7vHcn2pKUTBUYXMKu-VOAvDxU,11333
173
- vastdb-0.1.2.dist-info/METADATA,sha256=edJPdDWmHj6tRHRR97eSppfN9_4ARfIr0jS9HMjHfSQ,1311
174
- vastdb-0.1.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
175
- vastdb-0.1.2.dist-info/top_level.txt,sha256=Vsj2MKtlhPg0J4so64slQtnwjhgoPmJgcG-6YcVAwVc,20
176
- vastdb-0.1.2.dist-info/RECORD,,
169
+ vastdb/tests/test_schemas.py,sha256=qoHTLX51D-0S4bMxdCpRh9gaYQd-BkZdT_agGOwFwTM,1739
170
+ vastdb/tests/test_tables.py,sha256=pfQx0OZm6oVJj1-CziPWUoEn3l2-OET4Bpl8M9Z4mws,27499
171
+ vastdb/tests/test_util.py,sha256=owRAU3TCKMq-kz54NRdA5wX2O_bZIHqG5ucUR77jm5k,1046
172
+ vastdb/tests/util.py,sha256=dpRJYbboDnlqL4qIdvScpp8--5fxRUBIcIYitrfcj9o,555
173
+ vastdb-0.1.4.dist-info/LICENSE,sha256=obffan7LYrq7hLHNrY7vHcn2pKUTBUYXMKu-VOAvDxU,11333
174
+ vastdb-0.1.4.dist-info/METADATA,sha256=SyZkyjQSwklzsq3oub8m8w9lY-HuI4XOG72y8trKvf4,1350
175
+ vastdb-0.1.4.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
176
+ vastdb-0.1.4.dist-info/top_level.txt,sha256=Vsj2MKtlhPg0J4so64slQtnwjhgoPmJgcG-6YcVAwVc,20
177
+ vastdb-0.1.4.dist-info/RECORD,,
File without changes