vastdb-0.1.2-py3-none-any.whl → vastdb-0.1.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vastdb/bucket.py +20 -10
- vastdb/errors.py +43 -2
- vastdb/internal_commands.py +81 -95
- vastdb/schema.py +1 -1
- vastdb/session.py +21 -1
- vastdb/table.py +202 -34
- vastdb/tests/test_imports.py +13 -1
- vastdb/tests/test_schemas.py +1 -2
- vastdb/tests/test_tables.py +43 -2
- vastdb/tests/test_util.py +39 -0
- vastdb/tests/util.py +3 -1
- vastdb/transaction.py +19 -3
- vastdb/util.py +41 -6
- {vastdb-0.1.2.dist-info → vastdb-0.1.4.dist-info}/METADATA +3 -2
- {vastdb-0.1.2.dist-info → vastdb-0.1.4.dist-info}/RECORD +18 -17
- {vastdb-0.1.2.dist-info → vastdb-0.1.4.dist-info}/LICENSE +0 -0
- {vastdb-0.1.2.dist-info → vastdb-0.1.4.dist-info}/WHEEL +0 -0
- {vastdb-0.1.2.dist-info → vastdb-0.1.4.dist-info}/top_level.txt +0 -0
vastdb/transaction.py
CHANGED
|
@@ -16,6 +16,14 @@ from . import bucket, errors, schema, session, table
|
|
|
16
16
|
|
|
17
17
|
log = logging.getLogger(__name__)
|
|
18
18
|
|
|
19
|
+
TABULAR_BC_BUCKET = "vast-big-catalog-bucket"
|
|
20
|
+
VAST_CATALOG_SCHEMA_NAME = 'vast_big_catalog_schema'
|
|
21
|
+
VAST_CATALOG_TABLE_NAME = 'vast_big_catalog_table'
|
|
22
|
+
|
|
23
|
+
TABULAR_AUDERY_BUCKET = "vast-audit-log-bucket"
|
|
24
|
+
AUDERY_SCHEMA_NAME = 'vast_audit_log_schema'
|
|
25
|
+
AUDERY_TABLE_NAME = 'vast_audit_log_table'
|
|
26
|
+
|
|
19
27
|
|
|
20
28
|
@dataclass
|
|
21
29
|
class Transaction:
|
|
@@ -44,6 +52,8 @@ class Transaction:
|
|
|
44
52
|
|
|
45
53
|
def __repr__(self):
|
|
46
54
|
"""Don't show the session details."""
|
|
55
|
+
if self.txid is None:
|
|
56
|
+
return 'InvalidTransaction'
|
|
47
57
|
return f'Transaction(id=0x{self.txid:016x})'
|
|
48
58
|
|
|
49
59
|
def bucket(self, name: str) -> "bucket.Bucket":
|
|
@@ -59,6 +69,12 @@ class Transaction:
|
|
|
59
69
|
|
|
60
70
|
def catalog(self, fail_if_missing=True) -> Optional["table.Table"]:
|
|
61
71
|
"""Return VAST Catalog table."""
|
|
62
|
-
b = bucket.Bucket(
|
|
63
|
-
s = schema.Schema(
|
|
64
|
-
return s.table(name=
|
|
72
|
+
b = bucket.Bucket(TABULAR_BC_BUCKET, self)
|
|
73
|
+
s = schema.Schema(VAST_CATALOG_SCHEMA_NAME, b)
|
|
74
|
+
return s.table(name=VAST_CATALOG_TABLE_NAME, fail_if_missing=fail_if_missing)
|
|
75
|
+
|
|
76
|
+
def audit_log(self, fail_if_missing=True) -> Optional["table.Table"]:
|
|
77
|
+
"""Return VAST AuditLog table."""
|
|
78
|
+
b = bucket.Bucket(TABULAR_AUDERY_BUCKET, self)
|
|
79
|
+
s = schema.Schema(AUDERY_SCHEMA_NAME, b)
|
|
80
|
+
return s.table(name=AUDERY_TABLE_NAME, fail_if_missing=fail_if_missing)
|
vastdb/util.py
CHANGED
|
@@ -1,20 +1,22 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import Callable, List, Optional
|
|
2
|
+
from typing import TYPE_CHECKING, Callable, List, Optional, Union
|
|
3
3
|
|
|
4
4
|
import pyarrow as pa
|
|
5
5
|
import pyarrow.parquet as pq
|
|
6
6
|
|
|
7
|
-
from .errors import InvalidArgument
|
|
8
|
-
from .schema import Schema
|
|
9
|
-
from .table import ImportConfig, Table
|
|
7
|
+
from .errors import InvalidArgument, TooWideRow
|
|
10
8
|
|
|
11
9
|
log = logging.getLogger(__name__)
|
|
12
10
|
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from .schema import Schema
|
|
13
|
+
from .table import ImportConfig, Table
|
|
14
|
+
|
|
13
15
|
|
|
14
16
|
def create_table_from_files(
|
|
15
|
-
schema: Schema, table_name: str, parquet_files: List[str],
|
|
17
|
+
schema: "Schema", table_name: str, parquet_files: List[str],
|
|
16
18
|
schema_merge_func: Optional[Callable] = None,
|
|
17
|
-
config: Optional[ImportConfig] = None) -> Table:
|
|
19
|
+
config: Optional["ImportConfig"] = None) -> "Table":
|
|
18
20
|
if not schema_merge_func:
|
|
19
21
|
schema_merge_func = default_schema_merge
|
|
20
22
|
else:
|
|
@@ -77,3 +79,36 @@ def union_schema_merge(current_schema: pa.Schema, new_schema: pa.Schema) -> pa.S
|
|
|
77
79
|
This function returns a unified schema from potentially two different schemas.
|
|
78
80
|
"""
|
|
79
81
|
return pa.unify_schemas([current_schema, new_schema])
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
MAX_TABULAR_REQUEST_SIZE = 5 << 20 # in bytes
|
|
85
|
+
MAX_RECORD_BATCH_SLICE_SIZE = int(0.9 * MAX_TABULAR_REQUEST_SIZE)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def iter_serialized_slices(batch: Union[pa.RecordBatch, pa.Table], max_rows_per_slice=None):
|
|
89
|
+
"""Iterate over a list of record batch slices."""
|
|
90
|
+
|
|
91
|
+
rows_per_slice = int(0.9 * len(batch) * MAX_RECORD_BATCH_SLICE_SIZE / batch.nbytes)
|
|
92
|
+
if max_rows_per_slice is not None:
|
|
93
|
+
rows_per_slice = min(rows_per_slice, max_rows_per_slice)
|
|
94
|
+
|
|
95
|
+
offset = 0
|
|
96
|
+
while offset < len(batch):
|
|
97
|
+
if rows_per_slice < 1:
|
|
98
|
+
raise TooWideRow(batch)
|
|
99
|
+
|
|
100
|
+
batch_slice = batch.slice(offset, rows_per_slice)
|
|
101
|
+
serialized_slice_batch = serialize_record_batch(batch_slice)
|
|
102
|
+
if len(serialized_slice_batch) <= MAX_RECORD_BATCH_SLICE_SIZE:
|
|
103
|
+
yield serialized_slice_batch
|
|
104
|
+
offset += rows_per_slice
|
|
105
|
+
else:
|
|
106
|
+
rows_per_slice = rows_per_slice // 2
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def serialize_record_batch(batch: Union[pa.RecordBatch, pa.Table]):
|
|
110
|
+
"""Serialize a RecordBatch using Arrow IPC format."""
|
|
111
|
+
sink = pa.BufferOutputStream()
|
|
112
|
+
with pa.ipc.new_stream(sink, batch.schema) as writer:
|
|
113
|
+
writer.write(batch)
|
|
114
|
+
return sink.getvalue()
|
|
{vastdb-0.1.2.dist-info → vastdb-0.1.4.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: vastdb
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 0.1.4
|
|
4
4
|
Summary: VAST Data SDK
|
|
5
5
|
Home-page: https://github.com/vast-data/vastdb_sdk
|
|
6
6
|
Author: VAST DATA
|
|
@@ -21,10 +21,11 @@ License-File: LICENSE
|
|
|
21
21
|
Requires-Dist: aws-requests-auth
|
|
22
22
|
Requires-Dist: boto3
|
|
23
23
|
Requires-Dist: flatbuffers
|
|
24
|
-
Requires-Dist: ibis-framework
|
|
24
|
+
Requires-Dist: ibis-framework ==8.0.0
|
|
25
25
|
Requires-Dist: pyarrow
|
|
26
26
|
Requires-Dist: requests
|
|
27
27
|
Requires-Dist: xmltodict
|
|
28
|
+
Requires-Dist: backoff ==2.2.1
|
|
28
29
|
|
|
29
30
|
|
|
30
31
|
`vastdb` is a Python-based SDK designed for interacting
|
|
{vastdb-0.1.2.dist-info → vastdb-0.1.4.dist-info}/RECORD
CHANGED
|
@@ -149,28 +149,29 @@ vast_flatbuf/tabular/S3File.py,sha256=KC9c2oS5-JXwTTriUVFdjOvRG0B54Cq9kviSDZY3NI
|
|
|
149
149
|
vast_flatbuf/tabular/VipRange.py,sha256=_BJd1RRZAcK76T9vlsHzXKYVsPVaz6WTEAqStMQCAUQ,2069
|
|
150
150
|
vast_flatbuf/tabular/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
151
151
|
vastdb/__init__.py,sha256=cMJtZuJ0IL9aKyM3DUWqTCzuP1H1MXXVivKKE1-q0DY,292
|
|
152
|
-
vastdb/bucket.py,sha256=
|
|
152
|
+
vastdb/bucket.py,sha256=4rPEm9qlPTg7ccWO6VGmd4LKb8w-BDhJYwzXGjn03sc,3566
|
|
153
153
|
vastdb/conftest.py,sha256=pKpo_46Vq4QHzTDQAFxasrVhnZ2V2L-y6IMLxojxaFM,2132
|
|
154
|
-
vastdb/errors.py,sha256=
|
|
155
|
-
vastdb/internal_commands.py,sha256=
|
|
156
|
-
vastdb/schema.py,sha256=
|
|
157
|
-
vastdb/session.py,sha256=
|
|
158
|
-
vastdb/table.py,sha256=
|
|
159
|
-
vastdb/transaction.py,sha256=
|
|
160
|
-
vastdb/util.py,sha256=
|
|
154
|
+
vastdb/errors.py,sha256=fj8IlPnGi1lbJWIl1-8MSjLavL9bYQ-YUoboWbXCo54,4047
|
|
155
|
+
vastdb/internal_commands.py,sha256=yS6ylyuJjaAwAm4OqVGX4tq-Un5cvM-LXp7F4eYOUDw,100414
|
|
156
|
+
vastdb/schema.py,sha256=ql4TPB1W_FQ_BHov3CKHI8JX3krXMlcKWz7dTrjpQ1w,3346
|
|
157
|
+
vastdb/session.py,sha256=2tu5cp7xG28ynyQfEl9_HM2dtNcLM2AoJmm3bfNLC0o,2563
|
|
158
|
+
vastdb/table.py,sha256=apRXCrglg6_glozJXu8D7q6du5seP7NMi42PNjyGcTM,28891
|
|
159
|
+
vastdb/transaction.py,sha256=g8YTcYnsNPIhB2udbHyT5RIFB5kHnBLJcvV2CWRICwI,2845
|
|
160
|
+
vastdb/util.py,sha256=rs7nLL2Qz-OVEZDSVIqAvS-uETMq-zxQs5jBksB5-JA,4276
|
|
161
161
|
vastdb/bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
162
162
|
vastdb/bench/test_perf.py,sha256=iHE3E60fvyU5SBDHPi4h03Dj6QcY6VI9l9mMhgNMtPc,1117
|
|
163
163
|
vastdb/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
164
164
|
vastdb/tests/test_duckdb.py,sha256=KDuv4PrjGEwChCGHG36xNT2JiFlBOt6K3DQ3L06Kq-A,1913
|
|
165
|
-
vastdb/tests/test_imports.py,sha256=
|
|
165
|
+
vastdb/tests/test_imports.py,sha256=48kbJKsa_MrEXcBYQUbUDr1e9wzjG4FHQ7C3wUEQfXA,5705
|
|
166
166
|
vastdb/tests/test_nested.py,sha256=3kejEvtSqV0LrUgb1QglRjrlxnKI4_AXTFw2nE7Q520,951
|
|
167
167
|
vastdb/tests/test_projections.py,sha256=_cDNfD5zTwbCXLk6uGpPUWGN0P-4HElu5OjubWu-Jg0,1255
|
|
168
168
|
vastdb/tests/test_sanity.py,sha256=ixx0QPo73hLHjAa7bByFXjS1XST0WvmSwLEpgnHh_JY,2960
|
|
169
|
-
vastdb/tests/test_schemas.py,sha256=
|
|
170
|
-
vastdb/tests/test_tables.py,sha256=
|
|
171
|
-
vastdb/tests/
|
|
172
|
-
vastdb
|
|
173
|
-
vastdb-0.1.
|
|
174
|
-
vastdb-0.1.
|
|
175
|
-
vastdb-0.1.
|
|
176
|
-
vastdb-0.1.
|
|
169
|
+
vastdb/tests/test_schemas.py,sha256=qoHTLX51D-0S4bMxdCpRh9gaYQd-BkZdT_agGOwFwTM,1739
|
|
170
|
+
vastdb/tests/test_tables.py,sha256=pfQx0OZm6oVJj1-CziPWUoEn3l2-OET4Bpl8M9Z4mws,27499
|
|
171
|
+
vastdb/tests/test_util.py,sha256=owRAU3TCKMq-kz54NRdA5wX2O_bZIHqG5ucUR77jm5k,1046
|
|
172
|
+
vastdb/tests/util.py,sha256=dpRJYbboDnlqL4qIdvScpp8--5fxRUBIcIYitrfcj9o,555
|
|
173
|
+
vastdb-0.1.4.dist-info/LICENSE,sha256=obffan7LYrq7hLHNrY7vHcn2pKUTBUYXMKu-VOAvDxU,11333
|
|
174
|
+
vastdb-0.1.4.dist-info/METADATA,sha256=SyZkyjQSwklzsq3oub8m8w9lY-HuI4XOG72y8trKvf4,1350
|
|
175
|
+
vastdb-0.1.4.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
176
|
+
vastdb-0.1.4.dist-info/top_level.txt,sha256=Vsj2MKtlhPg0J4so64slQtnwjhgoPmJgcG-6YcVAwVc,20
|
|
177
|
+
vastdb-0.1.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|