vastdb 0.0.5.2__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vast_flatbuf/tabular/GetTableStatsResponse.py +45 -1
- vast_flatbuf/tabular/VipRange.py +56 -0
- vastdb/__init__.py +7 -0
- vastdb/bucket.py +77 -0
- vastdb/errors.py +158 -0
- vastdb/{api.py → internal_commands.py} +283 -747
- vastdb/schema.py +77 -0
- vastdb/session.py +48 -0
- vastdb/table.py +480 -0
- vastdb/tests/conftest.py +46 -0
- vastdb/tests/test_imports.py +125 -0
- vastdb/tests/test_projections.py +41 -0
- vastdb/tests/test_sanity.py +83 -0
- vastdb/tests/test_schemas.py +45 -0
- vastdb/tests/test_tables.py +608 -0
- vastdb/transaction.py +55 -0
- vastdb/util.py +77 -0
- vastdb-0.1.0.dist-info/METADATA +38 -0
- {vastdb-0.0.5.2.dist-info → vastdb-0.1.0.dist-info}/RECORD +23 -24
- vast_protobuf/substrait/__init__.py +0 -0
- vast_protobuf/substrait/algebra_pb2.py +0 -1344
- vast_protobuf/substrait/capabilities_pb2.py +0 -46
- vast_protobuf/substrait/ddl_pb2.py +0 -57
- vast_protobuf/substrait/extended_expression_pb2.py +0 -49
- vast_protobuf/substrait/extensions/__init__.py +0 -0
- vast_protobuf/substrait/extensions/extensions_pb2.py +0 -89
- vast_protobuf/substrait/function_pb2.py +0 -168
- vast_protobuf/substrait/parameterized_types_pb2.py +0 -181
- vast_protobuf/substrait/plan_pb2.py +0 -67
- vast_protobuf/substrait/type_expressions_pb2.py +0 -198
- vast_protobuf/substrait/type_pb2.py +0 -350
- vast_protobuf/tabular/__init__.py +0 -0
- vast_protobuf/tabular/rpc_pb2.py +0 -344
- vastdb/v2.py +0 -108
- vastdb-0.0.5.2.dist-info/METADATA +0 -47
- {vast_protobuf → vastdb/tests}/__init__.py +0 -0
- {vastdb-0.0.5.2.dist-info → vastdb-0.1.0.dist-info}/LICENSE +0 -0
- {vastdb-0.0.5.2.dist-info → vastdb-0.1.0.dist-info}/WHEEL +0 -0
- {vastdb-0.0.5.2.dist-info → vastdb-0.1.0.dist-info}/top_level.txt +0 -0
vastdb/util.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Callable
|
|
3
|
+
|
|
4
|
+
import pyarrow as pa
|
|
5
|
+
import pyarrow.parquet as pq
|
|
6
|
+
|
|
7
|
+
from .errors import InvalidArgument
|
|
8
|
+
from .schema import Schema
|
|
9
|
+
from .table import Table
|
|
10
|
+
|
|
11
|
+
log = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def create_table_from_files(
        schema: Schema, table_name: str, parquet_files: list[str], schema_merge_func: Callable = None) -> Table:
    """
    Create a new table named `table_name` under `schema` and import the given Parquet files into it.

    The table columns are derived by reading the schema of every Parquet file (in the given order)
    and folding them together using `schema_merge_func`.

    :param schema: schema in which the table is created; its transaction supplies
                   the S3 credentials and endpoint used for reading the Parquet files.
    :param table_name: name of the table to create.
    :param parquet_files: paths of the Parquet files to import, each must start with a '/'.
    :param schema_merge_func: one of `default_schema_merge`, `strict_schema_merge` or `union_schema_merge`
                              (`default_schema_merge` is used when omitted).
    :return: the newly created table, after `import_files` was called on it.
    :raises InvalidArgument: if `schema_merge_func` is not one of the supported merge functions,
                             or if a path does not start with a '/'.
    """
    if not schema_merge_func:
        schema_merge_func = default_schema_merge
    elif schema_merge_func not in (default_schema_merge, strict_schema_merge, union_schema_merge):
        # Explicit check rather than `assert`: assertions are stripped when Python runs with -O,
        # which would silently accept an unsupported merge function.
        raise InvalidArgument(f"Unsupported schema merge function: {schema_merge_func!r}")

    tx = schema.tx
    current_schema = pa.schema([])  # empty schema is the starting point for the merge
    s3fs = pa.fs.S3FileSystem(
        access_key=tx._rpc.api.access_key, secret_key=tx._rpc.api.secret_key, endpoint_override=tx._rpc.api.url)
    for prq_file in parquet_files:
        if not prq_file.startswith('/'):
            raise InvalidArgument(f"Path {prq_file} must start with a '/'")
        # The S3 filesystem is given the path without its leading slash(es).
        parquet_ds = pq.ParquetDataset(prq_file.lstrip('/'), filesystem=s3fs)
        current_schema = schema_merge_func(current_schema, parquet_ds.schema)

    log.info("Creating table %s from %d Parquet files, with columns: %s",
             table_name, len(parquet_files), list(current_schema))
    table = schema.create_table(table_name, current_schema)

    log.info("Starting import of %d files to table: %s", len(parquet_files), table)
    table.import_files(parquet_files)
    log.info("Finished import of %d files to table: %s", len(parquet_files), table)
    return table
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def default_schema_merge(current_schema: pa.Schema, new_schema: pa.Schema) -> pa.Schema:
    """
    Merge two schemas by requiring that one of them is contained in the other.

    The schema having more distinct fields is returned (`new_schema` on a tie),
    so its own field order is kept. An empty `current_schema` yields `new_schema` as is.
    Raises an InvalidArgument if the smaller schema has a field missing from the larger one.
    """
    if not current_schema.names:
        # Nothing was accumulated yet, the first schema is taken without validation.
        return new_schema

    current_fields = set(current_schema)
    new_fields = set(new_schema)

    # Pick the containment direction by the number of distinct fields;
    # `merged` is the schema object matching the larger side, returned as is to keep its fields order.
    if len(current_fields) > len(new_fields):
        smaller, larger, merged = new_fields, current_fields, current_schema
    else:
        smaller, larger, merged = current_fields, new_fields, new_schema

    if smaller <= larger:
        return merged

    log.error("Schema mismatch. schema: %s isn't contained in schema: %s.", smaller, larger)
    raise InvalidArgument("Found mismatch in parquet files schemas.")
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def strict_schema_merge(current_schema: pa.Schema, new_schema: pa.Schema) -> pa.Schema:
    """
    Merge two schemas by requiring them to be identical.

    An empty `current_schema` (no field names) is not compared, and `new_schema` is returned.
    Raises an InvalidArgument if both schemas have fields and they aren't identical.
    """
    if not current_schema.names:
        # First schema seen, there is nothing to compare it against.
        return new_schema

    if current_schema != new_schema:
        raise InvalidArgument(f"Schemas are not identical. \n {current_schema} \n vs \n {new_schema}")

    return new_schema
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def union_schema_merge(current_schema: pa.Schema, new_schema: pa.Schema) -> pa.Schema:
    """
    Merge two (possibly different) schemas into a single unified schema.

    The unification itself is delegated to `pa.unify_schemas`,
    with `current_schema` passed first and `new_schema` second.
    """
    schemas_to_unify = [current_schema, new_schema]
    return pa.unify_schemas(schemas_to_unify)
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: vastdb
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: VAST Data SDK
|
|
5
|
+
Home-page: https://github.com/vast-data/vastdb_sdk
|
|
6
|
+
Author: VAST DATA
|
|
7
|
+
Author-email: hello@vastdata.com
|
|
8
|
+
License: Copyright (C) VAST Data Ltd.
|
|
9
|
+
Platform: UNKNOWN
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Database
|
|
18
|
+
Classifier: Topic :: Database :: Front-Ends
|
|
19
|
+
Requires-Python: >=3.9.0
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Requires-Dist: aws-requests-auth
|
|
23
|
+
Requires-Dist: boto3
|
|
24
|
+
Requires-Dist: flatbuffers
|
|
25
|
+
Requires-Dist: ibis-framework
|
|
26
|
+
Requires-Dist: pyarrow
|
|
27
|
+
Requires-Dist: requests
|
|
28
|
+
Requires-Dist: xmltodict
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
`vastdb` is a Python-based SDK designed for interacting
|
|
32
|
+
with [VAST Database](https://vastdata.com/database)
|
|
33
|
+
and [VAST Catalog](https://vastdata.com/blog/vast-catalog-treat-your-file-system-like-a-database),
|
|
34
|
+
enabling schema and table management, efficient ingest, query and modification of columnar data.
|
|
35
|
+
|
|
36
|
+
For more details, see [our whitepaper](https://vastdata.com/whitepaper/#TheVASTDataBase).
|
|
37
|
+
|
|
38
|
+
|
|
@@ -139,34 +139,33 @@ vast_flatbuf/tabular/ColumnType.py,sha256=_4-jMG08VR2zdn1ZH7F4aahYPxWsBSm7adUoVf
|
|
|
139
139
|
vast_flatbuf/tabular/CreateProjectionRequest.py,sha256=POlK1DrYMAldNJscLIRL3j4jAT0Sv_fRzfvBXwZAAMw,2516
|
|
140
140
|
vast_flatbuf/tabular/CreateSchemaRequest.py,sha256=MrOfWaFu0Q1-mxLlGV8YMPajZ5kASyvowVSrKU-NPx8,1626
|
|
141
141
|
vast_flatbuf/tabular/GetProjectionTableStatsResponse.py,sha256=Bp-ln-0lcZEiUvp3vWYmnCP6t2UsZ5J-lezgkUUWhzo,3474
|
|
142
|
-
vast_flatbuf/tabular/GetTableStatsResponse.py,sha256=
|
|
142
|
+
vast_flatbuf/tabular/GetTableStatsResponse.py,sha256=_UsKj6-VAvyDZ8Eku9fegQlRKV-T_0Dsb7qjulYoZus,4655
|
|
143
143
|
vast_flatbuf/tabular/ImportDataRequest.py,sha256=f1chKp5d5NUxfNjI8YI1o4MYInF8UDhIhpWkT3vG4Do,2450
|
|
144
144
|
vast_flatbuf/tabular/ListProjectionsResponse.py,sha256=secqrBsJY3ydbA28j09rmxzBqj-c1JNqaP7JMuib7nE,4240
|
|
145
145
|
vast_flatbuf/tabular/ListSchemasResponse.py,sha256=V8tbwcWAC96eNwuoqDNqCSb02BnMdq60TpyISuWTVMk,3036
|
|
146
146
|
vast_flatbuf/tabular/ListTablesResponse.py,sha256=V7jZAS8ryKY8s6o_QyjWzgan-rsGm17zjKEmi7K6qTM,3550
|
|
147
147
|
vast_flatbuf/tabular/ObjectDetails.py,sha256=qW0WtbkCYYE_L-Kw6VNRDCLYaRm5lKvTbLNkfD4zV4A,3589
|
|
148
148
|
vast_flatbuf/tabular/S3File.py,sha256=KC9c2oS5-JXwTTriUVFdjOvRG0B54Cq9kviSDZY3NI0,4450
|
|
149
|
+
vast_flatbuf/tabular/VipRange.py,sha256=_BJd1RRZAcK76T9vlsHzXKYVsPVaz6WTEAqStMQCAUQ,2069
|
|
149
150
|
vast_flatbuf/tabular/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
vastdb/
|
|
166
|
-
vastdb/
|
|
167
|
-
vastdb/
|
|
168
|
-
vastdb-0.0.
|
|
169
|
-
vastdb-0.0.
|
|
170
|
-
vastdb-0.0.
|
|
171
|
-
vastdb-0.0.5.2.dist-info/top_level.txt,sha256=Vsj2MKtlhPg0J4so64slQtnwjhgoPmJgcG-6YcVAwVc,20
|
|
172
|
-
vastdb-0.0.5.2.dist-info/RECORD,,
|
|
151
|
+
vastdb/__init__.py,sha256=GY30IfZQApfl7HfcFmfTzFpx48oHgQIrDcUQCiTnxpo,206
|
|
152
|
+
vastdb/bucket.py,sha256=Xbgn5Ns7veBL4oKH7EaSj4SxTPDRlicl9Saaz_39ZsU,2526
|
|
153
|
+
vastdb/errors.py,sha256=mveQ2O0fLKOS51V9k5Y-HwY8Y1XiYdE9aJ9j0wlltWQ,3119
|
|
154
|
+
vastdb/internal_commands.py,sha256=FR4rkr-sRvqMc-Y5hW7APOPa41a8d7L8DNJ2ROHRaFM,104441
|
|
155
|
+
vastdb/schema.py,sha256=TbgqaUqAraj43vKCyVQNUSOMuJdw2Y4M06svs5jpcPo,2836
|
|
156
|
+
vastdb/session.py,sha256=qgjT9rt1yUr4GyHOZRsVGFj3HYBoHFAEdczi_R26O8o,1731
|
|
157
|
+
vastdb/table.py,sha256=p0uE0Gw9hen7hLTx9xC_MbxTaG6ZhZEFieaenUnbyUY,20442
|
|
158
|
+
vastdb/transaction.py,sha256=jleiVmg4iui2q1GqMFsPo5GZRNtIa5NzGLFVooBbzkQ,1797
|
|
159
|
+
vastdb/util.py,sha256=pBw4ywNJfkvKik-T7ZKPrWBoZOqGns-WsSZkG7HHa2I,2908
|
|
160
|
+
vastdb/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
161
|
+
vastdb/tests/conftest.py,sha256=VLqImQ1XMr1FLCLCqZpgv8wMjNskfYAtMp-qjZFBqWo,1694
|
|
162
|
+
vastdb/tests/test_imports.py,sha256=OhkbuhTeLgD4I6Vbub-B7wQo-G37TlXoHVQhGCLz9Wo,5035
|
|
163
|
+
vastdb/tests/test_projections.py,sha256=PRi1Jf__95fsL9ZCQ_s2PtszWIO5FIFbniiL6FnV18M,1253
|
|
164
|
+
vastdb/tests/test_sanity.py,sha256=gijOWK4ymGhVRHkf0ecHibVlaJxl92RinPdFUwWj1OQ,2959
|
|
165
|
+
vastdb/tests/test_schemas.py,sha256=YX0lF8FbXzNCNVUAxq3g0L0OCjGq1OwkQaNaBtzDe4Q,1253
|
|
166
|
+
vastdb/tests/test_tables.py,sha256=qnDRBtOPh9qcV4O3kB6xF4WJFj3B3WX2RAmUzkzl05g,23634
|
|
167
|
+
vastdb-0.1.0.dist-info/LICENSE,sha256=obffan7LYrq7hLHNrY7vHcn2pKUTBUYXMKu-VOAvDxU,11333
|
|
168
|
+
vastdb-0.1.0.dist-info/METADATA,sha256=pCY34hVZGzoB51SyIgrzrtxDaOXC_2DGOQbafZdOmQg,1331
|
|
169
|
+
vastdb-0.1.0.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
|
|
170
|
+
vastdb-0.1.0.dist-info/top_level.txt,sha256=Vsj2MKtlhPg0J4so64slQtnwjhgoPmJgcG-6YcVAwVc,20
|
|
171
|
+
vastdb-0.1.0.dist-info/RECORD,,
|
|
File without changes
|