vastdb 0.0.5.2__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. vast_flatbuf/tabular/GetTableStatsResponse.py +45 -1
  2. vast_flatbuf/tabular/VipRange.py +56 -0
  3. vastdb/__init__.py +7 -0
  4. vastdb/bucket.py +77 -0
  5. vastdb/errors.py +158 -0
  6. vastdb/{api.py → internal_commands.py} +283 -747
  7. vastdb/schema.py +77 -0
  8. vastdb/session.py +48 -0
  9. vastdb/table.py +480 -0
  10. vastdb/tests/conftest.py +46 -0
  11. vastdb/tests/test_imports.py +125 -0
  12. vastdb/tests/test_projections.py +41 -0
  13. vastdb/tests/test_sanity.py +83 -0
  14. vastdb/tests/test_schemas.py +45 -0
  15. vastdb/tests/test_tables.py +608 -0
  16. vastdb/transaction.py +55 -0
  17. vastdb/util.py +77 -0
  18. vastdb-0.1.0.dist-info/METADATA +38 -0
  19. {vastdb-0.0.5.2.dist-info → vastdb-0.1.0.dist-info}/RECORD +23 -24
  20. vast_protobuf/substrait/__init__.py +0 -0
  21. vast_protobuf/substrait/algebra_pb2.py +0 -1344
  22. vast_protobuf/substrait/capabilities_pb2.py +0 -46
  23. vast_protobuf/substrait/ddl_pb2.py +0 -57
  24. vast_protobuf/substrait/extended_expression_pb2.py +0 -49
  25. vast_protobuf/substrait/extensions/__init__.py +0 -0
  26. vast_protobuf/substrait/extensions/extensions_pb2.py +0 -89
  27. vast_protobuf/substrait/function_pb2.py +0 -168
  28. vast_protobuf/substrait/parameterized_types_pb2.py +0 -181
  29. vast_protobuf/substrait/plan_pb2.py +0 -67
  30. vast_protobuf/substrait/type_expressions_pb2.py +0 -198
  31. vast_protobuf/substrait/type_pb2.py +0 -350
  32. vast_protobuf/tabular/__init__.py +0 -0
  33. vast_protobuf/tabular/rpc_pb2.py +0 -344
  34. vastdb/v2.py +0 -108
  35. vastdb-0.0.5.2.dist-info/METADATA +0 -47
  36. {vast_protobuf → vastdb/tests}/__init__.py +0 -0
  37. {vastdb-0.0.5.2.dist-info → vastdb-0.1.0.dist-info}/LICENSE +0 -0
  38. {vastdb-0.0.5.2.dist-info → vastdb-0.1.0.dist-info}/WHEEL +0 -0
  39. {vastdb-0.0.5.2.dist-info → vastdb-0.1.0.dist-info}/top_level.txt +0 -0
vastdb/util.py ADDED
@@ -0,0 +1,77 @@
1
+ import logging
2
+ from typing import Callable
3
+
4
+ import pyarrow as pa
5
+ import pyarrow.parquet as pq
6
+
7
+ from .errors import InvalidArgument
8
+ from .schema import Schema
9
+ from .table import Table
10
+
11
+ log = logging.getLogger(__name__)
12
+
13
+
14
def create_table_from_files(
        schema: Schema, table_name: str, parquet_files: list[str], schema_merge_func: Callable = None) -> Table:
    """
    Create a table whose columns are derived from a set of Parquet files, then import those files into it.

    The schema of every file is read through an S3 filesystem built from the
    transaction's API credentials and endpoint, and folded into a single schema
    with `schema_merge_func`. The table is created with the merged schema and
    the files are imported into it.

    :param schema: schema in which the table is created; its transaction (`schema.tx`) supplies the S3 access details.
    :param table_name: name of the table to create.
    :param parquet_files: paths of the Parquet files; each one must start with a '/'.
    :param schema_merge_func: one of `default_schema_merge`, `strict_schema_merge` or `union_schema_merge`;
        `default_schema_merge` is used when omitted.
    :return: the newly created table, after the import has finished.
    :raises InvalidArgument: if `schema_merge_func` is not one of the supported merge functions,
        or if a path does not start with a '/'.
    """
    merge_func = schema_merge_func or default_schema_merge
    # Explicit check instead of `assert`, so the validation is not stripped when running with `python -O`.
    if merge_func not in (default_schema_merge, strict_schema_merge, union_schema_merge):
        raise InvalidArgument(
            f"Unsupported schema_merge_func: {schema_merge_func!r}, expected one of: "
            "default_schema_merge, strict_schema_merge, union_schema_merge")

    tx = schema.tx
    current_schema = pa.schema([])
    s3fs = pa.fs.S3FileSystem(
        access_key=tx._rpc.api.access_key, secret_key=tx._rpc.api.secret_key, endpoint_override=tx._rpc.api.url)
    for prq_file in parquet_files:
        if not prq_file.startswith('/'):
            raise InvalidArgument(f"Path {prq_file} must start with a '/'")
        # The filesystem is given the path without its leading slashes.
        parquet_ds = pq.ParquetDataset(prq_file.lstrip('/'), filesystem=s3fs)
        current_schema = merge_func(current_schema, parquet_ds.schema)

    log.info("Creating table %s from %d Parquet files, with columns: %s",
             table_name, len(parquet_files), list(current_schema))
    table = schema.create_table(table_name, current_schema)

    log.info("Starting import of %d files to table: %s", len(parquet_files), table)
    table.import_files(parquet_files)
    log.info("Finished import of %d files to table: %s", len(parquet_files), table)
    return table
38
+
39
+
40
def default_schema_merge(current_schema: pa.Schema, new_schema: pa.Schema) -> pa.Schema:
    """
    Merge two schemas by requiring that one of them is contained in the other.

    When `current_schema` has no field names, `new_schema` is returned as is.
    Otherwise the fields of both schemas are compared as sets, and the schema
    holding more distinct fields is returned (`new_schema` on a tie).
    Raises an InvalidArgument if the smaller field set is not a subset of the larger one.
    """
    if not current_schema.names:
        return new_schema

    current_fields = set(current_schema)
    new_fields = set(new_schema)

    # One of the original schema objects is returned (rather than a rebuilt one),
    # in order to preserve its original fields order.
    if len(current_fields) > len(new_fields):
        smaller, larger, result = new_fields, current_fields, current_schema
    else:
        smaller, larger, result = current_fields, new_fields, new_schema

    if smaller.issubset(larger):
        return result

    log.error("Schema mismatch. schema: %s isn't contained in schema: %s.", smaller, larger)
    raise InvalidArgument("Found mismatch in parquet files schemas.")
60
+
61
+
62
def strict_schema_merge(current_schema: pa.Schema, new_schema: pa.Schema) -> pa.Schema:
    """
    Merge two schemas by requiring them to be identical.

    A `current_schema` without any field names is treated as "nothing seen yet",
    so `new_schema` is accepted without comparison.
    Raises an InvalidArgument if both schemas have fields and they differ.
    """
    # Nothing to compare against yet.
    if not current_schema.names:
        return new_schema

    if current_schema != new_schema:
        raise InvalidArgument(f"Schemas are not identical. \n {current_schema} \n vs \n {new_schema}")

    return new_schema
71
+
72
+
73
def union_schema_merge(current_schema: pa.Schema, new_schema: pa.Schema) -> pa.Schema:
    """
    Merge two, possibly different, schemas into a single unified schema
    by passing both of them (current first, new second) to `pa.unify_schemas`.
    """
    schemas_to_unify = [current_schema, new_schema]
    return pa.unify_schemas(schemas_to_unify)
@@ -0,0 +1,38 @@
1
+ Metadata-Version: 2.1
2
+ Name: vastdb
3
+ Version: 0.1.0
4
+ Summary: VAST Data SDK
5
+ Home-page: https://github.com/vast-data/vastdb_sdk
6
+ Author: VAST DATA
7
+ Author-email: hello@vastdata.com
8
+ License: Copyright (C) VAST Data Ltd.
9
+ Platform: UNKNOWN
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: License :: OSI Approved :: Apache Software License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: Database
18
+ Classifier: Topic :: Database :: Front-Ends
19
+ Requires-Python: >=3.9.0
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: aws-requests-auth
23
+ Requires-Dist: boto3
24
+ Requires-Dist: flatbuffers
25
+ Requires-Dist: ibis-framework
26
+ Requires-Dist: pyarrow
27
+ Requires-Dist: requests
28
+ Requires-Dist: xmltodict
29
+
30
+
31
+ `vastdb` is a Python-based SDK designed for interacting
32
+ with [VAST Database](https://vastdata.com/database)
33
+ and [VAST Catalog](https://vastdata.com/blog/vast-catalog-treat-your-file-system-like-a-database),
34
+ enabling schema and table management, efficient ingest, query and modification of columnar data.
35
+
36
+ For more details, see [our whitepaper](https://vastdata.com/whitepaper/#TheVASTDataBase).
37
+
38
+
@@ -139,34 +139,33 @@ vast_flatbuf/tabular/ColumnType.py,sha256=_4-jMG08VR2zdn1ZH7F4aahYPxWsBSm7adUoVf
139
139
  vast_flatbuf/tabular/CreateProjectionRequest.py,sha256=POlK1DrYMAldNJscLIRL3j4jAT0Sv_fRzfvBXwZAAMw,2516
140
140
  vast_flatbuf/tabular/CreateSchemaRequest.py,sha256=MrOfWaFu0Q1-mxLlGV8YMPajZ5kASyvowVSrKU-NPx8,1626
141
141
  vast_flatbuf/tabular/GetProjectionTableStatsResponse.py,sha256=Bp-ln-0lcZEiUvp3vWYmnCP6t2UsZ5J-lezgkUUWhzo,3474
142
- vast_flatbuf/tabular/GetTableStatsResponse.py,sha256=xO9xU_fwGEQst57IMCmSiwXoLA1QkAyEZXLEOG31h4g,2732
142
+ vast_flatbuf/tabular/GetTableStatsResponse.py,sha256=_UsKj6-VAvyDZ8Eku9fegQlRKV-T_0Dsb7qjulYoZus,4655
143
143
  vast_flatbuf/tabular/ImportDataRequest.py,sha256=f1chKp5d5NUxfNjI8YI1o4MYInF8UDhIhpWkT3vG4Do,2450
144
144
  vast_flatbuf/tabular/ListProjectionsResponse.py,sha256=secqrBsJY3ydbA28j09rmxzBqj-c1JNqaP7JMuib7nE,4240
145
145
  vast_flatbuf/tabular/ListSchemasResponse.py,sha256=V8tbwcWAC96eNwuoqDNqCSb02BnMdq60TpyISuWTVMk,3036
146
146
  vast_flatbuf/tabular/ListTablesResponse.py,sha256=V7jZAS8ryKY8s6o_QyjWzgan-rsGm17zjKEmi7K6qTM,3550
147
147
  vast_flatbuf/tabular/ObjectDetails.py,sha256=qW0WtbkCYYE_L-Kw6VNRDCLYaRm5lKvTbLNkfD4zV4A,3589
148
148
  vast_flatbuf/tabular/S3File.py,sha256=KC9c2oS5-JXwTTriUVFdjOvRG0B54Cq9kviSDZY3NI0,4450
149
+ vast_flatbuf/tabular/VipRange.py,sha256=_BJd1RRZAcK76T9vlsHzXKYVsPVaz6WTEAqStMQCAUQ,2069
149
150
  vast_flatbuf/tabular/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
150
- vast_protobuf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
151
- vast_protobuf/substrait/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
152
- vast_protobuf/substrait/algebra_pb2.py,sha256=Y0RBz_IszyfCTgyNA5fh-tJPq6IJs3QqhYZoyFOy2Wg,100838
153
- vast_protobuf/substrait/capabilities_pb2.py,sha256=NDfdXUrGPNGfB11h3QI1OrYtZypfAlu9lE17BAVimMQ,2453
154
- vast_protobuf/substrait/ddl_pb2.py,sha256=2MDXdDznqoD6vtMSWwvkxpiZ-yPBnSXchc8jDQyoKZw,2683
155
- vast_protobuf/substrait/extended_expression_pb2.py,sha256=Rs8A8HmNcEevxO3jVsNlIeYqyXCaIwGb2xTK8p7_eFU,3481
156
- vast_protobuf/substrait/function_pb2.py,sha256=dtVctHDJC-BsofPY0ktPYLJkAWxLAKVMsOhGER36hoo,13339
157
- vast_protobuf/substrait/parameterized_types_pb2.py,sha256=hCTBDXbqunSuSmrxnkvFDNUjCxr8UPE8XrghpX2mqsM,15074
158
- vast_protobuf/substrait/plan_pb2.py,sha256=LDQFI5QE-KpVItmqzG9k-9XuiAT-eaXuOTMUaMoeODQ,3831
159
- vast_protobuf/substrait/type_expressions_pb2.py,sha256=hpvSwkZjmpEfh0q6pGeCt_5ARzyOoX2HVXYXNShSh3s,17633
160
- vast_protobuf/substrait/type_pb2.py,sha256=w-FzIb2OJNpaOCp1f1ox_CVDevB1ID5wudju8e1NkBY,20790
161
- vast_protobuf/substrait/extensions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
162
- vast_protobuf/substrait/extensions/extensions_pb2.py,sha256=I_6c6nMmMaYvVtzF-5ycqpzFYlsAVlKQDyatoU8RewQ,6110
163
- vast_protobuf/tabular/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
164
- vast_protobuf/tabular/rpc_pb2.py,sha256=7kW2WrA2sGk6WVbD83mc_cKkZ2MxoImSO5GOVz6NbbE,23776
165
- vastdb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
166
- vastdb/api.py,sha256=ZfkwDyTqaQ_j1wmp2iog9Yj3LW98S8IzLxs7gExiPG8,124746
167
- vastdb/v2.py,sha256=0fLulaIQGlIbVNBBFGd6iwYPuGhaaJIHTiJORyio_YQ,2438
168
- vastdb-0.0.5.2.dist-info/LICENSE,sha256=obffan7LYrq7hLHNrY7vHcn2pKUTBUYXMKu-VOAvDxU,11333
169
- vastdb-0.0.5.2.dist-info/METADATA,sha256=t6yxMkCPHuy7GJU6lSmd6QjrCsGmcPvzJE0qihLjMQQ,1369
170
- vastdb-0.0.5.2.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
171
- vastdb-0.0.5.2.dist-info/top_level.txt,sha256=Vsj2MKtlhPg0J4so64slQtnwjhgoPmJgcG-6YcVAwVc,20
172
- vastdb-0.0.5.2.dist-info/RECORD,,
151
+ vastdb/__init__.py,sha256=GY30IfZQApfl7HfcFmfTzFpx48oHgQIrDcUQCiTnxpo,206
152
+ vastdb/bucket.py,sha256=Xbgn5Ns7veBL4oKH7EaSj4SxTPDRlicl9Saaz_39ZsU,2526
153
+ vastdb/errors.py,sha256=mveQ2O0fLKOS51V9k5Y-HwY8Y1XiYdE9aJ9j0wlltWQ,3119
154
+ vastdb/internal_commands.py,sha256=FR4rkr-sRvqMc-Y5hW7APOPa41a8d7L8DNJ2ROHRaFM,104441
155
+ vastdb/schema.py,sha256=TbgqaUqAraj43vKCyVQNUSOMuJdw2Y4M06svs5jpcPo,2836
156
+ vastdb/session.py,sha256=qgjT9rt1yUr4GyHOZRsVGFj3HYBoHFAEdczi_R26O8o,1731
157
+ vastdb/table.py,sha256=p0uE0Gw9hen7hLTx9xC_MbxTaG6ZhZEFieaenUnbyUY,20442
158
+ vastdb/transaction.py,sha256=jleiVmg4iui2q1GqMFsPo5GZRNtIa5NzGLFVooBbzkQ,1797
159
+ vastdb/util.py,sha256=pBw4ywNJfkvKik-T7ZKPrWBoZOqGns-WsSZkG7HHa2I,2908
160
+ vastdb/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
161
+ vastdb/tests/conftest.py,sha256=VLqImQ1XMr1FLCLCqZpgv8wMjNskfYAtMp-qjZFBqWo,1694
162
+ vastdb/tests/test_imports.py,sha256=OhkbuhTeLgD4I6Vbub-B7wQo-G37TlXoHVQhGCLz9Wo,5035
163
+ vastdb/tests/test_projections.py,sha256=PRi1Jf__95fsL9ZCQ_s2PtszWIO5FIFbniiL6FnV18M,1253
164
+ vastdb/tests/test_sanity.py,sha256=gijOWK4ymGhVRHkf0ecHibVlaJxl92RinPdFUwWj1OQ,2959
165
+ vastdb/tests/test_schemas.py,sha256=YX0lF8FbXzNCNVUAxq3g0L0OCjGq1OwkQaNaBtzDe4Q,1253
166
+ vastdb/tests/test_tables.py,sha256=qnDRBtOPh9qcV4O3kB6xF4WJFj3B3WX2RAmUzkzl05g,23634
167
+ vastdb-0.1.0.dist-info/LICENSE,sha256=obffan7LYrq7hLHNrY7vHcn2pKUTBUYXMKu-VOAvDxU,11333
168
+ vastdb-0.1.0.dist-info/METADATA,sha256=pCY34hVZGzoB51SyIgrzrtxDaOXC_2DGOQbafZdOmQg,1331
169
+ vastdb-0.1.0.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
170
+ vastdb-0.1.0.dist-info/top_level.txt,sha256=Vsj2MKtlhPg0J4so64slQtnwjhgoPmJgcG-6YcVAwVc,20
171
+ vastdb-0.1.0.dist-info/RECORD,,
File without changes