vastdb 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vastdb/__init__.py +3 -0
- vastdb/{internal_commands.py → _internal.py} +267 -359
- vastdb/bench/test_perf.py +1 -2
- vastdb/bucket.py +14 -39
- vastdb/conftest.py +13 -4
- vastdb/errors.py +5 -1
- vastdb/schema.py +52 -3
- vastdb/session.py +42 -13
- vastdb/table.py +44 -23
- vastdb/tests/test_duckdb.py +2 -2
- vastdb/tests/test_imports.py +3 -3
- vastdb/tests/test_nested.py +4 -4
- vastdb/tests/test_projections.py +78 -0
- vastdb/tests/test_sanity.py +3 -2
- vastdb/tests/test_schemas.py +49 -0
- vastdb/tests/test_tables.py +107 -29
- vastdb/tests/test_util.py +6 -0
- vastdb/transaction.py +23 -14
- vastdb/util.py +41 -1
- {vastdb-0.1.5.dist-info → vastdb-0.1.7.dist-info}/METADATA +2 -2
- {vastdb-0.1.5.dist-info → vastdb-0.1.7.dist-info}/RECORD +24 -24
- {vastdb-0.1.5.dist-info → vastdb-0.1.7.dist-info}/LICENSE +0 -0
- {vastdb-0.1.5.dist-info → vastdb-0.1.7.dist-info}/WHEEL +0 -0
- {vastdb-0.1.5.dist-info → vastdb-0.1.7.dist-info}/top_level.txt +0 -0
|
@@ -5,18 +5,37 @@ import re
|
|
|
5
5
|
import struct
|
|
6
6
|
import urllib.parse
|
|
7
7
|
from collections import defaultdict, namedtuple
|
|
8
|
+
from dataclasses import dataclass, field
|
|
8
9
|
from enum import Enum
|
|
9
|
-
from
|
|
10
|
-
from typing import Any, Dict, Iterator, List, Optional, Union
|
|
10
|
+
from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, Union
|
|
11
11
|
|
|
12
|
+
import backoff
|
|
12
13
|
import flatbuffers
|
|
13
14
|
import ibis
|
|
14
15
|
import pyarrow as pa
|
|
15
|
-
import pyarrow.parquet as pq
|
|
16
16
|
import requests
|
|
17
17
|
import urllib3
|
|
18
18
|
import xmltodict
|
|
19
19
|
from aws_requests_auth.aws_auth import AWSRequestsAuth
|
|
20
|
+
from ibis.expr.operations.generic import (
|
|
21
|
+
IsNull,
|
|
22
|
+
Literal,
|
|
23
|
+
)
|
|
24
|
+
from ibis.expr.operations.logical import (
|
|
25
|
+
And,
|
|
26
|
+
Between,
|
|
27
|
+
Equals,
|
|
28
|
+
Greater,
|
|
29
|
+
GreaterEqual,
|
|
30
|
+
InValues,
|
|
31
|
+
Less,
|
|
32
|
+
LessEqual,
|
|
33
|
+
Not,
|
|
34
|
+
NotEquals,
|
|
35
|
+
Or,
|
|
36
|
+
)
|
|
37
|
+
from ibis.expr.operations.relations import Field
|
|
38
|
+
from ibis.expr.operations.strings import StringContains
|
|
20
39
|
|
|
21
40
|
import vast_flatbuf.org.apache.arrow.computeir.flatbuf.BinaryLiteral as fb_binary_lit
|
|
22
41
|
import vast_flatbuf.org.apache.arrow.computeir.flatbuf.BooleanLiteral as fb_bool_lit
|
|
@@ -138,26 +157,6 @@ class Predicate:
|
|
|
138
157
|
self.expr = expr
|
|
139
158
|
|
|
140
159
|
def serialize(self, builder: 'flatbuffers.builder.Builder'):
|
|
141
|
-
from ibis.expr.operations.generic import (
|
|
142
|
-
IsNull,
|
|
143
|
-
Literal,
|
|
144
|
-
TableColumn,
|
|
145
|
-
)
|
|
146
|
-
from ibis.expr.operations.logical import (
|
|
147
|
-
And,
|
|
148
|
-
Between,
|
|
149
|
-
Equals,
|
|
150
|
-
Greater,
|
|
151
|
-
GreaterEqual,
|
|
152
|
-
InValues,
|
|
153
|
-
Less,
|
|
154
|
-
LessEqual,
|
|
155
|
-
Not,
|
|
156
|
-
NotEquals,
|
|
157
|
-
Or,
|
|
158
|
-
)
|
|
159
|
-
from ibis.expr.operations.strings import StringContains
|
|
160
|
-
|
|
161
160
|
builder_map = {
|
|
162
161
|
Greater: self.build_greater,
|
|
163
162
|
GreaterEqual: self.build_greater_equal,
|
|
@@ -217,7 +216,7 @@ class Predicate:
|
|
|
217
216
|
if not isinstance(literal, Literal):
|
|
218
217
|
raise NotImplementedError(self.expr)
|
|
219
218
|
|
|
220
|
-
if not isinstance(column,
|
|
219
|
+
if not isinstance(column, Field):
|
|
221
220
|
raise NotImplementedError(self.expr)
|
|
222
221
|
|
|
223
222
|
field_name = column.name
|
|
@@ -723,19 +722,59 @@ def _parse_table_info(obj):
|
|
|
723
722
|
TableStatsResult = namedtuple("TableStatsResult", ["num_rows", "size_in_bytes", "is_external_rowid_alloc", "endpoints"])
|
|
724
723
|
|
|
725
724
|
|
|
725
|
+
def _backoff_giveup(exc: Exception) -> bool:
|
|
726
|
+
|
|
727
|
+
if isinstance(exc, errors.Slowdown):
|
|
728
|
+
# the server is overloaded, retry later
|
|
729
|
+
return False
|
|
730
|
+
|
|
731
|
+
if isinstance(exc, requests.exceptions.ConnectionError):
|
|
732
|
+
if exc.request.method == "GET":
|
|
733
|
+
# low-level connection issue, it is safe to retry only read-only requests
|
|
734
|
+
return False
|
|
735
|
+
|
|
736
|
+
return True # giveup in case of other exceptions
|
|
737
|
+
|
|
738
|
+
|
|
739
|
+
@dataclass
|
|
740
|
+
class BackoffConfig:
|
|
741
|
+
wait_gen: Callable = field(default=backoff.expo)
|
|
742
|
+
max_tries: int = 10
|
|
743
|
+
max_time: float = 60.0 # in seconds
|
|
744
|
+
backoff_log_level: int = logging.DEBUG
|
|
745
|
+
|
|
746
|
+
|
|
726
747
|
class VastdbApi:
|
|
727
748
|
# we expect the vast version to be <major>.<minor>.<patch>.<protocol>
|
|
728
749
|
VAST_VERSION_REGEX = re.compile(r'^vast (\d+\.\d+\.\d+\.\d+)$')
|
|
729
750
|
|
|
730
|
-
def __init__(self, endpoint, access_key, secret_key,
|
|
751
|
+
def __init__(self, endpoint, access_key, secret_key,
|
|
752
|
+
*,
|
|
753
|
+
auth_type=AuthType.SIGV4,
|
|
754
|
+
ssl_verify=True,
|
|
755
|
+
backoff_config: Optional[BackoffConfig] = None):
|
|
756
|
+
|
|
757
|
+
from . import __version__ # import lazily here (to avoid circular dependencies)
|
|
758
|
+
self.client_sdk_version = f"VAST Database Python SDK {__version__} - 2024 (c)"
|
|
759
|
+
|
|
731
760
|
url = urllib3.util.parse_url(endpoint)
|
|
732
761
|
self.access_key = access_key
|
|
733
762
|
self.secret_key = secret_key
|
|
734
763
|
|
|
735
764
|
self.default_max_list_columns_page_size = 1000
|
|
736
|
-
self.
|
|
737
|
-
self.
|
|
738
|
-
self.
|
|
765
|
+
self._session = requests.Session()
|
|
766
|
+
self._session.verify = ssl_verify
|
|
767
|
+
self._session.headers['user-agent'] = self.client_sdk_version
|
|
768
|
+
|
|
769
|
+
backoff_config = backoff_config or BackoffConfig()
|
|
770
|
+
backoff_decorator = backoff.on_exception(
|
|
771
|
+
wait_gen=backoff_config.wait_gen,
|
|
772
|
+
exception=(requests.exceptions.ConnectionError, errors.Slowdown),
|
|
773
|
+
giveup=_backoff_giveup,
|
|
774
|
+
max_tries=backoff_config.max_tries,
|
|
775
|
+
max_time=backoff_config.max_time,
|
|
776
|
+
backoff_log_level=backoff_config.backoff_log_level)
|
|
777
|
+
self._request = backoff_decorator(self._single_request)
|
|
739
778
|
|
|
740
779
|
if url.port in {80, 443, None}:
|
|
741
780
|
self.aws_host = f'{url.host}'
|
|
@@ -745,22 +784,21 @@ class VastdbApi:
|
|
|
745
784
|
self.url = str(url)
|
|
746
785
|
_logger.debug('url=%s aws_host=%s', self.url, self.aws_host)
|
|
747
786
|
|
|
748
|
-
self.
|
|
787
|
+
self._session.auth = AWSRequestsAuth(aws_access_key=access_key,
|
|
749
788
|
aws_secret_access_key=secret_key,
|
|
750
789
|
aws_host=self.aws_host,
|
|
751
|
-
aws_region='
|
|
790
|
+
aws_region='',
|
|
752
791
|
aws_service='s3')
|
|
753
792
|
|
|
754
793
|
# probe the cluster for its version
|
|
755
|
-
self.
|
|
756
|
-
|
|
794
|
+
res = self._request(method="GET", url=self._url(command="transaction"), skip_status_check=True) # used only for the response headers
|
|
795
|
+
_logger.debug("headers=%s code=%s content=%s", res.headers, res.status_code, res.content)
|
|
757
796
|
server_header = res.headers.get("Server")
|
|
758
797
|
if server_header is None:
|
|
759
798
|
_logger.error("Response doesn't contain 'Server' header")
|
|
760
799
|
else:
|
|
761
|
-
_logger.debug("Server header is '%s'", server_header)
|
|
762
800
|
if m := self.VAST_VERSION_REGEX.match(server_header):
|
|
763
|
-
self.vast_version, = m.
|
|
801
|
+
self.vast_version: Tuple[int, ...] = tuple(int(v) for v in m.group(1).split("."))
|
|
764
802
|
return
|
|
765
803
|
else:
|
|
766
804
|
_logger.error("'Server' header '%s' doesn't match the expected pattern", server_header)
|
|
@@ -773,15 +811,14 @@ class VastdbApi:
|
|
|
773
811
|
_logger.critical(msg)
|
|
774
812
|
raise NotImplementedError(msg)
|
|
775
813
|
|
|
776
|
-
def
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
aws_service='s3')
|
|
814
|
+
def _single_request(self, *, method, url, skip_status_check=False, **kwargs):
|
|
815
|
+
res = self._session.request(method=method, url=url, **kwargs)
|
|
816
|
+
if not skip_status_check:
|
|
817
|
+
if exc := errors.from_response(res):
|
|
818
|
+
raise exc # application-level error
|
|
819
|
+
return res # successful response
|
|
783
820
|
|
|
784
|
-
def
|
|
821
|
+
def _url(self, bucket="", schema="", table="", command="", url_params={}):
|
|
785
822
|
prefix_list = [self.url]
|
|
786
823
|
if len(bucket):
|
|
787
824
|
prefix_list.append(bucket)
|
|
@@ -816,11 +853,6 @@ class VastdbApi:
|
|
|
816
853
|
|
|
817
854
|
return common_headers | {f'tabular-client-tags-{index}': tag for index, tag in enumerate(client_tags)}
|
|
818
855
|
|
|
819
|
-
def _check_res(self, res, cmd="", expected_retvals=[]):
|
|
820
|
-
if exc := errors.from_response(res):
|
|
821
|
-
raise exc
|
|
822
|
-
return res
|
|
823
|
-
|
|
824
856
|
def create_schema(self, bucket, name, txid=0, client_tags=[], schema_properties="", expected_retvals=[]):
|
|
825
857
|
"""
|
|
826
858
|
Create a collection of tables, use the following request
|
|
@@ -842,10 +874,10 @@ class VastdbApi:
|
|
|
842
874
|
|
|
843
875
|
headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
|
|
844
876
|
headers['Content-Length'] = str(len(create_schema_req))
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
877
|
+
self._request(
|
|
878
|
+
method="POST",
|
|
879
|
+
url=self._url(bucket=bucket, schema=name, command="schema"),
|
|
880
|
+
data=create_schema_req, headers=headers)
|
|
849
881
|
|
|
850
882
|
def alter_schema(self, bucket, name, txid=0, client_tags=[], schema_properties="", new_name="", expected_retvals=[]):
|
|
851
883
|
"""
|
|
@@ -871,10 +903,10 @@ class VastdbApi:
|
|
|
871
903
|
headers['Content-Length'] = str(len(alter_schema_req))
|
|
872
904
|
url_params = {'tabular-new-schema-name': new_name} if len(new_name) else {}
|
|
873
905
|
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
906
|
+
self._request(
|
|
907
|
+
method="PUT",
|
|
908
|
+
url=self._url(bucket=bucket, schema=name, command="schema", url_params=url_params),
|
|
909
|
+
data=alter_schema_req, headers=headers)
|
|
878
910
|
|
|
879
911
|
def drop_schema(self, bucket, name, txid=0, client_tags=[], expected_retvals=[]):
|
|
880
912
|
"""
|
|
@@ -885,9 +917,10 @@ class VastdbApi:
|
|
|
885
917
|
"""
|
|
886
918
|
headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
|
|
887
919
|
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
920
|
+
self._request(
|
|
921
|
+
method="DELETE",
|
|
922
|
+
url=self._url(bucket=bucket, schema=name, command="schema"),
|
|
923
|
+
headers=headers)
|
|
891
924
|
|
|
892
925
|
def list_schemas(self, bucket, schema="", txid=0, client_tags=[], max_keys=1000, next_key=0, name_prefix="",
|
|
893
926
|
exact_match=False, expected_retvals=[], count_only=False):
|
|
@@ -916,25 +949,27 @@ class VastdbApi:
|
|
|
916
949
|
|
|
917
950
|
schemas = []
|
|
918
951
|
schema = schema or ""
|
|
919
|
-
res = self.
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
952
|
+
res = self._request(
|
|
953
|
+
method="GET",
|
|
954
|
+
url=self._url(bucket=bucket, schema=schema, command="schema"),
|
|
955
|
+
headers=headers)
|
|
956
|
+
|
|
957
|
+
res_headers = res.headers
|
|
958
|
+
next_key = int(res_headers['tabular-next-key'])
|
|
959
|
+
is_truncated = res_headers['tabular-is-truncated'] == 'true'
|
|
960
|
+
lists = list_schemas.GetRootAs(res.content)
|
|
961
|
+
bucket_name = lists.BucketName().decode()
|
|
962
|
+
if not bucket.startswith(bucket_name):
|
|
963
|
+
raise ValueError(f'bucket: {bucket} did not start from {bucket_name}')
|
|
964
|
+
schemas_length = lists.SchemasLength()
|
|
965
|
+
count = int(res_headers['tabular-list-count']) if 'tabular-list-count' in res_headers else schemas_length
|
|
966
|
+
for i in range(schemas_length):
|
|
967
|
+
schema_obj = lists.Schemas(i)
|
|
968
|
+
name = schema_obj.Name().decode()
|
|
969
|
+
properties = schema_obj.Properties().decode()
|
|
970
|
+
schemas.append([name, properties])
|
|
971
|
+
|
|
972
|
+
return bucket_name, schemas, next_key, is_truncated, count
|
|
938
973
|
|
|
939
974
|
def list_snapshots(self, bucket, max_keys=1000, next_token=None, name_prefix=''):
|
|
940
975
|
next_token = next_token or ''
|
|
@@ -942,8 +977,9 @@ class VastdbApi:
|
|
|
942
977
|
if next_token:
|
|
943
978
|
url_params['continuation-token'] = next_token
|
|
944
979
|
|
|
945
|
-
res = self.
|
|
946
|
-
|
|
980
|
+
res = self._request(
|
|
981
|
+
method="GET",
|
|
982
|
+
url=self._url(bucket=bucket, command="list", url_params=url_params))
|
|
947
983
|
|
|
948
984
|
xml_str = res.content.decode()
|
|
949
985
|
xml_dict = xmltodict.parse(xml_str)
|
|
@@ -986,33 +1022,10 @@ class VastdbApi:
|
|
|
986
1022
|
if create_imports_table:
|
|
987
1023
|
url_params['sub-table'] = IMPORTED_OBJECTS_TABLE_NAME
|
|
988
1024
|
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
def create_table_from_parquet_schema(self, bucket, schema, name, parquet_path=None,
|
|
994
|
-
parquet_bucket_name=None, parquet_object_name=None,
|
|
995
|
-
txid=0, client_tags=[], expected_retvals=[]):
|
|
996
|
-
|
|
997
|
-
# Use pyarrow.parquet.ParquetDataset to open the Parquet file
|
|
998
|
-
if parquet_path:
|
|
999
|
-
parquet_ds = pq.ParquetDataset(parquet_path)
|
|
1000
|
-
elif parquet_bucket_name and parquet_object_name:
|
|
1001
|
-
s3fs = pa.fs.S3FileSystem(access_key=self.access_key, secret_key=self.secret_key, endpoint_override=self.url)
|
|
1002
|
-
parquet_ds = pq.ParquetDataset('/'.join([parquet_bucket_name, parquet_object_name]), filesystem=s3fs)
|
|
1003
|
-
else:
|
|
1004
|
-
raise RuntimeError(f'invalid params parquet_path={parquet_path} parquet_bucket_name={parquet_bucket_name} parquet_object_name={parquet_object_name}')
|
|
1005
|
-
|
|
1006
|
-
# Get the schema of the Parquet file
|
|
1007
|
-
if isinstance(parquet_ds.schema, pq.ParquetSchema):
|
|
1008
|
-
arrow_schema = parquet_ds.schema.to_arrow_schema()
|
|
1009
|
-
elif isinstance(parquet_ds.schema, pa.Schema):
|
|
1010
|
-
arrow_schema = parquet_ds.schema
|
|
1011
|
-
else:
|
|
1012
|
-
raise RuntimeError(f'invalid type(parquet_ds.schema) = {type(parquet_ds.schema)}')
|
|
1013
|
-
|
|
1014
|
-
# create the table
|
|
1015
|
-
return self.create_table(bucket, schema, name, arrow_schema, txid, client_tags, expected_retvals)
|
|
1025
|
+
self._request(
|
|
1026
|
+
method="POST",
|
|
1027
|
+
url=self._url(bucket=bucket, schema=schema, table=name, command="table", url_params=url_params),
|
|
1028
|
+
data=serialized_schema, headers=headers)
|
|
1016
1029
|
|
|
1017
1030
|
def get_table_stats(self, bucket, schema, name, txid=0, client_tags=[], expected_retvals=[], imports_table_stats=False):
|
|
1018
1031
|
"""
|
|
@@ -1024,32 +1037,16 @@ class VastdbApi:
|
|
|
1024
1037
|
"""
|
|
1025
1038
|
headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
|
|
1026
1039
|
url_params = {'sub-table': IMPORTED_OBJECTS_TABLE_NAME} if imports_table_stats else {}
|
|
1027
|
-
res = self.
|
|
1028
|
-
|
|
1040
|
+
res = self._request(
|
|
1041
|
+
method="GET",
|
|
1042
|
+
url=self._url(bucket=bucket, schema=schema, table=name, command="stats", url_params=url_params),
|
|
1043
|
+
headers=headers)
|
|
1029
1044
|
|
|
1030
1045
|
stats = get_table_stats.GetRootAs(res.content)
|
|
1031
1046
|
num_rows = stats.NumRows()
|
|
1032
1047
|
size_in_bytes = stats.SizeInBytes()
|
|
1033
1048
|
is_external_rowid_alloc = stats.IsExternalRowidAlloc()
|
|
1034
|
-
endpoints = []
|
|
1035
|
-
if stats.VipsLength() == 0:
|
|
1036
|
-
endpoints.append(self.url)
|
|
1037
|
-
else:
|
|
1038
|
-
url = urllib3.util.parse_url(self.url)
|
|
1039
|
-
|
|
1040
|
-
ip_cls = IPv6Address if (stats.AddressType() == "ipv6") else IPv4Address
|
|
1041
|
-
vips = [stats.Vips(i) for i in range(stats.VipsLength())]
|
|
1042
|
-
ips = []
|
|
1043
|
-
# extract the vips into list of IPs
|
|
1044
|
-
for vip in vips:
|
|
1045
|
-
start_ip = int(ip_cls(vip.StartAddress().decode()))
|
|
1046
|
-
ips.extend(ip_cls(start_ip + i) for i in range(vip.AddressCount()))
|
|
1047
|
-
# build a list of endpoint URLs, reusing schema and port (if specified when constructing the session).
|
|
1048
|
-
# it is assumed that the client can access the returned IPs (e.g. if they are part of the VIP pool).
|
|
1049
|
-
for ip in ips:
|
|
1050
|
-
d = url._asdict()
|
|
1051
|
-
d['host'] = str(ip)
|
|
1052
|
-
endpoints.append(str(urllib3.util.Url(**d)))
|
|
1049
|
+
endpoints = [self.url] # we cannot replace the host by a VIP address in HTTPS-based URLs
|
|
1053
1050
|
return TableStatsResult(num_rows, size_in_bytes, is_external_rowid_alloc, tuple(endpoints))
|
|
1054
1051
|
|
|
1055
1052
|
def alter_table(self, bucket, schema, name, txid=0, client_tags=[], table_properties="",
|
|
@@ -1078,10 +1075,10 @@ class VastdbApi:
|
|
|
1078
1075
|
headers['Content-Length'] = str(len(alter_table_req))
|
|
1079
1076
|
url_params = {'tabular-new-table-name': schema + "/" + new_name} if len(new_name) else {}
|
|
1080
1077
|
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1078
|
+
self._request(
|
|
1079
|
+
method="PUT",
|
|
1080
|
+
url=self._url(bucket=bucket, schema=schema, table=name, command="table", url_params=url_params),
|
|
1081
|
+
data=alter_table_req, headers=headers)
|
|
1085
1082
|
|
|
1086
1083
|
def drop_table(self, bucket, schema, name, txid=0, client_tags=[], expected_retvals=[], remove_imports_table=False):
|
|
1087
1084
|
"""
|
|
@@ -1094,9 +1091,10 @@ class VastdbApi:
|
|
|
1094
1091
|
headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
|
|
1095
1092
|
url_params = {'sub-table': IMPORTED_OBJECTS_TABLE_NAME} if remove_imports_table else {}
|
|
1096
1093
|
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1094
|
+
self._request(
|
|
1095
|
+
method="DELETE",
|
|
1096
|
+
url=self._url(bucket=bucket, schema=schema, table=name, command="table", url_params=url_params),
|
|
1097
|
+
headers=headers)
|
|
1100
1098
|
|
|
1101
1099
|
def list_tables(self, bucket, schema, txid=0, client_tags=[], max_keys=1000, next_key=0, name_prefix="",
|
|
1102
1100
|
exact_match=False, expected_retvals=[], include_list_stats=False, count_only=False):
|
|
@@ -1120,23 +1118,25 @@ class VastdbApi:
|
|
|
1120
1118
|
headers['tabular-include-list-stats'] = str(include_list_stats)
|
|
1121
1119
|
|
|
1122
1120
|
tables = []
|
|
1123
|
-
res = self.
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1121
|
+
res = self._request(
|
|
1122
|
+
method="GET",
|
|
1123
|
+
url=self._url(bucket=bucket, schema=schema, command="table"),
|
|
1124
|
+
headers=headers)
|
|
1125
|
+
|
|
1126
|
+
res_headers = res.headers
|
|
1127
|
+
next_key = int(res_headers['tabular-next-key'])
|
|
1128
|
+
is_truncated = res_headers['tabular-is-truncated'] == 'true'
|
|
1129
|
+
lists = list_tables.GetRootAs(res.content)
|
|
1130
|
+
bucket_name = lists.BucketName().decode()
|
|
1131
|
+
schema_name = lists.SchemaName().decode()
|
|
1132
|
+
if not bucket.startswith(bucket_name): # ignore snapshot name
|
|
1133
|
+
raise ValueError(f'bucket: {bucket} did not start from {bucket_name}')
|
|
1134
|
+
tables_length = lists.TablesLength()
|
|
1135
|
+
count = int(res_headers['tabular-list-count']) if 'tabular-list-count' in res_headers else tables_length
|
|
1136
|
+
for i in range(tables_length):
|
|
1137
|
+
tables.append(_parse_table_info(lists.Tables(i)))
|
|
1138
|
+
|
|
1139
|
+
return bucket_name, schema_name, tables, next_key, is_truncated, count
|
|
1140
1140
|
|
|
1141
1141
|
def add_columns(self, bucket, schema, name, arrow_schema, txid=0, client_tags=[], expected_retvals=[]):
|
|
1142
1142
|
"""
|
|
@@ -1158,9 +1158,10 @@ class VastdbApi:
|
|
|
1158
1158
|
serialized_schema = arrow_schema.serialize()
|
|
1159
1159
|
headers['Content-Length'] = str(len(serialized_schema))
|
|
1160
1160
|
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1161
|
+
self._request(
|
|
1162
|
+
method="POST",
|
|
1163
|
+
url=self._url(bucket=bucket, schema=schema, table=name, command="column"),
|
|
1164
|
+
data=serialized_schema, headers=headers)
|
|
1164
1165
|
|
|
1165
1166
|
def alter_column(self, bucket, schema, table, name, txid=0, client_tags=[], column_properties="",
|
|
1166
1167
|
new_name="", column_sep=".", column_stats="", expected_retvals=[]):
|
|
@@ -1196,9 +1197,10 @@ class VastdbApi:
|
|
|
1196
1197
|
if len(new_name):
|
|
1197
1198
|
url_params['tabular-new-column-name'] = new_name
|
|
1198
1199
|
|
|
1199
|
-
|
|
1200
|
-
|
|
1201
|
-
|
|
1200
|
+
self._request(
|
|
1201
|
+
method="PUT",
|
|
1202
|
+
url=self._url(bucket=bucket, schema=schema, table=table, command="column", url_params=url_params),
|
|
1203
|
+
data=alter_column_req, headers=headers)
|
|
1202
1204
|
|
|
1203
1205
|
def drop_columns(self, bucket, schema, table, arrow_schema, txid=0, client_tags=[], expected_retvals=[]):
|
|
1204
1206
|
"""
|
|
@@ -1211,9 +1213,10 @@ class VastdbApi:
|
|
|
1211
1213
|
serialized_schema = arrow_schema.serialize()
|
|
1212
1214
|
headers['Content-Length'] = str(len(serialized_schema))
|
|
1213
1215
|
|
|
1214
|
-
|
|
1215
|
-
|
|
1216
|
-
|
|
1216
|
+
self._request(
|
|
1217
|
+
method="DELETE",
|
|
1218
|
+
url=self._url(bucket=bucket, schema=schema, table=table, command="column"),
|
|
1219
|
+
data=serialized_schema, headers=headers)
|
|
1217
1220
|
|
|
1218
1221
|
def list_columns(self, bucket, schema, table, *, txid=0, client_tags=None, max_keys=None, next_key=0,
|
|
1219
1222
|
count_only=False, name_prefix="", exact_match=False,
|
|
@@ -1245,18 +1248,18 @@ class VastdbApi:
|
|
|
1245
1248
|
headers['tabular-name-prefix'] = name_prefix
|
|
1246
1249
|
|
|
1247
1250
|
url_params = {'sub-table': IMPORTED_OBJECTS_TABLE_NAME} if list_imports_table else {}
|
|
1248
|
-
res = self.
|
|
1249
|
-
|
|
1250
|
-
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1256
|
-
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1251
|
+
res = self._request(
|
|
1252
|
+
method="GET",
|
|
1253
|
+
url=self._url(bucket=bucket, schema=schema, table=table, command="column", url_params=url_params),
|
|
1254
|
+
headers=headers)
|
|
1255
|
+
|
|
1256
|
+
res_headers = res.headers
|
|
1257
|
+
next_key = int(res_headers['tabular-next-key'])
|
|
1258
|
+
is_truncated = res_headers['tabular-is-truncated'] == 'true'
|
|
1259
|
+
count = int(res_headers['tabular-list-count'])
|
|
1260
|
+
columns = [] if count_only else pa.ipc.open_stream(res.content).schema
|
|
1261
|
+
|
|
1262
|
+
return columns, next_key, is_truncated, count
|
|
1260
1263
|
|
|
1261
1264
|
def begin_transaction(self, client_tags=[], expected_retvals=[]):
|
|
1262
1265
|
"""
|
|
@@ -1267,8 +1270,10 @@ class VastdbApi:
|
|
|
1267
1270
|
tabular-txid: TransactionId
|
|
1268
1271
|
"""
|
|
1269
1272
|
headers = self._fill_common_headers(client_tags=client_tags)
|
|
1270
|
-
|
|
1271
|
-
|
|
1273
|
+
return self._request(
|
|
1274
|
+
method="POST",
|
|
1275
|
+
url=self._url(command="transaction"),
|
|
1276
|
+
headers=headers)
|
|
1272
1277
|
|
|
1273
1278
|
def commit_transaction(self, txid, client_tags=[], expected_retvals=[]):
|
|
1274
1279
|
"""
|
|
@@ -1277,8 +1282,10 @@ class VastdbApi:
|
|
|
1277
1282
|
tabular-client-tag: ClientTag
|
|
1278
1283
|
"""
|
|
1279
1284
|
headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
|
|
1280
|
-
|
|
1281
|
-
|
|
1285
|
+
self._request(
|
|
1286
|
+
method="PUT",
|
|
1287
|
+
url=self._url(command="transaction"),
|
|
1288
|
+
headers=headers)
|
|
1282
1289
|
|
|
1283
1290
|
def rollback_transaction(self, txid, client_tags=[], expected_retvals=[]):
|
|
1284
1291
|
"""
|
|
@@ -1287,8 +1294,10 @@ class VastdbApi:
|
|
|
1287
1294
|
tabular-client-tag: ClientTag
|
|
1288
1295
|
"""
|
|
1289
1296
|
headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
|
|
1290
|
-
|
|
1291
|
-
|
|
1297
|
+
self._request(
|
|
1298
|
+
method="DELETE",
|
|
1299
|
+
url=self._url(command="transaction"),
|
|
1300
|
+
headers=headers)
|
|
1292
1301
|
|
|
1293
1302
|
def get_transaction(self, txid, client_tags=[], expected_retvals=[]):
|
|
1294
1303
|
"""
|
|
@@ -1297,56 +1306,10 @@ class VastdbApi:
|
|
|
1297
1306
|
tabular-client-tag: ClientTag
|
|
1298
1307
|
"""
|
|
1299
1308
|
headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
|
|
1300
|
-
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
retry_count=0, enable_sorted_projections=True):
|
|
1305
|
-
"""
|
|
1306
|
-
POST /mybucket/myschema/mytable?query-data=SelectRowIds HTTP/1.1
|
|
1307
|
-
"""
|
|
1308
|
-
|
|
1309
|
-
# add query option select-only and read-only
|
|
1310
|
-
headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
|
|
1311
|
-
headers['Content-Length'] = str(len(params))
|
|
1312
|
-
headers['tabular-enable-sorted-projections'] = str(enable_sorted_projections)
|
|
1313
|
-
if retry_count > 0:
|
|
1314
|
-
headers['tabular-retry-count'] = str(retry_count)
|
|
1315
|
-
|
|
1316
|
-
res = self.session.post(self._api_prefix(bucket=bucket, schema=schema, table=table, command="query-data=SelectRowIds",),
|
|
1317
|
-
data=params, headers=headers, stream=True)
|
|
1318
|
-
return self._check_res(res, "query_data", expected_retvals)
|
|
1319
|
-
|
|
1320
|
-
def read_columns_data(self, bucket, schema, table, params, txid=0, client_tags=[], expected_retvals=[], tenant_guid=None,
|
|
1321
|
-
retry_count=0, enable_sorted_projections=True):
|
|
1322
|
-
"""
|
|
1323
|
-
POST /mybucket/myschema/mytable?query-data=ReadColumns HTTP/1.1
|
|
1324
|
-
"""
|
|
1325
|
-
|
|
1326
|
-
headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
|
|
1327
|
-
headers['Content-Length'] = str(len(params))
|
|
1328
|
-
headers['tabular-enable-sorted-projections'] = str(enable_sorted_projections)
|
|
1329
|
-
if retry_count > 0:
|
|
1330
|
-
headers['tabular-retry-count'] = str(retry_count)
|
|
1331
|
-
|
|
1332
|
-
res = self.session.post(self._api_prefix(bucket=bucket, schema=schema, table=table, command="query-data=ReadColumns",),
|
|
1333
|
-
data=params, headers=headers, stream=True)
|
|
1334
|
-
return self._check_res(res, "query_data", expected_retvals)
|
|
1335
|
-
|
|
1336
|
-
def count_rows(self, bucket, schema, table, params, txid=0, client_tags=[], expected_retvals=[], tenant_guid=None,
|
|
1337
|
-
retry_count=0, enable_sorted_projections=True):
|
|
1338
|
-
"""
|
|
1339
|
-
POST /mybucket/myschema/mytable?query-data=CountRows HTTP/1.1
|
|
1340
|
-
"""
|
|
1341
|
-
headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
|
|
1342
|
-
headers['Content-Length'] = str(len(params))
|
|
1343
|
-
headers['tabular-enable-sorted-projections'] = str(enable_sorted_projections)
|
|
1344
|
-
if retry_count > 0:
|
|
1345
|
-
headers['tabular-retry-count'] = str(retry_count)
|
|
1346
|
-
|
|
1347
|
-
res = self.session.post(self._api_prefix(bucket=bucket, schema=schema, table=table, command="query-data=CountRows",),
|
|
1348
|
-
data=params, headers=headers, stream=True)
|
|
1349
|
-
return self._check_res(res, "query_data", expected_retvals)
|
|
1309
|
+
self._request(
|
|
1310
|
+
method="GET",
|
|
1311
|
+
url=self._url(command="transaction"),
|
|
1312
|
+
headers=headers)
|
|
1350
1313
|
|
|
1351
1314
|
def _build_query_data_headers(self, txid, client_tags, params, split, num_sub_splits, request_format, response_format,
|
|
1352
1315
|
enable_sorted_projections, limit_rows, schedule_id, retry_count, search_path, tenant_guid,
|
|
@@ -1388,35 +1351,6 @@ class VastdbApi:
|
|
|
1388
1351
|
url_params['name'] = projection
|
|
1389
1352
|
return url_params
|
|
1390
1353
|
|
|
1391
|
-
def legacy_query_data(self, bucket, schema, table, params, split=(0, 1, 8), num_sub_splits=1, response_row_id=False,
|
|
1392
|
-
txid=0, client_tags=[], expected_retvals=[], limit_rows=0, schedule_id=None, retry_count=0,
|
|
1393
|
-
search_path=None, sub_split_start_row_ids=[], tenant_guid=None, projection='', enable_sorted_projections=True,
|
|
1394
|
-
request_format='string', response_format='string', query_imports_table=False):
|
|
1395
|
-
"""
|
|
1396
|
-
POST /mybucket/myschema/mytable?query-data=LegacyQueryData HTTP/1.1
|
|
1397
|
-
Content-Length: ContentLength
|
|
1398
|
-
tabular-txid: TransactionId
|
|
1399
|
-
tabular-client-tag: ClientTag
|
|
1400
|
-
tabular-split: "split_id,total_splits,num_row_groups_per_split"
|
|
1401
|
-
tabular-num-of-subsplits: "total"
|
|
1402
|
-
tabular-request-format: "string"
|
|
1403
|
-
tabular-response-format: "string" #arrow/trino
|
|
1404
|
-
tabular-schedule-id: "schedule-id"
|
|
1405
|
-
|
|
1406
|
-
Request Body (flatbuf)
|
|
1407
|
-
projections_chunk [expressions]
|
|
1408
|
-
predicate_chunk "formatted_data", (required)
|
|
1409
|
-
|
|
1410
|
-
"""
|
|
1411
|
-
headers = self._build_query_data_headers(txid, client_tags, params, split, num_sub_splits, request_format, response_format,
|
|
1412
|
-
enable_sorted_projections, limit_rows, schedule_id, retry_count, search_path, tenant_guid,
|
|
1413
|
-
sub_split_start_row_ids)
|
|
1414
|
-
url_params = self._build_query_data_url_params(projection, query_imports_table)
|
|
1415
|
-
|
|
1416
|
-
res = self.session.post(self._api_prefix(bucket=bucket, schema=schema, table=table, command="query-data=LegacyQueryData",
|
|
1417
|
-
url_params=url_params), data=params, headers=headers, stream=True)
|
|
1418
|
-
return self._check_res(res, "legacy_query_data", expected_retvals)
|
|
1419
|
-
|
|
1420
1354
|
def query_data(self, bucket, schema, table, params, split=(0, 1, 8), num_sub_splits=1, response_row_id=False,
|
|
1421
1355
|
txid=0, client_tags=[], expected_retvals=[], limit_rows=0, schedule_id=None, retry_count=0,
|
|
1422
1356
|
search_path=None, sub_split_start_row_ids=[], tenant_guid=None, projection='', enable_sorted_projections=True,
|
|
@@ -1446,9 +1380,10 @@ class VastdbApi:
|
|
|
1446
1380
|
|
|
1447
1381
|
url_params = self._build_query_data_url_params(projection, query_imports_table)
|
|
1448
1382
|
|
|
1449
|
-
|
|
1450
|
-
|
|
1451
|
-
|
|
1383
|
+
return self._request(
|
|
1384
|
+
method="GET",
|
|
1385
|
+
url=self._url(bucket=bucket, schema=schema, table=table, command="data", url_params=url_params),
|
|
1386
|
+
data=params, headers=headers, stream=True)
|
|
1452
1387
|
|
|
1453
1388
|
"""
|
|
1454
1389
|
source_files: list of (bucket_name, file_name)
|
|
@@ -1534,12 +1469,14 @@ class VastdbApi:
|
|
|
1534
1469
|
headers['tabular-schedule-id'] = str(schedule_id)
|
|
1535
1470
|
if retry_count > 0:
|
|
1536
1471
|
headers['tabular-retry-count'] = str(retry_count)
|
|
1537
|
-
res = self.
|
|
1538
|
-
|
|
1472
|
+
res = self._request(
|
|
1473
|
+
method="POST",
|
|
1474
|
+
url=self._url(bucket=bucket, schema=schema, table=table, command="data"),
|
|
1475
|
+
data=import_req, headers=headers, stream=True)
|
|
1539
1476
|
if blocking:
|
|
1540
1477
|
res = iterate_over_import_data_response(res)
|
|
1541
1478
|
|
|
1542
|
-
return
|
|
1479
|
+
return res
|
|
1543
1480
|
|
|
1544
1481
|
def insert_rows(self, bucket, schema, table, record_batch, txid=0, client_tags=[], expected_retvals=[]):
|
|
1545
1482
|
"""
|
|
@@ -1553,9 +1490,10 @@ class VastdbApi:
|
|
|
1553
1490
|
"""
|
|
1554
1491
|
headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
|
|
1555
1492
|
headers['Content-Length'] = str(len(record_batch))
|
|
1556
|
-
|
|
1557
|
-
|
|
1558
|
-
|
|
1493
|
+
return self._request(
|
|
1494
|
+
method="POST",
|
|
1495
|
+
url=self._url(bucket=bucket, schema=schema, table=table, command="rows"),
|
|
1496
|
+
data=record_batch, headers=headers)
|
|
1559
1497
|
|
|
1560
1498
|
def update_rows(self, bucket, schema, table, record_batch, txid=0, client_tags=[], expected_retvals=[]):
|
|
1561
1499
|
"""
|
|
@@ -1569,9 +1507,10 @@ class VastdbApi:
|
|
|
1569
1507
|
"""
|
|
1570
1508
|
headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
|
|
1571
1509
|
headers['Content-Length'] = str(len(record_batch))
|
|
1572
|
-
|
|
1573
|
-
|
|
1574
|
-
|
|
1510
|
+
self._request(
|
|
1511
|
+
method="PUT",
|
|
1512
|
+
url=self._url(bucket=bucket, schema=schema, table=table, command="rows"),
|
|
1513
|
+
data=record_batch, headers=headers)
|
|
1575
1514
|
|
|
1576
1515
|
def delete_rows(self, bucket, schema, table, record_batch, txid=0, client_tags=[], expected_retvals=[],
|
|
1577
1516
|
delete_from_imports_table=False):
|
|
@@ -1588,9 +1527,10 @@ class VastdbApi:
|
|
|
1588
1527
|
headers['Content-Length'] = str(len(record_batch))
|
|
1589
1528
|
url_params = {'sub-table': IMPORTED_OBJECTS_TABLE_NAME} if delete_from_imports_table else {}
|
|
1590
1529
|
|
|
1591
|
-
|
|
1592
|
-
|
|
1593
|
-
|
|
1530
|
+
self._request(
|
|
1531
|
+
method="DELETE",
|
|
1532
|
+
url=self._url(bucket=bucket, schema=schema, table=table, command="rows", url_params=url_params),
|
|
1533
|
+
data=record_batch, headers=headers)
|
|
1594
1534
|
|
|
1595
1535
|
def create_projection(self, bucket, schema, table, name, columns, txid=0, client_tags=[], expected_retvals=[]):
|
|
1596
1536
|
"""
|
|
@@ -1637,9 +1577,10 @@ class VastdbApi:
|
|
|
1637
1577
|
headers['Content-Length'] = str(len(create_projection_req))
|
|
1638
1578
|
url_params = {'name': name}
|
|
1639
1579
|
|
|
1640
|
-
|
|
1641
|
-
|
|
1642
|
-
|
|
1580
|
+
self._request(
|
|
1581
|
+
method="POST",
|
|
1582
|
+
url=self._url(bucket=bucket, schema=schema, table=table, command="projection", url_params=url_params),
|
|
1583
|
+
data=create_projection_req, headers=headers)
|
|
1643
1584
|
|
|
1644
1585
|
def get_projection_stats(self, bucket, schema, table, name, txid=0, client_tags=[], expected_retvals=[]):
|
|
1645
1586
|
"""
|
|
@@ -1651,17 +1592,17 @@ class VastdbApi:
|
|
|
1651
1592
|
"""
|
|
1652
1593
|
headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
|
|
1653
1594
|
url_params = {'name': name}
|
|
1654
|
-
res = self.
|
|
1655
|
-
|
|
1656
|
-
|
|
1657
|
-
|
|
1658
|
-
|
|
1659
|
-
|
|
1660
|
-
|
|
1661
|
-
|
|
1662
|
-
|
|
1663
|
-
|
|
1664
|
-
return
|
|
1595
|
+
res = self._request(
|
|
1596
|
+
method="GET",
|
|
1597
|
+
url=self._url(bucket=bucket, schema=schema, table=table, command="projection-stats", url_params=url_params),
|
|
1598
|
+
headers=headers)
|
|
1599
|
+
|
|
1600
|
+
stats = get_projection_table_stats.GetRootAs(res.content)
|
|
1601
|
+
num_rows = stats.NumRows()
|
|
1602
|
+
size_in_bytes = stats.SizeInBytes()
|
|
1603
|
+
dirty_blocks_percentage = stats.DirtyBlocksPercentage()
|
|
1604
|
+
initial_sync_progress = stats.InitialSyncProgress()
|
|
1605
|
+
return num_rows, size_in_bytes, dirty_blocks_percentage, initial_sync_progress
|
|
1665
1606
|
|
|
1666
1607
|
def alter_projection(self, bucket, schema, table, name, txid=0, client_tags=[], table_properties="",
|
|
1667
1608
|
new_name="", expected_retvals=[]):
|
|
@@ -1693,10 +1634,10 @@ class VastdbApi:
|
|
|
1693
1634
|
headers['Content-Length'] = str(len(alter_projection_req))
|
|
1694
1635
|
url_params = {'name': name}
|
|
1695
1636
|
|
|
1696
|
-
|
|
1697
|
-
|
|
1698
|
-
|
|
1699
|
-
|
|
1637
|
+
self._request(
|
|
1638
|
+
method="PUT",
|
|
1639
|
+
url=self._url(bucket=bucket, schema=schema, table=table, command="projection", url_params=url_params),
|
|
1640
|
+
data=alter_projection_req, headers=headers)
|
|
1700
1641
|
|
|
1701
1642
|
def drop_projection(self, bucket, schema, table, name, txid=0, client_tags=[], expected_retvals=[]):
|
|
1702
1643
|
"""
|
|
@@ -1707,9 +1648,10 @@ class VastdbApi:
|
|
|
1707
1648
|
headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
|
|
1708
1649
|
url_params = {'name': name}
|
|
1709
1650
|
|
|
1710
|
-
|
|
1711
|
-
|
|
1712
|
-
|
|
1651
|
+
self._request(
|
|
1652
|
+
method="DELETE",
|
|
1653
|
+
url=self._url(bucket=bucket, schema=schema, table=table, command="projection", url_params=url_params),
|
|
1654
|
+
headers=headers)
|
|
1713
1655
|
|
|
1714
1656
|
def list_projections(self, bucket, schema, table, txid=0, client_tags=[], max_keys=1000, next_key=0, name_prefix="",
|
|
1715
1657
|
exact_match=False, expected_retvals=[], include_list_stats=False, count_only=False):
|
|
@@ -1733,24 +1675,26 @@ class VastdbApi:
|
|
|
1733
1675
|
headers['tabular-include-list-stats'] = str(include_list_stats)
|
|
1734
1676
|
|
|
1735
1677
|
projections = []
|
|
1736
|
-
res = self.
|
|
1737
|
-
|
|
1738
|
-
|
|
1739
|
-
|
|
1740
|
-
|
|
1741
|
-
|
|
1742
|
-
|
|
1743
|
-
|
|
1744
|
-
|
|
1745
|
-
|
|
1746
|
-
|
|
1747
|
-
|
|
1748
|
-
|
|
1749
|
-
|
|
1750
|
-
|
|
1751
|
-
|
|
1752
|
-
|
|
1753
|
-
|
|
1678
|
+
res = self._request(
|
|
1679
|
+
method="GET",
|
|
1680
|
+
url=self._url(bucket=bucket, schema=schema, table=table, command="projection"),
|
|
1681
|
+
headers=headers)
|
|
1682
|
+
|
|
1683
|
+
res_headers = res.headers
|
|
1684
|
+
next_key = int(res_headers['tabular-next-key'])
|
|
1685
|
+
is_truncated = res_headers['tabular-is-truncated'] == 'true'
|
|
1686
|
+
count = int(res_headers['tabular-list-count'])
|
|
1687
|
+
lists = list_projections.GetRootAs(res.content)
|
|
1688
|
+
bucket_name = lists.BucketName().decode()
|
|
1689
|
+
schema_name = lists.SchemaName().decode()
|
|
1690
|
+
table_name = lists.TableName().decode()
|
|
1691
|
+
if not bucket.startswith(bucket_name): # ignore snapshot name
|
|
1692
|
+
raise ValueError(f'bucket: {bucket} did not start from {bucket_name}')
|
|
1693
|
+
projections_length = lists.ProjectionsLength()
|
|
1694
|
+
for i in range(projections_length):
|
|
1695
|
+
projections.append(_parse_table_info(lists.Projections(i)))
|
|
1696
|
+
|
|
1697
|
+
return bucket_name, schema_name, table_name, projections, next_key, is_truncated, count
|
|
1754
1698
|
|
|
1755
1699
|
def list_projection_columns(self, bucket, schema, table, projection, txid=0, client_tags=[], max_keys=1000,
|
|
1756
1700
|
next_key=0, count_only=False, name_prefix="", exact_match=False,
|
|
@@ -1778,19 +1722,20 @@ class VastdbApi:
|
|
|
1778
1722
|
|
|
1779
1723
|
url_params = {'name': projection}
|
|
1780
1724
|
|
|
1781
|
-
res = self.
|
|
1782
|
-
|
|
1783
|
-
|
|
1725
|
+
res = self._request(
|
|
1726
|
+
method="GET",
|
|
1727
|
+
url=self._url(bucket=bucket, schema=schema, table=table, command="projection-columns", url_params=url_params),
|
|
1728
|
+
headers=headers)
|
|
1729
|
+
|
|
1784
1730
|
# list projection columns response will also show column type Sorted/UnSorted
|
|
1785
|
-
|
|
1786
|
-
|
|
1787
|
-
|
|
1788
|
-
|
|
1789
|
-
|
|
1790
|
-
|
|
1791
|
-
pa.ipc.open_stream(res.content).schema]
|
|
1731
|
+
res_headers = res.headers
|
|
1732
|
+
next_key = int(res_headers['tabular-next-key'])
|
|
1733
|
+
is_truncated = res_headers['tabular-is-truncated'] == 'true'
|
|
1734
|
+
count = int(res_headers['tabular-list-count'])
|
|
1735
|
+
columns = [] if count_only else [[f.name, f.type, f.metadata] for f in
|
|
1736
|
+
pa.ipc.open_stream(res.content).schema]
|
|
1792
1737
|
|
|
1793
|
-
|
|
1738
|
+
return columns, next_key, is_truncated, count
|
|
1794
1739
|
|
|
1795
1740
|
|
|
1796
1741
|
class QueryDataInternalError(Exception):
|
|
@@ -2137,40 +2082,3 @@ def build_query_data_request(schema: 'pa.Schema' = pa.schema([]), predicate: ibi
|
|
|
2137
2082
|
builder.Finish(relation)
|
|
2138
2083
|
|
|
2139
2084
|
return QueryDataRequest(serialized=builder.Output(), response_schema=response_schema, response_parser=QueryDataParser(response_schema))
|
|
2140
|
-
|
|
2141
|
-
|
|
2142
|
-
def convert_column_types(table: 'pa.Table') -> 'pa.Table':
|
|
2143
|
-
"""
|
|
2144
|
-
Adjusting table values
|
|
2145
|
-
|
|
2146
|
-
1. Because the timestamp resolution is too high it is necessary to trim it. ORION-96961
|
|
2147
|
-
2. Since the values of nfs_mode_bits are returned in decimal, need to convert them to octal,
|
|
2148
|
-
as in all representations, so that the mode of 448 turn into 700
|
|
2149
|
-
3. for owner_name and group_owner_name 0 -> root, and 65534 -> nobody
|
|
2150
|
-
"""
|
|
2151
|
-
ts_indexes = []
|
|
2152
|
-
indexes_of_fields_to_change = {}
|
|
2153
|
-
sid_to_name = {
|
|
2154
|
-
'0': 'root',
|
|
2155
|
-
'65534': 'nobody' # NFSNOBODY_UID_16_BIT
|
|
2156
|
-
}
|
|
2157
|
-
column_matcher = { # column_name: custom converting rule
|
|
2158
|
-
'nfs_mode_bits': lambda val: int(oct(val).replace('0o', '')) if val is not None else val,
|
|
2159
|
-
'owner_name': lambda val: sid_to_name.get(val, val),
|
|
2160
|
-
'group_owner_name': lambda val: sid_to_name.get(val, val),
|
|
2161
|
-
}
|
|
2162
|
-
for index, field in enumerate(table.schema):
|
|
2163
|
-
if isinstance(field.type, pa.TimestampType) and field.type.unit == 'ns':
|
|
2164
|
-
ts_indexes.append(index)
|
|
2165
|
-
if field.name in column_matcher:
|
|
2166
|
-
indexes_of_fields_to_change[field.name] = index
|
|
2167
|
-
for changing_index in ts_indexes:
|
|
2168
|
-
field_name = table.schema[changing_index].name
|
|
2169
|
-
new_column = table[field_name].cast(pa.timestamp('us'), safe=False)
|
|
2170
|
-
table = table.set_column(changing_index, field_name, new_column)
|
|
2171
|
-
for field_name, changing_index in indexes_of_fields_to_change.items():
|
|
2172
|
-
new_column = table[field_name].to_pylist()
|
|
2173
|
-
new_column = list(map(column_matcher[field_name], new_column))
|
|
2174
|
-
new_column = pa.array(new_column, table[field_name].type)
|
|
2175
|
-
table = table.set_column(changing_index, field_name, new_column)
|
|
2176
|
-
return table
|