vastdb 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vastdb/__init__.py +3 -0
- vastdb/{internal_commands.py → _internal.py} +289 -346
- vastdb/bucket.py +2 -2
- vastdb/conftest.py +16 -2
- vastdb/errors.py +6 -0
- vastdb/schema.py +8 -4
- vastdb/session.py +18 -5
- vastdb/table.py +79 -58
- vastdb/tests/test_duckdb.py +2 -2
- vastdb/tests/test_projections.py +5 -1
- vastdb/tests/test_sanity.py +5 -5
- vastdb/tests/test_tables.py +54 -1
- vastdb/tests/test_util.py +6 -0
- vastdb/transaction.py +2 -2
- vastdb/util.py +40 -1
- vastdb/vast_tests/__init__.py +0 -0
- vastdb/vast_tests/test_ha.py +29 -0
- {vastdb-0.1.6.dist-info → vastdb-0.1.8.dist-info}/METADATA +2 -2
- {vastdb-0.1.6.dist-info → vastdb-0.1.8.dist-info}/RECORD +22 -20
- {vastdb-0.1.6.dist-info → vastdb-0.1.8.dist-info}/LICENSE +0 -0
- {vastdb-0.1.6.dist-info → vastdb-0.1.8.dist-info}/WHEEL +0 -0
- {vastdb-0.1.6.dist-info → vastdb-0.1.8.dist-info}/top_level.txt +0 -0
|
@@ -5,17 +5,37 @@ import re
|
|
|
5
5
|
import struct
|
|
6
6
|
import urllib.parse
|
|
7
7
|
from collections import defaultdict, namedtuple
|
|
8
|
+
from dataclasses import dataclass, field
|
|
8
9
|
from enum import Enum
|
|
9
|
-
from typing import Any, Dict, Iterator, List, Optional, Union
|
|
10
|
+
from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, Union
|
|
10
11
|
|
|
12
|
+
import backoff
|
|
11
13
|
import flatbuffers
|
|
12
14
|
import ibis
|
|
13
15
|
import pyarrow as pa
|
|
14
|
-
import pyarrow.parquet as pq
|
|
15
16
|
import requests
|
|
16
17
|
import urllib3
|
|
17
18
|
import xmltodict
|
|
18
19
|
from aws_requests_auth.aws_auth import AWSRequestsAuth
|
|
20
|
+
from ibis.expr.operations.generic import (
|
|
21
|
+
IsNull,
|
|
22
|
+
Literal,
|
|
23
|
+
)
|
|
24
|
+
from ibis.expr.operations.logical import (
|
|
25
|
+
And,
|
|
26
|
+
Between,
|
|
27
|
+
Equals,
|
|
28
|
+
Greater,
|
|
29
|
+
GreaterEqual,
|
|
30
|
+
InValues,
|
|
31
|
+
Less,
|
|
32
|
+
LessEqual,
|
|
33
|
+
Not,
|
|
34
|
+
NotEquals,
|
|
35
|
+
Or,
|
|
36
|
+
)
|
|
37
|
+
from ibis.expr.operations.relations import Field
|
|
38
|
+
from ibis.expr.operations.strings import StringContains
|
|
19
39
|
|
|
20
40
|
import vast_flatbuf.org.apache.arrow.computeir.flatbuf.BinaryLiteral as fb_binary_lit
|
|
21
41
|
import vast_flatbuf.org.apache.arrow.computeir.flatbuf.BooleanLiteral as fb_bool_lit
|
|
@@ -137,26 +157,6 @@ class Predicate:
|
|
|
137
157
|
self.expr = expr
|
|
138
158
|
|
|
139
159
|
def serialize(self, builder: 'flatbuffers.builder.Builder'):
|
|
140
|
-
from ibis.expr.operations.generic import (
|
|
141
|
-
IsNull,
|
|
142
|
-
Literal,
|
|
143
|
-
TableColumn,
|
|
144
|
-
)
|
|
145
|
-
from ibis.expr.operations.logical import (
|
|
146
|
-
And,
|
|
147
|
-
Between,
|
|
148
|
-
Equals,
|
|
149
|
-
Greater,
|
|
150
|
-
GreaterEqual,
|
|
151
|
-
InValues,
|
|
152
|
-
Less,
|
|
153
|
-
LessEqual,
|
|
154
|
-
Not,
|
|
155
|
-
NotEquals,
|
|
156
|
-
Or,
|
|
157
|
-
)
|
|
158
|
-
from ibis.expr.operations.strings import StringContains
|
|
159
|
-
|
|
160
160
|
builder_map = {
|
|
161
161
|
Greater: self.build_greater,
|
|
162
162
|
GreaterEqual: self.build_greater_equal,
|
|
@@ -216,7 +216,7 @@ class Predicate:
|
|
|
216
216
|
if not isinstance(literal, Literal):
|
|
217
217
|
raise NotImplementedError(self.expr)
|
|
218
218
|
|
|
219
|
-
if not isinstance(column,
|
|
219
|
+
if not isinstance(column, Field):
|
|
220
220
|
raise NotImplementedError(self.expr)
|
|
221
221
|
|
|
222
222
|
field_name = column.name
|
|
@@ -722,19 +722,67 @@ def _parse_table_info(obj):
|
|
|
722
722
|
TableStatsResult = namedtuple("TableStatsResult", ["num_rows", "size_in_bytes", "is_external_rowid_alloc", "endpoints"])
|
|
723
723
|
|
|
724
724
|
|
|
725
|
+
_RETRIABLE_EXCEPTIONS = (
|
|
726
|
+
errors.ConnectionError, # only if 'may_retry' is True
|
|
727
|
+
errors.Slowdown,
|
|
728
|
+
)
|
|
729
|
+
|
|
730
|
+
|
|
731
|
+
def _backoff_giveup(exc: Exception) -> bool:
|
|
732
|
+
"""Exception types below MUST be part of `_RETRIABLE_EXCEPTIONS` above."""
|
|
733
|
+
|
|
734
|
+
_logger.info("Backoff giveup: %r", exc)
|
|
735
|
+
if isinstance(exc, errors.Slowdown):
|
|
736
|
+
return False # the server is overloaded, don't give up
|
|
737
|
+
|
|
738
|
+
if isinstance(exc, errors.ConnectionError):
|
|
739
|
+
if exc.may_retry:
|
|
740
|
+
return False # don't give up of retriable connection errors
|
|
741
|
+
|
|
742
|
+
return True # give up in case of other exceptions
|
|
743
|
+
|
|
744
|
+
|
|
745
|
+
@dataclass
|
|
746
|
+
class BackoffConfig:
|
|
747
|
+
wait_gen: Callable = field(default=backoff.expo)
|
|
748
|
+
max_value: Optional[float] = None # max duration for a single wait period
|
|
749
|
+
max_tries: int = 10
|
|
750
|
+
max_time: float = 60.0 # in seconds
|
|
751
|
+
backoff_log_level: int = logging.DEBUG
|
|
752
|
+
|
|
753
|
+
|
|
725
754
|
class VastdbApi:
|
|
726
755
|
# we expect the vast version to be <major>.<minor>.<patch>.<protocol>
|
|
727
756
|
VAST_VERSION_REGEX = re.compile(r'^vast (\d+\.\d+\.\d+\.\d+)$')
|
|
728
757
|
|
|
729
|
-
def __init__(self, endpoint, access_key, secret_key,
|
|
758
|
+
def __init__(self, endpoint, access_key, secret_key,
|
|
759
|
+
*,
|
|
760
|
+
auth_type=AuthType.SIGV4,
|
|
761
|
+
ssl_verify=True,
|
|
762
|
+
backoff_config: Optional[BackoffConfig] = None):
|
|
763
|
+
|
|
764
|
+
from . import __version__ # import lazily here (to avoid circular dependencies)
|
|
765
|
+
self.client_sdk_version = f"VAST Database Python SDK {__version__} - 2024 (c)"
|
|
766
|
+
|
|
730
767
|
url = urllib3.util.parse_url(endpoint)
|
|
731
768
|
self.access_key = access_key
|
|
732
769
|
self.secret_key = secret_key
|
|
733
770
|
|
|
734
771
|
self.default_max_list_columns_page_size = 1000
|
|
735
|
-
self.
|
|
736
|
-
self.
|
|
737
|
-
self.
|
|
772
|
+
self._session = requests.Session()
|
|
773
|
+
self._session.verify = ssl_verify
|
|
774
|
+
self._session.headers['user-agent'] = self.client_sdk_version
|
|
775
|
+
|
|
776
|
+
backoff_config = backoff_config or BackoffConfig()
|
|
777
|
+
self._backoff_decorator = backoff.on_exception(
|
|
778
|
+
wait_gen=backoff_config.wait_gen,
|
|
779
|
+
exception=_RETRIABLE_EXCEPTIONS,
|
|
780
|
+
giveup=_backoff_giveup,
|
|
781
|
+
max_tries=backoff_config.max_tries,
|
|
782
|
+
max_time=backoff_config.max_time,
|
|
783
|
+
max_value=backoff_config.max_value, # passed to `backoff_config.wait_gen`
|
|
784
|
+
backoff_log_level=backoff_config.backoff_log_level)
|
|
785
|
+
self._request = self._backoff_decorator(self._single_request)
|
|
738
786
|
|
|
739
787
|
if url.port in {80, 443, None}:
|
|
740
788
|
self.aws_host = f'{url.host}'
|
|
@@ -744,22 +792,21 @@ class VastdbApi:
|
|
|
744
792
|
self.url = str(url)
|
|
745
793
|
_logger.debug('url=%s aws_host=%s', self.url, self.aws_host)
|
|
746
794
|
|
|
747
|
-
self.
|
|
795
|
+
self._session.auth = AWSRequestsAuth(aws_access_key=access_key,
|
|
748
796
|
aws_secret_access_key=secret_key,
|
|
749
797
|
aws_host=self.aws_host,
|
|
750
|
-
aws_region='
|
|
798
|
+
aws_region='',
|
|
751
799
|
aws_service='s3')
|
|
752
800
|
|
|
753
801
|
# probe the cluster for its version
|
|
754
|
-
self.
|
|
755
|
-
|
|
802
|
+
res = self._request(method="GET", url=self._url(command="transaction"), skip_status_check=True) # used only for the response headers
|
|
803
|
+
_logger.debug("headers=%s code=%s content=%s", res.headers, res.status_code, res.content)
|
|
756
804
|
server_header = res.headers.get("Server")
|
|
757
805
|
if server_header is None:
|
|
758
806
|
_logger.error("Response doesn't contain 'Server' header")
|
|
759
807
|
else:
|
|
760
|
-
_logger.debug("Server header is '%s'", server_header)
|
|
761
808
|
if m := self.VAST_VERSION_REGEX.match(server_header):
|
|
762
|
-
self.vast_version, = m.
|
|
809
|
+
self.vast_version: Tuple[int, ...] = tuple(int(v) for v in m.group(1).split("."))
|
|
763
810
|
return
|
|
764
811
|
else:
|
|
765
812
|
_logger.error("'Server' header '%s' doesn't match the expected pattern", server_header)
|
|
@@ -772,15 +819,21 @@ class VastdbApi:
|
|
|
772
819
|
_logger.critical(msg)
|
|
773
820
|
raise NotImplementedError(msg)
|
|
774
821
|
|
|
775
|
-
def
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
822
|
+
def _single_request(self, *, method, url, skip_status_check=False, **kwargs):
|
|
823
|
+
_logger.debug("Sending request: %s %s %s", method, url, kwargs)
|
|
824
|
+
try:
|
|
825
|
+
res = self._session.request(method=method, url=url, **kwargs)
|
|
826
|
+
except requests.exceptions.ConnectionError as err:
|
|
827
|
+
# low-level connection issue, it is safe to retry only read-only requests
|
|
828
|
+
may_retry = (method == "GET")
|
|
829
|
+
raise errors.ConnectionError(cause=err, may_retry=may_retry) from err
|
|
830
|
+
|
|
831
|
+
if not skip_status_check:
|
|
832
|
+
if exc := errors.from_response(res):
|
|
833
|
+
raise exc # application-level error
|
|
834
|
+
return res # successful response
|
|
835
|
+
|
|
836
|
+
def _url(self, bucket="", schema="", table="", command="", url_params={}):
|
|
784
837
|
prefix_list = [self.url]
|
|
785
838
|
if len(bucket):
|
|
786
839
|
prefix_list.append(bucket)
|
|
@@ -815,11 +868,6 @@ class VastdbApi:
|
|
|
815
868
|
|
|
816
869
|
return common_headers | {f'tabular-client-tags-{index}': tag for index, tag in enumerate(client_tags)}
|
|
817
870
|
|
|
818
|
-
def _check_res(self, res, cmd="", expected_retvals=[]):
|
|
819
|
-
if exc := errors.from_response(res):
|
|
820
|
-
raise exc
|
|
821
|
-
return res
|
|
822
|
-
|
|
823
871
|
def create_schema(self, bucket, name, txid=0, client_tags=[], schema_properties="", expected_retvals=[]):
|
|
824
872
|
"""
|
|
825
873
|
Create a collection of tables, use the following request
|
|
@@ -841,10 +889,10 @@ class VastdbApi:
|
|
|
841
889
|
|
|
842
890
|
headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
|
|
843
891
|
headers['Content-Length'] = str(len(create_schema_req))
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
892
|
+
self._request(
|
|
893
|
+
method="POST",
|
|
894
|
+
url=self._url(bucket=bucket, schema=name, command="schema"),
|
|
895
|
+
data=create_schema_req, headers=headers)
|
|
848
896
|
|
|
849
897
|
def alter_schema(self, bucket, name, txid=0, client_tags=[], schema_properties="", new_name="", expected_retvals=[]):
|
|
850
898
|
"""
|
|
@@ -870,10 +918,10 @@ class VastdbApi:
|
|
|
870
918
|
headers['Content-Length'] = str(len(alter_schema_req))
|
|
871
919
|
url_params = {'tabular-new-schema-name': new_name} if len(new_name) else {}
|
|
872
920
|
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
921
|
+
self._request(
|
|
922
|
+
method="PUT",
|
|
923
|
+
url=self._url(bucket=bucket, schema=name, command="schema", url_params=url_params),
|
|
924
|
+
data=alter_schema_req, headers=headers)
|
|
877
925
|
|
|
878
926
|
def drop_schema(self, bucket, name, txid=0, client_tags=[], expected_retvals=[]):
|
|
879
927
|
"""
|
|
@@ -884,9 +932,10 @@ class VastdbApi:
|
|
|
884
932
|
"""
|
|
885
933
|
headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
|
|
886
934
|
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
935
|
+
self._request(
|
|
936
|
+
method="DELETE",
|
|
937
|
+
url=self._url(bucket=bucket, schema=name, command="schema"),
|
|
938
|
+
headers=headers)
|
|
890
939
|
|
|
891
940
|
def list_schemas(self, bucket, schema="", txid=0, client_tags=[], max_keys=1000, next_key=0, name_prefix="",
|
|
892
941
|
exact_match=False, expected_retvals=[], count_only=False):
|
|
@@ -915,25 +964,27 @@ class VastdbApi:
|
|
|
915
964
|
|
|
916
965
|
schemas = []
|
|
917
966
|
schema = schema or ""
|
|
918
|
-
res = self.
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
967
|
+
res = self._request(
|
|
968
|
+
method="GET",
|
|
969
|
+
url=self._url(bucket=bucket, schema=schema, command="schema"),
|
|
970
|
+
headers=headers)
|
|
971
|
+
|
|
972
|
+
res_headers = res.headers
|
|
973
|
+
next_key = int(res_headers['tabular-next-key'])
|
|
974
|
+
is_truncated = res_headers['tabular-is-truncated'] == 'true'
|
|
975
|
+
lists = list_schemas.GetRootAs(res.content)
|
|
976
|
+
bucket_name = lists.BucketName().decode()
|
|
977
|
+
if not bucket.startswith(bucket_name):
|
|
978
|
+
raise ValueError(f'bucket: {bucket} did not start from {bucket_name}')
|
|
979
|
+
schemas_length = lists.SchemasLength()
|
|
980
|
+
count = int(res_headers['tabular-list-count']) if 'tabular-list-count' in res_headers else schemas_length
|
|
981
|
+
for i in range(schemas_length):
|
|
982
|
+
schema_obj = lists.Schemas(i)
|
|
983
|
+
name = schema_obj.Name().decode()
|
|
984
|
+
properties = schema_obj.Properties().decode()
|
|
985
|
+
schemas.append([name, properties])
|
|
986
|
+
|
|
987
|
+
return bucket_name, schemas, next_key, is_truncated, count
|
|
937
988
|
|
|
938
989
|
def list_snapshots(self, bucket, max_keys=1000, next_token=None, name_prefix=''):
|
|
939
990
|
next_token = next_token or ''
|
|
@@ -941,8 +992,9 @@ class VastdbApi:
|
|
|
941
992
|
if next_token:
|
|
942
993
|
url_params['continuation-token'] = next_token
|
|
943
994
|
|
|
944
|
-
res = self.
|
|
945
|
-
|
|
995
|
+
res = self._request(
|
|
996
|
+
method="GET",
|
|
997
|
+
url=self._url(bucket=bucket, command="list", url_params=url_params))
|
|
946
998
|
|
|
947
999
|
xml_str = res.content.decode()
|
|
948
1000
|
xml_dict = xmltodict.parse(xml_str)
|
|
@@ -985,33 +1037,10 @@ class VastdbApi:
|
|
|
985
1037
|
if create_imports_table:
|
|
986
1038
|
url_params['sub-table'] = IMPORTED_OBJECTS_TABLE_NAME
|
|
987
1039
|
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
def create_table_from_parquet_schema(self, bucket, schema, name, parquet_path=None,
|
|
993
|
-
parquet_bucket_name=None, parquet_object_name=None,
|
|
994
|
-
txid=0, client_tags=[], expected_retvals=[]):
|
|
995
|
-
|
|
996
|
-
# Use pyarrow.parquet.ParquetDataset to open the Parquet file
|
|
997
|
-
if parquet_path:
|
|
998
|
-
parquet_ds = pq.ParquetDataset(parquet_path)
|
|
999
|
-
elif parquet_bucket_name and parquet_object_name:
|
|
1000
|
-
s3fs = pa.fs.S3FileSystem(access_key=self.access_key, secret_key=self.secret_key, endpoint_override=self.url)
|
|
1001
|
-
parquet_ds = pq.ParquetDataset('/'.join([parquet_bucket_name, parquet_object_name]), filesystem=s3fs)
|
|
1002
|
-
else:
|
|
1003
|
-
raise RuntimeError(f'invalid params parquet_path={parquet_path} parquet_bucket_name={parquet_bucket_name} parquet_object_name={parquet_object_name}')
|
|
1004
|
-
|
|
1005
|
-
# Get the schema of the Parquet file
|
|
1006
|
-
if isinstance(parquet_ds.schema, pq.ParquetSchema):
|
|
1007
|
-
arrow_schema = parquet_ds.schema.to_arrow_schema()
|
|
1008
|
-
elif isinstance(parquet_ds.schema, pa.Schema):
|
|
1009
|
-
arrow_schema = parquet_ds.schema
|
|
1010
|
-
else:
|
|
1011
|
-
raise RuntimeError(f'invalid type(parquet_ds.schema) = {type(parquet_ds.schema)}')
|
|
1012
|
-
|
|
1013
|
-
# create the table
|
|
1014
|
-
return self.create_table(bucket, schema, name, arrow_schema, txid, client_tags, expected_retvals)
|
|
1040
|
+
self._request(
|
|
1041
|
+
method="POST",
|
|
1042
|
+
url=self._url(bucket=bucket, schema=schema, table=name, command="table", url_params=url_params),
|
|
1043
|
+
data=serialized_schema, headers=headers)
|
|
1015
1044
|
|
|
1016
1045
|
def get_table_stats(self, bucket, schema, name, txid=0, client_tags=[], expected_retvals=[], imports_table_stats=False):
|
|
1017
1046
|
"""
|
|
@@ -1023,8 +1052,10 @@ class VastdbApi:
|
|
|
1023
1052
|
"""
|
|
1024
1053
|
headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
|
|
1025
1054
|
url_params = {'sub-table': IMPORTED_OBJECTS_TABLE_NAME} if imports_table_stats else {}
|
|
1026
|
-
res = self.
|
|
1027
|
-
|
|
1055
|
+
res = self._request(
|
|
1056
|
+
method="GET",
|
|
1057
|
+
url=self._url(bucket=bucket, schema=schema, table=name, command="stats", url_params=url_params),
|
|
1058
|
+
headers=headers)
|
|
1028
1059
|
|
|
1029
1060
|
stats = get_table_stats.GetRootAs(res.content)
|
|
1030
1061
|
num_rows = stats.NumRows()
|
|
@@ -1059,10 +1090,10 @@ class VastdbApi:
|
|
|
1059
1090
|
headers['Content-Length'] = str(len(alter_table_req))
|
|
1060
1091
|
url_params = {'tabular-new-table-name': schema + "/" + new_name} if len(new_name) else {}
|
|
1061
1092
|
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1093
|
+
self._request(
|
|
1094
|
+
method="PUT",
|
|
1095
|
+
url=self._url(bucket=bucket, schema=schema, table=name, command="table", url_params=url_params),
|
|
1096
|
+
data=alter_table_req, headers=headers)
|
|
1066
1097
|
|
|
1067
1098
|
def drop_table(self, bucket, schema, name, txid=0, client_tags=[], expected_retvals=[], remove_imports_table=False):
|
|
1068
1099
|
"""
|
|
@@ -1075,9 +1106,10 @@ class VastdbApi:
|
|
|
1075
1106
|
headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
|
|
1076
1107
|
url_params = {'sub-table': IMPORTED_OBJECTS_TABLE_NAME} if remove_imports_table else {}
|
|
1077
1108
|
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1109
|
+
self._request(
|
|
1110
|
+
method="DELETE",
|
|
1111
|
+
url=self._url(bucket=bucket, schema=schema, table=name, command="table", url_params=url_params),
|
|
1112
|
+
headers=headers)
|
|
1081
1113
|
|
|
1082
1114
|
def list_tables(self, bucket, schema, txid=0, client_tags=[], max_keys=1000, next_key=0, name_prefix="",
|
|
1083
1115
|
exact_match=False, expected_retvals=[], include_list_stats=False, count_only=False):
|
|
@@ -1101,23 +1133,25 @@ class VastdbApi:
|
|
|
1101
1133
|
headers['tabular-include-list-stats'] = str(include_list_stats)
|
|
1102
1134
|
|
|
1103
1135
|
tables = []
|
|
1104
|
-
res = self.
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1136
|
+
res = self._request(
|
|
1137
|
+
method="GET",
|
|
1138
|
+
url=self._url(bucket=bucket, schema=schema, command="table"),
|
|
1139
|
+
headers=headers)
|
|
1140
|
+
|
|
1141
|
+
res_headers = res.headers
|
|
1142
|
+
next_key = int(res_headers['tabular-next-key'])
|
|
1143
|
+
is_truncated = res_headers['tabular-is-truncated'] == 'true'
|
|
1144
|
+
lists = list_tables.GetRootAs(res.content)
|
|
1145
|
+
bucket_name = lists.BucketName().decode()
|
|
1146
|
+
schema_name = lists.SchemaName().decode()
|
|
1147
|
+
if not bucket.startswith(bucket_name): # ignore snapshot name
|
|
1148
|
+
raise ValueError(f'bucket: {bucket} did not start from {bucket_name}')
|
|
1149
|
+
tables_length = lists.TablesLength()
|
|
1150
|
+
count = int(res_headers['tabular-list-count']) if 'tabular-list-count' in res_headers else tables_length
|
|
1151
|
+
for i in range(tables_length):
|
|
1152
|
+
tables.append(_parse_table_info(lists.Tables(i)))
|
|
1153
|
+
|
|
1154
|
+
return bucket_name, schema_name, tables, next_key, is_truncated, count
|
|
1121
1155
|
|
|
1122
1156
|
def add_columns(self, bucket, schema, name, arrow_schema, txid=0, client_tags=[], expected_retvals=[]):
|
|
1123
1157
|
"""
|
|
@@ -1139,9 +1173,10 @@ class VastdbApi:
|
|
|
1139
1173
|
serialized_schema = arrow_schema.serialize()
|
|
1140
1174
|
headers['Content-Length'] = str(len(serialized_schema))
|
|
1141
1175
|
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1176
|
+
self._request(
|
|
1177
|
+
method="POST",
|
|
1178
|
+
url=self._url(bucket=bucket, schema=schema, table=name, command="column"),
|
|
1179
|
+
data=serialized_schema, headers=headers)
|
|
1145
1180
|
|
|
1146
1181
|
def alter_column(self, bucket, schema, table, name, txid=0, client_tags=[], column_properties="",
|
|
1147
1182
|
new_name="", column_sep=".", column_stats="", expected_retvals=[]):
|
|
@@ -1177,9 +1212,10 @@ class VastdbApi:
|
|
|
1177
1212
|
if len(new_name):
|
|
1178
1213
|
url_params['tabular-new-column-name'] = new_name
|
|
1179
1214
|
|
|
1180
|
-
|
|
1181
|
-
|
|
1182
|
-
|
|
1215
|
+
self._request(
|
|
1216
|
+
method="PUT",
|
|
1217
|
+
url=self._url(bucket=bucket, schema=schema, table=table, command="column", url_params=url_params),
|
|
1218
|
+
data=alter_column_req, headers=headers)
|
|
1183
1219
|
|
|
1184
1220
|
def drop_columns(self, bucket, schema, table, arrow_schema, txid=0, client_tags=[], expected_retvals=[]):
|
|
1185
1221
|
"""
|
|
@@ -1192,9 +1228,10 @@ class VastdbApi:
|
|
|
1192
1228
|
serialized_schema = arrow_schema.serialize()
|
|
1193
1229
|
headers['Content-Length'] = str(len(serialized_schema))
|
|
1194
1230
|
|
|
1195
|
-
|
|
1196
|
-
|
|
1197
|
-
|
|
1231
|
+
self._request(
|
|
1232
|
+
method="DELETE",
|
|
1233
|
+
url=self._url(bucket=bucket, schema=schema, table=table, command="column"),
|
|
1234
|
+
data=serialized_schema, headers=headers)
|
|
1198
1235
|
|
|
1199
1236
|
def list_columns(self, bucket, schema, table, *, txid=0, client_tags=None, max_keys=None, next_key=0,
|
|
1200
1237
|
count_only=False, name_prefix="", exact_match=False,
|
|
@@ -1226,18 +1263,18 @@ class VastdbApi:
|
|
|
1226
1263
|
headers['tabular-name-prefix'] = name_prefix
|
|
1227
1264
|
|
|
1228
1265
|
url_params = {'sub-table': IMPORTED_OBJECTS_TABLE_NAME} if list_imports_table else {}
|
|
1229
|
-
res = self.
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
|
|
1266
|
+
res = self._request(
|
|
1267
|
+
method="GET",
|
|
1268
|
+
url=self._url(bucket=bucket, schema=schema, table=table, command="column", url_params=url_params),
|
|
1269
|
+
headers=headers)
|
|
1270
|
+
|
|
1271
|
+
res_headers = res.headers
|
|
1272
|
+
next_key = int(res_headers['tabular-next-key'])
|
|
1273
|
+
is_truncated = res_headers['tabular-is-truncated'] == 'true'
|
|
1274
|
+
count = int(res_headers['tabular-list-count'])
|
|
1275
|
+
columns = [] if count_only else pa.ipc.open_stream(res.content).schema
|
|
1276
|
+
|
|
1277
|
+
return columns, next_key, is_truncated, count
|
|
1241
1278
|
|
|
1242
1279
|
def begin_transaction(self, client_tags=[], expected_retvals=[]):
|
|
1243
1280
|
"""
|
|
@@ -1248,8 +1285,10 @@ class VastdbApi:
|
|
|
1248
1285
|
tabular-txid: TransactionId
|
|
1249
1286
|
"""
|
|
1250
1287
|
headers = self._fill_common_headers(client_tags=client_tags)
|
|
1251
|
-
|
|
1252
|
-
|
|
1288
|
+
return self._request(
|
|
1289
|
+
method="POST",
|
|
1290
|
+
url=self._url(command="transaction"),
|
|
1291
|
+
headers=headers)
|
|
1253
1292
|
|
|
1254
1293
|
def commit_transaction(self, txid, client_tags=[], expected_retvals=[]):
|
|
1255
1294
|
"""
|
|
@@ -1258,8 +1297,10 @@ class VastdbApi:
|
|
|
1258
1297
|
tabular-client-tag: ClientTag
|
|
1259
1298
|
"""
|
|
1260
1299
|
headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
|
|
1261
|
-
|
|
1262
|
-
|
|
1300
|
+
self._request(
|
|
1301
|
+
method="PUT",
|
|
1302
|
+
url=self._url(command="transaction"),
|
|
1303
|
+
headers=headers)
|
|
1263
1304
|
|
|
1264
1305
|
def rollback_transaction(self, txid, client_tags=[], expected_retvals=[]):
|
|
1265
1306
|
"""
|
|
@@ -1268,8 +1309,10 @@ class VastdbApi:
|
|
|
1268
1309
|
tabular-client-tag: ClientTag
|
|
1269
1310
|
"""
|
|
1270
1311
|
headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
|
|
1271
|
-
|
|
1272
|
-
|
|
1312
|
+
self._request(
|
|
1313
|
+
method="DELETE",
|
|
1314
|
+
url=self._url(command="transaction"),
|
|
1315
|
+
headers=headers)
|
|
1273
1316
|
|
|
1274
1317
|
def get_transaction(self, txid, client_tags=[], expected_retvals=[]):
|
|
1275
1318
|
"""
|
|
@@ -1278,56 +1321,10 @@ class VastdbApi:
|
|
|
1278
1321
|
tabular-client-tag: ClientTag
|
|
1279
1322
|
"""
|
|
1280
1323
|
headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
retry_count=0, enable_sorted_projections=True):
|
|
1286
|
-
"""
|
|
1287
|
-
POST /mybucket/myschema/mytable?query-data=SelectRowIds HTTP/1.1
|
|
1288
|
-
"""
|
|
1289
|
-
|
|
1290
|
-
# add query option select-only and read-only
|
|
1291
|
-
headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
|
|
1292
|
-
headers['Content-Length'] = str(len(params))
|
|
1293
|
-
headers['tabular-enable-sorted-projections'] = str(enable_sorted_projections)
|
|
1294
|
-
if retry_count > 0:
|
|
1295
|
-
headers['tabular-retry-count'] = str(retry_count)
|
|
1296
|
-
|
|
1297
|
-
res = self.session.post(self._api_prefix(bucket=bucket, schema=schema, table=table, command="query-data=SelectRowIds",),
|
|
1298
|
-
data=params, headers=headers, stream=True)
|
|
1299
|
-
return self._check_res(res, "query_data", expected_retvals)
|
|
1300
|
-
|
|
1301
|
-
def read_columns_data(self, bucket, schema, table, params, txid=0, client_tags=[], expected_retvals=[], tenant_guid=None,
|
|
1302
|
-
retry_count=0, enable_sorted_projections=True):
|
|
1303
|
-
"""
|
|
1304
|
-
POST /mybucket/myschema/mytable?query-data=ReadColumns HTTP/1.1
|
|
1305
|
-
"""
|
|
1306
|
-
|
|
1307
|
-
headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
|
|
1308
|
-
headers['Content-Length'] = str(len(params))
|
|
1309
|
-
headers['tabular-enable-sorted-projections'] = str(enable_sorted_projections)
|
|
1310
|
-
if retry_count > 0:
|
|
1311
|
-
headers['tabular-retry-count'] = str(retry_count)
|
|
1312
|
-
|
|
1313
|
-
res = self.session.post(self._api_prefix(bucket=bucket, schema=schema, table=table, command="query-data=ReadColumns",),
|
|
1314
|
-
data=params, headers=headers, stream=True)
|
|
1315
|
-
return self._check_res(res, "query_data", expected_retvals)
|
|
1316
|
-
|
|
1317
|
-
def count_rows(self, bucket, schema, table, params, txid=0, client_tags=[], expected_retvals=[], tenant_guid=None,
|
|
1318
|
-
retry_count=0, enable_sorted_projections=True):
|
|
1319
|
-
"""
|
|
1320
|
-
POST /mybucket/myschema/mytable?query-data=CountRows HTTP/1.1
|
|
1321
|
-
"""
|
|
1322
|
-
headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
|
|
1323
|
-
headers['Content-Length'] = str(len(params))
|
|
1324
|
-
headers['tabular-enable-sorted-projections'] = str(enable_sorted_projections)
|
|
1325
|
-
if retry_count > 0:
|
|
1326
|
-
headers['tabular-retry-count'] = str(retry_count)
|
|
1327
|
-
|
|
1328
|
-
res = self.session.post(self._api_prefix(bucket=bucket, schema=schema, table=table, command="query-data=CountRows",),
|
|
1329
|
-
data=params, headers=headers, stream=True)
|
|
1330
|
-
return self._check_res(res, "query_data", expected_retvals)
|
|
1324
|
+
self._request(
|
|
1325
|
+
method="GET",
|
|
1326
|
+
url=self._url(command="transaction"),
|
|
1327
|
+
headers=headers)
|
|
1331
1328
|
|
|
1332
1329
|
def _build_query_data_headers(self, txid, client_tags, params, split, num_sub_splits, request_format, response_format,
|
|
1333
1330
|
enable_sorted_projections, limit_rows, schedule_id, retry_count, search_path, tenant_guid,
|
|
@@ -1369,35 +1366,6 @@ class VastdbApi:
|
|
|
1369
1366
|
url_params['name'] = projection
|
|
1370
1367
|
return url_params
|
|
1371
1368
|
|
|
1372
|
-
def legacy_query_data(self, bucket, schema, table, params, split=(0, 1, 8), num_sub_splits=1, response_row_id=False,
|
|
1373
|
-
txid=0, client_tags=[], expected_retvals=[], limit_rows=0, schedule_id=None, retry_count=0,
|
|
1374
|
-
search_path=None, sub_split_start_row_ids=[], tenant_guid=None, projection='', enable_sorted_projections=True,
|
|
1375
|
-
request_format='string', response_format='string', query_imports_table=False):
|
|
1376
|
-
"""
|
|
1377
|
-
POST /mybucket/myschema/mytable?query-data=LegacyQueryData HTTP/1.1
|
|
1378
|
-
Content-Length: ContentLength
|
|
1379
|
-
tabular-txid: TransactionId
|
|
1380
|
-
tabular-client-tag: ClientTag
|
|
1381
|
-
tabular-split: "split_id,total_splits,num_row_groups_per_split"
|
|
1382
|
-
tabular-num-of-subsplits: "total"
|
|
1383
|
-
tabular-request-format: "string"
|
|
1384
|
-
tabular-response-format: "string" #arrow/trino
|
|
1385
|
-
tabular-schedule-id: "schedule-id"
|
|
1386
|
-
|
|
1387
|
-
Request Body (flatbuf)
|
|
1388
|
-
projections_chunk [expressions]
|
|
1389
|
-
predicate_chunk "formatted_data", (required)
|
|
1390
|
-
|
|
1391
|
-
"""
|
|
1392
|
-
headers = self._build_query_data_headers(txid, client_tags, params, split, num_sub_splits, request_format, response_format,
|
|
1393
|
-
enable_sorted_projections, limit_rows, schedule_id, retry_count, search_path, tenant_guid,
|
|
1394
|
-
sub_split_start_row_ids)
|
|
1395
|
-
url_params = self._build_query_data_url_params(projection, query_imports_table)
|
|
1396
|
-
|
|
1397
|
-
res = self.session.post(self._api_prefix(bucket=bucket, schema=schema, table=table, command="query-data=LegacyQueryData",
|
|
1398
|
-
url_params=url_params), data=params, headers=headers, stream=True)
|
|
1399
|
-
return self._check_res(res, "legacy_query_data", expected_retvals)
|
|
1400
|
-
|
|
1401
1369
|
def query_data(self, bucket, schema, table, params, split=(0, 1, 8), num_sub_splits=1, response_row_id=False,
|
|
1402
1370
|
txid=0, client_tags=[], expected_retvals=[], limit_rows=0, schedule_id=None, retry_count=0,
|
|
1403
1371
|
search_path=None, sub_split_start_row_ids=[], tenant_guid=None, projection='', enable_sorted_projections=True,
|
|
@@ -1427,9 +1395,11 @@ class VastdbApi:
|
|
|
1427
1395
|
|
|
1428
1396
|
url_params = self._build_query_data_url_params(projection, query_imports_table)
|
|
1429
1397
|
|
|
1430
|
-
|
|
1431
|
-
|
|
1432
|
-
|
|
1398
|
+
# The retries will be done during SelectSplitState processing:
|
|
1399
|
+
return self._single_request(
|
|
1400
|
+
method="GET",
|
|
1401
|
+
url=self._url(bucket=bucket, schema=schema, table=table, command="data", url_params=url_params),
|
|
1402
|
+
data=params, headers=headers, stream=True)
|
|
1433
1403
|
|
|
1434
1404
|
"""
|
|
1435
1405
|
source_files: list of (bucket_name, file_name)
|
|
@@ -1506,6 +1476,10 @@ class VastdbApi:
|
|
|
1506
1476
|
else:
|
|
1507
1477
|
_logger.debug("import_data of object name '%s' is in progress. "
|
|
1508
1478
|
"status: %s", chunk_dict['object_name'], chunk_dict['res'])
|
|
1479
|
+
if chunk_dict['res'] == 'Success':
|
|
1480
|
+
_logger.info("imported /%s/%s into table=/%s/%s/%s",
|
|
1481
|
+
chunk_dict['bucket_name'], chunk_dict['object_name'],
|
|
1482
|
+
bucket, schema, table)
|
|
1509
1483
|
return response
|
|
1510
1484
|
|
|
1511
1485
|
headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
|
|
@@ -1515,12 +1489,14 @@ class VastdbApi:
|
|
|
1515
1489
|
headers['tabular-schedule-id'] = str(schedule_id)
|
|
1516
1490
|
if retry_count > 0:
|
|
1517
1491
|
headers['tabular-retry-count'] = str(retry_count)
|
|
1518
|
-
res = self.
|
|
1519
|
-
|
|
1492
|
+
res = self._request(
|
|
1493
|
+
method="POST",
|
|
1494
|
+
url=self._url(bucket=bucket, schema=schema, table=table, command="data"),
|
|
1495
|
+
data=import_req, headers=headers, stream=True)
|
|
1520
1496
|
if blocking:
|
|
1521
1497
|
res = iterate_over_import_data_response(res)
|
|
1522
1498
|
|
|
1523
|
-
return
|
|
1499
|
+
return res
|
|
1524
1500
|
|
|
1525
1501
|
def insert_rows(self, bucket, schema, table, record_batch, txid=0, client_tags=[], expected_retvals=[]):
|
|
1526
1502
|
"""
|
|
@@ -1534,9 +1510,10 @@ class VastdbApi:
|
|
|
1534
1510
|
"""
|
|
1535
1511
|
headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
|
|
1536
1512
|
headers['Content-Length'] = str(len(record_batch))
|
|
1537
|
-
|
|
1538
|
-
|
|
1539
|
-
|
|
1513
|
+
return self._request(
|
|
1514
|
+
method="POST",
|
|
1515
|
+
url=self._url(bucket=bucket, schema=schema, table=table, command="rows"),
|
|
1516
|
+
data=record_batch, headers=headers)
|
|
1540
1517
|
|
|
1541
1518
|
def update_rows(self, bucket, schema, table, record_batch, txid=0, client_tags=[], expected_retvals=[]):
|
|
1542
1519
|
"""
|
|
@@ -1550,9 +1527,10 @@ class VastdbApi:
|
|
|
1550
1527
|
"""
|
|
1551
1528
|
headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
|
|
1552
1529
|
headers['Content-Length'] = str(len(record_batch))
|
|
1553
|
-
|
|
1554
|
-
|
|
1555
|
-
|
|
1530
|
+
self._request(
|
|
1531
|
+
method="PUT",
|
|
1532
|
+
url=self._url(bucket=bucket, schema=schema, table=table, command="rows"),
|
|
1533
|
+
data=record_batch, headers=headers)
|
|
1556
1534
|
|
|
1557
1535
|
def delete_rows(self, bucket, schema, table, record_batch, txid=0, client_tags=[], expected_retvals=[],
|
|
1558
1536
|
delete_from_imports_table=False):
|
|
@@ -1569,9 +1547,10 @@ class VastdbApi:
|
|
|
1569
1547
|
headers['Content-Length'] = str(len(record_batch))
|
|
1570
1548
|
url_params = {'sub-table': IMPORTED_OBJECTS_TABLE_NAME} if delete_from_imports_table else {}
|
|
1571
1549
|
|
|
1572
|
-
|
|
1573
|
-
|
|
1574
|
-
|
|
1550
|
+
self._request(
|
|
1551
|
+
method="DELETE",
|
|
1552
|
+
url=self._url(bucket=bucket, schema=schema, table=table, command="rows", url_params=url_params),
|
|
1553
|
+
data=record_batch, headers=headers)
|
|
1575
1554
|
|
|
1576
1555
|
def create_projection(self, bucket, schema, table, name, columns, txid=0, client_tags=[], expected_retvals=[]):
|
|
1577
1556
|
"""
|
|
@@ -1618,9 +1597,10 @@ class VastdbApi:
|
|
|
1618
1597
|
headers['Content-Length'] = str(len(create_projection_req))
|
|
1619
1598
|
url_params = {'name': name}
|
|
1620
1599
|
|
|
1621
|
-
|
|
1622
|
-
|
|
1623
|
-
|
|
1600
|
+
self._request(
|
|
1601
|
+
method="POST",
|
|
1602
|
+
url=self._url(bucket=bucket, schema=schema, table=table, command="projection", url_params=url_params),
|
|
1603
|
+
data=create_projection_req, headers=headers)
|
|
1624
1604
|
|
|
1625
1605
|
def get_projection_stats(self, bucket, schema, table, name, txid=0, client_tags=[], expected_retvals=[]):
|
|
1626
1606
|
"""
|
|
@@ -1632,17 +1612,17 @@ class VastdbApi:
|
|
|
1632
1612
|
"""
|
|
1633
1613
|
headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
|
|
1634
1614
|
url_params = {'name': name}
|
|
1635
|
-
res = self.
|
|
1636
|
-
|
|
1637
|
-
|
|
1638
|
-
|
|
1639
|
-
|
|
1640
|
-
|
|
1641
|
-
|
|
1642
|
-
|
|
1643
|
-
|
|
1644
|
-
|
|
1645
|
-
return
|
|
1615
|
+
res = self._request(
|
|
1616
|
+
method="GET",
|
|
1617
|
+
url=self._url(bucket=bucket, schema=schema, table=table, command="projection-stats", url_params=url_params),
|
|
1618
|
+
headers=headers)
|
|
1619
|
+
|
|
1620
|
+
stats = get_projection_table_stats.GetRootAs(res.content)
|
|
1621
|
+
num_rows = stats.NumRows()
|
|
1622
|
+
size_in_bytes = stats.SizeInBytes()
|
|
1623
|
+
dirty_blocks_percentage = stats.DirtyBlocksPercentage()
|
|
1624
|
+
initial_sync_progress = stats.InitialSyncProgress()
|
|
1625
|
+
return num_rows, size_in_bytes, dirty_blocks_percentage, initial_sync_progress
|
|
1646
1626
|
|
|
1647
1627
|
def alter_projection(self, bucket, schema, table, name, txid=0, client_tags=[], table_properties="",
|
|
1648
1628
|
new_name="", expected_retvals=[]):
|
|
@@ -1674,10 +1654,10 @@ class VastdbApi:
|
|
|
1674
1654
|
headers['Content-Length'] = str(len(alter_projection_req))
|
|
1675
1655
|
url_params = {'name': name}
|
|
1676
1656
|
|
|
1677
|
-
|
|
1678
|
-
|
|
1679
|
-
|
|
1680
|
-
|
|
1657
|
+
self._request(
|
|
1658
|
+
method="PUT",
|
|
1659
|
+
url=self._url(bucket=bucket, schema=schema, table=table, command="projection", url_params=url_params),
|
|
1660
|
+
data=alter_projection_req, headers=headers)
|
|
1681
1661
|
|
|
1682
1662
|
def drop_projection(self, bucket, schema, table, name, txid=0, client_tags=[], expected_retvals=[]):
|
|
1683
1663
|
"""
|
|
@@ -1688,9 +1668,10 @@ class VastdbApi:
|
|
|
1688
1668
|
headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
|
|
1689
1669
|
url_params = {'name': name}
|
|
1690
1670
|
|
|
1691
|
-
|
|
1692
|
-
|
|
1693
|
-
|
|
1671
|
+
self._request(
|
|
1672
|
+
method="DELETE",
|
|
1673
|
+
url=self._url(bucket=bucket, schema=schema, table=table, command="projection", url_params=url_params),
|
|
1674
|
+
headers=headers)
|
|
1694
1675
|
|
|
1695
1676
|
def list_projections(self, bucket, schema, table, txid=0, client_tags=[], max_keys=1000, next_key=0, name_prefix="",
|
|
1696
1677
|
exact_match=False, expected_retvals=[], include_list_stats=False, count_only=False):
|
|
@@ -1714,24 +1695,26 @@ class VastdbApi:
|
|
|
1714
1695
|
headers['tabular-include-list-stats'] = str(include_list_stats)
|
|
1715
1696
|
|
|
1716
1697
|
projections = []
|
|
1717
|
-
res = self.
|
|
1718
|
-
|
|
1719
|
-
|
|
1720
|
-
|
|
1721
|
-
|
|
1722
|
-
|
|
1723
|
-
|
|
1724
|
-
|
|
1725
|
-
|
|
1726
|
-
|
|
1727
|
-
|
|
1728
|
-
|
|
1729
|
-
|
|
1730
|
-
|
|
1731
|
-
|
|
1732
|
-
|
|
1733
|
-
|
|
1734
|
-
|
|
1698
|
+
res = self._request(
|
|
1699
|
+
method="GET",
|
|
1700
|
+
url=self._url(bucket=bucket, schema=schema, table=table, command="projection"),
|
|
1701
|
+
headers=headers)
|
|
1702
|
+
|
|
1703
|
+
res_headers = res.headers
|
|
1704
|
+
next_key = int(res_headers['tabular-next-key'])
|
|
1705
|
+
is_truncated = res_headers['tabular-is-truncated'] == 'true'
|
|
1706
|
+
count = int(res_headers['tabular-list-count'])
|
|
1707
|
+
lists = list_projections.GetRootAs(res.content)
|
|
1708
|
+
bucket_name = lists.BucketName().decode()
|
|
1709
|
+
schema_name = lists.SchemaName().decode()
|
|
1710
|
+
table_name = lists.TableName().decode()
|
|
1711
|
+
if not bucket.startswith(bucket_name): # ignore snapshot name
|
|
1712
|
+
raise ValueError(f'bucket: {bucket} did not start from {bucket_name}')
|
|
1713
|
+
projections_length = lists.ProjectionsLength()
|
|
1714
|
+
for i in range(projections_length):
|
|
1715
|
+
projections.append(_parse_table_info(lists.Projections(i)))
|
|
1716
|
+
|
|
1717
|
+
return bucket_name, schema_name, table_name, projections, next_key, is_truncated, count
|
|
1735
1718
|
|
|
1736
1719
|
def list_projection_columns(self, bucket, schema, table, projection, txid=0, client_tags=[], max_keys=1000,
|
|
1737
1720
|
next_key=0, count_only=False, name_prefix="", exact_match=False,
|
|
@@ -1759,19 +1742,20 @@ class VastdbApi:
|
|
|
1759
1742
|
|
|
1760
1743
|
url_params = {'name': projection}
|
|
1761
1744
|
|
|
1762
|
-
res = self.
|
|
1763
|
-
|
|
1764
|
-
|
|
1745
|
+
res = self._request(
|
|
1746
|
+
method="GET",
|
|
1747
|
+
url=self._url(bucket=bucket, schema=schema, table=table, command="projection-columns", url_params=url_params),
|
|
1748
|
+
headers=headers)
|
|
1749
|
+
|
|
1765
1750
|
# list projection columns response will also show column type Sorted/UnSorted
|
|
1766
|
-
|
|
1767
|
-
|
|
1768
|
-
|
|
1769
|
-
|
|
1770
|
-
|
|
1771
|
-
|
|
1772
|
-
pa.ipc.open_stream(res.content).schema]
|
|
1751
|
+
res_headers = res.headers
|
|
1752
|
+
next_key = int(res_headers['tabular-next-key'])
|
|
1753
|
+
is_truncated = res_headers['tabular-is-truncated'] == 'true'
|
|
1754
|
+
count = int(res_headers['tabular-list-count'])
|
|
1755
|
+
columns = [] if count_only else [[f.name, f.type, f.metadata] for f in
|
|
1756
|
+
pa.ipc.open_stream(res.content).schema]
|
|
1773
1757
|
|
|
1774
|
-
|
|
1758
|
+
return columns, next_key, is_truncated, count
|
|
1775
1759
|
|
|
1776
1760
|
|
|
1777
1761
|
class QueryDataInternalError(Exception):
|
|
@@ -1828,15 +1812,12 @@ def _iter_query_data_response_columns(fileobj, stream_ids=None):
|
|
|
1828
1812
|
yield (stream_id, next_row_id, table)
|
|
1829
1813
|
|
|
1830
1814
|
|
|
1831
|
-
def parse_query_data_response(conn, schema, stream_ids=None,
|
|
1815
|
+
def parse_query_data_response(conn, schema, stream_ids=None, debug=False, parser: Optional[QueryDataParser] = None):
|
|
1832
1816
|
"""
|
|
1833
1817
|
Generates pyarrow.Table objects from QueryData API response stream.
|
|
1834
1818
|
|
|
1835
1819
|
A pyarrow.Table is a helper class that combines a Schema with multiple RecordBatches and allows easy data access.
|
|
1836
1820
|
"""
|
|
1837
|
-
if start_row_ids is None:
|
|
1838
|
-
start_row_ids = {}
|
|
1839
|
-
|
|
1840
1821
|
is_empty_projection = (len(schema) == 0)
|
|
1841
1822
|
if parser is None:
|
|
1842
1823
|
parser = QueryDataParser(schema, debug=debug)
|
|
@@ -1855,8 +1836,7 @@ def parse_query_data_response(conn, schema, stream_ids=None, start_row_ids=None,
|
|
|
1855
1836
|
|
|
1856
1837
|
_logger.debug("stream_id=%d rows=%d next_row_id=%d table=%s",
|
|
1857
1838
|
stream_id, len(parsed_table), next_row_id, parsed_table)
|
|
1858
|
-
|
|
1859
|
-
yield parsed_table # the result of a single "select_rows()" cycle
|
|
1839
|
+
yield stream_id, next_row_id, parsed_table
|
|
1860
1840
|
|
|
1861
1841
|
if states:
|
|
1862
1842
|
raise EOFError(f'all streams should be done before EOF. {states}')
|
|
@@ -2118,40 +2098,3 @@ def build_query_data_request(schema: 'pa.Schema' = pa.schema([]), predicate: ibi
|
|
|
2118
2098
|
builder.Finish(relation)
|
|
2119
2099
|
|
|
2120
2100
|
return QueryDataRequest(serialized=builder.Output(), response_schema=response_schema, response_parser=QueryDataParser(response_schema))
|
|
2121
|
-
|
|
2122
|
-
|
|
2123
|
-
def convert_column_types(table: 'pa.Table') -> 'pa.Table':
|
|
2124
|
-
"""
|
|
2125
|
-
Adjusting table values
|
|
2126
|
-
|
|
2127
|
-
1. Because the timestamp resolution is too high it is necessary to trim it. ORION-96961
|
|
2128
|
-
2. Since the values of nfs_mode_bits are returned in decimal, need to convert them to octal,
|
|
2129
|
-
as in all representations, so that the mode of 448 turn into 700
|
|
2130
|
-
3. for owner_name and group_owner_name 0 -> root, and 65534 -> nobody
|
|
2131
|
-
"""
|
|
2132
|
-
ts_indexes = []
|
|
2133
|
-
indexes_of_fields_to_change = {}
|
|
2134
|
-
sid_to_name = {
|
|
2135
|
-
'0': 'root',
|
|
2136
|
-
'65534': 'nobody' # NFSNOBODY_UID_16_BIT
|
|
2137
|
-
}
|
|
2138
|
-
column_matcher = { # column_name: custom converting rule
|
|
2139
|
-
'nfs_mode_bits': lambda val: int(oct(val).replace('0o', '')) if val is not None else val,
|
|
2140
|
-
'owner_name': lambda val: sid_to_name.get(val, val),
|
|
2141
|
-
'group_owner_name': lambda val: sid_to_name.get(val, val),
|
|
2142
|
-
}
|
|
2143
|
-
for index, field in enumerate(table.schema):
|
|
2144
|
-
if isinstance(field.type, pa.TimestampType) and field.type.unit == 'ns':
|
|
2145
|
-
ts_indexes.append(index)
|
|
2146
|
-
if field.name in column_matcher:
|
|
2147
|
-
indexes_of_fields_to_change[field.name] = index
|
|
2148
|
-
for changing_index in ts_indexes:
|
|
2149
|
-
field_name = table.schema[changing_index].name
|
|
2150
|
-
new_column = table[field_name].cast(pa.timestamp('us'), safe=False)
|
|
2151
|
-
table = table.set_column(changing_index, field_name, new_column)
|
|
2152
|
-
for field_name, changing_index in indexes_of_fields_to_change.items():
|
|
2153
|
-
new_column = table[field_name].to_pylist()
|
|
2154
|
-
new_column = list(map(column_matcher[field_name], new_column))
|
|
2155
|
-
new_column = pa.array(new_column, table[field_name].type)
|
|
2156
|
-
table = table.set_column(changing_index, field_name, new_column)
|
|
2157
|
-
return table
|