vastdb 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,17 +5,37 @@ import re
5
5
  import struct
6
6
  import urllib.parse
7
7
  from collections import defaultdict, namedtuple
8
+ from dataclasses import dataclass, field
8
9
  from enum import Enum
9
- from typing import Any, Dict, Iterator, List, Optional, Union
10
+ from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, Union
10
11
 
12
+ import backoff
11
13
  import flatbuffers
12
14
  import ibis
13
15
  import pyarrow as pa
14
- import pyarrow.parquet as pq
15
16
  import requests
16
17
  import urllib3
17
18
  import xmltodict
18
19
  from aws_requests_auth.aws_auth import AWSRequestsAuth
20
+ from ibis.expr.operations.generic import (
21
+ IsNull,
22
+ Literal,
23
+ )
24
+ from ibis.expr.operations.logical import (
25
+ And,
26
+ Between,
27
+ Equals,
28
+ Greater,
29
+ GreaterEqual,
30
+ InValues,
31
+ Less,
32
+ LessEqual,
33
+ Not,
34
+ NotEquals,
35
+ Or,
36
+ )
37
+ from ibis.expr.operations.relations import Field
38
+ from ibis.expr.operations.strings import StringContains
19
39
 
20
40
  import vast_flatbuf.org.apache.arrow.computeir.flatbuf.BinaryLiteral as fb_binary_lit
21
41
  import vast_flatbuf.org.apache.arrow.computeir.flatbuf.BooleanLiteral as fb_bool_lit
@@ -137,26 +157,6 @@ class Predicate:
137
157
  self.expr = expr
138
158
 
139
159
  def serialize(self, builder: 'flatbuffers.builder.Builder'):
140
- from ibis.expr.operations.generic import (
141
- IsNull,
142
- Literal,
143
- TableColumn,
144
- )
145
- from ibis.expr.operations.logical import (
146
- And,
147
- Between,
148
- Equals,
149
- Greater,
150
- GreaterEqual,
151
- InValues,
152
- Less,
153
- LessEqual,
154
- Not,
155
- NotEquals,
156
- Or,
157
- )
158
- from ibis.expr.operations.strings import StringContains
159
-
160
160
  builder_map = {
161
161
  Greater: self.build_greater,
162
162
  GreaterEqual: self.build_greater_equal,
@@ -216,7 +216,7 @@ class Predicate:
216
216
  if not isinstance(literal, Literal):
217
217
  raise NotImplementedError(self.expr)
218
218
 
219
- if not isinstance(column, TableColumn):
219
+ if not isinstance(column, Field):
220
220
  raise NotImplementedError(self.expr)
221
221
 
222
222
  field_name = column.name
@@ -722,19 +722,67 @@ def _parse_table_info(obj):
722
722
  TableStatsResult = namedtuple("TableStatsResult", ["num_rows", "size_in_bytes", "is_external_rowid_alloc", "endpoints"])
723
723
 
724
724
 
725
# Exception types that the backoff decorator (built in VastdbApi.__init__) will
# retry on.  NOTE: `_backoff_giveup` below special-cases exactly these types;
# keep the two in sync when adding a new retriable error.
_RETRIABLE_EXCEPTIONS = (
    errors.ConnectionError,  # only if 'may_retry' is True
    errors.Slowdown,
)
729
+
730
+
731
def _backoff_giveup(exc: Exception) -> bool:
    """Decide whether the backoff retry loop should give up on `exc`.

    Returns False (keep retrying) only for exception types listed in
    `_RETRIABLE_EXCEPTIONS` above; any other exception aborts the retries.

    Fix: the previous version logged "Backoff giveup" unconditionally, even on
    the paths that return False (i.e. when it decided to keep retrying), which
    made the log misleading.  Log the actual decision instead.
    """
    if isinstance(exc, errors.Slowdown):
        # The server is overloaded - don't give up.
        _logger.info("Backoff retrying (server slowdown): %r", exc)
        return False

    if isinstance(exc, errors.ConnectionError) and exc.may_retry:
        # Retriable (read-only) connection error - don't give up.
        _logger.info("Backoff retrying (connection error): %r", exc)
        return False

    # Give up in case of any other exception.
    _logger.info("Backoff giveup: %r", exc)
    return True
743
+
744
+
745
@dataclass
class BackoffConfig:
    """Retry/backoff settings applied by `VastdbApi` to each request."""

    # Generator producing successive wait durations (exponential by default).
    wait_gen: Callable = backoff.expo
    # Upper bound (seconds) for a single wait period; None means unbounded.
    max_value: Optional[float] = None
    # Stop retrying after this many attempts.
    max_tries: int = 10
    # Stop retrying after this total elapsed time, in seconds.
    max_time: float = 60.0
    # Log level used by the `backoff` library for its retry messages.
    backoff_log_level: int = logging.DEBUG
752
+
753
+
725
754
  class VastdbApi:
726
755
  # we expect the vast version to be <major>.<minor>.<patch>.<protocol>
727
756
  VAST_VERSION_REGEX = re.compile(r'^vast (\d+\.\d+\.\d+\.\d+)$')
728
757
 
729
- def __init__(self, endpoint, access_key, secret_key, auth_type=AuthType.SIGV4, ssl_verify=True):
758
+ def __init__(self, endpoint, access_key, secret_key,
759
+ *,
760
+ auth_type=AuthType.SIGV4,
761
+ ssl_verify=True,
762
+ backoff_config: Optional[BackoffConfig] = None):
763
+
764
+ from . import __version__ # import lazily here (to avoid circular dependencies)
765
+ self.client_sdk_version = f"VAST Database Python SDK {__version__} - 2024 (c)"
766
+
730
767
  url = urllib3.util.parse_url(endpoint)
731
768
  self.access_key = access_key
732
769
  self.secret_key = secret_key
733
770
 
734
771
  self.default_max_list_columns_page_size = 1000
735
- self.session = requests.Session()
736
- self.session.verify = ssl_verify
737
- self.session.headers['user-agent'] = "VastData Tabular API 1.0 - 2022 (c)"
772
+ self._session = requests.Session()
773
+ self._session.verify = ssl_verify
774
+ self._session.headers['user-agent'] = self.client_sdk_version
775
+
776
+ backoff_config = backoff_config or BackoffConfig()
777
+ self._backoff_decorator = backoff.on_exception(
778
+ wait_gen=backoff_config.wait_gen,
779
+ exception=_RETRIABLE_EXCEPTIONS,
780
+ giveup=_backoff_giveup,
781
+ max_tries=backoff_config.max_tries,
782
+ max_time=backoff_config.max_time,
783
+ max_value=backoff_config.max_value, # passed to `backoff_config.wait_gen`
784
+ backoff_log_level=backoff_config.backoff_log_level)
785
+ self._request = self._backoff_decorator(self._single_request)
738
786
 
739
787
  if url.port in {80, 443, None}:
740
788
  self.aws_host = f'{url.host}'
@@ -744,22 +792,21 @@ class VastdbApi:
744
792
  self.url = str(url)
745
793
  _logger.debug('url=%s aws_host=%s', self.url, self.aws_host)
746
794
 
747
- self.session.auth = AWSRequestsAuth(aws_access_key=access_key,
795
+ self._session.auth = AWSRequestsAuth(aws_access_key=access_key,
748
796
  aws_secret_access_key=secret_key,
749
797
  aws_host=self.aws_host,
750
- aws_region='us-east-1',
798
+ aws_region='',
751
799
  aws_service='s3')
752
800
 
753
801
  # probe the cluster for its version
754
- self.vast_version = None
755
- res = self.session.get(self.url)
802
+ res = self._request(method="GET", url=self._url(command="transaction"), skip_status_check=True) # used only for the response headers
803
+ _logger.debug("headers=%s code=%s content=%s", res.headers, res.status_code, res.content)
756
804
  server_header = res.headers.get("Server")
757
805
  if server_header is None:
758
806
  _logger.error("Response doesn't contain 'Server' header")
759
807
  else:
760
- _logger.debug("Server header is '%s'", server_header)
761
808
  if m := self.VAST_VERSION_REGEX.match(server_header):
762
- self.vast_version, = m.groups()
809
+ self.vast_version: Tuple[int, ...] = tuple(int(v) for v in m.group(1).split("."))
763
810
  return
764
811
  else:
765
812
  _logger.error("'Server' header '%s' doesn't match the expected pattern", server_header)
@@ -772,15 +819,21 @@ class VastdbApi:
772
819
  _logger.critical(msg)
773
820
  raise NotImplementedError(msg)
774
821
 
775
- def update_mgmt_session(self, access_key: str, secret_key: str, auth_type=AuthType.SIGV4):
776
- if auth_type != AuthType.BASIC:
777
- self.session.auth = AWSRequestsAuth(aws_access_key=access_key,
778
- aws_secret_access_key=secret_key,
779
- aws_host=self.aws_host,
780
- aws_region='us-east-1',
781
- aws_service='s3')
782
-
783
- def _api_prefix(self, bucket="", schema="", table="", command="", url_params={}):
822
+ def _single_request(self, *, method, url, skip_status_check=False, **kwargs):
823
+ _logger.debug("Sending request: %s %s %s", method, url, kwargs)
824
+ try:
825
+ res = self._session.request(method=method, url=url, **kwargs)
826
+ except requests.exceptions.ConnectionError as err:
827
+ # low-level connection issue, it is safe to retry only read-only requests
828
+ may_retry = (method == "GET")
829
+ raise errors.ConnectionError(cause=err, may_retry=may_retry) from err
830
+
831
+ if not skip_status_check:
832
+ if exc := errors.from_response(res):
833
+ raise exc # application-level error
834
+ return res # successful response
835
+
836
+ def _url(self, bucket="", schema="", table="", command="", url_params={}):
784
837
  prefix_list = [self.url]
785
838
  if len(bucket):
786
839
  prefix_list.append(bucket)
@@ -815,11 +868,6 @@ class VastdbApi:
815
868
 
816
869
  return common_headers | {f'tabular-client-tags-{index}': tag for index, tag in enumerate(client_tags)}
817
870
 
818
- def _check_res(self, res, cmd="", expected_retvals=[]):
819
- if exc := errors.from_response(res):
820
- raise exc
821
- return res
822
-
823
871
  def create_schema(self, bucket, name, txid=0, client_tags=[], schema_properties="", expected_retvals=[]):
824
872
  """
825
873
  Create a collection of tables, use the following request
@@ -841,10 +889,10 @@ class VastdbApi:
841
889
 
842
890
  headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
843
891
  headers['Content-Length'] = str(len(create_schema_req))
844
- res = self.session.post(self._api_prefix(bucket=bucket, schema=name, command="schema"),
845
- data=create_schema_req, headers=headers, stream=True)
846
-
847
- return self._check_res(res, "create_schema", expected_retvals)
892
+ self._request(
893
+ method="POST",
894
+ url=self._url(bucket=bucket, schema=name, command="schema"),
895
+ data=create_schema_req, headers=headers)
848
896
 
849
897
  def alter_schema(self, bucket, name, txid=0, client_tags=[], schema_properties="", new_name="", expected_retvals=[]):
850
898
  """
@@ -870,10 +918,10 @@ class VastdbApi:
870
918
  headers['Content-Length'] = str(len(alter_schema_req))
871
919
  url_params = {'tabular-new-schema-name': new_name} if len(new_name) else {}
872
920
 
873
- res = self.session.put(self._api_prefix(bucket=bucket, schema=name, command="schema", url_params=url_params),
874
- data=alter_schema_req, headers=headers)
875
-
876
- return self._check_res(res, "alter_schema", expected_retvals)
921
+ self._request(
922
+ method="PUT",
923
+ url=self._url(bucket=bucket, schema=name, command="schema", url_params=url_params),
924
+ data=alter_schema_req, headers=headers)
877
925
 
878
926
  def drop_schema(self, bucket, name, txid=0, client_tags=[], expected_retvals=[]):
879
927
  """
@@ -884,9 +932,10 @@ class VastdbApi:
884
932
  """
885
933
  headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
886
934
 
887
- res = self.session.delete(self._api_prefix(bucket=bucket, schema=name, command="schema"), headers=headers)
888
-
889
- return self._check_res(res, "drop_schema", expected_retvals)
935
+ self._request(
936
+ method="DELETE",
937
+ url=self._url(bucket=bucket, schema=name, command="schema"),
938
+ headers=headers)
890
939
 
891
940
  def list_schemas(self, bucket, schema="", txid=0, client_tags=[], max_keys=1000, next_key=0, name_prefix="",
892
941
  exact_match=False, expected_retvals=[], count_only=False):
@@ -915,25 +964,27 @@ class VastdbApi:
915
964
 
916
965
  schemas = []
917
966
  schema = schema or ""
918
- res = self.session.get(self._api_prefix(bucket=bucket, schema=schema, command="schema"), headers=headers, stream=True)
919
- self._check_res(res, "list_schemas", expected_retvals)
920
- if res.status_code == 200:
921
- res_headers = res.headers
922
- next_key = int(res_headers['tabular-next-key'])
923
- is_truncated = res_headers['tabular-is-truncated'] == 'true'
924
- lists = list_schemas.GetRootAs(res.content)
925
- bucket_name = lists.BucketName().decode()
926
- if not bucket.startswith(bucket_name):
927
- raise ValueError(f'bucket: {bucket} did not start from {bucket_name}')
928
- schemas_length = lists.SchemasLength()
929
- count = int(res_headers['tabular-list-count']) if 'tabular-list-count' in res_headers else schemas_length
930
- for i in range(schemas_length):
931
- schema_obj = lists.Schemas(i)
932
- name = schema_obj.Name().decode()
933
- properties = schema_obj.Properties().decode()
934
- schemas.append([name, properties])
935
-
936
- return bucket_name, schemas, next_key, is_truncated, count
967
+ res = self._request(
968
+ method="GET",
969
+ url=self._url(bucket=bucket, schema=schema, command="schema"),
970
+ headers=headers)
971
+
972
+ res_headers = res.headers
973
+ next_key = int(res_headers['tabular-next-key'])
974
+ is_truncated = res_headers['tabular-is-truncated'] == 'true'
975
+ lists = list_schemas.GetRootAs(res.content)
976
+ bucket_name = lists.BucketName().decode()
977
+ if not bucket.startswith(bucket_name):
978
+ raise ValueError(f'bucket: {bucket} did not start from {bucket_name}')
979
+ schemas_length = lists.SchemasLength()
980
+ count = int(res_headers['tabular-list-count']) if 'tabular-list-count' in res_headers else schemas_length
981
+ for i in range(schemas_length):
982
+ schema_obj = lists.Schemas(i)
983
+ name = schema_obj.Name().decode()
984
+ properties = schema_obj.Properties().decode()
985
+ schemas.append([name, properties])
986
+
987
+ return bucket_name, schemas, next_key, is_truncated, count
937
988
 
938
989
  def list_snapshots(self, bucket, max_keys=1000, next_token=None, name_prefix=''):
939
990
  next_token = next_token or ''
@@ -941,8 +992,9 @@ class VastdbApi:
941
992
  if next_token:
942
993
  url_params['continuation-token'] = next_token
943
994
 
944
- res = self.session.get(self._api_prefix(bucket=bucket, command="list", url_params=url_params), headers={}, stream=True)
945
- self._check_res(res, "list_snapshots")
995
+ res = self._request(
996
+ method="GET",
997
+ url=self._url(bucket=bucket, command="list", url_params=url_params))
946
998
 
947
999
  xml_str = res.content.decode()
948
1000
  xml_dict = xmltodict.parse(xml_str)
@@ -985,33 +1037,10 @@ class VastdbApi:
985
1037
  if create_imports_table:
986
1038
  url_params['sub-table'] = IMPORTED_OBJECTS_TABLE_NAME
987
1039
 
988
- res = self.session.post(self._api_prefix(bucket=bucket, schema=schema, table=name, command="table", url_params=url_params),
989
- data=serialized_schema, headers=headers)
990
- return self._check_res(res, "create_table", expected_retvals)
991
-
992
- def create_table_from_parquet_schema(self, bucket, schema, name, parquet_path=None,
993
- parquet_bucket_name=None, parquet_object_name=None,
994
- txid=0, client_tags=[], expected_retvals=[]):
995
-
996
- # Use pyarrow.parquet.ParquetDataset to open the Parquet file
997
- if parquet_path:
998
- parquet_ds = pq.ParquetDataset(parquet_path)
999
- elif parquet_bucket_name and parquet_object_name:
1000
- s3fs = pa.fs.S3FileSystem(access_key=self.access_key, secret_key=self.secret_key, endpoint_override=self.url)
1001
- parquet_ds = pq.ParquetDataset('/'.join([parquet_bucket_name, parquet_object_name]), filesystem=s3fs)
1002
- else:
1003
- raise RuntimeError(f'invalid params parquet_path={parquet_path} parquet_bucket_name={parquet_bucket_name} parquet_object_name={parquet_object_name}')
1004
-
1005
- # Get the schema of the Parquet file
1006
- if isinstance(parquet_ds.schema, pq.ParquetSchema):
1007
- arrow_schema = parquet_ds.schema.to_arrow_schema()
1008
- elif isinstance(parquet_ds.schema, pa.Schema):
1009
- arrow_schema = parquet_ds.schema
1010
- else:
1011
- raise RuntimeError(f'invalid type(parquet_ds.schema) = {type(parquet_ds.schema)}')
1012
-
1013
- # create the table
1014
- return self.create_table(bucket, schema, name, arrow_schema, txid, client_tags, expected_retvals)
1040
+ self._request(
1041
+ method="POST",
1042
+ url=self._url(bucket=bucket, schema=schema, table=name, command="table", url_params=url_params),
1043
+ data=serialized_schema, headers=headers)
1015
1044
 
1016
1045
  def get_table_stats(self, bucket, schema, name, txid=0, client_tags=[], expected_retvals=[], imports_table_stats=False):
1017
1046
  """
@@ -1023,8 +1052,10 @@ class VastdbApi:
1023
1052
  """
1024
1053
  headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
1025
1054
  url_params = {'sub-table': IMPORTED_OBJECTS_TABLE_NAME} if imports_table_stats else {}
1026
- res = self.session.get(self._api_prefix(bucket=bucket, schema=schema, table=name, command="stats", url_params=url_params), headers=headers)
1027
- self._check_res(res, "get_table_stats", expected_retvals)
1055
+ res = self._request(
1056
+ method="GET",
1057
+ url=self._url(bucket=bucket, schema=schema, table=name, command="stats", url_params=url_params),
1058
+ headers=headers)
1028
1059
 
1029
1060
  stats = get_table_stats.GetRootAs(res.content)
1030
1061
  num_rows = stats.NumRows()
@@ -1059,10 +1090,10 @@ class VastdbApi:
1059
1090
  headers['Content-Length'] = str(len(alter_table_req))
1060
1091
  url_params = {'tabular-new-table-name': schema + "/" + new_name} if len(new_name) else {}
1061
1092
 
1062
- res = self.session.put(self._api_prefix(bucket=bucket, schema=schema, table=name, command="table", url_params=url_params),
1063
- data=alter_table_req, headers=headers)
1064
-
1065
- return self._check_res(res, "alter_table", expected_retvals)
1093
+ self._request(
1094
+ method="PUT",
1095
+ url=self._url(bucket=bucket, schema=schema, table=name, command="table", url_params=url_params),
1096
+ data=alter_table_req, headers=headers)
1066
1097
 
1067
1098
  def drop_table(self, bucket, schema, name, txid=0, client_tags=[], expected_retvals=[], remove_imports_table=False):
1068
1099
  """
@@ -1075,9 +1106,10 @@ class VastdbApi:
1075
1106
  headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
1076
1107
  url_params = {'sub-table': IMPORTED_OBJECTS_TABLE_NAME} if remove_imports_table else {}
1077
1108
 
1078
- res = self.session.delete(self._api_prefix(bucket=bucket, schema=schema, table=name, command="table", url_params=url_params),
1079
- headers=headers)
1080
- return self._check_res(res, "drop_table", expected_retvals)
1109
+ self._request(
1110
+ method="DELETE",
1111
+ url=self._url(bucket=bucket, schema=schema, table=name, command="table", url_params=url_params),
1112
+ headers=headers)
1081
1113
 
1082
1114
  def list_tables(self, bucket, schema, txid=0, client_tags=[], max_keys=1000, next_key=0, name_prefix="",
1083
1115
  exact_match=False, expected_retvals=[], include_list_stats=False, count_only=False):
@@ -1101,23 +1133,25 @@ class VastdbApi:
1101
1133
  headers['tabular-include-list-stats'] = str(include_list_stats)
1102
1134
 
1103
1135
  tables = []
1104
- res = self.session.get(self._api_prefix(bucket=bucket, schema=schema, command="table"), headers=headers)
1105
- self._check_res(res, "list_table", expected_retvals)
1106
- if res.status_code == 200:
1107
- res_headers = res.headers
1108
- next_key = int(res_headers['tabular-next-key'])
1109
- is_truncated = res_headers['tabular-is-truncated'] == 'true'
1110
- lists = list_tables.GetRootAs(res.content)
1111
- bucket_name = lists.BucketName().decode()
1112
- schema_name = lists.SchemaName().decode()
1113
- if not bucket.startswith(bucket_name): # ignore snapshot name
1114
- raise ValueError(f'bucket: {bucket} did not start from {bucket_name}')
1115
- tables_length = lists.TablesLength()
1116
- count = int(res_headers['tabular-list-count']) if 'tabular-list-count' in res_headers else tables_length
1117
- for i in range(tables_length):
1118
- tables.append(_parse_table_info(lists.Tables(i)))
1119
-
1120
- return bucket_name, schema_name, tables, next_key, is_truncated, count
1136
+ res = self._request(
1137
+ method="GET",
1138
+ url=self._url(bucket=bucket, schema=schema, command="table"),
1139
+ headers=headers)
1140
+
1141
+ res_headers = res.headers
1142
+ next_key = int(res_headers['tabular-next-key'])
1143
+ is_truncated = res_headers['tabular-is-truncated'] == 'true'
1144
+ lists = list_tables.GetRootAs(res.content)
1145
+ bucket_name = lists.BucketName().decode()
1146
+ schema_name = lists.SchemaName().decode()
1147
+ if not bucket.startswith(bucket_name): # ignore snapshot name
1148
+ raise ValueError(f'bucket: {bucket} did not start from {bucket_name}')
1149
+ tables_length = lists.TablesLength()
1150
+ count = int(res_headers['tabular-list-count']) if 'tabular-list-count' in res_headers else tables_length
1151
+ for i in range(tables_length):
1152
+ tables.append(_parse_table_info(lists.Tables(i)))
1153
+
1154
+ return bucket_name, schema_name, tables, next_key, is_truncated, count
1121
1155
 
1122
1156
  def add_columns(self, bucket, schema, name, arrow_schema, txid=0, client_tags=[], expected_retvals=[]):
1123
1157
  """
@@ -1139,9 +1173,10 @@ class VastdbApi:
1139
1173
  serialized_schema = arrow_schema.serialize()
1140
1174
  headers['Content-Length'] = str(len(serialized_schema))
1141
1175
 
1142
- res = self.session.post(self._api_prefix(bucket=bucket, schema=schema, table=name, command="column"),
1143
- data=serialized_schema, headers=headers)
1144
- return self._check_res(res, "add_columns", expected_retvals)
1176
+ self._request(
1177
+ method="POST",
1178
+ url=self._url(bucket=bucket, schema=schema, table=name, command="column"),
1179
+ data=serialized_schema, headers=headers)
1145
1180
 
1146
1181
  def alter_column(self, bucket, schema, table, name, txid=0, client_tags=[], column_properties="",
1147
1182
  new_name="", column_sep=".", column_stats="", expected_retvals=[]):
@@ -1177,9 +1212,10 @@ class VastdbApi:
1177
1212
  if len(new_name):
1178
1213
  url_params['tabular-new-column-name'] = new_name
1179
1214
 
1180
- res = self.session.put(self._api_prefix(bucket=bucket, schema=schema, table=table, command="column", url_params=url_params),
1181
- data=alter_column_req, headers=headers)
1182
- return self._check_res(res, "alter_column", expected_retvals)
1215
+ self._request(
1216
+ method="PUT",
1217
+ url=self._url(bucket=bucket, schema=schema, table=table, command="column", url_params=url_params),
1218
+ data=alter_column_req, headers=headers)
1183
1219
 
1184
1220
  def drop_columns(self, bucket, schema, table, arrow_schema, txid=0, client_tags=[], expected_retvals=[]):
1185
1221
  """
@@ -1192,9 +1228,10 @@ class VastdbApi:
1192
1228
  serialized_schema = arrow_schema.serialize()
1193
1229
  headers['Content-Length'] = str(len(serialized_schema))
1194
1230
 
1195
- res = self.session.delete(self._api_prefix(bucket=bucket, schema=schema, table=table, command="column"),
1196
- data=serialized_schema, headers=headers)
1197
- return self._check_res(res, "drop_columns", expected_retvals)
1231
+ self._request(
1232
+ method="DELETE",
1233
+ url=self._url(bucket=bucket, schema=schema, table=table, command="column"),
1234
+ data=serialized_schema, headers=headers)
1198
1235
 
1199
1236
  def list_columns(self, bucket, schema, table, *, txid=0, client_tags=None, max_keys=None, next_key=0,
1200
1237
  count_only=False, name_prefix="", exact_match=False,
@@ -1226,18 +1263,18 @@ class VastdbApi:
1226
1263
  headers['tabular-name-prefix'] = name_prefix
1227
1264
 
1228
1265
  url_params = {'sub-table': IMPORTED_OBJECTS_TABLE_NAME} if list_imports_table else {}
1229
- res = self.session.get(self._api_prefix(bucket=bucket, schema=schema, table=table, command="column",
1230
- url_params=url_params),
1231
- headers=headers, stream=True)
1232
- self._check_res(res, "list_columns", expected_retvals)
1233
- if res.status_code == 200:
1234
- res_headers = res.headers
1235
- next_key = int(res_headers['tabular-next-key'])
1236
- is_truncated = res_headers['tabular-is-truncated'] == 'true'
1237
- count = int(res_headers['tabular-list-count'])
1238
- columns = [] if count_only else pa.ipc.open_stream(res.content).schema
1239
-
1240
- return columns, next_key, is_truncated, count
1266
+ res = self._request(
1267
+ method="GET",
1268
+ url=self._url(bucket=bucket, schema=schema, table=table, command="column", url_params=url_params),
1269
+ headers=headers)
1270
+
1271
+ res_headers = res.headers
1272
+ next_key = int(res_headers['tabular-next-key'])
1273
+ is_truncated = res_headers['tabular-is-truncated'] == 'true'
1274
+ count = int(res_headers['tabular-list-count'])
1275
+ columns = [] if count_only else pa.ipc.open_stream(res.content).schema
1276
+
1277
+ return columns, next_key, is_truncated, count
1241
1278
 
1242
1279
  def begin_transaction(self, client_tags=[], expected_retvals=[]):
1243
1280
  """
@@ -1248,8 +1285,10 @@ class VastdbApi:
1248
1285
  tabular-txid: TransactionId
1249
1286
  """
1250
1287
  headers = self._fill_common_headers(client_tags=client_tags)
1251
- res = self.session.post(self._api_prefix(command="transaction"), headers=headers)
1252
- return self._check_res(res, "begin_transaction", expected_retvals)
1288
+ return self._request(
1289
+ method="POST",
1290
+ url=self._url(command="transaction"),
1291
+ headers=headers)
1253
1292
 
1254
1293
  def commit_transaction(self, txid, client_tags=[], expected_retvals=[]):
1255
1294
  """
@@ -1258,8 +1297,10 @@ class VastdbApi:
1258
1297
  tabular-client-tag: ClientTag
1259
1298
  """
1260
1299
  headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
1261
- res = self.session.put(self._api_prefix(command="transaction"), headers=headers)
1262
- return self._check_res(res, "commit_transaction", expected_retvals)
1300
+ self._request(
1301
+ method="PUT",
1302
+ url=self._url(command="transaction"),
1303
+ headers=headers)
1263
1304
 
1264
1305
  def rollback_transaction(self, txid, client_tags=[], expected_retvals=[]):
1265
1306
  """
@@ -1268,8 +1309,10 @@ class VastdbApi:
1268
1309
  tabular-client-tag: ClientTag
1269
1310
  """
1270
1311
  headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
1271
- res = self.session.delete(self._api_prefix(command="transaction"), headers=headers)
1272
- return self._check_res(res, "rollback_transaction", expected_retvals)
1312
+ self._request(
1313
+ method="DELETE",
1314
+ url=self._url(command="transaction"),
1315
+ headers=headers)
1273
1316
 
1274
1317
  def get_transaction(self, txid, client_tags=[], expected_retvals=[]):
1275
1318
  """
@@ -1278,56 +1321,10 @@ class VastdbApi:
1278
1321
  tabular-client-tag: ClientTag
1279
1322
  """
1280
1323
  headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
1281
- res = self.session.get(self._api_prefix(command="transaction"), headers=headers)
1282
- return self._check_res(res, "get_transaction", expected_retvals)
1283
-
1284
- def select_row_ids(self, bucket, schema, table, params, txid=0, client_tags=[], expected_retvals=[],
1285
- retry_count=0, enable_sorted_projections=True):
1286
- """
1287
- POST /mybucket/myschema/mytable?query-data=SelectRowIds HTTP/1.1
1288
- """
1289
-
1290
- # add query option select-only and read-only
1291
- headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
1292
- headers['Content-Length'] = str(len(params))
1293
- headers['tabular-enable-sorted-projections'] = str(enable_sorted_projections)
1294
- if retry_count > 0:
1295
- headers['tabular-retry-count'] = str(retry_count)
1296
-
1297
- res = self.session.post(self._api_prefix(bucket=bucket, schema=schema, table=table, command="query-data=SelectRowIds",),
1298
- data=params, headers=headers, stream=True)
1299
- return self._check_res(res, "query_data", expected_retvals)
1300
-
1301
- def read_columns_data(self, bucket, schema, table, params, txid=0, client_tags=[], expected_retvals=[], tenant_guid=None,
1302
- retry_count=0, enable_sorted_projections=True):
1303
- """
1304
- POST /mybucket/myschema/mytable?query-data=ReadColumns HTTP/1.1
1305
- """
1306
-
1307
- headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
1308
- headers['Content-Length'] = str(len(params))
1309
- headers['tabular-enable-sorted-projections'] = str(enable_sorted_projections)
1310
- if retry_count > 0:
1311
- headers['tabular-retry-count'] = str(retry_count)
1312
-
1313
- res = self.session.post(self._api_prefix(bucket=bucket, schema=schema, table=table, command="query-data=ReadColumns",),
1314
- data=params, headers=headers, stream=True)
1315
- return self._check_res(res, "query_data", expected_retvals)
1316
-
1317
- def count_rows(self, bucket, schema, table, params, txid=0, client_tags=[], expected_retvals=[], tenant_guid=None,
1318
- retry_count=0, enable_sorted_projections=True):
1319
- """
1320
- POST /mybucket/myschema/mytable?query-data=CountRows HTTP/1.1
1321
- """
1322
- headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
1323
- headers['Content-Length'] = str(len(params))
1324
- headers['tabular-enable-sorted-projections'] = str(enable_sorted_projections)
1325
- if retry_count > 0:
1326
- headers['tabular-retry-count'] = str(retry_count)
1327
-
1328
- res = self.session.post(self._api_prefix(bucket=bucket, schema=schema, table=table, command="query-data=CountRows",),
1329
- data=params, headers=headers, stream=True)
1330
- return self._check_res(res, "query_data", expected_retvals)
1324
+ self._request(
1325
+ method="GET",
1326
+ url=self._url(command="transaction"),
1327
+ headers=headers)
1331
1328
 
1332
1329
  def _build_query_data_headers(self, txid, client_tags, params, split, num_sub_splits, request_format, response_format,
1333
1330
  enable_sorted_projections, limit_rows, schedule_id, retry_count, search_path, tenant_guid,
@@ -1369,35 +1366,6 @@ class VastdbApi:
1369
1366
  url_params['name'] = projection
1370
1367
  return url_params
1371
1368
 
1372
- def legacy_query_data(self, bucket, schema, table, params, split=(0, 1, 8), num_sub_splits=1, response_row_id=False,
1373
- txid=0, client_tags=[], expected_retvals=[], limit_rows=0, schedule_id=None, retry_count=0,
1374
- search_path=None, sub_split_start_row_ids=[], tenant_guid=None, projection='', enable_sorted_projections=True,
1375
- request_format='string', response_format='string', query_imports_table=False):
1376
- """
1377
- POST /mybucket/myschema/mytable?query-data=LegacyQueryData HTTP/1.1
1378
- Content-Length: ContentLength
1379
- tabular-txid: TransactionId
1380
- tabular-client-tag: ClientTag
1381
- tabular-split: "split_id,total_splits,num_row_groups_per_split"
1382
- tabular-num-of-subsplits: "total"
1383
- tabular-request-format: "string"
1384
- tabular-response-format: "string" #arrow/trino
1385
- tabular-schedule-id: "schedule-id"
1386
-
1387
- Request Body (flatbuf)
1388
- projections_chunk [expressions]
1389
- predicate_chunk "formatted_data", (required)
1390
-
1391
- """
1392
- headers = self._build_query_data_headers(txid, client_tags, params, split, num_sub_splits, request_format, response_format,
1393
- enable_sorted_projections, limit_rows, schedule_id, retry_count, search_path, tenant_guid,
1394
- sub_split_start_row_ids)
1395
- url_params = self._build_query_data_url_params(projection, query_imports_table)
1396
-
1397
- res = self.session.post(self._api_prefix(bucket=bucket, schema=schema, table=table, command="query-data=LegacyQueryData",
1398
- url_params=url_params), data=params, headers=headers, stream=True)
1399
- return self._check_res(res, "legacy_query_data", expected_retvals)
1400
-
1401
1369
  def query_data(self, bucket, schema, table, params, split=(0, 1, 8), num_sub_splits=1, response_row_id=False,
1402
1370
  txid=0, client_tags=[], expected_retvals=[], limit_rows=0, schedule_id=None, retry_count=0,
1403
1371
  search_path=None, sub_split_start_row_ids=[], tenant_guid=None, projection='', enable_sorted_projections=True,
@@ -1427,9 +1395,11 @@ class VastdbApi:
1427
1395
 
1428
1396
  url_params = self._build_query_data_url_params(projection, query_imports_table)
1429
1397
 
1430
- res = self.session.get(self._api_prefix(bucket=bucket, schema=schema, table=table, command="data", url_params=url_params),
1431
- data=params, headers=headers, stream=True)
1432
- return self._check_res(res, "query_data", expected_retvals)
1398
+ # The retries will be done during SelectSplitState processing:
1399
+ return self._single_request(
1400
+ method="GET",
1401
+ url=self._url(bucket=bucket, schema=schema, table=table, command="data", url_params=url_params),
1402
+ data=params, headers=headers, stream=True)
1433
1403
 
1434
1404
  """
1435
1405
  source_files: list of (bucket_name, file_name)
@@ -1506,6 +1476,10 @@ class VastdbApi:
1506
1476
  else:
1507
1477
  _logger.debug("import_data of object name '%s' is in progress. "
1508
1478
  "status: %s", chunk_dict['object_name'], chunk_dict['res'])
1479
+ if chunk_dict['res'] == 'Success':
1480
+ _logger.info("imported /%s/%s into table=/%s/%s/%s",
1481
+ chunk_dict['bucket_name'], chunk_dict['object_name'],
1482
+ bucket, schema, table)
1509
1483
  return response
1510
1484
 
1511
1485
  headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
@@ -1515,12 +1489,14 @@ class VastdbApi:
1515
1489
  headers['tabular-schedule-id'] = str(schedule_id)
1516
1490
  if retry_count > 0:
1517
1491
  headers['tabular-retry-count'] = str(retry_count)
1518
- res = self.session.post(self._api_prefix(bucket=bucket, schema=schema, table=table, command="data"),
1519
- data=import_req, headers=headers, stream=True)
1492
+ res = self._request(
1493
+ method="POST",
1494
+ url=self._url(bucket=bucket, schema=schema, table=table, command="data"),
1495
+ data=import_req, headers=headers, stream=True)
1520
1496
  if blocking:
1521
1497
  res = iterate_over_import_data_response(res)
1522
1498
 
1523
- return self._check_res(res, "import_data", expected_retvals)
1499
+ return res
1524
1500
 
1525
1501
  def insert_rows(self, bucket, schema, table, record_batch, txid=0, client_tags=[], expected_retvals=[]):
1526
1502
  """
@@ -1534,9 +1510,10 @@ class VastdbApi:
1534
1510
  """
1535
1511
  headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
1536
1512
  headers['Content-Length'] = str(len(record_batch))
1537
- res = self.session.post(self._api_prefix(bucket=bucket, schema=schema, table=table, command="rows"),
1538
- data=record_batch, headers=headers, stream=True)
1539
- return self._check_res(res, "insert_rows", expected_retvals)
1513
+ return self._request(
1514
+ method="POST",
1515
+ url=self._url(bucket=bucket, schema=schema, table=table, command="rows"),
1516
+ data=record_batch, headers=headers)
1540
1517
 
1541
1518
  def update_rows(self, bucket, schema, table, record_batch, txid=0, client_tags=[], expected_retvals=[]):
1542
1519
  """
@@ -1550,9 +1527,10 @@ class VastdbApi:
1550
1527
  """
1551
1528
  headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
1552
1529
  headers['Content-Length'] = str(len(record_batch))
1553
- res = self.session.put(self._api_prefix(bucket=bucket, schema=schema, table=table, command="rows"),
1554
- data=record_batch, headers=headers)
1555
- self._check_res(res, "update_rows", expected_retvals)
1530
+ self._request(
1531
+ method="PUT",
1532
+ url=self._url(bucket=bucket, schema=schema, table=table, command="rows"),
1533
+ data=record_batch, headers=headers)
1556
1534
 
1557
1535
  def delete_rows(self, bucket, schema, table, record_batch, txid=0, client_tags=[], expected_retvals=[],
1558
1536
  delete_from_imports_table=False):
@@ -1569,9 +1547,10 @@ class VastdbApi:
1569
1547
  headers['Content-Length'] = str(len(record_batch))
1570
1548
  url_params = {'sub-table': IMPORTED_OBJECTS_TABLE_NAME} if delete_from_imports_table else {}
1571
1549
 
1572
- res = self.session.delete(self._api_prefix(bucket=bucket, schema=schema, table=table, command="rows", url_params=url_params),
1573
- data=record_batch, headers=headers)
1574
- self._check_res(res, "delete_rows", expected_retvals)
1550
+ self._request(
1551
+ method="DELETE",
1552
+ url=self._url(bucket=bucket, schema=schema, table=table, command="rows", url_params=url_params),
1553
+ data=record_batch, headers=headers)
1575
1554
 
1576
1555
  def create_projection(self, bucket, schema, table, name, columns, txid=0, client_tags=[], expected_retvals=[]):
1577
1556
  """
@@ -1618,9 +1597,10 @@ class VastdbApi:
1618
1597
  headers['Content-Length'] = str(len(create_projection_req))
1619
1598
  url_params = {'name': name}
1620
1599
 
1621
- res = self.session.post(self._api_prefix(bucket=bucket, schema=schema, table=table, command="projection", url_params=url_params),
1622
- data=create_projection_req, headers=headers)
1623
- return self._check_res(res, "create_projection", expected_retvals)
1600
+ self._request(
1601
+ method="POST",
1602
+ url=self._url(bucket=bucket, schema=schema, table=table, command="projection", url_params=url_params),
1603
+ data=create_projection_req, headers=headers)
1624
1604
 
1625
1605
  def get_projection_stats(self, bucket, schema, table, name, txid=0, client_tags=[], expected_retvals=[]):
1626
1606
  """
@@ -1632,17 +1612,17 @@ class VastdbApi:
1632
1612
  """
1633
1613
  headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
1634
1614
  url_params = {'name': name}
1635
- res = self.session.get(self._api_prefix(bucket=bucket, schema=schema, table=table, command="projection-stats", url_params=url_params),
1636
- headers=headers)
1637
- if res.status_code == 200:
1638
- stats = get_projection_table_stats.GetRootAs(res.content)
1639
- num_rows = stats.NumRows()
1640
- size_in_bytes = stats.SizeInBytes()
1641
- dirty_blocks_percentage = stats.DirtyBlocksPercentage()
1642
- initial_sync_progress = stats.InitialSyncProgress()
1643
- return num_rows, size_in_bytes, dirty_blocks_percentage, initial_sync_progress
1644
-
1645
- return self._check_res(res, "get_projection_stats", expected_retvals)
1615
+ res = self._request(
1616
+ method="GET",
1617
+ url=self._url(bucket=bucket, schema=schema, table=table, command="projection-stats", url_params=url_params),
1618
+ headers=headers)
1619
+
1620
+ stats = get_projection_table_stats.GetRootAs(res.content)
1621
+ num_rows = stats.NumRows()
1622
+ size_in_bytes = stats.SizeInBytes()
1623
+ dirty_blocks_percentage = stats.DirtyBlocksPercentage()
1624
+ initial_sync_progress = stats.InitialSyncProgress()
1625
+ return num_rows, size_in_bytes, dirty_blocks_percentage, initial_sync_progress
1646
1626
 
1647
1627
  def alter_projection(self, bucket, schema, table, name, txid=0, client_tags=[], table_properties="",
1648
1628
  new_name="", expected_retvals=[]):
@@ -1674,10 +1654,10 @@ class VastdbApi:
1674
1654
  headers['Content-Length'] = str(len(alter_projection_req))
1675
1655
  url_params = {'name': name}
1676
1656
 
1677
- res = self.session.put(self._api_prefix(bucket=bucket, schema=schema, table=table, command="projection", url_params=url_params),
1678
- data=alter_projection_req, headers=headers)
1679
-
1680
- return self._check_res(res, "alter_projection", expected_retvals)
1657
+ self._request(
1658
+ method="PUT",
1659
+ url=self._url(bucket=bucket, schema=schema, table=table, command="projection", url_params=url_params),
1660
+ data=alter_projection_req, headers=headers)
1681
1661
 
1682
1662
  def drop_projection(self, bucket, schema, table, name, txid=0, client_tags=[], expected_retvals=[]):
1683
1663
  """
@@ -1688,9 +1668,10 @@ class VastdbApi:
1688
1668
  headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
1689
1669
  url_params = {'name': name}
1690
1670
 
1691
- res = self.session.delete(self._api_prefix(bucket=bucket, schema=schema, table=table, command="projection", url_params=url_params),
1692
- headers=headers)
1693
- return self._check_res(res, "drop_projection", expected_retvals)
1671
+ self._request(
1672
+ method="DELETE",
1673
+ url=self._url(bucket=bucket, schema=schema, table=table, command="projection", url_params=url_params),
1674
+ headers=headers)
1694
1675
 
1695
1676
  def list_projections(self, bucket, schema, table, txid=0, client_tags=[], max_keys=1000, next_key=0, name_prefix="",
1696
1677
  exact_match=False, expected_retvals=[], include_list_stats=False, count_only=False):
@@ -1714,24 +1695,26 @@ class VastdbApi:
1714
1695
  headers['tabular-include-list-stats'] = str(include_list_stats)
1715
1696
 
1716
1697
  projections = []
1717
- res = self.session.get(self._api_prefix(bucket=bucket, schema=schema, table=table, command="projection"), headers=headers)
1718
- self._check_res(res, "list_projections", expected_retvals)
1719
- if res.status_code == 200:
1720
- res_headers = res.headers
1721
- next_key = int(res_headers['tabular-next-key'])
1722
- is_truncated = res_headers['tabular-is-truncated'] == 'true'
1723
- count = int(res_headers['tabular-list-count'])
1724
- lists = list_projections.GetRootAs(res.content)
1725
- bucket_name = lists.BucketName().decode()
1726
- schema_name = lists.SchemaName().decode()
1727
- table_name = lists.TableName().decode()
1728
- if not bucket.startswith(bucket_name): # ignore snapshot name
1729
- raise ValueError(f'bucket: {bucket} did not start from {bucket_name}')
1730
- projections_length = lists.ProjectionsLength()
1731
- for i in range(projections_length):
1732
- projections.append(_parse_table_info(lists.Projections(i)))
1733
-
1734
- return bucket_name, schema_name, table_name, projections, next_key, is_truncated, count
1698
+ res = self._request(
1699
+ method="GET",
1700
+ url=self._url(bucket=bucket, schema=schema, table=table, command="projection"),
1701
+ headers=headers)
1702
+
1703
+ res_headers = res.headers
1704
+ next_key = int(res_headers['tabular-next-key'])
1705
+ is_truncated = res_headers['tabular-is-truncated'] == 'true'
1706
+ count = int(res_headers['tabular-list-count'])
1707
+ lists = list_projections.GetRootAs(res.content)
1708
+ bucket_name = lists.BucketName().decode()
1709
+ schema_name = lists.SchemaName().decode()
1710
+ table_name = lists.TableName().decode()
1711
+ if not bucket.startswith(bucket_name): # ignore snapshot name
1712
+ raise ValueError(f'bucket: {bucket} did not start from {bucket_name}')
1713
+ projections_length = lists.ProjectionsLength()
1714
+ for i in range(projections_length):
1715
+ projections.append(_parse_table_info(lists.Projections(i)))
1716
+
1717
+ return bucket_name, schema_name, table_name, projections, next_key, is_truncated, count
1735
1718
 
1736
1719
  def list_projection_columns(self, bucket, schema, table, projection, txid=0, client_tags=[], max_keys=1000,
1737
1720
  next_key=0, count_only=False, name_prefix="", exact_match=False,
@@ -1759,19 +1742,20 @@ class VastdbApi:
1759
1742
 
1760
1743
  url_params = {'name': projection}
1761
1744
 
1762
- res = self.session.get(self._api_prefix(bucket=bucket, schema=schema, table=table, command="projection-columns", url_params=url_params),
1763
- headers=headers, stream=True)
1764
- self._check_res(res, "list_projection_columns", expected_retvals)
1745
+ res = self._request(
1746
+ method="GET",
1747
+ url=self._url(bucket=bucket, schema=schema, table=table, command="projection-columns", url_params=url_params),
1748
+ headers=headers)
1749
+
1765
1750
  # list projection columns response will also show column type Sorted/UnSorted
1766
- if res.status_code == 200:
1767
- res_headers = res.headers
1768
- next_key = int(res_headers['tabular-next-key'])
1769
- is_truncated = res_headers['tabular-is-truncated'] == 'true'
1770
- count = int(res_headers['tabular-list-count'])
1771
- columns = [] if count_only else [[f.name, f.type, f.metadata] for f in
1772
- pa.ipc.open_stream(res.content).schema]
1751
+ res_headers = res.headers
1752
+ next_key = int(res_headers['tabular-next-key'])
1753
+ is_truncated = res_headers['tabular-is-truncated'] == 'true'
1754
+ count = int(res_headers['tabular-list-count'])
1755
+ columns = [] if count_only else [[f.name, f.type, f.metadata] for f in
1756
+ pa.ipc.open_stream(res.content).schema]
1773
1757
 
1774
- return columns, next_key, is_truncated, count
1758
+ return columns, next_key, is_truncated, count
1775
1759
 
1776
1760
 
1777
1761
  class QueryDataInternalError(Exception):
@@ -1828,15 +1812,12 @@ def _iter_query_data_response_columns(fileobj, stream_ids=None):
1828
1812
  yield (stream_id, next_row_id, table)
1829
1813
 
1830
1814
 
1831
- def parse_query_data_response(conn, schema, stream_ids=None, start_row_ids=None, debug=False, parser: Optional[QueryDataParser] = None):
1815
+ def parse_query_data_response(conn, schema, stream_ids=None, debug=False, parser: Optional[QueryDataParser] = None):
1832
1816
  """
1833
1817
  Generates pyarrow.Table objects from QueryData API response stream.
1834
1818
 
1835
1819
  A pyarrow.Table is a helper class that combines a Schema with multiple RecordBatches and allows easy data access.
1836
1820
  """
1837
- if start_row_ids is None:
1838
- start_row_ids = {}
1839
-
1840
1821
  is_empty_projection = (len(schema) == 0)
1841
1822
  if parser is None:
1842
1823
  parser = QueryDataParser(schema, debug=debug)
@@ -1855,8 +1836,7 @@ def parse_query_data_response(conn, schema, stream_ids=None, start_row_ids=None,
1855
1836
 
1856
1837
  _logger.debug("stream_id=%d rows=%d next_row_id=%d table=%s",
1857
1838
  stream_id, len(parsed_table), next_row_id, parsed_table)
1858
- start_row_ids[stream_id] = next_row_id
1859
- yield parsed_table # the result of a single "select_rows()" cycle
1839
+ yield stream_id, next_row_id, parsed_table
1860
1840
 
1861
1841
  if states:
1862
1842
  raise EOFError(f'all streams should be done before EOF. {states}')
@@ -2118,40 +2098,3 @@ def build_query_data_request(schema: 'pa.Schema' = pa.schema([]), predicate: ibi
2118
2098
  builder.Finish(relation)
2119
2099
 
2120
2100
  return QueryDataRequest(serialized=builder.Output(), response_schema=response_schema, response_parser=QueryDataParser(response_schema))
2121
-
2122
-
2123
- def convert_column_types(table: 'pa.Table') -> 'pa.Table':
2124
- """
2125
- Adjusting table values
2126
-
2127
- 1. Because the timestamp resolution is too high it is necessary to trim it. ORION-96961
2128
- 2. Since the values of nfs_mode_bits are returned in decimal, need to convert them to octal,
2129
- as in all representations, so that the mode of 448 turn into 700
2130
- 3. for owner_name and group_owner_name 0 -> root, and 65534 -> nobody
2131
- """
2132
- ts_indexes = []
2133
- indexes_of_fields_to_change = {}
2134
- sid_to_name = {
2135
- '0': 'root',
2136
- '65534': 'nobody' # NFSNOBODY_UID_16_BIT
2137
- }
2138
- column_matcher = { # column_name: custom converting rule
2139
- 'nfs_mode_bits': lambda val: int(oct(val).replace('0o', '')) if val is not None else val,
2140
- 'owner_name': lambda val: sid_to_name.get(val, val),
2141
- 'group_owner_name': lambda val: sid_to_name.get(val, val),
2142
- }
2143
- for index, field in enumerate(table.schema):
2144
- if isinstance(field.type, pa.TimestampType) and field.type.unit == 'ns':
2145
- ts_indexes.append(index)
2146
- if field.name in column_matcher:
2147
- indexes_of_fields_to_change[field.name] = index
2148
- for changing_index in ts_indexes:
2149
- field_name = table.schema[changing_index].name
2150
- new_column = table[field_name].cast(pa.timestamp('us'), safe=False)
2151
- table = table.set_column(changing_index, field_name, new_column)
2152
- for field_name, changing_index in indexes_of_fields_to_change.items():
2153
- new_column = table[field_name].to_pylist()
2154
- new_column = list(map(column_matcher[field_name], new_column))
2155
- new_column = pa.array(new_column, table[field_name].type)
2156
- table = table.set_column(changing_index, field_name, new_column)
2157
- return table