PyPI - vastdb - Versions diffs - 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl - Mend

vastdb 0.1.6 (py3-none-any.whl) → 0.1.8 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

vastdb/__init__.py +3 -0
vastdb/{internal_commands.py → _internal.py} +289 -346
vastdb/bucket.py +2 -2
vastdb/conftest.py +16 -2
vastdb/errors.py +6 -0
vastdb/schema.py +8 -4
vastdb/session.py +18 -5
vastdb/table.py +79 -58
vastdb/tests/test_duckdb.py +2 -2
vastdb/tests/test_projections.py +5 -1
vastdb/tests/test_sanity.py +5 -5
vastdb/tests/test_tables.py +54 -1
vastdb/tests/test_util.py +6 -0
vastdb/transaction.py +2 -2
vastdb/util.py +40 -1
vastdb/vast_tests/__init__.py +0 -0
vastdb/vast_tests/test_ha.py +29 -0
{vastdb-0.1.6.dist-info → vastdb-0.1.8.dist-info}/METADATA +2 -2
{vastdb-0.1.6.dist-info → vastdb-0.1.8.dist-info}/RECORD +22 -20
{vastdb-0.1.6.dist-info → vastdb-0.1.8.dist-info}/LICENSE +0 -0
{vastdb-0.1.6.dist-info → vastdb-0.1.8.dist-info}/WHEEL +0 -0
{vastdb-0.1.6.dist-info → vastdb-0.1.8.dist-info}/top_level.txt +0 -0

vastdb/{internal_commands.py → _internal.py} RENAMED

@@ -5,17 +5,37 @@ import re
 import struct
 import urllib.parse
 from collections import defaultdict, namedtuple
+from dataclasses import dataclass, field
 from enum import Enum
-from typing import Any, Dict, Iterator, List, Optional, Union
+from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, Union
+import backoff
 import flatbuffers
 import ibis
 import pyarrow as pa
-import pyarrow.parquet as pq
 import requests
 import urllib3
 import xmltodict
 from aws_requests_auth.aws_auth import AWSRequestsAuth
+from ibis.expr.operations.generic import (
+    IsNull,
+    Literal,
+)
+from ibis.expr.operations.logical import (
+    And,
+    Between,
+    Equals,
+    Greater,
+    GreaterEqual,
+    InValues,
+    Less,
+    LessEqual,
+    Not,
+    NotEquals,
+    Or,
+)
+from ibis.expr.operations.relations import Field
+from ibis.expr.operations.strings import StringContains
 import vast_flatbuf.org.apache.arrow.computeir.flatbuf.BinaryLiteral as fb_binary_lit
 import vast_flatbuf.org.apache.arrow.computeir.flatbuf.BooleanLiteral as fb_bool_lit
@@ -137,26 +157,6 @@ class Predicate:
         self.expr = expr
     def serialize(self, builder: 'flatbuffers.builder.Builder'):
-        from ibis.expr.operations.generic import (
-            IsNull,
-            Literal,
-            TableColumn,
-        )
-        from ibis.expr.operations.logical import (
-            And,
-            Between,
-            Equals,
-            Greater,
-            GreaterEqual,
-            InValues,
-            Less,
-            LessEqual,
-            Not,
-            NotEquals,
-            Or,
-        )
-        from ibis.expr.operations.strings import StringContains
         builder_map = {
             Greater: self.build_greater,
             GreaterEqual: self.build_greater_equal,
@@ -216,7 +216,7 @@ class Predicate:
                             if not isinstance(literal, Literal):
                                 raise NotImplementedError(self.expr)
-                    if not isinstance(column, TableColumn):
+                    if not isinstance(column, Field):
                         raise NotImplementedError(self.expr)
                     field_name = column.name
@@ -722,19 +722,67 @@ def _parse_table_info(obj):
 TableStatsResult = namedtuple("TableStatsResult", ["num_rows", "size_in_bytes", "is_external_rowid_alloc", "endpoints"])
+_RETRIABLE_EXCEPTIONS = (
+    errors.ConnectionError,  # only if 'may_retry' is True
+    errors.Slowdown,
+)
+def _backoff_giveup(exc: Exception) -> bool:
+    """Exception types below MUST be part of `_RETRIABLE_EXCEPTIONS` above."""
+    _logger.info("Backoff giveup: %r", exc)
+    if isinstance(exc, errors.Slowdown):
+        return False  # the server is overloaded, don't give up
+    if isinstance(exc, errors.ConnectionError):
+        if exc.may_retry:
+            return False  # don't give up of retriable connection errors
+    return True  # give up in case of other exceptions
+@dataclass
+class BackoffConfig:
+    wait_gen: Callable = field(default=backoff.expo)
+    max_value: Optional[float] = None  # max duration for a single wait period
+    max_tries: int = 10
+    max_time: float = 60.0  # in seconds
+    backoff_log_level: int = logging.DEBUG
 class VastdbApi:
     # we expect the vast version to be <major>.<minor>.<patch>.<protocol>
     VAST_VERSION_REGEX = re.compile(r'^vast (\d+\.\d+\.\d+\.\d+)$')
-    def __init__(self, endpoint, access_key, secret_key, auth_type=AuthType.SIGV4, ssl_verify=True):
+    def __init__(self, endpoint, access_key, secret_key,
+            *,
+            auth_type=AuthType.SIGV4,
+            ssl_verify=True,
+            backoff_config: Optional[BackoffConfig] = None):
+        from . import __version__  # import lazily here (to avoid circular dependencies)
+        self.client_sdk_version = f"VAST Database Python SDK {__version__} - 2024 (c)"
         url = urllib3.util.parse_url(endpoint)
         self.access_key = access_key
         self.secret_key = secret_key
         self.default_max_list_columns_page_size = 1000
-        self.session = requests.Session()
-        self.session.verify = ssl_verify
-        self.session.headers['user-agent'] = "VastData Tabular API 1.0 - 2022 (c)"
+        self._session = requests.Session()
+        self._session.verify = ssl_verify
+        self._session.headers['user-agent'] = self.client_sdk_version
+        backoff_config = backoff_config or BackoffConfig()
+        self._backoff_decorator = backoff.on_exception(
+            wait_gen=backoff_config.wait_gen,
+            exception=_RETRIABLE_EXCEPTIONS,
+            giveup=_backoff_giveup,
+            max_tries=backoff_config.max_tries,
+            max_time=backoff_config.max_time,
+            max_value=backoff_config.max_value,  # passed to `backoff_config.wait_gen`
+            backoff_log_level=backoff_config.backoff_log_level)
+        self._request = self._backoff_decorator(self._single_request)
         if url.port in {80, 443, None}:
             self.aws_host = f'{url.host}'
@@ -744,22 +792,21 @@ class VastdbApi:
         self.url = str(url)
         _logger.debug('url=%s aws_host=%s', self.url, self.aws_host)
-        self.session.auth = AWSRequestsAuth(aws_access_key=access_key,
+        self._session.auth = AWSRequestsAuth(aws_access_key=access_key,
                                             aws_secret_access_key=secret_key,
                                             aws_host=self.aws_host,
-                                            aws_region='us-east-1',
+                                            aws_region='',
                                             aws_service='s3')
         # probe the cluster for its version
-        self.vast_version = None
-        res = self.session.get(self.url)
+        res = self._request(method="GET", url=self._url(command="transaction"), skip_status_check=True)  # used only for the response headers
+        _logger.debug("headers=%s code=%s content=%s", res.headers, res.status_code, res.content)
         server_header = res.headers.get("Server")
         if server_header is None:
             _logger.error("Response doesn't contain 'Server' header")
         else:
-            _logger.debug("Server header is '%s'", server_header)
             if m := self.VAST_VERSION_REGEX.match(server_header):
-                self.vast_version, = m.groups()
+                self.vast_version: Tuple[int, ...] = tuple(int(v) for v in m.group(1).split("."))
                 return
             else:
                 _logger.error("'Server' header '%s' doesn't match the expected pattern", server_header)
@@ -772,15 +819,21 @@ class VastdbApi:
         _logger.critical(msg)
         raise NotImplementedError(msg)
-    def update_mgmt_session(self, access_key: str, secret_key: str, auth_type=AuthType.SIGV4):
-        if auth_type != AuthType.BASIC:
-            self.session.auth = AWSRequestsAuth(aws_access_key=access_key,
-                                                aws_secret_access_key=secret_key,
-                                                aws_host=self.aws_host,
-                                                aws_region='us-east-1',
-                                                aws_service='s3')
-    def _api_prefix(self, bucket="", schema="", table="", command="", url_params={}):
+    def _single_request(self, *, method, url, skip_status_check=False, **kwargs):
+        _logger.debug("Sending request: %s %s %s", method, url, kwargs)
+        try:
+            res = self._session.request(method=method, url=url, **kwargs)
+        except requests.exceptions.ConnectionError as err:
+            # low-level connection issue, it is safe to retry only read-only requests
+            may_retry = (method == "GET")
+            raise errors.ConnectionError(cause=err, may_retry=may_retry) from err
+        if not skip_status_check:
+            if exc := errors.from_response(res):
+                raise exc  # application-level error
+        return res  # successful response
+    def _url(self, bucket="", schema="", table="", command="", url_params={}):
         prefix_list = [self.url]
         if len(bucket):
             prefix_list.append(bucket)
@@ -815,11 +868,6 @@ class VastdbApi:
         return common_headers | {f'tabular-client-tags-{index}': tag for index, tag in enumerate(client_tags)}
-    def _check_res(self, res, cmd="", expected_retvals=[]):
-        if exc := errors.from_response(res):
-            raise exc
-        return res
     def create_schema(self, bucket, name, txid=0, client_tags=[], schema_properties="", expected_retvals=[]):
         """
         Create a collection of tables, use the following request
@@ -841,10 +889,10 @@ class VastdbApi:
         headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
         headers['Content-Length'] = str(len(create_schema_req))
-        res = self.session.post(self._api_prefix(bucket=bucket, schema=name, command="schema"),
-                                data=create_schema_req, headers=headers, stream=True)
-        return self._check_res(res, "create_schema", expected_retvals)
+        self._request(
+            method="POST",
+            url=self._url(bucket=bucket, schema=name, command="schema"),
+            data=create_schema_req, headers=headers)
     def alter_schema(self, bucket, name, txid=0, client_tags=[], schema_properties="", new_name="", expected_retvals=[]):
         """
@@ -870,10 +918,10 @@ class VastdbApi:
         headers['Content-Length'] = str(len(alter_schema_req))
         url_params = {'tabular-new-schema-name': new_name} if len(new_name) else {}
-        res = self.session.put(self._api_prefix(bucket=bucket, schema=name, command="schema", url_params=url_params),
-                               data=alter_schema_req, headers=headers)
-        return self._check_res(res, "alter_schema", expected_retvals)
+        self._request(
+            method="PUT",
+            url=self._url(bucket=bucket, schema=name, command="schema", url_params=url_params),
+            data=alter_schema_req, headers=headers)
     def drop_schema(self, bucket, name, txid=0, client_tags=[], expected_retvals=[]):
         """
@@ -884,9 +932,10 @@ class VastdbApi:
         """
         headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
-        res = self.session.delete(self._api_prefix(bucket=bucket, schema=name, command="schema"), headers=headers)
-        return self._check_res(res, "drop_schema", expected_retvals)
+        self._request(
+            method="DELETE",
+            url=self._url(bucket=bucket, schema=name, command="schema"),
+            headers=headers)
     def list_schemas(self, bucket, schema="", txid=0, client_tags=[], max_keys=1000, next_key=0, name_prefix="",
                      exact_match=False, expected_retvals=[], count_only=False):
@@ -915,25 +964,27 @@ class VastdbApi:
         schemas = []
         schema = schema or ""
-        res = self.session.get(self._api_prefix(bucket=bucket, schema=schema, command="schema"), headers=headers, stream=True)
-        self._check_res(res, "list_schemas", expected_retvals)
-        if res.status_code == 200:
-            res_headers = res.headers
-            next_key = int(res_headers['tabular-next-key'])
-            is_truncated = res_headers['tabular-is-truncated'] == 'true'
-            lists = list_schemas.GetRootAs(res.content)
-            bucket_name = lists.BucketName().decode()
-            if not bucket.startswith(bucket_name):
-                raise ValueError(f'bucket: {bucket} did not start from {bucket_name}')
-            schemas_length = lists.SchemasLength()
-            count = int(res_headers['tabular-list-count']) if 'tabular-list-count' in res_headers else schemas_length
-            for i in range(schemas_length):
-                schema_obj = lists.Schemas(i)
-                name = schema_obj.Name().decode()
-                properties = schema_obj.Properties().decode()
-                schemas.append([name, properties])
-            return bucket_name, schemas, next_key, is_truncated, count
+        res = self._request(
+            method="GET",
+            url=self._url(bucket=bucket, schema=schema, command="schema"),
+            headers=headers)
+        res_headers = res.headers
+        next_key = int(res_headers['tabular-next-key'])
+        is_truncated = res_headers['tabular-is-truncated'] == 'true'
+        lists = list_schemas.GetRootAs(res.content)
+        bucket_name = lists.BucketName().decode()
+        if not bucket.startswith(bucket_name):
+            raise ValueError(f'bucket: {bucket} did not start from {bucket_name}')
+        schemas_length = lists.SchemasLength()
+        count = int(res_headers['tabular-list-count']) if 'tabular-list-count' in res_headers else schemas_length
+        for i in range(schemas_length):
+            schema_obj = lists.Schemas(i)
+            name = schema_obj.Name().decode()
+            properties = schema_obj.Properties().decode()
+            schemas.append([name, properties])
+        return bucket_name, schemas, next_key, is_truncated, count
     def list_snapshots(self, bucket, max_keys=1000, next_token=None, name_prefix=''):
         next_token = next_token or ''
@@ -941,8 +992,9 @@ class VastdbApi:
         if next_token:
             url_params['continuation-token'] = next_token
-        res = self.session.get(self._api_prefix(bucket=bucket, command="list", url_params=url_params), headers={}, stream=True)
-        self._check_res(res, "list_snapshots")
+        res = self._request(
+            method="GET",
+            url=self._url(bucket=bucket, command="list", url_params=url_params))
         xml_str = res.content.decode()
         xml_dict = xmltodict.parse(xml_str)
@@ -985,33 +1037,10 @@ class VastdbApi:
         if create_imports_table:
             url_params['sub-table'] = IMPORTED_OBJECTS_TABLE_NAME
-        res = self.session.post(self._api_prefix(bucket=bucket, schema=schema, table=name, command="table", url_params=url_params),
-                                data=serialized_schema, headers=headers)
-        return self._check_res(res, "create_table", expected_retvals)
-    def create_table_from_parquet_schema(self, bucket, schema, name, parquet_path=None,
-                                         parquet_bucket_name=None, parquet_object_name=None,
-                                         txid=0, client_tags=[], expected_retvals=[]):
-        # Use pyarrow.parquet.ParquetDataset to open the Parquet file
-        if parquet_path:
-            parquet_ds = pq.ParquetDataset(parquet_path)
-        elif parquet_bucket_name and parquet_object_name:
-            s3fs = pa.fs.S3FileSystem(access_key=self.access_key, secret_key=self.secret_key, endpoint_override=self.url)
-            parquet_ds = pq.ParquetDataset('/'.join([parquet_bucket_name, parquet_object_name]), filesystem=s3fs)
-        else:
-            raise RuntimeError(f'invalid params parquet_path={parquet_path} parquet_bucket_name={parquet_bucket_name} parquet_object_name={parquet_object_name}')
-        # Get the schema of the Parquet file
-        if isinstance(parquet_ds.schema, pq.ParquetSchema):
-            arrow_schema = parquet_ds.schema.to_arrow_schema()
-        elif isinstance(parquet_ds.schema, pa.Schema):
-            arrow_schema = parquet_ds.schema
-        else:
-            raise RuntimeError(f'invalid type(parquet_ds.schema) = {type(parquet_ds.schema)}')
-        # create the table
-        return self.create_table(bucket, schema, name, arrow_schema, txid, client_tags, expected_retvals)
+        self._request(
+            method="POST",
+            url=self._url(bucket=bucket, schema=schema, table=name, command="table", url_params=url_params),
+            data=serialized_schema, headers=headers)
     def get_table_stats(self, bucket, schema, name, txid=0, client_tags=[], expected_retvals=[], imports_table_stats=False):
         """
@@ -1023,8 +1052,10 @@ class VastdbApi:
         """
         headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
         url_params = {'sub-table': IMPORTED_OBJECTS_TABLE_NAME} if imports_table_stats else {}
-        res = self.session.get(self._api_prefix(bucket=bucket, schema=schema, table=name, command="stats", url_params=url_params), headers=headers)
-        self._check_res(res, "get_table_stats", expected_retvals)
+        res = self._request(
+            method="GET",
+            url=self._url(bucket=bucket, schema=schema, table=name, command="stats", url_params=url_params),
+            headers=headers)
         stats = get_table_stats.GetRootAs(res.content)
         num_rows = stats.NumRows()
@@ -1059,10 +1090,10 @@ class VastdbApi:
         headers['Content-Length'] = str(len(alter_table_req))
         url_params = {'tabular-new-table-name': schema + "/" + new_name} if len(new_name) else {}
-        res = self.session.put(self._api_prefix(bucket=bucket, schema=schema, table=name, command="table", url_params=url_params),
-                               data=alter_table_req, headers=headers)
-        return self._check_res(res, "alter_table", expected_retvals)
+        self._request(
+            method="PUT",
+            url=self._url(bucket=bucket, schema=schema, table=name, command="table", url_params=url_params),
+            data=alter_table_req, headers=headers)
     def drop_table(self, bucket, schema, name, txid=0, client_tags=[], expected_retvals=[], remove_imports_table=False):
         """
@@ -1075,9 +1106,10 @@ class VastdbApi:
         headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
         url_params = {'sub-table': IMPORTED_OBJECTS_TABLE_NAME} if remove_imports_table else {}
-        res = self.session.delete(self._api_prefix(bucket=bucket, schema=schema, table=name, command="table", url_params=url_params),
-                                  headers=headers)
-        return self._check_res(res, "drop_table", expected_retvals)
+        self._request(
+            method="DELETE",
+            url=self._url(bucket=bucket, schema=schema, table=name, command="table", url_params=url_params),
+            headers=headers)
     def list_tables(self, bucket, schema, txid=0, client_tags=[], max_keys=1000, next_key=0, name_prefix="",
                     exact_match=False, expected_retvals=[], include_list_stats=False, count_only=False):
@@ -1101,23 +1133,25 @@ class VastdbApi:
         headers['tabular-include-list-stats'] = str(include_list_stats)
         tables = []
-        res = self.session.get(self._api_prefix(bucket=bucket, schema=schema, command="table"), headers=headers)
-        self._check_res(res, "list_table", expected_retvals)
-        if res.status_code == 200:
-            res_headers = res.headers
-            next_key = int(res_headers['tabular-next-key'])
-            is_truncated = res_headers['tabular-is-truncated'] == 'true'
-            lists = list_tables.GetRootAs(res.content)
-            bucket_name = lists.BucketName().decode()
-            schema_name = lists.SchemaName().decode()
-            if not bucket.startswith(bucket_name):  # ignore snapshot name
-                raise ValueError(f'bucket: {bucket} did not start from {bucket_name}')
-            tables_length = lists.TablesLength()
-            count = int(res_headers['tabular-list-count']) if 'tabular-list-count' in res_headers else tables_length
-            for i in range(tables_length):
-                tables.append(_parse_table_info(lists.Tables(i)))
-            return bucket_name, schema_name, tables, next_key, is_truncated, count
+        res = self._request(
+            method="GET",
+            url=self._url(bucket=bucket, schema=schema, command="table"),
+            headers=headers)
+        res_headers = res.headers
+        next_key = int(res_headers['tabular-next-key'])
+        is_truncated = res_headers['tabular-is-truncated'] == 'true'
+        lists = list_tables.GetRootAs(res.content)
+        bucket_name = lists.BucketName().decode()
+        schema_name = lists.SchemaName().decode()
+        if not bucket.startswith(bucket_name):  # ignore snapshot name
+            raise ValueError(f'bucket: {bucket} did not start from {bucket_name}')
+        tables_length = lists.TablesLength()
+        count = int(res_headers['tabular-list-count']) if 'tabular-list-count' in res_headers else tables_length
+        for i in range(tables_length):
+            tables.append(_parse_table_info(lists.Tables(i)))
+        return bucket_name, schema_name, tables, next_key, is_truncated, count
     def add_columns(self, bucket, schema, name, arrow_schema, txid=0, client_tags=[], expected_retvals=[]):
         """
@@ -1139,9 +1173,10 @@ class VastdbApi:
         serialized_schema = arrow_schema.serialize()
         headers['Content-Length'] = str(len(serialized_schema))
-        res = self.session.post(self._api_prefix(bucket=bucket, schema=schema, table=name, command="column"),
-                                data=serialized_schema, headers=headers)
-        return self._check_res(res, "add_columns", expected_retvals)
+        self._request(
+            method="POST",
+            url=self._url(bucket=bucket, schema=schema, table=name, command="column"),
+            data=serialized_schema, headers=headers)
     def alter_column(self, bucket, schema, table, name, txid=0, client_tags=[], column_properties="",
                      new_name="", column_sep=".", column_stats="", expected_retvals=[]):
@@ -1177,9 +1212,10 @@ class VastdbApi:
         if len(new_name):
             url_params['tabular-new-column-name'] = new_name
-        res = self.session.put(self._api_prefix(bucket=bucket, schema=schema, table=table, command="column", url_params=url_params),
-                               data=alter_column_req, headers=headers)
-        return self._check_res(res, "alter_column", expected_retvals)
+        self._request(
+            method="PUT",
+            url=self._url(bucket=bucket, schema=schema, table=table, command="column", url_params=url_params),
+            data=alter_column_req, headers=headers)
     def drop_columns(self, bucket, schema, table, arrow_schema, txid=0, client_tags=[], expected_retvals=[]):
         """
@@ -1192,9 +1228,10 @@ class VastdbApi:
         serialized_schema = arrow_schema.serialize()
         headers['Content-Length'] = str(len(serialized_schema))
-        res = self.session.delete(self._api_prefix(bucket=bucket, schema=schema, table=table, command="column"),
-                                data=serialized_schema, headers=headers)
-        return self._check_res(res, "drop_columns", expected_retvals)
+        self._request(
+            method="DELETE",
+            url=self._url(bucket=bucket, schema=schema, table=table, command="column"),
+            data=serialized_schema, headers=headers)
     def list_columns(self, bucket, schema, table, *, txid=0, client_tags=None, max_keys=None, next_key=0,
                      count_only=False, name_prefix="", exact_match=False,
@@ -1226,18 +1263,18 @@ class VastdbApi:
             headers['tabular-name-prefix'] = name_prefix
         url_params = {'sub-table': IMPORTED_OBJECTS_TABLE_NAME} if list_imports_table else {}
-        res = self.session.get(self._api_prefix(bucket=bucket, schema=schema, table=table, command="column",
-                                                url_params=url_params),
-                               headers=headers, stream=True)
-        self._check_res(res, "list_columns", expected_retvals)
-        if res.status_code == 200:
-            res_headers = res.headers
-            next_key = int(res_headers['tabular-next-key'])
-            is_truncated = res_headers['tabular-is-truncated'] == 'true'
-            count = int(res_headers['tabular-list-count'])
-            columns = [] if count_only else pa.ipc.open_stream(res.content).schema
-            return columns, next_key, is_truncated, count
+        res = self._request(
+            method="GET",
+            url=self._url(bucket=bucket, schema=schema, table=table, command="column", url_params=url_params),
+            headers=headers)
+        res_headers = res.headers
+        next_key = int(res_headers['tabular-next-key'])
+        is_truncated = res_headers['tabular-is-truncated'] == 'true'
+        count = int(res_headers['tabular-list-count'])
+        columns = [] if count_only else pa.ipc.open_stream(res.content).schema
+        return columns, next_key, is_truncated, count
     def begin_transaction(self, client_tags=[], expected_retvals=[]):
         """
@@ -1248,8 +1285,10 @@ class VastdbApi:
         tabular-txid: TransactionId
         """
         headers = self._fill_common_headers(client_tags=client_tags)
-        res = self.session.post(self._api_prefix(command="transaction"), headers=headers)
-        return self._check_res(res, "begin_transaction", expected_retvals)
+        return self._request(
+            method="POST",
+            url=self._url(command="transaction"),
+            headers=headers)
     def commit_transaction(self, txid, client_tags=[], expected_retvals=[]):
         """
@@ -1258,8 +1297,10 @@ class VastdbApi:
         tabular-client-tag: ClientTag
         """
         headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
-        res = self.session.put(self._api_prefix(command="transaction"), headers=headers)
-        return self._check_res(res, "commit_transaction", expected_retvals)
+        self._request(
+            method="PUT",
+            url=self._url(command="transaction"),
+            headers=headers)
     def rollback_transaction(self, txid, client_tags=[], expected_retvals=[]):
         """
@@ -1268,8 +1309,10 @@ class VastdbApi:
         tabular-client-tag: ClientTag
         """
         headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
-        res = self.session.delete(self._api_prefix(command="transaction"), headers=headers)
-        return self._check_res(res, "rollback_transaction", expected_retvals)
+        self._request(
+            method="DELETE",
+            url=self._url(command="transaction"),
+            headers=headers)
     def get_transaction(self, txid, client_tags=[], expected_retvals=[]):
         """
@@ -1278,56 +1321,10 @@ class VastdbApi:
         tabular-client-tag: ClientTag
         """
         headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
-        res = self.session.get(self._api_prefix(command="transaction"), headers=headers)
-        return self._check_res(res, "get_transaction", expected_retvals)
-    def select_row_ids(self, bucket, schema, table, params, txid=0, client_tags=[], expected_retvals=[],
-                       retry_count=0, enable_sorted_projections=True):
-        """
-        POST /mybucket/myschema/mytable?query-data=SelectRowIds HTTP/1.1
-        """
-        # add query option select-only and read-only
-        headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
-        headers['Content-Length'] = str(len(params))
-        headers['tabular-enable-sorted-projections'] = str(enable_sorted_projections)
-        if retry_count > 0:
-            headers['tabular-retry-count'] = str(retry_count)
-        res = self.session.post(self._api_prefix(bucket=bucket, schema=schema, table=table, command="query-data=SelectRowIds",),
-                                data=params, headers=headers, stream=True)
-        return self._check_res(res, "query_data", expected_retvals)
-    def read_columns_data(self, bucket, schema, table, params, txid=0, client_tags=[], expected_retvals=[], tenant_guid=None,
-                          retry_count=0, enable_sorted_projections=True):
-        """
-        POST /mybucket/myschema/mytable?query-data=ReadColumns HTTP/1.1
-        """
-        headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
-        headers['Content-Length'] = str(len(params))
-        headers['tabular-enable-sorted-projections'] = str(enable_sorted_projections)
-        if retry_count > 0:
-            headers['tabular-retry-count'] = str(retry_count)
-        res = self.session.post(self._api_prefix(bucket=bucket, schema=schema, table=table, command="query-data=ReadColumns",),
-                               data=params, headers=headers, stream=True)
-        return self._check_res(res, "query_data", expected_retvals)
-    def count_rows(self, bucket, schema, table, params, txid=0, client_tags=[], expected_retvals=[], tenant_guid=None,
-                   retry_count=0, enable_sorted_projections=True):
-        """
-        POST /mybucket/myschema/mytable?query-data=CountRows HTTP/1.1
-        """
-        headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
-        headers['Content-Length'] = str(len(params))
-        headers['tabular-enable-sorted-projections'] = str(enable_sorted_projections)
-        if retry_count > 0:
-            headers['tabular-retry-count'] = str(retry_count)
-        res = self.session.post(self._api_prefix(bucket=bucket, schema=schema, table=table, command="query-data=CountRows",),
-                               data=params, headers=headers, stream=True)
-        return self._check_res(res, "query_data", expected_retvals)
+        self._request(
+            method="GET",
+            url=self._url(command="transaction"),
+            headers=headers)
     def _build_query_data_headers(self, txid, client_tags, params, split, num_sub_splits, request_format, response_format,
                                   enable_sorted_projections, limit_rows, schedule_id, retry_count, search_path, tenant_guid,
@@ -1369,35 +1366,6 @@ class VastdbApi:
             url_params['name'] = projection
         return url_params
-    def legacy_query_data(self, bucket, schema, table, params, split=(0, 1, 8), num_sub_splits=1, response_row_id=False,
-                      txid=0, client_tags=[], expected_retvals=[], limit_rows=0, schedule_id=None, retry_count=0,
-                      search_path=None, sub_split_start_row_ids=[], tenant_guid=None, projection='', enable_sorted_projections=True,
-                      request_format='string', response_format='string', query_imports_table=False):
-        """
-        POST /mybucket/myschema/mytable?query-data=LegacyQueryData HTTP/1.1
-        Content-Length: ContentLength
-        tabular-txid: TransactionId
-        tabular-client-tag: ClientTag
-        tabular-split: "split_id,total_splits,num_row_groups_per_split"
-        tabular-num-of-subsplits: "total"
-        tabular-request-format: "string"
-        tabular-response-format: "string" #arrow/trino
-        tabular-schedule-id: "schedule-id"
-        Request Body (flatbuf)
-        projections_chunk [expressions]
-        predicate_chunk "formatted_data", (required)
-        """
-        headers = self._build_query_data_headers(txid, client_tags, params, split, num_sub_splits, request_format, response_format,
-                                                  enable_sorted_projections, limit_rows, schedule_id, retry_count, search_path, tenant_guid,
-                                                  sub_split_start_row_ids)
-        url_params = self._build_query_data_url_params(projection, query_imports_table)
-        res = self.session.post(self._api_prefix(bucket=bucket, schema=schema, table=table, command="query-data=LegacyQueryData",
-                                                  url_params=url_params), data=params, headers=headers, stream=True)
-        return self._check_res(res, "legacy_query_data", expected_retvals)
     def query_data(self, bucket, schema, table, params, split=(0, 1, 8), num_sub_splits=1, response_row_id=False,
                    txid=0, client_tags=[], expected_retvals=[], limit_rows=0, schedule_id=None, retry_count=0,
                    search_path=None, sub_split_start_row_ids=[], tenant_guid=None, projection='', enable_sorted_projections=True,
@@ -1427,9 +1395,11 @@ class VastdbApi:
         url_params = self._build_query_data_url_params(projection, query_imports_table)
-        res = self.session.get(self._api_prefix(bucket=bucket, schema=schema, table=table, command="data", url_params=url_params),
-                               data=params, headers=headers, stream=True)
-        return self._check_res(res, "query_data", expected_retvals)
+        # The retries will be done during SelectSplitState processing:
+        return self._single_request(
+            method="GET",
+            url=self._url(bucket=bucket, schema=schema, table=table, command="data", url_params=url_params),
+            data=params, headers=headers, stream=True)
     """
     source_files: list of (bucket_name, file_name)
@@ -1506,6 +1476,10 @@ class VastdbApi:
                 else:
                     _logger.debug("import_data of object name '%s' is in progress. "
                                   "status: %s", chunk_dict['object_name'], chunk_dict['res'])
+                    if chunk_dict['res'] == 'Success':
+                        _logger.info("imported /%s/%s into table=/%s/%s/%s",
+                                     chunk_dict['bucket_name'], chunk_dict['object_name'],
+                                     bucket, schema, table)
             return response
         headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
@@ -1515,12 +1489,14 @@ class VastdbApi:
             headers['tabular-schedule-id'] = str(schedule_id)
         if retry_count > 0:
             headers['tabular-retry-count'] = str(retry_count)
-        res = self.session.post(self._api_prefix(bucket=bucket, schema=schema, table=table, command="data"),
-                                data=import_req, headers=headers, stream=True)
+        res = self._request(
+            method="POST",
+            url=self._url(bucket=bucket, schema=schema, table=table, command="data"),
+            data=import_req, headers=headers, stream=True)
         if blocking:
             res = iterate_over_import_data_response(res)
-        return self._check_res(res, "import_data", expected_retvals)
+        return res
     def insert_rows(self, bucket, schema, table, record_batch, txid=0, client_tags=[], expected_retvals=[]):
         """
@@ -1534,9 +1510,10 @@ class VastdbApi:
         """
         headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
         headers['Content-Length'] = str(len(record_batch))
-        res = self.session.post(self._api_prefix(bucket=bucket, schema=schema, table=table, command="rows"),
-                                data=record_batch, headers=headers, stream=True)
-        return self._check_res(res, "insert_rows", expected_retvals)
+        return self._request(
+            method="POST",
+            url=self._url(bucket=bucket, schema=schema, table=table, command="rows"),
+            data=record_batch, headers=headers)
     def update_rows(self, bucket, schema, table, record_batch, txid=0, client_tags=[], expected_retvals=[]):
         """
@@ -1550,9 +1527,10 @@ class VastdbApi:
         """
         headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
         headers['Content-Length'] = str(len(record_batch))
-        res = self.session.put(self._api_prefix(bucket=bucket, schema=schema, table=table, command="rows"),
-                                data=record_batch, headers=headers)
-        self._check_res(res, "update_rows", expected_retvals)
+        self._request(
+            method="PUT",
+            url=self._url(bucket=bucket, schema=schema, table=table, command="rows"),
+            data=record_batch, headers=headers)
     def delete_rows(self, bucket, schema, table, record_batch, txid=0, client_tags=[], expected_retvals=[],
                     delete_from_imports_table=False):
@@ -1569,9 +1547,10 @@ class VastdbApi:
         headers['Content-Length'] = str(len(record_batch))
         url_params = {'sub-table': IMPORTED_OBJECTS_TABLE_NAME} if delete_from_imports_table else {}
-        res = self.session.delete(self._api_prefix(bucket=bucket, schema=schema, table=table, command="rows", url_params=url_params),
-                                  data=record_batch, headers=headers)
-        self._check_res(res, "delete_rows", expected_retvals)
+        self._request(
+            method="DELETE",
+            url=self._url(bucket=bucket, schema=schema, table=table, command="rows", url_params=url_params),
+            data=record_batch, headers=headers)
     def create_projection(self, bucket, schema, table, name, columns, txid=0, client_tags=[], expected_retvals=[]):
         """
@@ -1618,9 +1597,10 @@ class VastdbApi:
         headers['Content-Length'] = str(len(create_projection_req))
         url_params = {'name': name}
-        res = self.session.post(self._api_prefix(bucket=bucket, schema=schema, table=table, command="projection", url_params=url_params),
-                                data=create_projection_req, headers=headers)
-        return self._check_res(res, "create_projection", expected_retvals)
+        self._request(
+            method="POST",
+            url=self._url(bucket=bucket, schema=schema, table=table, command="projection", url_params=url_params),
+            data=create_projection_req, headers=headers)
     def get_projection_stats(self, bucket, schema, table, name, txid=0, client_tags=[], expected_retvals=[]):
         """
@@ -1632,17 +1612,17 @@ class VastdbApi:
         """
         headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
         url_params = {'name': name}
-        res = self.session.get(self._api_prefix(bucket=bucket, schema=schema, table=table, command="projection-stats", url_params=url_params),
-                               headers=headers)
-        if res.status_code == 200:
-            stats = get_projection_table_stats.GetRootAs(res.content)
-            num_rows = stats.NumRows()
-            size_in_bytes = stats.SizeInBytes()
-            dirty_blocks_percentage = stats.DirtyBlocksPercentage()
-            initial_sync_progress = stats.InitialSyncProgress()
-            return num_rows, size_in_bytes, dirty_blocks_percentage, initial_sync_progress
-        return self._check_res(res, "get_projection_stats", expected_retvals)
+        res = self._request(
+            method="GET",
+            url=self._url(bucket=bucket, schema=schema, table=table, command="projection-stats", url_params=url_params),
+            headers=headers)
+        stats = get_projection_table_stats.GetRootAs(res.content)
+        num_rows = stats.NumRows()
+        size_in_bytes = stats.SizeInBytes()
+        dirty_blocks_percentage = stats.DirtyBlocksPercentage()
+        initial_sync_progress = stats.InitialSyncProgress()
+        return num_rows, size_in_bytes, dirty_blocks_percentage, initial_sync_progress
     def alter_projection(self, bucket, schema, table, name, txid=0, client_tags=[], table_properties="",
                          new_name="", expected_retvals=[]):
@@ -1674,10 +1654,10 @@ class VastdbApi:
         headers['Content-Length'] = str(len(alter_projection_req))
         url_params = {'name': name}
-        res = self.session.put(self._api_prefix(bucket=bucket, schema=schema, table=table, command="projection", url_params=url_params),
-                               data=alter_projection_req, headers=headers)
-        return self._check_res(res, "alter_projection", expected_retvals)
+        self._request(
+            method="PUT",
+            url=self._url(bucket=bucket, schema=schema, table=table, command="projection", url_params=url_params),
+            data=alter_projection_req, headers=headers)
     def drop_projection(self, bucket, schema, table, name, txid=0, client_tags=[], expected_retvals=[]):
         """
@@ -1688,9 +1668,10 @@ class VastdbApi:
         headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
         url_params = {'name': name}
-        res = self.session.delete(self._api_prefix(bucket=bucket, schema=schema, table=table, command="projection", url_params=url_params),
-                                  headers=headers)
-        return self._check_res(res, "drop_projection", expected_retvals)
+        self._request(
+            method="DELETE",
+            url=self._url(bucket=bucket, schema=schema, table=table, command="projection", url_params=url_params),
+            headers=headers)
     def list_projections(self, bucket, schema, table, txid=0, client_tags=[], max_keys=1000, next_key=0, name_prefix="",
                          exact_match=False, expected_retvals=[], include_list_stats=False, count_only=False):
@@ -1714,24 +1695,26 @@ class VastdbApi:
         headers['tabular-include-list-stats'] = str(include_list_stats)
         projections = []
-        res = self.session.get(self._api_prefix(bucket=bucket, schema=schema, table=table, command="projection"), headers=headers)
-        self._check_res(res, "list_projections", expected_retvals)
-        if res.status_code == 200:
-            res_headers = res.headers
-            next_key = int(res_headers['tabular-next-key'])
-            is_truncated = res_headers['tabular-is-truncated'] == 'true'
-            count = int(res_headers['tabular-list-count'])
-            lists = list_projections.GetRootAs(res.content)
-            bucket_name = lists.BucketName().decode()
-            schema_name = lists.SchemaName().decode()
-            table_name = lists.TableName().decode()
-            if not bucket.startswith(bucket_name):  # ignore snapshot name
-                raise ValueError(f'bucket: {bucket} did not start from {bucket_name}')
-            projections_length = lists.ProjectionsLength()
-            for i in range(projections_length):
-                projections.append(_parse_table_info(lists.Projections(i)))
-            return bucket_name, schema_name, table_name, projections, next_key, is_truncated, count
+        res = self._request(
+            method="GET",
+            url=self._url(bucket=bucket, schema=schema, table=table, command="projection"),
+            headers=headers)
+        res_headers = res.headers
+        next_key = int(res_headers['tabular-next-key'])
+        is_truncated = res_headers['tabular-is-truncated'] == 'true'
+        count = int(res_headers['tabular-list-count'])
+        lists = list_projections.GetRootAs(res.content)
+        bucket_name = lists.BucketName().decode()
+        schema_name = lists.SchemaName().decode()
+        table_name = lists.TableName().decode()
+        if not bucket.startswith(bucket_name):  # ignore snapshot name
+            raise ValueError(f'bucket: {bucket} did not start from {bucket_name}')
+        projections_length = lists.ProjectionsLength()
+        for i in range(projections_length):
+            projections.append(_parse_table_info(lists.Projections(i)))
+        return bucket_name, schema_name, table_name, projections, next_key, is_truncated, count
     def list_projection_columns(self, bucket, schema, table, projection, txid=0, client_tags=[], max_keys=1000,
                                 next_key=0, count_only=False, name_prefix="", exact_match=False,
@@ -1759,19 +1742,20 @@ class VastdbApi:
         url_params = {'name': projection}
-        res = self.session.get(self._api_prefix(bucket=bucket, schema=schema, table=table, command="projection-columns", url_params=url_params),
-                               headers=headers, stream=True)
-        self._check_res(res, "list_projection_columns", expected_retvals)
+        res = self._request(
+            method="GET",
+            url=self._url(bucket=bucket, schema=schema, table=table, command="projection-columns", url_params=url_params),
+            headers=headers)
         # list projection columns response will also show column type Sorted/UnSorted
-        if res.status_code == 200:
-            res_headers = res.headers
-            next_key = int(res_headers['tabular-next-key'])
-            is_truncated = res_headers['tabular-is-truncated'] == 'true'
-            count = int(res_headers['tabular-list-count'])
-            columns = [] if count_only else [[f.name, f.type, f.metadata] for f in
-                                             pa.ipc.open_stream(res.content).schema]
+        res_headers = res.headers
+        next_key = int(res_headers['tabular-next-key'])
+        is_truncated = res_headers['tabular-is-truncated'] == 'true'
+        count = int(res_headers['tabular-list-count'])
+        columns = [] if count_only else [[f.name, f.type, f.metadata] for f in
+                                         pa.ipc.open_stream(res.content).schema]
-            return columns, next_key, is_truncated, count
+        return columns, next_key, is_truncated, count
 class QueryDataInternalError(Exception):
@@ -1828,15 +1812,12 @@ def _iter_query_data_response_columns(fileobj, stream_ids=None):
             yield (stream_id, next_row_id, table)
-def parse_query_data_response(conn, schema, stream_ids=None, start_row_ids=None, debug=False, parser: Optional[QueryDataParser] = None):
+def parse_query_data_response(conn, schema, stream_ids=None, debug=False, parser: Optional[QueryDataParser] = None):
     """
     Generates pyarrow.Table objects from QueryData API response stream.
     A pyarrow.Table is a helper class that combines a Schema with multiple RecordBatches and allows easy data access.
     """
-    if start_row_ids is None:
-        start_row_ids = {}
     is_empty_projection = (len(schema) == 0)
     if parser is None:
         parser = QueryDataParser(schema, debug=debug)
@@ -1855,8 +1836,7 @@ def parse_query_data_response(conn, schema, stream_ids=None, start_row_ids=None,
             _logger.debug("stream_id=%d rows=%d next_row_id=%d table=%s",
                           stream_id, len(parsed_table), next_row_id, parsed_table)
-            start_row_ids[stream_id] = next_row_id
-            yield parsed_table  # the result of a single "select_rows()" cycle
+            yield stream_id, next_row_id, parsed_table
     if states:
         raise EOFError(f'all streams should be done before EOF. {states}')
@@ -2118,40 +2098,3 @@ def build_query_data_request(schema: 'pa.Schema' = pa.schema([]), predicate: ibi
     builder.Finish(relation)
     return QueryDataRequest(serialized=builder.Output(), response_schema=response_schema, response_parser=QueryDataParser(response_schema))
-def convert_column_types(table: 'pa.Table') -> 'pa.Table':
-    """
-    Adjusting table values
-    1. Because the timestamp resolution is too high it is necessary to trim it. ORION-96961
-    2. Since the values of nfs_mode_bits are returned in decimal, need to convert them to octal,
-    as in all representations, so that the mode of 448 turn into 700
-    3. for owner_name and group_owner_name 0 -> root, and 65534 -> nobody
-    """
-    ts_indexes = []
-    indexes_of_fields_to_change = {}
-    sid_to_name = {
-        '0': 'root',
-        '65534': 'nobody'  # NFSNOBODY_UID_16_BIT
-    }
-    column_matcher = {  # column_name: custom converting rule
-        'nfs_mode_bits': lambda val: int(oct(val).replace('0o', '')) if val is not None else val,
-        'owner_name': lambda val: sid_to_name.get(val, val),
-        'group_owner_name': lambda val: sid_to_name.get(val, val),
-    }
-    for index, field in enumerate(table.schema):
-        if isinstance(field.type, pa.TimestampType) and field.type.unit == 'ns':
-            ts_indexes.append(index)
-        if field.name in column_matcher:
-            indexes_of_fields_to_change[field.name] = index
-    for changing_index in ts_indexes:
-        field_name = table.schema[changing_index].name
-        new_column = table[field_name].cast(pa.timestamp('us'), safe=False)
-        table = table.set_column(changing_index, field_name, new_column)
-    for field_name, changing_index in indexes_of_fields_to_change.items():
-        new_column = table[field_name].to_pylist()
-        new_column = list(map(column_matcher[field_name], new_column))
-        new_column = pa.array(new_column, table[field_name].type)
-        table = table.set_column(changing_index, field_name, new_column)
-    return table

vastdb 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl

vastdb 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl