vastdb 1.4.0__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,136 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import TYPE_CHECKING, Iterable, Optional, Union
3
+
4
+ import ibis
5
+ import pyarrow as pa
6
+
7
+ from .config import ImportConfig, QueryConfig
8
+ from .table_metadata import TableRef
9
+
10
+ if TYPE_CHECKING:
11
+ from .table import Projection
12
+
13
+
14
class ITable(ABC):
    """Interface for VAST Table operations.

    Every member below is abstract, so a concrete table class must implement
    all of them before it can be instantiated.
    """

    @property
    @abstractmethod
    def ref(self) -> TableRef:
        """Return Table Ref."""

    # NOTE: because this class defines `__eq__` without `__hash__`, Python sets
    # `ITable.__hash__` to None; an implementation that must be hashable has
    # to define `__hash__` explicitly.
    @abstractmethod
    def __eq__(self, other: object) -> bool:
        """Table __eq__."""

    @property
    @abstractmethod
    def name(self) -> str:
        """Table name."""

    @property
    @abstractmethod
    def arrow_schema(self) -> pa.Schema:
        """Table arrow schema."""

    @property
    @abstractmethod
    def path(self) -> str:
        """Return table's path."""

    @abstractmethod
    def sorted_columns(self) -> list[str]:
        """Return sorted columns' names."""

    @abstractmethod
    def projection(self, name: str) -> "Projection":
        """Get a specific semi-sorted projection of this table."""

    @abstractmethod
    def projections(self, projection_name: str = "") -> Iterable["Projection"]:
        """List semi-sorted projections."""

    @abstractmethod
    def import_files(self, files_to_import: Iterable[str], config: Optional[ImportConfig] = None) -> None:
        """Import files into table."""

    @abstractmethod
    def import_partitioned_files(self, files_and_partitions: dict[str, pa.RecordBatch], config: Optional[ImportConfig] = None) -> None:
        """Import partitioned files."""

    @abstractmethod
    def select(self,
               columns: Optional[list[str]] = None,
               predicate: Optional[Union[ibis.expr.types.BooleanColumn,
                                         ibis.common.deferred.Deferred]] = None,
               config: Optional[QueryConfig] = None,
               *,
               internal_row_id: bool = False,
               limit_rows: Optional[int] = None) -> pa.RecordBatchReader:
        """Execute a query.

        `columns`, `predicate`, `config` and `limit_rows` are all optional and
        default to None; `internal_row_id` and `limit_rows` are keyword-only.
        The meaning of an omitted argument is defined by the implementation.
        """

    @abstractmethod
    def insert(self, rows: Union[pa.RecordBatch, pa.Table]) -> pa.ChunkedArray:
        """Insert rows into table."""

    @abstractmethod
    def update(self,
               rows: Union[pa.RecordBatch, pa.Table],
               columns: Optional[list[str]] = None) -> None:
        """Update rows in table."""

    @abstractmethod
    def delete(self, rows: Union[pa.RecordBatch, pa.Table]) -> None:
        """Delete rows from table."""

    @abstractmethod
    def imports_table(self) -> Optional["ITable"]:
        """Get imports table."""

    @abstractmethod
    def sorting_done(self) -> bool:
        """Check if sorting is done."""

    @abstractmethod
    def sorting_score(self) -> int:
        """Get sorting score."""

    @abstractmethod
    def reload_schema(self) -> None:
        """Reload Arrow Schema."""

    @abstractmethod
    def reload_stats(self) -> None:
        """Reload Table Stats."""

    @abstractmethod
    def reload_sorted_columns(self) -> None:
        """Reload Sorted Columns."""

    @abstractmethod
    def __getitem__(self, col_name: str) -> ibis.Column:
        """Allow constructing ibis-like column expressions from this table.

        It is useful for constructing expressions for predicate pushdown in `ITable.select()` method.
        """
@@ -113,7 +113,7 @@ def calculate_aggregate_stats(
113
113
  )
114
114
  agg_df["duration_sec"] = (
115
115
  r_df.groupby(group_flds)
116
- .apply(calc_total_time_coverage_seconds, include_groups=False)
116
+ .apply(calc_total_time_coverage_seconds, include_groups=False) # type: ignore
117
117
  .sort_index()
118
118
  )
119
119
  agg_df["M_rows_per_sec"] = (agg_df["n_rows"] / agg_df["duration_sec"] / 1e6).astype(
vastdb/bucket.py CHANGED
@@ -36,7 +36,7 @@ class Bucket:
36
36
  """Get a specific schema (a container of tables) under this bucket."""
37
37
  return self._root_schema.schema(name=name, fail_if_missing=fail_if_missing)
38
38
 
39
- def schemas(self, batch_size=None):
39
def schemas(self, batch_size: Optional[int] = None) -> Iterable["Schema"]:
    """List bucket's schemas by delegating to the bucket's root schema."""
    root = self._root_schema
    return root.schemas(batch_size=batch_size)
42
42
 
vastdb/conftest.py CHANGED
@@ -1,20 +1,42 @@
1
1
  import os
2
2
  import sqlite3
3
3
  from pathlib import Path
4
+ from typing import Iterable
4
5
 
5
6
  import boto3
6
7
  import pytest
7
8
 
8
9
  import vastdb
9
10
  import vastdb.errors
11
+ from vastdb.schema import Schema
12
+ from vastdb.session import Session
13
+
14
+ AWS_ACCESS_KEY_ID = "AWS_ACCESS_KEY_ID"
15
+ AWS_SECRET_ACCESS_KEY = "AWS_SECRET_ACCESS_KEY"
16
+
17
+
18
def get_aws_cred_from_system(cred_name):
    """Look up a credential value by name.

    The environment variable named `cred_name` wins when it is set. Otherwise
    the value is read from the file `<ORION_DIR>/data/<cred_name>` (with
    `ORION_DIR` defaulting to `~/orion`), stripped of surrounding whitespace.
    Returns None when neither source exists.
    """
    from_env = os.environ.get(cred_name)
    if from_env is not None:
        return from_env

    orion_root = Path(os.path.expanduser(os.environ.get("ORION_DIR", "~/orion")))
    credential_file = orion_root / 'data' / cred_name
    if not credential_file.exists():
        return None

    with credential_file.open("r") as handle:
        contents = handle.read()
    return contents.strip()
10
32
 
11
33
 
12
34
  def pytest_addoption(parser):
13
35
  parser.addoption("--tabular-bucket-name", help="Name of the S3 bucket with Tabular enabled", default="vastdb")
14
36
  parser.addoption("--tabular-access-key", help="Access key with Tabular permissions (AWS_ACCESS_KEY_ID)",
15
- default=os.environ.get("AWS_ACCESS_KEY_ID", None))
37
+ default=get_aws_cred_from_system(AWS_ACCESS_KEY_ID))
16
38
  parser.addoption("--tabular-secret-key", help="Secret key with Tabular permissions (AWS_SECRET_ACCESS_KEY)",
17
- default=os.environ.get("AWS_SECRET_ACCESS_KEY", None))
39
+ default=get_aws_cred_from_system(AWS_SECRET_ACCESS_KEY))
18
40
  parser.addoption("--tabular-endpoint-url", help="Tabular server endpoint", default=[], action="append")
19
41
  parser.addoption("--data-path", help="Data files location", default=None)
20
42
  parser.addoption("--crater-path", help="Save benchmark results in a dedicated location", default=None)
@@ -24,7 +46,7 @@ def pytest_addoption(parser):
24
46
 
25
47
 
26
48
  @pytest.fixture(scope="session")
27
- def session_kwargs(request, tabular_endpoint_urls):
49
+ def session_kwargs(request: pytest.FixtureRequest, tabular_endpoint_urls):
28
50
  return dict(
29
51
  access=request.config.getoption("--tabular-access-key"),
30
52
  secret=request.config.getoption("--tabular-secret-key"),
@@ -38,30 +60,30 @@ def session(session_kwargs):
38
60
 
39
61
 
40
62
  @pytest.fixture(scope="session")
41
- def num_workers(request):
63
def num_workers(request: pytest.FixtureRequest):
    """Return the `--num-workers` command-line option converted to int."""
    raw_value = request.config.getoption("--num-workers")
    return int(raw_value)
43
65
 
44
66
 
45
67
  @pytest.fixture(scope="session")
46
- def test_bucket_name(request):
68
def test_bucket_name(request: pytest.FixtureRequest):
    """Return the value of the `--tabular-bucket-name` command-line option."""
    bucket_option = request.config.getoption("--tabular-bucket-name")
    return bucket_option
48
70
 
49
71
 
50
72
  @pytest.fixture(scope="session")
51
- def tabular_endpoint_urls(request):
73
def tabular_endpoint_urls(request: pytest.FixtureRequest):
    """Return the `--tabular-endpoint-url` values, or a localhost default when the option is empty."""
    configured_urls = request.config.getoption("--tabular-endpoint-url")
    if configured_urls:
        return configured_urls
    return ["http://localhost:9090"]
53
75
 
54
76
 
55
- def iter_schemas(s):
56
- """Recusively scan all schemas."""
57
- children = s.schemas()
58
- for c in children:
59
- yield from iter_schemas(c)
60
- yield s
77
def iter_schemas(schema: Schema) -> Iterable[Schema]:
    """Recursively scan all schemas.

    Descendants are yielded before `schema` itself, so the given schema is
    always the last item produced.
    """
    for nested in schema.schemas():
        yield from iter_schemas(nested)
    yield schema
61
83
 
62
84
 
63
85
  @pytest.fixture(scope="function")
64
- def clean_bucket_name(request, test_bucket_name, session):
86
+ def clean_bucket_name(request: pytest.FixtureRequest, test_bucket_name: str, session: Session) -> str:
65
87
  with session.transaction() as tx:
66
88
  b = tx.bucket(test_bucket_name)
67
89
  for top_schema in b.schemas():
@@ -69,7 +91,8 @@ def clean_bucket_name(request, test_bucket_name, session):
69
91
  for t_name in s.tablenames():
70
92
  try:
71
93
  t = s.table(t_name)
72
- t.drop()
94
+ if t is not None:
95
+ t.drop()
73
96
  except vastdb.errors.NotSupportedSchema:
74
97
  # Use internal API to drop the table in case unsupported schema prevents creating a table
75
98
  # object.
@@ -79,7 +102,7 @@ def clean_bucket_name(request, test_bucket_name, session):
79
102
 
80
103
 
81
104
  @pytest.fixture(scope="session")
82
- def s3(request, tabular_endpoint_urls):
105
+ def s3(request: pytest.FixtureRequest, tabular_endpoint_urls):
83
106
  return boto3.client(
84
107
  's3',
85
108
  aws_access_key_id=request.config.getoption("--tabular-access-key"),
@@ -88,22 +111,22 @@ def s3(request, tabular_endpoint_urls):
88
111
 
89
112
 
90
113
  @pytest.fixture(scope="function")
91
- def parquets_path(request):
114
def parquets_path(request: pytest.FixtureRequest):
    """Return the `--data-path` command-line option wrapped in a `Path`."""
    data_location = request.config.getoption("--data-path")
    return Path(data_location)
93
116
 
94
117
 
95
118
  @pytest.fixture(scope="function")
96
- def crater_path(request):
119
def crater_path(request: pytest.FixtureRequest):
    """Return the value of the `--crater-path` command-line option."""
    crater_option = request.config.getoption("--crater-path")
    return crater_option
98
121
 
99
122
 
100
123
  @pytest.fixture(scope="function")
101
- def schema_name(request):
124
def schema_name(request: pytest.FixtureRequest):
    """Return the value of the `--schema-name` command-line option."""
    schema_option = request.config.getoption("--schema-name")
    return schema_option
103
126
 
104
127
 
105
128
  @pytest.fixture(scope="function")
106
- def table_name(request):
129
def table_name(request: pytest.FixtureRequest):
    """Return the value of the `--table-name` command-line option."""
    table_option = request.config.getoption("--table-name")
    return table_option
108
131
 
109
132
 
vastdb/schema.py CHANGED
@@ -10,7 +10,10 @@ from typing import TYPE_CHECKING, Iterable, List, Optional
10
10
 
11
11
  import pyarrow as pa
12
12
 
13
+ from vastdb.table_metadata import TableMetadata, TableRef, TableType
14
+
13
15
  from . import bucket, errors, schema, table
16
+ from ._ibis_support import validate_ibis_support_schema
14
17
 
15
18
  if TYPE_CHECKING:
16
19
  from .table import Table
@@ -91,7 +94,7 @@ class Schema:
91
94
  if use_external_row_ids_allocation:
92
95
  self.tx._rpc.features.check_external_row_ids_allocation()
93
96
 
94
- table.Table.validate_ibis_support_schema(columns)
97
+ validate_ibis_support_schema(columns)
95
98
  self.tx._rpc.api.create_table(self.bucket.name, self.name, table_name, columns, txid=self.tx.txid,
96
99
  use_external_row_ids_allocation=use_external_row_ids_allocation,
97
100
  sorting_key=sorting_key)
@@ -117,7 +120,7 @@ class Schema:
117
120
  while True:
118
121
  _bucket_name, _schema_name, curr_tables, next_key, is_truncated, _ = \
119
122
  self.tx._rpc.api.list_tables(
120
- bucket=self.bucket.name, schema=self.name, next_key=next_key, max_keys=page_size, txid=self.tx.txid,
123
+ bucket=self.bucket.name, schema=self.name, next_key=next_key, max_keys=page_size, txid=self.tx.active_txid,
121
124
  exact_match=exact_match, name_prefix=name_prefix, include_list_stats=exact_match)
122
125
  if not curr_tables:
123
126
  break
@@ -152,4 +155,13 @@ class Schema:
152
155
 
153
156
 
154
157
def _parse_table_info(table_info, schema: "schema.Schema"):
    """Build a `table.Table` from a table-listing entry that belongs to `schema`.

    The table reference is composed from the schema's bucket name, the schema
    name and the entry's name; the table type is Elysium when the entry has a
    sorting key enabled and Regular otherwise.
    """
    table_ref = TableRef(
        bucket=schema.bucket.name,
        schema=schema.name,
        table=table_info.name,
    )

    if table_info.sorting_key_enabled:
        kind = TableType.Elysium
    else:
        kind = TableType.Regular

    metadata = TableMetadata(table_ref, table_type=kind)
    return table.Table(
        handle=int(table_info.handle),
        metadata=metadata,
        tx=schema.tx,
    )
vastdb/session.py CHANGED
@@ -10,6 +10,8 @@ For more details see:
10
10
  import os
11
11
  from typing import TYPE_CHECKING, Optional
12
12
 
13
+ from vastdb.transaction import Transaction
14
+
13
15
  if TYPE_CHECKING:
14
16
  from .config import BackoffConfig
15
17
 
@@ -45,7 +47,7 @@ class Session:
45
47
  """Don't show the secret key."""
46
48
  return f'{self.__class__.__name__}(endpoint={self.api.url}, access={self.api.access_key})'
47
49
 
48
- def transaction(self):
50
+ def transaction(self) -> Transaction:
49
51
  """Create a non-initialized transaction object.
50
52
 
51
53
  It should be used as a context manager: