vastdb 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vastdb/bucket.py CHANGED
@@ -16,14 +16,6 @@ if TYPE_CHECKING:
 log = logging.getLogger(__name__)
 
 
-@dataclass
-class Snapshot:
-    """VAST bucket-level snapshot."""
-
-    name: str
-    bucket: "Bucket"
-
-
 @dataclass
 class Bucket:
     """VAST bucket."""
@@ -73,7 +65,22 @@ class Bucket:
 
         return [schema.Schema(name=name, bucket=self) for name, *_ in schemas]
 
-    def snapshots(self) -> List[Snapshot]:
+    def snapshot(self, name, fail_if_missing=True) -> Optional["Bucket"]:
+        """Get snapshot by name (if exists)."""
+        snapshots, _is_truncated, _next_key = \
+            self.tx._rpc.api.list_snapshots(bucket=self.name, name_prefix=name, max_keys=1)
+
+        expected_name = f".snapshot/{name}"
+        exists = snapshots and snapshots[0] == expected_name + "/"
+        if not exists:
+            if fail_if_missing:
+                raise errors.MissingSnapshot(self.name, expected_name)
+            else:
+                return None
+
+        return Bucket(name=f'{self.name}/{expected_name}', tx=self.tx)
+
+    def snapshots(self) -> List["Bucket"]:
         """List bucket's snapshots."""
         snapshots = []
         next_key = 0
@@ -86,4 +93,7 @@ class Bucket:
             if not is_truncated:
                 break
 
-        return [Snapshot(name=snapshot, bucket=self) for snapshot in snapshots]
+        return [
+            Bucket(name=f'{self.name}/{snapshot.strip("/")}', tx=self.tx)
+            for snapshot in snapshots
+        ]
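The reworked snapshot API above returns read-only `Bucket` handles rooted under `.snapshot/<name>` instead of the removed `Snapshot` dataclass. A minimal, hedged usage sketch follows (the bucket and snapshot names are made up, and `session` is assumed to be an already constructed vastdb session, as in the SDK tests):

```python
# Hedged sketch, not part of the package: how the 0.1.3 snapshot API is expected to be used.
with session.transaction() as tx:
    bucket = tx.bucket("my-bucket")  # hypothetical bucket name

    # Look up a single snapshot; with fail_if_missing=False a missing snapshot
    # yields None instead of raising errors.MissingSnapshot.
    snap = bucket.snapshot("daily-2024-01-01", fail_if_missing=False)  # hypothetical snapshot name
    if snap is not None:
        print(snap.name)  # e.g. "my-bucket/.snapshot/daily-2024-01-01"

    # Listing now yields Bucket objects as well (0.1.2 returned Snapshot dataclasses),
    # so a snapshot can be browsed like a regular bucket.
    for snapshot_bucket in bucket.snapshots():
        print(snapshot_bucket.name)
```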
vastdb/errors.py CHANGED
@@ -85,6 +85,10 @@ class InvalidArgument(Exception):
     pass
 
 
+class TooWideRow(InvalidArgument):
+    pass
+
+
 class Missing(Exception):
     pass
 
@@ -93,11 +97,21 @@ class MissingTransaction(Missing):
     pass
 
 
+class NotSupported(Exception):
+    pass
+
+
 @dataclass
 class MissingBucket(Missing):
     bucket: str
 
 
+@dataclass
+class MissingSnapshot(Missing):
+    bucket: str
+    snapshot: str
+
+
 @dataclass
 class MissingSchema(Missing):
     bucket: str
@@ -136,6 +150,19 @@ class TableExists(Exists):
     table: str
 
 
+@dataclass
+class NotSupportedCommand(NotSupported):
+    bucket: str
+    schema: str
+    table: str
+
+
+@dataclass
+class NotSupportedVersion(NotSupported):
+    err_msg: str
+    version: str
+
+
 ERROR_TYPES_MAP = {
     HttpStatus.BAD_REQUEST: BadRequest,
     HttpStatus.FOBIDDEN: Forbidden,
@@ -178,4 +205,4 @@ def from_response(res: requests.Response):
     log.warning("RPC failed: %s", kwargs)
     status = HttpStatus(res.status_code)
     error_type = ERROR_TYPES_MAP.get(status, UnexpectedError)
-    raise error_type(**kwargs)
+    return error_type(**kwargs)
vastdb/internal_commands.py CHANGED
@@ -1,7 +1,6 @@
 import itertools
 import json
 import logging
-import math
 import re
 import struct
 import urllib.parse
@@ -182,6 +181,7 @@ class Predicate:
        Equals,
        Greater,
        GreaterEqual,
+       InValues,
        Less,
        LessEqual,
        Not,
@@ -219,40 +219,54 @@ class Predicate:
        prev_field_name = None
        for inner_op in or_args:
            _logger.debug('inner_op %s', inner_op)
-            builder_func: Any = builder_map.get(type(inner_op))
+            op_type = type(inner_op)
+            builder_func: Any = builder_map.get(op_type)
            if not builder_func:
-                raise NotImplementedError(inner_op.name)
+                if op_type == InValues:
+                    builder_func = self.build_equal
+                else:
+                    raise NotImplementedError(self.expr)
 
            if builder_func == self.build_is_null:
                column, = inner_op.args
-                literal = None
+                literals = (None,)
            elif builder_func == self.build_is_not_null:
                not_arg, = inner_op.args
                # currently we only support not is_null, checking we really got is_null under the not:
                if not builder_map.get(type(not_arg)) == self.build_is_null:
-                    raise NotImplementedError(not_arg.args[0].name)
+                    raise NotImplementedError(self.expr)
                column, = not_arg.args
-                literal = None
+                literals = (None,)
            else:
-                column, literal = inner_op.args
-                if not isinstance(literal, Literal):
-                    raise NotImplementedError(inner_op.name)
+                column, arg = inner_op.args
+                if isinstance(arg, tuple):
+                    literals = arg
+                else:
+                    literals = (arg,)
+                for literal in literals:
+                    if not isinstance(literal, Literal):
+                        raise NotImplementedError(self.expr)
 
            if not isinstance(column, TableColumn):
-                raise NotImplementedError(inner_op.name)
+                raise NotImplementedError(self.expr)
 
            field_name = column.name
            if prev_field_name is None:
                prev_field_name = field_name
            elif prev_field_name != field_name:
-                raise NotImplementedError(op.name)
+                raise NotImplementedError(self.expr)
 
-            args_offsets = [self.build_column(position=positions_map[field_name])]
-            if literal:
-                field = self.schema.field(field_name)
-                args_offsets.append(self.build_literal(field=field, value=literal.value))
+            column_offset = self.build_column(position=positions_map[field_name])
+            field = self.schema.field(field_name)
+            for literal in literals:
+                args_offsets = [column_offset]
+                if literal is not None:
+                    args_offsets.append(self.build_literal(field=field, value=literal.value))
 
-            inner_offsets.append(builder_func(*args_offsets))
+                inner_offsets.append(builder_func(*args_offsets))
+
+        if not inner_offsets:
+            raise NotImplementedError(self.expr)  # an empty OR is equivalent to a 'FALSE' literal
 
        domain_offset = self.build_or(inner_offsets)
        offsets.append(domain_offset)
@@ -719,20 +733,6 @@ def _parse_table_info(obj):
     return TableInfo(name, properties, handle, num_rows, used_bytes)
 
 
-def build_record_batch(column_info, column_values):
-    fields = [pa.field(column_name, column_type) for column_type, column_name in column_info]
-    schema = pa.schema(fields)
-    arrays = [pa.array(column_values[column_type], type=column_type) for column_type, _ in column_info]
-    batch = pa.record_batch(arrays, schema)
-    return serialize_record_batch(batch)
-
-
-def serialize_record_batch(batch):
-    sink = pa.BufferOutputStream()
-    with pa.ipc.new_stream(sink, batch.schema) as writer:
-        writer.write(batch)
-    return sink.getvalue()
-
 # Results that returns from tablestats
 
 
@@ -952,26 +952,27 @@ class VastdbApi:
 
        return bucket_name, schemas, next_key, is_truncated, count
 
-    def list_snapshots(self, bucket, max_keys=1000, next_token=None, expected_retvals=None):
+    def list_snapshots(self, bucket, max_keys=1000, next_token=None, name_prefix=''):
        next_token = next_token or ''
-        expected_retvals = expected_retvals or []
-        url_params = {'list_type': '2', 'prefix': '.snapshot/', 'delimiter': '/', 'max_keys': str(max_keys)}
+        url_params = {'list_type': '2', 'prefix': '.snapshot/' + name_prefix, 'delimiter': '/', 'max_keys': str(max_keys)}
        if next_token:
            url_params['continuation-token'] = next_token
 
        res = self.session.get(self._api_prefix(bucket=bucket, command="list", url_params=url_params), headers={}, stream=True)
-        self._check_res(res, "list_snapshots", expected_retvals)
-        if res.status_code == 200:
-            out = b''.join(res.iter_content(chunk_size=128))
-            xml_str = out.decode()
-            xml_dict = xmltodict.parse(xml_str)
-            list_res = xml_dict['ListBucketResult']
-            is_truncated = list_res['IsTruncated'] == 'true'
-            marker = list_res['Marker']
-            common_prefixes = list_res['CommonPrefixes'] if 'CommonPrefixes' in list_res else []
-            snapshots = [v['Prefix'] for v in common_prefixes]
+        self._check_res(res, "list_snapshots")
+
+        out = b''.join(res.iter_content(chunk_size=128))
+        xml_str = out.decode()
+        xml_dict = xmltodict.parse(xml_str)
+        list_res = xml_dict['ListBucketResult']
+        is_truncated = list_res['IsTruncated'] == 'true'
+        marker = list_res['Marker']
+        common_prefixes = list_res.get('CommonPrefixes', [])
+        if isinstance(common_prefixes, dict):  # in case there is a single snapshot
+            common_prefixes = [common_prefixes]
+        snapshots = [v['Prefix'] for v in common_prefixes]
 
-            return snapshots, is_truncated, marker
+        return snapshots, is_truncated, marker
 
    def create_table(self, bucket, schema, name, arrow_schema, txid=0, client_tags=[], expected_retvals=[],
                     topic_partitions=0, create_imports_table=False, use_external_row_ids_allocation=False):
@@ -1030,7 +1031,7 @@ class VastdbApi:
        # create the table
        return self.create_table(bucket, schema, name, arrow_schema, txid, client_tags, expected_retvals)
 
-    def get_table_stats(self, bucket, schema, name, txid=0, client_tags=[], expected_retvals=[]):
+    def get_table_stats(self, bucket, schema, name, txid=0, client_tags=[], expected_retvals=[], imports_table_stats=False):
        """
        GET /mybucket/myschema/mytable?stats HTTP/1.1
        tabular-txid: TransactionId
@@ -1039,7 +1040,8 @@ class VastdbApi:
        The Command will return the statistics in flatbuf format
        """
        headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
-        res = self.session.get(self._api_prefix(bucket=bucket, schema=schema, table=name, command="stats"), headers=headers)
+        url_params = {'sub-table': IMPORTED_OBJECTS_TABLE_NAME} if imports_table_stats else {}
+        res = self.session.get(self._api_prefix(bucket=bucket, schema=schema, table=name, command="stats", url_params=url_params), headers=headers)
        self._check_res(res, "get_table_stats", expected_retvals)
 
        flatbuf = b''.join(res.iter_content(chunk_size=128))
@@ -1527,11 +1529,18 @@ class VastdbApi:
        if response.status_code != 200:
            return response
 
+        ALLOWED_IMPORT_STATES = {
+            'Success',
+            'TabularInProgress',
+            'TabularAlreadyImported',
+            'TabularImportNotStarted',
+        }
+
        chunk_size = 1024
        for chunk in response.iter_content(chunk_size=chunk_size):
            chunk_dict = json.loads(chunk)
            _logger.debug("import data chunk=%s, result: %s", chunk_dict, chunk_dict['res'])
-            if chunk_dict['res'] != 'Success' and chunk_dict['res'] != 'TabularInProgress' and chunk_dict['res'] != 'TabularAlreadyImported':
+            if chunk_dict['res'] not in ALLOWED_IMPORT_STATES:
                raise errors.ImportFilesError(
                    f"Encountered an error during import_data. status: {chunk_dict['res']}, "
                    f"error message: {chunk_dict['err_msg'] or 'Unexpected error'} during import of "
@@ -1555,48 +1564,6 @@ class VastdbApi:
 
        return self._check_res(res, "import_data", expected_retvals)
 
-    def _record_batch_slices(self, batch, rows_per_slice=None):
-        max_slice_size_in_bytes = int(0.9 * 5 * 1024 * 1024)  # 0.9 * 5MB
-        batch_len = len(batch)
-        serialized_batch = serialize_record_batch(batch)
-        batch_size_in_bytes = len(serialized_batch)
-        _logger.debug('max_slice_size_in_bytes=%d batch_len=%d batch_size_in_bytes=%d',
-                      max_slice_size_in_bytes, batch_len, batch_size_in_bytes)
-
-        if not rows_per_slice:
-            if batch_size_in_bytes < max_slice_size_in_bytes:
-                rows_per_slice = batch_len
-            else:
-                rows_per_slice = int(0.9 * batch_len * max_slice_size_in_bytes / batch_size_in_bytes)
-
-        done_slicing = False
-        while not done_slicing:
-            # Attempt slicing according to the current rows_per_slice
-            offset = 0
-            serialized_slices = []
-            for i in range(math.ceil(batch_len / rows_per_slice)):
-                offset = rows_per_slice * i
-                if offset >= batch_len:
-                    done_slicing = True
-                    break
-                slice_batch = batch.slice(offset, rows_per_slice)
-                serialized_slice_batch = serialize_record_batch(slice_batch)
-                sizeof_serialized_slice_batch = len(serialized_slice_batch)
-
-                if sizeof_serialized_slice_batch <= max_slice_size_in_bytes:
-                    serialized_slices.append(serialized_slice_batch)
-                else:
-                    _logger.info(f'Using rows_per_slice {rows_per_slice} slice {i} size {sizeof_serialized_slice_batch} exceeds {max_slice_size_in_bytes} bytes, trying smaller rows_per_slice')
-                    # We have a slice that is too large
-                    rows_per_slice = int(rows_per_slice / 2)
-                    if rows_per_slice < 1:
-                        raise ValueError('cannot decrease batch size below 1 row')
-                    break
-            else:
-                done_slicing = True
-
-        return serialized_slices
-
    def insert_rows(self, bucket, schema, table, record_batch, txid=0, client_tags=[], expected_retvals=[]):
        """
        POST /mybucket/myschema/mytable?rows HTTP/1.1
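The `Predicate` change above maps ibis `InValues` expressions onto the existing equality builder, emitting one equality node per value OR-ed within the same column domain (an empty value list is rejected, since an empty OR is equivalent to a FALSE literal). A hedged sketch of how this surfaces through the public `Table.select()` API (the table handle `t` and the column name `num` are assumptions, mirroring the SDK tests):

```python
# Hedged sketch, not part of the package: `isin` predicate pushdown as exposed by Table.select().
# `t` is assumed to be an open vastdb Table obtained inside a transaction.
import pyarrow as pa

reader = t.select(columns=['num'], predicate=t['num'].isin([100, 200, 300]))
result = pa.Table.from_batches(reader, t.arrow_schema)

# Each value is pushed down as an equality against the same column and OR-ed
# server-side; an empty value list raises NotImplementedError.
```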
vastdb/schema.py CHANGED
@@ -87,4 +87,4 @@ class Schema:
 
 def _parse_table_info(table_info, schema: "schema.Schema"):
     stats = table.TableStats(num_rows=table_info.num_rows, size_in_bytes=table_info.size_in_bytes)
-    return table.Table(name=table_info.name, schema=schema, handle=int(table_info.handle), stats=stats)
+    return table.Table(name=table_info.name, schema=schema, handle=int(table_info.handle), stats=stats, _imports_table=False)
vastdb/session.py CHANGED
@@ -11,7 +11,20 @@ import os
 
 import boto3
 
-from . import internal_commands, transaction
+from . import errors, internal_commands, transaction
+
+
+class Features:
+    """VAST database features - check if server is already support a feature."""
+
+    def __init__(self, vast_version):
+        """Save the server version."""
+        self.vast_version = vast_version
+
+    def check_imports_table(self):
+        """Check if the feature that support imports table is supported."""
+        if self.vast_version < (5, 2):
+            raise errors.NotSupportedVersion("import_table requires 5.2+", self.vast_version)
 
 
 class Session:
@@ -27,6 +40,8 @@ class Session:
            endpoint = os.environ['AWS_S3_ENDPOINT_URL']
 
        self.api = internal_commands.VastdbApi(endpoint, access, secret)
+        version_tuple = tuple(int(part) for part in self.api.vast_version.split('.'))
+        self.features = Features(version_tuple)
        self.s3 = boto3.client('s3',
                               aws_access_key_id=access,
                               aws_secret_access_key=secret,
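The new `Features` helper gates SDK calls on the parsed server version, and `Session` now builds it from `VastdbApi.vast_version`. A small, hedged sketch of exercising the added check directly (the version tuples are illustrative):

```python
# Hedged sketch, not part of the package: exercising the version gate added in session.py.
from vastdb.errors import NotSupportedVersion
from vastdb.session import Features

old_cluster = Features((5, 1))
try:
    old_cluster.check_imports_table()
except NotSupportedVersion as e:
    print(e.err_msg, e.version)  # "import_table requires 5.2+" (5, 1)

Features((5, 2)).check_imports_table()  # no-op on 5.2 and newer
```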
vastdb/table.py CHANGED
@@ -1,3 +1,5 @@
+"""VAST Database table."""
+
 import concurrent.futures
 import logging
 import os
@@ -10,7 +12,7 @@ from typing import Dict, List, Optional, Tuple, Union
 import ibis
 import pyarrow as pa
 
-from . import errors, internal_commands, schema
+from . import errors, internal_commands, schema, util
 
 log = logging.getLogger(__name__)
 
@@ -24,6 +26,8 @@ MAX_INSERT_ROWS_PER_PATCH = 512 * 1024
 
 @dataclass
 class TableStats:
+    """Table-related information."""
+
     num_rows: int
     size_in_bytes: int
     is_external_rowid_alloc: bool = False
@@ -32,6 +36,8 @@ class TableStats:
 
 @dataclass
 class QueryConfig:
+    """Query execution configiration."""
+
     num_sub_splits: int = 4
     num_splits: int = 1
     data_endpoints: Optional[List[str]] = None
@@ -44,11 +50,16 @@ class QueryConfig:
 
 @dataclass
 class ImportConfig:
+    """Import execution configiration."""
+
     import_concurrency: int = 2
 
 
-class SelectSplitState():
+class SelectSplitState:
+    """State of a specific query split execution."""
+
     def __init__(self, query_data_request, table: "Table", split_id: int, config: QueryConfig) -> None:
+        """Initialize query split state."""
         self.split_id = split_id
         self.subsplits_state = {i: 0 for i in range(config.num_sub_splits)}
         self.config = config
@@ -56,6 +67,10 @@ class SelectSplitState():
         self.table = table
 
     def batches(self, api: internal_commands.VastdbApi):
+        """Execute QueryData request, and yield parsed RecordBatch objects.
+
+        Can be called repeatedly, to allow pagination.
+        """
         while not self.done:
             response = api.query_data(
                 bucket=self.table.bucket.name,
@@ -68,7 +83,8 @@ class SelectSplitState():
                 txid=self.table.tx.txid,
                 limit_rows=self.config.limit_rows_per_sub_split,
                 sub_split_start_row_ids=self.subsplits_state.items(),
-                enable_sorted_projections=self.config.use_semi_sorted_projections)
+                enable_sorted_projections=self.config.use_semi_sorted_projections,
+                query_imports_table=self.table._imports_table)
             pages_iter = internal_commands.parse_query_data_response(
                 conn=response.raw,
                 schema=self.query_data_request.response_schema,
@@ -82,19 +98,24 @@ class SelectSplitState():
 
     @property
     def done(self):
+        """Returns true iff the pagination over."""
         return all(row_id == internal_commands.TABULAR_INVALID_ROW_ID for row_id in self.subsplits_state.values())
 
 
 @dataclass
 class Table:
+    """VAST Table."""
+
     name: str
     schema: "schema.Schema"
     handle: int
     stats: TableStats
-    arrow_schema: pa.Schema = field(init=False, compare=False)
-    _ibis_table: ibis.Schema = field(init=False, compare=False)
+    arrow_schema: pa.Schema = field(init=False, compare=False, repr=False)
+    _ibis_table: ibis.Schema = field(init=False, compare=False, repr=False)
+    _imports_table: bool
 
     def __post_init__(self):
+        """Also, load columns' metadata."""
         self.arrow_schema = self.columns()
 
         table_path = f'{self.schema.bucket.name}/{self.schema.name}/{self.name}'
@@ -102,21 +123,21 @@ class Table:
 
     @property
     def tx(self):
+        """Return transaction."""
         return self.schema.tx
 
     @property
     def bucket(self):
+        """Return bucket."""
         return self.schema.bucket
 
-    def __repr__(self):
-        return f"{type(self).__name__}(name={self.name})"
-
     def columns(self) -> pa.Schema:
+        """Return columns' metadata."""
         fields = []
         next_key = 0
         while True:
             cur_columns, next_key, is_truncated, _count = self.tx._rpc.api.list_columns(
-                bucket=self.bucket.name, schema=self.schema.name, table=self.name, next_key=next_key, txid=self.tx.txid)
+                bucket=self.bucket.name, schema=self.schema.name, table=self.name, next_key=next_key, txid=self.tx.txid, list_imports_table=self._imports_table)
             fields.extend(cur_columns)
             if not is_truncated:
                 break
@@ -125,6 +146,9 @@ class Table:
         return self.arrow_schema
 
     def projection(self, name: str) -> "Projection":
+        """Get a specific semi-sorted projection of this table."""
+        if self._imports_table:
+            raise errors.NotSupportedCommand(self.bucket.name, self.schema.name, self.name)
         projs = self.projections(projection_name=name)
         if not projs:
             raise errors.MissingProjection(self.bucket.name, self.schema.name, self.name, name)
@@ -133,6 +157,9 @@ class Table:
         return projs[0]
 
     def projections(self, projection_name=None) -> List["Projection"]:
+        """List all semi-sorted projections of this table."""
+        if self._imports_table:
+            raise errors.NotSupportedCommand(self.bucket.name, self.schema.name, self.name)
         projections = []
         next_key = 0
         name_prefix = projection_name if projection_name else ""
@@ -150,6 +177,12 @@ class Table:
         return [_parse_projection_info(projection, self) for projection in projections]
 
     def import_files(self, files_to_import: List[str], config: Optional[ImportConfig] = None) -> None:
+        """Import a list of Parquet files into this table.
+
+        The files must be on VAST S3 server and be accessible using current credentials.
+        """
+        if self._imports_table:
+            raise errors.NotSupportedCommand(self.bucket.name, self.schema.name, self.name)
         source_files = {}
         for f in files_to_import:
             bucket_name, object_path = _parse_bucket_and_object_names(f)
@@ -158,6 +191,13 @@ class Table:
         self._execute_import(source_files, config=config)
 
     def import_partitioned_files(self, files_and_partitions: Dict[str, pa.RecordBatch], config: Optional[ImportConfig] = None) -> None:
+        """Import a list of Parquet files into this table.
+
+        The files must be on VAST S3 server and be accessible using current credentials.
+        Each file must have its own partition values defined as an Arrow RecordBatch.
+        """
+        if self._imports_table:
+            raise errors.NotSupportedCommand(self.bucket.name, self.schema.name, self.name)
         source_files = {}
         for f, record_batch in files_and_partitions.items():
             bucket_name, object_path = _parse_bucket_and_object_names(f)
@@ -216,8 +256,10 @@ class Table:
         # ThreadPoolExecutor will be joined at the end of the context
 
     def get_stats(self) -> TableStats:
+        """Get the statistics of this table."""
         stats_tuple = self.tx._rpc.api.get_table_stats(
-            bucket=self.bucket.name, schema=self.schema.name, name=self.name, txid=self.tx.txid)
+            bucket=self.bucket.name, schema=self.schema.name, name=self.name, txid=self.tx.txid,
+            imports_table_stats=self._imports_table)
         return TableStats(**stats_tuple._asdict())
 
     def select(self, columns: Optional[List[str]] = None,
@@ -225,6 +267,14 @@ class Table:
                config: Optional[QueryConfig] = None,
                *,
                internal_row_id: bool = False) -> pa.RecordBatchReader:
+        """Execute a query over this table.
+
+        To read a subset of the columns, specify their names via `columns` argument. Otherwise, all columns will be read.
+
+        In order to apply a filter, a predicate can be specified. See https://github.com/vast-data/vastdb_sdk/blob/main/README.md#filters-and-projections for more details.
+
+        Query-execution configuration options can be specified via the optional `config` argument.
+        """
         if config is None:
             config = QueryConfig()
 
@@ -335,82 +385,129 @@ class Table:
 
         return pa.RecordBatchReader.from_batches(query_data_request.response_schema, batches_iterator())
 
-    def _combine_chunks(self, col):
-        if hasattr(col, "combine_chunks"):
-            return col.combine_chunks()
-        else:
-            return col
-
     def insert(self, rows: pa.RecordBatch) -> pa.RecordBatch:
-        serialized_slices = self.tx._rpc.api._record_batch_slices(rows, MAX_INSERT_ROWS_PER_PATCH)
+        """Insert a RecordBatch into this table."""
+        if self._imports_table:
+            raise errors.NotSupportedCommand(self.bucket.name, self.schema.name, self.name)
+        serialized_slices = util.iter_serialized_slices(rows, MAX_INSERT_ROWS_PER_PATCH)
         for slice in serialized_slices:
             self.tx._rpc.api.insert_rows(self.bucket.name, self.schema.name, self.name, record_batch=slice,
                                          txid=self.tx.txid)
 
     def update(self, rows: Union[pa.RecordBatch, pa.Table], columns: Optional[List[str]] = None) -> None:
+        """Update a subset of cells in this table.
+
+        Row IDs are specified using a special field (named "$row_id" of uint64 type).
+
+        A subset of columns to be updated can be specified via the `columns` argument.
+        """
+        if self._imports_table:
+            raise errors.NotSupportedCommand(self.bucket.name, self.schema.name, self.name)
         if columns is not None:
             update_fields = [(INTERNAL_ROW_ID, pa.uint64())]
-            update_values = [self._combine_chunks(rows[INTERNAL_ROW_ID])]
+            update_values = [_combine_chunks(rows[INTERNAL_ROW_ID])]
             for col in columns:
                 update_fields.append(rows.field(col))
-                update_values.append(self._combine_chunks(rows[col]))
+                update_values.append(_combine_chunks(rows[col]))
 
             update_rows_rb = pa.record_batch(schema=pa.schema(update_fields), data=update_values)
         else:
             update_rows_rb = rows
 
-        serialized_slices = self.tx._rpc.api._record_batch_slices(update_rows_rb, MAX_ROWS_PER_BATCH)
+        serialized_slices = util.iter_serialized_slices(update_rows_rb, MAX_ROWS_PER_BATCH)
         for slice in serialized_slices:
             self.tx._rpc.api.update_rows(self.bucket.name, self.schema.name, self.name, record_batch=slice,
                                          txid=self.tx.txid)
 
     def delete(self, rows: Union[pa.RecordBatch, pa.Table]) -> None:
+        """Delete a subset of rows in this table.
+
+        Row IDs are specified using a special field (named "$row_id" of uint64 type).
+        """
         delete_rows_rb = pa.record_batch(schema=pa.schema([(INTERNAL_ROW_ID, pa.uint64())]),
-                                         data=[self._combine_chunks(rows[INTERNAL_ROW_ID])])
+                                         data=[_combine_chunks(rows[INTERNAL_ROW_ID])])
 
-        serialized_slices = self.tx._rpc.api._record_batch_slices(delete_rows_rb, MAX_ROWS_PER_BATCH)
+        serialized_slices = util.iter_serialized_slices(delete_rows_rb, MAX_ROWS_PER_BATCH)
         for slice in serialized_slices:
             self.tx._rpc.api.delete_rows(self.bucket.name, self.schema.name, self.name, record_batch=slice,
-                                         txid=self.tx.txid)
+                                         txid=self.tx.txid, delete_from_imports_table=self._imports_table)
 
     def drop(self) -> None:
-        self.tx._rpc.api.drop_table(self.bucket.name, self.schema.name, self.name, txid=self.tx.txid)
+        """Drop this table."""
+        self.tx._rpc.api.drop_table(self.bucket.name, self.schema.name, self.name, txid=self.tx.txid, remove_imports_table=self._imports_table)
         log.info("Dropped table: %s", self.name)
 
     def rename(self, new_name) -> None:
+        """Rename this table."""
+        if self._imports_table:
+            raise errors.NotSupportedCommand(self.bucket.name, self.schema.name, self.name)
         self.tx._rpc.api.alter_table(
             self.bucket.name, self.schema.name, self.name, txid=self.tx.txid, new_name=new_name)
         log.info("Renamed table from %s to %s ", self.name, new_name)
         self.name = new_name
 
     def add_column(self, new_column: pa.Schema) -> None:
+        """Add a new column."""
+        if self._imports_table:
+            raise errors.NotSupportedCommand(self.bucket.name, self.schema.name, self.name)
         self.tx._rpc.api.add_columns(self.bucket.name, self.schema.name, self.name, new_column, txid=self.tx.txid)
         log.info("Added column(s): %s", new_column)
         self.arrow_schema = self.columns()
 
     def drop_column(self, column_to_drop: pa.Schema) -> None:
+        """Drop an existing column."""
+        if self._imports_table:
+            raise errors.NotSupported(self.bucket.name, self.schema.name, self.name)
+        if self._imports_table:
+            raise errors.NotSupportedCommand(self.bucket.name, self.schema.name, self.name)
        self.tx._rpc.api.drop_columns(self.bucket.name, self.schema.name, self.name, column_to_drop, txid=self.tx.txid)
        log.info("Dropped column(s): %s", column_to_drop)
        self.arrow_schema = self.columns()
 
    def rename_column(self, current_column_name: str, new_column_name: str) -> None:
+        """Rename an existing column."""
+        if self._imports_table:
+            raise errors.NotSupportedCommand(self.bucket.name, self.schema.name, self.name)
        self.tx._rpc.api.alter_column(self.bucket.name, self.schema.name, self.name, name=current_column_name,
                                      new_name=new_column_name, txid=self.tx.txid)
        log.info("Renamed column: %s to %s", current_column_name, new_column_name)
        self.arrow_schema = self.columns()
 
    def create_projection(self, projection_name: str, sorted_columns: List[str], unsorted_columns: List[str]) -> "Projection":
+        """Create a new semi-sorted projection."""
+        if self._imports_table:
+            raise errors.NotSupportedCommand(self.bucket.name, self.schema.name, self.name)
        columns = [(sorted_column, "Sorted") for sorted_column in sorted_columns] + [(unsorted_column, "Unorted") for unsorted_column in unsorted_columns]
        self.tx._rpc.api.create_projection(self.bucket.name, self.schema.name, self.name, projection_name, columns=columns, txid=self.tx.txid)
        log.info("Created projection: %s", projection_name)
        return self.projection(projection_name)
 
+    def create_imports_table(self, fail_if_exists=True) -> "Table":
+        """Create imports table."""
+        self.tx._rpc.features.check_imports_table()
+        empty_schema = pa.schema([])
+        self.tx._rpc.api.create_table(self.bucket.name, self.schema.name, self.name, empty_schema, txid=self.tx.txid,
+                                      create_imports_table=True)
+        log.info("Created imports table for table: %s", self.name)
+        return self.imports_table()  # type: ignore[return-value]
+
+    def imports_table(self) -> Optional["Table"]:
+        """Get the imports table under of this table."""
+        self.tx._rpc.features.check_imports_table()
+        return Table(name=self.name, schema=self.schema, handle=int(self.handle), stats=self.stats, _imports_table=True)
+
    def __getitem__(self, col_name):
+        """Allow constructing ibis-like column expressions from this table.
+
+        It is useful for constructing expressions for predicate pushdown in `Table.select()` method.
+        """
        return self._ibis_table[col_name]
 
 
 @dataclass
 class Projection:
+    """VAST semi-sorted projection."""
+
     name: str
     table: Table
     handle: int
@@ -418,20 +515,21 @@ class Projection:
 
     @property
     def bucket(self):
+        """Return bucket."""
         return self.table.schema.bucket
 
     @property
     def schema(self):
+        """Return schema."""
         return self.table.schema
 
     @property
     def tx(self):
+        """Return transaction."""
         return self.table.schema.tx
 
-    def __repr__(self):
-        return f"{type(self).__name__}(name={self.name})"
-
     def columns(self) -> pa.Schema:
+        """Return this projections' columns as an Arrow schema."""
         columns = []
         next_key = 0
         while True:
@@ -447,12 +545,14 @@ class Projection:
         return self.arrow_schema
 
     def rename(self, new_name) -> None:
+        """Rename this projection."""
         self.tx._rpc.api.alter_projection(self.bucket.name, self.schema.name,
                                           self.table.name, self.name, txid=self.tx.txid, new_name=new_name)
         log.info("Renamed projection from %s to %s ", self.name, new_name)
         self.name = new_name
 
     def drop(self) -> None:
+        """Drop this projection."""
         self.tx._rpc.api.drop_projection(self.bucket.name, self.schema.name, self.table.name,
                                          self.name, txid=self.tx.txid)
         log.info("Dropped projection: %s", self.name)
@@ -478,3 +578,10 @@ def _serialize_record_batch(record_batch: pa.RecordBatch) -> pa.lib.Buffer:
     with pa.ipc.new_stream(sink, record_batch.schema) as writer:
         writer.write(record_batch)
     return sink.getvalue()
+
+
+def _combine_chunks(col):
+    if hasattr(col, "combine_chunks"):
+        return col.combine_chunks()
+    else:
+        return col
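Most of the `Table` changes above thread a new `_imports_table` flag through the RPC layer: an imports table is a child table that tracks imported objects, supports reads, stats and deletes, and rejects the other mutating commands with `errors.NotSupportedCommand`. A hedged usage sketch (the `table` handle is assumed to be an existing `Table` obtained inside a transaction, the `ObjectName` column follows the SDK tests, and `Features.check_imports_table` requires cluster version 5.2+):

```python
# Hedged sketch, not part of the package: reading the imports table of an existing Table handle.
import pyarrow as pa

from vastdb import errors

imports = table.imports_table()            # or table.create_imports_table() on first use
stats = imports.get_stats()                # routed with imports_table_stats=True
print(stats.num_rows, stats.size_in_bytes)

reader = imports.select(columns=["ObjectName"])
imported_objects = pa.Table.from_batches(reader, imports.arrow_schema)

try:
    imports.rename("other-name")           # mutating commands are rejected on imports tables
except errors.NotSupportedCommand:
    pass
```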
vastdb/tests/test_imports.py CHANGED
@@ -6,7 +6,7 @@ import pyarrow.parquet as pq
 import pytest
 
 from vastdb import util
-from vastdb.errors import ImportFilesError, InvalidArgument
+from vastdb.errors import ImportFilesError, InternalServerError, InvalidArgument
 
 log = logging.getLogger(__name__)
 
@@ -34,12 +34,24 @@ def test_parallel_imports(session, clean_bucket_name, s3):
         b = tx.bucket(clean_bucket_name)
         s = b.create_schema('s1')
         t = s.create_table('t1', pa.schema([('num', pa.int64())]))
+        with pytest.raises(InternalServerError):
+            t.create_imports_table()
         log.info("Starting import of %d files", num_files)
         t.import_files(files)
         arrow_table = pa.Table.from_batches(t.select(columns=['num']))
         assert arrow_table.num_rows == num_rows * num_files
         arrow_table = pa.Table.from_batches(t.select(columns=['num'], predicate=t['num'] == 100))
         assert arrow_table.num_rows == num_files
+        import_table = t.imports_table()
+        # checking all imports are on the imports table:
+        objects_name = pa.Table.from_batches(import_table.select(columns=["ObjectName"]))
+        objects_name = objects_name.to_pydict()
+        object_names = set(objects_name['ObjectName'])
+        prefix = 'prq'
+        numbers = set(range(53))
+        assert all(name.startswith(prefix) for name in object_names)
+        numbers.issubset(int(name.replace(prefix, '')) for name in object_names)
+        assert len(object_names) == len(objects_name['ObjectName'])
 
 
 def test_create_table_from_files(session, clean_bucket_name, s3):
vastdb/tests/test_schemas.py CHANGED
@@ -60,5 +60,4 @@ def test_commits_and_rollbacks(session, clean_bucket_name):
 def test_list_snapshots(session, clean_bucket_name):
     with session.transaction() as tx:
         b = tx.bucket(clean_bucket_name)
-        s = b.snapshots()
-        assert s == []
+        b.snapshots() # VAST Catalog may create some snapshots
vastdb/tests/test_tables.py CHANGED
@@ -3,6 +3,7 @@ import decimal
 import logging
 import random
 import threading
+import time
 from contextlib import closing
 from tempfile import NamedTemporaryFile
 
@@ -261,7 +262,7 @@ def test_filters(session, clean_bucket_name):
 
     with prepare_data(session, clean_bucket_name, 's', 't', expected) as t:
         def select(predicate):
-            return pa.Table.from_batches(t.select(predicate=predicate))
+            return pa.Table.from_batches(t.select(predicate=predicate), t.arrow_schema)
 
         assert select(None) == expected
 
@@ -304,6 +305,13 @@ def test_filters(session, clean_bucket_name):
         assert select(t['s'].contains('b')) == expected.filter(pc.field('s') == 'bb')
         assert select(t['s'].contains('y')) == expected.filter(pc.field('s') == 'xyz')
 
+        assert select(t['a'].isin([555])) == expected.filter(pc.field('a').isin([555]))
+        assert select(t['a'].isin([111, 222, 999])) == expected.filter(pc.field('a').isin([111, 222, 999]))
+        assert select((t['a'] == 111) | t['a'].isin([333, 444]) | (t['a'] > 600)) == expected.filter((pc.field('a') == 111) | pc.field('a').isin([333, 444]) | (pc.field('a') > 600))
+
+        with pytest.raises(NotImplementedError):
+            select(t['a'].isin([]))
+
 
 def test_parquet_export(session, clean_bucket_name):
     with session.transaction() as tx:
@@ -638,3 +646,20 @@ def test_select_stop(session, clean_bucket_name):
 
     # validate that all query threads were killed.
     assert active_threads() == 0
+
+
+def test_big_catalog_select(session, clean_bucket_name):
+    with session.transaction() as tx:
+        bc = tx.catalog()
+        actual = pa.Table.from_batches(bc.select(['name']))
+        assert actual
+        log.info("actual=%s", actual)
+
+
+def test_audit_log_select(session, clean_bucket_name):
+    with session.transaction() as tx:
+        a = tx.audit_log()
+        a.columns()
+        time.sleep(1)
+        actual = pa.Table.from_batches(a.select(), a.arrow_schema)
+        log.info("actual=%s", actual)
vastdb/tests/test_util.py ADDED
@@ -0,0 +1,39 @@
+import pyarrow as pa
+import pytest
+
+from .. import errors, util
+
+
+def test_slices():
+    ROWS = 1 << 20
+    t = pa.table({"x": range(ROWS), "y": [i / 1000 for i in range(ROWS)]})
+
+    chunks = list(util.iter_serialized_slices(t))
+    assert len(chunks) > 1
+    sizes = [len(c) for c in chunks]
+
+    assert max(sizes) < util.MAX_RECORD_BATCH_SLICE_SIZE
+    assert t == pa.Table.from_batches(_parse(chunks))
+
+    chunks = list(util.iter_serialized_slices(t, 1000))
+    assert len(chunks) > 1
+    sizes = [len(c) for c in chunks]
+
+    assert max(sizes) < util.MAX_RECORD_BATCH_SLICE_SIZE
+    assert t == pa.Table.from_batches(_parse(chunks))
+
+
+def test_wide_row():
+    cols = [pa.field(f"x{i}", pa.utf8()) for i in range(1000)]
+    values = [['a' * 10000]] * len(cols)
+    t = pa.table(values, schema=pa.schema(cols))
+    assert len(t) == 1
+
+    with pytest.raises(errors.TooWideRow):
+        list(util.iter_serialized_slices(t))
+
+
+def _parse(bufs):
+    for buf in bufs:
+        with pa.ipc.open_stream(buf) as reader:
+            yield from reader
vastdb/transaction.py CHANGED
@@ -16,6 +16,14 @@ from . import bucket, errors, schema, session, table
 
 log = logging.getLogger(__name__)
 
+TABULAR_BC_BUCKET = "vast-big-catalog-bucket"
+VAST_CATALOG_SCHEMA_NAME = 'vast_big_catalog_schema'
+VAST_CATALOG_TABLE_NAME = 'vast_big_catalog_table'
+
+TABULAR_AUDERY_BUCKET = "vast-audit-log-bucket"
+AUDERY_SCHEMA_NAME = 'vast_audit_log_schema'
+AUDERY_TABLE_NAME = 'vast_audit_log_table'
+
 
 @dataclass
 class Transaction:
@@ -44,6 +52,8 @@ class Transaction:
 
     def __repr__(self):
         """Don't show the session details."""
+        if self.txid is None:
+            return 'InvalidTransaction'
         return f'Transaction(id=0x{self.txid:016x})'
 
     def bucket(self, name: str) -> "bucket.Bucket":
@@ -59,6 +69,12 @@ class Transaction:
 
     def catalog(self, fail_if_missing=True) -> Optional["table.Table"]:
         """Return VAST Catalog table."""
-        b = bucket.Bucket("vast-big-catalog-bucket", self)
-        s = schema.Schema("vast_big_catalog_schema", b)
-        return s.table(name="vast_big_catalog_table", fail_if_missing=fail_if_missing)
+        b = bucket.Bucket(TABULAR_BC_BUCKET, self)
+        s = schema.Schema(VAST_CATALOG_SCHEMA_NAME, b)
+        return s.table(name=VAST_CATALOG_TABLE_NAME, fail_if_missing=fail_if_missing)
+
+    def audit_log(self, fail_if_missing=True) -> Optional["table.Table"]:
+        """Return VAST AuditLog table."""
+        b = bucket.Bucket(TABULAR_AUDERY_BUCKET, self)
+        s = schema.Schema(AUDERY_SCHEMA_NAME, b)
+        return s.table(name=AUDERY_TABLE_NAME, fail_if_missing=fail_if_missing)
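`Transaction.audit_log()` mirrors the existing `catalog()` accessor, pointing at the internal audit-log bucket, schema and table constants introduced above. A hedged sketch of reading both (the `session` object is assumed to be an open vastdb session; the column name `name` follows the catalog test):

```python
# Hedged sketch, not part of the package: reading the VAST Catalog and AuditLog tables.
import pyarrow as pa

with session.transaction() as tx:
    catalog = tx.catalog()
    names = pa.Table.from_batches(catalog.select(['name']))

    audit = tx.audit_log()  # vast-audit-log-bucket / vast_audit_log_schema / vast_audit_log_table
    events = pa.Table.from_batches(audit.select(), audit.arrow_schema)
```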
vastdb/util.py CHANGED
@@ -1,20 +1,22 @@
 import logging
-from typing import Callable, List, Optional
+from typing import TYPE_CHECKING, Callable, List, Optional, Union
 
 import pyarrow as pa
 import pyarrow.parquet as pq
 
-from .errors import InvalidArgument
-from .schema import Schema
-from .table import ImportConfig, Table
+from .errors import InvalidArgument, TooWideRow
 
 log = logging.getLogger(__name__)
 
+if TYPE_CHECKING:
+    from .schema import Schema
+    from .table import ImportConfig, Table
+
 
 def create_table_from_files(
-        schema: Schema, table_name: str, parquet_files: List[str],
+        schema: "Schema", table_name: str, parquet_files: List[str],
         schema_merge_func: Optional[Callable] = None,
-        config: Optional[ImportConfig] = None) -> Table:
+        config: Optional["ImportConfig"] = None) -> "Table":
     if not schema_merge_func:
         schema_merge_func = default_schema_merge
     else:
@@ -77,3 +79,36 @@ def union_schema_merge(current_schema: pa.Schema, new_schema: pa.Schema) -> pa.S
    This function returns a unified schema from potentially two different schemas.
    """
    return pa.unify_schemas([current_schema, new_schema])
+
+
+MAX_TABULAR_REQUEST_SIZE = 5 << 20 # in bytes
+MAX_RECORD_BATCH_SLICE_SIZE = int(0.9 * MAX_TABULAR_REQUEST_SIZE)
+
+
+def iter_serialized_slices(batch: Union[pa.RecordBatch, pa.Table], max_rows_per_slice=None):
+    """Iterate over a list of record batch slices."""
+
+    rows_per_slice = int(0.9 * len(batch) * MAX_RECORD_BATCH_SLICE_SIZE / batch.nbytes)
+    if max_rows_per_slice is not None:
+        rows_per_slice = min(rows_per_slice, max_rows_per_slice)
+
+    offset = 0
+    while offset < len(batch):
+        if rows_per_slice < 1:
+            raise TooWideRow(batch)
+
+        batch_slice = batch.slice(offset, rows_per_slice)
+        serialized_slice_batch = serialize_record_batch(batch_slice)
+        if len(serialized_slice_batch) <= MAX_RECORD_BATCH_SLICE_SIZE:
+            yield serialized_slice_batch
+            offset += rows_per_slice
+        else:
+            rows_per_slice = rows_per_slice // 2
+
+
+def serialize_record_batch(batch: Union[pa.RecordBatch, pa.Table]):
+    """Serialize a RecordBatch using Arrow IPC format."""
+    sink = pa.BufferOutputStream()
+    with pa.ipc.new_stream(sink, batch.schema) as writer:
+        writer.write(batch)
+    return sink.getvalue()
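`iter_serialized_slices()` replaces the removed `VastdbApi._record_batch_slices()`: it lazily yields Arrow IPC buffers, starting from a size-based row estimate and halving the slice size until each serialized slice fits under `MAX_RECORD_BATCH_SLICE_SIZE` (90% of the 5 MiB request limit), raising `TooWideRow` if even a single row cannot fit. A hedged sketch of using it directly:

```python
# Hedged sketch, not part of the package: slicing a large Arrow table into
# request-sized IPC buffers, as Table.insert/update/delete now do internally.
import pyarrow as pa

from vastdb import util

table = pa.table({"x": range(1_000_000), "y": [i / 1000 for i in range(1_000_000)]})

for buf in util.iter_serialized_slices(table, 100_000):
    assert len(buf) <= util.MAX_RECORD_BATCH_SLICE_SIZE
    with pa.ipc.open_stream(buf) as reader:
        for batch in reader:
            pass  # each `batch` is a RecordBatch small enough for one RPC
```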
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vastdb
-Version: 0.1.2
+Version: 0.1.3
 Summary: VAST Data SDK
 Home-page: https://github.com/vast-data/vastdb_sdk
 Author: VAST DATA
@@ -21,7 +21,7 @@ License-File: LICENSE
 Requires-Dist: aws-requests-auth
 Requires-Dist: boto3
 Requires-Dist: flatbuffers
-Requires-Dist: ibis-framework
+Requires-Dist: ibis-framework ==8.0.0
 Requires-Dist: pyarrow
 Requires-Dist: requests
 Requires-Dist: xmltodict
@@ -149,28 +149,29 @@ vast_flatbuf/tabular/S3File.py,sha256=KC9c2oS5-JXwTTriUVFdjOvRG0B54Cq9kviSDZY3NI
 vast_flatbuf/tabular/VipRange.py,sha256=_BJd1RRZAcK76T9vlsHzXKYVsPVaz6WTEAqStMQCAUQ,2069
 vast_flatbuf/tabular/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vastdb/__init__.py,sha256=cMJtZuJ0IL9aKyM3DUWqTCzuP1H1MXXVivKKE1-q0DY,292
-vastdb/bucket.py,sha256=xtKs7S4w0jmI4MujDWH3HDI-iEgbq5Xqqsod-tw4zSo,2991
+vastdb/bucket.py,sha256=4rPEm9qlPTg7ccWO6VGmd4LKb8w-BDhJYwzXGjn03sc,3566
 vastdb/conftest.py,sha256=pKpo_46Vq4QHzTDQAFxasrVhnZ2V2L-y6IMLxojxaFM,2132
-vastdb/errors.py,sha256=fxpKSxjEgoJZuBtEGWzTW9lpDlEjuzgpgXwAQc1W6BQ,3436
-vastdb/internal_commands.py,sha256=3F6FiYu-Ama1zBO7hENPxCaQYJT8mcZP6rSQvtI7Sks,101273
-vastdb/schema.py,sha256=MrQr-WIrES8KcQ0V6cJkRRp_-9jj9FboyrBnkNBsw-8,3324
-vastdb/session.py,sha256=VZOFGZbAdr5Tl4cp88VRQYnR4Q16UNuYjSmX_QPW1II,1718
-vastdb/table.py,sha256=bdx3C1iWiFivKmtifH7MyG7TMqnVVIU91as-_hMn1rE,20532
-vastdb/transaction.py,sha256=1uCSHXqWcwsMJv6DuNx4WyQMGUm8P-RCCqYdBdUGusI,2196
-vastdb/util.py,sha256=Tjj6p4gqabK5G21uWuCiuYM9FaaR04_Zk5X8NWtcdj8,3022
+vastdb/errors.py,sha256=vKWoq1yXrHyafMWwJgW_sQkSxQYxlI1JbTVCLz5Xi9Y,3793
+vastdb/internal_commands.py,sha256=ZD2YXYvZ3lJWYzZU0oHtv8G3lNtDQUF0e8yg8813Xt4,99575
+vastdb/schema.py,sha256=ql4TPB1W_FQ_BHov3CKHI8JX3krXMlcKWz7dTrjpQ1w,3346
+vastdb/session.py,sha256=ciYS8Je2cRpuaAEE6Wjk79VsW0KAPdnRB2cqfxFCjis,2323
+vastdb/table.py,sha256=xnSTWUUa0QHzXC5MUQWsGT1fsG8yAgMLy3nrgSH4j5Q,25661
+vastdb/transaction.py,sha256=g8YTcYnsNPIhB2udbHyT5RIFB5kHnBLJcvV2CWRICwI,2845
+vastdb/util.py,sha256=rs7nLL2Qz-OVEZDSVIqAvS-uETMq-zxQs5jBksB5-JA,4276
 vastdb/bench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vastdb/bench/test_perf.py,sha256=iHE3E60fvyU5SBDHPi4h03Dj6QcY6VI9l9mMhgNMtPc,1117
 vastdb/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vastdb/tests/test_duckdb.py,sha256=KDuv4PrjGEwChCGHG36xNT2JiFlBOt6K3DQ3L06Kq-A,1913
-vastdb/tests/test_imports.py,sha256=fDUjO5U-5i4QTIMoNnSSW4X_ZnOStLbx0mJkNq2pj9Q,5033
+vastdb/tests/test_imports.py,sha256=48kbJKsa_MrEXcBYQUbUDr1e9wzjG4FHQ7C3wUEQfXA,5705
 vastdb/tests/test_nested.py,sha256=3kejEvtSqV0LrUgb1QglRjrlxnKI4_AXTFw2nE7Q520,951
 vastdb/tests/test_projections.py,sha256=_cDNfD5zTwbCXLk6uGpPUWGN0P-4HElu5OjubWu-Jg0,1255
 vastdb/tests/test_sanity.py,sha256=ixx0QPo73hLHjAa7bByFXjS1XST0WvmSwLEpgnHh_JY,2960
-vastdb/tests/test_schemas.py,sha256=b-JpYHOFYVTdE570_La7O2RWf8BGN-q8KDXNXeC8CSg,1724
-vastdb/tests/test_tables.py,sha256=TXM4LSBvPb3EEu7XScZ5iEiu_zhHClq61W18EQodxw8,25667
+vastdb/tests/test_schemas.py,sha256=qoHTLX51D-0S4bMxdCpRh9gaYQd-BkZdT_agGOwFwTM,1739
+vastdb/tests/test_tables.py,sha256=joeEQ30TwKBQc-2N_qGIdviZVnQr4rs6thlNsy5s_og,26672
+vastdb/tests/test_util.py,sha256=owRAU3TCKMq-kz54NRdA5wX2O_bZIHqG5ucUR77jm5k,1046
 vastdb/tests/util.py,sha256=NaCzKymEGy1xuiyMxyt2_0frKVfVk9iGrFwLf3GHjTI,435
-vastdb-0.1.2.dist-info/LICENSE,sha256=obffan7LYrq7hLHNrY7vHcn2pKUTBUYXMKu-VOAvDxU,11333
-vastdb-0.1.2.dist-info/METADATA,sha256=edJPdDWmHj6tRHRR97eSppfN9_4ARfIr0jS9HMjHfSQ,1311
-vastdb-0.1.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-vastdb-0.1.2.dist-info/top_level.txt,sha256=Vsj2MKtlhPg0J4so64slQtnwjhgoPmJgcG-6YcVAwVc,20
-vastdb-0.1.2.dist-info/RECORD,,
+vastdb-0.1.3.dist-info/LICENSE,sha256=obffan7LYrq7hLHNrY7vHcn2pKUTBUYXMKu-VOAvDxU,11333
+vastdb-0.1.3.dist-info/METADATA,sha256=3h3JttUxw9oMMsxV_CVG_LMYwhgegsS9-b4gZkihrM0,1319
+vastdb-0.1.3.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+vastdb-0.1.3.dist-info/top_level.txt,sha256=Vsj2MKtlhPg0J4so64slQtnwjhgoPmJgcG-6YcVAwVc,20
+vastdb-0.1.3.dist-info/RECORD,,
File without changes