vastdb 1.3.6__py3-none-any.whl → 1.3.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vastdb/_internal.py +170 -78
- vastdb/bench/test_perf.py +2 -2
- vastdb/config.py +3 -0
- vastdb/errors.py +6 -0
- vastdb/features.py +9 -0
- vastdb/schema.py +5 -3
- vastdb/table.py +76 -15
- vastdb/tests/test_imports.py +70 -1
- vastdb/tests/test_tables.py +217 -0
- vastdb/tests/util.py +2 -2
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Aggregate.py +4 -4
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Call.py +2 -2
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/CaseFragment.py +2 -2
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Cast.py +2 -2
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/ConditionalCase.py +2 -2
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Filter.py +3 -3
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Grouping.py +1 -1
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Join.py +4 -4
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/KeyValue.py +2 -2
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Limit.py +2 -2
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/ListLiteral.py +1 -1
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Literal.py +1 -1
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/LiteralColumn.py +1 -1
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/LiteralRelation.py +2 -2
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/MapKey.py +1 -1
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/MapLiteral.py +1 -1
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/OrderBy.py +3 -3
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Plan.py +1 -1
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Project.py +3 -3
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/SetOperation.py +2 -2
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/SimpleCase.py +3 -3
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/SortKey.py +1 -1
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Source.py +4 -4
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/StructLiteral.py +1 -1
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/WindowCall.py +3 -3
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/DictionaryBatch.py +1 -1
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/DictionaryEncoding.py +1 -1
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Field.py +3 -3
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Footer.py +4 -4
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Message.py +1 -1
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/RecordBatch.py +3 -3
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Schema.py +2 -2
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/SparseMatrixIndexCSX.py +4 -4
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/SparseTensor.py +2 -2
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/SparseTensorIndexCOO.py +2 -2
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/SparseTensorIndexCSF.py +4 -4
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Tensor.py +2 -2
- vastdb/vast_flatbuf/tabular/ColumnDetails.py +56 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/tabular/CreateProjectionRequest.py +1 -1
- vastdb/vast_flatbuf/tabular/FilterString.py +45 -0
- vastdb/vast_flatbuf/tabular/GetRowColumnSecurityResponse.py +166 -0
- vastdb/vast_flatbuf/tabular/GetTableStatsResponse.py +178 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/tabular/ImportDataRequest.py +34 -1
- vastdb/vast_flatbuf/tabular/KeyName.py +45 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/tabular/ListProjectionsResponse.py +1 -1
- {vast_flatbuf → vastdb/vast_flatbuf}/tabular/ListSchemasResponse.py +1 -1
- {vast_flatbuf → vastdb/vast_flatbuf}/tabular/ListTablesResponse.py +1 -1
- {vast_flatbuf → vastdb/vast_flatbuf}/tabular/ListViewsResponse.py +1 -1
- vastdb/vast_flatbuf/tabular/NameString.py +45 -0
- vastdb/vast_flatbuf/tabular/ObjectDetails.py +168 -0
- {vastdb-1.3.6.dist-info → vastdb-1.3.8.dist-info}/METADATA +1 -1
- vastdb-1.3.8.dist-info/RECORD +216 -0
- vastdb-1.3.8.dist-info/top_level.txt +1 -0
- vast_flatbuf/tabular/GetTableStatsResponse.py +0 -111
- vast_flatbuf/tabular/ObjectDetails.py +0 -112
- vastdb-1.3.6.dist-info/RECORD +0 -211
- vastdb-1.3.6.dist-info/top_level.txt +0 -2
- {vast_flatbuf → vastdb/vast_flatbuf}/__init__.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/__init__.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/__init__.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/__init__.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/__init__.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/ArraySlice.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/ArraySubscript.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/BinaryLiteral.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/BooleanLiteral.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Bound.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/ConcreteBoundImpl.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/CurrentRow.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/DateLiteral.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/DecimalLiteral.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Deref.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/DurationLiteral.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Expression.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/ExpressionImpl.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/FieldIndex.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/FieldRef.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/FixedSizeBinaryLiteral.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Float16Literal.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Float32Literal.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Float64Literal.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Following.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Frame.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Int16Literal.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Int32Literal.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Int64Literal.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Int8Literal.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/IntervalLiteral.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/IntervalLiteralDaysMilliseconds.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/IntervalLiteralImpl.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/IntervalLiteralMonths.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/JoinKind.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/LiteralImpl.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Ordering.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Preceding.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/RelId.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Relation.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/RelationImpl.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/SetOpKind.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/StringLiteral.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/StructField.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/TimeLiteral.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/TimestampLiteral.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/UInt16Literal.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/UInt32Literal.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/UInt64Literal.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/UInt8Literal.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Unbounded.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/__init__.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Binary.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Block.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/BodyCompression.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/BodyCompressionMethod.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Bool.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Buffer.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/CompressionType.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Date.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/DateUnit.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Decimal.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/DictionaryKind.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Duration.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Endianness.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Feature.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/FieldNode.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/FixedSizeBinary.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/FixedSizeList.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/FloatingPoint.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Int.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Interval.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/IntervalUnit.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/KeyValue.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/LargeBinary.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/LargeList.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/LargeUtf8.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/List.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Map.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/MessageHeader.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/MetadataVersion.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Null.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Precision.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/SparseMatrixCompressedAxis.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/SparseTensorIndex.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Struct_.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/TensorDim.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Time.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/TimeUnit.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Timestamp.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Type.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Union.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/UnionMode.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Utf8.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/__init__.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/tabular/AlterColumnRequest.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/tabular/AlterProjectionTableRequest.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/tabular/AlterSchemaRequest.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/tabular/AlterTableRequest.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/tabular/Column.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/tabular/ColumnType.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/tabular/CreateSchemaRequest.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/tabular/CreateViewRequest.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/tabular/GetProjectionTableStatsResponse.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/tabular/S3File.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/tabular/VipRange.py +0 -0
- {vast_flatbuf → vastdb/vast_flatbuf}/tabular/__init__.py +0 -0
- {vastdb-1.3.6.dist-info → vastdb-1.3.8.dist-info}/LICENSE +0 -0
- {vastdb-1.3.6.dist-info → vastdb-1.3.8.dist-info}/WHEEL +0 -0
vastdb/table.py
CHANGED
|
@@ -21,6 +21,7 @@ log = logging.getLogger(__name__)
|
|
|
21
21
|
|
|
22
22
|
INTERNAL_ROW_ID = "$row_id"
|
|
23
23
|
INTERNAL_ROW_ID_FIELD = pa.field(INTERNAL_ROW_ID, pa.uint64())
|
|
24
|
+
INTERNAL_ROW_ID_SORTED_FIELD = pa.field(INTERNAL_ROW_ID, pa.decimal128(38, 0)) # Sorted tables have longer row ids
|
|
24
25
|
|
|
25
26
|
MAX_ROWS_PER_BATCH = 512 * 1024
|
|
26
27
|
# for insert we need a smaller limit due to response amplification
|
|
@@ -115,6 +116,7 @@ class Table:
|
|
|
115
116
|
arrow_schema: pa.Schema = field(init=False, compare=False, repr=False)
|
|
116
117
|
_ibis_table: ibis.Schema = field(init=False, compare=False, repr=False)
|
|
117
118
|
_imports_table: bool
|
|
119
|
+
sorted_table: bool
|
|
118
120
|
|
|
119
121
|
def __post_init__(self):
|
|
120
122
|
"""Also, load columns' metadata."""
|
|
@@ -157,6 +159,29 @@ class Table:
|
|
|
157
159
|
self.arrow_schema = pa.schema(fields)
|
|
158
160
|
return self.arrow_schema
|
|
159
161
|
|
|
162
|
+
def sorted_columns(self) -> list:
|
|
163
|
+
"""Return sorted columns' metadata."""
|
|
164
|
+
fields = []
|
|
165
|
+
try:
|
|
166
|
+
self.tx._rpc.features.check_elysium()
|
|
167
|
+
next_key = 0
|
|
168
|
+
while True:
|
|
169
|
+
cur_columns, next_key, is_truncated, _count = self.tx._rpc.api.list_sorted_columns(
|
|
170
|
+
bucket=self.bucket.name, schema=self.schema.name, table=self.name, next_key=next_key, txid=self.tx.txid, list_imports_table=self._imports_table)
|
|
171
|
+
fields.extend(cur_columns)
|
|
172
|
+
if not is_truncated:
|
|
173
|
+
break
|
|
174
|
+
except errors.BadRequest:
|
|
175
|
+
pass
|
|
176
|
+
except errors.InternalServerError as ise:
|
|
177
|
+
log.warning("Failed to get the sorted columns Elysium might not be supported: %s", ise)
|
|
178
|
+
pass
|
|
179
|
+
except errors.NotSupportedVersion:
|
|
180
|
+
log.warning("Failed to get the sorted columns, Elysium not supported")
|
|
181
|
+
pass
|
|
182
|
+
|
|
183
|
+
return fields
|
|
184
|
+
|
|
160
185
|
def projection(self, name: str) -> "Projection":
|
|
161
186
|
"""Get a specific semi-sorted projection of this table."""
|
|
162
187
|
if self._imports_table:
|
|
@@ -228,6 +253,10 @@ class Table:
|
|
|
228
253
|
endpoints = [self.tx._rpc.api.url for _ in range(config.import_concurrency)] # TODO: use valid endpoints...
|
|
229
254
|
files_queue = queue.Queue()
|
|
230
255
|
|
|
256
|
+
key_names = config.key_names or []
|
|
257
|
+
if key_names:
|
|
258
|
+
self.tx._rpc.features.check_zip_import()
|
|
259
|
+
|
|
231
260
|
for source_file in source_files.items():
|
|
232
261
|
files_queue.put(source_file)
|
|
233
262
|
|
|
@@ -249,8 +278,10 @@ class Table:
|
|
|
249
278
|
pass
|
|
250
279
|
if files_batch:
|
|
251
280
|
log.debug("Starting import batch of %s files", len(files_batch))
|
|
281
|
+
log.info(f"starting import of {files_batch}")
|
|
252
282
|
session.import_data(
|
|
253
|
-
self.bucket.name, self.schema.name, self.name, files_batch, txid=self.tx.txid
|
|
283
|
+
self.bucket.name, self.schema.name, self.name, files_batch, txid=self.tx.txid,
|
|
284
|
+
key_names=key_names)
|
|
254
285
|
except (Exception, KeyboardInterrupt) as e:
|
|
255
286
|
stop_event.set()
|
|
256
287
|
log.error("Got exception inside import_worker. exception: %s", e)
|
|
@@ -277,6 +308,21 @@ class Table:
|
|
|
277
308
|
imports_table_stats=self._imports_table)
|
|
278
309
|
return TableStats(**stats_tuple._asdict())
|
|
279
310
|
|
|
311
|
+
def _get_row_estimate(self, columns: List[str], predicate: ibis.expr.types.BooleanColumn, arrow_schema: pa.Schema):
|
|
312
|
+
query_data_request = _internal.build_query_data_request(
|
|
313
|
+
schema=arrow_schema,
|
|
314
|
+
predicate=predicate,
|
|
315
|
+
field_names=columns)
|
|
316
|
+
response = self.tx._rpc.api.query_data(
|
|
317
|
+
bucket=self.bucket.name,
|
|
318
|
+
schema=self.schema.name,
|
|
319
|
+
table=self.name,
|
|
320
|
+
params=query_data_request.serialized,
|
|
321
|
+
split=(0xffffffff - 3, 1, 1),
|
|
322
|
+
txid=self.tx.txid)
|
|
323
|
+
batch = _internal.read_first_batch(response.raw)
|
|
324
|
+
return batch.num_rows * 2**16 if batch is not None else 0
|
|
325
|
+
|
|
280
326
|
def select(self, columns: Optional[List[str]] = None,
|
|
281
327
|
predicate: Union[ibis.expr.types.BooleanColumn, ibis.common.deferred.Deferred] = None,
|
|
282
328
|
config: Optional[QueryConfig] = None,
|
|
@@ -293,30 +339,22 @@ class Table:
|
|
|
293
339
|
if config is None:
|
|
294
340
|
config = QueryConfig()
|
|
295
341
|
|
|
342
|
+
stats = None
|
|
296
343
|
# Retrieve snapshots only if needed
|
|
297
|
-
if config.data_endpoints is None
|
|
344
|
+
if config.data_endpoints is None:
|
|
298
345
|
stats = self.get_stats()
|
|
299
346
|
log.debug("stats: %s", stats)
|
|
300
|
-
|
|
301
|
-
if config.data_endpoints is None:
|
|
302
347
|
endpoints = stats.endpoints
|
|
303
348
|
else:
|
|
304
349
|
endpoints = tuple(config.data_endpoints)
|
|
305
350
|
log.debug("endpoints: %s", endpoints)
|
|
306
351
|
|
|
307
|
-
if config.num_splits is None:
|
|
308
|
-
config.num_splits = max(1, stats.num_rows // config.rows_per_split)
|
|
309
|
-
log.debug("config: %s", config)
|
|
310
|
-
|
|
311
|
-
if config.semi_sorted_projection_name:
|
|
312
|
-
self.tx._rpc.features.check_enforce_semisorted_projection()
|
|
313
|
-
|
|
314
352
|
if columns is None:
|
|
315
353
|
columns = [f.name for f in self.arrow_schema]
|
|
316
354
|
|
|
317
355
|
query_schema = self.arrow_schema
|
|
318
356
|
if internal_row_id:
|
|
319
|
-
queried_fields = [INTERNAL_ROW_ID_FIELD]
|
|
357
|
+
queried_fields = [INTERNAL_ROW_ID_SORTED_FIELD if self.sorted_table else INTERNAL_ROW_ID_FIELD]
|
|
320
358
|
queried_fields.extend(column for column in self.arrow_schema)
|
|
321
359
|
query_schema = pa.schema(queried_fields)
|
|
322
360
|
columns.append(INTERNAL_ROW_ID)
|
|
@@ -330,6 +368,22 @@ class Table:
|
|
|
330
368
|
if isinstance(predicate, ibis.common.deferred.Deferred):
|
|
331
369
|
predicate = predicate.resolve(self._ibis_table) # may raise if the predicate is invalid (e.g. wrong types / missing column)
|
|
332
370
|
|
|
371
|
+
if config.num_splits is None:
|
|
372
|
+
num_rows = 0
|
|
373
|
+
if self.sorted_table:
|
|
374
|
+
num_rows = self._get_row_estimate(columns, predicate, query_schema)
|
|
375
|
+
log.info(f'sorted estimate: {num_rows}')
|
|
376
|
+
if num_rows == 0:
|
|
377
|
+
if stats is None:
|
|
378
|
+
stats = self.get_stats()
|
|
379
|
+
num_rows = stats.num_rows
|
|
380
|
+
|
|
381
|
+
config.num_splits = max(1, num_rows // config.rows_per_split)
|
|
382
|
+
log.debug("config: %s", config)
|
|
383
|
+
|
|
384
|
+
if config.semi_sorted_projection_name:
|
|
385
|
+
self.tx._rpc.features.check_enforce_semisorted_projection()
|
|
386
|
+
|
|
333
387
|
query_data_request = _internal.build_query_data_request(
|
|
334
388
|
schema=query_schema,
|
|
335
389
|
predicate=predicate,
|
|
@@ -485,7 +539,7 @@ class Table:
|
|
|
485
539
|
if columns is None:
|
|
486
540
|
columns = [name for name in rows.schema.names if name != INTERNAL_ROW_ID]
|
|
487
541
|
|
|
488
|
-
update_fields = [
|
|
542
|
+
update_fields = [INTERNAL_ROW_ID_SORTED_FIELD if self.sorted_table else INTERNAL_ROW_ID_FIELD]
|
|
489
543
|
update_values = [_combine_chunks(rows_chunk)]
|
|
490
544
|
for col in columns:
|
|
491
545
|
update_fields.append(rows.field(col))
|
|
@@ -511,7 +565,7 @@ class Table:
|
|
|
511
565
|
rows_chunk = rows[INTERNAL_ROW_ID]
|
|
512
566
|
except KeyError:
|
|
513
567
|
raise errors.MissingRowIdColumn
|
|
514
|
-
delete_rows_rb = pa.record_batch(schema=pa.schema([
|
|
568
|
+
delete_rows_rb = pa.record_batch(schema=pa.schema([INTERNAL_ROW_ID_SORTED_FIELD if self.sorted_table else INTERNAL_ROW_ID_FIELD]),
|
|
515
569
|
data=[_combine_chunks(rows_chunk)])
|
|
516
570
|
|
|
517
571
|
delete_rows_rb = util.sort_record_batch_if_needed(delete_rows_rb, INTERNAL_ROW_ID)
|
|
@@ -535,6 +589,13 @@ class Table:
|
|
|
535
589
|
log.info("Renamed table from %s to %s ", self.name, new_name)
|
|
536
590
|
self.name = new_name
|
|
537
591
|
|
|
592
|
+
def add_sorting_key(self, sorting_key: list) -> None:
|
|
593
|
+
"""Ads a sorting key to a table that doesn't have any."""
|
|
594
|
+
self.tx._rpc.features.check_elysium()
|
|
595
|
+
self.tx._rpc.api.alter_table(
|
|
596
|
+
self.bucket.name, self.schema.name, self.name, txid=self.tx.txid, sorting_key=sorting_key)
|
|
597
|
+
log.info("Enabled Elysium for table %s with sorting key %s ", self.name, str(sorting_key))
|
|
598
|
+
|
|
538
599
|
def add_column(self, new_column: pa.Schema) -> None:
|
|
539
600
|
"""Add a new column."""
|
|
540
601
|
if self._imports_table:
|
|
@@ -583,7 +644,7 @@ class Table:
|
|
|
583
644
|
def imports_table(self) -> Optional["Table"]:
|
|
584
645
|
"""Get the imports table of this table."""
|
|
585
646
|
self.tx._rpc.features.check_imports_table()
|
|
586
|
-
return Table(name=self.name, schema=self.schema, handle=int(self.handle), _imports_table=True)
|
|
647
|
+
return Table(name=self.name, schema=self.schema, handle=int(self.handle), _imports_table=True, sorted_table=self.sorted_table)
|
|
587
648
|
|
|
588
649
|
def __getitem__(self, col_name: str):
|
|
589
650
|
"""Allow constructing ibis-like column expressions from this table.
|
vastdb/tests/test_imports.py
CHANGED
|
@@ -6,11 +6,27 @@ import pyarrow.parquet as pq
|
|
|
6
6
|
import pytest
|
|
7
7
|
|
|
8
8
|
from vastdb import util
|
|
9
|
-
from vastdb.
|
|
9
|
+
from vastdb.config import ImportConfig
|
|
10
|
+
from vastdb.errors import (
|
|
11
|
+
ImportFilesError,
|
|
12
|
+
InternalServerError,
|
|
13
|
+
InvalidArgument,
|
|
14
|
+
NotSupportedVersion,
|
|
15
|
+
)
|
|
10
16
|
|
|
11
17
|
log = logging.getLogger(__name__)
|
|
12
18
|
|
|
13
19
|
|
|
20
|
+
@pytest.fixture
|
|
21
|
+
def zip_import_session(session):
|
|
22
|
+
with session.transaction() as tx:
|
|
23
|
+
try:
|
|
24
|
+
tx._rpc.features.check_zip_import()
|
|
25
|
+
return session
|
|
26
|
+
except NotSupportedVersion:
|
|
27
|
+
pytest.skip("Skipped because this test requires version 5.3.1")
|
|
28
|
+
|
|
29
|
+
|
|
14
30
|
def test_parallel_imports(session, clean_bucket_name, s3):
|
|
15
31
|
num_rows = 1000
|
|
16
32
|
num_files = 53
|
|
@@ -54,6 +70,59 @@ def test_parallel_imports(session, clean_bucket_name, s3):
|
|
|
54
70
|
assert len(object_names) == len(objects_name['ObjectName'])
|
|
55
71
|
|
|
56
72
|
|
|
73
|
+
def test_zip_imports(zip_import_session, clean_bucket_name, s3):
|
|
74
|
+
num_rows = 10
|
|
75
|
+
num_files = 5
|
|
76
|
+
files = []
|
|
77
|
+
ids = [i for i in range(num_rows)]
|
|
78
|
+
symbols = [chr(c) for c in range(ord('a'), ord('a') + num_rows)]
|
|
79
|
+
for i in range(num_files):
|
|
80
|
+
ds = {'id': ids,
|
|
81
|
+
'symbol': symbols,
|
|
82
|
+
f'feature{i}': [i * 10 + k for k in range(num_rows)]}
|
|
83
|
+
table = pa.Table.from_pydict(ds)
|
|
84
|
+
with NamedTemporaryFile() as f:
|
|
85
|
+
pq.write_table(table, f.name)
|
|
86
|
+
pname = f'prq{i}'
|
|
87
|
+
s3.put_object(Bucket=clean_bucket_name, Key=pname, Body=f)
|
|
88
|
+
files.append(f'/{clean_bucket_name}/{pname}')
|
|
89
|
+
|
|
90
|
+
with zip_import_session.transaction() as tx:
|
|
91
|
+
b = tx.bucket(clean_bucket_name)
|
|
92
|
+
s = b.create_schema('s1')
|
|
93
|
+
t = s.create_table('t1', pa.schema([('vastdb_rowid', pa.int64()), ('id', pa.int64()), ('symbol', pa.string())]))
|
|
94
|
+
columns = pa.schema([
|
|
95
|
+
('vastdb_rowid', pa.int64()),
|
|
96
|
+
('id', pa.int64()),
|
|
97
|
+
('symbol', pa.string()),
|
|
98
|
+
])
|
|
99
|
+
ext_row_ids = [10 + i for i in range(num_rows)]
|
|
100
|
+
arrow_table = pa.table(schema=columns, data=[
|
|
101
|
+
ext_row_ids,
|
|
102
|
+
ids,
|
|
103
|
+
symbols,
|
|
104
|
+
])
|
|
105
|
+
row_ids_array = t.insert(arrow_table)
|
|
106
|
+
row_ids = row_ids_array.to_pylist()
|
|
107
|
+
assert row_ids == ext_row_ids
|
|
108
|
+
|
|
109
|
+
with zip_import_session.transaction() as tx:
|
|
110
|
+
s = tx.bucket(clean_bucket_name).schema('s1')
|
|
111
|
+
t = s.table('t1')
|
|
112
|
+
log.info("Starting import of %d files", num_files)
|
|
113
|
+
config = ImportConfig()
|
|
114
|
+
config.key_names = ['id', 'symbol']
|
|
115
|
+
t.import_files(files, config=config)
|
|
116
|
+
|
|
117
|
+
with zip_import_session.transaction() as tx:
|
|
118
|
+
s = tx.bucket(clean_bucket_name).schema('s1')
|
|
119
|
+
t = s.table('t1')
|
|
120
|
+
arrow_table = t.select(columns=['feature0']).read_all()
|
|
121
|
+
assert arrow_table.num_rows == num_rows
|
|
122
|
+
log.debug(f"table schema={t.arrow_schema}")
|
|
123
|
+
assert len(t.arrow_schema) == 8
|
|
124
|
+
|
|
125
|
+
|
|
57
126
|
def test_create_table_from_files(session, clean_bucket_name, s3):
|
|
58
127
|
datasets = [
|
|
59
128
|
{'num': [0],
|
vastdb/tests/test_tables.py
CHANGED
|
@@ -3,6 +3,7 @@ import decimal
|
|
|
3
3
|
import logging
|
|
4
4
|
import random
|
|
5
5
|
import threading
|
|
6
|
+
import time
|
|
6
7
|
from contextlib import closing
|
|
7
8
|
from tempfile import NamedTemporaryFile
|
|
8
9
|
|
|
@@ -13,6 +14,8 @@ import pyarrow.parquet as pq
|
|
|
13
14
|
import pytest
|
|
14
15
|
from requests.exceptions import HTTPError
|
|
15
16
|
|
|
17
|
+
from vastdb.errors import BadRequest
|
|
18
|
+
|
|
16
19
|
from .. import errors
|
|
17
20
|
from ..table import INTERNAL_ROW_ID, QueryConfig
|
|
18
21
|
from .util import prepare_data
|
|
@@ -20,6 +23,16 @@ from .util import prepare_data
|
|
|
20
23
|
log = logging.getLogger(__name__)
|
|
21
24
|
|
|
22
25
|
|
|
26
|
+
@pytest.fixture
|
|
27
|
+
def elysium_session(session):
|
|
28
|
+
with session.transaction() as tx:
|
|
29
|
+
try:
|
|
30
|
+
tx._rpc.features.check_elysium()
|
|
31
|
+
return session
|
|
32
|
+
except errors.NotSupportedVersion:
|
|
33
|
+
pytest.skip("Skipped because this test requires version 5.3.5 with Elysium")
|
|
34
|
+
|
|
35
|
+
|
|
23
36
|
def test_tables(session, clean_bucket_name):
|
|
24
37
|
columns = pa.schema([
|
|
25
38
|
('a', pa.int64()),
|
|
@@ -990,3 +1003,207 @@ def test_multiple_contains_clauses(session, clean_bucket_name):
|
|
|
990
1003
|
for pred in failed_preds:
|
|
991
1004
|
with pytest.raises(NotImplementedError):
|
|
992
1005
|
t.select(predicate=pred(t)).read_all()
|
|
1006
|
+
|
|
1007
|
+
|
|
1008
|
+
def test_tables_elysium(elysium_session, clean_bucket_name):
|
|
1009
|
+
columns = pa.schema([
|
|
1010
|
+
('a', pa.int8()),
|
|
1011
|
+
('b', pa.int32()),
|
|
1012
|
+
('c', pa.int16()),
|
|
1013
|
+
])
|
|
1014
|
+
expected = pa.table(schema=columns, data=[
|
|
1015
|
+
[1, 2, 3],
|
|
1016
|
+
[111111, 222222, 333333],
|
|
1017
|
+
[111, 222, 333],
|
|
1018
|
+
])
|
|
1019
|
+
sorting = [2, 1]
|
|
1020
|
+
with prepare_data(elysium_session, clean_bucket_name, 's', 't', expected, sorting_key=sorting) as t:
|
|
1021
|
+
sorted_columns = t.sorted_columns()
|
|
1022
|
+
assert sorted_columns[0].name == 'c'
|
|
1023
|
+
assert sorted_columns[1].name == 'b'
|
|
1024
|
+
|
|
1025
|
+
|
|
1026
|
+
# Fails because of a known issue: ORION-240102
|
|
1027
|
+
# def test_enable_elysium(session, clean_bucket_name):
|
|
1028
|
+
# columns = pa.schema([
|
|
1029
|
+
# ('a', pa.int8()),
|
|
1030
|
+
# ('b', pa.int32()),
|
|
1031
|
+
# ('c', pa.int16()),
|
|
1032
|
+
# ])
|
|
1033
|
+
# expected = pa.table(schema=columns, data=[
|
|
1034
|
+
# [1,2,3],
|
|
1035
|
+
# [111111,222222,333333],
|
|
1036
|
+
# [111, 222, 333],
|
|
1037
|
+
# ])
|
|
1038
|
+
# sorting = [2, 1]
|
|
1039
|
+
# with prepare_data(session, clean_bucket_name, 's', 't', expected) as t:
|
|
1040
|
+
# sorted_columns = t.sorted_columns()
|
|
1041
|
+
# assert len(sorted_columns) == 0
|
|
1042
|
+
# t.add_sorting_key(sorting)
|
|
1043
|
+
# time.sleep(10)
|
|
1044
|
+
# sorted_columns = t.sorted_columns()
|
|
1045
|
+
# assert len(sorted_columns) == 2
|
|
1046
|
+
# assert sorted_columns[0].name == 'c'
|
|
1047
|
+
# assert sorted_columns[1].name == 'b'
|
|
1048
|
+
|
|
1049
|
+
|
|
1050
|
+
def test_elysium_tx(elysium_session, clean_bucket_name):
|
|
1051
|
+
columns = pa.schema([
|
|
1052
|
+
('a', pa.int8()),
|
|
1053
|
+
('b', pa.int32()),
|
|
1054
|
+
('c', pa.int16()),
|
|
1055
|
+
])
|
|
1056
|
+
arrow_table = pa.table(schema=columns, data=[
|
|
1057
|
+
[1, 2, 3],
|
|
1058
|
+
[111111, 222222, 333333],
|
|
1059
|
+
[111, 222, 333],
|
|
1060
|
+
])
|
|
1061
|
+
sorting = [2, 1]
|
|
1062
|
+
schema_name = 's'
|
|
1063
|
+
table_name = 't'
|
|
1064
|
+
with elysium_session.transaction() as tx:
|
|
1065
|
+
s = tx.bucket(clean_bucket_name).create_schema(schema_name)
|
|
1066
|
+
t = s.create_table(table_name, arrow_table.schema)
|
|
1067
|
+
row_ids_array = t.insert(arrow_table)
|
|
1068
|
+
row_ids = row_ids_array.to_pylist()
|
|
1069
|
+
assert row_ids == list(range(arrow_table.num_rows))
|
|
1070
|
+
sorted_columns = t.sorted_columns()
|
|
1071
|
+
assert len(sorted_columns) == 0
|
|
1072
|
+
t.add_sorting_key(sorting)
|
|
1073
|
+
|
|
1074
|
+
with elysium_session.transaction() as tx:
|
|
1075
|
+
s = tx.bucket(clean_bucket_name).schema(schema_name)
|
|
1076
|
+
t = s.table(table_name)
|
|
1077
|
+
sorted_columns = t.sorted_columns()
|
|
1078
|
+
assert len(sorted_columns) == 2
|
|
1079
|
+
assert sorted_columns[0].name == 'c'
|
|
1080
|
+
assert sorted_columns[1].name == 'b'
|
|
1081
|
+
t.drop()
|
|
1082
|
+
s.drop()
|
|
1083
|
+
|
|
1084
|
+
|
|
1085
|
+
def test_elysium_double_enable(elysium_session, clean_bucket_name):
    """Expect BadRequest when a sorting key is added to an already-sorted table.

    The table is created through ``prepare_data`` with ``sorting_key=[2, 1]``,
    its sorted columns are checked to be 'c' then 'b', and the same key is
    then passed to ``add_sorting_key``. The whole sequence runs under
    ``pytest.raises(BadRequest)``; presumably the final ``add_sorting_key``
    call is the one meant to raise.
    """
    sorting = [2, 1]
    fields = pa.schema([('a', pa.int8()), ('b', pa.int32()), ('c', pa.int16())])
    expected = pa.table(
        schema=fields,
        data=[
            [1, 2, 3],
            [111111, 222222, 333333],
            [111, 222, 333],
        ],
    )

    # pytest.raises is entered first, so it still wraps prepare_data's setup
    # and teardown exactly as the nested form did.
    with pytest.raises(BadRequest), \
            prepare_data(elysium_session, clean_bucket_name, 's', 't', expected, sorting_key=sorting) as table:
        current = table.sorted_columns()
        first, second = current[0], current[1]
        assert first.name == 'c'
        assert second.name == 'b'
        table.add_sorting_key(sorting)
|
|
1103
|
+
|
|
1104
|
+
|
|
1105
|
+
def test_elysium_update_table_tx(elysium_session, clean_bucket_name):
    """Exercise update and delete on a table created with a sorting key.

    A three-row table (columns 'a', 'b', 's') is created with
    ``sorting_key=[2, 1]`` and its sorted columns are checked to be 's' then
    'b', both in the creating transaction and in a later one. The second
    transaction then:

    1. adds 2000 to 'a' where ``a == 222`` and updates from an Arrow table;
    2. divides 'a' by 10 where ``a != 2222`` and updates from a record batch;
    3. deletes the rows where ``a < 222``.

    After each step columns 'a' and 'b' are read back and compared with the
    expected contents.
    """
    schema_name, table_name = 's', 't'
    sorting = [2, 1]
    fields = pa.schema([('a', pa.int64()), ('b', pa.float32()), ('s', pa.utf8())])
    arrow_table = pa.table(
        schema=fields,
        data=[
            [111, 222, 333],
            [0.5, 1.5, 2.5],
            ['a', 'bb', 'ccc'],
        ],
    )

    def with_new_a(rows, values):
        # Swap column 'a' of `rows` for `values`, keeping its original field.
        position = rows.column_names.index('a')
        return rows.set_column(position, rows.field(position), values)

    with elysium_session.transaction() as tx:
        schema = tx.bucket(clean_bucket_name).create_schema(schema_name)
        table = schema.create_table(table_name, arrow_table.schema, sorting_key=sorting)
        inserted_ids = table.insert(arrow_table).to_pylist()
        assert inserted_ids == list(range(arrow_table.num_rows))
        sorted_columns = table.sorted_columns()
        assert sorted_columns[0].name == 's'
        assert sorted_columns[1].name == 'b'

    with elysium_session.transaction() as tx:
        schema = tx.bucket(clean_bucket_name).schema(schema_name)
        table = schema.table(table_name)
        sorted_columns = table.sorted_columns()
        assert sorted_columns[0].name == 's'
        assert sorted_columns[1].name == 'b'

        # Step 1: update a single matching row, passing a pyarrow Table.
        selected = table.select(
            columns=['a', 'b'], predicate=(table['a'] == 222), internal_row_id=True
        ).read_all()
        update_table = with_new_a(selected, pc.add(selected.column('a'), 2000))

        table.update(update_table, columns=['a'])
        contents = table.select(columns=['a', 'b']).read_all()
        assert contents.to_pydict() == {
            'a': [111, 2222, 333],
            'b': [0.5, 1.5, 2.5]
        }

        # Step 2: update the remaining rows, this time passing a RecordBatch.
        selected = table.select(
            columns=['a', 'b'], predicate=(table['a'] != 2222), internal_row_id=True
        ).read_all()
        update_table = with_new_a(selected, pc.divide(selected.column('a'), 10))

        table.update(update_table.to_batches()[0], columns=['a'])
        contents = table.select(columns=['a', 'b']).read_all()
        assert contents.to_pydict() == {
            'a': [11, 2222, 33],
            'b': [0.5, 1.5, 2.5]
        }

        # Step 3: delete the rows selected by the predicate; the modified
        # column 'a' is passed along unchanged from how the test builds it.
        selected = table.select(
            columns=['a', 'b'], predicate=(table['a'] < 222), internal_row_id=True
        ).read_all()
        delete_rows = with_new_a(selected, pc.divide(selected.column('a'), 10))

        table.delete(delete_rows)
        contents = table.select(columns=['a', 'b']).read_all()
        assert contents.to_pydict() == {
            'a': [2222],
            'b': [1.5]
        }
|
|
1174
|
+
|
|
1175
|
+
|
|
1176
|
+
def test_elysium_splits(elysium_session, clean_bucket_name):
    """Query a sorted 100,000-row table with a small ``rows_per_split``.

    The single int32 column 'a' holds the values 1..10 repeated 10,000 times
    and is the sorting key. After a pause, a second transaction selects
    ``a == 1`` with ``rows_per_split = 1000`` and expects exactly 10,000 rows.
    """
    schema_name, table_name = 's', 't'
    sorting = [0]

    fields = pa.schema([('a', pa.int32())])
    values = list(range(1, 11)) * 10000
    arrow_table = pa.table(schema=fields, data=[values])

    config = QueryConfig()
    config.rows_per_split = 1000

    with elysium_session.transaction() as tx:
        schema = tx.bucket(clean_bucket_name).create_schema(schema_name)
        table = schema.create_table(table_name, arrow_table.schema, sorting_key=sorting)
        inserted_ids = table.insert(arrow_table).to_pylist()
        assert inserted_ids == list(range(arrow_table.num_rows))
        sorted_columns = table.sorted_columns()
        assert sorted_columns[0].name == 'a'

    # NOTE(review): presumably gives the server time to finish sorting the
    # inserted rows before querying — confirm; this makes the test take 5 min.
    time.sleep(300)

    with elysium_session.transaction() as tx:
        schema = tx.bucket(clean_bucket_name).schema(schema_name)
        table = schema.table(table_name)
        sorted_columns = table.sorted_columns()
        assert sorted_columns[0].name == 'a'

        matching = table.select(
            columns=['a'], predicate=(table['a'] == 1), config=config
        ).read_all()
        assert len(matching) == 10000
|
vastdb/tests/util.py
CHANGED
|
@@ -5,10 +5,10 @@ log = logging.getLogger(__name__)
|
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
@contextmanager
|
|
8
|
-
def prepare_data(session, clean_bucket_name, schema_name, table_name, arrow_table):
|
|
8
|
+
def prepare_data(session, clean_bucket_name, schema_name, table_name, arrow_table, sorting_key=[]):
|
|
9
9
|
with session.transaction() as tx:
|
|
10
10
|
s = tx.bucket(clean_bucket_name).create_schema(schema_name)
|
|
11
|
-
t = s.create_table(table_name, arrow_table.schema)
|
|
11
|
+
t = s.create_table(table_name, arrow_table.schema, sorting_key=sorting_key)
|
|
12
12
|
row_ids_array = t.insert(arrow_table)
|
|
13
13
|
row_ids = row_ids_array.to_pylist()
|
|
14
14
|
assert row_ids == list(range(arrow_table.num_rows))
|
|
@@ -32,7 +32,7 @@ class Aggregate(object):
|
|
|
32
32
|
o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(4))
|
|
33
33
|
if o != 0:
|
|
34
34
|
x = self._tab.Indirect(o + self._tab.Pos)
|
|
35
|
-
from vast_flatbuf.org.apache.arrow.computeir.flatbuf.RelId import RelId
|
|
35
|
+
from vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.RelId import RelId
|
|
36
36
|
obj = RelId()
|
|
37
37
|
obj.Init(self._tab.Bytes, x)
|
|
38
38
|
return obj
|
|
@@ -44,7 +44,7 @@ class Aggregate(object):
|
|
|
44
44
|
o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(6))
|
|
45
45
|
if o != 0:
|
|
46
46
|
x = self._tab.Indirect(o + self._tab.Pos)
|
|
47
|
-
from vast_flatbuf.org.apache.arrow.computeir.flatbuf.Relation import Relation
|
|
47
|
+
from vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Relation import Relation
|
|
48
48
|
obj = Relation()
|
|
49
49
|
obj.Init(self._tab.Bytes, x)
|
|
50
50
|
return obj
|
|
@@ -59,7 +59,7 @@ class Aggregate(object):
|
|
|
59
59
|
x = self._tab.Vector(o)
|
|
60
60
|
x += flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
|
|
61
61
|
x = self._tab.Indirect(x)
|
|
62
|
-
from vast_flatbuf.org.apache.arrow.computeir.flatbuf.Expression import Expression
|
|
62
|
+
from vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Expression import Expression
|
|
63
63
|
obj = Expression()
|
|
64
64
|
obj.Init(self._tab.Bytes, x)
|
|
65
65
|
return obj
|
|
@@ -98,7 +98,7 @@ class Aggregate(object):
|
|
|
98
98
|
x = self._tab.Vector(o)
|
|
99
99
|
x += flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
|
|
100
100
|
x = self._tab.Indirect(x)
|
|
101
|
-
from vast_flatbuf.org.apache.arrow.computeir.flatbuf.Grouping import Grouping
|
|
101
|
+
from vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Grouping import Grouping
|
|
102
102
|
obj = Grouping()
|
|
103
103
|
obj.Init(self._tab.Bytes, x)
|
|
104
104
|
return obj
|
|
@@ -41,7 +41,7 @@ class Call(object):
|
|
|
41
41
|
x = self._tab.Vector(o)
|
|
42
42
|
x += flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
|
|
43
43
|
x = self._tab.Indirect(x)
|
|
44
|
-
from vast_flatbuf.org.apache.arrow.computeir.flatbuf.Expression import Expression
|
|
44
|
+
from vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Expression import Expression
|
|
45
45
|
obj = Expression()
|
|
46
46
|
obj.Init(self._tab.Bytes, x)
|
|
47
47
|
return obj
|
|
@@ -69,7 +69,7 @@ class Call(object):
|
|
|
69
69
|
x = self._tab.Vector(o)
|
|
70
70
|
x += flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
|
|
71
71
|
x = self._tab.Indirect(x)
|
|
72
|
-
from vast_flatbuf.org.apache.arrow.computeir.flatbuf.SortKey import SortKey
|
|
72
|
+
from vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.SortKey import SortKey
|
|
73
73
|
obj = SortKey()
|
|
74
74
|
obj.Init(self._tab.Bytes, x)
|
|
75
75
|
return obj
|
|
@@ -30,7 +30,7 @@ class CaseFragment(object):
|
|
|
30
30
|
o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(4))
|
|
31
31
|
if o != 0:
|
|
32
32
|
x = self._tab.Indirect(o + self._tab.Pos)
|
|
33
|
-
from vast_flatbuf.org.apache.arrow.computeir.flatbuf.Expression import Expression
|
|
33
|
+
from vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Expression import Expression
|
|
34
34
|
obj = Expression()
|
|
35
35
|
obj.Init(self._tab.Bytes, x)
|
|
36
36
|
return obj
|
|
@@ -41,7 +41,7 @@ class CaseFragment(object):
|
|
|
41
41
|
o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(6))
|
|
42
42
|
if o != 0:
|
|
43
43
|
x = self._tab.Indirect(o + self._tab.Pos)
|
|
44
|
-
from vast_flatbuf.org.apache.arrow.computeir.flatbuf.Expression import Expression
|
|
44
|
+
from vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Expression import Expression
|
|
45
45
|
obj = Expression()
|
|
46
46
|
obj.Init(self._tab.Bytes, x)
|
|
47
47
|
return obj
|
|
@@ -31,7 +31,7 @@ class Cast(object):
|
|
|
31
31
|
o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(4))
|
|
32
32
|
if o != 0:
|
|
33
33
|
x = self._tab.Indirect(o + self._tab.Pos)
|
|
34
|
-
from vast_flatbuf.org.apache.arrow.computeir.flatbuf.Expression import Expression
|
|
34
|
+
from vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Expression import Expression
|
|
35
35
|
obj = Expression()
|
|
36
36
|
obj.Init(self._tab.Bytes, x)
|
|
37
37
|
return obj
|
|
@@ -47,7 +47,7 @@ class Cast(object):
|
|
|
47
47
|
o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(6))
|
|
48
48
|
if o != 0:
|
|
49
49
|
x = self._tab.Indirect(o + self._tab.Pos)
|
|
50
|
-
from vast_flatbuf.org.apache.arrow.flatbuf.Field import Field
|
|
50
|
+
from vastdb.vast_flatbuf.org.apache.arrow.flatbuf.Field import Field
|
|
51
51
|
obj = Field()
|
|
52
52
|
obj.Init(self._tab.Bytes, x)
|
|
53
53
|
return obj
|
|
@@ -33,7 +33,7 @@ class ConditionalCase(object):
|
|
|
33
33
|
x = self._tab.Vector(o)
|
|
34
34
|
x += flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
|
|
35
35
|
x = self._tab.Indirect(x)
|
|
36
|
-
from vast_flatbuf.org.apache.arrow.computeir.flatbuf.CaseFragment import CaseFragment
|
|
36
|
+
from vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.CaseFragment import CaseFragment
|
|
37
37
|
obj = CaseFragment()
|
|
38
38
|
obj.Init(self._tab.Bytes, x)
|
|
39
39
|
return obj
|
|
@@ -61,7 +61,7 @@ class ConditionalCase(object):
|
|
|
61
61
|
o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(6))
|
|
62
62
|
if o != 0:
|
|
63
63
|
x = self._tab.Indirect(o + self._tab.Pos)
|
|
64
|
-
from vast_flatbuf.org.apache.arrow.computeir.flatbuf.Expression import Expression
|
|
64
|
+
from vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Expression import Expression
|
|
65
65
|
obj = Expression()
|
|
66
66
|
obj.Init(self._tab.Bytes, x)
|
|
67
67
|
return obj
|
|
@@ -32,7 +32,7 @@ class Filter(object):
|
|
|
32
32
|
o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(4))
|
|
33
33
|
if o != 0:
|
|
34
34
|
x = self._tab.Indirect(o + self._tab.Pos)
|
|
35
|
-
from vast_flatbuf.org.apache.arrow.computeir.flatbuf.RelId import RelId
|
|
35
|
+
from vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.RelId import RelId
|
|
36
36
|
obj = RelId()
|
|
37
37
|
obj.Init(self._tab.Bytes, x)
|
|
38
38
|
return obj
|
|
@@ -44,7 +44,7 @@ class Filter(object):
|
|
|
44
44
|
o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(6))
|
|
45
45
|
if o != 0:
|
|
46
46
|
x = self._tab.Indirect(o + self._tab.Pos)
|
|
47
|
-
from vast_flatbuf.org.apache.arrow.computeir.flatbuf.Relation import Relation
|
|
47
|
+
from vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Relation import Relation
|
|
48
48
|
obj = Relation()
|
|
49
49
|
obj.Init(self._tab.Bytes, x)
|
|
50
50
|
return obj
|
|
@@ -58,7 +58,7 @@ class Filter(object):
|
|
|
58
58
|
o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(8))
|
|
59
59
|
if o != 0:
|
|
60
60
|
x = self._tab.Indirect(o + self._tab.Pos)
|
|
61
|
-
from vast_flatbuf.org.apache.arrow.computeir.flatbuf.Expression import Expression
|
|
61
|
+
from vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Expression import Expression
|
|
62
62
|
obj = Expression()
|
|
63
63
|
obj.Init(self._tab.Bytes, x)
|
|
64
64
|
return obj
|
|
@@ -33,7 +33,7 @@ class Grouping(object):
|
|
|
33
33
|
x = self._tab.Vector(o)
|
|
34
34
|
x += flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
|
|
35
35
|
x = self._tab.Indirect(x)
|
|
36
|
-
from vast_flatbuf.org.apache.arrow.computeir.flatbuf.Expression import Expression
|
|
36
|
+
from vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Expression import Expression
|
|
37
37
|
obj = Expression()
|
|
38
38
|
obj.Init(self._tab.Bytes, x)
|
|
39
39
|
return obj
|