vastdb 1.3.7__py3-none-any.whl → 1.3.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175)
  1. vastdb/__init__.py +2 -2
  2. vastdb/_internal.py +197 -83
  3. vastdb/bench/test_perf.py +2 -2
  4. vastdb/config.py +3 -0
  5. vastdb/errors.py +6 -0
  6. vastdb/features.py +9 -0
  7. vastdb/schema.py +5 -3
  8. vastdb/table.py +99 -17
  9. vastdb/tests/test_imports.py +70 -1
  10. vastdb/tests/test_tables.py +217 -0
  11. vastdb/tests/util.py +2 -2
  12. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Aggregate.py +4 -4
  13. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Call.py +2 -2
  14. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/CaseFragment.py +2 -2
  15. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Cast.py +2 -2
  16. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/ConditionalCase.py +2 -2
  17. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Filter.py +3 -3
  18. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Grouping.py +1 -1
  19. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Join.py +4 -4
  20. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/KeyValue.py +2 -2
  21. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Limit.py +2 -2
  22. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/ListLiteral.py +1 -1
  23. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Literal.py +1 -1
  24. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/LiteralColumn.py +1 -1
  25. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/LiteralRelation.py +2 -2
  26. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/MapKey.py +1 -1
  27. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/MapLiteral.py +1 -1
  28. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/OrderBy.py +3 -3
  29. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Plan.py +1 -1
  30. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Project.py +3 -3
  31. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/SetOperation.py +2 -2
  32. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/SimpleCase.py +3 -3
  33. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/SortKey.py +1 -1
  34. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Source.py +4 -4
  35. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/StructLiteral.py +1 -1
  36. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/WindowCall.py +3 -3
  37. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/DictionaryBatch.py +1 -1
  38. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/DictionaryEncoding.py +1 -1
  39. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Field.py +3 -3
  40. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Footer.py +4 -4
  41. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Message.py +1 -1
  42. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/RecordBatch.py +3 -3
  43. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Schema.py +2 -2
  44. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/SparseMatrixIndexCSX.py +4 -4
  45. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/SparseTensor.py +2 -2
  46. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/SparseTensorIndexCOO.py +2 -2
  47. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/SparseTensorIndexCSF.py +4 -4
  48. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Tensor.py +2 -2
  49. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/CreateProjectionRequest.py +1 -1
  50. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/GetRowColumnSecurityResponse.py +4 -4
  51. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/GetTableStatsResponse.py +1 -1
  52. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/ImportDataRequest.py +34 -1
  53. vastdb/vast_flatbuf/tabular/KeyName.py +45 -0
  54. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/ListProjectionsResponse.py +1 -1
  55. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/ListSchemasResponse.py +1 -1
  56. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/ListTablesResponse.py +1 -1
  57. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/ListViewsResponse.py +1 -1
  58. {vastdb-1.3.7.dist-info → vastdb-1.3.9.dist-info}/METADATA +1 -1
  59. vastdb-1.3.9.dist-info/RECORD +216 -0
  60. vastdb-1.3.9.dist-info/top_level.txt +1 -0
  61. vastdb-1.3.7.dist-info/RECORD +0 -215
  62. vastdb-1.3.7.dist-info/top_level.txt +0 -2
  63. {vast_flatbuf → vastdb/vast_flatbuf}/__init__.py +0 -0
  64. {vast_flatbuf → vastdb/vast_flatbuf}/org/__init__.py +0 -0
  65. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/__init__.py +0 -0
  66. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/__init__.py +0 -0
  67. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/__init__.py +0 -0
  68. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/ArraySlice.py +0 -0
  69. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/ArraySubscript.py +0 -0
  70. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/BinaryLiteral.py +0 -0
  71. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/BooleanLiteral.py +0 -0
  72. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Bound.py +0 -0
  73. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/ConcreteBoundImpl.py +0 -0
  74. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/CurrentRow.py +0 -0
  75. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/DateLiteral.py +0 -0
  76. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/DecimalLiteral.py +0 -0
  77. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Deref.py +0 -0
  78. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/DurationLiteral.py +0 -0
  79. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Expression.py +0 -0
  80. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/ExpressionImpl.py +0 -0
  81. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/FieldIndex.py +0 -0
  82. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/FieldRef.py +0 -0
  83. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/FixedSizeBinaryLiteral.py +0 -0
  84. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Float16Literal.py +0 -0
  85. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Float32Literal.py +0 -0
  86. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Float64Literal.py +0 -0
  87. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Following.py +0 -0
  88. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Frame.py +0 -0
  89. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Int16Literal.py +0 -0
  90. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Int32Literal.py +0 -0
  91. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Int64Literal.py +0 -0
  92. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Int8Literal.py +0 -0
  93. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/IntervalLiteral.py +0 -0
  94. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/IntervalLiteralDaysMilliseconds.py +0 -0
  95. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/IntervalLiteralImpl.py +0 -0
  96. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/IntervalLiteralMonths.py +0 -0
  97. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/JoinKind.py +0 -0
  98. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/LiteralImpl.py +0 -0
  99. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Ordering.py +0 -0
  100. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Preceding.py +0 -0
  101. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/RelId.py +0 -0
  102. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Relation.py +0 -0
  103. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/RelationImpl.py +0 -0
  104. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/SetOpKind.py +0 -0
  105. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/StringLiteral.py +0 -0
  106. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/StructField.py +0 -0
  107. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/TimeLiteral.py +0 -0
  108. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/TimestampLiteral.py +0 -0
  109. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/UInt16Literal.py +0 -0
  110. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/UInt32Literal.py +0 -0
  111. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/UInt64Literal.py +0 -0
  112. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/UInt8Literal.py +0 -0
  113. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Unbounded.py +0 -0
  114. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/__init__.py +0 -0
  115. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Binary.py +0 -0
  116. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Block.py +0 -0
  117. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/BodyCompression.py +0 -0
  118. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/BodyCompressionMethod.py +0 -0
  119. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Bool.py +0 -0
  120. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Buffer.py +0 -0
  121. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/CompressionType.py +0 -0
  122. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Date.py +0 -0
  123. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/DateUnit.py +0 -0
  124. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Decimal.py +0 -0
  125. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/DictionaryKind.py +0 -0
  126. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Duration.py +0 -0
  127. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Endianness.py +0 -0
  128. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Feature.py +0 -0
  129. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/FieldNode.py +0 -0
  130. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/FixedSizeBinary.py +0 -0
  131. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/FixedSizeList.py +0 -0
  132. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/FloatingPoint.py +0 -0
  133. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Int.py +0 -0
  134. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Interval.py +0 -0
  135. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/IntervalUnit.py +0 -0
  136. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/KeyValue.py +0 -0
  137. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/LargeBinary.py +0 -0
  138. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/LargeList.py +0 -0
  139. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/LargeUtf8.py +0 -0
  140. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/List.py +0 -0
  141. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Map.py +0 -0
  142. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/MessageHeader.py +0 -0
  143. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/MetadataVersion.py +0 -0
  144. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Null.py +0 -0
  145. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Precision.py +0 -0
  146. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/SparseMatrixCompressedAxis.py +0 -0
  147. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/SparseTensorIndex.py +0 -0
  148. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Struct_.py +0 -0
  149. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/TensorDim.py +0 -0
  150. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Time.py +0 -0
  151. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/TimeUnit.py +0 -0
  152. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Timestamp.py +0 -0
  153. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Type.py +0 -0
  154. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Union.py +0 -0
  155. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/UnionMode.py +0 -0
  156. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Utf8.py +0 -0
  157. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/__init__.py +0 -0
  158. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/AlterColumnRequest.py +0 -0
  159. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/AlterProjectionTableRequest.py +0 -0
  160. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/AlterSchemaRequest.py +0 -0
  161. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/AlterTableRequest.py +0 -0
  162. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/Column.py +0 -0
  163. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/ColumnDetails.py +0 -0
  164. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/ColumnType.py +0 -0
  165. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/CreateSchemaRequest.py +0 -0
  166. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/CreateViewRequest.py +0 -0
  167. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/FilterString.py +0 -0
  168. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/GetProjectionTableStatsResponse.py +0 -0
  169. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/NameString.py +0 -0
  170. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/ObjectDetails.py +0 -0
  171. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/S3File.py +0 -0
  172. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/VipRange.py +0 -0
  173. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/__init__.py +0 -0
  174. {vastdb-1.3.7.dist-info → vastdb-1.3.9.dist-info}/LICENSE +0 -0
  175. {vastdb-1.3.7.dist-info → vastdb-1.3.9.dist-info}/WHEEL +0 -0
vastdb/table.py CHANGED
@@ -21,6 +21,7 @@ log = logging.getLogger(__name__)
21
21
 
22
22
  INTERNAL_ROW_ID = "$row_id"
23
23
  INTERNAL_ROW_ID_FIELD = pa.field(INTERNAL_ROW_ID, pa.uint64())
24
+ INTERNAL_ROW_ID_SORTED_FIELD = pa.field(INTERNAL_ROW_ID, pa.decimal128(38, 0)) # Sorted tables have longer row ids
24
25
 
25
26
  MAX_ROWS_PER_BATCH = 512 * 1024
26
27
  # for insert we need a smaller limit due to response amplification
@@ -28,6 +29,7 @@ MAX_ROWS_PER_BATCH = 512 * 1024
28
29
  MAX_INSERT_ROWS_PER_PATCH = 512 * 1024
29
30
  # in case insert has TooWideRow - need to insert in smaller batches - each cell could contain up to 128K, and our wire is limited to 5MB
30
31
  MAX_COLUMN_IN_BATCH = int(5 * 1024 / 128)
32
+ SORTING_SCORE_BITS = 63
31
33
 
32
34
 
33
35
  @dataclass
@@ -36,7 +38,12 @@ class TableStats:
36
38
 
37
39
  num_rows: int
38
40
  size_in_bytes: int
41
+ sorting_score: int
42
+ write_amplification: int
43
+ acummulative_row_inserition_count: int
39
44
  is_external_rowid_alloc: bool = False
45
+ sorting_key_enabled: bool = False
46
+ sorting_done: bool = False
40
47
  endpoints: Tuple[str, ...] = ()
41
48
 
42
49
 
@@ -115,6 +122,7 @@ class Table:
115
122
  arrow_schema: pa.Schema = field(init=False, compare=False, repr=False)
116
123
  _ibis_table: ibis.Schema = field(init=False, compare=False, repr=False)
117
124
  _imports_table: bool
125
+ sorted_table: bool
118
126
 
119
127
  def __post_init__(self):
120
128
  """Also, load columns' metadata."""
@@ -157,6 +165,29 @@ class Table:
157
165
  self.arrow_schema = pa.schema(fields)
158
166
  return self.arrow_schema
159
167
 
168
+ def sorted_columns(self) -> list:
169
+ """Return sorted columns' metadata."""
170
+ fields = []
171
+ try:
172
+ self.tx._rpc.features.check_elysium()
173
+ next_key = 0
174
+ while True:
175
+ cur_columns, next_key, is_truncated, _count = self.tx._rpc.api.list_sorted_columns(
176
+ bucket=self.bucket.name, schema=self.schema.name, table=self.name, next_key=next_key, txid=self.tx.txid, list_imports_table=self._imports_table)
177
+ fields.extend(cur_columns)
178
+ if not is_truncated:
179
+ break
180
+ except errors.BadRequest:
181
+ pass
182
+ except errors.InternalServerError as ise:
183
+ log.warning("Failed to get the sorted columns Elysium might not be supported: %s", ise)
184
+ pass
185
+ except errors.NotSupportedVersion:
186
+ log.warning("Failed to get the sorted columns, Elysium not supported")
187
+ pass
188
+
189
+ return fields
190
+
160
191
  def projection(self, name: str) -> "Projection":
161
192
  """Get a specific semi-sorted projection of this table."""
162
193
  if self._imports_table:
@@ -228,6 +259,10 @@ class Table:
228
259
  endpoints = [self.tx._rpc.api.url for _ in range(config.import_concurrency)] # TODO: use valid endpoints...
229
260
  files_queue = queue.Queue()
230
261
 
262
+ key_names = config.key_names or []
263
+ if key_names:
264
+ self.tx._rpc.features.check_zip_import()
265
+
231
266
  for source_file in source_files.items():
232
267
  files_queue.put(source_file)
233
268
 
@@ -248,9 +283,11 @@ class Table:
248
283
  except queue.Empty:
249
284
  pass
250
285
  if files_batch:
251
- log.debug("Starting import batch of %s files", len(files_batch))
286
+ log.info("Starting import batch of %s files", len(files_batch))
287
+ log.debug(f"starting import of {files_batch}")
252
288
  session.import_data(
253
- self.bucket.name, self.schema.name, self.name, files_batch, txid=self.tx.txid)
289
+ self.bucket.name, self.schema.name, self.name, files_batch, txid=self.tx.txid,
290
+ key_names=key_names)
254
291
  except (Exception, KeyboardInterrupt) as e:
255
292
  stop_event.set()
256
293
  log.error("Got exception inside import_worker. exception: %s", e)
@@ -277,6 +314,21 @@ class Table:
277
314
  imports_table_stats=self._imports_table)
278
315
  return TableStats(**stats_tuple._asdict())
279
316
 
317
+ def _get_row_estimate(self, columns: List[str], predicate: ibis.expr.types.BooleanColumn, arrow_schema: pa.Schema):
318
+ query_data_request = _internal.build_query_data_request(
319
+ schema=arrow_schema,
320
+ predicate=predicate,
321
+ field_names=columns)
322
+ response = self.tx._rpc.api.query_data(
323
+ bucket=self.bucket.name,
324
+ schema=self.schema.name,
325
+ table=self.name,
326
+ params=query_data_request.serialized,
327
+ split=(0xffffffff - 3, 1, 1),
328
+ txid=self.tx.txid)
329
+ batch = _internal.read_first_batch(response.raw)
330
+ return batch.num_rows * 2**16 if batch is not None else 0
331
+
280
332
  def select(self, columns: Optional[List[str]] = None,
281
333
  predicate: Union[ibis.expr.types.BooleanColumn, ibis.common.deferred.Deferred] = None,
282
334
  config: Optional[QueryConfig] = None,
@@ -293,30 +345,22 @@ class Table:
293
345
  if config is None:
294
346
  config = QueryConfig()
295
347
 
348
+ stats = None
296
349
  # Retrieve snapshots only if needed
297
- if config.data_endpoints is None or config.num_splits is None:
350
+ if config.data_endpoints is None:
298
351
  stats = self.get_stats()
299
352
  log.debug("stats: %s", stats)
300
-
301
- if config.data_endpoints is None:
302
353
  endpoints = stats.endpoints
303
354
  else:
304
355
  endpoints = tuple(config.data_endpoints)
305
356
  log.debug("endpoints: %s", endpoints)
306
357
 
307
- if config.num_splits is None:
308
- config.num_splits = max(1, stats.num_rows // config.rows_per_split)
309
- log.debug("config: %s", config)
310
-
311
- if config.semi_sorted_projection_name:
312
- self.tx._rpc.features.check_enforce_semisorted_projection()
313
-
314
358
  if columns is None:
315
359
  columns = [f.name for f in self.arrow_schema]
316
360
 
317
361
  query_schema = self.arrow_schema
318
362
  if internal_row_id:
319
- queried_fields = [INTERNAL_ROW_ID_FIELD]
363
+ queried_fields = [INTERNAL_ROW_ID_SORTED_FIELD if self.sorted_table else INTERNAL_ROW_ID_FIELD]
320
364
  queried_fields.extend(column for column in self.arrow_schema)
321
365
  query_schema = pa.schema(queried_fields)
322
366
  columns.append(INTERNAL_ROW_ID)
@@ -330,6 +374,22 @@ class Table:
330
374
  if isinstance(predicate, ibis.common.deferred.Deferred):
331
375
  predicate = predicate.resolve(self._ibis_table) # may raise if the predicate is invalid (e.g. wrong types / missing column)
332
376
 
377
+ if config.num_splits is None:
378
+ num_rows = 0
379
+ if self.sorted_table:
380
+ num_rows = self._get_row_estimate(columns, predicate, query_schema)
381
+ log.debug(f'sorted estimate: {num_rows}')
382
+ if num_rows == 0:
383
+ if stats is None:
384
+ stats = self.get_stats()
385
+ num_rows = stats.num_rows
386
+
387
+ config.num_splits = max(1, num_rows // config.rows_per_split)
388
+ log.debug("config: %s", config)
389
+
390
+ if config.semi_sorted_projection_name:
391
+ self.tx._rpc.features.check_enforce_semisorted_projection()
392
+
333
393
  query_data_request = _internal.build_query_data_request(
334
394
  schema=query_schema,
335
395
  predicate=predicate,
@@ -485,7 +545,7 @@ class Table:
485
545
  if columns is None:
486
546
  columns = [name for name in rows.schema.names if name != INTERNAL_ROW_ID]
487
547
 
488
- update_fields = [(INTERNAL_ROW_ID, pa.uint64())]
548
+ update_fields = [INTERNAL_ROW_ID_SORTED_FIELD if self.sorted_table else INTERNAL_ROW_ID_FIELD]
489
549
  update_values = [_combine_chunks(rows_chunk)]
490
550
  for col in columns:
491
551
  update_fields.append(rows.field(col))
@@ -511,7 +571,7 @@ class Table:
511
571
  rows_chunk = rows[INTERNAL_ROW_ID]
512
572
  except KeyError:
513
573
  raise errors.MissingRowIdColumn
514
- delete_rows_rb = pa.record_batch(schema=pa.schema([(INTERNAL_ROW_ID, pa.uint64())]),
574
+ delete_rows_rb = pa.record_batch(schema=pa.schema([INTERNAL_ROW_ID_SORTED_FIELD if self.sorted_table else INTERNAL_ROW_ID_FIELD]),
515
575
  data=[_combine_chunks(rows_chunk)])
516
576
 
517
577
  delete_rows_rb = util.sort_record_batch_if_needed(delete_rows_rb, INTERNAL_ROW_ID)
@@ -535,6 +595,13 @@ class Table:
535
595
  log.info("Renamed table from %s to %s ", self.name, new_name)
536
596
  self.name = new_name
537
597
 
598
+ def add_sorting_key(self, sorting_key: list) -> None:
599
+ """Ads a sorting key to a table that doesn't have any."""
600
+ self.tx._rpc.features.check_elysium()
601
+ self.tx._rpc.api.alter_table(
602
+ self.bucket.name, self.schema.name, self.name, txid=self.tx.txid, sorting_key=sorting_key)
603
+ log.info("Enabled Elysium for table %s with sorting key %s ", self.name, str(sorting_key))
604
+
538
605
  def add_column(self, new_column: pa.Schema) -> None:
539
606
  """Add a new column."""
540
607
  if self._imports_table:
@@ -583,7 +650,7 @@ class Table:
583
650
  def imports_table(self) -> Optional["Table"]:
584
651
  """Get the imports table of this table."""
585
652
  self.tx._rpc.features.check_imports_table()
586
- return Table(name=self.name, schema=self.schema, handle=int(self.handle), _imports_table=True)
653
+ return Table(name=self.name, schema=self.schema, handle=int(self.handle), _imports_table=True, sorted_table=self.sorted_table)
587
654
 
588
655
  def __getitem__(self, col_name: str):
589
656
  """Allow constructing ibis-like column expressions from this table.
@@ -592,6 +659,20 @@ class Table:
592
659
  """
593
660
  return self._ibis_table[col_name]
594
661
 
662
+ def sorting_done(self) -> int:
663
+ """Sorting done indicator for the table. Always False for unsorted tables."""
664
+ if not self.sorted_table:
665
+ return False
666
+ raw_sorting_score = self.tx._rpc.api.raw_sorting_score(self.schema.bucket.name, self.schema.name, self.schema.tx.txid, self.name)
667
+ return bool(raw_sorting_score >> SORTING_SCORE_BITS)
668
+
669
+ def sorting_score(self) -> int:
670
+ """Sorting score for the table. Always 0 for unsorted tables."""
671
+ if not self.sorted_table:
672
+ return 0
673
+ raw_sorting_score = self.tx._rpc.api.raw_sorting_score(self.schema.bucket.name, self.schema.name, self.schema.tx.txid, self.name)
674
+ return raw_sorting_score & ((1 << SORTING_SCORE_BITS) - 1)
675
+
595
676
 
596
677
  @dataclass
597
678
  class Projection:
@@ -649,7 +730,8 @@ class Projection:
649
730
 
650
731
  def _parse_projection_info(projection_info, table: "Table"):
651
732
  log.info("Projection info %s", str(projection_info))
652
- stats = TableStats(num_rows=projection_info.num_rows, size_in_bytes=projection_info.size_in_bytes)
733
+ stats = TableStats(num_rows=projection_info.num_rows, size_in_bytes=projection_info.size_in_bytes,
734
+ sorting_score=0, write_amplification=0, acummulative_row_inserition_count=0)
653
735
  return Projection(name=projection_info.name, table=table, stats=stats, handle=int(projection_info.handle))
654
736
 
655
737
 
vastdb/tests/test_imports.py CHANGED
@@ -6,11 +6,27 @@ import pyarrow.parquet as pq
6
6
  import pytest
7
7
 
8
8
  from vastdb import util
9
- from vastdb.errors import ImportFilesError, InternalServerError, InvalidArgument
9
+ from vastdb.config import ImportConfig
10
+ from vastdb.errors import (
11
+ ImportFilesError,
12
+ InternalServerError,
13
+ InvalidArgument,
14
+ NotSupportedVersion,
15
+ )
10
16
 
11
17
  log = logging.getLogger(__name__)
12
18
 
13
19
 
20
+ @pytest.fixture
21
+ def zip_import_session(session):
22
+ with session.transaction() as tx:
23
+ try:
24
+ tx._rpc.features.check_zip_import()
25
+ return session
26
+ except NotSupportedVersion:
27
+ pytest.skip("Skipped because this test requires version 5.3.1")
28
+
29
+
14
30
  def test_parallel_imports(session, clean_bucket_name, s3):
15
31
  num_rows = 1000
16
32
  num_files = 53
@@ -54,6 +70,59 @@ def test_parallel_imports(session, clean_bucket_name, s3):
54
70
  assert len(object_names) == len(objects_name['ObjectName'])
55
71
 
56
72
 
73
+ def test_zip_imports(zip_import_session, clean_bucket_name, s3):
74
+ num_rows = 10
75
+ num_files = 5
76
+ files = []
77
+ ids = [i for i in range(num_rows)]
78
+ symbols = [chr(c) for c in range(ord('a'), ord('a') + num_rows)]
79
+ for i in range(num_files):
80
+ ds = {'id': ids,
81
+ 'symbol': symbols,
82
+ f'feature{i}': [i * 10 + k for k in range(num_rows)]}
83
+ table = pa.Table.from_pydict(ds)
84
+ with NamedTemporaryFile() as f:
85
+ pq.write_table(table, f.name)
86
+ pname = f'prq{i}'
87
+ s3.put_object(Bucket=clean_bucket_name, Key=pname, Body=f)
88
+ files.append(f'/{clean_bucket_name}/{pname}')
89
+
90
+ with zip_import_session.transaction() as tx:
91
+ b = tx.bucket(clean_bucket_name)
92
+ s = b.create_schema('s1')
93
+ t = s.create_table('t1', pa.schema([('vastdb_rowid', pa.int64()), ('id', pa.int64()), ('symbol', pa.string())]))
94
+ columns = pa.schema([
95
+ ('vastdb_rowid', pa.int64()),
96
+ ('id', pa.int64()),
97
+ ('symbol', pa.string()),
98
+ ])
99
+ ext_row_ids = [10 + i for i in range(num_rows)]
100
+ arrow_table = pa.table(schema=columns, data=[
101
+ ext_row_ids,
102
+ ids,
103
+ symbols,
104
+ ])
105
+ row_ids_array = t.insert(arrow_table)
106
+ row_ids = row_ids_array.to_pylist()
107
+ assert row_ids == ext_row_ids
108
+
109
+ with zip_import_session.transaction() as tx:
110
+ s = tx.bucket(clean_bucket_name).schema('s1')
111
+ t = s.table('t1')
112
+ log.info("Starting import of %d files", num_files)
113
+ config = ImportConfig()
114
+ config.key_names = ['id', 'symbol']
115
+ t.import_files(files, config=config)
116
+
117
+ with zip_import_session.transaction() as tx:
118
+ s = tx.bucket(clean_bucket_name).schema('s1')
119
+ t = s.table('t1')
120
+ arrow_table = t.select(columns=['feature0']).read_all()
121
+ assert arrow_table.num_rows == num_rows
122
+ log.debug(f"table schema={t.arrow_schema}")
123
+ assert len(t.arrow_schema) == 8
124
+
125
+
57
126
  def test_create_table_from_files(session, clean_bucket_name, s3):
58
127
  datasets = [
59
128
  {'num': [0],
vastdb/tests/test_tables.py CHANGED
@@ -3,6 +3,7 @@ import decimal
3
3
  import logging
4
4
  import random
5
5
  import threading
6
+ import time
6
7
  from contextlib import closing
7
8
  from tempfile import NamedTemporaryFile
8
9
 
@@ -13,6 +14,8 @@ import pyarrow.parquet as pq
13
14
  import pytest
14
15
  from requests.exceptions import HTTPError
15
16
 
17
+ from vastdb.errors import BadRequest
18
+
16
19
  from .. import errors
17
20
  from ..table import INTERNAL_ROW_ID, QueryConfig
18
21
  from .util import prepare_data
@@ -20,6 +23,16 @@ from .util import prepare_data
20
23
  log = logging.getLogger(__name__)
21
24
 
22
25
 
26
+ @pytest.fixture
27
+ def elysium_session(session):
28
+ with session.transaction() as tx:
29
+ try:
30
+ tx._rpc.features.check_elysium()
31
+ return session
32
+ except errors.NotSupportedVersion:
33
+ pytest.skip("Skipped because this test requires version 5.3.5 with Elysium")
34
+
35
+
23
36
  def test_tables(session, clean_bucket_name):
24
37
  columns = pa.schema([
25
38
  ('a', pa.int64()),
@@ -990,3 +1003,207 @@ def test_multiple_contains_clauses(session, clean_bucket_name):
990
1003
  for pred in failed_preds:
991
1004
  with pytest.raises(NotImplementedError):
992
1005
  t.select(predicate=pred(t)).read_all()
1006
+
1007
+
1008
def test_tables_elysium(elysium_session, clean_bucket_name):
    """Create a table with a sorting key and check its reported sorted columns.

    The sorting key is ``[2, 1]``; the test expects ``sorted_columns()`` to
    report column ``c`` first and column ``b`` second.
    """
    schema = pa.schema([('a', pa.int8()), ('b', pa.int32()), ('c', pa.int16())])
    expected = pa.table(
        [
            [1, 2, 3],
            [111111, 222222, 333333],
            [111, 222, 333],
        ],
        schema=schema,
    )
    sort_key = [2, 1]
    with prepare_data(elysium_session, clean_bucket_name, 's', 't', expected, sorting_key=sort_key) as table:
        reported = table.sorted_columns()
        assert reported[0].name == 'c'
        assert reported[1].name == 'b'
1024
+
1025
+
1026
+ # Fails because of a known issue: ORION-240102
1027
+ # def test_enable_elysium(session, clean_bucket_name):
1028
+ # columns = pa.schema([
1029
+ # ('a', pa.int8()),
1030
+ # ('b', pa.int32()),
1031
+ # ('c', pa.int16()),
1032
+ # ])
1033
+ # expected = pa.table(schema=columns, data=[
1034
+ # [1,2,3],
1035
+ # [111111,222222,333333],
1036
+ # [111, 222, 333],
1037
+ # ])
1038
+ # sorting = [2, 1]
1039
+ # with prepare_data(session, clean_bucket_name, 's', 't', expected) as t:
1040
+ # sorted_columns = t.sorted_columns()
1041
+ # assert len(sorted_columns) == 0
1042
+ # t.add_sorting_key(sorting)
1043
+ # time.sleep(10)
1044
+ # sorted_columns = t.sorted_columns()
1045
+ # assert len(sorted_columns) == 2
1046
+ # assert sorted_columns[0].name == 'c'
1047
+ # assert sorted_columns[1].name == 'b'
1048
+
1049
+
1050
def test_elysium_tx(elysium_session, clean_bucket_name):
    """Add a sorting key in one transaction and observe it in the next.

    First transaction: create schema and table, insert three rows, check
    that no sorted columns are reported yet, then add the sorting key.
    Second transaction: check that exactly two sorted columns (``c`` then
    ``b``) are reported, then drop the table and the schema.
    """
    schema_name, table_name = 's', 't'
    sort_key = [2, 1]
    rows = pa.table(
        [
            [1, 2, 3],
            [111111, 222222, 333333],
            [111, 222, 333],
        ],
        schema=pa.schema([('a', pa.int8()), ('b', pa.int32()), ('c', pa.int16())]),
    )

    with elysium_session.transaction() as tx:
        schema = tx.bucket(clean_bucket_name).create_schema(schema_name)
        table = schema.create_table(table_name, rows.schema)
        inserted_ids = table.insert(rows).to_pylist()
        assert inserted_ids == list(range(rows.num_rows))
        # The table was created without a sorting key.
        assert len(table.sorted_columns()) == 0
        table.add_sorting_key(sort_key)

    with elysium_session.transaction() as tx:
        schema = tx.bucket(clean_bucket_name).schema(schema_name)
        table = schema.table(table_name)
        reported = table.sorted_columns()
        assert len(reported) == 2
        assert reported[0].name == 'c'
        assert reported[1].name == 'b'
        table.drop()
        schema.drop()
1083
+
1084
+
1085
def test_elysium_double_enable(elysium_session, clean_bucket_name):
    """Adding a sorting key to a table that already has one raises BadRequest.

    The table is created with sorting key ``[2, 1]``; after confirming the
    reported sorted columns (``c`` then ``b``), the same key is added again
    and ``BadRequest`` is expected.

    The ``pytest.raises`` block necessarily wraps the table setup too, so a
    flag records that execution reached the second ``add_sorting_key`` call.
    Without it, a ``BadRequest`` raised while creating or populating the
    table would make the test pass for the wrong reason.
    """
    columns = pa.schema([
        ('a', pa.int8()),
        ('b', pa.int32()),
        ('c', pa.int16()),
    ])
    expected = pa.table(schema=columns, data=[
        [1, 2, 3],
        [111111, 222222, 333333],
        [111, 222, 333],
    ])
    sorting = [2, 1]
    reached_second_enable = False
    with pytest.raises(BadRequest):
        with prepare_data(elysium_session, clean_bucket_name, 's', 't', expected, sorting_key=sorting) as t:
            sorted_columns = t.sorted_columns()
            assert sorted_columns[0].name == 'c'
            assert sorted_columns[1].name == 'b'
            # Set immediately before the call that is expected to fail.
            reached_second_enable = True
            t.add_sorting_key(sorting)
    assert reached_second_enable, \
        "BadRequest was raised during table setup, before the second add_sorting_key call"
1103
+
1104
+
1105
def test_elysium_update_table_tx(elysium_session, clean_bucket_name):
    """Update and delete rows of a table that was created with a sorting key.

    First transaction: create the table with sorting key ``[2, 1]``, insert
    three rows and check the reported sorted columns (``s`` then ``b``).
    Second transaction: re-check the sorted columns, then
      1. add 2000 to ``a`` where ``a == 222`` (update from a table),
      2. divide ``a`` by 10 where ``a != 2222`` (update from a record batch),
      3. delete the rows where ``a < 222``,
    verifying the contents of columns ``a`` and ``b`` after each step.
    """
    schema_name, table_name = 's', 't'
    sort_key = [2, 1]
    source = pa.table(
        [
            [111, 222, 333],
            [0.5, 1.5, 2.5],
            ['a', 'bb', 'ccc'],
        ],
        schema=pa.schema([('a', pa.int64()), ('b', pa.float32()), ('s', pa.utf8())]),
    )

    def with_new_a(rows, values):
        # Return ``rows`` with column 'a' replaced by ``values``, keeping the
        # original field definition and column position.
        position = rows.column_names.index('a')
        return rows.set_column(position, rows.field(position), values)

    with elysium_session.transaction() as tx:
        schema = tx.bucket(clean_bucket_name).create_schema(schema_name)
        table = schema.create_table(table_name, source.schema, sorting_key=sort_key)
        inserted_ids = table.insert(source).to_pylist()
        assert inserted_ids == list(range(source.num_rows))
        reported = table.sorted_columns()
        assert reported[0].name == 's'
        assert reported[1].name == 'b'

    with elysium_session.transaction() as tx:
        schema = tx.bucket(clean_bucket_name).schema(schema_name)
        table = schema.table(table_name)
        reported = table.sorted_columns()
        assert reported[0].name == 's'
        assert reported[1].name == 'b'

        # Step 1: update the single row with a == 222, passing a table.
        selected = table.select(columns=['a', 'b'], predicate=(table['a'] == 222), internal_row_id=True).read_all()
        changes = with_new_a(selected, pc.add(selected.column('a'), 2000))

        table.update(changes, columns=['a'])
        current = table.select(columns=['a', 'b']).read_all()
        assert current.to_pydict() == {
            'a': [111, 2222, 333],
            'b': [0.5, 1.5, 2.5]
        }

        # Step 2: update the remaining rows, passing the first record batch.
        selected = table.select(columns=['a', 'b'], predicate=(table['a'] != 2222), internal_row_id=True).read_all()
        changes = with_new_a(selected, pc.divide(selected.column('a'), 10))

        table.update(changes.to_batches()[0], columns=['a'])
        current = table.select(columns=['a', 'b']).read_all()
        assert current.to_pydict() == {
            'a': [11, 2222, 33],
            'b': [0.5, 1.5, 2.5]
        }

        # Step 3: delete the rows with a < 222.
        # NOTE(review): column 'a' is rewritten before the delete; presumably
        # only the internal row ids matter to delete() - confirm.
        selected = table.select(columns=['a', 'b'], predicate=(table['a'] < 222), internal_row_id=True).read_all()
        doomed = with_new_a(selected, pc.divide(selected.column('a'), 10))

        table.delete(doomed)
        current = table.select(columns=['a', 'b']).read_all()
        assert current.to_pydict() == {
            'a': [2222],
            'b': [1.5]
        }
1174
+
1175
+
1176
def test_elysium_splits(elysium_session, clean_bucket_name):
    """Query a sorted 100,000-row table using a small ``rows_per_split``.

    The single int32 column ``a`` holds the values 1..10 repeated 10,000
    times and is also the sorting key. After a fixed wait, an equality
    query for ``a == 1`` with ``rows_per_split = 1000`` must return exactly
    10,000 rows.
    """
    schema_name, table_name = 's', 't'
    sort_key = [0]

    values = list(range(1, 11)) * 10000
    rows = pa.table([values], schema=pa.schema([('a', pa.int32())]))

    config = QueryConfig()
    config.rows_per_split = 1000

    with elysium_session.transaction() as tx:
        schema = tx.bucket(clean_bucket_name).create_schema(schema_name)
        table = schema.create_table(table_name, rows.schema, sorting_key=sort_key)
        inserted_ids = table.insert(rows).to_pylist()
        assert inserted_ids == list(range(rows.num_rows))
        assert table.sorted_columns()[0].name == 'a'

    # NOTE(review): fixed five-minute wait; presumably it gives the server
    # time to finish sorting the table before it is queried - confirm, and
    # consider polling for readiness instead.
    time.sleep(300)
    with elysium_session.transaction() as tx:
        schema = tx.bucket(clean_bucket_name).schema(schema_name)
        table = schema.table(table_name)
        assert table.sorted_columns()[0].name == 'a'

        matches = table.select(columns=['a'], predicate=(table['a'] == 1), config=config).read_all()
        assert len(matches) == 10000
vastdb/tests/util.py CHANGED
@@ -5,10 +5,10 @@ log = logging.getLogger(__name__)
5
5
 
6
6
 
7
7
  @contextmanager
8
- def prepare_data(session, clean_bucket_name, schema_name, table_name, arrow_table):
8
+ def prepare_data(session, clean_bucket_name, schema_name, table_name, arrow_table, sorting_key=[]):
9
9
  with session.transaction() as tx:
10
10
  s = tx.bucket(clean_bucket_name).create_schema(schema_name)
11
- t = s.create_table(table_name, arrow_table.schema)
11
+ t = s.create_table(table_name, arrow_table.schema, sorting_key=sorting_key)
12
12
  row_ids_array = t.insert(arrow_table)
13
13
  row_ids = row_ids_array.to_pylist()
14
14
  assert row_ids == list(range(arrow_table.num_rows))
@@ -32,7 +32,7 @@ class Aggregate(object):
32
32
  o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(4))
33
33
  if o != 0:
34
34
  x = self._tab.Indirect(o + self._tab.Pos)
35
- from vast_flatbuf.org.apache.arrow.computeir.flatbuf.RelId import RelId
35
+ from vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.RelId import RelId
36
36
  obj = RelId()
37
37
  obj.Init(self._tab.Bytes, x)
38
38
  return obj
@@ -44,7 +44,7 @@ class Aggregate(object):
44
44
  o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(6))
45
45
  if o != 0:
46
46
  x = self._tab.Indirect(o + self._tab.Pos)
47
- from vast_flatbuf.org.apache.arrow.computeir.flatbuf.Relation import Relation
47
+ from vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Relation import Relation
48
48
  obj = Relation()
49
49
  obj.Init(self._tab.Bytes, x)
50
50
  return obj
@@ -59,7 +59,7 @@ class Aggregate(object):
59
59
  x = self._tab.Vector(o)
60
60
  x += flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
61
61
  x = self._tab.Indirect(x)
62
- from vast_flatbuf.org.apache.arrow.computeir.flatbuf.Expression import Expression
62
+ from vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Expression import Expression
63
63
  obj = Expression()
64
64
  obj.Init(self._tab.Bytes, x)
65
65
  return obj
@@ -98,7 +98,7 @@ class Aggregate(object):
98
98
  x = self._tab.Vector(o)
99
99
  x += flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
100
100
  x = self._tab.Indirect(x)
101
- from vast_flatbuf.org.apache.arrow.computeir.flatbuf.Grouping import Grouping
101
+ from vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Grouping import Grouping
102
102
  obj = Grouping()
103
103
  obj.Init(self._tab.Bytes, x)
104
104
  return obj
@@ -41,7 +41,7 @@ class Call(object):
41
41
  x = self._tab.Vector(o)
42
42
  x += flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
43
43
  x = self._tab.Indirect(x)
44
- from vast_flatbuf.org.apache.arrow.computeir.flatbuf.Expression import Expression
44
+ from vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Expression import Expression
45
45
  obj = Expression()
46
46
  obj.Init(self._tab.Bytes, x)
47
47
  return obj
@@ -69,7 +69,7 @@ class Call(object):
69
69
  x = self._tab.Vector(o)
70
70
  x += flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
71
71
  x = self._tab.Indirect(x)
72
- from vast_flatbuf.org.apache.arrow.computeir.flatbuf.SortKey import SortKey
72
+ from vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.SortKey import SortKey
73
73
  obj = SortKey()
74
74
  obj.Init(self._tab.Bytes, x)
75
75
  return obj
@@ -30,7 +30,7 @@ class CaseFragment(object):
30
30
  o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(4))
31
31
  if o != 0:
32
32
  x = self._tab.Indirect(o + self._tab.Pos)
33
- from vast_flatbuf.org.apache.arrow.computeir.flatbuf.Expression import Expression
33
+ from vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Expression import Expression
34
34
  obj = Expression()
35
35
  obj.Init(self._tab.Bytes, x)
36
36
  return obj
@@ -41,7 +41,7 @@ class CaseFragment(object):
41
41
  o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(6))
42
42
  if o != 0:
43
43
  x = self._tab.Indirect(o + self._tab.Pos)
44
- from vast_flatbuf.org.apache.arrow.computeir.flatbuf.Expression import Expression
44
+ from vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Expression import Expression
45
45
  obj = Expression()
46
46
  obj.Init(self._tab.Bytes, x)
47
47
  return obj
@@ -31,7 +31,7 @@ class Cast(object):
31
31
  o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(4))
32
32
  if o != 0:
33
33
  x = self._tab.Indirect(o + self._tab.Pos)
34
- from vast_flatbuf.org.apache.arrow.computeir.flatbuf.Expression import Expression
34
+ from vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Expression import Expression
35
35
  obj = Expression()
36
36
  obj.Init(self._tab.Bytes, x)
37
37
  return obj
@@ -47,7 +47,7 @@ class Cast(object):
47
47
  o = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(6))
48
48
  if o != 0:
49
49
  x = self._tab.Indirect(o + self._tab.Pos)
50
- from vast_flatbuf.org.apache.arrow.flatbuf.Field import Field
50
+ from vastdb.vast_flatbuf.org.apache.arrow.flatbuf.Field import Field
51
51
  obj = Field()
52
52
  obj.Init(self._tab.Bytes, x)
53
53
  return obj