vastdb-1.3.7-py3-none-any.whl → vastdb-1.3.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174)
  1. vastdb/_internal.py +158 -77
  2. vastdb/bench/test_perf.py +2 -2
  3. vastdb/config.py +3 -0
  4. vastdb/errors.py +6 -0
  5. vastdb/features.py +9 -0
  6. vastdb/schema.py +5 -3
  7. vastdb/table.py +76 -15
  8. vastdb/tests/test_imports.py +70 -1
  9. vastdb/tests/test_tables.py +217 -0
  10. vastdb/tests/util.py +2 -2
  11. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Aggregate.py +4 -4
  12. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Call.py +2 -2
  13. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/CaseFragment.py +2 -2
  14. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Cast.py +2 -2
  15. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/ConditionalCase.py +2 -2
  16. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Filter.py +3 -3
  17. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Grouping.py +1 -1
  18. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Join.py +4 -4
  19. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/KeyValue.py +2 -2
  20. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Limit.py +2 -2
  21. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/ListLiteral.py +1 -1
  22. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Literal.py +1 -1
  23. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/LiteralColumn.py +1 -1
  24. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/LiteralRelation.py +2 -2
  25. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/MapKey.py +1 -1
  26. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/MapLiteral.py +1 -1
  27. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/OrderBy.py +3 -3
  28. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Plan.py +1 -1
  29. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Project.py +3 -3
  30. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/SetOperation.py +2 -2
  31. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/SimpleCase.py +3 -3
  32. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/SortKey.py +1 -1
  33. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Source.py +4 -4
  34. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/StructLiteral.py +1 -1
  35. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/WindowCall.py +3 -3
  36. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/DictionaryBatch.py +1 -1
  37. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/DictionaryEncoding.py +1 -1
  38. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Field.py +3 -3
  39. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Footer.py +4 -4
  40. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Message.py +1 -1
  41. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/RecordBatch.py +3 -3
  42. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Schema.py +2 -2
  43. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/SparseMatrixIndexCSX.py +4 -4
  44. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/SparseTensor.py +2 -2
  45. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/SparseTensorIndexCOO.py +2 -2
  46. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/SparseTensorIndexCSF.py +4 -4
  47. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Tensor.py +2 -2
  48. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/CreateProjectionRequest.py +1 -1
  49. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/GetRowColumnSecurityResponse.py +4 -4
  50. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/GetTableStatsResponse.py +1 -1
  51. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/ImportDataRequest.py +34 -1
  52. vastdb/vast_flatbuf/tabular/KeyName.py +45 -0
  53. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/ListProjectionsResponse.py +1 -1
  54. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/ListSchemasResponse.py +1 -1
  55. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/ListTablesResponse.py +1 -1
  56. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/ListViewsResponse.py +1 -1
  57. {vastdb-1.3.7.dist-info → vastdb-1.3.8.dist-info}/METADATA +1 -1
  58. vastdb-1.3.8.dist-info/RECORD +216 -0
  59. vastdb-1.3.8.dist-info/top_level.txt +1 -0
  60. vastdb-1.3.7.dist-info/RECORD +0 -215
  61. vastdb-1.3.7.dist-info/top_level.txt +0 -2
  62. {vast_flatbuf → vastdb/vast_flatbuf}/__init__.py +0 -0
  63. {vast_flatbuf → vastdb/vast_flatbuf}/org/__init__.py +0 -0
  64. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/__init__.py +0 -0
  65. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/__init__.py +0 -0
  66. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/__init__.py +0 -0
  67. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/ArraySlice.py +0 -0
  68. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/ArraySubscript.py +0 -0
  69. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/BinaryLiteral.py +0 -0
  70. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/BooleanLiteral.py +0 -0
  71. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Bound.py +0 -0
  72. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/ConcreteBoundImpl.py +0 -0
  73. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/CurrentRow.py +0 -0
  74. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/DateLiteral.py +0 -0
  75. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/DecimalLiteral.py +0 -0
  76. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Deref.py +0 -0
  77. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/DurationLiteral.py +0 -0
  78. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Expression.py +0 -0
  79. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/ExpressionImpl.py +0 -0
  80. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/FieldIndex.py +0 -0
  81. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/FieldRef.py +0 -0
  82. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/FixedSizeBinaryLiteral.py +0 -0
  83. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Float16Literal.py +0 -0
  84. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Float32Literal.py +0 -0
  85. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Float64Literal.py +0 -0
  86. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Following.py +0 -0
  87. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Frame.py +0 -0
  88. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Int16Literal.py +0 -0
  89. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Int32Literal.py +0 -0
  90. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Int64Literal.py +0 -0
  91. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Int8Literal.py +0 -0
  92. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/IntervalLiteral.py +0 -0
  93. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/IntervalLiteralDaysMilliseconds.py +0 -0
  94. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/IntervalLiteralImpl.py +0 -0
  95. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/IntervalLiteralMonths.py +0 -0
  96. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/JoinKind.py +0 -0
  97. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/LiteralImpl.py +0 -0
  98. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Ordering.py +0 -0
  99. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Preceding.py +0 -0
  100. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/RelId.py +0 -0
  101. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Relation.py +0 -0
  102. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/RelationImpl.py +0 -0
  103. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/SetOpKind.py +0 -0
  104. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/StringLiteral.py +0 -0
  105. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/StructField.py +0 -0
  106. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/TimeLiteral.py +0 -0
  107. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/TimestampLiteral.py +0 -0
  108. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/UInt16Literal.py +0 -0
  109. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/UInt32Literal.py +0 -0
  110. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/UInt64Literal.py +0 -0
  111. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/UInt8Literal.py +0 -0
  112. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/Unbounded.py +0 -0
  113. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/computeir/flatbuf/__init__.py +0 -0
  114. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Binary.py +0 -0
  115. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Block.py +0 -0
  116. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/BodyCompression.py +0 -0
  117. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/BodyCompressionMethod.py +0 -0
  118. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Bool.py +0 -0
  119. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Buffer.py +0 -0
  120. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/CompressionType.py +0 -0
  121. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Date.py +0 -0
  122. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/DateUnit.py +0 -0
  123. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Decimal.py +0 -0
  124. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/DictionaryKind.py +0 -0
  125. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Duration.py +0 -0
  126. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Endianness.py +0 -0
  127. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Feature.py +0 -0
  128. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/FieldNode.py +0 -0
  129. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/FixedSizeBinary.py +0 -0
  130. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/FixedSizeList.py +0 -0
  131. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/FloatingPoint.py +0 -0
  132. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Int.py +0 -0
  133. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Interval.py +0 -0
  134. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/IntervalUnit.py +0 -0
  135. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/KeyValue.py +0 -0
  136. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/LargeBinary.py +0 -0
  137. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/LargeList.py +0 -0
  138. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/LargeUtf8.py +0 -0
  139. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/List.py +0 -0
  140. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Map.py +0 -0
  141. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/MessageHeader.py +0 -0
  142. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/MetadataVersion.py +0 -0
  143. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Null.py +0 -0
  144. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Precision.py +0 -0
  145. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/SparseMatrixCompressedAxis.py +0 -0
  146. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/SparseTensorIndex.py +0 -0
  147. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Struct_.py +0 -0
  148. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/TensorDim.py +0 -0
  149. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Time.py +0 -0
  150. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/TimeUnit.py +0 -0
  151. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Timestamp.py +0 -0
  152. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Type.py +0 -0
  153. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Union.py +0 -0
  154. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/UnionMode.py +0 -0
  155. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/Utf8.py +0 -0
  156. {vast_flatbuf → vastdb/vast_flatbuf}/org/apache/arrow/flatbuf/__init__.py +0 -0
  157. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/AlterColumnRequest.py +0 -0
  158. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/AlterProjectionTableRequest.py +0 -0
  159. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/AlterSchemaRequest.py +0 -0
  160. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/AlterTableRequest.py +0 -0
  161. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/Column.py +0 -0
  162. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/ColumnDetails.py +0 -0
  163. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/ColumnType.py +0 -0
  164. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/CreateSchemaRequest.py +0 -0
  165. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/CreateViewRequest.py +0 -0
  166. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/FilterString.py +0 -0
  167. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/GetProjectionTableStatsResponse.py +0 -0
  168. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/NameString.py +0 -0
  169. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/ObjectDetails.py +0 -0
  170. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/S3File.py +0 -0
  171. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/VipRange.py +0 -0
  172. {vast_flatbuf → vastdb/vast_flatbuf}/tabular/__init__.py +0 -0
  173. {vastdb-1.3.7.dist-info → vastdb-1.3.8.dist-info}/LICENSE +0 -0
  174. {vastdb-1.3.7.dist-info → vastdb-1.3.8.dist-info}/WHEEL +0 -0
vastdb/_internal.py CHANGED
@@ -38,75 +38,82 @@ from ibis.expr.operations.relations import Field
38
38
  from ibis.expr.operations.strings import StartsWith, StringContains
39
39
  from ibis.expr.operations.structs import StructField
40
40
 
41
- import vast_flatbuf.org.apache.arrow.computeir.flatbuf.BinaryLiteral as fb_binary_lit
42
- import vast_flatbuf.org.apache.arrow.computeir.flatbuf.BooleanLiteral as fb_bool_lit
43
- import vast_flatbuf.org.apache.arrow.computeir.flatbuf.Call as fb_call
44
- import vast_flatbuf.org.apache.arrow.computeir.flatbuf.DateLiteral as fb_date32_lit
45
- import vast_flatbuf.org.apache.arrow.computeir.flatbuf.DecimalLiteral as fb_decimal_lit
46
- import vast_flatbuf.org.apache.arrow.computeir.flatbuf.Expression as fb_expression
47
- import vast_flatbuf.org.apache.arrow.computeir.flatbuf.FieldIndex as fb_field_index
48
- import vast_flatbuf.org.apache.arrow.computeir.flatbuf.FieldRef as fb_field_ref
49
- import vast_flatbuf.org.apache.arrow.computeir.flatbuf.Float32Literal as fb_float32_lit
50
- import vast_flatbuf.org.apache.arrow.computeir.flatbuf.Float64Literal as fb_float64_lit
51
- import vast_flatbuf.org.apache.arrow.computeir.flatbuf.Int8Literal as fb_int8_lit
52
- import vast_flatbuf.org.apache.arrow.computeir.flatbuf.Int16Literal as fb_int16_lit
53
- import vast_flatbuf.org.apache.arrow.computeir.flatbuf.Int32Literal as fb_int32_lit
54
- import vast_flatbuf.org.apache.arrow.computeir.flatbuf.Int64Literal as fb_int64_lit
55
- import vast_flatbuf.org.apache.arrow.computeir.flatbuf.Literal as fb_literal
56
- import vast_flatbuf.org.apache.arrow.computeir.flatbuf.Relation as fb_relation
57
- import vast_flatbuf.org.apache.arrow.computeir.flatbuf.RelationImpl as rel_impl
58
- import vast_flatbuf.org.apache.arrow.computeir.flatbuf.Source as fb_source
59
- import vast_flatbuf.org.apache.arrow.computeir.flatbuf.StringLiteral as fb_string_lit
60
- import vast_flatbuf.org.apache.arrow.computeir.flatbuf.TimeLiteral as fb_time_lit
61
- import vast_flatbuf.org.apache.arrow.computeir.flatbuf.TimestampLiteral as fb_timestamp_lit
62
- import vast_flatbuf.org.apache.arrow.computeir.flatbuf.UInt8Literal as fb_uint8_lit
63
- import vast_flatbuf.org.apache.arrow.computeir.flatbuf.UInt16Literal as fb_uint16_lit
64
- import vast_flatbuf.org.apache.arrow.computeir.flatbuf.UInt32Literal as fb_uint32_lit
65
- import vast_flatbuf.org.apache.arrow.computeir.flatbuf.UInt64Literal as fb_uint64_lit
66
- import vast_flatbuf.org.apache.arrow.flatbuf.Binary as fb_binary
67
- import vast_flatbuf.org.apache.arrow.flatbuf.Bool as fb_bool
68
- import vast_flatbuf.org.apache.arrow.flatbuf.Date as fb_date
69
- import vast_flatbuf.org.apache.arrow.flatbuf.Decimal as fb_decimal
70
- import vast_flatbuf.org.apache.arrow.flatbuf.Field as fb_field
71
- import vast_flatbuf.org.apache.arrow.flatbuf.FixedSizeBinary as fb_fixed_size_binary
72
- import vast_flatbuf.org.apache.arrow.flatbuf.FloatingPoint as fb_floating_point
73
- import vast_flatbuf.org.apache.arrow.flatbuf.Int as fb_int
74
- import vast_flatbuf.org.apache.arrow.flatbuf.List as fb_list
75
- import vast_flatbuf.org.apache.arrow.flatbuf.Map as fb_map
76
- import vast_flatbuf.org.apache.arrow.flatbuf.Schema as fb_schema
77
- import vast_flatbuf.org.apache.arrow.flatbuf.Struct_ as fb_struct
78
- import vast_flatbuf.org.apache.arrow.flatbuf.Time as fb_time
79
- import vast_flatbuf.org.apache.arrow.flatbuf.Timestamp as fb_timestamp
80
- import vast_flatbuf.org.apache.arrow.flatbuf.Utf8 as fb_utf8
81
- import vast_flatbuf.tabular.AlterColumnRequest as tabular_alter_column
82
- import vast_flatbuf.tabular.AlterProjectionTableRequest as tabular_alter_projection
83
- import vast_flatbuf.tabular.AlterSchemaRequest as tabular_alter_schema
84
- import vast_flatbuf.tabular.AlterTableRequest as tabular_alter_table
85
- import vast_flatbuf.tabular.Column as tabular_projecion_column
86
- import vast_flatbuf.tabular.ColumnType as tabular_proj_column_type
87
- import vast_flatbuf.tabular.CreateProjectionRequest as tabular_create_projection
88
- import vast_flatbuf.tabular.CreateSchemaRequest as tabular_create_schema
89
- import vast_flatbuf.tabular.ImportDataRequest as tabular_import_data
90
- import vast_flatbuf.tabular.S3File as tabular_s3_file
91
- from vast_flatbuf.org.apache.arrow.computeir.flatbuf.Deref import Deref
92
- from vast_flatbuf.org.apache.arrow.computeir.flatbuf.ExpressionImpl import (
41
+ import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.BinaryLiteral as fb_binary_lit
42
+ import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.BooleanLiteral as fb_bool_lit
43
+ import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Call as fb_call
44
+ import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.DateLiteral as fb_date32_lit
45
+ import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.DecimalLiteral as fb_decimal_lit
46
+ import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Expression as fb_expression
47
+ import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.FieldIndex as fb_field_index
48
+ import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.FieldRef as fb_field_ref
49
+ import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Float32Literal as fb_float32_lit
50
+ import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Float64Literal as fb_float64_lit
51
+ import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Int8Literal as fb_int8_lit
52
+ import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Int16Literal as fb_int16_lit
53
+ import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Int32Literal as fb_int32_lit
54
+ import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Int64Literal as fb_int64_lit
55
+ import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Literal as fb_literal
56
+ import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Relation as fb_relation
57
+ import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.RelationImpl as rel_impl
58
+ import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Source as fb_source
59
+ import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.StringLiteral as fb_string_lit
60
+ import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.TimeLiteral as fb_time_lit
61
+ import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.TimestampLiteral as fb_timestamp_lit
62
+ import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.UInt8Literal as fb_uint8_lit
63
+ import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.UInt16Literal as fb_uint16_lit
64
+ import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.UInt32Literal as fb_uint32_lit
65
+ import vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.UInt64Literal as fb_uint64_lit
66
+ import vastdb.vast_flatbuf.org.apache.arrow.flatbuf.Binary as fb_binary
67
+ import vastdb.vast_flatbuf.org.apache.arrow.flatbuf.Bool as fb_bool
68
+ import vastdb.vast_flatbuf.org.apache.arrow.flatbuf.Date as fb_date
69
+ import vastdb.vast_flatbuf.org.apache.arrow.flatbuf.Decimal as fb_decimal
70
+ import vastdb.vast_flatbuf.org.apache.arrow.flatbuf.Field as fb_field
71
+ import vastdb.vast_flatbuf.org.apache.arrow.flatbuf.FixedSizeBinary as fb_fixed_size_binary
72
+ import vastdb.vast_flatbuf.org.apache.arrow.flatbuf.FloatingPoint as fb_floating_point
73
+ import vastdb.vast_flatbuf.org.apache.arrow.flatbuf.Int as fb_int
74
+ import vastdb.vast_flatbuf.org.apache.arrow.flatbuf.List as fb_list
75
+ import vastdb.vast_flatbuf.org.apache.arrow.flatbuf.Map as fb_map
76
+ import vastdb.vast_flatbuf.org.apache.arrow.flatbuf.Schema as fb_schema
77
+ import vastdb.vast_flatbuf.org.apache.arrow.flatbuf.Struct_ as fb_struct
78
+ import vastdb.vast_flatbuf.org.apache.arrow.flatbuf.Time as fb_time
79
+ import vastdb.vast_flatbuf.org.apache.arrow.flatbuf.Timestamp as fb_timestamp
80
+ import vastdb.vast_flatbuf.org.apache.arrow.flatbuf.Utf8 as fb_utf8
81
+ import vastdb.vast_flatbuf.tabular.AlterColumnRequest as tabular_alter_column
82
+ import vastdb.vast_flatbuf.tabular.AlterProjectionTableRequest as tabular_alter_projection
83
+ import vastdb.vast_flatbuf.tabular.AlterSchemaRequest as tabular_alter_schema
84
+ import vastdb.vast_flatbuf.tabular.AlterTableRequest as tabular_alter_table
85
+ import vastdb.vast_flatbuf.tabular.Column as tabular_projecion_column
86
+ import vastdb.vast_flatbuf.tabular.ColumnType as tabular_proj_column_type
87
+ import vastdb.vast_flatbuf.tabular.CreateProjectionRequest as tabular_create_projection
88
+ import vastdb.vast_flatbuf.tabular.CreateSchemaRequest as tabular_create_schema
89
+ import vastdb.vast_flatbuf.tabular.ImportDataRequest as tabular_import_data
90
+ import vastdb.vast_flatbuf.tabular.KeyName as import_key_name
91
+ import vastdb.vast_flatbuf.tabular.S3File as tabular_s3_file
92
+ from vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.Deref import Deref
93
+ from vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.ExpressionImpl import (
93
94
  ExpressionImpl,
94
95
  )
95
- from vast_flatbuf.org.apache.arrow.computeir.flatbuf.LiteralImpl import LiteralImpl
96
- from vast_flatbuf.org.apache.arrow.flatbuf.DateUnit import DateUnit
97
- from vast_flatbuf.org.apache.arrow.flatbuf.TimeUnit import TimeUnit
98
- from vast_flatbuf.org.apache.arrow.flatbuf.Type import Type
99
- from vast_flatbuf.tabular.GetProjectionTableStatsResponse import (
96
+ from vastdb.vast_flatbuf.org.apache.arrow.computeir.flatbuf.LiteralImpl import (
97
+ LiteralImpl,
98
+ )
99
+ from vastdb.vast_flatbuf.org.apache.arrow.flatbuf.DateUnit import DateUnit
100
+ from vastdb.vast_flatbuf.org.apache.arrow.flatbuf.TimeUnit import TimeUnit
101
+ from vastdb.vast_flatbuf.org.apache.arrow.flatbuf.Type import Type
102
+ from vastdb.vast_flatbuf.tabular.GetProjectionTableStatsResponse import (
100
103
  GetProjectionTableStatsResponse as get_projection_table_stats,
101
104
  )
102
- from vast_flatbuf.tabular.GetTableStatsResponse import (
105
+ from vastdb.vast_flatbuf.tabular.GetTableStatsResponse import (
103
106
  GetTableStatsResponse as get_table_stats,
104
107
  )
105
- from vast_flatbuf.tabular.ListProjectionsResponse import (
108
+ from vastdb.vast_flatbuf.tabular.ListProjectionsResponse import (
106
109
  ListProjectionsResponse as list_projections,
107
110
  )
108
- from vast_flatbuf.tabular.ListSchemasResponse import ListSchemasResponse as list_schemas
109
- from vast_flatbuf.tabular.ListTablesResponse import ListTablesResponse as list_tables
111
+ from vastdb.vast_flatbuf.tabular.ListSchemasResponse import (
112
+ ListSchemasResponse as list_schemas,
113
+ )
114
+ from vastdb.vast_flatbuf.tabular.ListTablesResponse import (
115
+ ListTablesResponse as list_tables,
116
+ )
110
117
 
111
118
  from . import errors, util
112
119
  from .config import BackoffConfig
@@ -787,7 +794,7 @@ def _decode_table_props(s):
787
794
  return {y: _prop_coding[x][1](z) for x, y, z in triplets if z != ''}
788
795
 
789
796
 
790
- TableInfo = namedtuple('TableInfo', 'name properties handle num_rows size_in_bytes num_partitions')
797
+ TableInfo = namedtuple('TableInfo', 'name properties handle num_rows size_in_bytes num_partitions sorting_key_enabled')
791
798
 
792
799
 
793
800
  def _parse_table_info(obj, parse_properties):
@@ -798,7 +805,8 @@ def _parse_table_info(obj, parse_properties):
798
805
  used_bytes = obj.SizeInBytes()
799
806
  num_partitions = obj.NumPartitions()
800
807
  properties = parse_properties(properties)
801
- return TableInfo(name, properties, handle, num_rows, used_bytes, num_partitions)
808
+ sorting_key_enabled = obj.SortingKeyEnabled()
809
+ return TableInfo(name, properties, handle, num_rows, used_bytes, num_partitions, sorting_key_enabled)
802
810
 
803
811
 
804
812
  # Results that returns from tablestats
@@ -962,12 +970,12 @@ class VastdbApi:
962
970
  prefix += '&'.join(params_list)
963
971
  return prefix
964
972
 
965
- def _fill_common_headers(self, txid=0, client_tags=[], version_id=1):
973
+ def _fill_common_headers(self, txid=0, client_tags=[], version_id=1, sorting_key=[]):
966
974
  common_headers = {
967
975
  'tabular-txid': str(txid),
968
976
  'tabular-api-version-id': str(version_id),
969
977
  'tabular-client-name': 'tabular-api'
970
- }
978
+ } | {f"tabular-sorted-column-{i}": str(k) for i, k in enumerate(sorting_key)}
971
979
 
972
980
  return common_headers | {f'tabular-client-tags-{index}': tag for index, tag in enumerate(client_tags)}
973
981
 
@@ -1122,11 +1130,12 @@ class VastdbApi:
1122
1130
 
1123
1131
  def create_table(self, bucket, schema, name, arrow_schema=None,
1124
1132
  txid=0, client_tags=[], expected_retvals=[],
1125
- create_imports_table=False, use_external_row_ids_allocation=False, table_props=None):
1133
+ create_imports_table=False, use_external_row_ids_allocation=False, table_props=None,
1134
+ sorting_key=[]):
1126
1135
  self._create_table_internal(bucket=bucket, schema=schema, name=name, arrow_schema=arrow_schema,
1127
1136
  txid=txid, client_tags=client_tags, expected_retvals=expected_retvals,
1128
1137
  create_imports_table=create_imports_table, use_external_row_ids_allocation=use_external_row_ids_allocation,
1129
- table_props=table_props)
1138
+ table_props=table_props, sorting_key=sorting_key)
1130
1139
 
1131
1140
  def create_topic(self, bucket, name, topic_partitions, expected_retvals=[],
1132
1141
  message_timestamp_type=None, retention_ms=None, message_timestamp_after_max_ms=None,
@@ -1143,7 +1152,8 @@ class VastdbApi:
1143
1152
 
1144
1153
  def _create_table_internal(self, bucket, schema, name, arrow_schema=None,
1145
1154
  txid=0, client_tags=[], expected_retvals=[], topic_partitions=0,
1146
- create_imports_table=False, use_external_row_ids_allocation=False, table_props=None):
1155
+ create_imports_table=False, use_external_row_ids_allocation=False, table_props=None,
1156
+ sorting_key=[]):
1147
1157
  """
1148
1158
  Create a table, use the following request
1149
1159
  POST /bucket/schema/table?table HTTP/1.1
@@ -1160,8 +1170,7 @@ class VastdbApi:
1160
1170
  The request will look like:
1161
1171
  POST /bucket/schema/table?table&sub-table=vastdb-imported-objects HTTP/1.1
1162
1172
  """
1163
- headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
1164
-
1173
+ headers = self._fill_common_headers(txid=txid, client_tags=client_tags, sorting_key=sorting_key)
1165
1174
  if arrow_schema is None:
1166
1175
  arrow_schema = pa.schema([])
1167
1176
 
@@ -1222,7 +1231,7 @@ class VastdbApi:
1222
1231
  table_properties=table_properties, new_name=new_name, expected_retvals=expected_retvals)
1223
1232
 
1224
1233
  def alter_table(self, bucket, schema, name, txid=0, client_tags=[], table_properties="",
1225
- new_name="", expected_retvals=[]):
1234
+ new_name="", expected_retvals=[], sorting_key=[]):
1226
1235
  """
1227
1236
  PUT /mybucket/myschema/mytable?table HTTP/1.1
1228
1237
  Content-Length: ContentLength
@@ -1246,7 +1255,7 @@ class VastdbApi:
1246
1255
  builder.Finish(params)
1247
1256
  alter_table_req = builder.Output()
1248
1257
 
1249
- headers = self._fill_common_headers(txid=txid, client_tags=client_tags)
1258
+ headers = self._fill_common_headers(txid=txid, client_tags=client_tags, sorting_key=sorting_key)
1250
1259
  headers['Content-Length'] = str(len(alter_table_req))
1251
1260
  url_params = {'tabular-new-table-name': schema + "/" + new_name} if len(new_name) else {}
1252
1261
 
@@ -1415,9 +1424,9 @@ class VastdbApi:
1415
1424
  url=self._url(bucket=bucket, schema=schema, table=table, command="column"),
1416
1425
  data=serialized_schema, headers=headers)
1417
1426
 
1418
- def list_columns(self, bucket, schema, table, *, txid=0, client_tags=None, max_keys=None, next_key=0,
1419
- count_only=False, name_prefix="", exact_match=False,
1420
- expected_retvals=None, bc_list_internals=False, list_imports_table=False):
1427
+ def _list_columns_internal(self, command, bucket, schema, table, txid, client_tags, max_keys, next_key,
1428
+ count_only, name_prefix, exact_match, expected_retvals, bc_list_internals,
1429
+ list_imports_table):
1421
1430
  """
1422
1431
  GET /mybucket/myschema/mytable?columns HTTP/1.1
1423
1432
  tabular-txid: TransactionId
@@ -1447,7 +1456,7 @@ class VastdbApi:
1447
1456
  url_params = {'sub-table': IMPORTED_OBJECTS_TABLE_NAME} if list_imports_table else {}
1448
1457
  res = self._request(
1449
1458
  method="GET",
1450
- url=self._url(bucket=bucket, schema=schema, table=table, command="column", url_params=url_params),
1459
+ url=self._url(bucket=bucket, schema=schema, table=table, command=command, url_params=url_params),
1451
1460
  headers=headers)
1452
1461
 
1453
1462
  res_headers = res.headers
@@ -1458,6 +1467,20 @@ class VastdbApi:
1458
1467
 
1459
1468
  return columns, next_key, is_truncated, count
1460
1469
 
1470
def list_columns(self, bucket, schema, table, *, txid=0, client_tags=None, max_keys=None, next_key=0,
                 count_only=False, name_prefix="", exact_match=False,
                 expected_retvals=None, bc_list_internals=False, list_imports_table=False):
    """List the columns of a table.

    Forwards every argument unchanged to `_list_columns_internal`, selecting
    the 'column' command, and returns whatever that helper returns.
    """
    return self._list_columns_internal(
        command='column',
        bucket=bucket,
        schema=schema,
        table=table,
        txid=txid,
        client_tags=client_tags,
        max_keys=max_keys,
        next_key=next_key,
        count_only=count_only,
        name_prefix=name_prefix,
        exact_match=exact_match,
        expected_retvals=expected_retvals,
        bc_list_internals=bc_list_internals,
        list_imports_table=list_imports_table,
    )
1476
+
1477
def list_sorted_columns(self, bucket, schema, table, *, txid=0, client_tags=None, max_keys=None, next_key=0,
                        count_only=False, name_prefix="", exact_match=False,
                        expected_retvals=None, bc_list_internals=False, list_imports_table=False):
    """List the sorted columns of a table.

    Forwards every argument unchanged to `_list_columns_internal`, selecting
    the 'sorted-columns' command, and returns whatever that helper returns.
    """
    return self._list_columns_internal(
        command='sorted-columns',
        bucket=bucket,
        schema=schema,
        table=table,
        txid=txid,
        client_tags=client_tags,
        max_keys=max_keys,
        next_key=next_key,
        count_only=count_only,
        name_prefix=name_prefix,
        exact_match=exact_match,
        expected_retvals=expected_retvals,
        bc_list_internals=bc_list_internals,
        list_imports_table=list_imports_table,
    )
1483
+
1461
1484
  def head_bucket(self, bucket_name):
1462
1485
  """
1463
1486
  Reimplemented, instead of depending on boto3 for checking the existence of a bucket.
@@ -1596,7 +1619,7 @@ class VastdbApi:
1596
1619
  source_files: list of (bucket_name, file_name)
1597
1620
  """
1598
1621
  def import_data(self, bucket, schema, table, source_files, txid=0, client_tags=[], expected_retvals=[], case_sensitive=True,
1599
- schedule_id=None, retry_count=0, blocking=True):
1622
+ schedule_id=None, retry_count=0, blocking=True, key_names=[]):
1600
1623
  """
1601
1624
  POST /mybucket/myschema/mytable?data HTTP/1.1
1602
1625
  Content-Length: ContentLength
@@ -1638,8 +1661,23 @@ class VastdbApi:
1638
1661
  builder.PrependUOffsetTRelative(f)
1639
1662
 
1640
1663
  files = builder.EndVector()
1664
+
1665
+ key_names_arr = []
1666
+ for key in key_names:
1667
+ kname = builder.CreateString(key)
1668
+ import_key_name.Start(builder)
1669
+ import_key_name.AddName(builder, kname)
1670
+ key_names_arr.append(import_key_name.End(builder))
1671
+
1672
+ tabular_import_data.StartKeyNamesVector(builder, len(key_names_arr))
1673
+ for key in reversed(key_names_arr):
1674
+ builder.PrependUOffsetTRelative(key)
1675
+
1676
+ key_names_vec = builder.EndVector()
1677
+
1641
1678
  tabular_import_data.Start(builder)
1642
1679
  tabular_import_data.AddS3Files(builder, files)
1680
+ tabular_import_data.AddKeyNames(builder, key_names_vec)
1643
1681
  params = tabular_import_data.End(builder)
1644
1682
  builder.Finish(params)
1645
1683
  import_req = builder.Output()
@@ -1953,6 +1991,49 @@ class QueryDataInternalError(Exception):
1953
1991
  pass
1954
1992
 
1955
1993
 
1994
def read_first_batch(fileobj):
    """Return the first record batch found in a query-data response stream.

    The stream is consumed chunk by chunk. Each chunk starts with a 4-byte
    little-endian stream ID; data chunks follow it with an 8-byte
    little-endian "next row id" and an Arrow IPC message.

    Returns the first batch read from a stream whose schema was already seen,
    or None when the stream ends, the server signals completion, or the
    chosen stream reports end-of-stream before yielding a batch.

    Raises:
        EOFError: the input ended while at least one Arrow reader was open.
        QueryDataInternalError: the server sent the "failed" stream ID.
    """
    open_readers = {}  # {stream_id: pa.ipc.RecordBatchStreamReader}
    while True:
        header = fileobj.read(4)
        if not header:
            if open_readers:
                raise EOFError(f'no readers ({open_readers}) should be open at EOF')
            return None

        (stream_id,) = struct.unpack('<L', header)
        if stream_id == TABULAR_KEEP_ALIVE_STREAM_ID:
            # keep-alive chunks carry no payload; go on to the next chunk
            continue

        if stream_id == TABULAR_QUERY_DATA_COMPLETED_STREAM_ID:
            # read the terminating end chunk from socket
            tail = fileobj.read()
            _logger.debug("stream_id=%d res=%s (finish)", stream_id, tail)
            return None

        if stream_id == TABULAR_QUERY_DATA_FAILED_STREAM_ID:
            # read the terminating end chunk from socket
            tail = fileobj.read()
            _logger.debug("stream_id=%d res=%s (failed)", stream_id, tail)
            raise QueryDataInternalError()  # connection closed by server due to an internal error

        (next_row_id,) = struct.unpack('<Q', fileobj.read(8))
        _logger.debug("stream_id=%d next_row_id=%d", stream_id, next_row_id)

        reader = open_readers.get(stream_id)
        if reader is None:
            # constructing the reader implicitly consumes the 1st message (Arrow schema)
            reader = pa.ipc.RecordBatchStreamReader(fileobj)
            _logger.debug("stream_id=%d schema=%s", stream_id, reader.schema)
            open_readers[stream_id] = reader
            continue

        try:
            return reader.read_next_batch()  # read single-column chunk data
        except StopIteration:
            # we got an end-of-stream IPC message for this stream ID
            return None
2035
+
2036
+
1956
2037
  def _iter_query_data_response_columns(fileobj, stream_ids=None):
1957
2038
  readers = {} # {stream_id: pa.ipc.RecordBatchStreamReader}
1958
2039
  while True:
vastdb/bench/test_perf.py CHANGED
@@ -10,11 +10,11 @@ log = logging.getLogger(__name__)
10
10
 
11
11
 
12
12
  @pytest.mark.benchmark
13
- def test_bench(session, bucket_name, parquets_path, crater_path):
13
+ def test_bench(session, test_bucket_name, parquets_path, crater_path):
14
14
  files = [str(parquets_path / f) for f in (parquets_path.glob('**/*.pq'))]
15
15
 
16
16
  with session.transaction() as tx:
17
- b = tx.bucket(bucket_name)
17
+ b = tx.bucket(test_bucket_name)
18
18
  s = b.create_schema('s1')
19
19
  t = util.create_table_from_files(s, 't1', files, config=ImportConfig(import_concurrency=8))
20
20
  config = QueryConfig(num_splits=8, num_sub_splits=4)
vastdb/config.py CHANGED
@@ -63,3 +63,6 @@ class ImportConfig:
63
63
  """Import execution configuration."""
64
64
 
65
65
  import_concurrency: int = 2
66
+
67
+ # import key column names
68
+ key_names: Optional[List[str]] = None
vastdb/errors.py CHANGED
@@ -17,6 +17,7 @@ class HttpStatus(Enum):
17
17
  INTERNAL_SERVER_ERROR = 500
18
18
  NOT_IMPLEMENTED = 501
19
19
  SERVICE_UNAVAILABLE = 503
20
+ INSUFFICIENT_CAPACITY = 507
20
21
 
21
22
 
22
23
  log = logging.getLogger(__name__)
@@ -79,6 +80,10 @@ class UnexpectedError(HttpError):
79
80
  pass
80
81
 
81
82
 
83
class InsufficientCapacity(HttpError):
    """Error type registered in ERROR_TYPES_MAP for HTTP status 507 (HttpStatus.INSUFFICIENT_CAPACITY)."""
85
+
86
+
82
87
  @dataclass
83
88
  class ImportFilesError(Exception):
84
89
  message: str
@@ -197,6 +202,7 @@ ERROR_TYPES_MAP = {
197
202
  HttpStatus.INTERNAL_SERVER_ERROR: InternalServerError,
198
203
  HttpStatus.NOT_IMPLEMENTED: NotImplemented,
199
204
  HttpStatus.SERVICE_UNAVAILABLE: handle_unavailable,
205
+ HttpStatus.INSUFFICIENT_CAPACITY: InsufficientCapacity,
200
206
  }
201
207
 
202
208
 
vastdb/features.py CHANGED
@@ -13,6 +13,7 @@ class Features:
13
13
  def __init__(self, vast_version):
14
14
  """Save the server version."""
15
15
  self.vast_version = vast_version
16
+ log.info("VAST version: %s", self.vast_version)
16
17
 
17
18
  self.check_imports_table = self._check(
18
19
  "Imported objects' table feature requires 5.2+ VAST release",
@@ -30,6 +31,14 @@ class Features:
30
31
  "External row IDs allocation requires 5.1+ VAST release",
31
32
  vast_version >= (5, 1))
32
33
 
34
+ self.check_elysium = self._check(
35
+ "Elysium requires 5.3.5+ VAST release",
36
+ vast_version >= (5, 3, 5))
37
+
38
+ self.check_zip_import = self._check(
39
+ "Zip import requires 5.3.1+ VAST release",
40
+ vast_version >= (5, 3, 1))
41
+
33
42
  def _check(self, msg, supported):
34
43
  log.debug("%s (current version is %s): supported=%s", msg, self.vast_version, supported)
35
44
  if not supported:
vastdb/schema.py CHANGED
@@ -76,7 +76,8 @@ class Schema:
76
76
  break
77
77
  return result
78
78
 
79
- def create_table(self, table_name: str, columns: pa.Schema, fail_if_exists=True, use_external_row_ids_allocation=False) -> "Table":
79
+ def create_table(self, table_name: str, columns: pa.Schema, fail_if_exists=True,
80
+ use_external_row_ids_allocation=False, sorting_key=[]) -> "Table":
80
81
  """Create a new table under this schema.
81
82
 
82
83
  A virtual `vastdb_rowid` column (of `int64` type) can be created to access and filter by internal VAST row IDs.
@@ -91,7 +92,8 @@ class Schema:
91
92
  self.tx._rpc.features.check_external_row_ids_allocation()
92
93
 
93
94
  self.tx._rpc.api.create_table(self.bucket.name, self.name, table_name, columns, txid=self.tx.txid,
94
- use_external_row_ids_allocation=use_external_row_ids_allocation)
95
+ use_external_row_ids_allocation=use_external_row_ids_allocation,
96
+ sorting_key=sorting_key)
95
97
  log.info("Created table: %s", table_name)
96
98
  return self.table(table_name) # type: ignore[return-value]
97
99
 
@@ -149,4 +151,4 @@ class Schema:
149
151
 
150
152
 
151
153
  def _parse_table_info(table_info, schema: "schema.Schema"):
152
- return table.Table(name=table_info.name, schema=schema, handle=int(table_info.handle), _imports_table=False)
154
+ return table.Table(name=table_info.name, schema=schema, handle=int(table_info.handle), _imports_table=False, sorted_table=table_info.sorting_key_enabled)
vastdb/table.py CHANGED
@@ -21,6 +21,7 @@ log = logging.getLogger(__name__)
21
21
 
22
22
  INTERNAL_ROW_ID = "$row_id"
23
23
  INTERNAL_ROW_ID_FIELD = pa.field(INTERNAL_ROW_ID, pa.uint64())
24
+ INTERNAL_ROW_ID_SORTED_FIELD = pa.field(INTERNAL_ROW_ID, pa.decimal128(38, 0)) # Sorted tables have longer row ids
24
25
 
25
26
  MAX_ROWS_PER_BATCH = 512 * 1024
26
27
  # for insert we need a smaller limit due to response amplification
@@ -115,6 +116,7 @@ class Table:
115
116
  arrow_schema: pa.Schema = field(init=False, compare=False, repr=False)
116
117
  _ibis_table: ibis.Schema = field(init=False, compare=False, repr=False)
117
118
  _imports_table: bool
119
+ sorted_table: bool
118
120
 
119
121
  def __post_init__(self):
120
122
  """Also, load columns' metadata."""
@@ -157,6 +159,29 @@ class Table:
157
159
  self.arrow_schema = pa.schema(fields)
158
160
  return self.arrow_schema
159
161
 
162
def sorted_columns(self) -> list:
    """Return sorted columns' metadata.

    Checks the Elysium feature, then pages through `list_sorted_columns`
    until the response is no longer truncated, collecting every column.

    This is best-effort: `BadRequest`, `InternalServerError` and
    `NotSupportedVersion` are logged and swallowed rather than raised.
    The result may therefore be empty, or hold only the pages fetched
    before the failure.
    """
    fields = []
    try:
        self.tx._rpc.features.check_elysium()
        next_key = 0
        while True:
            cur_columns, next_key, is_truncated, _count = self.tx._rpc.api.list_sorted_columns(
                bucket=self.bucket.name, schema=self.schema.name, table=self.name,
                next_key=next_key, txid=self.tx.txid, list_imports_table=self._imports_table)
            fields.extend(cur_columns)
            if not is_truncated:
                break
    except errors.BadRequest as bad_request:
        # Still tolerated, but leave a trace instead of swallowing it silently.
        log.debug("Listing sorted columns of table %s was rejected: %s", self.name, bad_request)
    except errors.InternalServerError as ise:
        log.warning("Failed to get the sorted columns Elysium might not be supported: %s", ise)
    except errors.NotSupportedVersion:
        log.warning("Failed to get the sorted columns, Elysium not supported")

    return fields
184
+
160
185
  def projection(self, name: str) -> "Projection":
161
186
  """Get a specific semi-sorted projection of this table."""
162
187
  if self._imports_table:
@@ -228,6 +253,10 @@ class Table:
228
253
  endpoints = [self.tx._rpc.api.url for _ in range(config.import_concurrency)] # TODO: use valid endpoints...
229
254
  files_queue = queue.Queue()
230
255
 
256
+ key_names = config.key_names or []
257
+ if key_names:
258
+ self.tx._rpc.features.check_zip_import()
259
+
231
260
  for source_file in source_files.items():
232
261
  files_queue.put(source_file)
233
262
 
@@ -249,8 +278,10 @@ class Table:
249
278
  pass
250
279
  if files_batch:
251
280
  log.debug("Starting import batch of %s files", len(files_batch))
281
+ log.info(f"starting import of {files_batch}")
252
282
  session.import_data(
253
- self.bucket.name, self.schema.name, self.name, files_batch, txid=self.tx.txid)
283
+ self.bucket.name, self.schema.name, self.name, files_batch, txid=self.tx.txid,
284
+ key_names=key_names)
254
285
  except (Exception, KeyboardInterrupt) as e:
255
286
  stop_event.set()
256
287
  log.error("Got exception inside import_worker. exception: %s", e)
@@ -277,6 +308,21 @@ class Table:
277
308
  imports_table_stats=self._imports_table)
278
309
  return TableStats(**stats_tuple._asdict())
279
310
 
311
def _get_row_estimate(self, columns: List[str], predicate: ibis.expr.types.BooleanColumn, arrow_schema: pa.Schema):
    """Estimate how many rows a query over this table would produce.

    Builds a query-data request for `columns` / `predicate` against
    `arrow_schema`, sends it with a special split tuple, and reads only the
    first record batch of the response.

    Returns the batch's row count multiplied by 2**16, or 0 when no batch was
    returned. `select` treats 0 as "no estimate" and falls back to table stats.
    """
    request = _internal.build_query_data_request(
        schema=arrow_schema,
        predicate=predicate,
        field_names=columns)

    # NOTE(review): the split value (0xffffffff - 3, 1, 1) presumably selects a
    # server-side estimation mode rather than a real data split - confirm.
    response = self.tx._rpc.api.query_data(
        bucket=self.bucket.name,
        schema=self.schema.name,
        table=self.name,
        params=request.serialized,
        split=(0xffffffff - 3, 1, 1),
        txid=self.tx.txid)

    # NOTE(review): the response is not closed or drained after the first
    # batch - confirm the connection is released elsewhere.
    first_batch = _internal.read_first_batch(response.raw)
    if first_batch is None:
        return 0
    # NOTE(review): presumably each returned row stands for 2**16 table rows - confirm.
    return first_batch.num_rows * 2**16
325
+
280
326
  def select(self, columns: Optional[List[str]] = None,
281
327
  predicate: Union[ibis.expr.types.BooleanColumn, ibis.common.deferred.Deferred] = None,
282
328
  config: Optional[QueryConfig] = None,
@@ -293,30 +339,22 @@ class Table:
293
339
  if config is None:
294
340
  config = QueryConfig()
295
341
 
342
+ stats = None
296
343
  # Retrieve snapshots only if needed
297
- if config.data_endpoints is None or config.num_splits is None:
344
+ if config.data_endpoints is None:
298
345
  stats = self.get_stats()
299
346
  log.debug("stats: %s", stats)
300
-
301
- if config.data_endpoints is None:
302
347
  endpoints = stats.endpoints
303
348
  else:
304
349
  endpoints = tuple(config.data_endpoints)
305
350
  log.debug("endpoints: %s", endpoints)
306
351
 
307
- if config.num_splits is None:
308
- config.num_splits = max(1, stats.num_rows // config.rows_per_split)
309
- log.debug("config: %s", config)
310
-
311
- if config.semi_sorted_projection_name:
312
- self.tx._rpc.features.check_enforce_semisorted_projection()
313
-
314
352
  if columns is None:
315
353
  columns = [f.name for f in self.arrow_schema]
316
354
 
317
355
  query_schema = self.arrow_schema
318
356
  if internal_row_id:
319
- queried_fields = [INTERNAL_ROW_ID_FIELD]
357
+ queried_fields = [INTERNAL_ROW_ID_SORTED_FIELD if self.sorted_table else INTERNAL_ROW_ID_FIELD]
320
358
  queried_fields.extend(column for column in self.arrow_schema)
321
359
  query_schema = pa.schema(queried_fields)
322
360
  columns.append(INTERNAL_ROW_ID)
@@ -330,6 +368,22 @@ class Table:
330
368
  if isinstance(predicate, ibis.common.deferred.Deferred):
331
369
  predicate = predicate.resolve(self._ibis_table) # may raise if the predicate is invalid (e.g. wrong types / missing column)
332
370
 
371
+ if config.num_splits is None:
372
+ num_rows = 0
373
+ if self.sorted_table:
374
+ num_rows = self._get_row_estimate(columns, predicate, query_schema)
375
+ log.info(f'sorted estimate: {num_rows}')
376
+ if num_rows == 0:
377
+ if stats is None:
378
+ stats = self.get_stats()
379
+ num_rows = stats.num_rows
380
+
381
+ config.num_splits = max(1, num_rows // config.rows_per_split)
382
+ log.debug("config: %s", config)
383
+
384
+ if config.semi_sorted_projection_name:
385
+ self.tx._rpc.features.check_enforce_semisorted_projection()
386
+
333
387
  query_data_request = _internal.build_query_data_request(
334
388
  schema=query_schema,
335
389
  predicate=predicate,
@@ -485,7 +539,7 @@ class Table:
485
539
  if columns is None:
486
540
  columns = [name for name in rows.schema.names if name != INTERNAL_ROW_ID]
487
541
 
488
- update_fields = [(INTERNAL_ROW_ID, pa.uint64())]
542
+ update_fields = [INTERNAL_ROW_ID_SORTED_FIELD if self.sorted_table else INTERNAL_ROW_ID_FIELD]
489
543
  update_values = [_combine_chunks(rows_chunk)]
490
544
  for col in columns:
491
545
  update_fields.append(rows.field(col))
@@ -511,7 +565,7 @@ class Table:
511
565
  rows_chunk = rows[INTERNAL_ROW_ID]
512
566
  except KeyError:
513
567
  raise errors.MissingRowIdColumn
514
- delete_rows_rb = pa.record_batch(schema=pa.schema([(INTERNAL_ROW_ID, pa.uint64())]),
568
+ delete_rows_rb = pa.record_batch(schema=pa.schema([INTERNAL_ROW_ID_SORTED_FIELD if self.sorted_table else INTERNAL_ROW_ID_FIELD]),
515
569
  data=[_combine_chunks(rows_chunk)])
516
570
 
517
571
  delete_rows_rb = util.sort_record_batch_if_needed(delete_rows_rb, INTERNAL_ROW_ID)
@@ -535,6 +589,13 @@ class Table:
535
589
  log.info("Renamed table from %s to %s ", self.name, new_name)
536
590
  self.name = new_name
537
591
 
592
def add_sorting_key(self, sorting_key: list) -> None:
    """Add a sorting key to a table that doesn't have one yet.

    Checks the Elysium feature first, then sends an alter-table request
    carrying `sorting_key` within the current transaction.
    """
    rpc = self.tx._rpc
    rpc.features.check_elysium()
    rpc.api.alter_table(
        self.bucket.name,
        self.schema.name,
        self.name,
        txid=self.tx.txid,
        sorting_key=sorting_key,
    )
    log.info("Enabled Elysium for table %s with sorting key %s ", self.name, str(sorting_key))
598
+
538
599
  def add_column(self, new_column: pa.Schema) -> None:
539
600
  """Add a new column."""
540
601
  if self._imports_table:
@@ -583,7 +644,7 @@ class Table:
583
644
  def imports_table(self) -> Optional["Table"]:
584
645
  """Get the imports table of this table."""
585
646
  self.tx._rpc.features.check_imports_table()
586
- return Table(name=self.name, schema=self.schema, handle=int(self.handle), _imports_table=True)
647
+ return Table(name=self.name, schema=self.schema, handle=int(self.handle), _imports_table=True, sorted_table=self.sorted_table)
587
648
 
588
649
  def __getitem__(self, col_name: str):
589
650
  """Allow constructing ibis-like column expressions from this table.