vastdb 1.0.0__tar.gz → 1.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (218) hide show
  1. {vastdb-1.0.0 → vastdb-1.1.1}/CHANGELOG.md +21 -0
  2. {vastdb-1.0.0 → vastdb-1.1.1}/PKG-INFO +1 -2
  3. {vastdb-1.0.0 → vastdb-1.1.1}/README.md +51 -41
  4. {vastdb-1.0.0 → vastdb-1.1.1}/requirements.txt +0 -1
  5. {vastdb-1.0.0 → vastdb-1.1.1}/setup.py +1 -1
  6. {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/_internal.py +41 -10
  7. vastdb-1.1.1/vastdb/bench/perf_bench/bench_repo/mega_combo.py +87 -0
  8. vastdb-1.1.1/vastdb/bench/perf_bench/cli.py +225 -0
  9. vastdb-1.1.1/vastdb/bench/perf_bench/common/__init__.py +0 -0
  10. vastdb-1.1.1/vastdb/bench/perf_bench/common/constants.py +96 -0
  11. vastdb-1.1.1/vastdb/bench/perf_bench/common/log_utils.py +67 -0
  12. vastdb-1.1.1/vastdb/bench/perf_bench/common/types.py +34 -0
  13. vastdb-1.1.1/vastdb/bench/perf_bench/common/utils.py +219 -0
  14. vastdb-1.1.1/vastdb/bench/perf_bench/dataset/__init__.py +0 -0
  15. vastdb-1.1.1/vastdb/bench/perf_bench/dataset/generate_secmaster.py +105 -0
  16. vastdb-1.1.1/vastdb/bench/perf_bench/dataset/generate_stocks_dataset.py +242 -0
  17. vastdb-1.1.1/vastdb/bench/perf_bench/dataset/schemas.py +101 -0
  18. vastdb-1.1.1/vastdb/bench/perf_bench/dataset/secmaster.py +33 -0
  19. vastdb-1.1.1/vastdb/bench/perf_bench/orchestrate/__init__.py +0 -0
  20. vastdb-1.1.1/vastdb/bench/perf_bench/orchestrate/bench_spec.py +91 -0
  21. vastdb-1.1.1/vastdb/bench/perf_bench/orchestrate/results_helpers.py +126 -0
  22. vastdb-1.1.1/vastdb/bench/perf_bench/orchestrate/scenario.py +109 -0
  23. vastdb-1.1.1/vastdb/bench/perf_bench/orchestrate/scenario_generator.py +144 -0
  24. vastdb-1.1.1/vastdb/bench/perf_bench/query/__init__.py +0 -0
  25. vastdb-1.1.1/vastdb/bench/perf_bench/query/arrow_common.py +59 -0
  26. vastdb-1.1.1/vastdb/bench/perf_bench/query/query.py +42 -0
  27. vastdb-1.1.1/vastdb/bench/perf_bench/query/query_pyarrow.py +70 -0
  28. vastdb-1.1.1/vastdb/bench/perf_bench/query/query_vastdb.py +78 -0
  29. vastdb-1.1.1/vastdb/bench/perf_bench/run.py +79 -0
  30. {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/bench/test_sample.py +4 -2
  31. {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/conftest.py +1 -1
  32. {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/session.py +0 -6
  33. {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/table.py +35 -35
  34. vastdb-1.1.1/vastdb/tests/__init__.py +0 -0
  35. {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/tests/test_nested.py +58 -0
  36. {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/tests/test_tables.py +13 -0
  37. {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/transaction.py +4 -8
  38. {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/util.py +5 -0
  39. vastdb-1.1.1/vastdb/vast_tests/__init__.py +0 -0
  40. {vastdb-1.0.0 → vastdb-1.1.1}/vastdb.egg-info/PKG-INFO +1 -2
  41. {vastdb-1.0.0 → vastdb-1.1.1}/vastdb.egg-info/SOURCES.txt +25 -0
  42. {vastdb-1.0.0 → vastdb-1.1.1}/vastdb.egg-info/requires.txt +0 -1
  43. {vastdb-1.0.0 → vastdb-1.1.1}/CONTRIBUTING.md +0 -0
  44. {vastdb-1.0.0 → vastdb-1.1.1}/LICENSE +0 -0
  45. {vastdb-1.0.0 → vastdb-1.1.1}/MANIFEST.in +0 -0
  46. {vastdb-1.0.0 → vastdb-1.1.1}/setup.cfg +0 -0
  47. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/__init__.py +0 -0
  48. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/__init__.py +0 -0
  49. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/__init__.py +0 -0
  50. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/__init__.py +0 -0
  51. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/__init__.py +0 -0
  52. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Aggregate.py +0 -0
  53. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/ArraySlice.py +0 -0
  54. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/ArraySubscript.py +0 -0
  55. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/BinaryLiteral.py +0 -0
  56. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/BooleanLiteral.py +0 -0
  57. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Bound.py +0 -0
  58. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Call.py +0 -0
  59. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/CaseFragment.py +0 -0
  60. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Cast.py +0 -0
  61. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/ConcreteBoundImpl.py +0 -0
  62. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/ConditionalCase.py +0 -0
  63. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/CurrentRow.py +0 -0
  64. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/DateLiteral.py +0 -0
  65. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/DecimalLiteral.py +0 -0
  66. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Deref.py +0 -0
  67. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/DurationLiteral.py +0 -0
  68. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Expression.py +0 -0
  69. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/ExpressionImpl.py +0 -0
  70. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/FieldIndex.py +0 -0
  71. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/FieldRef.py +0 -0
  72. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Filter.py +0 -0
  73. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/FixedSizeBinaryLiteral.py +0 -0
  74. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Float16Literal.py +0 -0
  75. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Float32Literal.py +0 -0
  76. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Float64Literal.py +0 -0
  77. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Following.py +0 -0
  78. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Frame.py +0 -0
  79. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Grouping.py +0 -0
  80. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Int16Literal.py +0 -0
  81. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Int32Literal.py +0 -0
  82. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Int64Literal.py +0 -0
  83. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Int8Literal.py +0 -0
  84. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/IntervalLiteral.py +0 -0
  85. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/IntervalLiteralDaysMilliseconds.py +0 -0
  86. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/IntervalLiteralImpl.py +0 -0
  87. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/IntervalLiteralMonths.py +0 -0
  88. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Join.py +0 -0
  89. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/JoinKind.py +0 -0
  90. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/KeyValue.py +0 -0
  91. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Limit.py +0 -0
  92. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/ListLiteral.py +0 -0
  93. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Literal.py +0 -0
  94. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/LiteralColumn.py +0 -0
  95. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/LiteralImpl.py +0 -0
  96. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/LiteralRelation.py +0 -0
  97. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/MapKey.py +0 -0
  98. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/MapLiteral.py +0 -0
  99. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/OrderBy.py +0 -0
  100. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Ordering.py +0 -0
  101. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Plan.py +0 -0
  102. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Preceding.py +0 -0
  103. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Project.py +0 -0
  104. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/RelId.py +0 -0
  105. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Relation.py +0 -0
  106. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/RelationImpl.py +0 -0
  107. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/SetOpKind.py +0 -0
  108. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/SetOperation.py +0 -0
  109. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/SimpleCase.py +0 -0
  110. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/SortKey.py +0 -0
  111. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Source.py +0 -0
  112. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/StringLiteral.py +0 -0
  113. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/StructField.py +0 -0
  114. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/StructLiteral.py +0 -0
  115. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/TimeLiteral.py +0 -0
  116. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/TimestampLiteral.py +0 -0
  117. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/UInt16Literal.py +0 -0
  118. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/UInt32Literal.py +0 -0
  119. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/UInt64Literal.py +0 -0
  120. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/UInt8Literal.py +0 -0
  121. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Unbounded.py +0 -0
  122. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/WindowCall.py +0 -0
  123. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/__init__.py +0 -0
  124. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Binary.py +0 -0
  125. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Block.py +0 -0
  126. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/BodyCompression.py +0 -0
  127. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/BodyCompressionMethod.py +0 -0
  128. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Bool.py +0 -0
  129. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Buffer.py +0 -0
  130. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/CompressionType.py +0 -0
  131. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Date.py +0 -0
  132. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/DateUnit.py +0 -0
  133. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Decimal.py +0 -0
  134. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/DictionaryBatch.py +0 -0
  135. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/DictionaryEncoding.py +0 -0
  136. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/DictionaryKind.py +0 -0
  137. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Duration.py +0 -0
  138. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Endianness.py +0 -0
  139. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Feature.py +0 -0
  140. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Field.py +0 -0
  141. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/FieldNode.py +0 -0
  142. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/FixedSizeBinary.py +0 -0
  143. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/FixedSizeList.py +0 -0
  144. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/FloatingPoint.py +0 -0
  145. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Footer.py +0 -0
  146. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Int.py +0 -0
  147. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Interval.py +0 -0
  148. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/IntervalUnit.py +0 -0
  149. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/KeyValue.py +0 -0
  150. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/LargeBinary.py +0 -0
  151. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/LargeList.py +0 -0
  152. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/LargeUtf8.py +0 -0
  153. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/List.py +0 -0
  154. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Map.py +0 -0
  155. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Message.py +0 -0
  156. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/MessageHeader.py +0 -0
  157. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/MetadataVersion.py +0 -0
  158. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Null.py +0 -0
  159. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Precision.py +0 -0
  160. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/RecordBatch.py +0 -0
  161. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Schema.py +0 -0
  162. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/SparseMatrixCompressedAxis.py +0 -0
  163. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/SparseMatrixIndexCSX.py +0 -0
  164. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/SparseTensor.py +0 -0
  165. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/SparseTensorIndex.py +0 -0
  166. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/SparseTensorIndexCOO.py +0 -0
  167. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/SparseTensorIndexCSF.py +0 -0
  168. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Struct_.py +0 -0
  169. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Tensor.py +0 -0
  170. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/TensorDim.py +0 -0
  171. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Time.py +0 -0
  172. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/TimeUnit.py +0 -0
  173. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Timestamp.py +0 -0
  174. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Type.py +0 -0
  175. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Union.py +0 -0
  176. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/UnionMode.py +0 -0
  177. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Utf8.py +0 -0
  178. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/__init__.py +0 -0
  179. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/AlterColumnRequest.py +0 -0
  180. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/AlterProjectionTableRequest.py +0 -0
  181. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/AlterSchemaRequest.py +0 -0
  182. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/AlterTableRequest.py +0 -0
  183. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/Column.py +0 -0
  184. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/ColumnType.py +0 -0
  185. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/CreateProjectionRequest.py +0 -0
  186. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/CreateSchemaRequest.py +0 -0
  187. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/GetProjectionTableStatsResponse.py +0 -0
  188. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/GetTableStatsResponse.py +0 -0
  189. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/ImportDataRequest.py +0 -0
  190. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/ListProjectionsResponse.py +0 -0
  191. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/ListSchemasResponse.py +0 -0
  192. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/ListTablesResponse.py +0 -0
  193. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/ObjectDetails.py +0 -0
  194. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/S3File.py +0 -0
  195. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/VipRange.py +0 -0
  196. {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/__init__.py +0 -0
  197. {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/__init__.py +0 -0
  198. {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/bench/__init__.py +0 -0
  199. {vastdb-1.0.0/vastdb/tests → vastdb-1.1.1/vastdb/bench/perf_bench}/__init__.py +0 -0
  200. {vastdb-1.0.0/vastdb/vast_tests → vastdb-1.1.1/vastdb/bench/perf_bench/bench_repo}/__init__.py +0 -0
  201. {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/bench/test_perf.py +0 -0
  202. {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/bucket.py +0 -0
  203. {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/config.py +0 -0
  204. {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/errors.py +0 -0
  205. {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/features.py +0 -0
  206. {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/schema.py +0 -0
  207. {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/tests/metrics.py +0 -0
  208. {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/tests/test_duckdb.py +0 -0
  209. {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/tests/test_imports.py +0 -0
  210. {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/tests/test_projections.py +0 -0
  211. {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/tests/test_sanity.py +0 -0
  212. {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/tests/test_schemas.py +0 -0
  213. {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/tests/test_util.py +0 -0
  214. {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/tests/util.py +0 -0
  215. {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/vast_tests/test_ha.py +0 -0
  216. {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/vast_tests/test_scale.py +0 -0
  217. {vastdb-1.0.0 → vastdb-1.1.1}/vastdb.egg-info/dependency_links.txt +0 -0
  218. {vastdb-1.0.0 → vastdb-1.1.1}/vastdb.egg-info/top_level.txt +0 -0
@@ -4,6 +4,27 @@ All notable changes to this project will be documented in this file.
4
4
 
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
6
6
 
7
+ ## [1.1.1] (2024-08-11)
8
+ [1.1.1]: https://github.com/vast-data/vastdb_sdk/compare/v1.1.0...v1.1.1
9
+
10
+ ### Fixed
11
+ - Combine record batches when inserting `pyarrow.Table`
12
+
13
+ ### Changed
14
+ - Don't require `boto3` for checking bucket existence
15
+ - Improve documentation
16
+
17
+
18
+ ## [1.1.0] (2024-07-25)
19
+ [1.1.0]: https://github.com/vast-data/vastdb_sdk/compare/v1.0.0...v1.1.0
20
+
21
+ ### Fixed
22
+ - Close internal sessions in `select` and `import`
23
+
24
+ ### Added
25
+ - VastDB/Parquet performance benchmark harness
26
+ - Support predicate pushdown over subfields
27
+
7
28
  ## [1.0.0] (2024-07-21)
8
29
  [1.0.0]: https://github.com/vast-data/vastdb_sdk/compare/v0.1.11...v1.0.0
9
30
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vastdb
3
- Version: 1.0.0
3
+ Version: 1.1.1
4
4
  Summary: VAST Data SDK
5
5
  Home-page: https://github.com/vast-data/vastdb_sdk
6
6
  Author: VAST DATA
@@ -19,7 +19,6 @@ Requires-Python: >=3.9.0
19
19
  Description-Content-Type: text/markdown
20
20
  License-File: LICENSE
21
21
  Requires-Dist: aws-requests-auth
22
- Requires-Dist: boto3
23
22
  Requires-Dist: flatbuffers
24
23
  Requires-Dist: ibis-framework==9.0.0
25
24
  Requires-Dist: pyarrow
@@ -4,24 +4,26 @@
4
4
 
5
5
  ## Introduction
6
6
 
7
- `vastdb` is a Python-based SDK designed for interacting with [VAST Database](https://vastdata.com/database) & [VAST Catalog](https://vastdata.com/blog/vast-catalog-treat-your-file-system-like-a-database), enabling schema and table management, efficient ingest, query and modification of columnar data. For more details, see [our whitepaper](https://vastdata.com/whitepaper/#TheVASTDataBase).
7
+ `vastdb` is a Python-based SDK designed for interacting with a [VAST Database](https://vastdata.com/database) and the [VAST Catalog](https://vastdata.com/blog/vast-catalog-treat-your-file-system-like-a-database), enabling schema and table management, efficient ingest, query, and modification of columnar data.
8
+
9
+ For more details about the VAST Database, see [this whitepaper](https://vastdata.com/whitepaper/#TheVASTDataBase).
8
10
 
9
11
  [![vastdb](docs/vastdb.png)](https://vastdata.com/database)
10
12
 
11
- ## Getting started
13
+ ## Getting Started
12
14
 
13
15
  ### Requirements
14
16
 
15
- - Linux client with Python 3.9+ and a network access to the VAST cluster
17
+ - Linux client with Python 3.9 or later, and network access to the VAST Cluster
16
18
  - [Virtual IP pool configured with DNS service](https://support.vastdata.com/s/topic/0TOV40000000FThOAM/configuring-network-access-v50)
17
- - [S3 access & secret keys on VAST cluster](https://support.vastdata.com/s/article/UUID-4d2e7e23-b2fb-7900-d98f-96c31a499626)
19
+ - [S3 access & secret keys on the VAST cluster](https://support.vastdata.com/s/article/UUID-4d2e7e23-b2fb-7900-d98f-96c31a499626)
18
20
  - [Tabular identity policy with the proper permissions](https://support.vastdata.com/s/article/UUID-14322b60-d6a2-89ac-3df0-3dfbb6974182)
19
21
 
20
- ### Required VAST release
22
+ ### Required VAST Cluster release
21
23
 
22
- Currently, VAST DB Python SDK requires `5.0.0-sp10` or later VAST release.
24
+ VAST DB Python SDK requires VAST Cluster release `5.0.0-sp10` or later.
23
25
 
24
- If the cluster is running an older VAST release, please contact customer.support@vastdata.com for more details.
26
+ If your VAST Cluster is running an older release, please contact customer.support@vastdata.com.
25
27
 
26
28
  ### Installation
27
29
 
@@ -29,11 +31,11 @@ If the cluster is running an older VAST release, please contact customer.support
29
31
  pip install vastdb
30
32
  ```
31
33
 
32
- Also, see [our release notes](CHANGELOG.md).
34
+ See the [Release Notes](CHANGELOG.md) for the SDK.
33
35
 
34
- ### Quickstart
36
+ ### Quick Start
35
37
 
36
- Creating schemas and tables + basic inserts and selects:
38
+ Create schemas and tables, basic inserts, and selects:
37
39
 
38
40
  ```python
39
41
  import pyarrow as pa
@@ -74,11 +76,15 @@ with session.transaction() as tx:
74
76
  # the transaction is automatically committed when exiting the context
75
77
  ```
76
78
 
79
+ For configuration examples, see [here](docs/config.md).
80
+
77
81
  Note: the transaction must remain open while the returned [pyarrow.RecordBatchReader](https://arrow.apache.org/docs/python/generated/pyarrow.RecordBatchReader.html) generator is being used.
78
82
 
79
- ## Filters and projections
83
+ ## Use Cases
84
+
85
+ ### Filters and Projections
80
86
 
81
- Our SDK supports predicate and projection pushdown:
87
+ The SDK supports predicate and projection pushdown:
82
88
 
83
89
  ```python
84
90
  from ibis import _
@@ -97,9 +103,9 @@ Our SDK supports predicate and projection pushdown:
97
103
 
98
104
  See [here for more details](docs/predicate.md).
99
105
 
100
- ## Import a single Parquet file via S3 protocol
106
+ ### Import a single Parquet file via S3 protocol
101
107
 
102
- It is possible to efficiently create a table from a Parquet file (without copying it via the client):
108
+ You can efficiently create tables from Parquet files (without copying them via the client):
103
109
 
104
110
  ```python
105
111
  with tempfile.NamedTemporaryFile() as f:
@@ -112,9 +118,9 @@ It is possible to efficiently create a table from a Parquet file (without copyin
112
118
  parquet_files=['/bucket-name/staging/file.parquet'])
113
119
  ```
114
120
 
115
- ## Import multiple Parquet files concurrently via S3 protocol
121
+ ### Import multiple Parquet files concurrently via S3 protocol
116
122
 
117
- We can import multiple files concurrently into a table (by utilizing multiple CNodes' cores):
123
+ Import multiple files concurrently into a table (by using multiple CNodes' cores):
118
124
 
119
125
  ```python
120
126
  schema = tx.bucket('bucket-name').schema('schema-name')
@@ -123,6 +129,27 @@ We can import multiple files concurrently into a table (by utilizing multiple CN
123
129
  parquet_files=[f'/bucket-name/staging/file{i}.parquet' for i in range(10)])
124
130
  ```
125
131
 
132
+
133
+ ### Semi-sorted Projections
134
+
135
+ Create, list and delete [available semi-sorted projections](https://support.vastdata.com/s/article/UUID-e4ca42ab-d15b-6b72-bd6b-f3c77b455de4):
136
+
137
+ ```python
138
+ p = table.create_projection('proj', sorted=['c3'], unsorted=['c1'])
139
+ print(table.projections())
140
+ print(p.get_stats())
141
+ p.drop()
142
+ ```
143
+
144
+ ### Snapshots
145
+
146
+ You can access the VAST Database using [snapshots](https://vastdata.com/blog/bringing-snapshots-to-vasts-element-store):
147
+
148
+ ```python
149
+ snaps = bucket.list_snapshots()
150
+ batches = snaps[0].schema('schema-name').table('table-name').select()
151
+ ```
152
+
126
153
  ## Post-processing
127
154
 
128
155
  ### Export
@@ -136,9 +163,9 @@ with contextlib.closing(pa.parquet.ParquetWriter('/path/to/file.parquet', batche
136
163
  writer.write_batch(batch)
137
164
  ```
138
165
 
139
- ### DuckDB integration
166
+ ### DuckDB Integration
140
167
 
141
- We can use [DuckDB](https://duckdb.org/docs/guides/python/sql_on_arrow.html) to post-process the resulting stream of [PyArrow record batches](https://arrow.apache.org/docs/python/data.html#record-batches):
168
+ Use [DuckDB](https://duckdb.org/docs/guides/python/sql_on_arrow.html) to post-process the resulting stream of [PyArrow record batches](https://arrow.apache.org/docs/python/data.html#record-batches):
142
169
 
143
170
  ```python
144
171
  from ibis import _
@@ -152,31 +179,11 @@ with session.transaction() as tx:
152
179
  print(conn.execute("SELECT sum(c1) FROM batches").arrow())
153
180
  ```
154
181
 
155
- Note: the transaction must be active while DuckDB query is executing and fetching results using the Python SDK.
156
-
157
- ## Semi-sorted projections
158
-
159
- We can create, list and delete [available semi-sorted projections](https://support.vastdata.com/s/article/UUID-e4ca42ab-d15b-6b72-bd6b-f3c77b455de4):
160
-
161
- ```python
162
- p = table.create_projection('proj', sorted=['c3'], unsorted=['c1'])
163
- print(table.projections())
164
- print(p.get_stats())
165
- p.drop()
166
- ```
167
-
168
- ## Snapshots
169
-
170
- It is possible to use [snapshots](https://vastdata.com/blog/bringing-snapshots-to-vasts-element-store) for accessing the Database:
171
-
172
- ```python
173
- snaps = bucket.list_snapshots()
174
- batches = snaps[0].schema('schema-name').table('table-name').select()
175
- ```
182
+ Note: the transaction must be active while the DuckDB query is executing and fetching results using the Python SDK.
176
183
 
177
184
  ## VAST Catalog
178
185
 
179
- [VAST Catalog](https://vastdata.com/blog/vast-catalog-treat-your-file-system-like-a-database) can be queried as a regular table:
186
+ The [VAST Catalog](https://vastdata.com/blog/vast-catalog-treat-your-file-system-like-a-database) can be queried as a regular table:
180
187
 
181
188
  ```python
182
189
  import pyarrow as pa
@@ -201,8 +208,11 @@ with session.transaction() as tx:
201
208
  print("Distinct element types on the system:")
202
209
  print(distinct_elements)
203
210
  ```
211
+ ## More Information
204
212
 
205
- See the following blog posts for more examples:
213
+ See these blog posts for more examples:
206
214
 
207
215
  - https://vastdata.com/blog/the-vast-catalog-in-action-part-1
208
216
  - https://vastdata.com/blog/the-vast-catalog-in-action-part-2
217
+
218
+ See also the [full VAST DB Python SDK documentation](https://vastdb-sdk.readthedocs.io/en/v1.1.0/).
@@ -1,5 +1,4 @@
1
1
  aws-requests-auth
2
- boto3
3
2
  flatbuffers
4
3
  ibis-framework==9.0.0
5
4
  pyarrow
@@ -29,7 +29,7 @@ setup(
29
29
  name='vastdb',
30
30
  python_requires='>=3.9.0',
31
31
  description='VAST Data SDK',
32
- version='1.0.0' + suffix,
32
+ version='1.1.1' + suffix,
33
33
  url='https://github.com/vast-data/vastdb_sdk',
34
34
  author='VAST DATA',
35
35
  author_email='hello@vastdata.com',
@@ -35,6 +35,7 @@ from ibis.expr.operations.logical import (
35
35
  )
36
36
  from ibis.expr.operations.relations import Field
37
37
  from ibis.expr.operations.strings import StringContains
38
+ from ibis.expr.operations.structs import StructField
38
39
 
39
40
  import vast_flatbuf.org.apache.arrow.computeir.flatbuf.BinaryLiteral as fb_binary_lit
40
41
  import vast_flatbuf.org.apache.arrow.computeir.flatbuf.BooleanLiteral as fb_bool_lit
@@ -182,7 +183,7 @@ class Predicate:
182
183
  _logger.debug('OR args: %s op %s', or_args, op)
183
184
  inner_offsets = []
184
185
 
185
- prev_field_name = None
186
+ prev_field_path = None
186
187
  for inner_op in or_args:
187
188
  _logger.debug('inner_op %s', inner_op)
188
189
  op_type = type(inner_op)
@@ -216,28 +217,38 @@ class Predicate:
216
217
  if not isinstance(literal, Literal):
217
218
  raise NotImplementedError(self.expr)
218
219
 
220
+ field_path = []
221
+ while isinstance(column, StructField):
222
+ column, subfield_name = column.args
223
+ field_path.append(subfield_name)
224
+
219
225
  if not isinstance(column, Field):
220
226
  raise NotImplementedError(self.expr)
221
227
 
222
- field_name = column.name
223
- if prev_field_name is None:
224
- prev_field_name = field_name
225
- elif prev_field_name != field_name:
228
+ field_path.append(column.name)
229
+ field_path.reverse() # first entry should be the top-level column name
230
+
231
+ if prev_field_path is None:
232
+ prev_field_path = field_path
233
+ elif prev_field_path != field_path:
226
234
  raise NotImplementedError(self.expr)
227
235
 
228
- node = self.nodes_map[field_name]
236
+ nodes_map = self.nodes_map
237
+ for name in field_path:
238
+ node = nodes_map[name]
239
+ nodes_map = node.children_map
240
+
229
241
  # TODO: support predicate pushdown for leaf nodes (ORION-160338)
230
242
  if node.children:
231
243
  raise NotImplementedError(node.field) # no predicate pushdown for nested columns
232
244
  column_offset = self.build_column(position=node.index)
233
- field = self.schema.field(field_name)
234
245
  for literal in literals:
235
246
  args_offsets = [column_offset]
236
247
  if literal is not None:
237
- args_offsets.append(self.build_literal(field=field, value=literal.value))
248
+ args_offsets.append(self.build_literal(field=node.field, value=literal.value))
238
249
  if builder_func == self.build_between:
239
- args_offsets.append(self.build_literal(field=field, value=lower.value))
240
- args_offsets.append(self.build_literal(field=field, value=upper.value))
250
+ args_offsets.append(self.build_literal(field=node.field, value=lower.value))
251
+ args_offsets.append(self.build_literal(field=node.field, value=upper.value))
241
252
 
242
253
  inner_offsets.append(builder_func(*args_offsets))
243
254
 
@@ -572,6 +583,8 @@ class FieldNode:
572
583
  else:
573
584
  self.children = [] # for non-nested types
574
585
 
586
+ self.children_map = {c.field.name: c for c in self.children}
587
+
575
588
  def _iter_to_root(self) -> Iterator['FieldNode']:
576
589
  yield self
577
590
  if self.parent is not None:
@@ -811,7 +824,16 @@ class VastdbApi:
811
824
  _logger.critical(msg)
812
825
  raise NotImplementedError(msg)
813
826
 
827
+ def __enter__(self):
828
+ """Allow using this session as a context manager."""
829
+ return self
830
+
831
+ def __exit__(self, *args):
832
+ """Make sure that the connections closed."""
833
+ self._session.close()
834
+
814
835
  def with_endpoint(self, endpoint):
836
+ """Open a new session for targeting a specific endpoint."""
815
837
  return VastdbApi(endpoint=endpoint,
816
838
  access_key=self.access_key,
817
839
  secret_key=self.secret_key,
@@ -1276,6 +1298,15 @@ class VastdbApi:
1276
1298
 
1277
1299
  return columns, next_key, is_truncated, count
1278
1300
 
1301
def head_bucket(self, bucket_name):
    """
    Issue a HEAD request against the URL of `bucket_name`.

    Reimplemented here, instead of depending on boto3, for checking the existence of a bucket.
    https://docs.aws.amazon.com/AmazonS3/latest/API/API_HeadBucket.html

    Returns whatever `self._request` returns for that request.
    """
    bucket_url = self._url(bucket=bucket_name)
    return self._request(method="HEAD", url=bucket_url)
1309
+
1279
1310
  def begin_transaction(self, client_tags=[], expected_retvals=[]):
1280
1311
  """
1281
1312
  POST /?transaction HTTP/1.1
@@ -0,0 +1,87 @@
1
+ from typing import List, Optional
2
+
3
+ from vastdb.bench.perf_bench.common.constants import (
4
+ LOCAL_FS_DS_PATH,
5
+ NFS_DS_PATH, # noqa: F401
6
+ S3_DS_PATH, # noqa: F401
7
+ ParquetCompression,
8
+ VastConnDetails,
9
+ )
10
+ from vastdb.bench.perf_bench.dataset.schemas import DEFAULT_BARS_COLUMNS
11
+ from vastdb.bench.perf_bench.orchestrate.scenario import BenchScenario
12
+ from vastdb.bench.perf_bench.orchestrate.scenario_generator import (
13
+ generate_perf_bench_scenarios,
14
+ )
15
+ from vastdb.bench.perf_bench.query.query import QueryBackend
16
+
17
+
18
def build_scenarios(
    base_key: str,
    conn_details: Optional[VastConnDetails] = None,
) -> List[BenchScenario]:
    """Build the scenario list for the "mega combo" benchmark.

    Each local below is one collection of candidate values that is handed to
    ``generate_perf_bench_scenarios``. Commented-out entries are alternatives
    that can be re-enabled by hand.

    Args:
        base_key: Forwarded unchanged as ``base_key`` to the generator.
        conn_details: Connection details to forward; when omitted (or falsy)
            a default-constructed ``VastConnDetails`` is used instead.

    Returns:
        The scenarios returned by ``generate_perf_bench_scenarios``.
    """
    connection = conn_details or VastConnDetails()

    backends = [
        QueryBackend.pyarrow,
        # QueryBackend.vastdb,
    ]
    universes = ("Single", "Tiny", "SmallSeq", "Medium", "Medium2", "Large")
    business_days = [
        1,  # 1d
        5,  # 1w
        # 22,  # 1m
        65,  # 3m
        # 130,  # 6m
        252,  # 1y
    ]

    # Arrow-specific options
    fs_paths = [
        # NFS_DS_PATH,
        LOCAL_FS_DS_PATH,
        # S3_DS_PATH,
    ]
    # make sure you have previously generated the respective datasets
    rowgroup_sizes = [
        # 64 * 1024,
        # 128 * 1024,
        256 * 1024,
        # 512 * 1024,
        # DEFAULT_ROW_GROUP_SIZE,
        # int(1.5 * 1024 * 1024),
    ]
    compressions = [ParquetCompression.LZ4]
    arrow_batching_specs = [
        # {"batch_size": 2*2**16, "batch_readahead": 16, "fragment_readahead": 4},
        # {"batch_size": 6*2**16, "batch_readahead": 12, "fragment_readahead": 4},
        # DEFAULT_ARROW_KWARGS,
        {"batch_size": 16 * 2 ** 16, "batch_readahead": 16, "fragment_readahead": 4},
        # {"batch_size": 24 * 2 ** 16, "batch_readahead": 12, "fragment_readahead": 4},
        # {"batch_size": 32*2**16, "batch_readahead": 12, "fragment_readahead": 4},
        # {"batch_size": 64*2**16, "batch_readahead": 12, "fragment_readahead": 4},
        # {"batch_size": 128*2**16, "batch_readahead": 12, "fragment_readahead": 4},
    ]

    # VastDB-specific options
    sub_splits = (
        # 1,  # Default
        # 4,
        8,
        # 16,
    )
    row_groups_per_sub_split = (
        # 1,
        # 4,
        8,  # Default
    )

    return generate_perf_bench_scenarios(
        base_key=base_key,
        conn_details=connection,
        query_backends=backends,
        columns_choices=(DEFAULT_BARS_COLUMNS,),
        universe_choices=universes,
        num_bdays=business_days,
        fs_path_choices=fs_paths,
        rowgroup_size_choices=rowgroup_sizes,
        compression_choices=compressions,
        arrow_batching_spec_choices=arrow_batching_specs,
        vdb_num_sub_splits_choices=sub_splits,
        vdb_num_row_groups_per_sub_split_choices=row_groups_per_sub_split,
    )
@@ -0,0 +1,225 @@
1
+ from pathlib import Path
2
+ from typing import Annotated, List, Optional
3
+
4
+ import pandas as pd
5
+ import typer
6
+
7
+ from vastdb.bench.perf_bench.common.constants import (
8
+ DEFAULT_END_T,
9
+ DEFAULT_RESULTS_DIR,
10
+ DEFAULT_START_T,
11
+ DFAULT_PARQUET_COMPRESSION,
12
+ LOCAL_FS_DS_PATH,
13
+ LogLevel,
14
+ ParquetCompression,
15
+ )
16
+ from vastdb.bench.perf_bench.common.log_utils import (
17
+ get_logger,
18
+ set_log_file,
19
+ set_log_level,
20
+ )
21
+ from vastdb.bench.perf_bench.common.utils import getenv_flag, load_module_from_path
22
+ from vastdb.bench.perf_bench.dataset.generate_secmaster import (
23
+ SM_PATH,
24
+ generate_secmaster,
25
+ )
26
+ from vastdb.bench.perf_bench.dataset.generate_stocks_dataset import (
27
+ generate_concurrent_synthetic_stock_1m_bars,
28
+ )
29
+ from vastdb.bench.perf_bench.orchestrate.scenario import BenchScenario
30
+ from vastdb.bench.perf_bench.run import run_scenarios
31
+
32
# The Typer application; pretty exception rendering is driven by the
# TYPER_PRETTY_EXCEPTIONS environment flag (as interpreted by getenv_flag).
app = typer.Typer(
    pretty_exceptions_enable=getenv_flag("TYPER_PRETTY_EXCEPTIONS"),
)

# Directory containing this module; used below to locate the default bench generator.
_MY_DIR = Path(__file__).parent
35
+
36
+
37
# noinspection PyUnusedLocal
@app.callback()
def cli_common(
    ctx: typer.Context,
    verbose: Annotated[
        bool,
        typer.Option("--verbose", is_flag=True),
    ] = False,
    log_level: Annotated[
        Optional[LogLevel],
        typer.Option("--log-level", case_sensitive=False),
    ] = None,
    log_file: Annotated[
        Optional[Path],
        typer.Option(
            "--log-file",
            writable=True,
            file_okay=True,
            dir_okay=False,
            resolve_path=True,
        ),
    ] = None,
):
    # App-level callback: applies the global logging options shared by all commands.
    # Documented with comments rather than a docstring so that the generated
    # --help text stays exactly as it was.

    # --verbose wins over any explicitly supplied --log-level.
    effective_level = LogLevel.DEBUG if verbose else log_level
    if effective_level:
        set_log_level(effective_level)

    # Only redirect logging to a file when one was requested.
    if log_file:
        set_log_file(log_file)

    get_logger(__name__).info("CLI common setup done.")
73
+
74
+
75
+ def _positive_int(value: str) -> int:
76
+ i_value = int(value)
77
+ if i_value <= 0:
78
+ raise typer.BadParameter(f"Must be a positive integer: {value}.")
79
+ return i_value
80
+
81
+
82
# noinspection PyUnusedLocal
@app.command()
def run_bench(
    ctx: typer.Context,
    bench_name: Annotated[
        str,
        typer.Option(
            "--bench-name",
        ),
    ],
    parallelism: Annotated[
        List[int],
        typer.Option(
            "--parallelism",
            # The option may be repeated; every supplied value must be positive.
            callback=lambda par: [_positive_int(p) for p in par],
        ),
    ],
    runs_per_bench: Annotated[
        int,
        typer.Option(
            "--runs-per-bench",
            callback=_positive_int,
        ),
    ] = 3,
    bench_generator_path: Annotated[
        Path,
        typer.Option(
            "--bench-generator-path",
            readable=True,
            file_okay=True,
            dir_okay=False,
            resolve_path=True,
        ),
    ] = _MY_DIR / "bench_repo" / "mega_combo.py",
    results_base_dir: Annotated[
        Path,
        typer.Option(
            # This option used to be declared as "--log-file" (copied from the
            # global logging option), which made the results directory
            # reachable only through a misleading flag name.
            "--results-base-dir",
            help="Base directory for the benchmark results.",
            writable=True,
            file_okay=False,
            dir_okay=True,
            resolve_path=True,
        ),
    ] = DEFAULT_RESULTS_DIR,
):
    """Run the scenarios built by a bench generator module.

    The module at ``bench_generator_path`` is loaded and its
    ``build_scenarios(base_key=...)`` is called once with the stripped bench
    name. The resulting scenarios are then run once per requested
    parallelism value, in the order the values were given.

    Raises:
        typer.BadParameter: If the bench name is empty after stripping whitespace.
    """
    if not (bench_name := bench_name.strip()):
        raise typer.BadParameter("Bench name must be non-empty.")

    mod = load_module_from_path(bench_generator_path)
    scenarios: List[BenchScenario] = mod.build_scenarios(base_key=bench_name)
    for para in parallelism:
        run_scenarios(
            scenarios=scenarios,
            runs_per_bench=runs_per_bench,
            parallelism=para,
            # run_scenarios is handed the directory as a plain string.
            results_base_dir=str(results_base_dir),
        )
139
+
140
+
141
# noinspection PyUnusedLocal
@app.command()
def build_secmaster(ctx: typer.Context):
    # CLI command that simply delegates to generate_secmaster(); `ctx` is
    # accepted but not used. Documented with comments rather than a docstring
    # so that the command's --help text stays exactly as it was.
    generate_secmaster()
147
+
148
+
149
# noinspection PyUnusedLocal
@app.command()
def build_dataset(
    ctx: typer.Context,
    start_date: Annotated[
        str,
        typer.Option(
            "--start-date",
            help="Start date for the dataset.",
            # The callback replaces the raw string with a normalized pandas Timestamp,
            # so the function body works with Timestamps despite the `str` annotation.
            callback=lambda d: pd.Timestamp(d).normalize(),
        ),
    ] = DEFAULT_START_T.strftime("%Y%m%d"),
    end_date: Annotated[
        str,
        typer.Option(
            "--end-date",
            # Previously read "Start date ..." (copy-paste from --start-date).
            help="End date for the dataset.",
            callback=lambda d: pd.Timestamp(d).normalize(),
        ),
    ] = DEFAULT_END_T.strftime("%Y%m%d"),
    output_dir: Annotated[
        Path,
        typer.Option(
            "--output-dir",
            writable=True,
            file_okay=False,
            dir_okay=True,
            resolve_path=True,
        ),
    ] = LOCAL_FS_DS_PATH,
    parallelism: Annotated[
        int,
        typer.Option(
            "--parallelism",
            callback=_positive_int,
        ),
    ] = 6,
    row_group_size: Annotated[
        int,
        typer.Option(
            "--row-group-size",
            callback=_positive_int,
            help=(
                "Row group size for the dataset, some common values are: 64 * 1024, 128 * 1024, 256"
                " * 1024, 512 * 1024,1024 * 1024, 1.5 * 1024 * 1024."
            ),
        ),
    ] = 256 * 1024,
    compression: Annotated[
        ParquetCompression,
        typer.Option(
            "--compression",
            help="Parquet compression algorithm.",
        ),
    ] = DFAULT_PARQUET_COMPRESSION,
):
    """Generate the synthetic stock 1-minute-bars dataset.

    The options are validated first. If ``SM_PATH`` is not an existing file,
    the security master is generated before the bars are produced.

    Raises:
        typer.BadParameter: If the row group size is below 1024, the
            parallelism is below 1, or the start date is after the end date.
    """
    if row_group_size < 1024:
        raise typer.BadParameter("Row group size must be at least 1024.")
    if parallelism < 1:
        raise typer.BadParameter("Parallelism must be at least 1.")
    # Equal start and end dates are accepted; only a reversed range is rejected.
    if start_date > end_date:
        raise typer.BadParameter("Start date must be before the end date.")
    if not SM_PATH.is_file():
        generate_secmaster()
    generate_concurrent_synthetic_stock_1m_bars(
        from_t=start_date,
        to_t=end_date,
        output_dir=output_dir,
        num_workers=parallelism,
        row_group_size=row_group_size,
        compression=compression,
    )
221
+
222
+
223
if __name__ == "__main__":
    # Start the Typer application only when this file is executed directly;
    # merely importing the module must not run the CLI.
    app()