vastdb 0.1.11__tar.gz → 1.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (218) hide show
  1. {vastdb-0.1.11 → vastdb-1.1.0}/CHANGELOG.md +17 -0
  2. vastdb-1.1.0/CONTRIBUTING.md +33 -0
  3. {vastdb-0.1.11/vastdb.egg-info → vastdb-1.1.0}/PKG-INFO +2 -2
  4. {vastdb-0.1.11 → vastdb-1.1.0}/README.md +0 -2
  5. {vastdb-0.1.11 → vastdb-1.1.0}/setup.py +2 -2
  6. {vastdb-0.1.11 → vastdb-1.1.0}/vastdb/_internal.py +46 -17
  7. vastdb-1.1.0/vastdb/bench/perf_bench/bench_repo/mega_combo.py +87 -0
  8. vastdb-1.1.0/vastdb/bench/perf_bench/cli.py +225 -0
  9. vastdb-1.1.0/vastdb/bench/perf_bench/common/__init__.py +0 -0
  10. vastdb-1.1.0/vastdb/bench/perf_bench/common/constants.py +96 -0
  11. vastdb-1.1.0/vastdb/bench/perf_bench/common/log_utils.py +67 -0
  12. vastdb-1.1.0/vastdb/bench/perf_bench/common/types.py +34 -0
  13. vastdb-1.1.0/vastdb/bench/perf_bench/common/utils.py +219 -0
  14. vastdb-1.1.0/vastdb/bench/perf_bench/dataset/__init__.py +0 -0
  15. vastdb-1.1.0/vastdb/bench/perf_bench/dataset/generate_secmaster.py +105 -0
  16. vastdb-1.1.0/vastdb/bench/perf_bench/dataset/generate_stocks_dataset.py +242 -0
  17. vastdb-1.1.0/vastdb/bench/perf_bench/dataset/schemas.py +101 -0
  18. vastdb-1.1.0/vastdb/bench/perf_bench/dataset/secmaster.py +33 -0
  19. vastdb-1.1.0/vastdb/bench/perf_bench/orchestrate/__init__.py +0 -0
  20. vastdb-1.1.0/vastdb/bench/perf_bench/orchestrate/bench_spec.py +91 -0
  21. vastdb-1.1.0/vastdb/bench/perf_bench/orchestrate/results_helpers.py +126 -0
  22. vastdb-1.1.0/vastdb/bench/perf_bench/orchestrate/scenario.py +109 -0
  23. vastdb-1.1.0/vastdb/bench/perf_bench/orchestrate/scenario_generator.py +144 -0
  24. vastdb-1.1.0/vastdb/bench/perf_bench/query/__init__.py +0 -0
  25. vastdb-1.1.0/vastdb/bench/perf_bench/query/arrow_common.py +59 -0
  26. vastdb-1.1.0/vastdb/bench/perf_bench/query/query.py +42 -0
  27. vastdb-1.1.0/vastdb/bench/perf_bench/query/query_pyarrow.py +70 -0
  28. vastdb-1.1.0/vastdb/bench/perf_bench/query/query_vastdb.py +78 -0
  29. vastdb-1.1.0/vastdb/bench/perf_bench/run.py +79 -0
  30. {vastdb-0.1.11 → vastdb-1.1.0}/vastdb/table.py +34 -34
  31. vastdb-1.1.0/vastdb/tests/__init__.py +0 -0
  32. {vastdb-0.1.11 → vastdb-1.1.0}/vastdb/tests/test_nested.py +58 -0
  33. vastdb-1.1.0/vastdb/vast_tests/__init__.py +0 -0
  34. {vastdb-0.1.11 → vastdb-1.1.0/vastdb.egg-info}/PKG-INFO +2 -2
  35. {vastdb-0.1.11 → vastdb-1.1.0}/vastdb.egg-info/SOURCES.txt +26 -0
  36. {vastdb-0.1.11 → vastdb-1.1.0}/LICENSE +0 -0
  37. {vastdb-0.1.11 → vastdb-1.1.0}/MANIFEST.in +0 -0
  38. {vastdb-0.1.11 → vastdb-1.1.0}/requirements.txt +0 -0
  39. {vastdb-0.1.11 → vastdb-1.1.0}/setup.cfg +0 -0
  40. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/__init__.py +0 -0
  41. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/__init__.py +0 -0
  42. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/__init__.py +0 -0
  43. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/__init__.py +0 -0
  44. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/__init__.py +0 -0
  45. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Aggregate.py +0 -0
  46. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/ArraySlice.py +0 -0
  47. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/ArraySubscript.py +0 -0
  48. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/BinaryLiteral.py +0 -0
  49. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/BooleanLiteral.py +0 -0
  50. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Bound.py +0 -0
  51. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Call.py +0 -0
  52. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/CaseFragment.py +0 -0
  53. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Cast.py +0 -0
  54. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/ConcreteBoundImpl.py +0 -0
  55. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/ConditionalCase.py +0 -0
  56. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/CurrentRow.py +0 -0
  57. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/DateLiteral.py +0 -0
  58. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/DecimalLiteral.py +0 -0
  59. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Deref.py +0 -0
  60. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/DurationLiteral.py +0 -0
  61. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Expression.py +0 -0
  62. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/ExpressionImpl.py +0 -0
  63. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/FieldIndex.py +0 -0
  64. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/FieldRef.py +0 -0
  65. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Filter.py +0 -0
  66. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/FixedSizeBinaryLiteral.py +0 -0
  67. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Float16Literal.py +0 -0
  68. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Float32Literal.py +0 -0
  69. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Float64Literal.py +0 -0
  70. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Following.py +0 -0
  71. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Frame.py +0 -0
  72. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Grouping.py +0 -0
  73. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Int16Literal.py +0 -0
  74. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Int32Literal.py +0 -0
  75. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Int64Literal.py +0 -0
  76. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Int8Literal.py +0 -0
  77. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/IntervalLiteral.py +0 -0
  78. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/IntervalLiteralDaysMilliseconds.py +0 -0
  79. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/IntervalLiteralImpl.py +0 -0
  80. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/IntervalLiteralMonths.py +0 -0
  81. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Join.py +0 -0
  82. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/JoinKind.py +0 -0
  83. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/KeyValue.py +0 -0
  84. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Limit.py +0 -0
  85. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/ListLiteral.py +0 -0
  86. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Literal.py +0 -0
  87. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/LiteralColumn.py +0 -0
  88. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/LiteralImpl.py +0 -0
  89. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/LiteralRelation.py +0 -0
  90. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/MapKey.py +0 -0
  91. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/MapLiteral.py +0 -0
  92. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/OrderBy.py +0 -0
  93. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Ordering.py +0 -0
  94. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Plan.py +0 -0
  95. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Preceding.py +0 -0
  96. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Project.py +0 -0
  97. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/RelId.py +0 -0
  98. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Relation.py +0 -0
  99. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/RelationImpl.py +0 -0
  100. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/SetOpKind.py +0 -0
  101. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/SetOperation.py +0 -0
  102. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/SimpleCase.py +0 -0
  103. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/SortKey.py +0 -0
  104. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Source.py +0 -0
  105. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/StringLiteral.py +0 -0
  106. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/StructField.py +0 -0
  107. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/StructLiteral.py +0 -0
  108. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/TimeLiteral.py +0 -0
  109. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/TimestampLiteral.py +0 -0
  110. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/UInt16Literal.py +0 -0
  111. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/UInt32Literal.py +0 -0
  112. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/UInt64Literal.py +0 -0
  113. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/UInt8Literal.py +0 -0
  114. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Unbounded.py +0 -0
  115. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/WindowCall.py +0 -0
  116. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/__init__.py +0 -0
  117. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/Binary.py +0 -0
  118. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/Block.py +0 -0
  119. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/BodyCompression.py +0 -0
  120. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/BodyCompressionMethod.py +0 -0
  121. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/Bool.py +0 -0
  122. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/Buffer.py +0 -0
  123. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/CompressionType.py +0 -0
  124. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/Date.py +0 -0
  125. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/DateUnit.py +0 -0
  126. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/Decimal.py +0 -0
  127. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/DictionaryBatch.py +0 -0
  128. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/DictionaryEncoding.py +0 -0
  129. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/DictionaryKind.py +0 -0
  130. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/Duration.py +0 -0
  131. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/Endianness.py +0 -0
  132. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/Feature.py +0 -0
  133. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/Field.py +0 -0
  134. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/FieldNode.py +0 -0
  135. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/FixedSizeBinary.py +0 -0
  136. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/FixedSizeList.py +0 -0
  137. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/FloatingPoint.py +0 -0
  138. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/Footer.py +0 -0
  139. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/Int.py +0 -0
  140. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/Interval.py +0 -0
  141. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/IntervalUnit.py +0 -0
  142. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/KeyValue.py +0 -0
  143. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/LargeBinary.py +0 -0
  144. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/LargeList.py +0 -0
  145. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/LargeUtf8.py +0 -0
  146. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/List.py +0 -0
  147. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/Map.py +0 -0
  148. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/Message.py +0 -0
  149. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/MessageHeader.py +0 -0
  150. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/MetadataVersion.py +0 -0
  151. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/Null.py +0 -0
  152. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/Precision.py +0 -0
  153. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/RecordBatch.py +0 -0
  154. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/Schema.py +0 -0
  155. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/SparseMatrixCompressedAxis.py +0 -0
  156. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/SparseMatrixIndexCSX.py +0 -0
  157. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/SparseTensor.py +0 -0
  158. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/SparseTensorIndex.py +0 -0
  159. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/SparseTensorIndexCOO.py +0 -0
  160. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/SparseTensorIndexCSF.py +0 -0
  161. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/Struct_.py +0 -0
  162. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/Tensor.py +0 -0
  163. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/TensorDim.py +0 -0
  164. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/Time.py +0 -0
  165. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/TimeUnit.py +0 -0
  166. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/Timestamp.py +0 -0
  167. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/Type.py +0 -0
  168. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/Union.py +0 -0
  169. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/UnionMode.py +0 -0
  170. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/Utf8.py +0 -0
  171. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/org/apache/arrow/flatbuf/__init__.py +0 -0
  172. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/tabular/AlterColumnRequest.py +0 -0
  173. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/tabular/AlterProjectionTableRequest.py +0 -0
  174. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/tabular/AlterSchemaRequest.py +0 -0
  175. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/tabular/AlterTableRequest.py +0 -0
  176. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/tabular/Column.py +0 -0
  177. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/tabular/ColumnType.py +0 -0
  178. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/tabular/CreateProjectionRequest.py +0 -0
  179. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/tabular/CreateSchemaRequest.py +0 -0
  180. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/tabular/GetProjectionTableStatsResponse.py +0 -0
  181. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/tabular/GetTableStatsResponse.py +0 -0
  182. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/tabular/ImportDataRequest.py +0 -0
  183. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/tabular/ListProjectionsResponse.py +0 -0
  184. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/tabular/ListSchemasResponse.py +0 -0
  185. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/tabular/ListTablesResponse.py +0 -0
  186. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/tabular/ObjectDetails.py +0 -0
  187. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/tabular/S3File.py +0 -0
  188. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/tabular/VipRange.py +0 -0
  189. {vastdb-0.1.11 → vastdb-1.1.0}/vast_flatbuf/tabular/__init__.py +0 -0
  190. {vastdb-0.1.11 → vastdb-1.1.0}/vastdb/__init__.py +0 -0
  191. {vastdb-0.1.11 → vastdb-1.1.0}/vastdb/bench/__init__.py +0 -0
  192. {vastdb-0.1.11/vastdb/tests → vastdb-1.1.0/vastdb/bench/perf_bench}/__init__.py +0 -0
  193. {vastdb-0.1.11/vastdb/vast_tests → vastdb-1.1.0/vastdb/bench/perf_bench/bench_repo}/__init__.py +0 -0
  194. {vastdb-0.1.11 → vastdb-1.1.0}/vastdb/bench/test_perf.py +0 -0
  195. {vastdb-0.1.11 → vastdb-1.1.0}/vastdb/bench/test_sample.py +0 -0
  196. {vastdb-0.1.11 → vastdb-1.1.0}/vastdb/bucket.py +0 -0
  197. {vastdb-0.1.11 → vastdb-1.1.0}/vastdb/config.py +0 -0
  198. {vastdb-0.1.11 → vastdb-1.1.0}/vastdb/conftest.py +0 -0
  199. {vastdb-0.1.11 → vastdb-1.1.0}/vastdb/errors.py +0 -0
  200. {vastdb-0.1.11 → vastdb-1.1.0}/vastdb/features.py +0 -0
  201. {vastdb-0.1.11 → vastdb-1.1.0}/vastdb/schema.py +0 -0
  202. {vastdb-0.1.11 → vastdb-1.1.0}/vastdb/session.py +0 -0
  203. {vastdb-0.1.11 → vastdb-1.1.0}/vastdb/tests/metrics.py +0 -0
  204. {vastdb-0.1.11 → vastdb-1.1.0}/vastdb/tests/test_duckdb.py +0 -0
  205. {vastdb-0.1.11 → vastdb-1.1.0}/vastdb/tests/test_imports.py +0 -0
  206. {vastdb-0.1.11 → vastdb-1.1.0}/vastdb/tests/test_projections.py +0 -0
  207. {vastdb-0.1.11 → vastdb-1.1.0}/vastdb/tests/test_sanity.py +0 -0
  208. {vastdb-0.1.11 → vastdb-1.1.0}/vastdb/tests/test_schemas.py +0 -0
  209. {vastdb-0.1.11 → vastdb-1.1.0}/vastdb/tests/test_tables.py +0 -0
  210. {vastdb-0.1.11 → vastdb-1.1.0}/vastdb/tests/test_util.py +0 -0
  211. {vastdb-0.1.11 → vastdb-1.1.0}/vastdb/tests/util.py +0 -0
  212. {vastdb-0.1.11 → vastdb-1.1.0}/vastdb/transaction.py +0 -0
  213. {vastdb-0.1.11 → vastdb-1.1.0}/vastdb/util.py +0 -0
  214. {vastdb-0.1.11 → vastdb-1.1.0}/vastdb/vast_tests/test_ha.py +0 -0
  215. {vastdb-0.1.11 → vastdb-1.1.0}/vastdb/vast_tests/test_scale.py +0 -0
  216. {vastdb-0.1.11 → vastdb-1.1.0}/vastdb.egg-info/dependency_links.txt +0 -0
  217. {vastdb-0.1.11 → vastdb-1.1.0}/vastdb.egg-info/requires.txt +0 -0
  218. {vastdb-0.1.11 → vastdb-1.1.0}/vastdb.egg-info/top_level.txt +0 -0
@@ -4,6 +4,23 @@ All notable changes to this project will be documented in this file.
4
4
 
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
6
6
 
7
+ ## [1.1.0] (2024-07-25)
8
+ [1.1.0]: https://github.com/vast-data/vastdb_sdk/compare/v1.0.0...v1.1.0
9
+
10
+ ## Fixed
11
+ - Close internal sessions in `select` and `import`
12
+
13
+ ## Added
14
+ - VastDB/Parquet performance benchmark harness
15
+ - Support predicate pushdown over subfields
16
+
17
+ ## [1.0.0] (2024-07-21)
18
+ [1.0.0]: https://github.com/vast-data/vastdb_sdk/compare/v0.1.11...v1.0.0
19
+
20
+ ## Added
21
+ - Add CLA (`CONTRIBUTING.md`)
22
+ - Support setting HTTP timeouts
23
+
7
24
  ## [0.1.11] (2024-07-17)
8
25
  [0.1.11]: https://github.com/vast-data/vastdb_sdk/compare/v0.1.10...v0.1.11
9
26
 
@@ -0,0 +1,33 @@
1
+ # VAST DB SDK Contribution License Agreement
2
+
3
+ Thank you for your interest in the VAST DB SDK project (“Project”). You hereby accept and agree to the following terms and conditions (“Agreement”) for Your present and future Contributions (as defined below) submitted to the Project or as part of it. Except for the license granted herein to the Project originator – Vast Data Ltd. together with any of its affiliates, assignees, or successors (“Vast Data” or “us” or “we”), You reserve all right, title, and interest in and to Your Contributions.
4
+
5
+ ## Definitions.
6
+
7
+ "You" (or "Your") shall mean the copyright owner or legal entity authorized by the copyright owner that is making this Agreement with Vast Data. For legal entities, the entity making a Contribution and all other entities that control, are controlled by, or are under common control with that entity are considered to be a single contributor. For the purposes of this definition, "control" means (i) the power, directly or indirectly, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.
8
+
9
+ "Contribution" shall mean the code, documentation, or any original work of authorship, including any modifications or additions to an existing work, that is intentionally submitted by You to the Project (or to Vast Data) for inclusion in, or documentation of, the Project (the "Work"). For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Project or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Project for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by You as "Not a Contribution."
10
+
11
+ ## Licenses
12
+
13
+ Grant of Copyright License. Subject to the terms and conditions of this Agreement, You hereby grant to Vast Data and, at Vast Data’s discretion, its affiliates, a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, sublicense, sell, and distribute Your Contributions and such derivative works.
14
+
15
+ Grant of Patent License. Subject to the terms and conditions of this Agreement, You hereby grant to Vast Data and, at Vast Data’s discretion, its affiliates a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by You that are necessarily infringed by Your Contribution(s) alone or by the combination of Your Contribution(s) with the Work to which such Contribution(s) was submitted. If any entity institutes patent litigation against You or any other entity (including a cross-claim or counterclaim in a lawsuit) alleging that your Contribution, or the Work to which you have contributed, constitutes a direct or contributory patent infringement, then any patent licenses granted to that entity under this Agreement for that Contribution or Work shall terminate as of the date such litigation is filed.
16
+
17
+ ## Representations
18
+
19
+ You represent that You are legally entitled to grant the above license. If your employer(s) has rights to intellectual property that you create that includes Your Contributions, You represent that you have received permission to make Contributions on behalf of that employer, that your employer has waived such rights for your Contributions to Vast Data, or that your employer has executed a separate written agreement with Vast Data.
20
+
21
+ You represent that each of Your Contributions is Your original creation (see below for submissions on behalf of others). You represent that Your Contribution submissions include complete details of any third-party license or other restriction (including, but not limited to, related patents and trademarks) of which you are personally aware, and which are associated with any part of Your Contributions.
22
+
23
+ You are not expected to provide support for Your Contributions, except to the extent You desire to provide support. You may provide support for free, for a fee, or not at all.
24
+
25
+ UNLESS REQUIRED BY APPLICABLE LAW, AGREED TO IN WRITING, OR AS STATED IN THIS CLA, YOU PROVIDE YOUR CONTRIBUTION ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
26
+
27
+ Should You wish to submit work that is not Your original creation, You may submit it to Vast Data separately from any Contribution, identifying the complete details of its source and of any license or other restriction (including, but not limited to, related patents, trademarks, and license agreements) of which you are personally aware, and conspicuously marking the work as "Submitted on behalf of a third-party: [named here]". Vast Data shall have the right to determine at its discretion whether or not to use such Contribution.
28
+
29
+ ## Code of Conduct
30
+
31
+ In Your Contribution and/or Work or otherwise in your participation in the Project, you agree: (i) not to use or include any defamatory, illegal, derogatory, abusive, harassing, destructive, malicious, or otherwise illegitimate content, code or language; and (ii) not to publish others’ private or personal information without their explicit permission (“Code of Conduct”). Vast Data will be entitled but not obligated to enforce the Code of Conduct and may take appropriate and fair action in response to any behaviour that is reasonably deemed by us to be inappropriate, threatening, offensive, or harmful. Further, Vast Data will have the right but not the obligation to remove, edit, or reject any comments, code, issues, and other Contributions or Works submitted hereunder including without limitation by reason of nonalignment with the Code of Conduct.
32
+
33
+ You agree to notify Vast Data of any facts or circumstances of which you become aware that would make these representations inaccurate in any respect.
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vastdb
3
- Version: 0.1.11
3
+ Version: 1.1.0
4
4
  Summary: VAST Data SDK
5
5
  Home-page: https://github.com/vast-data/vastdb_sdk
6
6
  Author: VAST DATA
7
7
  Author-email: hello@vastdata.com
8
8
  License: Copyright (C) VAST Data Ltd.
9
- Classifier: Development Status :: 4 - Beta
9
+ Classifier: Development Status :: 5 - Production/Stable
10
10
  Classifier: License :: OSI Approved :: Apache Software License
11
11
  Classifier: Programming Language :: Python :: 3
12
12
  Classifier: Programming Language :: Python :: 3.9
@@ -2,8 +2,6 @@
2
2
 
3
3
  # VAST DB Python SDK
4
4
 
5
- 🚧 Please note that this package in a pre-release stage. Until version 1.x is officially released, the API should be considered unstable.
6
-
7
5
  ## Introduction
8
6
 
9
7
  `vastdb` is a Python-based SDK designed for interacting with [VAST Database](https://vastdata.com/database) & [VAST Catalog](https://vastdata.com/blog/vast-catalog-treat-your-file-system-like-a-database), enabling schema and table management, efficient ingest, query and modification of columnar data. For more details, see [our whitepaper](https://vastdata.com/whitepaper/#TheVASTDataBase).
@@ -29,7 +29,7 @@ setup(
29
29
  name='vastdb',
30
30
  python_requires='>=3.9.0',
31
31
  description='VAST Data SDK',
32
- version='0.1.11' + suffix,
32
+ version='1.1.0' + suffix,
33
33
  url='https://github.com/vast-data/vastdb_sdk',
34
34
  author='VAST DATA',
35
35
  author_email='hello@vastdata.com',
@@ -39,7 +39,7 @@ setup(
39
39
  long_description=long_description,
40
40
  long_description_content_type='text/markdown',
41
41
  classifiers=[
42
- 'Development Status :: 4 - Beta',
42
+ 'Development Status :: 5 - Production/Stable',
43
43
  'License :: OSI Approved :: Apache Software License',
44
44
  'Programming Language :: Python :: 3',
45
45
  'Programming Language :: Python :: 3.9',
@@ -35,6 +35,7 @@ from ibis.expr.operations.logical import (
35
35
  )
36
36
  from ibis.expr.operations.relations import Field
37
37
  from ibis.expr.operations.strings import StringContains
38
+ from ibis.expr.operations.structs import StructField
38
39
 
39
40
  import vast_flatbuf.org.apache.arrow.computeir.flatbuf.BinaryLiteral as fb_binary_lit
40
41
  import vast_flatbuf.org.apache.arrow.computeir.flatbuf.BooleanLiteral as fb_bool_lit
@@ -182,7 +183,7 @@ class Predicate:
182
183
  _logger.debug('OR args: %s op %s', or_args, op)
183
184
  inner_offsets = []
184
185
 
185
- prev_field_name = None
186
+ prev_field_path = None
186
187
  for inner_op in or_args:
187
188
  _logger.debug('inner_op %s', inner_op)
188
189
  op_type = type(inner_op)
@@ -216,28 +217,38 @@ class Predicate:
216
217
  if not isinstance(literal, Literal):
217
218
  raise NotImplementedError(self.expr)
218
219
 
220
+ field_path = []
221
+ while isinstance(column, StructField):
222
+ column, subfield_name = column.args
223
+ field_path.append(subfield_name)
224
+
219
225
  if not isinstance(column, Field):
220
226
  raise NotImplementedError(self.expr)
221
227
 
222
- field_name = column.name
223
- if prev_field_name is None:
224
- prev_field_name = field_name
225
- elif prev_field_name != field_name:
228
+ field_path.append(column.name)
229
+ field_path.reverse() # first entry should be the top-level column name
230
+
231
+ if prev_field_path is None:
232
+ prev_field_path = field_path
233
+ elif prev_field_path != field_path:
226
234
  raise NotImplementedError(self.expr)
227
235
 
228
- node = self.nodes_map[field_name]
236
+ nodes_map = self.nodes_map
237
+ for name in field_path:
238
+ node = nodes_map[name]
239
+ nodes_map = node.children_map
240
+
229
241
  # TODO: support predicate pushdown for leaf nodes (ORION-160338)
230
242
  if node.children:
231
243
  raise NotImplementedError(node.field) # no predicate pushdown for nested columns
232
244
  column_offset = self.build_column(position=node.index)
233
- field = self.schema.field(field_name)
234
245
  for literal in literals:
235
246
  args_offsets = [column_offset]
236
247
  if literal is not None:
237
- args_offsets.append(self.build_literal(field=field, value=literal.value))
248
+ args_offsets.append(self.build_literal(field=node.field, value=literal.value))
238
249
  if builder_func == self.build_between:
239
- args_offsets.append(self.build_literal(field=field, value=lower.value))
240
- args_offsets.append(self.build_literal(field=field, value=upper.value))
250
+ args_offsets.append(self.build_literal(field=node.field, value=lower.value))
251
+ args_offsets.append(self.build_literal(field=node.field, value=upper.value))
241
252
 
242
253
  inner_offsets.append(builder_func(*args_offsets))
243
254
 
@@ -572,6 +583,8 @@ class FieldNode:
572
583
  else:
573
584
  self.children = [] # for non-nested types
574
585
 
586
+ self.children_map = {c.field.name: c for c in self.children}
587
+
575
588
  def _iter_to_root(self) -> Iterator['FieldNode']:
576
589
  yield self
577
590
  if self.parent is not None:
@@ -748,7 +761,6 @@ class VastdbApi:
748
761
 
749
762
  def __init__(self, endpoint, access_key, secret_key,
750
763
  *,
751
- auth_type=AuthType.SIGV4,
752
764
  ssl_verify=True,
753
765
  timeout=None,
754
766
  backoff_config: Optional[BackoffConfig] = None):
@@ -766,15 +778,15 @@ class VastdbApi:
766
778
  self._session.verify = ssl_verify
767
779
  self._session.headers['user-agent'] = self.client_sdk_version
768
780
 
769
- backoff_config = backoff_config or BackoffConfig()
781
+ self.backoff_config = backoff_config or BackoffConfig()
770
782
  self._backoff_decorator = backoff.on_exception(
771
- wait_gen=backoff_config.wait_gen,
783
+ wait_gen=self.backoff_config.wait_gen,
772
784
  exception=_RETRIABLE_EXCEPTIONS,
773
785
  giveup=_backoff_giveup,
774
- max_tries=backoff_config.max_tries,
775
- max_time=backoff_config.max_time,
776
- max_value=backoff_config.max_value, # passed to `backoff_config.wait_gen`
777
- backoff_log_level=backoff_config.backoff_log_level)
786
+ max_tries=self.backoff_config.max_tries,
787
+ max_time=self.backoff_config.max_time,
788
+ max_value=self.backoff_config.max_value, # passed to `self.backoff_config.wait_gen`
789
+ backoff_log_level=self.backoff_config.backoff_log_level)
778
790
  self._request = self._backoff_decorator(self._single_request)
779
791
 
780
792
  if url.port in {80, 443, None}:
@@ -812,6 +824,23 @@ class VastdbApi:
812
824
  _logger.critical(msg)
813
825
  raise NotImplementedError(msg)
814
826
 
827
+ def __enter__(self):
828
+ """Allow using this session as a context manager."""
829
+ return self
830
+
831
+ def __exit__(self, *args):
832
+ """Make sure that the connections closed."""
833
+ self._session.close()
834
+
835
def with_endpoint(self, endpoint):
    """Build a separate ``VastdbApi`` that talks to ``endpoint``.

    The new object is constructed with this object's access/secret keys,
    the session's SSL verification setting, the timeout and the backoff
    configuration; only the endpoint differs.
    """
    inherited_settings = {
        "access_key": self.access_key,
        "secret_key": self.secret_key,
        "ssl_verify": self._session.verify,
        "timeout": self.timeout,
        "backoff_config": self.backoff_config,
    }
    return VastdbApi(endpoint=endpoint, **inherited_settings)
843
+
815
844
  def _single_request(self, *, method, url, skip_status_check=False, **kwargs):
816
845
  _logger.debug("Sending request: %s %s %s timeout=%s", method, url, kwargs, self.timeout)
817
846
  try:
@@ -0,0 +1,87 @@
1
+ from typing import List, Optional
2
+
3
+ from vastdb.bench.perf_bench.common.constants import (
4
+ LOCAL_FS_DS_PATH,
5
+ NFS_DS_PATH, # noqa: F401
6
+ S3_DS_PATH, # noqa: F401
7
+ ParquetCompression,
8
+ VastConnDetails,
9
+ )
10
+ from vastdb.bench.perf_bench.dataset.schemas import DEFAULT_BARS_COLUMNS
11
+ from vastdb.bench.perf_bench.orchestrate.scenario import BenchScenario
12
+ from vastdb.bench.perf_bench.orchestrate.scenario_generator import (
13
+ generate_perf_bench_scenarios,
14
+ )
15
+ from vastdb.bench.perf_bench.query.query import QueryBackend
16
+
17
+
18
def build_scenarios(
    base_key: str,
    conn_details: Optional[VastConnDetails] = None,
) -> List[BenchScenario]:
    """Assemble the scenario list for this benchmark definition.

    Each local below is one axis of alternatives handed to
    ``generate_perf_bench_scenarios``. Commented-out entries are ready-made
    alternatives that can be enabled by hand.

    Args:
        base_key: Forwarded unchanged as ``base_key``.
        conn_details: Connection settings; a default ``VastConnDetails()``
            is created when this is omitted (or falsy).

    Returns:
        Whatever ``generate_perf_bench_scenarios`` returns for these choices.
    """
    if not conn_details:
        conn_details = VastConnDetails()

    backends = [
        QueryBackend.pyarrow,
        # QueryBackend.vastdb,
    ]
    columns = (DEFAULT_BARS_COLUMNS,)
    universes = (
        "Single",
        "Tiny",
        "SmallSeq",
        "Medium",
        "Medium2",
        "Large",
    )
    business_days = [
        1,  # 1d
        5,  # 1w
        # 22,  # 1m
        65,  # 3m
        # 130,  # 6m
        252,  # 1y
    ]

    # Arrow-specific options
    fs_paths = [
        # NFS_DS_PATH,
        LOCAL_FS_DS_PATH,
        # S3_DS_PATH,
    ]
    # make sure you have previously generated the respective datasets
    rowgroup_sizes = [
        # 64 * 1024,
        # 128 * 1024,
        256 * 1024,
        # 512 * 1024,
        # DEFAULT_ROW_GROUP_SIZE,
        # int(1.5 * 1024 * 1024),
    ]
    compressions = [
        ParquetCompression.LZ4,
    ]
    arrow_batching_specs = [
        # {"batch_size": 2*2**16, "batch_readahead": 16, "fragment_readahead": 4},
        # {"batch_size": 6*2**16, "batch_readahead": 12, "fragment_readahead": 4},
        # DEFAULT_ARROW_KWARGS,
        {"batch_size": 16 * 2 ** 16, "batch_readahead": 16, "fragment_readahead": 4},
        # {"batch_size": 24 * 2 ** 16, "batch_readahead": 12, "fragment_readahead": 4},
        # {"batch_size": 32*2**16, "batch_readahead": 12, "fragment_readahead": 4},
        # {"batch_size": 64*2**16, "batch_readahead": 12, "fragment_readahead": 4},
        # {"batch_size": 128*2**16, "batch_readahead": 12, "fragment_readahead": 4},
    ]

    # VastDB-specific options
    vdb_sub_splits = (
        # 1,  # Default
        # 4,
        8,
        # 16,
    )
    vdb_row_groups_per_sub_split = (
        # 1,
        # 4,
        8,  # Default
    )

    return generate_perf_bench_scenarios(
        base_key=base_key,
        conn_details=conn_details,
        query_backends=backends,
        columns_choices=columns,
        universe_choices=universes,
        num_bdays=business_days,
        fs_path_choices=fs_paths,
        rowgroup_size_choices=rowgroup_sizes,
        compression_choices=compressions,
        arrow_batching_spec_choices=arrow_batching_specs,
        vdb_num_sub_splits_choices=vdb_sub_splits,
        vdb_num_row_groups_per_sub_split_choices=vdb_row_groups_per_sub_split,
    )
@@ -0,0 +1,225 @@
1
+ from pathlib import Path
2
+ from typing import Annotated, List, Optional
3
+
4
+ import pandas as pd
5
+ import typer
6
+
7
+ from vastdb.bench.perf_bench.common.constants import (
8
+ DEFAULT_END_T,
9
+ DEFAULT_RESULTS_DIR,
10
+ DEFAULT_START_T,
11
+ DFAULT_PARQUET_COMPRESSION,
12
+ LOCAL_FS_DS_PATH,
13
+ LogLevel,
14
+ ParquetCompression,
15
+ )
16
+ from vastdb.bench.perf_bench.common.log_utils import (
17
+ get_logger,
18
+ set_log_file,
19
+ set_log_level,
20
+ )
21
+ from vastdb.bench.perf_bench.common.utils import getenv_flag, load_module_from_path
22
+ from vastdb.bench.perf_bench.dataset.generate_secmaster import (
23
+ SM_PATH,
24
+ generate_secmaster,
25
+ )
26
+ from vastdb.bench.perf_bench.dataset.generate_stocks_dataset import (
27
+ generate_concurrent_synthetic_stock_1m_bars,
28
+ )
29
+ from vastdb.bench.perf_bench.orchestrate.scenario import BenchScenario
30
+ from vastdb.bench.perf_bench.run import run_scenarios
31
+
32
+ app = typer.Typer(pretty_exceptions_enable=getenv_flag("TYPER_PRETTY_EXCEPTIONS"))
33
+
34
+ _MY_DIR = Path(__file__).parent
35
+
36
+
37
+ # noinspection PyUnusedLocal
38
@app.callback()
def cli_common(
    ctx: typer.Context,
    verbose: Annotated[
        bool,
        typer.Option("--verbose", is_flag=True),
    ] = False,
    log_level: Annotated[
        Optional[LogLevel],
        typer.Option("--log-level", case_sensitive=False),
    ] = None,
    log_file: Annotated[
        Optional[Path],
        typer.Option(
            "--log-file",
            writable=True,
            file_okay=True,
            dir_okay=False,
            resolve_path=True,
        ),
    ] = None,
):
    # Shared logging setup, registered as the app-level callback.
    # Documented with comments rather than a docstring so the generated
    # help output stays exactly as it was.

    # --verbose wins over any explicitly requested --log-level.
    effective_level = LogLevel.DEBUG if verbose else log_level
    if effective_level:
        set_log_level(effective_level)

    if log_file:
        set_log_file(log_file)

    logger = get_logger(__name__)
    logger.info("CLI common setup done.")
73
+
74
+
75
+ def _positive_int(value: str) -> int:
76
+ i_value = int(value)
77
+ if i_value <= 0:
78
+ raise typer.BadParameter(f"Must be a positive integer: {value}.")
79
+ return i_value
80
+
81
+
82
+ # noinspection PyUnusedLocal
83
@app.command()
def run_bench(
    ctx: typer.Context,
    bench_name: Annotated[
        str,
        typer.Option(
            "--bench-name",
        ),
    ],
    parallelism: Annotated[
        List[int],
        typer.Option(
            "--parallelism",
            callback=lambda par: [_positive_int(p) for p in par],
        ),
    ],
    runs_per_bench: Annotated[
        int,
        typer.Option(
            "--runs-per-bench",
            callback=_positive_int,
        ),
    ] = 3,
    bench_generator_path: Annotated[
        Path,
        typer.Option(
            "--bench-generator-path",
            readable=True,
            file_okay=True,
            dir_okay=False,
            resolve_path=True,
        ),
    ] = _MY_DIR / "bench_repo" / "mega_combo.py",
    results_base_dir: Annotated[
        Path,
        typer.Option(
            "--results-base-dir",
            # The option was originally (mis)declared as "--log-file", a
            # copy-paste of the app-level logging option. The old spelling
            # is kept as an alias so existing invocations keep working.
            "--log-file",
            writable=True,
            file_okay=False,
            dir_okay=True,
            resolve_path=True,
        ),
    ] = DEFAULT_RESULTS_DIR,
):
    # Run the scenarios produced by a generator module, once per requested
    # parallelism level.
    #
    #   bench_name           -- passed to the generator as ``base_key``;
    #                           must not be blank.
    #   parallelism          -- one or more positive ints; the whole scenario
    #                           list is run once for each value, in order.
    #   runs_per_bench       -- forwarded to ``run_scenarios``.
    #   bench_generator_path -- Python file exposing ``build_scenarios``.
    #   results_base_dir     -- directory forwarded (as ``str``) to
    #                           ``run_scenarios``.
    #
    # Documented with comments rather than a docstring so that the command's
    # generated help text is not altered.
    if not (bench_name := bench_name.strip()):
        raise typer.BadParameter("Bench name must be non-empty.")

    mod = load_module_from_path(bench_generator_path)
    scenarios: List[BenchScenario] = mod.build_scenarios(base_key=bench_name)
    for para in parallelism:
        run_scenarios(
            scenarios=scenarios,
            runs_per_bench=runs_per_bench,
            parallelism=para,
            results_base_dir=str(results_base_dir),
        )
139
+
140
+
141
+ # noinspection PyUnusedLocal
142
@app.command()
def build_secmaster(ctx: typer.Context):
    # Thin CLI entry point: ``ctx`` is accepted but unused, and all the work
    # is delegated to ``generate_secmaster()``.
    generate_secmaster()
147
+
148
+
149
+ # noinspection PyUnusedLocal
150
@app.command()
def build_dataset(
    ctx: typer.Context,
    start_date: Annotated[
        str,
        typer.Option(
            "--start-date",
            help="Start date for the dataset.",
            # The callback turns the raw text into a midnight-normalized
            # pandas Timestamp, so the body below compares Timestamps.
            callback=lambda d: pd.Timestamp(d).normalize(),
        ),
    ] = DEFAULT_START_T.strftime("%Y%m%d"),
    end_date: Annotated[
        str,
        typer.Option(
            "--end-date",
            # Fixed: this help text used to read "Start date ...".
            help="End date for the dataset.",
            callback=lambda d: pd.Timestamp(d).normalize(),
        ),
    ] = DEFAULT_END_T.strftime("%Y%m%d"),
    output_dir: Annotated[
        Path,
        typer.Option(
            "--output-dir",
            writable=True,
            file_okay=False,
            dir_okay=True,
            resolve_path=True,
        ),
    ] = LOCAL_FS_DS_PATH,
    parallelism: Annotated[
        int,
        typer.Option(
            "--parallelism",
            callback=_positive_int,
        ),
    ] = 6,
    row_group_size: Annotated[
        int,
        typer.Option(
            "--row-group-size",
            callback=_positive_int,
            help=(
                "Row group size for the dataset, some common values are: 64 * 1024, 128 * 1024, 256"
                " * 1024, 512 * 1024, 1024 * 1024, 1.5 * 1024 * 1024."
            ),
        ),
    ] = 256 * 1024,
    compression: Annotated[
        ParquetCompression,
        typer.Option(
            "--compression",
            help="Parquet compression algorithm.",
        ),
    ] = DFAULT_PARQUET_COMPRESSION,
):
    # Generate the synthetic 1-minute stock bars dataset.
    #
    # The arguments are validated first. The security master is then created
    # if its file (``SM_PATH``) does not exist yet, and finally generation is
    # delegated to ``generate_concurrent_synthetic_stock_1m_bars``, with
    # ``parallelism`` passed on as ``num_workers``.
    #
    # Raises typer.BadParameter when row_group_size < 1024, parallelism < 1,
    # or the start date is later than the end date.
    if row_group_size < 1024:
        raise typer.BadParameter("Row group size must be at least 1024.")
    if parallelism < 1:
        raise typer.BadParameter("Parallelism must be at least 1.")
    if start_date > end_date:
        raise typer.BadParameter("Start date must be before the end date.")
    if not SM_PATH.is_file():
        generate_secmaster()
    generate_concurrent_synthetic_stock_1m_bars(
        from_t=start_date,
        to_t=end_date,
        output_dir=output_dir,
        num_workers=parallelism,
        row_group_size=row_group_size,
        compression=compression,
    )
221
+
222
+
223
+ if __name__ == "__main__":
224
+ # Run the CLI app only when this file is executed as a script (not when the module is merely imported)
225
+ app()
@@ -0,0 +1,96 @@
1
+ import logging
2
+ import os
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+
6
+ import pandas as pd
7
+ from urllib3.util import parse_url
8
+
9
+ from vastdb.bench.perf_bench.common.types import StrEnum
10
+
11
# Directory that contains this module; the path constants below hang off it.
_MY_DIR = Path(__file__).parent

# VastDB details (read from the environment; empty string when unset)
VASTDB_ENDPOINT = os.getenv("VASTDB_ENDPOINT", "")
VASTDB_BUCKET_NAME = os.getenv("VASTDB_BUCKET_NAME", "")
VASTDB_TEST_SCHEMA_NAME = os.getenv("VASTDB_TEST_SCHEMA_NAME", "")
VASTDB_TEST_TABLE_NAME = os.getenv("VASTDB_TEST_TABLE_NAME", "")

# Regular S3 details
S3_BUCKET_NAME = "my-s3-bucket"
DEFAULT_S3_SSL_PORT = int(os.getenv("DEFAULT_S3_SSL_PORT", 443))
DEFAULT_S3_ENDPOINT_URL = os.getenv("AWS_S3_ENDPOINT_URL", "")
if not DEFAULT_S3_ENDPOINT_URL:
    # No endpoint URL given: take host/port from their own variables and
    # assemble a plain-http URL from them.
    DEFAULT_S3_HOST = os.getenv("DEFAULT_S3_HOST", "1.1.1.1")
    DEFAULT_S3_PORT = int(os.getenv("DEFAULT_S3_PORT", 80))
    DEFAULT_S3_ENDPOINT_URL = f"http://{DEFAULT_S3_HOST}:{DEFAULT_S3_PORT}"
else:
    # Endpoint URL given: derive host and port from it (port 80 when the
    # URL does not carry one).
    _parsed = parse_url(DEFAULT_S3_ENDPOINT_URL)
    DEFAULT_S3_HOST = str(_parsed.host)
    DEFAULT_S3_PORT = int(_parsed.port or 80)

# Paths
DEFAULT_RESULTS_DIR = _MY_DIR.parent / "benchmark_results"
NFS_DS_PATH = f"/mnt/data/{VASTDB_BUCKET_NAME}/data"
LOCAL_FS_DS_PATH = _MY_DIR.parent / "dataset" / "test_dataset"
S3_DS_PATH = f"s3://{VASTDB_BUCKET_NAME}/data"

# Access keys
DEFAULT_ACCESS_KEY = os.getenv("AWS_ACCESS_KEY_ID", "some_access_key")
DEFAULT_SECRET_KEY = os.getenv("AWS_SECRET_ACCESS_KEY", "some_secret_key")

# Dataset start/end dates
DEFAULT_START_T = pd.Timestamp("20180101")
DEFAULT_END_T = pd.Timestamp("20200201")

# Arrow related constants
DEFAULT_ROW_GROUP_SIZE = 1024 * 1024
DEFAULT_ARROW_BATCH_SIZE = 786432  # == 12 * 2 ** 16
DEFAULT_ARROW_BATCH_READAHEAD = 12
DEFAULT_ARROW_BATCH_FRAGMENT_READAHEAD = 4
DEFAULT_ARROW_KWARGS = {
    "batch_size": DEFAULT_ARROW_BATCH_SIZE,
    "batch_readahead": DEFAULT_ARROW_BATCH_READAHEAD,
    "fragment_readahead": DEFAULT_ARROW_BATCH_FRAGMENT_READAHEAD,
}
52
+
53
+
54
+ class ParquetCompression(StrEnum):
55
+ NONE = "NONE"
56
+ SNAPPY = "SNAPPY"
57
+ GZIP = "GZIP"
58
+ # LZO = "LZO"
59
+ BROTLI = "BROTLI"
60
+ LZ4 = "LZ4"
61
+ ZSTD = "ZSTD"
62
+
63
+
64
+ DFAULT_PARQUET_COMPRESSION = ParquetCompression.LZ4
65
+
66
+
67
+ class LogLevel(StrEnum):
68
+ CRITICAL = "CRITICAL"
69
+ FATAL = "FATAL"
70
+ ERROR = "ERROR"
71
+ WARN = "WARNING"
72
+ WARNING = "WARNING"
73
+ INFO = "INFO"
74
+ DEBUG = "DEBUG"
75
+ NOTSET = "NOTSET"
76
+
77
+ def to_int(self) -> int:
78
+ # noinspection PyUnresolvedReferences,PyProtectedMember
79
+ return logging._nameToLevel[self] # noqa: SLF001
80
+
81
+
82
@dataclass(frozen=True)
class VastConnDetails:
    """Immutable bundle of VAST DB and S3 connection settings.

    Every field defaults to the corresponding module-level constant, so
    ``VastConnDetails()`` describes the environment-derived configuration.
    """

    # Credentials
    access: str = DEFAULT_ACCESS_KEY
    secret: str = DEFAULT_SECRET_KEY

    # VastDB target
    vastdb_bucket: str = VASTDB_BUCKET_NAME
    vastdb_endpoint: str = VASTDB_ENDPOINT
    vastdb_ssl_verify: bool = True
    vastdb_schema: str = VASTDB_TEST_SCHEMA_NAME
    vastdb_table: str = VASTDB_TEST_TABLE_NAME

    # S3 target
    s3_host: str = DEFAULT_S3_HOST
    # NOTE(review): defaults to the VastDB bucket name, not S3_BUCKET_NAME;
    # presumably intentional (same bucket reached over S3) -- confirm.
    s3_bucket: str = VASTDB_BUCKET_NAME
    s3_port: int = DEFAULT_S3_PORT
    s3_ssl_port: int = DEFAULT_S3_SSL_PORT