vastdb 1.0.0__tar.gz → 1.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vastdb-1.0.0 → vastdb-1.1.1}/CHANGELOG.md +21 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/PKG-INFO +1 -2
- {vastdb-1.0.0 → vastdb-1.1.1}/README.md +51 -41
- {vastdb-1.0.0 → vastdb-1.1.1}/requirements.txt +0 -1
- {vastdb-1.0.0 → vastdb-1.1.1}/setup.py +1 -1
- {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/_internal.py +41 -10
- vastdb-1.1.1/vastdb/bench/perf_bench/bench_repo/mega_combo.py +87 -0
- vastdb-1.1.1/vastdb/bench/perf_bench/cli.py +225 -0
- vastdb-1.1.1/vastdb/bench/perf_bench/common/__init__.py +0 -0
- vastdb-1.1.1/vastdb/bench/perf_bench/common/constants.py +96 -0
- vastdb-1.1.1/vastdb/bench/perf_bench/common/log_utils.py +67 -0
- vastdb-1.1.1/vastdb/bench/perf_bench/common/types.py +34 -0
- vastdb-1.1.1/vastdb/bench/perf_bench/common/utils.py +219 -0
- vastdb-1.1.1/vastdb/bench/perf_bench/dataset/__init__.py +0 -0
- vastdb-1.1.1/vastdb/bench/perf_bench/dataset/generate_secmaster.py +105 -0
- vastdb-1.1.1/vastdb/bench/perf_bench/dataset/generate_stocks_dataset.py +242 -0
- vastdb-1.1.1/vastdb/bench/perf_bench/dataset/schemas.py +101 -0
- vastdb-1.1.1/vastdb/bench/perf_bench/dataset/secmaster.py +33 -0
- vastdb-1.1.1/vastdb/bench/perf_bench/orchestrate/__init__.py +0 -0
- vastdb-1.1.1/vastdb/bench/perf_bench/orchestrate/bench_spec.py +91 -0
- vastdb-1.1.1/vastdb/bench/perf_bench/orchestrate/results_helpers.py +126 -0
- vastdb-1.1.1/vastdb/bench/perf_bench/orchestrate/scenario.py +109 -0
- vastdb-1.1.1/vastdb/bench/perf_bench/orchestrate/scenario_generator.py +144 -0
- vastdb-1.1.1/vastdb/bench/perf_bench/query/__init__.py +0 -0
- vastdb-1.1.1/vastdb/bench/perf_bench/query/arrow_common.py +59 -0
- vastdb-1.1.1/vastdb/bench/perf_bench/query/query.py +42 -0
- vastdb-1.1.1/vastdb/bench/perf_bench/query/query_pyarrow.py +70 -0
- vastdb-1.1.1/vastdb/bench/perf_bench/query/query_vastdb.py +78 -0
- vastdb-1.1.1/vastdb/bench/perf_bench/run.py +79 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/bench/test_sample.py +4 -2
- {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/conftest.py +1 -1
- {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/session.py +0 -6
- {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/table.py +35 -35
- vastdb-1.1.1/vastdb/tests/__init__.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/tests/test_nested.py +58 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/tests/test_tables.py +13 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/transaction.py +4 -8
- {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/util.py +5 -0
- vastdb-1.1.1/vastdb/vast_tests/__init__.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vastdb.egg-info/PKG-INFO +1 -2
- {vastdb-1.0.0 → vastdb-1.1.1}/vastdb.egg-info/SOURCES.txt +25 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vastdb.egg-info/requires.txt +0 -1
- {vastdb-1.0.0 → vastdb-1.1.1}/CONTRIBUTING.md +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/LICENSE +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/MANIFEST.in +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/setup.cfg +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/__init__.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/__init__.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/__init__.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/__init__.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/__init__.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Aggregate.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/ArraySlice.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/ArraySubscript.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/BinaryLiteral.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/BooleanLiteral.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Bound.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Call.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/CaseFragment.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Cast.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/ConcreteBoundImpl.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/ConditionalCase.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/CurrentRow.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/DateLiteral.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/DecimalLiteral.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Deref.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/DurationLiteral.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Expression.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/ExpressionImpl.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/FieldIndex.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/FieldRef.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Filter.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/FixedSizeBinaryLiteral.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Float16Literal.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Float32Literal.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Float64Literal.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Following.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Frame.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Grouping.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Int16Literal.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Int32Literal.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Int64Literal.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Int8Literal.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/IntervalLiteral.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/IntervalLiteralDaysMilliseconds.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/IntervalLiteralImpl.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/IntervalLiteralMonths.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Join.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/JoinKind.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/KeyValue.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Limit.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/ListLiteral.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Literal.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/LiteralColumn.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/LiteralImpl.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/LiteralRelation.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/MapKey.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/MapLiteral.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/OrderBy.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Ordering.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Plan.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Preceding.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Project.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/RelId.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Relation.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/RelationImpl.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/SetOpKind.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/SetOperation.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/SimpleCase.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/SortKey.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Source.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/StringLiteral.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/StructField.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/StructLiteral.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/TimeLiteral.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/TimestampLiteral.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/UInt16Literal.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/UInt32Literal.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/UInt64Literal.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/UInt8Literal.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Unbounded.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/WindowCall.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/computeir/flatbuf/__init__.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Binary.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Block.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/BodyCompression.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/BodyCompressionMethod.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Bool.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Buffer.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/CompressionType.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Date.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/DateUnit.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Decimal.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/DictionaryBatch.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/DictionaryEncoding.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/DictionaryKind.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Duration.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Endianness.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Feature.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Field.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/FieldNode.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/FixedSizeBinary.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/FixedSizeList.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/FloatingPoint.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Footer.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Int.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Interval.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/IntervalUnit.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/KeyValue.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/LargeBinary.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/LargeList.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/LargeUtf8.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/List.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Map.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Message.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/MessageHeader.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/MetadataVersion.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Null.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Precision.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/RecordBatch.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Schema.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/SparseMatrixCompressedAxis.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/SparseMatrixIndexCSX.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/SparseTensor.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/SparseTensorIndex.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/SparseTensorIndexCOO.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/SparseTensorIndexCSF.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Struct_.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Tensor.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/TensorDim.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Time.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/TimeUnit.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Timestamp.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Type.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Union.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/UnionMode.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/Utf8.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/org/apache/arrow/flatbuf/__init__.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/AlterColumnRequest.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/AlterProjectionTableRequest.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/AlterSchemaRequest.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/AlterTableRequest.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/Column.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/ColumnType.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/CreateProjectionRequest.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/CreateSchemaRequest.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/GetProjectionTableStatsResponse.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/GetTableStatsResponse.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/ImportDataRequest.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/ListProjectionsResponse.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/ListSchemasResponse.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/ListTablesResponse.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/ObjectDetails.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/S3File.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/VipRange.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vast_flatbuf/tabular/__init__.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/__init__.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/bench/__init__.py +0 -0
- {vastdb-1.0.0/vastdb/tests → vastdb-1.1.1/vastdb/bench/perf_bench}/__init__.py +0 -0
- {vastdb-1.0.0/vastdb/vast_tests → vastdb-1.1.1/vastdb/bench/perf_bench/bench_repo}/__init__.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/bench/test_perf.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/bucket.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/config.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/errors.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/features.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/schema.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/tests/metrics.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/tests/test_duckdb.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/tests/test_imports.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/tests/test_projections.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/tests/test_sanity.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/tests/test_schemas.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/tests/test_util.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/tests/util.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/vast_tests/test_ha.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vastdb/vast_tests/test_scale.py +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vastdb.egg-info/dependency_links.txt +0 -0
- {vastdb-1.0.0 → vastdb-1.1.1}/vastdb.egg-info/top_level.txt +0 -0
|
@@ -4,6 +4,27 @@ All notable changes to this project will be documented in this file.
|
|
|
4
4
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
|
6
6
|
|
|
7
|
+
## [1.1.1] (2024-08-11)
|
|
8
|
+
[1.1.1]: https://github.com/vast-data/vastdb_sdk/compare/v1.1.0...v1.1.1
|
|
9
|
+
|
|
10
|
+
### Fixed
|
|
11
|
+
- Combine record batches when inserting `pyarrow.Table`
|
|
12
|
+
|
|
13
|
+
### Changed
|
|
14
|
+
- Don't require `boto3` for checking bucket existence
|
|
15
|
+
- Improve documentation
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
## [1.1.0] (2024-07-25)
|
|
19
|
+
[1.1.0]: https://github.com/vast-data/vastdb_sdk/compare/v1.0.0...v1.1.0
|
|
20
|
+
|
|
21
|
+
### Fixed
|
|
22
|
+
- Close internal sessions in `select` and `import`
|
|
23
|
+
|
|
24
|
+
### Added
|
|
25
|
+
- VastDB/Parquet performance benchmark harness
|
|
26
|
+
- Support predicate pushdown over subfields
|
|
27
|
+
|
|
7
28
|
## [1.0.0] (2024-07-21)
|
|
8
29
|
[1.0.0]: https://github.com/vast-data/vastdb_sdk/compare/v0.1.11...v1.0.0
|
|
9
30
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: vastdb
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.1.1
|
|
4
4
|
Summary: VAST Data SDK
|
|
5
5
|
Home-page: https://github.com/vast-data/vastdb_sdk
|
|
6
6
|
Author: VAST DATA
|
|
@@ -19,7 +19,6 @@ Requires-Python: >=3.9.0
|
|
|
19
19
|
Description-Content-Type: text/markdown
|
|
20
20
|
License-File: LICENSE
|
|
21
21
|
Requires-Dist: aws-requests-auth
|
|
22
|
-
Requires-Dist: boto3
|
|
23
22
|
Requires-Dist: flatbuffers
|
|
24
23
|
Requires-Dist: ibis-framework==9.0.0
|
|
25
24
|
Requires-Dist: pyarrow
|
|
@@ -4,24 +4,26 @@
|
|
|
4
4
|
|
|
5
5
|
## Introduction
|
|
6
6
|
|
|
7
|
-
`vastdb` is a Python-based SDK designed for interacting with [VAST Database](https://vastdata.com/database)
|
|
7
|
+
`vastdb` is a Python-based SDK designed for interacting with a [VAST Database](https://vastdata.com/database) and the [VAST Catalog](https://vastdata.com/blog/vast-catalog-treat-your-file-system-like-a-database), enabling schema and table management, efficient ingest, query, and modification of columnar data.
|
|
8
|
+
|
|
9
|
+
For more details about the VAST Database, see [this whitepaper](https://vastdata.com/whitepaper/#TheVASTDataBase).
|
|
8
10
|
|
|
9
11
|
[](https://vastdata.com/database)
|
|
10
12
|
|
|
11
|
-
## Getting
|
|
13
|
+
## Getting Started
|
|
12
14
|
|
|
13
15
|
### Requirements
|
|
14
16
|
|
|
15
|
-
- Linux client with Python 3.9
|
|
17
|
+
- Linux client with Python 3.9 or later, and network access to the VAST Cluster
|
|
16
18
|
- [Virtual IP pool configured with DNS service](https://support.vastdata.com/s/topic/0TOV40000000FThOAM/configuring-network-access-v50)
|
|
17
|
-
- [S3 access & secret keys on VAST cluster](https://support.vastdata.com/s/article/UUID-4d2e7e23-b2fb-7900-d98f-96c31a499626)
|
|
19
|
+
- [S3 access & secret keys on the VAST cluster](https://support.vastdata.com/s/article/UUID-4d2e7e23-b2fb-7900-d98f-96c31a499626)
|
|
18
20
|
- [Tabular identity policy with the proper permissions](https://support.vastdata.com/s/article/UUID-14322b60-d6a2-89ac-3df0-3dfbb6974182)
|
|
19
21
|
|
|
20
|
-
### Required VAST release
|
|
22
|
+
### Required VAST Cluster release
|
|
21
23
|
|
|
22
|
-
|
|
24
|
+
VAST DB Python SDK requires VAST Cluster release `5.0.0-sp10` or later.
|
|
23
25
|
|
|
24
|
-
If the cluster is running an older VAST release, please contact customer.support@vastdata.com.
|
|
26
|
+
If your VAST Cluster is running an older release, please contact customer.support@vastdata.com.
|
|
25
27
|
|
|
26
28
|
### Installation
|
|
27
29
|
|
|
@@ -29,11 +31,11 @@ If the cluster is running an older VAST release, please contact customer.support
|
|
|
29
31
|
pip install vastdb
|
|
30
32
|
```
|
|
31
33
|
|
|
32
|
-
|
|
34
|
+
See the [Release Notes](CHANGELOG.md) for the SDK.
|
|
33
35
|
|
|
34
|
-
###
|
|
36
|
+
### Quick Start
|
|
35
37
|
|
|
36
|
-
|
|
38
|
+
Create schemas and tables, basic inserts, and selects:
|
|
37
39
|
|
|
38
40
|
```python
|
|
39
41
|
import pyarrow as pa
|
|
@@ -74,11 +76,15 @@ with session.transaction() as tx:
|
|
|
74
76
|
# the transaction is automatically committed when exiting the context
|
|
75
77
|
```
|
|
76
78
|
|
|
79
|
+
For configuration examples, see [here](docs/config.md).
|
|
80
|
+
|
|
77
81
|
Note: the transaction must remain open while the returned [pyarrow.RecordBatchReader](https://arrow.apache.org/docs/python/generated/pyarrow.RecordBatchReader.html) generator is being used.
|
|
78
82
|
|
|
79
|
-
##
|
|
83
|
+
## Use Cases
|
|
84
|
+
|
|
85
|
+
### Filters and Projections
|
|
80
86
|
|
|
81
|
-
|
|
87
|
+
The SDK supports predicate and projection pushdown:
|
|
82
88
|
|
|
83
89
|
```python
|
|
84
90
|
from ibis import _
|
|
@@ -97,9 +103,9 @@ Our SDK supports predicate and projection pushdown:
|
|
|
97
103
|
|
|
98
104
|
See [here for more details](docs/predicate.md).
|
|
99
105
|
|
|
100
|
-
|
|
106
|
+
### Import a single Parquet file via S3 protocol
|
|
101
107
|
|
|
102
|
-
|
|
108
|
+
You can efficiently create tables from Parquet files (without copying them via the client):
|
|
103
109
|
|
|
104
110
|
```python
|
|
105
111
|
with tempfile.NamedTemporaryFile() as f:
|
|
@@ -112,9 +118,9 @@ It is possible to efficiently create a table from a Parquet file (without copyin
|
|
|
112
118
|
parquet_files=['/bucket-name/staging/file.parquet'])
|
|
113
119
|
```
|
|
114
120
|
|
|
115
|
-
|
|
121
|
+
### Import multiple Parquet files concurrently via S3 protocol
|
|
116
122
|
|
|
117
|
-
|
|
123
|
+
Import multiple files concurrently into a table (by using multiple CNodes' cores):
|
|
118
124
|
|
|
119
125
|
```python
|
|
120
126
|
schema = tx.bucket('bucket-name').schema('schema-name')
|
|
@@ -123,6 +129,27 @@ We can import multiple files concurrently into a table (by utilizing multiple CN
|
|
|
123
129
|
parquet_files=[f'/bucket-name/staging/file{i}.parquet' for i in range(10)])
|
|
124
130
|
```
|
|
125
131
|
|
|
132
|
+
|
|
133
|
+
### Semi-sorted Projections
|
|
134
|
+
|
|
135
|
+
Create, list and delete [available semi-sorted projections](https://support.vastdata.com/s/article/UUID-e4ca42ab-d15b-6b72-bd6b-f3c77b455de4):
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
p = table.create_projection('proj', sorted=['c3'], unsorted=['c1'])
|
|
139
|
+
print(table.projections())
|
|
140
|
+
print(p.get_stats())
|
|
141
|
+
p.drop()
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
### Snapshots
|
|
145
|
+
|
|
146
|
+
You can access the VAST Database using [snapshots](https://vastdata.com/blog/bringing-snapshots-to-vasts-element-store):
|
|
147
|
+
|
|
148
|
+
```python
|
|
149
|
+
snaps = bucket.list_snapshots()
|
|
150
|
+
batches = snaps[0].schema('schema-name').table('table-name').select()
|
|
151
|
+
```
|
|
152
|
+
|
|
126
153
|
## Post-processing
|
|
127
154
|
|
|
128
155
|
### Export
|
|
@@ -136,9 +163,9 @@ with contextlib.closing(pa.parquet.ParquetWriter('/path/to/file.parquet', batche
|
|
|
136
163
|
writer.write_batch(batch)
|
|
137
164
|
```
|
|
138
165
|
|
|
139
|
-
### DuckDB
|
|
166
|
+
### DuckDB Integration
|
|
140
167
|
|
|
141
|
-
|
|
168
|
+
Use [DuckDB](https://duckdb.org/docs/guides/python/sql_on_arrow.html) to post-process the resulting stream of [PyArrow record batches](https://arrow.apache.org/docs/python/data.html#record-batches):
|
|
142
169
|
|
|
143
170
|
```python
|
|
144
171
|
from ibis import _
|
|
@@ -152,31 +179,11 @@ with session.transaction() as tx:
|
|
|
152
179
|
print(conn.execute("SELECT sum(c1) FROM batches").arrow())
|
|
153
180
|
```
|
|
154
181
|
|
|
155
|
-
Note: the transaction must be active while DuckDB query is executing and fetching results using the Python SDK.
|
|
156
|
-
|
|
157
|
-
## Semi-sorted projections
|
|
158
|
-
|
|
159
|
-
We can create, list and delete [available semi-sorted projections](https://support.vastdata.com/s/article/UUID-e4ca42ab-d15b-6b72-bd6b-f3c77b455de4):
|
|
160
|
-
|
|
161
|
-
```python
|
|
162
|
-
p = table.create_projection('proj', sorted=['c3'], unsorted=['c1'])
|
|
163
|
-
print(table.projections())
|
|
164
|
-
print(p.get_stats())
|
|
165
|
-
p.drop()
|
|
166
|
-
```
|
|
167
|
-
|
|
168
|
-
## Snapshots
|
|
169
|
-
|
|
170
|
-
It is possible to use [snapshots](https://vastdata.com/blog/bringing-snapshots-to-vasts-element-store) for accessing the Database:
|
|
171
|
-
|
|
172
|
-
```python
|
|
173
|
-
snaps = bucket.list_snapshots()
|
|
174
|
-
batches = snaps[0].schema('schema-name').table('table-name').select()
|
|
175
|
-
```
|
|
182
|
+
Note: the transaction must be active while the DuckDB query is executing and fetching results using the Python SDK.
|
|
176
183
|
|
|
177
184
|
## VAST Catalog
|
|
178
185
|
|
|
179
|
-
[VAST Catalog](https://vastdata.com/blog/vast-catalog-treat-your-file-system-like-a-database) can be queried as a regular table:
|
|
186
|
+
The [VAST Catalog](https://vastdata.com/blog/vast-catalog-treat-your-file-system-like-a-database) can be queried as a regular table:
|
|
180
187
|
|
|
181
188
|
```python
|
|
182
189
|
import pyarrow as pa
|
|
@@ -201,8 +208,11 @@ with session.transaction() as tx:
|
|
|
201
208
|
print("Distinct element types on the system:")
|
|
202
209
|
print(distinct_elements)
|
|
203
210
|
```
|
|
211
|
+
## More Information
|
|
204
212
|
|
|
205
|
-
See
|
|
213
|
+
See these blog posts for more examples:
|
|
206
214
|
|
|
207
215
|
- https://vastdata.com/blog/the-vast-catalog-in-action-part-1
|
|
208
216
|
- https://vastdata.com/blog/the-vast-catalog-in-action-part-2
|
|
217
|
+
|
|
218
|
+
See also the [full VAST DB Python SDK documentation](https://vastdb-sdk.readthedocs.io/en/v1.1.0/).
|
|
@@ -35,6 +35,7 @@ from ibis.expr.operations.logical import (
|
|
|
35
35
|
)
|
|
36
36
|
from ibis.expr.operations.relations import Field
|
|
37
37
|
from ibis.expr.operations.strings import StringContains
|
|
38
|
+
from ibis.expr.operations.structs import StructField
|
|
38
39
|
|
|
39
40
|
import vast_flatbuf.org.apache.arrow.computeir.flatbuf.BinaryLiteral as fb_binary_lit
|
|
40
41
|
import vast_flatbuf.org.apache.arrow.computeir.flatbuf.BooleanLiteral as fb_bool_lit
|
|
@@ -182,7 +183,7 @@ class Predicate:
|
|
|
182
183
|
_logger.debug('OR args: %s op %s', or_args, op)
|
|
183
184
|
inner_offsets = []
|
|
184
185
|
|
|
185
|
-
|
|
186
|
+
prev_field_path = None
|
|
186
187
|
for inner_op in or_args:
|
|
187
188
|
_logger.debug('inner_op %s', inner_op)
|
|
188
189
|
op_type = type(inner_op)
|
|
@@ -216,28 +217,38 @@ class Predicate:
|
|
|
216
217
|
if not isinstance(literal, Literal):
|
|
217
218
|
raise NotImplementedError(self.expr)
|
|
218
219
|
|
|
220
|
+
field_path = []
|
|
221
|
+
while isinstance(column, StructField):
|
|
222
|
+
column, subfield_name = column.args
|
|
223
|
+
field_path.append(subfield_name)
|
|
224
|
+
|
|
219
225
|
if not isinstance(column, Field):
|
|
220
226
|
raise NotImplementedError(self.expr)
|
|
221
227
|
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
228
|
+
field_path.append(column.name)
|
|
229
|
+
field_path.reverse() # first entry should be the top-level column name
|
|
230
|
+
|
|
231
|
+
if prev_field_path is None:
|
|
232
|
+
prev_field_path = field_path
|
|
233
|
+
elif prev_field_path != field_path:
|
|
226
234
|
raise NotImplementedError(self.expr)
|
|
227
235
|
|
|
228
|
-
|
|
236
|
+
nodes_map = self.nodes_map
|
|
237
|
+
for name in field_path:
|
|
238
|
+
node = nodes_map[name]
|
|
239
|
+
nodes_map = node.children_map
|
|
240
|
+
|
|
229
241
|
# TODO: support predicate pushdown for leaf nodes (ORION-160338)
|
|
230
242
|
if node.children:
|
|
231
243
|
raise NotImplementedError(node.field) # no predicate pushdown for nested columns
|
|
232
244
|
column_offset = self.build_column(position=node.index)
|
|
233
|
-
field = self.schema.field(field_name)
|
|
234
245
|
for literal in literals:
|
|
235
246
|
args_offsets = [column_offset]
|
|
236
247
|
if literal is not None:
|
|
237
|
-
args_offsets.append(self.build_literal(field=field, value=literal.value))
|
|
248
|
+
args_offsets.append(self.build_literal(field=node.field, value=literal.value))
|
|
238
249
|
if builder_func == self.build_between:
|
|
239
|
-
args_offsets.append(self.build_literal(field=field, value=lower.value))
|
|
240
|
-
args_offsets.append(self.build_literal(field=field, value=upper.value))
|
|
250
|
+
args_offsets.append(self.build_literal(field=node.field, value=lower.value))
|
|
251
|
+
args_offsets.append(self.build_literal(field=node.field, value=upper.value))
|
|
241
252
|
|
|
242
253
|
inner_offsets.append(builder_func(*args_offsets))
|
|
243
254
|
|
|
@@ -572,6 +583,8 @@ class FieldNode:
|
|
|
572
583
|
else:
|
|
573
584
|
self.children = [] # for non-nested types
|
|
574
585
|
|
|
586
|
+
self.children_map = {c.field.name: c for c in self.children}
|
|
587
|
+
|
|
575
588
|
def _iter_to_root(self) -> Iterator['FieldNode']:
|
|
576
589
|
yield self
|
|
577
590
|
if self.parent is not None:
|
|
@@ -811,7 +824,16 @@ class VastdbApi:
|
|
|
811
824
|
_logger.critical(msg)
|
|
812
825
|
raise NotImplementedError(msg)
|
|
813
826
|
|
|
827
|
+
def __enter__(self):
|
|
828
|
+
"""Allow using this session as a context manager."""
|
|
829
|
+
return self
|
|
830
|
+
|
|
831
|
+
def __exit__(self, *args):
|
|
832
|
+
"""Make sure that the connections are closed."""
|
|
833
|
+
self._session.close()
|
|
834
|
+
|
|
814
835
|
def with_endpoint(self, endpoint):
|
|
836
|
+
"""Open a new session for targeting a specific endpoint."""
|
|
815
837
|
return VastdbApi(endpoint=endpoint,
|
|
816
838
|
access_key=self.access_key,
|
|
817
839
|
secret_key=self.secret_key,
|
|
@@ -1276,6 +1298,15 @@ class VastdbApi:
|
|
|
1276
1298
|
|
|
1277
1299
|
return columns, next_key, is_truncated, count
|
|
1278
1300
|
|
|
1301
|
+
def head_bucket(self, bucket_name):
    """
    Issue a HEAD request against the given bucket and return the result of that request.

    Implemented here so that checking for the existence of a bucket does not depend on boto3.
    https://docs.aws.amazon.com/AmazonS3/latest/API/API_HeadBucket.html
    """
    bucket_url = self._url(bucket=bucket_name)
    return self._request(method="HEAD", url=bucket_url)
|
|
1309
|
+
|
|
1279
1310
|
def begin_transaction(self, client_tags=[], expected_retvals=[]):
|
|
1280
1311
|
"""
|
|
1281
1312
|
POST /?transaction HTTP/1.1
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
from typing import List, Optional
|
|
2
|
+
|
|
3
|
+
from vastdb.bench.perf_bench.common.constants import (
|
|
4
|
+
LOCAL_FS_DS_PATH,
|
|
5
|
+
NFS_DS_PATH, # noqa: F401
|
|
6
|
+
S3_DS_PATH, # noqa: F401
|
|
7
|
+
ParquetCompression,
|
|
8
|
+
VastConnDetails,
|
|
9
|
+
)
|
|
10
|
+
from vastdb.bench.perf_bench.dataset.schemas import DEFAULT_BARS_COLUMNS
|
|
11
|
+
from vastdb.bench.perf_bench.orchestrate.scenario import BenchScenario
|
|
12
|
+
from vastdb.bench.perf_bench.orchestrate.scenario_generator import (
|
|
13
|
+
generate_perf_bench_scenarios,
|
|
14
|
+
)
|
|
15
|
+
from vastdb.bench.perf_bench.query.query import QueryBackend
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def build_scenarios(
    base_key: str,
    conn_details: Optional[VastConnDetails] = None,
) -> List[BenchScenario]:
    """Return the benchmark scenarios for the "mega combo" configuration.

    All of the work is delegated to ``generate_perf_bench_scenarios``; this function only
    pins the option values handed to it. Values that are currently left out are mentioned
    in the comments next to each option.

    :param base_key: forwarded unchanged as ``base_key``.
    :param conn_details: connection details to forward; a falsy value (including the
        default ``None``) is replaced by a default-constructed ``VastConnDetails``.
    :return: whatever ``generate_perf_bench_scenarios`` returns for these options.
    """
    effective_conn = conn_details if conn_details else VastConnDetails()

    backends = [QueryBackend.pyarrow]  # QueryBackend.vastdb is currently left out
    column_sets = (DEFAULT_BARS_COLUMNS,)
    universes = ("Single", "Tiny", "SmallSeq", "Medium", "Medium2", "Large")
    business_days = [1, 5, 65, 252]  # 1d, 1w, 3m, 1y (22 = 1m and 130 = 6m are left out)

    # Arrow-specific options
    fs_paths = [LOCAL_FS_DS_PATH]  # NFS_DS_PATH and S3_DS_PATH are left out
    # make sure you have previously generated the respective datasets
    # (left out: 64 * 1024, 128 * 1024, 512 * 1024, DEFAULT_ROW_GROUP_SIZE, int(1.5 * 1024 * 1024))
    rowgroup_sizes = [256 * 1024]
    compressions = [ParquetCompression.LZ4]
    # left out: batch sizes of 2, 6, 24, 32, 64 and 128 times 2**16, and DEFAULT_ARROW_KWARGS
    arrow_batching_specs = [
        {"batch_size": 16 * 2 ** 16, "batch_readahead": 16, "fragment_readahead": 4},
    ]

    # VastDB-specific options
    sub_splits = (8,)  # 1 is the default; 4 and 16 are left out
    row_groups_per_sub_split = (8,)  # 8 is the default; 1 and 4 are left out

    return generate_perf_bench_scenarios(
        base_key=base_key,
        conn_details=effective_conn,
        query_backends=backends,
        columns_choices=column_sets,
        universe_choices=universes,
        num_bdays=business_days,
        fs_path_choices=fs_paths,
        rowgroup_size_choices=rowgroup_sizes,
        compression_choices=compressions,
        arrow_batching_spec_choices=arrow_batching_specs,
        vdb_num_sub_splits_choices=sub_splits,
        vdb_num_row_groups_per_sub_split_choices=row_groups_per_sub_split,
    )
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from typing import Annotated, List, Optional
|
|
3
|
+
|
|
4
|
+
import pandas as pd
|
|
5
|
+
import typer
|
|
6
|
+
|
|
7
|
+
from vastdb.bench.perf_bench.common.constants import (
|
|
8
|
+
DEFAULT_END_T,
|
|
9
|
+
DEFAULT_RESULTS_DIR,
|
|
10
|
+
DEFAULT_START_T,
|
|
11
|
+
DFAULT_PARQUET_COMPRESSION,
|
|
12
|
+
LOCAL_FS_DS_PATH,
|
|
13
|
+
LogLevel,
|
|
14
|
+
ParquetCompression,
|
|
15
|
+
)
|
|
16
|
+
from vastdb.bench.perf_bench.common.log_utils import (
|
|
17
|
+
get_logger,
|
|
18
|
+
set_log_file,
|
|
19
|
+
set_log_level,
|
|
20
|
+
)
|
|
21
|
+
from vastdb.bench.perf_bench.common.utils import getenv_flag, load_module_from_path
|
|
22
|
+
from vastdb.bench.perf_bench.dataset.generate_secmaster import (
|
|
23
|
+
SM_PATH,
|
|
24
|
+
generate_secmaster,
|
|
25
|
+
)
|
|
26
|
+
from vastdb.bench.perf_bench.dataset.generate_stocks_dataset import (
|
|
27
|
+
generate_concurrent_synthetic_stock_1m_bars,
|
|
28
|
+
)
|
|
29
|
+
from vastdb.bench.perf_bench.orchestrate.scenario import BenchScenario
|
|
30
|
+
from vastdb.bench.perf_bench.run import run_scenarios
|
|
31
|
+
|
|
32
|
+
# Typer application object; the callback and the commands defined below are registered on it.
# Pretty exception rendering is toggled by the TYPER_PRETTY_EXCEPTIONS flag read via getenv_flag.
app = typer.Typer(pretty_exceptions_enable=getenv_flag("TYPER_PRETTY_EXCEPTIONS"))

# Directory containing this module; used to locate the default bench generator script.
_MY_DIR = Path(__file__).parent
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# noinspection PyUnusedLocal
|
|
38
|
+
@app.callback()
def cli_common(
    ctx: typer.Context,
    verbose: Annotated[bool, typer.Option("--verbose", is_flag=True)] = False,
    log_level: Annotated[
        Optional[LogLevel],
        typer.Option("--log-level", case_sensitive=False),
    ] = None,
    log_file: Annotated[
        Optional[Path],
        typer.Option(
            "--log-file",
            writable=True,
            file_okay=True,
            dir_okay=False,
            resolve_path=True,
        ),
    ] = None,
):
    # Logging setup shared by every command of this app.
    # (Documented with comments instead of a docstring so the generated --help text stays as it was.)

    # --verbose takes precedence over an explicitly given --log-level.
    effective_level = LogLevel.DEBUG if verbose else log_level
    if effective_level:
        set_log_level(effective_level)

    # Only redirect logging to a file when one was requested.
    if log_file:
        set_log_file(log_file)

    get_logger(__name__).info("CLI common setup done.")
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _positive_int(value: str) -> int:
|
|
76
|
+
i_value = int(value)
|
|
77
|
+
if i_value <= 0:
|
|
78
|
+
raise typer.BadParameter(f"Must be a positive integer: {value}.")
|
|
79
|
+
return i_value
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# noinspection PyUnusedLocal
|
|
83
|
+
@app.command()
def run_bench(
    ctx: typer.Context,
    bench_name: Annotated[
        str,
        typer.Option(
            "--bench-name",
        ),
    ],
    parallelism: Annotated[
        List[int],
        typer.Option(
            "--parallelism",
            callback=lambda par: [_positive_int(p) for p in par],
        ),
    ],
    runs_per_bench: Annotated[
        int,
        typer.Option(
            "--runs-per-bench",
            callback=_positive_int,
        ),
    ] = 3,
    bench_generator_path: Annotated[
        Path,
        typer.Option(
            "--bench-generator-path",
            readable=True,
            file_okay=True,
            dir_okay=False,
            resolve_path=True,
        ),
    ] = _MY_DIR / "bench_repo" / "mega_combo.py",
    results_base_dir: Annotated[
        Path,
        typer.Option(
            # This option used to be exposed only as "--log-file" (copied from the logging
            # option of the app callback). "--results-base-dir" is the proper name; the old
            # spelling is kept as an alias so existing invocations keep working.
            "--results-base-dir",
            "--log-file",
            writable=True,
            file_okay=False,
            dir_okay=True,
            resolve_path=True,
        ),
    ] = DEFAULT_RESULTS_DIR,
):
    # Run every scenario produced by a bench generator module, once per requested
    # parallelism level.
    #
    # bench_name:           base key handed to the generator; must not be blank.
    # parallelism:          one or more positive integers; the scenarios are run once for each.
    # runs_per_bench:       positive integer forwarded to run_scenarios.
    # bench_generator_path: Python file that must expose build_scenarios(base_key=...).
    # results_base_dir:     directory forwarded (as a string) to run_scenarios.
    #
    # Raises typer.BadParameter when bench_name is empty after stripping whitespace.
    if not (bench_name := bench_name.strip()):
        raise typer.BadParameter("Bench name must be non-empty.")

    # The scenario list is built once and reused for every parallelism level.
    mod = load_module_from_path(bench_generator_path)
    scenarios: List[BenchScenario] = mod.build_scenarios(base_key=bench_name)
    for para in parallelism:
        run_scenarios(
            scenarios=scenarios,
            runs_per_bench=runs_per_bench,
            parallelism=para,
            results_base_dir=str(results_base_dir),
        )
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
# noinspection PyUnusedLocal
|
|
142
|
+
@app.command()
def build_secmaster(ctx: typer.Context):
    # CLI entry point without options of its own: it only triggers generate_secmaster().
    generate_secmaster()
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
# noinspection PyUnusedLocal
|
|
150
|
+
@app.command()
def build_dataset(
    ctx: typer.Context,
    start_date: Annotated[
        str,
        typer.Option(
            "--start-date",
            help="Start date for the dataset.",
            callback=lambda d: pd.Timestamp(d).normalize(),
        ),
    ] = DEFAULT_START_T.strftime("%Y%m%d"),
    end_date: Annotated[
        str,
        typer.Option(
            "--end-date",
            # The help text previously read "Start date ..." (copied from --start-date).
            help="End date for the dataset.",
            callback=lambda d: pd.Timestamp(d).normalize(),
        ),
    ] = DEFAULT_END_T.strftime("%Y%m%d"),
    output_dir: Annotated[
        Path,
        typer.Option(
            "--output-dir",
            writable=True,
            file_okay=False,
            dir_okay=True,
            resolve_path=True,
        ),
    ] = LOCAL_FS_DS_PATH,
    parallelism: Annotated[
        int,
        typer.Option(
            "--parallelism",
            callback=_positive_int,
        ),
    ] = 6,
    row_group_size: Annotated[
        int,
        typer.Option(
            "--row-group-size",
            callback=_positive_int,
            help=(
                "Row group size for the dataset, some common values are: 64 * 1024, 128 * 1024, 256"
                " * 1024, 512 * 1024,1024 * 1024, 1.5 * 1024 * 1024."
            ),
        ),
    ] = 256 * 1024,
    compression: Annotated[
        ParquetCompression,
        typer.Option(
            "--compression",
            help="Parquet compression algorithm.",
        ),
    ] = DFAULT_PARQUET_COMPRESSION,
):
    # Generate the synthetic 1-minute stock bars dataset.
    #
    # start_date / end_date: parsed by their option callbacks into pandas Timestamps
    #                        normalized to midnight, so they are compared as timestamps here.
    # output_dir:            forwarded as the dataset output directory.
    # parallelism:           forwarded as the number of workers.
    # row_group_size:        forwarded as the row group size; must be at least 1024.
    # compression:           forwarded as the Parquet compression choice.
    #
    # Raises typer.BadParameter when one of the checks below fails.
    if row_group_size < 1024:
        raise typer.BadParameter("Row group size must be at least 1024.")
    if parallelism < 1:
        raise typer.BadParameter("Parallelism must be at least 1.")
    if start_date > end_date:
        raise typer.BadParameter("Start date must be before the end date.")

    # Generate the security master first when its file is not there yet.
    if not SM_PATH.is_file():
        generate_secmaster()

    generate_concurrent_synthetic_stock_1m_bars(
        from_t=start_date,
        to_t=end_date,
        output_dir=output_dir,
        num_workers=parallelism,
        row_group_size=row_group_size,
        compression=compression,
    )
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
if __name__ == "__main__":
    # Dispatch to the Typer app only when this file is executed as a script,
    # not when the module is merely imported.
    app()
|
|
File without changes
|