vastdb 2.0.2__tar.gz → 2.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vastdb-2.0.2 → vastdb-2.0.3}/CHANGELOG.md +10 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/PKG-INFO +2 -1
- {vastdb-2.0.2 → vastdb-2.0.3}/README.md +63 -43
- {vastdb-2.0.2 → vastdb-2.0.3}/requirements.txt +1 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/setup.py +1 -1
- vastdb-2.0.3/vastdb/_adbc.py +194 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/_internal.py +101 -12
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/_table_interface.py +20 -3
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/conftest.py +23 -1
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/errors.py +5 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/schema.py +17 -2
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/session.py +12 -5
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/table.py +56 -20
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/table_metadata.py +58 -34
- vastdb-2.0.3/vastdb/tests/test_adbc_integration.py +89 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/tests/test_tables.py +35 -1
- vastdb-2.0.3/vastdb/tests/test_vector_index.py +162 -0
- vastdb-2.0.3/vastdb/tests/test_vector_search.py +210 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/tests/util.py +3 -2
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/transaction.py +30 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/tabular/GetTableStatsResponse.py +51 -59
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/tabular/ObjectDetails.py +36 -59
- vastdb-2.0.3/vastdb/vast_flatbuf/tabular/VectorIndexMetadata.py +67 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/tabular/VipRange.py +19 -12
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb.egg-info/PKG-INFO +2 -1
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb.egg-info/SOURCES.txt +5 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb.egg-info/requires.txt +1 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/CONTRIBUTING.md +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/LICENSE +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/MANIFEST.in +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/setup.cfg +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/_ibis_support.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/bench/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/bench/perf_bench/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/bench/perf_bench/bench_repo/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/bench/perf_bench/bench_repo/mega_combo.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/bench/perf_bench/cli.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/bench/perf_bench/common/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/bench/perf_bench/common/constants.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/bench/perf_bench/common/log_utils.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/bench/perf_bench/common/types.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/bench/perf_bench/common/utils.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/bench/perf_bench/dataset/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/bench/perf_bench/dataset/generate_secmaster.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/bench/perf_bench/dataset/generate_stocks_dataset.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/bench/perf_bench/dataset/schemas.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/bench/perf_bench/dataset/secmaster.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/bench/perf_bench/orchestrate/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/bench/perf_bench/orchestrate/bench_spec.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/bench/perf_bench/orchestrate/results_helpers.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/bench/perf_bench/orchestrate/scenario.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/bench/perf_bench/orchestrate/scenario_generator.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/bench/perf_bench/query/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/bench/perf_bench/query/arrow_common.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/bench/perf_bench/query/query.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/bench/perf_bench/query/query_pyarrow.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/bench/perf_bench/query/query_vastdb.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/bench/perf_bench/run.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/bench/test_perf.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/bench/test_sample.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/bucket.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/config.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/features.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/tests/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/tests/metrics.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/tests/test_duckdb.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/tests/test_fixed_list.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/tests/test_imports.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/tests/test_nested.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/tests/test_projections.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/tests/test_sanity.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/tests/test_schemas.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/tests/test_table_in_tx.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/tests/test_util.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/util.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Aggregate.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/ArraySlice.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/ArraySubscript.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/BinaryLiteral.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/BooleanLiteral.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Bound.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Call.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/CaseFragment.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Cast.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/ConcreteBoundImpl.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/ConditionalCase.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/CurrentRow.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/DateLiteral.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/DecimalLiteral.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Deref.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/DurationLiteral.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Expression.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/ExpressionImpl.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/FieldIndex.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/FieldRef.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Filter.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/FixedSizeBinaryLiteral.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Float16Literal.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Float32Literal.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Float64Literal.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Following.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Frame.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Grouping.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Int16Literal.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Int32Literal.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Int64Literal.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Int8Literal.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/IntervalLiteral.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/IntervalLiteralDaysMilliseconds.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/IntervalLiteralImpl.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/IntervalLiteralMonths.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Join.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/JoinKind.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/KeyValue.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Limit.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/ListLiteral.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Literal.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/LiteralColumn.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/LiteralImpl.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/LiteralRelation.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/MapKey.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/MapLiteral.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/OrderBy.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Ordering.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Plan.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Preceding.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Project.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/RelId.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Relation.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/RelationImpl.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/SetOpKind.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/SetOperation.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/SimpleCase.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/SortKey.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Source.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/StringLiteral.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/StructField.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/StructLiteral.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/TimeLiteral.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/TimestampLiteral.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/UInt16Literal.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/UInt32Literal.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/UInt64Literal.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/UInt8Literal.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Unbounded.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/WindowCall.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Binary.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Block.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/BodyCompression.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/BodyCompressionMethod.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Bool.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Buffer.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/CompressionType.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Date.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/DateUnit.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Decimal.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/DictionaryBatch.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/DictionaryEncoding.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/DictionaryKind.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Duration.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Endianness.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Feature.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Field.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/FieldNode.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/FixedSizeBinary.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/FixedSizeList.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/FloatingPoint.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Footer.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Int.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Interval.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/IntervalUnit.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/KeyValue.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/LargeBinary.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/LargeList.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/LargeUtf8.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/List.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Map.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Message.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/MessageHeader.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/MetadataVersion.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Null.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Precision.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/RecordBatch.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Schema.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/SparseMatrixCompressedAxis.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/SparseMatrixIndexCSX.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/SparseTensor.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/SparseTensorIndex.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/SparseTensorIndexCOO.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/SparseTensorIndexCSF.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Struct_.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Tensor.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/TensorDim.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Time.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/TimeUnit.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Timestamp.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Type.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Union.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/UnionMode.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Utf8.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/tabular/AlterColumnRequest.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/tabular/AlterProjectionTableRequest.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/tabular/AlterSchemaRequest.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/tabular/AlterTableRequest.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/tabular/Column.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/tabular/ColumnDetails.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/tabular/ColumnType.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/tabular/CreateProjectionRequest.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/tabular/CreateSchemaRequest.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/tabular/CreateViewRequest.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/tabular/FilterString.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/tabular/GetProjectionTableStatsResponse.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/tabular/GetRowColumnSecurityResponse.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/tabular/ImportDataRequest.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/tabular/KeyName.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/tabular/ListProjectionsResponse.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/tabular/ListSchemasResponse.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/tabular/ListTablesResponse.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/tabular/ListViewsResponse.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/tabular/NameString.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/tabular/S3File.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_flatbuf/tabular/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_tests/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_tests/test_ha.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb/vast_tests/test_scale.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb.egg-info/dependency_links.txt +0 -0
- {vastdb-2.0.2 → vastdb-2.0.3}/vastdb.egg-info/top_level.txt +0 -0
|
@@ -4,6 +4,16 @@ All notable changes to this project will be documented in this file.
|
|
|
4
4
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
|
6
6
|
|
|
7
|
+
## [2.0.3] (2025-11-25)
|
|
8
|
+
[2.0.3]: https://github.com/vast-data/vastdb_sdk/compare/v2.0.2...v2.0.3
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- Support for vastdb's adbc driver
|
|
12
|
+
- Session now gets an Optional[AdbcDriver]
|
|
13
|
+
- AdbcDriver is created out of a local path to a driver `.so` (shared library) or a url
|
|
14
|
+
- Vector Search capability
|
|
15
|
+
- `Table.vector_search` - Approximate Top-N closest vectors
|
|
16
|
+
|
|
7
17
|
## [2.0.2] (2025-10-22)
|
|
8
18
|
[2.0.2]: https://github.com/vast-data/vastdb_sdk/compare/v2.0.1...v2.0.2
|
|
9
19
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: vastdb
|
|
3
|
-
Version: 2.0.2
|
|
3
|
+
Version: 2.0.3
|
|
4
4
|
Summary: VAST Data SDK
|
|
5
5
|
Home-page: https://github.com/vast-data/vastdb_sdk
|
|
6
6
|
Author: VAST DATA
|
|
@@ -29,6 +29,7 @@ Requires-Dist: numpy
|
|
|
29
29
|
Requires-Dist: requests
|
|
30
30
|
Requires-Dist: xmltodict
|
|
31
31
|
Requires-Dist: backoff==2.2.1
|
|
32
|
+
Requires-Dist: adbc_driver_manager
|
|
32
33
|
Dynamic: author
|
|
33
34
|
Dynamic: author-email
|
|
34
35
|
Dynamic: classifier
|
|
@@ -19,16 +19,12 @@ For technical details about VAST Database architecture, see the [whitepaper](htt
|
|
|
19
19
|
### Requirements
|
|
20
20
|
|
|
21
21
|
- Linux client with Python 3.10 - 3.13, and network access to the VAST Cluster
|
|
22
|
+
- VAST Cluster release `5.0.0-sp10` or later
|
|
23
|
+
- If your VAST Cluster is running an older release, please contact customer.support@vastdata.com.
|
|
22
24
|
- [Virtual IP pool configured with DNS service](https://support.vastdata.com/s/topic/0TOV40000000FThOAM/configuring-network-access-v50)
|
|
23
25
|
- [S3 access & secret keys on the VAST cluster](https://support.vastdata.com/s/article/UUID-4d2e7e23-b2fb-7900-d98f-96c31a499626)
|
|
24
26
|
- [Tabular identity policy with the proper permissions](https://support.vastdata.com/s/article/UUID-14322b60-d6a2-89ac-3df0-3dfbb6974182)
|
|
25
27
|
|
|
26
|
-
### Required VAST Cluster release
|
|
27
|
-
|
|
28
|
-
VAST DB Python SDK requires VAST Cluster release `5.0.0-sp10` or later.
|
|
29
|
-
|
|
30
|
-
If your VAST Cluster is running an older release, please contact customer.support@vastdata.com.
|
|
31
|
-
|
|
32
28
|
### Installation
|
|
33
29
|
|
|
34
30
|
```bash
|
|
@@ -80,61 +76,81 @@ with session.transaction() as tx:
|
|
|
80
76
|
# the transaction is automatically committed when exiting the context
|
|
81
77
|
```
|
|
82
78
|
|
|
83
|
-
|
|
79
|
+
Note: the transaction must remain open while the returned [pyarrow.RecordBatchReader](https://arrow.apache.org/docs/python/generated/pyarrow.RecordBatchReader.html) generator is being used.
|
|
84
80
|
|
|
85
81
|
The list of supported data types can be found [here](docs/types.md).
|
|
86
82
|
|
|
87
|
-
|
|
83
|
+
## Features
|
|
88
84
|
|
|
89
|
-
|
|
85
|
+
### Select Performance
|
|
90
86
|
|
|
91
|
-
|
|
87
|
+
The `Table.select()` method accepts a [QueryConfig](vastdb/config.py) object that modifies how the select is fulfilled.
|
|
92
88
|
|
|
93
|
-
The SDK
|
|
89
|
+
The most important setting is the `data_endpoints` parameter that, when set, will allow the SDK to parallelize the select across multiple CNodes. Without this, only the CNode specified in the `connect()` will service the select.
|
|
94
90
|
|
|
95
91
|
```python
|
|
96
|
-
|
|
92
|
+
from vastdb.config import QueryConfig
|
|
97
93
|
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
predicate=(_.c2 > 2) & _.c3.isnull())
|
|
94
|
+
# load default configuration values
|
|
95
|
+
cfg = QueryConfig()
|
|
101
96
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
97
|
+
# set data_endpoints to CNode VIPs
|
|
98
|
+
cfg.data_endpoints = [
|
|
99
|
+
"http://172.19.196.1",
|
|
100
|
+
"http://172.19.196.2",
|
|
101
|
+
"http://172.19.196.3",
|
|
102
|
+
"http://172.19.196.4",
|
|
103
|
+
]
|
|
105
104
|
|
|
106
|
-
|
|
107
|
-
table.select(predicate=_.c3.contains('substring'))
|
|
105
|
+
table.select(columns=['c1'], predicate=(_.c2 > 2), config=cfg)
|
|
108
106
|
```
|
|
109
107
|
|
|
110
|
-
|
|
108
|
+
If using DNS with either TTL=0 or multi-response per the [Best Practice on Load Balancing CNodes](https://support.vastdata.com/s/document-item?bundleId=z-kb-articles-publications-prod&topicId=6058049537.html&_LANG=enus), passing in the same DNS name repeated once per VIP in the pool is a decent proxy.
|
|
109
|
+
|
|
110
|
+
```python
|
|
111
|
+
cfg.data_endpoints = ["http://vip-pool.v123-xy.VastENG.lab"] * 16 # assuming 16 VIPs in the pool
|
|
112
|
+
```
|
|
111
113
|
|
|
112
|
-
###
|
|
114
|
+
### Filters and Projections
|
|
113
115
|
|
|
114
|
-
|
|
116
|
+
The SDK supports predicate and projection pushdown using Ibis:
|
|
115
117
|
|
|
116
118
|
```python
|
|
117
|
-
|
|
118
|
-
pa.parquet.write_table(arrow_table, f.name)
|
|
119
|
-
s3.put_object(Bucket='bucket-name', Key='staging/file.parquet', Body=f)
|
|
119
|
+
from ibis import _
|
|
120
120
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
121
|
+
# SELECT c1 FROM t WHERE (c2 > 2) AND (c3 IS NULL)
|
|
122
|
+
table.select(columns=['c1'],
|
|
123
|
+
predicate=(_.c2 > 2) & _.c3.isnull())
|
|
124
|
+
|
|
125
|
+
# SELECT c2, c3 FROM t WHERE (c2 BETWEEN 0 AND 1) OR (c2 > 10)
|
|
126
|
+
table.select(columns=['c2', 'c3'],
|
|
127
|
+
predicate=(_.c2.between(0, 1) | (_.c2 > 10)))
|
|
128
|
+
|
|
129
|
+
# SELECT * FROM t WHERE c3 LIKE '%substring%'
|
|
130
|
+
table.select(predicate=_.c3.contains('substring'))
|
|
125
131
|
```
|
|
126
132
|
|
|
127
|
-
|
|
133
|
+
See the [Predicate pushdown support document](docs/predicate.md) for more information on constructing predicates using Ibis.
|
|
128
134
|
|
|
129
|
-
Import
|
|
135
|
+
### Import Parquet files via S3 protocol
|
|
136
|
+
|
|
137
|
+
You can efficiently create tables from Parquet files that already exist in an S3 bucket on VAST without copying them via the client. If more than one file is included in `parquet_files` they will be loaded concurrently.
|
|
130
138
|
|
|
131
139
|
```python
|
|
132
|
-
|
|
140
|
+
with session.transaction() as tx:
|
|
141
|
+
schema = tx.bucket('database-name').schema('schema-name')
|
|
133
142
|
table = util.create_table_from_files(
|
|
134
|
-
schema=schema, table_name='
|
|
135
|
-
parquet_files=[
|
|
143
|
+
schema=schema, table_name='imported-table',
|
|
144
|
+
parquet_files=['/bucket-name/staging/file.parquet'])
|
|
136
145
|
```
|
|
137
146
|
|
|
147
|
+
If the table already exists, you can use the `table.import_files()` method to add more data to the table from Parquet files that already exist in an S3 bucket on VAST.
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
with session.transaction() as tx:
|
|
151
|
+
table = tx.bucket('database-name').schema('schema-name').table('table-name')
|
|
152
|
+
table.import_files(["/bucket-name/staging/file2.parquet"])
|
|
153
|
+
```
|
|
138
154
|
|
|
139
155
|
### Semi-sorted Projections
|
|
140
156
|
|
|
@@ -158,9 +174,9 @@ batches = snaps[0].schema('schema-name').table('table-name').select()
|
|
|
158
174
|
|
|
159
175
|
## Interactive and Non-Interactive Workflows
|
|
160
176
|
|
|
161
|
-
|
|
177
|
+
A `Table` created via the `Schema` object (`tx.bucket('..').schema('..').table('..')`) loads metadata and stats eagerly allowing for interactive development. Each object (bucket, schema, table) requires one or more round-trips to the server and `.table()` will fetch the full table schema.
|
|
162
178
|
|
|
163
|
-
|
|
179
|
+
It's generally more efficient to use the `TableMetadata` interface, which allows both lazy loading of the schema as it's needed and reuse of the metadata across transactions.
|
|
164
180
|
|
|
165
181
|
```python
|
|
166
182
|
# load the table schema & stats into an object we can use across transactions
|
|
@@ -172,7 +188,7 @@ with session.transaction() as tx:
|
|
|
172
188
|
table_md = TableMetadata(TableRef("bucket-name", "schema-name", "table-name"),
|
|
173
189
|
arrow_schema=<some-arrow-schema>)
|
|
174
190
|
|
|
175
|
-
# now we can reuse without the overhead of reloading the schema and stats,
|
|
191
|
+
# now we can reuse it without the overhead of reloading the schema and stats,
|
|
176
192
|
# such as for inserts:
|
|
177
193
|
with session.transaction() as tx:
|
|
178
194
|
table = tx.table_from_metadata(table_md)
|
|
@@ -188,6 +204,15 @@ with session.transaction() as tx:
|
|
|
188
204
|
print(results)
|
|
189
205
|
```
|
|
190
206
|
|
|
207
|
+
Some table operations, like `table.import_files()`, do not require the client to know the table schema, and using the `TableMetadata` interface will bypass fetching the schema entirely.
|
|
208
|
+
|
|
209
|
+
```python
|
|
210
|
+
table_md = TableMetadata(TableRef("bucket-name", "schema-name", "table-name"))
|
|
211
|
+
|
|
212
|
+
with session.transaction() as tx:
|
|
213
|
+
table = tx.table_from_metadata(table_md)
|
|
214
|
+
table.import_files(["/bucket-name/staging/file2.parquet"])
|
|
215
|
+
```
|
|
191
216
|
|
|
192
217
|
## Post-processing
|
|
193
218
|
|
|
@@ -248,9 +273,6 @@ with session.transaction() as tx:
|
|
|
248
273
|
print(distinct_elements)
|
|
249
274
|
```
|
|
250
275
|
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
276
|
## More Information
|
|
255
277
|
|
|
256
278
|
See these blog posts for more examples:
|
|
@@ -259,5 +281,3 @@ See these blog posts for more examples:
|
|
|
259
281
|
- https://vastdata.com/blog/the-vast-catalog-in-action-part-2
|
|
260
282
|
|
|
261
283
|
See also the [full Vast DB Python SDK documentation](https://vastdb-sdk.readthedocs.io/en/latest/)
|
|
262
|
-
|
|
263
|
-
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
import urllib.request
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
import pyarrow as pa
|
|
8
|
+
import sqlglot
|
|
9
|
+
from adbc_driver_manager.dbapi import Connection, Cursor, connect
|
|
10
|
+
from sqlglot import exp
|
|
11
|
+
|
|
12
|
+
from vastdb._internal import VectorIndex
|
|
13
|
+
from vastdb._table_interface import IbisPredicate
|
|
14
|
+
from vastdb.table_metadata import TableRef
|
|
15
|
+
|
|
16
|
+
log = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Property name used to override the transaction id.
# NOTE(review): presumably set on the ADBC connection so that it joins an
# externally-opened transaction -- confirm against the usage site.
TXID_OVERRIDE_PROPERTY: str = "vast.db.external_txid"

# NOTE(review): looks like the column alias given to the computed vector
# distance in generated queries -- confirm against the usage site.
VAST_DIST_ALIAS = "vast_pysdk_vector_dist"

# Root directory of the on-disk ADBC driver cache.
# "~" is expanded here because os.path.exists(), os.makedirs() and
# urllib.request.urlretrieve() treat it as a literal directory name; without
# the expansion the cache would be created under "./~/.vast/..." relative to
# the current working directory instead of the user's home directory.
DEFAULT_ADBC_DRIVER_CACHE_DIR: str = os.path.expanduser("~/.vast/adbc_drivers_cache")

# Sub-directory holding drivers downloaded from a URL (one file per URL hash).
DEFAULT_ADBC_DRIVER_CACHE_BY_URL_DIR: str = f"{DEFAULT_ADBC_DRIVER_CACHE_DIR}/by_url"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class LocalAdbcDriverNotFound(Exception):
    """Signals that an ADBC driver shared library is missing at the given local path."""
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class RemoteAdbcDriverDownloadFailed(Exception):
    """Signals that fetching an ADBC driver shared library from a URL failed."""
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class AdbcDriver:
    """Reference to an ADBC driver shared library on the local filesystem.

    Build instances with `from_local_path()` (an already-installed library) or
    `from_url()` (downloaded once and then served from a per-URL cache).
    """

    _local_path: str

    def __init__(self, local_path: str):
        self._local_path = local_path

    @staticmethod
    def from_local_path(local_path: str) -> "AdbcDriver":
        """AdbcDriver from a local_path to shared-library.

        Raises:
            LocalAdbcDriverNotFound: if nothing exists at `local_path`.
        """
        if not os.path.exists(local_path):
            raise LocalAdbcDriverNotFound(local_path)

        return AdbcDriver(local_path)

    @staticmethod
    def from_url(url: str) -> "AdbcDriver":
        """AdbcDriver to be downloaded by url to shared-library (uses cache if exists).

        Raises:
            RemoteAdbcDriverDownloadFailed: if the driver is not cached and the
                download fails.
        """
        expected_local_path = AdbcDriver._url_to_local_path(url)

        if not os.path.exists(expected_local_path):
            AdbcDriver._download_driver(url, expected_local_path)

        return AdbcDriver(expected_local_path)

    @staticmethod
    def _url_to_local_path(url: str) -> str:
        """Map a URL to its cache file path (file name is the SHA-256 of the URL)."""
        url_hash = hashlib.sha256(url.encode("utf-8")).hexdigest()
        # The cache dir constant starts with "~"; without expanduser() the cache
        # would be created in a literal "~" directory under the current working dir.
        cache_dir = os.path.expanduser(DEFAULT_ADBC_DRIVER_CACHE_BY_URL_DIR)
        return os.path.join(cache_dir, url_hash)

    @staticmethod
    def _download_driver(url: str, target_path: str):
        """Download `url` into `target_path`, creating parent directories as needed.

        The payload is first written to a sibling ".part" file and only renamed to
        `target_path` once complete, so an interrupted download can never be
        mistaken for a valid cached driver by `from_url()`.

        Raises:
            RemoteAdbcDriverDownloadFailed: on any failure while downloading or
                moving the file into place.
        """
        os.makedirs(os.path.dirname(target_path), exist_ok=True)

        # Include the pid so concurrent processes do not write to the same temp file.
        partial_path = f"{target_path}.{os.getpid()}.part"

        try:
            log.info("Downloading ADBC driver from %s to %s...", url, target_path)
            urllib.request.urlretrieve(url, partial_path)
            # Same-directory rename: atomically publishes the complete file.
            os.replace(partial_path, target_path)
            log.info("Successfully downloaded driver to %s.", target_path)
        except Exception as e:
            try:
                os.remove(partial_path)
            except OSError:
                # Best-effort cleanup: the partial file may never have been created.
                pass
            raise RemoteAdbcDriverDownloadFailed(
                f"Failed to download ADBC driver from {url}: {e}"
            ) from e

    @property
    def local_path(self) -> str:
        """Filesystem path of the driver shared library."""
        return self._local_path
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _get_adbc_connection(
    adbc_driver_path: str, endpoint: str, access_key: str, secret_key: str, txid: int
) -> Connection:
    """Open an ADBC connection that is attached to an existing transaction.

    The endpoint and credentials are passed as database-level options; the
    transaction id is passed (as a string) as a connection-level option under
    `TXID_OVERRIDE_PROPERTY`.
    """
    database_options = {
        "vast.db.endpoint": endpoint,
        "vast.db.access_key": access_key,
        "vast.db.secret_key": secret_key,
    }
    connection_options = {TXID_OVERRIDE_PROPERTY: str(txid)}

    return connect(
        driver=adbc_driver_path,
        db_kwargs=database_options,
        conn_kwargs=connection_options,
    )
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _remove_table_qualification_from_columns(expression: exp.Expression):
    """Strip the table qualifier from every column reference in `expression`.

    Intended for columns qualified with the "t0" table reference (ibis default
    table qualification for unbound tables). The expression is modified in place
    and also returned.

    Note: use only if one table is involved - if two tables exist in the
    expression, columns might become ambiguous.
    """
    for column_ref in expression.find_all(exp.Column):
        # Clearing the "table" arg turns e.g. t0.col into a bare col.
        column_ref.set("table", None)
    return expression
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _ibis_to_qe_predicates(predicate: IbisPredicate) -> str:
    """Render an ibis predicate as a SQL boolean expression without table qualifiers."""
    select_stmt = sqlglot.parse_one(predicate.to_sql())

    # NOTE(review): presumably ibis renders the predicate as a SELECT whose first
    # projection wraps the boolean expression (e.g. in an alias), hence `.this`.
    bare_predicate = select_stmt.expressions[0].this

    # Currently there is a single table, so the table qualification on the
    # columns can be removed without making them ambiguous.
    return _remove_table_qualification_from_columns(bare_predicate).sql()
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _vector_search_sql(
    query_vector: list[float],
    vector_index: VectorIndex,
    table_ref: TableRef,
    columns: list[str],
    limit: int,
    predicate: Optional[IbisPredicate] = None,
) -> str:
    """Build the top-n vector search SQL statement.

    The statement projects `columns` plus the distance between the indexed
    vector column and `query_vector` (aliased as `VAST_DIST_ALIAS`), optionally
    filters by `predicate`, orders by that distance and keeps `limit` rows.

    NOTE(review): column names, the table path and the distance function name are
    interpolated verbatim - callers are assumed to pass trusted identifiers.
    """
    query_vector_dim = len(query_vector)

    # Render each element through float() instead of relying on the container's
    # repr: a list of NumPy>=2 scalars prints as "[np.float64(1.0), ...]" and an
    # ndarray prints without commas, neither of which is a valid SQL array literal.
    vector_elements = ", ".join(repr(float(value)) for value in query_vector)
    query_vector_literal = f"[{vector_elements}]::FLOAT[{query_vector_dim}]"

    dist_func = f"{vector_index.sql_distance_function}({vector_index.column}::FLOAT[{query_vector_dim}], {query_vector_literal})"
    dist_alias = f"{dist_func} as {VAST_DIST_ALIAS}"

    projection_str = ",".join(list(columns) + [dist_alias])

    if predicate is not None:
        where = f"WHERE {_ibis_to_qe_predicates(predicate)}"
    else:
        where = ""

    return f"""
    SELECT {projection_str}
    FROM {table_ref.query_engine_full_path}
    {where}
    ORDER BY {VAST_DIST_ALIAS}
    LIMIT {limit}"""
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
class AdbcConnection:
    """An ADBC connection bound to a transaction, together with a single cursor."""

    def __init__(
        self,
        adbc_driver: AdbcDriver,
        endpoint: str,
        access_key: str,
        secret_key: str,
        txid: int,
    ):
        self._adbc_conn = _get_adbc_connection(
            adbc_driver.local_path, endpoint, access_key, secret_key, txid
        )

        try:
            self._cursor = self._adbc_conn.cursor()
        except Exception:
            # Do not leak the freshly opened connection if no cursor can be created.
            self._adbc_conn.close()
            raise

    @property
    def cursor(self) -> Cursor:
        """The cursor used to execute statements on this connection."""
        return self._cursor

    def close(self):
        """Close the cursor and the underlying ADBC connection.

        Previously only the cursor was closed, leaving the connection (and the
        driver resources behind it) open until garbage collection.
        """
        try:
            self._cursor.close()
        finally:
            # Release the connection even if closing the cursor failed.
            self._adbc_conn.close()

    def vector_search(
        self,
        query_vector: list[float],
        vector_index: VectorIndex,
        table_ref: TableRef,
        columns: list[str],
        limit: int,
        predicate: Optional[IbisPredicate] = None,
    ) -> pa.RecordBatchReader:
        """Top-n on vector-column.

        Builds the search statement with `_vector_search_sql`, executes it on this
        connection's cursor and returns the result as a record batch reader.
        """
        sql = _vector_search_sql(
            query_vector=query_vector,
            vector_index=vector_index,
            table_ref=table_ref,
            columns=columns,
            limit=limit,
            predicate=predicate,
        )

        self._cursor.execute(sql)
        return self._cursor.fetch_record_batch()
|
|
@@ -6,6 +6,7 @@ import struct
|
|
|
6
6
|
import time
|
|
7
7
|
import urllib.parse
|
|
8
8
|
from collections import defaultdict, namedtuple
|
|
9
|
+
from dataclasses import dataclass
|
|
9
10
|
from enum import Enum
|
|
10
11
|
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union, cast
|
|
11
12
|
|
|
@@ -116,6 +117,7 @@ from vastdb.vast_flatbuf.tabular.ListSchemasResponse import (
|
|
|
116
117
|
from vastdb.vast_flatbuf.tabular.ListTablesResponse import (
|
|
117
118
|
ListTablesResponse as list_tables,
|
|
118
119
|
)
|
|
120
|
+
from vastdb.vast_flatbuf.tabular.VectorIndexMetadata import VectorIndexMetadata
|
|
119
121
|
|
|
120
122
|
from . import errors, util
|
|
121
123
|
from .config import BackoffConfig
|
|
@@ -803,10 +805,40 @@ def _parse_table_info(obj, parse_properties):
|
|
|
803
805
|
sorting_score, write_amplification, acummulative_row_insertion_count, sorting_done)
|
|
804
806
|
|
|
805
807
|
|
|
806
|
-
|
|
808
|
+
@dataclass
class VectorIndexSpec:
    """Vector index specification supplied when creating a table."""

    # Name of the column the vector index is defined on.
    column: str
    # Distance metric identifier, passed to the server as-is.
    distance_metric: str
|
|
815
|
+
|
|
816
|
+
|
|
817
|
+
@dataclass
class VectorIndex:
    """Vector index of an existing table."""

    # Name of the indexed column.
    column: str
    # Distance metric identifier of the index.
    distance_metric: str
    # Name of the SQL function that computes the distance for this index.
    sql_distance_function: str

    def to_vector_index_spec(self) -> VectorIndexSpec:
        """Return the creation-time specification (column and metric) of this index."""
        return VectorIndexSpec(
            column=self.column,
            distance_metric=self.distance_metric,
        )
|
|
808
826
|
|
|
809
|
-
|
|
827
|
+
|
|
828
|
+
@dataclass
class TableStats:
    """Statistics and related information about a table."""

    num_rows: int
    size_in_bytes: int
    sorting_score: int
    write_amplification: int
    # NOTE: the spelling of this attribute is part of the public interface.
    acummulative_row_inserition_count: int
    is_external_rowid_alloc: bool = False
    sorting_key_enabled: bool = False
    sorting_done: bool = False
    # Endpoint URLs; empty unless populated by the caller.
    endpoints: Tuple[str, ...] = tuple()
    # Vector index of the table, or None when there is none.
    vector_index: Optional[VectorIndex] = None
|
|
810
842
|
|
|
811
843
|
|
|
812
844
|
_RETRIABLE_EXCEPTIONS = (
|
|
@@ -1128,11 +1160,31 @@ class VastdbApi:
|
|
|
1128
1160
|
def create_table(self, bucket, schema, name, arrow_schema=None,
|
|
1129
1161
|
txid=0, client_tags=[], expected_retvals=[],
|
|
1130
1162
|
create_imports_table=False, use_external_row_ids_allocation=False, table_props=None,
|
|
1131
|
-
sorting_key=[]):
|
|
1163
|
+
sorting_key=[], vector_index: Optional[VectorIndexSpec] = None):
|
|
1164
|
+
"""
|
|
1165
|
+
Create a table in the specified bucket and schema.
|
|
1166
|
+
|
|
1167
|
+
Args:
|
|
1168
|
+
bucket: Name of the bucket
|
|
1169
|
+
schema: Name of the schema
|
|
1170
|
+
name: Name of the table
|
|
1171
|
+
arrow_schema: PyArrow schema defining the table columns
|
|
1172
|
+
txid: Transaction ID
|
|
1173
|
+
client_tags: Client tags for the request
|
|
1174
|
+
expected_retvals: Expected return values
|
|
1175
|
+
create_imports_table: Whether this is an imports table
|
|
1176
|
+
use_external_row_ids_allocation: Whether to use external row ID allocation
|
|
1177
|
+
table_props: Table properties
|
|
1178
|
+
sorting_key: List of column indices to sort by (for Elysium tables)
|
|
1179
|
+
vector_index: Optional vector index
|
|
1180
|
+
"""
|
|
1132
1181
|
self._create_table_internal(bucket=bucket, schema=schema, name=name, arrow_schema=arrow_schema,
|
|
1133
|
-
txid=txid, client_tags=client_tags,
|
|
1134
|
-
|
|
1135
|
-
|
|
1182
|
+
txid=txid, client_tags=client_tags,
|
|
1183
|
+
expected_retvals=expected_retvals,
|
|
1184
|
+
create_imports_table=create_imports_table,
|
|
1185
|
+
use_external_row_ids_allocation=use_external_row_ids_allocation,
|
|
1186
|
+
table_props=table_props, sorting_key=sorting_key,
|
|
1187
|
+
vector_index=vector_index)
|
|
1136
1188
|
|
|
1137
1189
|
def create_topic(self, bucket, name, topic_partitions, expected_retvals=[],
|
|
1138
1190
|
message_timestamp_type=None, retention_ms=None, message_timestamp_after_max_ms=None,
|
|
@@ -1149,8 +1201,9 @@ class VastdbApi:
|
|
|
1149
1201
|
|
|
1150
1202
|
def _create_table_internal(self, bucket, schema, name, arrow_schema=None,
|
|
1151
1203
|
txid=0, client_tags=[], expected_retvals=[], topic_partitions=0,
|
|
1152
|
-
create_imports_table=False, use_external_row_ids_allocation=False,
|
|
1153
|
-
sorting_key=[]
|
|
1204
|
+
create_imports_table=False, use_external_row_ids_allocation=False,
|
|
1205
|
+
table_props=None, sorting_key=[],
|
|
1206
|
+
vector_index: Optional[VectorIndexSpec] = None):
|
|
1154
1207
|
"""
|
|
1155
1208
|
Create a table, use the following request
|
|
1156
1209
|
POST /bucket/schema/table?table HTTP/1.1
|
|
@@ -1176,6 +1229,10 @@ class VastdbApi:
|
|
|
1176
1229
|
if use_external_row_ids_allocation:
|
|
1177
1230
|
headers['use-external-row-ids-alloc'] = str(use_external_row_ids_allocation)
|
|
1178
1231
|
|
|
1232
|
+
if vector_index is not None:
|
|
1233
|
+
headers['tabular-vector-index-column'] = vector_index.column
|
|
1234
|
+
headers['tabular-vector-index-distance-metric'] = vector_index.distance_metric
|
|
1235
|
+
|
|
1179
1236
|
url_params = {'topic_partitions': str(topic_partitions)} if topic_partitions else {}
|
|
1180
1237
|
if create_imports_table:
|
|
1181
1238
|
url_params['sub-table'] = IMPORTED_OBJECTS_TABLE_NAME
|
|
@@ -1188,10 +1245,10 @@ class VastdbApi:
|
|
|
1188
1245
|
url=self._url(bucket=bucket, schema=schema, table=name, command="table", url_params=url_params),
|
|
1189
1246
|
data=serialized_schema, headers=headers)
|
|
1190
1247
|
|
|
1191
|
-
def get_topic_stats(self, bucket, name, expected_retvals=[]):
|
|
1248
|
+
def get_topic_stats(self, bucket, name, expected_retvals=[]) -> TableStats:
|
|
1192
1249
|
return self.get_table_stats(bucket=bucket, schema=KAFKA_TOPICS_SCHEMA_NAME, name=name, expected_retvals=expected_retvals)
|
|
1193
1250
|
|
|
1194
|
-
def get_table_stats(self, bucket, schema, name, txid=0, client_tags=[], expected_retvals=[], imports_table_stats=False):
|
|
1251
|
+
def get_table_stats(self, bucket, schema, name, txid=0, client_tags=[], expected_retvals=[], imports_table_stats=False) -> TableStats:
|
|
1195
1252
|
"""
|
|
1196
1253
|
GET /mybucket/myschema/mytable?stats HTTP/1.1
|
|
1197
1254
|
tabular-txid: TransactionId
|
|
@@ -1218,8 +1275,40 @@ class VastdbApi:
|
|
|
1218
1275
|
sorting_score = sorting_score_raw & ((1 << 63) - 1)
|
|
1219
1276
|
sorting_done = bool(sorting_score_raw >> 63)
|
|
1220
1277
|
|
|
1278
|
+
vector_index_metadata: Optional[VectorIndexMetadata] = stats.VectorIndexMetadata()
|
|
1279
|
+
|
|
1280
|
+
if vector_index_metadata is not None:
|
|
1281
|
+
column_name = vector_index_metadata.ColumnName()
|
|
1282
|
+
distance_metric = vector_index_metadata.DistanceMetric()
|
|
1283
|
+
sql_distance_function = vector_index_metadata.SqlFunctionName()
|
|
1284
|
+
|
|
1285
|
+
if (column_name is None or
|
|
1286
|
+
distance_metric is None or
|
|
1287
|
+
sql_distance_function is None):
|
|
1288
|
+
raise errors.ApiResponseError(
|
|
1289
|
+
"VectorIndexMetadata properties (column_name, distance_metric, sql_function_name) must all be set."
|
|
1290
|
+
)
|
|
1291
|
+
|
|
1292
|
+
vector_index = VectorIndex(
|
|
1293
|
+
column=column_name.decode('utf-8'),
|
|
1294
|
+
distance_metric=distance_metric.decode('utf-8'),
|
|
1295
|
+
sql_distance_function=sql_distance_function.decode('utf-8'))
|
|
1296
|
+
else:
|
|
1297
|
+
vector_index = None
|
|
1298
|
+
|
|
1221
1299
|
endpoints = [self.url] # we cannot replace the host by a VIP address in HTTPS-based URLs
|
|
1222
|
-
|
|
1300
|
+
|
|
1301
|
+
return TableStats(
|
|
1302
|
+
num_rows=num_rows,
|
|
1303
|
+
size_in_bytes=size_in_bytes,
|
|
1304
|
+
sorting_score=sorting_score,
|
|
1305
|
+
write_amplification=write_amplification,
|
|
1306
|
+
acummulative_row_inserition_count=acummulative_row_inserition_count,
|
|
1307
|
+
is_external_rowid_alloc=is_external_rowid_alloc,
|
|
1308
|
+
sorting_key_enabled=sorting_key_enabled,
|
|
1309
|
+
sorting_done=sorting_done,
|
|
1310
|
+
endpoints=tuple(endpoints),
|
|
1311
|
+
vector_index=vector_index)
|
|
1223
1312
|
|
|
1224
1313
|
def alter_topic(self, bucket, name,
|
|
1225
1314
|
new_name="", expected_retvals=[],
|
|
@@ -2339,7 +2428,7 @@ def build_field(builder: flatbuffers.Builder, f: pa.Field, include_name=True):
|
|
|
2339
2428
|
class QueryDataRequest:
|
|
2340
2429
|
def __init__(self, serialized, response_schema, response_parser):
|
|
2341
2430
|
self.serialized = serialized
|
|
2342
|
-
self.response_schema = response_schema
|
|
2431
|
+
self.response_schema: pa.Schema = response_schema
|
|
2343
2432
|
self.response_parser = response_parser
|
|
2344
2433
|
|
|
2345
2434
|
|
|
@@ -1,15 +1,18 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
|
-
from typing import TYPE_CHECKING, Iterable, Optional, Union
|
|
2
|
+
from typing import TYPE_CHECKING, Iterable, Optional, TypeAlias, Union
|
|
3
3
|
|
|
4
4
|
import ibis
|
|
5
5
|
import pyarrow as pa
|
|
6
6
|
|
|
7
|
+
from ._internal import VectorIndex
|
|
7
8
|
from .config import ImportConfig, QueryConfig
|
|
8
9
|
from .table_metadata import TableRef
|
|
9
10
|
|
|
10
11
|
if TYPE_CHECKING:
|
|
11
12
|
from .table import Projection
|
|
12
13
|
|
|
14
|
+
IbisPredicate: TypeAlias = Union[ibis.expr.types.BooleanColumn, ibis.common.deferred.Deferred]
|
|
15
|
+
|
|
13
16
|
|
|
14
17
|
class ITable(ABC):
|
|
15
18
|
"""Interface for VAST Table operations."""
|
|
@@ -71,8 +74,7 @@ class ITable(ABC):
|
|
|
71
74
|
@abstractmethod
|
|
72
75
|
def select(self,
|
|
73
76
|
columns: Optional[list[str]] = None,
|
|
74
|
-
predicate:
|
|
75
|
-
ibis.common.deferred.Deferred] = None,
|
|
77
|
+
predicate: Optional[IbisPredicate] = None,
|
|
76
78
|
config: Optional[QueryConfig] = None,
|
|
77
79
|
*,
|
|
78
80
|
internal_row_id: bool = False,
|
|
@@ -134,3 +136,18 @@ class ITable(ABC):
|
|
|
134
136
|
It is useful for constructing expressions for predicate pushdown in `ITable.select()` method.
|
|
135
137
|
"""
|
|
136
138
|
pass
|
|
139
|
+
|
|
140
|
+
@property
@abstractmethod
def vector_index(self) -> Optional[VectorIndex]:
    """Return the table's vector index, or `None` if the table has none."""
|
|
145
|
+
|
|
146
|
+
@abstractmethod
def vector_search(
    self,
    vec: list[float],
    columns: list[str],
    limit: int,
    predicate: Optional[IbisPredicate] = None,
) -> pa.RecordBatchReader:
    """Top-n on vector-column.

    Args:
        vec: Query vector.
        columns: Names of the columns to return.
        limit: Maximum number of rows to return.
        predicate: Optional filter expression.
    """
|