vastdb 2.0.2__tar.gz → 2.0.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vastdb-2.0.2 → vastdb-2.0.5}/CHANGELOG.md +23 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/PKG-INFO +2 -1
- {vastdb-2.0.2 → vastdb-2.0.5}/README.md +63 -43
- {vastdb-2.0.2 → vastdb-2.0.5}/requirements.txt +1 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/setup.py +1 -1
- vastdb-2.0.5/vastdb/_adbc.py +205 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/_internal.py +106 -17
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/_table_interface.py +20 -3
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/conftest.py +23 -1
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/errors.py +5 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/schema.py +19 -2
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/session.py +14 -5
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/table.py +57 -22
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/table_metadata.py +58 -34
- vastdb-2.0.5/vastdb/tests/test_adbc_integration.py +129 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/tests/test_tables.py +35 -1
- vastdb-2.0.5/vastdb/tests/test_vector_index.py +162 -0
- vastdb-2.0.5/vastdb/tests/test_vector_search.py +211 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/tests/util.py +3 -2
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/transaction.py +32 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/tabular/GetTableStatsResponse.py +51 -59
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/tabular/ObjectDetails.py +36 -59
- vastdb-2.0.5/vastdb/vast_flatbuf/tabular/VectorIndexMetadata.py +67 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/tabular/VipRange.py +19 -12
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb.egg-info/PKG-INFO +2 -1
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb.egg-info/SOURCES.txt +5 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb.egg-info/requires.txt +1 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/CONTRIBUTING.md +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/LICENSE +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/MANIFEST.in +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/setup.cfg +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/_ibis_support.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/bench/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/bench/perf_bench/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/bench/perf_bench/bench_repo/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/bench/perf_bench/bench_repo/mega_combo.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/bench/perf_bench/cli.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/bench/perf_bench/common/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/bench/perf_bench/common/constants.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/bench/perf_bench/common/log_utils.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/bench/perf_bench/common/types.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/bench/perf_bench/common/utils.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/bench/perf_bench/dataset/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/bench/perf_bench/dataset/generate_secmaster.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/bench/perf_bench/dataset/generate_stocks_dataset.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/bench/perf_bench/dataset/schemas.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/bench/perf_bench/dataset/secmaster.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/bench/perf_bench/orchestrate/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/bench/perf_bench/orchestrate/bench_spec.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/bench/perf_bench/orchestrate/results_helpers.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/bench/perf_bench/orchestrate/scenario.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/bench/perf_bench/orchestrate/scenario_generator.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/bench/perf_bench/query/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/bench/perf_bench/query/arrow_common.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/bench/perf_bench/query/query.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/bench/perf_bench/query/query_pyarrow.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/bench/perf_bench/query/query_vastdb.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/bench/perf_bench/run.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/bench/test_perf.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/bench/test_sample.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/bucket.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/config.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/features.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/tests/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/tests/metrics.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/tests/test_duckdb.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/tests/test_fixed_list.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/tests/test_imports.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/tests/test_nested.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/tests/test_projections.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/tests/test_sanity.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/tests/test_schemas.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/tests/test_table_in_tx.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/tests/test_util.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/util.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Aggregate.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/ArraySlice.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/ArraySubscript.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/BinaryLiteral.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/BooleanLiteral.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Bound.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Call.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/CaseFragment.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Cast.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/ConcreteBoundImpl.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/ConditionalCase.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/CurrentRow.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/DateLiteral.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/DecimalLiteral.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Deref.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/DurationLiteral.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Expression.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/ExpressionImpl.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/FieldIndex.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/FieldRef.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Filter.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/FixedSizeBinaryLiteral.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Float16Literal.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Float32Literal.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Float64Literal.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Following.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Frame.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Grouping.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Int16Literal.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Int32Literal.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Int64Literal.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Int8Literal.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/IntervalLiteral.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/IntervalLiteralDaysMilliseconds.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/IntervalLiteralImpl.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/IntervalLiteralMonths.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Join.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/JoinKind.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/KeyValue.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Limit.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/ListLiteral.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Literal.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/LiteralColumn.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/LiteralImpl.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/LiteralRelation.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/MapKey.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/MapLiteral.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/OrderBy.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Ordering.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Plan.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Preceding.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Project.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/RelId.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Relation.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/RelationImpl.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/SetOpKind.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/SetOperation.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/SimpleCase.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/SortKey.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Source.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/StringLiteral.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/StructField.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/StructLiteral.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/TimeLiteral.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/TimestampLiteral.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/UInt16Literal.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/UInt32Literal.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/UInt64Literal.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/UInt8Literal.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/Unbounded.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/WindowCall.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/computeir/flatbuf/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Binary.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Block.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/BodyCompression.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/BodyCompressionMethod.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Bool.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Buffer.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/CompressionType.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Date.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/DateUnit.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Decimal.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/DictionaryBatch.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/DictionaryEncoding.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/DictionaryKind.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Duration.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Endianness.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Feature.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Field.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/FieldNode.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/FixedSizeBinary.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/FixedSizeList.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/FloatingPoint.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Footer.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Int.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Interval.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/IntervalUnit.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/KeyValue.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/LargeBinary.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/LargeList.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/LargeUtf8.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/List.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Map.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Message.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/MessageHeader.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/MetadataVersion.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Null.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Precision.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/RecordBatch.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Schema.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/SparseMatrixCompressedAxis.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/SparseMatrixIndexCSX.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/SparseTensor.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/SparseTensorIndex.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/SparseTensorIndexCOO.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/SparseTensorIndexCSF.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Struct_.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Tensor.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/TensorDim.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Time.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/TimeUnit.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Timestamp.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Type.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Union.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/UnionMode.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/Utf8.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/org/apache/arrow/flatbuf/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/tabular/AlterColumnRequest.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/tabular/AlterProjectionTableRequest.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/tabular/AlterSchemaRequest.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/tabular/AlterTableRequest.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/tabular/Column.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/tabular/ColumnDetails.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/tabular/ColumnType.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/tabular/CreateProjectionRequest.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/tabular/CreateSchemaRequest.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/tabular/CreateViewRequest.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/tabular/FilterString.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/tabular/GetProjectionTableStatsResponse.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/tabular/GetRowColumnSecurityResponse.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/tabular/ImportDataRequest.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/tabular/KeyName.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/tabular/ListProjectionsResponse.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/tabular/ListSchemasResponse.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/tabular/ListTablesResponse.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/tabular/ListViewsResponse.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/tabular/NameString.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/tabular/S3File.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_flatbuf/tabular/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_tests/__init__.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_tests/test_ha.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb/vast_tests/test_scale.py +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb.egg-info/dependency_links.txt +0 -0
- {vastdb-2.0.2 → vastdb-2.0.5}/vastdb.egg-info/top_level.txt +0 -0
|
@@ -4,6 +4,29 @@ All notable changes to this project will be documented in this file.
|
|
|
4
4
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
|
6
6
|
|
|
7
|
+
## [2.0.5] (2025-01-08)
|
|
8
|
+
[2.0.5]: https://github.com/vast-data/vastdb_sdk/compare/v2.0.4...v2.0.5
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- end-user impersonation for adbc connection configurable via Session
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
## [2.0.4] (2025-01-06)
|
|
15
|
+
[2.0.4]: https://github.com/vast-data/vastdb_sdk/compare/v2.0.3...v2.0.4
|
|
16
|
+
|
|
17
|
+
### Changed
|
|
18
|
+
- Fix race condition in select (get data) mechanism
|
|
19
|
+
|
|
20
|
+
## [2.0.3] (2025-11-25)
|
|
21
|
+
[2.0.3]: https://github.com/vast-data/vastdb_sdk/compare/v2.0.2...v2.0.3
|
|
22
|
+
|
|
23
|
+
### Added
|
|
24
|
+
- Support for vastdb's adbc driver
|
|
25
|
+
- Session now gets an Optional[AdbcDriver]
|
|
26
|
+
- AdbcDriver is created out of a local path to a driver `.so` (shared library) or a url
|
|
27
|
+
- Vector Search capability
|
|
28
|
+
- `Table.vector_search` - Approximate Top-N closest vectors
|
|
29
|
+
|
|
7
30
|
## [2.0.2] (2025-10-22)
|
|
8
31
|
[2.0.2]: https://github.com/vast-data/vastdb_sdk/compare/v2.0.1...v2.0.2
|
|
9
32
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: vastdb
|
|
3
|
-
Version: 2.0.2
|
|
3
|
+
Version: 2.0.5
|
|
4
4
|
Summary: VAST Data SDK
|
|
5
5
|
Home-page: https://github.com/vast-data/vastdb_sdk
|
|
6
6
|
Author: VAST DATA
|
|
@@ -29,6 +29,7 @@ Requires-Dist: numpy
|
|
|
29
29
|
Requires-Dist: requests
|
|
30
30
|
Requires-Dist: xmltodict
|
|
31
31
|
Requires-Dist: backoff==2.2.1
|
|
32
|
+
Requires-Dist: adbc_driver_manager
|
|
32
33
|
Dynamic: author
|
|
33
34
|
Dynamic: author-email
|
|
34
35
|
Dynamic: classifier
|
|
@@ -19,16 +19,12 @@ For technical details about VAST Database architecture, see the [whitepaper](htt
|
|
|
19
19
|
### Requirements
|
|
20
20
|
|
|
21
21
|
- Linux client with Python 3.10 - 3.13, and network access to the VAST Cluster
|
|
22
|
+
- VAST Cluster release `5.0.0-sp10` or later
|
|
23
|
+
- If your VAST Cluster is running an older release, please contact customer.support@vastdata.com.
|
|
22
24
|
- [Virtual IP pool configured with DNS service](https://support.vastdata.com/s/topic/0TOV40000000FThOAM/configuring-network-access-v50)
|
|
23
25
|
- [S3 access & secret keys on the VAST cluster](https://support.vastdata.com/s/article/UUID-4d2e7e23-b2fb-7900-d98f-96c31a499626)
|
|
24
26
|
- [Tabular identity policy with the proper permissions](https://support.vastdata.com/s/article/UUID-14322b60-d6a2-89ac-3df0-3dfbb6974182)
|
|
25
27
|
|
|
26
|
-
### Required VAST Cluster release
|
|
27
|
-
|
|
28
|
-
VAST DB Python SDK requires VAST Cluster release `5.0.0-sp10` or later.
|
|
29
|
-
|
|
30
|
-
If your VAST Cluster is running an older release, please contact customer.support@vastdata.com.
|
|
31
|
-
|
|
32
28
|
### Installation
|
|
33
29
|
|
|
34
30
|
```bash
|
|
@@ -80,61 +76,81 @@ with session.transaction() as tx:
|
|
|
80
76
|
# the transaction is automatically committed when exiting the context
|
|
81
77
|
```
|
|
82
78
|
|
|
83
|
-
|
|
79
|
+
Note: the transaction must remain open while the returned [pyarrow.RecordBatchReader](https://arrow.apache.org/docs/python/generated/pyarrow.RecordBatchReader.html) generator is being used.
|
|
84
80
|
|
|
85
81
|
The list of supported data types can be found [here](docs/types.md).
|
|
86
82
|
|
|
87
|
-
|
|
83
|
+
## Features
|
|
88
84
|
|
|
89
|
-
|
|
85
|
+
### Select Performance
|
|
90
86
|
|
|
91
|
-
|
|
87
|
+
The `Table.select()` method accepts a [QueryConfig](vastdb/config.py) object that modifies how the select is fulfilled.
|
|
92
88
|
|
|
93
|
-
The SDK
|
|
89
|
+
The most important setting is the `data_endpoints` parameter that, when set, will allow the SDK to parallelize the select across multiple CNodes. Without this, only the CNode specified in the `connect()` will service the select.
|
|
94
90
|
|
|
95
91
|
```python
|
|
96
|
-
|
|
92
|
+
from vastdb.config import QueryConfig
|
|
97
93
|
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
predicate=(_.c2 > 2) & _.c3.isnull())
|
|
94
|
+
# load default configuration values
|
|
95
|
+
cfg = QueryConfig()
|
|
101
96
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
97
|
+
# set data_endpoints to CNode VIPs
|
|
98
|
+
cfg.data_endpoints = [
|
|
99
|
+
"http://172.19.196.1",
|
|
100
|
+
"http://172.19.196.2",
|
|
101
|
+
"http://172.19.196.3",
|
|
102
|
+
"http://172.19.196.4",
|
|
103
|
+
]
|
|
105
104
|
|
|
106
|
-
|
|
107
|
-
table.select(predicate=_.c3.contains('substring'))
|
|
105
|
+
table.select(columns=['c1'], predicate=(_.c2 > 2), config=cfg)
|
|
108
106
|
```
|
|
109
107
|
|
|
110
|
-
|
|
108
|
+
If using DNS with either TTL=0 or multi-response per the [Best Practice on Load Balancing CNodes](https://support.vastdata.com/s/document-item?bundleId=z-kb-articles-publications-prod&topicId=6058049537.html&_LANG=enus), passing in the same DNS name repeated once per VIP in the pool is a decent proxy.
|
|
109
|
+
|
|
110
|
+
```python
|
|
111
|
+
cfg.data_endpoints = ["http://vip-pool.v123-xy.VastENG.lab"] * 16 # assuming 16 VIPs in the pool
|
|
112
|
+
```
|
|
111
113
|
|
|
112
|
-
###
|
|
114
|
+
### Filters and Projections
|
|
113
115
|
|
|
114
|
-
|
|
116
|
+
The SDK supports predicate and projection pushdown using Ibis:
|
|
115
117
|
|
|
116
118
|
```python
|
|
117
|
-
|
|
118
|
-
pa.parquet.write_table(arrow_table, f.name)
|
|
119
|
-
s3.put_object(Bucket='bucket-name', Key='staging/file.parquet', Body=f)
|
|
119
|
+
from ibis import _
|
|
120
120
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
121
|
+
# SELECT c1 FROM t WHERE (c2 > 2) AND (c3 IS NULL)
|
|
122
|
+
table.select(columns=['c1'],
|
|
123
|
+
predicate=(_.c2 > 2) & _.c3.isnull())
|
|
124
|
+
|
|
125
|
+
# SELECT c2, c3 FROM t WHERE (c2 BETWEEN 0 AND 1) OR (c2 > 10)
|
|
126
|
+
table.select(columns=['c2', 'c3'],
|
|
127
|
+
predicate=(_.c2.between(0, 1) | (_.c2 > 10))
|
|
128
|
+
|
|
129
|
+
# SELECT * FROM t WHERE c3 LIKE '%substring%'
|
|
130
|
+
table.select(predicate=_.c3.contains('substring'))
|
|
125
131
|
```
|
|
126
132
|
|
|
127
|
-
|
|
133
|
+
See the [Predicate pushdown support document](docs/predicate.md) for more information on constructing predicates using Ibis.
|
|
128
134
|
|
|
129
|
-
Import
|
|
135
|
+
### Import Parquet files via S3 protocol
|
|
136
|
+
|
|
137
|
+
You can efficiently create tables from Parquet files that already exist in an S3 bucket on VAST without copying them via the client. If more than one file is included in `parquet_files` they will be loaded concurrently.
|
|
130
138
|
|
|
131
139
|
```python
|
|
132
|
-
|
|
140
|
+
with session.transaction() as tx:
|
|
141
|
+
schema = tx.bucket('database-name').schema('schema-name')
|
|
133
142
|
table = util.create_table_from_files(
|
|
134
|
-
schema=schema, table_name='
|
|
135
|
-
parquet_files=[
|
|
143
|
+
schema=schema, table_name='imported-table',
|
|
144
|
+
parquet_files=['/bucket-name/staging/file.parquet'])
|
|
136
145
|
```
|
|
137
146
|
|
|
147
|
+
If the table already exists, you can use the `table.import_files()` method to add more data to the table from Parquet files that already exist in an S3 bucket on VAST.
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
with session.transaction() as tx:
|
|
151
|
+
table = tx.bucket('database-name').schema('schema-name').table('table-name')
|
|
152
|
+
table.import_files(["/bucket-name/staging/file2.parquet"])
|
|
153
|
+
```
|
|
138
154
|
|
|
139
155
|
### Semi-sorted Projections
|
|
140
156
|
|
|
@@ -158,9 +174,9 @@ batches = snaps[0].schema('schema-name').table('table-name').select()
|
|
|
158
174
|
|
|
159
175
|
## Interactive and Non-Interactive Workflows
|
|
160
176
|
|
|
161
|
-
|
|
177
|
+
A `Table` created via the `Schema` object (`tx.bucket('..').schema('..').table('..')`) loads metadata and stats eagerly, allowing for interactive development. Each object (bucket, schema, table) requires one or more round-trips to the server and `.table()` will fetch the full table schema.
|
|
162
178
|
|
|
163
|
-
|
|
179
|
+
It's generally more efficient to use the `TableMetadata` interface, which allows both lazy loading of the schema as it's needed and reuse of the metadata across transactions.
|
|
164
180
|
|
|
165
181
|
```python
|
|
166
182
|
# load the table schema & stats into an object we can use across transactions
|
|
@@ -172,7 +188,7 @@ with session.transaction() as tx:
|
|
|
172
188
|
table_md = TableMetadata(TableRef("bucket-name", "schema-name", "table-name"),
|
|
173
189
|
arrow_schema=<some-arrow-schema>)
|
|
174
190
|
|
|
175
|
-
# now we can reuse without the overhead of reloading the schema and stats,
|
|
191
|
+
# now we can reuse it without the overhead of reloading the schema and stats,
|
|
176
192
|
# such as for inserts:
|
|
177
193
|
with session.transaction() as tx:
|
|
178
194
|
table = tx.table_from_metadata(table_md)
|
|
@@ -188,6 +204,15 @@ with session.transaction() as tx:
|
|
|
188
204
|
print(results)
|
|
189
205
|
```
|
|
190
206
|
|
|
207
|
+
Some table operations, like `table.import_files()`, do not require the client to know the table schema, and using the `TableMetadata` interface will bypass fetching the schema entirely.
|
|
208
|
+
|
|
209
|
+
```python
|
|
210
|
+
table_md = TableMetadata(TableRef("bucket-name", "schema-name", "table-name"))
|
|
211
|
+
|
|
212
|
+
with session.transaction() as tx:
|
|
213
|
+
table = tx.table_from_metadata(table_md)
|
|
214
|
+
table.import_files(["/bucket-name/staging/file2.parquet"])
|
|
215
|
+
```
|
|
191
216
|
|
|
192
217
|
## Post-processing
|
|
193
218
|
|
|
@@ -248,9 +273,6 @@ with session.transaction() as tx:
|
|
|
248
273
|
print(distinct_elements)
|
|
249
274
|
```
|
|
250
275
|
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
276
|
## More Information
|
|
255
277
|
|
|
256
278
|
See these blog posts for more examples:
|
|
@@ -259,5 +281,3 @@ See these blog posts for more examples:
|
|
|
259
281
|
- https://vastdata.com/blog/the-vast-catalog-in-action-part-2
|
|
260
282
|
|
|
261
283
|
See also the [full Vast DB Python SDK documentation](https://vastdb-sdk.readthedocs.io/en/latest/)
|
|
262
|
-
|
|
263
|
-
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
import urllib.request
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
import pyarrow as pa
|
|
8
|
+
import sqlglot
|
|
9
|
+
from adbc_driver_manager.dbapi import Connection, Cursor, connect
|
|
10
|
+
from sqlglot import exp
|
|
11
|
+
|
|
12
|
+
from vastdb._internal import VectorIndex
|
|
13
|
+
from vastdb._table_interface import IbisPredicate
|
|
14
|
+
from vastdb.table_metadata import TableRef
|
|
15
|
+
|
|
16
|
+
log = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
TXID_OVERRIDE_PROPERTY: str = "vast.db.external_txid"
|
|
20
|
+
END_USER_PROPERTY: str = "vast.db.end_user"
|
|
21
|
+
VAST_DIST_ALIAS = "vast_pysdk_vector_dist"
|
|
22
|
+
DEFAULT_ADBC_DRIVER_CACHE_DIR: str = "~/.vast/adbc_drivers_cache"
|
|
23
|
+
DEFAULT_ADBC_DRIVER_CACHE_BY_URL_DIR: str = f"{DEFAULT_ADBC_DRIVER_CACHE_DIR}/by_url"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class LocalAdbcDriverNotFound(Exception):
|
|
27
|
+
"""LocalAdbcDriverNotFound."""
|
|
28
|
+
|
|
29
|
+
pass
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class RemoteAdbcDriverDownloadFailed(Exception):
|
|
33
|
+
"""RemoteAdbcDriverDownloadFailed."""
|
|
34
|
+
|
|
35
|
+
pass
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class AdbcDriver:
|
|
39
|
+
_local_path: str
|
|
40
|
+
|
|
41
|
+
def __init__(self, local_path: str):
|
|
42
|
+
self._local_path = local_path
|
|
43
|
+
|
|
44
|
+
@staticmethod
|
|
45
|
+
def from_local_path(local_path: str) -> "AdbcDriver":
|
|
46
|
+
"""AdbcDriver from a local_path to shared-library."""
|
|
47
|
+
if not os.path.exists(local_path):
|
|
48
|
+
raise LocalAdbcDriverNotFound(local_path)
|
|
49
|
+
|
|
50
|
+
return AdbcDriver(local_path)
|
|
51
|
+
|
|
52
|
+
@staticmethod
|
|
53
|
+
def from_url(url: str) -> "AdbcDriver":
|
|
54
|
+
"""AdbcDriver to be downloaded by url to shared-library (uses cache if exists)."""
|
|
55
|
+
expected_local_path = AdbcDriver._url_to_local_path(url)
|
|
56
|
+
|
|
57
|
+
if os.path.exists(expected_local_path):
|
|
58
|
+
return AdbcDriver(expected_local_path)
|
|
59
|
+
|
|
60
|
+
AdbcDriver._download_driver(url, expected_local_path)
|
|
61
|
+
return AdbcDriver(expected_local_path)
|
|
62
|
+
|
|
63
|
+
@staticmethod
|
|
64
|
+
def _url_to_local_path(url: str) -> str:
|
|
65
|
+
url_hash = hashlib.sha256(url.encode("utf-8")).hexdigest()
|
|
66
|
+
return os.path.join(DEFAULT_ADBC_DRIVER_CACHE_BY_URL_DIR, url_hash)
|
|
67
|
+
|
|
68
|
+
@staticmethod
|
|
69
|
+
def _download_driver(url: str, target_path: str):
|
|
70
|
+
os.makedirs(os.path.dirname(target_path), exist_ok=True)
|
|
71
|
+
|
|
72
|
+
try:
|
|
73
|
+
log.info(f"Downloading ADBC driver from {url} to {target_path}...")
|
|
74
|
+
urllib.request.urlretrieve(url, target_path)
|
|
75
|
+
log.info(f"Successfully downloaded driver to {target_path}.")
|
|
76
|
+
except Exception as e:
|
|
77
|
+
raise RemoteAdbcDriverDownloadFailed(
|
|
78
|
+
f"Failed to download ADBC driver from {url}: {e}"
|
|
79
|
+
) from e
|
|
80
|
+
|
|
81
|
+
@property
|
|
82
|
+
def local_path(self) -> str:
|
|
83
|
+
return self._local_path
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _get_adbc_connection(
|
|
87
|
+
adbc_driver_path: str,
|
|
88
|
+
endpoint: str,
|
|
89
|
+
access_key: str,
|
|
90
|
+
secret_key: str,
|
|
91
|
+
txid: int,
|
|
92
|
+
end_user: Optional[str],
|
|
93
|
+
) -> Connection:
|
|
94
|
+
"""Get an adbc connection in transaction."""
|
|
95
|
+
conn_kwargs = {TXID_OVERRIDE_PROPERTY: str(txid)}
|
|
96
|
+
if end_user is not None:
|
|
97
|
+
conn_kwargs[END_USER_PROPERTY] = end_user
|
|
98
|
+
|
|
99
|
+
return connect(
|
|
100
|
+
driver=adbc_driver_path,
|
|
101
|
+
db_kwargs={
|
|
102
|
+
"vast.db.endpoint": endpoint,
|
|
103
|
+
"vast.db.access_key": access_key,
|
|
104
|
+
"vast.db.secret_key": secret_key,
|
|
105
|
+
},
|
|
106
|
+
conn_kwargs=conn_kwargs,
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _remove_table_qualification_from_columns(expression: exp.Expression):
|
|
111
|
+
"""Remove the table qualification from every column in the expression, e.g. the "t0" table reference (the ibis default table qualification for unbound tables).
|
|
112
|
+
|
|
113
|
+
Note: use only if a single table is involved - if two tables exist in the expression, unqualified columns might become ambiguous.
|
|
114
|
+
"""
|
|
115
|
+
for col in expression.find_all(exp.Column):
|
|
116
|
+
col.set("table", None)
|
|
117
|
+
return expression
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _ibis_to_qe_predicates(predicate: IbisPredicate) -> str:
|
|
121
|
+
ibis_sql = predicate.to_sql()
|
|
122
|
+
parsed = sqlglot.parse_one(ibis_sql)
|
|
123
|
+
|
|
124
|
+
# currently there is a single table
|
|
125
|
+
# so removing the table qualification from the columns cannot make them ambiguous
|
|
126
|
+
without_table_qualification = _remove_table_qualification_from_columns(
|
|
127
|
+
parsed.expressions[0].this
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
return without_table_qualification.sql()
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _vector_search_sql(
|
|
134
|
+
query_vector: list[float],
|
|
135
|
+
vector_index: VectorIndex,
|
|
136
|
+
table_ref: TableRef,
|
|
137
|
+
columns: list[str],
|
|
138
|
+
limit: int,
|
|
139
|
+
predicate: Optional[IbisPredicate] = None,
|
|
140
|
+
) -> str:
|
|
141
|
+
query_vector_dim = len(query_vector)
|
|
142
|
+
|
|
143
|
+
query_vector_literal = f"{query_vector}::FLOAT[{query_vector_dim}]"
|
|
144
|
+
dist_func = f"{vector_index.sql_distance_function}({vector_index.column}::FLOAT[{query_vector_dim}], {query_vector_literal})"
|
|
145
|
+
dist_alias = f"{dist_func} as {VAST_DIST_ALIAS}"
|
|
146
|
+
|
|
147
|
+
projection_str = ",".join(columns + [dist_alias])
|
|
148
|
+
|
|
149
|
+
if predicate is not None:
|
|
150
|
+
where = f"WHERE {_ibis_to_qe_predicates(predicate)}"
|
|
151
|
+
else:
|
|
152
|
+
where = ""
|
|
153
|
+
|
|
154
|
+
return f"""
|
|
155
|
+
SELECT {projection_str}
|
|
156
|
+
FROM {table_ref.query_engine_full_path}
|
|
157
|
+
{where}
|
|
158
|
+
ORDER BY {VAST_DIST_ALIAS}
|
|
159
|
+
LIMIT {limit}"""
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
class AdbcConnection:
|
|
163
|
+
def __init__(
|
|
164
|
+
self,
|
|
165
|
+
adbc_driver: AdbcDriver,
|
|
166
|
+
endpoint: str,
|
|
167
|
+
access_key: str,
|
|
168
|
+
secret_key: str,
|
|
169
|
+
txid: int,
|
|
170
|
+
end_user: Optional[str] = None,
|
|
171
|
+
):
|
|
172
|
+
self._adbc_conn = _get_adbc_connection(
|
|
173
|
+
adbc_driver.local_path, endpoint, access_key, secret_key, txid, end_user
|
|
174
|
+
)
|
|
175
|
+
|
|
176
|
+
self._cursor = self._adbc_conn.cursor()
|
|
177
|
+
|
|
178
|
+
@property
|
|
179
|
+
def cursor(self) -> Cursor:
|
|
180
|
+
return self._cursor
|
|
181
|
+
|
|
182
|
+
def close(self):
|
|
183
|
+
self._cursor.close()
|
|
184
|
+
|
|
185
|
+
def vector_search(
|
|
186
|
+
self,
|
|
187
|
+
query_vector: list[float],
|
|
188
|
+
vector_index: VectorIndex,
|
|
189
|
+
table_ref: TableRef,
|
|
190
|
+
columns: list[str],
|
|
191
|
+
limit: int,
|
|
192
|
+
predicate: Optional[IbisPredicate] = None,
|
|
193
|
+
) -> pa.RecordBatchReader:
|
|
194
|
+
"""Top-n on vector-column."""
|
|
195
|
+
sql = _vector_search_sql(
|
|
196
|
+
query_vector=query_vector,
|
|
197
|
+
vector_index=vector_index,
|
|
198
|
+
table_ref=table_ref,
|
|
199
|
+
columns=columns,
|
|
200
|
+
limit=limit,
|
|
201
|
+
predicate=predicate,
|
|
202
|
+
)
|
|
203
|
+
|
|
204
|
+
self._cursor.execute(sql)
|
|
205
|
+
return self._cursor.fetch_record_batch()
|