vast-daft 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,43 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.egg-info/
6
+ *.egg
7
+ dist/
8
+ build/
9
+ .eggs/
10
+
11
+ # Virtual environments
12
+ .venv/
13
+ venv/
14
+ env/
15
+
16
+ # uv
17
+ uv.lock
18
+
19
+ # IDE
20
+ .idea/
21
+ .vscode/
22
+ *.swp
23
+ *.swo
24
+ *~
25
+ .DS_Store
26
+
27
+ # Testing
28
+ .pytest_cache/
29
+ .coverage
30
+ htmlcov/
31
+ .mypy_cache/
32
+
33
+ # Distribution
34
+ *.whl
35
+ *.tar.gz
36
+
37
+ # Environment
38
+ .env
39
+ .env.*
40
+ !.env.example
41
+
42
+ # Local example data (Lance datasets, downloaded corpora)
43
+ data/
@@ -0,0 +1,82 @@
1
+ Metadata-Version: 2.4
2
+ Name: vast-daft
3
+ Version: 0.1.1
4
+ Summary: Daft custom connector (DataSource/DataSink) for VastDB
5
+ Project-URL: Homepage, https://github.com/vast-data/vast-daft
6
+ Project-URL: Repository, https://github.com/vast-data/vast-daft
7
+ Author-email: Ofer Helman <ofer.helman@vastdata.com>
8
+ License-Expression: Apache-2.0
9
+ Requires-Python: >=3.12
10
+ Requires-Dist: daft==0.7.10
11
+ Requires-Dist: ibis-framework>=9.0
12
+ Requires-Dist: numpy<2
13
+ Requires-Dist: pyarrow>=15.0
14
+ Requires-Dist: vastdb>=1.2
15
+ Provides-Extra: dev
16
+ Requires-Dist: pyiceberg[s3fs,sql-sqlite]>=0.11.1; extra == 'dev'
17
+ Requires-Dist: pylance>=0.39.0; extra == 'dev'
18
+ Requires-Dist: pyright>=1.1; extra == 'dev'
19
+ Requires-Dist: pytest-cov>=5.0; extra == 'dev'
20
+ Requires-Dist: pytest>=8.0; extra == 'dev'
21
+ Requires-Dist: python-dotenv>=1.0; extra == 'dev'
22
+ Requires-Dist: ray==2.53.0; extra == 'dev'
23
+ Requires-Dist: ruff>=0.4; extra == 'dev'
24
+ Provides-Extra: examples
25
+ Requires-Dist: python-dotenv>=1.0; extra == 'examples'
26
+ Description-Content-Type: text/markdown
27
+
28
+ # vast-daft
29
+
30
+ Daft custom connector (`DataSource` / `DataSink`) for [VastDB](https://vastdata.com).
31
+
32
+ ## Installation
33
+
34
+ ```bash
35
+ pip install vast-daft
36
+ ```
37
+
38
+ Or with uv:
39
+
40
+ ```bash
41
+ uv add vast-daft
42
+ ```
43
+
44
+ ## Quick Start
45
+
46
+ ### Reading from VastDB
47
+
48
+ ```python
49
+ import pyarrow as pa
50
+ from vast_daft import VastDBConfig, VastDBDataSource
51
+
52
+ config = VastDBConfig(
53
+ endpoint="http://vastdb:9090",
54
+ access_key="YOUR_ACCESS_KEY",
55
+ secret_key="YOUR_SECRET_KEY",
56
+ bucket="my-bucket",
57
+ schema="my-schema",
58
+ )
59
+
60
+ schema = pa.schema([
61
+ ("id", pa.string()),
62
+ ("name", pa.string()),
63
+ ("value", pa.float64()),
64
+ ])
65
+
66
+ source = VastDBDataSource(config, "my_table", schema)
67
+ df = source.read()
68
+ df.show()
69
+ ```
70
+
71
+ ### Writing to VastDB
72
+
73
+ ```python
74
+ import daft
+ import pyarrow as pa
75
+ from vast_daft import VastDBConfig, VastDBDataSink
76
+
77
+ config = VastDBConfig(...)
78
+ schema = pa.schema([("id", pa.string()), ("value", pa.float64())])
79
+
80
+ sink = VastDBDataSink(config, "my_table", schema)
81
+ daft.from_pydict({"id": ["a", "b"], "value": [1.0, 2.0]}).write_sink(sink).show()
82
+ ```
@@ -0,0 +1,55 @@
1
+ # vast-daft
2
+
3
+ Daft custom connector (`DataSource` / `DataSink`) for [VastDB](https://vastdata.com).
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install vast-daft
9
+ ```
10
+
11
+ Or with uv:
12
+
13
+ ```bash
14
+ uv add vast-daft
15
+ ```
16
+
17
+ ## Quick Start
18
+
19
+ ### Reading from VastDB
20
+
21
+ ```python
22
+ import pyarrow as pa
23
+ from vast_daft import VastDBConfig, VastDBDataSource
24
+
25
+ config = VastDBConfig(
26
+ endpoint="http://vastdb:9090",
27
+ access_key="YOUR_ACCESS_KEY",
28
+ secret_key="YOUR_SECRET_KEY",
29
+ bucket="my-bucket",
30
+ schema="my-schema",
31
+ )
32
+
33
+ schema = pa.schema([
34
+ ("id", pa.string()),
35
+ ("name", pa.string()),
36
+ ("value", pa.float64()),
37
+ ])
38
+
39
+ source = VastDBDataSource(config, "my_table", schema)
40
+ df = source.read()
41
+ df.show()
42
+ ```
43
+
44
+ ### Writing to VastDB
45
+
46
+ ```python
47
+ import daft
+ import pyarrow as pa
48
+ from vast_daft import VastDBConfig, VastDBDataSink
49
+
50
+ config = VastDBConfig(...)
51
+ schema = pa.schema([("id", pa.string()), ("value", pa.float64())])
52
+
53
+ sink = VastDBDataSink(config, "my_table", schema)
54
+ daft.from_pydict({"id": ["a", "b"], "value": [1.0, 2.0]}).write_sink(sink).show()
55
+ ```
@@ -0,0 +1,59 @@
1
+ [project]
2
+ name = "vast-daft"
3
+ version = "0.1.1"
4
+ description = "Daft custom connector (DataSource/DataSink) for VastDB"
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ license = "Apache-2.0"
8
+ authors = [{ name = "Ofer Helman", email = "ofer.helman@vastdata.com" }]
9
+ dependencies = [
10
+ "daft==0.7.10",
11
+ "numpy<2",
12
+ "vastdb>=1.2",
13
+ "pyarrow>=15.0",
14
+ "ibis-framework>=9.0",
15
+ ]
16
+
17
+ [project.urls]
18
+ Homepage = "https://github.com/vast-data/vast-daft"
19
+ Repository = "https://github.com/vast-data/vast-daft"
20
+
21
+ [project.optional-dependencies]
22
+ dev = [
23
+ "pytest>=8.0",
24
+ "pytest-cov>=5.0",
25
+ "ruff>=0.4",
26
+ "pyright>=1.1",
27
+ # "ty>=0.0.0a1",
28
+ "python-dotenv>=1.0",
29
+ "ray==2.53.0",
30
+ "pylance>=0.39.0",
31
+ "pyiceberg[s3fs,sql-sqlite]>=0.11.1",
32
+ ]
33
+ examples = [
34
+ "python-dotenv>=1.0",
35
+ ]
36
+
37
+ [build-system]
38
+ requires = ["hatchling"]
39
+ build-backend = "hatchling.build"
40
+
41
+ [tool.pyright]
42
+ typeCheckingMode = "basic"
43
+
44
# pytest 8.x only reads pyproject.toml settings from ``[tool.pytest.ini_options]``;
# the bare ``[tool.pytest]`` table is honoured only from pytest 9 onwards, while
# the ``dev`` extra allows ``pytest>=8.0``.  The ``ini_options`` table is
# accepted by both major versions.
[tool.pytest.ini_options]
testpaths = ["tests"]
46
+
47
+ [tool.ruff]
48
+ target-version = "py312"
49
+ line-length = 120
50
+
51
+ [tool.ruff.lint]
52
+ select = ["E", "F", "I", "W", "UP"]
53
+
54
+ [tool.ty.environment]
55
+ python-version = "3.12"
56
+
57
+ [tool.ty.rules]
58
+ unsupported-operator = "ignore"
59
+
@@ -0,0 +1,186 @@
1
+ """vast-daft: Daft custom connector for VastDB.
2
+
3
+ Provides ``DataSource`` / ``DataSink`` implementations for reading from
4
+ and writing to VastDB, and a Daft catalog integration for native SQL queries.
5
+
6
+ Importing this module registers one convenience method on
7
+ ``daft.DataFrame`` and exposes one module-level reader function:
8
+
9
+ * ``vast_daft.read_vastdb(config, table_name, ...)`` — read a VastDB table
10
+ * ``df.write_vastdb(config, table_name, table_schema, ...)`` — write to a VastDB table
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from typing import Any
16
+
17
+ import daft
18
+ import pyarrow as pa
19
+
20
+ from vast_daft.config import VastDBConfig
21
+ from vast_daft.connection import VastDBConnection
22
+ from vast_daft.scan import VastDBScanOperator
23
+ from vast_daft.sink import VastDBDataSink
24
+ from vast_daft.source import VastDBDataSource
25
+ from vast_daft.table import VastDBCatalog, VastDBTable
26
+
27
+ # ---------------------------------------------------------------------------
28
+ # vast_daft.read_vastdb()
29
+ # ---------------------------------------------------------------------------
30
+
31
def _read_vastdb(
    config: VastDBConfig,
    table_name: str,
    *,
    bucket: str | None = None,
    schema: str | None = None,
    columns: list[str] | None = None,
    num_splits: int | None = None,
) -> daft.DataFrame:
    """Return a Daft DataFrame that scans a VastDB table.

    Module-level factory (exported as ``vast_daft.read_vastdb``), analogous in
    spirit to ``daft.read_parquet()`` / ``daft.read_iceberg()``.  The table's
    PyArrow schema is looked up from the live table inside a VastDB
    transaction, then a ``VastDBScanOperator`` is wrapped into a Daft logical
    plan.

    Parameters
    ----------
    config:
        VastDB connection configuration.
    table_name:
        Name of the VastDB table to read.
    bucket:
        VastDB bucket.  Falls back to ``config.bucket`` when not given (or
        falsy).
    schema:
        VastDB schema.  Falls back to ``config.schema`` when not given (or
        falsy).
    columns:
        Optional list of column names to project.  When given, the discovered
        schema is narrowed to exactly these fields, in this order.
    num_splits:
        Number of read splits.  Only forwarded to the scan operator when it is
        not ``None``; otherwise the operator's own default applies.

    Returns
    -------
    daft.DataFrame

    Raises
    ------
    ValueError
        If neither the arguments nor ``config`` supply both a bucket and a
        schema.

    Examples
    --------
    >>> import vast_daft
    >>> from vast_daft import VastDBConfig
    >>> config = VastDBConfig(
    ...     endpoint="http://vastdb:9090",
    ...     access_key="...",
    ...     secret_key="...",
    ...     bucket="my-bucket",
    ...     schema="my-schema",
    ... )
    >>> df = vast_daft.read_vastdb(config, "orders")
    >>> df.show()
    """
    # Daft internals are imported lazily, at call time.
    from daft.daft import ScanOperatorHandle
    from daft.logical.builder import LogicalPlanBuilder

    target_bucket = bucket or config.bucket
    target_schema = schema or config.schema
    if target_bucket is None or target_schema is None:
        raise ValueError("bucket and schema must be provided either as arguments or via VastDBConfig")

    # Ask the live table for its PyArrow schema.
    connection = VastDBConnection(config)
    with connection.session.transaction() as tx:
        arrow_schema = tx.bucket(target_bucket).schema(target_schema).table(table_name).columns()

    # Narrow the schema to the requested projection, preserving caller order.
    if columns is not None:
        arrow_schema = pa.schema([arrow_schema.field(name) for name in columns])

    # Only pass ``num_splits`` through when the caller chose a value.
    split_options: dict[str, Any] = {} if num_splits is None else {"num_splits": num_splits}

    operator = VastDBScanOperator(
        config,
        table_name,
        arrow_schema,
        bucket=target_bucket,
        db_schema=target_schema,
        columns=columns,
        **split_options,
    )
    plan = LogicalPlanBuilder.from_tabular_scan(
        scan_operator=ScanOperatorHandle.from_python_scan_operator(operator),
    )
    return daft.DataFrame(plan)
105
+
106
+
107
+ # ---------------------------------------------------------------------------
108
+ # df.write_vastdb()
109
+ # ---------------------------------------------------------------------------
110
+
111
def _write_vastdb(
    self: daft.DataFrame,
    config: VastDBConfig,
    table_name: str,
    table_schema: pa.Schema,
    *,
    bucket: str | None = None,
    schema: str | None = None,
    create_if_missing: bool = True,
) -> daft.DataFrame:
    """Write this DataFrame to a VastDB table through a ``VastDBDataSink``.

    Attached to ``daft.DataFrame`` as ``write_vastdb`` when ``vast_daft`` is
    imported, so ``self`` is the DataFrame being written.

    Parameters
    ----------
    config:
        VastDB connection configuration.
    table_name:
        Name of the target VastDB table.
    table_schema:
        PyArrow schema of the target table.
    bucket:
        VastDB bucket, forwarded to the sink (``None`` leaves the choice to
        the sink / ``config``).
    schema:
        VastDB schema, forwarded to the sink (``None`` leaves the choice to
        the sink / ``config``).
    create_if_missing:
        Forwarded to the sink; asks it to create the table when it does not
        exist (default: ``True``).

    Returns
    -------
    daft.DataFrame
        Whatever ``DataFrame.write_sink`` returns for the sink — presumably a
        per-partition write summary (``rows_written`` / ``bytes_written``);
        confirm against ``VastDBDataSink``.

    Examples
    --------
    >>> import daft, pyarrow as pa, vast_daft
    >>> from vast_daft import VastDBConfig
    >>> config = VastDBConfig(endpoint="http://vastdb:9090", access_key="...", secret_key="...")
    >>> schema = pa.schema([("id", pa.int64()), ("value", pa.float64())])
    >>> df = daft.from_pydict({"id": [1, 2], "value": [0.1, 0.2]})
    >>> df.write_vastdb(config, "my_table", schema)
    """
    sink_options = {
        "bucket": bucket,
        "schema": schema,
        "create_if_missing": create_if_missing,
    }
    return self.write_sink(VastDBDataSink(config, table_name, table_schema, **sink_options))
164
+
165
+
166
# Public name for the reader — callers use ``vast_daft.read_vastdb(...)``.
read_vastdb = _read_vastdb

# Attach the writer to daft.DataFrame so ``df.write_vastdb(...)`` is available
# as soon as this package has been imported.
daft.DataFrame.write_vastdb = _write_vastdb  # type: ignore[attr-defined]


__all__ = [
    # Configuration and connection handling
    "VastDBConfig",
    "VastDBConnection",
    # Daft source / sink / scan-operator implementations
    "VastDBDataSource",
    "VastDBDataSink",
    "VastDBScanOperator",
    # Daft catalog and table interfaces (for daft.attach_catalog + daft.sql)
    "VastDBCatalog",
    "VastDBTable",
    # Top-level reader, in the style of daft.read_parquet and friends
    "read_vastdb",
]
@@ -0,0 +1,245 @@
1
+ """Translate Daft ``Pushdowns`` to ibis predicates and projection columns.
2
+
3
+ Daft calls ``get_tasks(pushdowns)`` with a :class:`~daft.io.pushdowns.Pushdowns`
4
+ object that may carry:
5
+
6
+ * ``filters`` — a ``daft.expressions.Expression`` (the combined filter predicate)
7
+ * ``columns`` — a ``list[str]`` of column names to project
8
+ * ``limit`` — an ``int`` row limit
9
+
10
+ This module converts the ``filters`` expression into an ibis predicate that can
11
+ be passed to :func:`vastdb.table.Table.select_splits`, achieving genuine
12
+ server-side push-down for filter, column projection, and row limit.
13
+
14
+ Supported Daft expression nodes
15
+ ---------------------------------
16
+ Binary comparisons: ``==``, ``!=``, ``>``, ``>=``, ``<``, ``<=``
17
+ Membership: ``col.is_in([...])``
18
+ Between: ``col.between(lo, hi)``
19
+ Null checks: ``col.is_null()``, ``col.not_null()``
20
+ Logical: ``expr & expr``, ``expr | expr``, ``~expr``
21
+ String: ``col.contains(s)``, ``col.startswith(s)``, ``col.endswith(s)``
22
+
23
+ Any unsupported node causes the entire filter to be returned un-translated
24
+ (``None``), leaving Daft to apply it client-side as a post-filter.
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ import logging
30
+ from typing import Any
31
+
32
+ import ibis
33
+ from daft.expressions import Expression, ExpressionVisitor
34
+ from daft.io.pushdowns import Pushdowns
35
+ from ibis.common.deferred import Deferred
36
+ from ibis.expr.types import Column as BooleanColumn
37
+
38
+ logger = logging.getLogger(__name__)
39
+
40
+ Predicate = BooleanColumn | Deferred
41
+
42
+ # Sentinel to signal "unsupported — fall back to client-side"
43
+ _UNSUPPORTED = object()
44
+
45
+
46
+ # ---------------------------------------------------------------------------
47
+ # Visitor
48
+ # ---------------------------------------------------------------------------
49
+
50
+
51
class _DaftToIbisVisitor(ExpressionVisitor):
    """Walk a Daft expression tree and produce an ibis predicate.

    Column references become ``ibis._[name]`` deferred expressions and literals
    are passed through as plain Python values.  Every ``visit_*`` method returns
    either a translated value or ``_UNSUPPORTED`` (the module-level sentinel),
    so callers can detect a failed translation without an exception being
    raised for it.

    Sub-expressions built purely from literals (for example
    ``lit(1) == lit(1)`` or ``~lit(True)``) are reported as ``_UNSUPPORTED``:
    evaluating them here would yield a plain Python value (``True``, ``-2``)
    rather than an ibis predicate.

    Attributes
    ----------
    referenced_columns:
        Names of every column visited so far, including columns visited in a
        sub-expression that later turned out to be unsupported.
    """

    # Binary operators keyed by the names used by the ``visit_*`` methods
    # below.  Defined once on the class instead of on every ``_binop`` call.
    _BINARY_OPS: dict[str, Any] = {
        "equal": lambda a, b: a == b,
        "not_equal": lambda a, b: a != b,
        "greater_than": lambda a, b: a > b,
        "greater_than_or_equal": lambda a, b: a >= b,
        "less_than": lambda a, b: a < b,
        "less_than_or_equal": lambda a, b: a <= b,
        "and": lambda a, b: a & b,
        "or": lambda a, b: a | b,
    }

    def __init__(self) -> None:
        super().__init__()
        self.referenced_columns: set[str] = set()

    # -- Leaves --------------------------------------------------------------

    def visit_col(self, name: str) -> Any:
        """Record *name* as referenced and return the deferred ``ibis._[name]``."""
        self.referenced_columns.add(name)
        return ibis._[name]

    def visit_lit(self, value: Any) -> Any:
        """Return the literal unchanged."""
        # Daft passes the Python value directly (int, float, str, bool, None)
        return value

    # -- Structural nodes that we don't push down ----------------------------

    def visit_alias(self, expr: Expression, alias: str) -> Any:
        """Ignore the alias and translate the wrapped expression."""
        # Aliases don't affect predicate semantics — visit the inner expr
        return self.visit(expr)

    def visit_cast(self, expr: Expression, dtype: Any) -> Any:
        """Casts are never pushed down."""
        return _UNSUPPORTED

    def visit_coalesce(self, *args: Expression) -> Any:
        """Coalesce is never pushed down."""
        # Forward-compatibility with newer Daft visitor APIs.
        return _UNSUPPORTED

    # -- Function dispatch ---------------------------------------------------

    def visit_function(self, name: str, args: list) -> Any:
        """Fallback for unrecognised function names."""
        return _UNSUPPORTED

    # visit_() in the base class dispatches to visit_<name> if it exists,
    # otherwise calls visit_function().  We register individual functions
    # below as visit_<daft_function_name> methods.

    def _visit_operand(self, expr: Expression) -> Any:
        """Visit *expr* and require the result to be an ibis deferred expression.

        Used for the subject of method-style predicates (``isnull``, ``isin``,
        ``between``, ``startswith`` ...) and for ``~``.  Anything else — the
        ``_UNSUPPORTED`` sentinel or a bare Python literal — is reported as
        ``_UNSUPPORTED``, because calling those methods on a literal would
        either fail or silently compute a plain Python value.
        """
        translated = self.visit(expr)
        return translated if isinstance(translated, Deferred) else _UNSUPPORTED

    def _binop(self, args: list, op: str) -> Any:
        """Visit two expression args and apply the binary ibis operator *op*.

        Returns ``_UNSUPPORTED`` when either side is unsupported, or when
        neither side is an ibis deferred expression (a literal-only comparison
        would evaluate to a Python value, not a predicate).  Raises
        ``KeyError`` for an *op* that is not in ``_BINARY_OPS``.
        """
        lhs = self.visit(args[0])
        rhs = self.visit(args[1])
        if lhs is _UNSUPPORTED or rhs is _UNSUPPORTED:
            return _UNSUPPORTED
        if not (isinstance(lhs, Deferred) or isinstance(rhs, Deferred)):
            return _UNSUPPORTED
        return self._BINARY_OPS[op](lhs, rhs)

    # Comparison operators
    def visit_equal(self, lhs: Expression, rhs: Expression) -> Any:
        return self._binop([lhs, rhs], "equal")

    def visit_not_equal(self, lhs: Expression, rhs: Expression) -> Any:
        return self._binop([lhs, rhs], "not_equal")

    def visit_greater_than(self, lhs: Expression, rhs: Expression) -> Any:
        return self._binop([lhs, rhs], "greater_than")

    def visit_greater_than_or_equal(self, lhs: Expression, rhs: Expression) -> Any:
        return self._binop([lhs, rhs], "greater_than_or_equal")

    def visit_less_than(self, lhs: Expression, rhs: Expression) -> Any:
        return self._binop([lhs, rhs], "less_than")

    def visit_less_than_or_equal(self, lhs: Expression, rhs: Expression) -> Any:
        return self._binop([lhs, rhs], "less_than_or_equal")

    # Logical operators
    def visit_and(self, lhs: Expression, rhs: Expression) -> Any:
        return self._binop([lhs, rhs], "and")

    def visit_or(self, lhs: Expression, rhs: Expression) -> Any:
        return self._binop([lhs, rhs], "or")

    def visit_not(self, expr: Expression) -> Any:
        """Negate the translated operand (``~`` on a Python bool would be bitwise)."""
        inner = self._visit_operand(expr)
        if inner is _UNSUPPORTED:
            return _UNSUPPORTED
        return ~inner

    # Null checks
    def visit_is_null(self, expr: Expression) -> Any:
        inner = self._visit_operand(expr)
        if inner is _UNSUPPORTED:
            return _UNSUPPORTED
        return inner.isnull()

    def visit_not_null(self, expr: Expression) -> Any:
        inner = self._visit_operand(expr)
        if inner is _UNSUPPORTED:
            return _UNSUPPORTED
        return inner.notnull()

    # Membership: is_in(expr, [lit, lit, ...])
    # NOTE: ``values`` is a raw list of Expression objects, not pre-visited.
    def visit_is_in(self, expr: Expression, values: list) -> Any:
        col_ibis = self._visit_operand(expr)
        if col_ibis is _UNSUPPORTED:
            return _UNSUPPORTED
        visited_vals = [self.visit(v) for v in values]
        if any(v is _UNSUPPORTED for v in visited_vals):
            return _UNSUPPORTED
        return col_ibis.isin(visited_vals)

    # Between: between(expr, lo, hi)
    def visit_between(self, expr: Expression, lo: Expression, hi: Expression) -> Any:
        col_ibis = self._visit_operand(expr)
        lo_val = self.visit(lo)
        hi_val = self.visit(hi)
        if any(v is _UNSUPPORTED for v in (col_ibis, lo_val, hi_val)):
            return _UNSUPPORTED
        return col_ibis.between(lo_val, hi_val)

    # String functions
    def visit_starts_with(self, expr: Expression, prefix: Expression) -> Any:
        col_ibis = self._visit_operand(expr)
        val = self.visit(prefix)
        if col_ibis is _UNSUPPORTED or val is _UNSUPPORTED:
            return _UNSUPPORTED
        return col_ibis.startswith(val)

    def visit_ends_with(self, expr: Expression, suffix: Expression) -> Any:
        col_ibis = self._visit_operand(expr)
        val = self.visit(suffix)
        if col_ibis is _UNSUPPORTED or val is _UNSUPPORTED:
            return _UNSUPPORTED
        return col_ibis.endswith(val)

    def visit_contains(self, expr: Expression, pattern: Expression) -> Any:
        col_ibis = self._visit_operand(expr)
        val = self.visit(pattern)
        if col_ibis is _UNSUPPORTED or val is _UNSUPPORTED:
            return _UNSUPPORTED
        return col_ibis.contains(val)
201
+
202
+
203
+ # ---------------------------------------------------------------------------
204
+ # Public entry-point
205
+ # ---------------------------------------------------------------------------
206
+
207
def pushdowns_to_predicate(pushdowns: Pushdowns) -> Predicate | None:
    """Translate *pushdowns.filters* into an ibis predicate, if possible.

    Thin wrapper around :func:`pushdowns_to_predicate_and_columns` that drops
    the referenced-column set.

    Returns ``None`` when there is no filter or when the filter could not be
    translated; the caller should then pass no predicate to VastDB and let
    Daft apply the filter client-side.
    """
    return pushdowns_to_predicate_and_columns(pushdowns)[0]
216
+
217
+
218
def pushdowns_to_predicate_and_columns(
    pushdowns: Pushdowns,
) -> tuple[Predicate | None, set[str]]:
    """Translate the filter and return the columns it references.

    The referenced column set is needed by callers that also push down a
    column projection.  VastDB's ``select_splits`` requires every column the
    predicate touches to appear in ``columns``; otherwise the server raises
    ``FieldNotFound``.  Callers must union this set with their projection
    before handing it to VastDB.

    Parameters
    ----------
    pushdowns:
        The Daft pushdowns object; only ``pushdowns.filters`` is read.

    Returns
    -------
    tuple[Predicate | None, set[str]]
        ``(predicate, referenced_columns)``.  The predicate is ``None`` and
        the set is empty when there is no filter, when the visitor raised,
        when the expression contains an unsupported node, or when the
        translation did not produce an ibis expression (for example a filter
        that is a bare literal).
    """
    if pushdowns.filters is None:
        return None, set()
    visitor = _DaftToIbisVisitor()
    try:
        result = visitor.visit(pushdowns.filters)
    except Exception as exc:
        # Deliberate best-effort: any translation failure just disables the
        # push-down, leaving the filter to be applied client-side.
        logger.debug("Daft filter not pushed down to VastDB — visitor error: %s", exc)
        return None, set()
    # A literal-only filter makes the visitor hand back a plain Python value
    # (``True``, ``-2`` ...).  That is not a predicate VastDB can evaluate, so
    # treat it exactly like an unsupported expression.
    if result is _UNSUPPORTED or not isinstance(result, (BooleanColumn, Deferred)):
        logger.debug(
            "Daft filter not pushed down to VastDB — unsupported expression: %s",
            pushdowns.filters,
        )
        return None, set()
    return result, visitor.referenced_columns