sqlalchemy-paradedb 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlalchemy_paradedb-0.1.0/.gitignore +9 -0
- sqlalchemy_paradedb-0.1.0/LICENSE +21 -0
- sqlalchemy_paradedb-0.1.0/PKG-INFO +354 -0
- sqlalchemy_paradedb-0.1.0/README.md +321 -0
- sqlalchemy_paradedb-0.1.0/paradedb/__init__.py +61 -0
- sqlalchemy_paradedb-0.1.0/paradedb/sqlalchemy/__init__.py +3 -0
- sqlalchemy_paradedb-0.1.0/paradedb/sqlalchemy/_functions.py +49 -0
- sqlalchemy_paradedb-0.1.0/paradedb/sqlalchemy/_pdb_cast.py +51 -0
- sqlalchemy_paradedb-0.1.0/paradedb/sqlalchemy/_select_introspection.py +51 -0
- sqlalchemy_paradedb-0.1.0/paradedb/sqlalchemy/alembic.py +399 -0
- sqlalchemy_paradedb-0.1.0/paradedb/sqlalchemy/diagnostics.py +106 -0
- sqlalchemy_paradedb-0.1.0/paradedb/sqlalchemy/errors.py +61 -0
- sqlalchemy_paradedb-0.1.0/paradedb/sqlalchemy/expr.py +18 -0
- sqlalchemy_paradedb-0.1.0/paradedb/sqlalchemy/facets.py +134 -0
- sqlalchemy_paradedb-0.1.0/paradedb/sqlalchemy/indexing.py +856 -0
- sqlalchemy_paradedb-0.1.0/paradedb/sqlalchemy/inspect.py +51 -0
- sqlalchemy_paradedb-0.1.0/paradedb/sqlalchemy/pdb.py +113 -0
- sqlalchemy_paradedb-0.1.0/paradedb/sqlalchemy/py.typed +0 -0
- sqlalchemy_paradedb-0.1.0/paradedb/sqlalchemy/search.py +482 -0
- sqlalchemy_paradedb-0.1.0/paradedb/sqlalchemy/select_with.py +73 -0
- sqlalchemy_paradedb-0.1.0/paradedb/sqlalchemy/validation.py +66 -0
- sqlalchemy_paradedb-0.1.0/pyproject.toml +100 -0
- sqlalchemy_paradedb-0.1.0/scripts/check_api_coverage.py +239 -0
- sqlalchemy_paradedb-0.1.0/scripts/check_schema_compat.py +258 -0
- sqlalchemy_paradedb-0.1.0/scripts/run_examples.sh +34 -0
- sqlalchemy_paradedb-0.1.0/scripts/run_integration_tests.sh +33 -0
- sqlalchemy_paradedb-0.1.0/scripts/run_paradedb.sh +62 -0
- sqlalchemy_paradedb-0.1.0/scripts/run_unit_tests.sh +22 -0
- sqlalchemy_paradedb-0.1.0/tests/integration/conftest.py +167 -0
- sqlalchemy_paradedb-0.1.0/tests/integration/test_advanced_search_integration.py +166 -0
- sqlalchemy_paradedb-0.1.0/tests/integration/test_alembic_integration.py +872 -0
- sqlalchemy_paradedb-0.1.0/tests/integration/test_core_query_compat_integration.py +81 -0
- sqlalchemy_paradedb-0.1.0/tests/integration/test_diagnostics_integration.py +169 -0
- sqlalchemy_paradedb-0.1.0/tests/integration/test_facets_integration.py +175 -0
- sqlalchemy_paradedb-0.1.0/tests/integration/test_indexing_integration.py +672 -0
- sqlalchemy_paradedb-0.1.0/tests/integration/test_paradedb_queries_integration.py +693 -0
- sqlalchemy_paradedb-0.1.0/tests/integration/test_phase0_modules_integration.py +124 -0
- sqlalchemy_paradedb-0.1.0/tests/integration/test_query_interface_integration.py +130 -0
- sqlalchemy_paradedb-0.1.0/tests/integration/test_range_query_integration.py +72 -0
- sqlalchemy_paradedb-0.1.0/tests/integration/test_snippet_functions_integration.py +235 -0
- sqlalchemy_paradedb-0.1.0/tests/unit/test_alembic_unit.py +368 -0
- sqlalchemy_paradedb-0.1.0/tests/unit/test_facets_unit.py +137 -0
- sqlalchemy_paradedb-0.1.0/tests/unit/test_indexing_unit.py +534 -0
- sqlalchemy_paradedb-0.1.0/tests/unit/test_phase0_modules_unit.py +101 -0
- sqlalchemy_paradedb-0.1.0/tests/unit/test_sql_compilation_unit.py +631 -0
- sqlalchemy_paradedb-0.1.0/tests/unit/test_validation_cache_unit.py +184 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 ParadeDB, Inc.
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sqlalchemy-paradedb
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Typed SQLAlchemy helpers for ParadeDB
|
|
5
|
+
Project-URL: Homepage, https://github.com/paradedb/sqlalchemy-paradedb
|
|
6
|
+
Project-URL: Repository, https://github.com/paradedb/sqlalchemy-paradedb
|
|
7
|
+
Project-URL: Issues, https://github.com/paradedb/sqlalchemy-paradedb/issues
|
|
8
|
+
Project-URL: Changelog, https://github.com/paradedb/sqlalchemy-paradedb/blob/main/CHANGELOG.md
|
|
9
|
+
Author: ParadeDB
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: bm25,faceted search,facets,full text search,hybrid search,paradedb,postgres,postgresql,search,sqlalchemy,text search
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
21
|
+
Classifier: Topic :: Database
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Requires-Dist: sqlalchemy>=2.0.32
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: mypy>=1.19.0; extra == 'dev'
|
|
26
|
+
Requires-Dist: pre-commit>=4.0.0; extra == 'dev'
|
|
27
|
+
Requires-Dist: ruff>=0.15.0; extra == 'dev'
|
|
28
|
+
Provides-Extra: test
|
|
29
|
+
Requires-Dist: alembic>=1.13.0; extra == 'test'
|
|
30
|
+
Requires-Dist: psycopg[binary]>=3.1.18; extra == 'test'
|
|
31
|
+
Requires-Dist: pytest>=8.0.0; extra == 'test'
|
|
32
|
+
Description-Content-Type: text/markdown
|
|
33
|
+
|
|
34
|
+
# sqlalchemy-paradedb
|
|
35
|
+
|
|
36
|
+
[PyPI](https://pypi.org/project/sqlalchemy-paradedb/)
|
|
37
|
+
[Codecov](https://codecov.io/gh/paradedb/sqlalchemy-paradedb)
|
|
38
|
+
[CI](https://github.com/paradedb/sqlalchemy-paradedb/actions/workflows/ci.yml)
|
|
39
|
+
[License: MIT](https://github.com/paradedb/sqlalchemy-paradedb?tab=MIT-1-ov-file#readme)
|
|
40
|
+
[Slack](https://paradedb.com/slack)
|
|
41
|
+
[X](https://x.com/paradedb)
|
|
42
|
+
|
|
43
|
+
[ParadeDB](https://paradedb.com) integration for SQLAlchemy: typed helpers for BM25 indexes, search predicates, scoring, snippets, facets, and migration ergonomics.
|
|
44
|
+
|
|
45
|
+
## Requirements & Compatibility
|
|
46
|
+
|
|
47
|
+
| Component | Supported |
|
|
48
|
+
| ---------- | ----------------------------- |
|
|
49
|
+
| Python | 3.10+ |
|
|
50
|
+
| SQLAlchemy | 2.0.32+ |
|
|
51
|
+
| ParadeDB | 0.21.0+ (`pg_search`) |
|
|
52
|
+
| PostgreSQL | 17+ (with ParadeDB extension) |
|
|
53
|
+
|
|
54
|
+
## Installation
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
uv add sqlalchemy-paradedb
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
For local development:
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
uv sync --extra test --extra dev
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## Quick Start
|
|
67
|
+
|
|
68
|
+
### Prerequisites
|
|
69
|
+
|
|
70
|
+
Install `pg_search` in your Postgres database and connect SQLAlchemy to that database.
|
|
71
|
+
|
|
72
|
+
### Create a BM25 Index
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
from sqlalchemy import Index
|
|
76
|
+
from paradedb.sqlalchemy import indexing
|
|
77
|
+
|
|
78
|
+
products_bm25_idx = Index(
|
|
79
|
+
"products_bm25_idx",
|
|
80
|
+
indexing.BM25Field(Product.id),
|
|
81
|
+
indexing.BM25Field(
|
|
82
|
+
Product.description,
|
|
83
|
+
tokenizer=indexing.tokenize.unicode(lowercase=True),
|
|
84
|
+
),
|
|
85
|
+
indexing.BM25Field(
|
|
86
|
+
Product.category,
|
|
87
|
+
tokenizer=indexing.tokenize.literal(),
|
|
88
|
+
),
|
|
89
|
+
postgresql_using="bm25",
|
|
90
|
+
postgresql_with={"key_field": "id"},
|
|
91
|
+
)
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
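For JSON columns named `metadata`, use `metadata_` as the ORM attribute name, because `metadata` is a reserved attribute in SQLAlchemy's Declarative API (for example, `metadata_ = mapped_column("metadata", JSON)`).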
|
|
95
|
+
|
|
96
|
+
### Query with ParadeDB Predicates
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
from sqlalchemy import select
|
|
100
|
+
from sqlalchemy.orm import Session
|
|
101
|
+
from paradedb.sqlalchemy import pdb, search
|
|
102
|
+
|
|
103
|
+
stmt = (
|
|
104
|
+
select(Product.id, Product.description)
|
|
105
|
+
.where(search.match_any(Product.description, "running", "shoes"))
|
|
106
|
+
.order_by(pdb.score(Product.id).desc())
|
|
107
|
+
.limit(10)
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
with Session(engine) as session:
|
|
111
|
+
rows = session.execute(stmt).all()
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### Rows + Facets in a Single Query
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
from sqlalchemy import select
|
|
118
|
+
from sqlalchemy.orm import Session
|
|
119
|
+
from paradedb.sqlalchemy import facets, search
|
|
120
|
+
|
|
121
|
+
base = (
|
|
122
|
+
select(Product.id, Product.description)
|
|
123
|
+
.where(search.match_all(Product.description, "running"))
|
|
124
|
+
.order_by(Product.id)
|
|
125
|
+
.limit(10)
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
stmt = facets.with_rows(
|
|
129
|
+
base,
|
|
130
|
+
agg=facets.multi(
|
|
131
|
+
facets.value_count(field="id"),
|
|
132
|
+
facets.terms(field="category", size=10),
|
|
133
|
+
),
|
|
134
|
+
key_field=Product.id,
|
|
135
|
+
)
|
|
136
|
+
|
|
137
|
+
with Session(engine) as session:
|
|
138
|
+
rows = session.execute(stmt).all()
|
|
139
|
+
facet_payload = facets.extract(rows)
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
## Search Patterns
|
|
143
|
+
|
|
144
|
+
### Fuzzy Matching
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
from paradedb.sqlalchemy import search
|
|
148
|
+
|
|
149
|
+
search.term(Product.description, "shose", distance=1)
|
|
150
|
+
search.match_any(Product.description, "wirless", distance=1, prefix=True)
|
|
151
|
+
search.term(Product.description, "rnnuing", distance=1, transpose_cost_one=True)
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
Use fuzzy options on `term`, `match_any`, or `match_all`; there is no separate `search.fuzzy(...)` helper.
|
|
155
|
+
|
|
156
|
+
### Phrase Prefix and More-Like-This
|
|
157
|
+
|
|
158
|
+
```python
|
|
159
|
+
from paradedb.sqlalchemy import search
|
|
160
|
+
|
|
161
|
+
search.phrase_prefix(Product.description, ["running", "sh"])
|
|
162
|
+
search.more_like_this(Product.id, document_id=1, fields=["description"])
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
### Proximity Composition
|
|
166
|
+
|
|
167
|
+
```python
|
|
168
|
+
from sqlalchemy import select
|
|
169
|
+
from paradedb.sqlalchemy import search
|
|
170
|
+
|
|
171
|
+
prox = search.prox_array("running").within(1, search.prox_regex("sho.*"), ordered=True)
|
|
172
|
+
stmt = select(Product.id).where(search.proximity(Product.description, prox))
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
## Indexing and Tokenizers
|
|
176
|
+
|
|
177
|
+
Tokenizer config can be expressed as a structured mapping:
|
|
178
|
+
|
|
179
|
+
```python
|
|
180
|
+
from sqlalchemy import Index
|
|
181
|
+
from paradedb.sqlalchemy import indexing
|
|
182
|
+
|
|
183
|
+
products_bm25_idx = Index(
|
|
184
|
+
"products_bm25_idx",
|
|
185
|
+
indexing.BM25Field(Product.id),
|
|
186
|
+
indexing.BM25Field(
|
|
187
|
+
Product.description,
|
|
188
|
+
tokenizer=indexing.tokenize.from_config(
|
|
189
|
+
{
|
|
190
|
+
"tokenizer": "simple",
|
|
191
|
+
"filters": ["lowercase", "stemmer"],
|
|
192
|
+
"stemmer": "english",
|
|
193
|
+
"alias": "description_simple",
|
|
194
|
+
}
|
|
195
|
+
),
|
|
196
|
+
),
|
|
197
|
+
indexing.BM25Field(
|
|
198
|
+
Product.description,
|
|
199
|
+
tokenizer=indexing.tokenize.from_config(
|
|
200
|
+
{
|
|
201
|
+
"tokenizer": "ngram",
|
|
202
|
+
"args": [3, 8],
|
|
203
|
+
"named_args": {"prefix_only": True},
|
|
204
|
+
"alias": "description_ngram",
|
|
205
|
+
}
|
|
206
|
+
),
|
|
207
|
+
),
|
|
208
|
+
postgresql_using="bm25",
|
|
209
|
+
postgresql_with={"key_field": "id"},
|
|
210
|
+
)
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
Validate that a field is indexed with the expected tokenizer:
|
|
214
|
+
|
|
215
|
+
```python
|
|
216
|
+
from paradedb.sqlalchemy import indexing
|
|
217
|
+
|
|
218
|
+
indexing.assert_indexed(engine, Product.category, tokenizer="literal")
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
Inspect BM25 metadata for a mapped table:
|
|
222
|
+
|
|
223
|
+
```python
|
|
224
|
+
from paradedb.sqlalchemy import indexing
|
|
225
|
+
|
|
226
|
+
meta = indexing.describe(engine, Product.__table__)
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
## Alembic Operations
|
|
230
|
+
|
|
231
|
+
Import the module once in your migration environment (typically `env.py`) so that Alembic registers the ParadeDB operations:
|
|
232
|
+
|
|
233
|
+
```python
|
|
234
|
+
import paradedb.sqlalchemy.alembic # noqa: F401
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
Use custom operations in migrations:
|
|
238
|
+
|
|
239
|
+
```python
|
|
240
|
+
op.create_bm25_index(
|
|
241
|
+
"products_bm25_idx",
|
|
242
|
+
"products",
|
|
243
|
+
["id", "description"],
|
|
244
|
+
key_field="id",
|
|
245
|
+
table_schema="public",
|
|
246
|
+
)
|
|
247
|
+
op.reindex_bm25("products_bm25_idx", concurrently=True, schema="public")
|
|
248
|
+
op.drop_bm25_index("products_bm25_idx", if_exists=True, schema="public")
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
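`op.reindex_bm25(..., concurrently=True)` must run outside a transaction, because PostgreSQL does not allow concurrent reindexing inside a transaction block. In Alembic, wrap the call in `with op.get_context().autocommit_block():`.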
|
|
252
|
+
|
|
253
|
+
## Diagnostics Helpers
|
|
254
|
+
|
|
255
|
+
`paradedb.sqlalchemy.diagnostics` exposes wrapper functions for ParadeDB diagnostics:
|
|
256
|
+
|
|
257
|
+
```python
|
|
258
|
+
from paradedb.sqlalchemy import diagnostics
|
|
259
|
+
|
|
260
|
+
indexes = diagnostics.paradedb_indexes(engine)
|
|
261
|
+
segments = diagnostics.paradedb_index_segments(engine, "products_bm25_idx")
|
|
262
|
+
check = diagnostics.paradedb_verify_index(engine, "products_bm25_idx", sample_rate=0.1)
|
|
263
|
+
all_checks = diagnostics.paradedb_verify_all_indexes(engine, schema_pattern="public")
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
## Common Errors
|
|
267
|
+
|
|
268
|
+
### `with_rows requires ORDER BY`
|
|
269
|
+
|
|
270
|
+
```python
|
|
271
|
+
from sqlalchemy import select
|
|
272
|
+
from paradedb.sqlalchemy import facets
|
|
273
|
+
|
|
274
|
+
# Missing order_by(...)
|
|
275
|
+
base = select(Product.id).limit(10)
|
|
276
|
+
facets.with_rows(base, agg=facets.value_count(field="id"), key_field=Product.id)
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
### `with_rows requires LIMIT`
|
|
280
|
+
|
|
281
|
+
```python
|
|
282
|
+
from sqlalchemy import select
|
|
283
|
+
from paradedb.sqlalchemy import facets
|
|
284
|
+
|
|
285
|
+
# Missing limit(...)
|
|
286
|
+
base = select(Product.id).order_by(Product.id)
|
|
287
|
+
facets.with_rows(base, agg=facets.value_count(field="id"), key_field=Product.id)
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
### `with_rows requires a ParadeDB predicate`
|
|
291
|
+
|
|
292
|
+
```python
|
|
293
|
+
from sqlalchemy import select
|
|
294
|
+
from paradedb.sqlalchemy import facets
|
|
295
|
+
|
|
296
|
+
# ensure_predicate=False disables automatic search.all(...) injection
|
|
297
|
+
facets.with_rows(
|
|
298
|
+
select(Product.id).order_by(Product.id).limit(10),
|
|
299
|
+
agg=facets.value_count(field="id"),
|
|
300
|
+
key_field=Product.id,
|
|
301
|
+
ensure_predicate=False,
|
|
302
|
+
)
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
### `tokenizer config requires 'tokenizer'`
|
|
306
|
+
|
|
307
|
+
```python
|
|
308
|
+
from paradedb.sqlalchemy import indexing
|
|
309
|
+
|
|
310
|
+
indexing.tokenize.from_config({"filters": ["lowercase"]})
|
|
311
|
+
```
|
|
312
|
+
|
|
313
|
+
## Examples
|
|
314
|
+
|
|
315
|
+
- Quick Start: [setup](examples/quickstart/setup.py), [run](examples/quickstart/quickstart.py)
|
|
316
|
+
- Faceted Search: [setup](examples/faceted_search/setup.py), [run](examples/faceted_search/faceted_search.py)
|
|
317
|
+
- Autocomplete: [setup](examples/autocomplete/setup.py), [run](examples/autocomplete/autocomplete.py)
|
|
318
|
+
- More Like This: [setup](examples/more_like_this/setup.py), [run](examples/more_like_this/more_like_this.py)
|
|
319
|
+
- Hybrid Search (RRF): [setup](examples/hybrid_rrf/setup.py), [run](examples/hybrid_rrf/hybrid_rrf.py)
|
|
320
|
+
- RAG: [setup](examples/rag/setup.py), [run](examples/rag/rag.py)
|
|
321
|
+
|
|
322
|
+
## Testing
|
|
323
|
+
|
|
324
|
+
Use repository script helpers:
|
|
325
|
+
|
|
326
|
+
```bash
|
|
327
|
+
./scripts/run_unit_tests.sh
|
|
328
|
+
./scripts/run_integration_tests.sh
|
|
329
|
+
./scripts/run_examples.sh
|
|
330
|
+
```
|
|
331
|
+
|
|
332
|
+
## Documentation
|
|
333
|
+
|
|
334
|
+
- [ParadeDB Docs](https://docs.paradedb.com)
|
|
335
|
+
- [ParadeDB Website](https://paradedb.com)
|
|
336
|
+
- [Changelog](CHANGELOG.md)
|
|
337
|
+
|
|
338
|
+
## Contributing
|
|
339
|
+
|
|
340
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for setup, linting, tests, and PR workflow.
|
|
341
|
+
|
|
342
|
+
## Support
|
|
343
|
+
|
|
344
|
+
If you find a bug or need a feature, open a [GitHub Issue](https://github.com/paradedb/sqlalchemy-paradedb/issues/new/choose).
|
|
345
|
+
|
|
346
|
+
Community and team support:
|
|
347
|
+
|
|
348
|
+
- [ParadeDB Slack Community](https://paradedb.com/slack)
|
|
349
|
+
- [ParadeDB GitHub Discussions](https://github.com/paradedb/paradedb/discussions)
|
|
350
|
+
- [Commercial support](mailto:sales@paradedb.com)
|
|
351
|
+
|
|
352
|
+
## License
|
|
353
|
+
|
|
354
|
+
sqlalchemy-paradedb is licensed under the [MIT License](https://github.com/paradedb/sqlalchemy-paradedb?tab=MIT-1-ov-file#readme).
|
|
@@ -0,0 +1,321 @@
|
|
|
1
|
+
# sqlalchemy-paradedb
|
|
2
|
+
|
|
3
|
+
[PyPI](https://pypi.org/project/sqlalchemy-paradedb/)
|
|
4
|
+
[Codecov](https://codecov.io/gh/paradedb/sqlalchemy-paradedb)
|
|
5
|
+
[CI](https://github.com/paradedb/sqlalchemy-paradedb/actions/workflows/ci.yml)
|
|
6
|
+
[License: MIT](https://github.com/paradedb/sqlalchemy-paradedb?tab=MIT-1-ov-file#readme)
|
|
7
|
+
[Slack](https://paradedb.com/slack)
|
|
8
|
+
[X](https://x.com/paradedb)
|
|
9
|
+
|
|
10
|
+
[ParadeDB](https://paradedb.com) integration for SQLAlchemy: typed helpers for BM25 indexes, search predicates, scoring, snippets, facets, and migration ergonomics.
|
|
11
|
+
|
|
12
|
+
## Requirements & Compatibility
|
|
13
|
+
|
|
14
|
+
| Component | Supported |
|
|
15
|
+
| ---------- | ----------------------------- |
|
|
16
|
+
| Python | 3.10+ |
|
|
17
|
+
| SQLAlchemy | 2.0.32+ |
|
|
18
|
+
| ParadeDB | 0.21.0+ (`pg_search`) |
|
|
19
|
+
| PostgreSQL | 17+ (with ParadeDB extension) |
|
|
20
|
+
|
|
21
|
+
## Installation
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
uv add sqlalchemy-paradedb
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
For local development:
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
uv sync --extra test --extra dev
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Quick Start
|
|
34
|
+
|
|
35
|
+
### Prerequisites
|
|
36
|
+
|
|
37
|
+
Install `pg_search` in your Postgres database and connect SQLAlchemy to that database.
|
|
38
|
+
|
|
39
|
+
### Create a BM25 Index
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
from sqlalchemy import Index
|
|
43
|
+
from paradedb.sqlalchemy import indexing
|
|
44
|
+
|
|
45
|
+
products_bm25_idx = Index(
|
|
46
|
+
"products_bm25_idx",
|
|
47
|
+
indexing.BM25Field(Product.id),
|
|
48
|
+
indexing.BM25Field(
|
|
49
|
+
Product.description,
|
|
50
|
+
tokenizer=indexing.tokenize.unicode(lowercase=True),
|
|
51
|
+
),
|
|
52
|
+
indexing.BM25Field(
|
|
53
|
+
Product.category,
|
|
54
|
+
tokenizer=indexing.tokenize.literal(),
|
|
55
|
+
),
|
|
56
|
+
postgresql_using="bm25",
|
|
57
|
+
postgresql_with={"key_field": "id"},
|
|
58
|
+
)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
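For JSON columns named `metadata`, use `metadata_` as the ORM attribute name, because `metadata` is a reserved attribute in SQLAlchemy's Declarative API (for example, `metadata_ = mapped_column("metadata", JSON)`).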
|
|
62
|
+
|
|
63
|
+
### Query with ParadeDB Predicates
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
from sqlalchemy import select
|
|
67
|
+
from sqlalchemy.orm import Session
|
|
68
|
+
from paradedb.sqlalchemy import pdb, search
|
|
69
|
+
|
|
70
|
+
stmt = (
|
|
71
|
+
select(Product.id, Product.description)
|
|
72
|
+
.where(search.match_any(Product.description, "running", "shoes"))
|
|
73
|
+
.order_by(pdb.score(Product.id).desc())
|
|
74
|
+
.limit(10)
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
with Session(engine) as session:
|
|
78
|
+
rows = session.execute(stmt).all()
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### Rows + Facets in a Single Query
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
from sqlalchemy import select
|
|
85
|
+
from sqlalchemy.orm import Session
|
|
86
|
+
from paradedb.sqlalchemy import facets, search
|
|
87
|
+
|
|
88
|
+
base = (
|
|
89
|
+
select(Product.id, Product.description)
|
|
90
|
+
.where(search.match_all(Product.description, "running"))
|
|
91
|
+
.order_by(Product.id)
|
|
92
|
+
.limit(10)
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
stmt = facets.with_rows(
|
|
96
|
+
base,
|
|
97
|
+
agg=facets.multi(
|
|
98
|
+
facets.value_count(field="id"),
|
|
99
|
+
facets.terms(field="category", size=10),
|
|
100
|
+
),
|
|
101
|
+
key_field=Product.id,
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
with Session(engine) as session:
|
|
105
|
+
rows = session.execute(stmt).all()
|
|
106
|
+
facet_payload = facets.extract(rows)
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## Search Patterns
|
|
110
|
+
|
|
111
|
+
### Fuzzy Matching
|
|
112
|
+
|
|
113
|
+
```python
|
|
114
|
+
from paradedb.sqlalchemy import search
|
|
115
|
+
|
|
116
|
+
search.term(Product.description, "shose", distance=1)
|
|
117
|
+
search.match_any(Product.description, "wirless", distance=1, prefix=True)
|
|
118
|
+
search.term(Product.description, "rnnuing", distance=1, transpose_cost_one=True)
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
Use fuzzy options on `term`, `match_any`, or `match_all`; there is no separate `search.fuzzy(...)` helper.
|
|
122
|
+
|
|
123
|
+
### Phrase Prefix and More-Like-This
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
from paradedb.sqlalchemy import search
|
|
127
|
+
|
|
128
|
+
search.phrase_prefix(Product.description, ["running", "sh"])
|
|
129
|
+
search.more_like_this(Product.id, document_id=1, fields=["description"])
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
### Proximity Composition
|
|
133
|
+
|
|
134
|
+
```python
|
|
135
|
+
from sqlalchemy import select
|
|
136
|
+
from paradedb.sqlalchemy import search
|
|
137
|
+
|
|
138
|
+
prox = search.prox_array("running").within(1, search.prox_regex("sho.*"), ordered=True)
|
|
139
|
+
stmt = select(Product.id).where(search.proximity(Product.description, prox))
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
## Indexing and Tokenizers
|
|
143
|
+
|
|
144
|
+
Tokenizer config can be expressed as a structured mapping:
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
from sqlalchemy import Index
|
|
148
|
+
from paradedb.sqlalchemy import indexing
|
|
149
|
+
|
|
150
|
+
products_bm25_idx = Index(
|
|
151
|
+
"products_bm25_idx",
|
|
152
|
+
indexing.BM25Field(Product.id),
|
|
153
|
+
indexing.BM25Field(
|
|
154
|
+
Product.description,
|
|
155
|
+
tokenizer=indexing.tokenize.from_config(
|
|
156
|
+
{
|
|
157
|
+
"tokenizer": "simple",
|
|
158
|
+
"filters": ["lowercase", "stemmer"],
|
|
159
|
+
"stemmer": "english",
|
|
160
|
+
"alias": "description_simple",
|
|
161
|
+
}
|
|
162
|
+
),
|
|
163
|
+
),
|
|
164
|
+
indexing.BM25Field(
|
|
165
|
+
Product.description,
|
|
166
|
+
tokenizer=indexing.tokenize.from_config(
|
|
167
|
+
{
|
|
168
|
+
"tokenizer": "ngram",
|
|
169
|
+
"args": [3, 8],
|
|
170
|
+
"named_args": {"prefix_only": True},
|
|
171
|
+
"alias": "description_ngram",
|
|
172
|
+
}
|
|
173
|
+
),
|
|
174
|
+
),
|
|
175
|
+
postgresql_using="bm25",
|
|
176
|
+
postgresql_with={"key_field": "id"},
|
|
177
|
+
)
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
Validate that a field is indexed with the expected tokenizer:
|
|
181
|
+
|
|
182
|
+
```python
|
|
183
|
+
from paradedb.sqlalchemy import indexing
|
|
184
|
+
|
|
185
|
+
indexing.assert_indexed(engine, Product.category, tokenizer="literal")
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
Inspect BM25 metadata for a mapped table:
|
|
189
|
+
|
|
190
|
+
```python
|
|
191
|
+
from paradedb.sqlalchemy import indexing
|
|
192
|
+
|
|
193
|
+
meta = indexing.describe(engine, Product.__table__)
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
## Alembic Operations
|
|
197
|
+
|
|
198
|
+
Import the module once in your migration environment (typically `env.py`) so that Alembic registers the ParadeDB operations:
|
|
199
|
+
|
|
200
|
+
```python
|
|
201
|
+
import paradedb.sqlalchemy.alembic # noqa: F401
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
Use custom operations in migrations:
|
|
205
|
+
|
|
206
|
+
```python
|
|
207
|
+
op.create_bm25_index(
|
|
208
|
+
"products_bm25_idx",
|
|
209
|
+
"products",
|
|
210
|
+
["id", "description"],
|
|
211
|
+
key_field="id",
|
|
212
|
+
table_schema="public",
|
|
213
|
+
)
|
|
214
|
+
op.reindex_bm25("products_bm25_idx", concurrently=True, schema="public")
|
|
215
|
+
op.drop_bm25_index("products_bm25_idx", if_exists=True, schema="public")
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
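`op.reindex_bm25(..., concurrently=True)` must run outside a transaction, because PostgreSQL does not allow concurrent reindexing inside a transaction block. In Alembic, wrap the call in `with op.get_context().autocommit_block():`.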
|
|
219
|
+
|
|
220
|
+
## Diagnostics Helpers
|
|
221
|
+
|
|
222
|
+
`paradedb.sqlalchemy.diagnostics` exposes wrapper functions for ParadeDB diagnostics:
|
|
223
|
+
|
|
224
|
+
```python
|
|
225
|
+
from paradedb.sqlalchemy import diagnostics
|
|
226
|
+
|
|
227
|
+
indexes = diagnostics.paradedb_indexes(engine)
|
|
228
|
+
segments = diagnostics.paradedb_index_segments(engine, "products_bm25_idx")
|
|
229
|
+
check = diagnostics.paradedb_verify_index(engine, "products_bm25_idx", sample_rate=0.1)
|
|
230
|
+
all_checks = diagnostics.paradedb_verify_all_indexes(engine, schema_pattern="public")
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
## Common Errors
|
|
234
|
+
|
|
235
|
+
### `with_rows requires ORDER BY`
|
|
236
|
+
|
|
237
|
+
```python
|
|
238
|
+
from sqlalchemy import select
|
|
239
|
+
from paradedb.sqlalchemy import facets
|
|
240
|
+
|
|
241
|
+
# Missing order_by(...)
|
|
242
|
+
base = select(Product.id).limit(10)
|
|
243
|
+
facets.with_rows(base, agg=facets.value_count(field="id"), key_field=Product.id)
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
### `with_rows requires LIMIT`
|
|
247
|
+
|
|
248
|
+
```python
|
|
249
|
+
from sqlalchemy import select
|
|
250
|
+
from paradedb.sqlalchemy import facets
|
|
251
|
+
|
|
252
|
+
# Missing limit(...)
|
|
253
|
+
base = select(Product.id).order_by(Product.id)
|
|
254
|
+
facets.with_rows(base, agg=facets.value_count(field="id"), key_field=Product.id)
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
### `with_rows requires a ParadeDB predicate`
|
|
258
|
+
|
|
259
|
+
```python
|
|
260
|
+
from sqlalchemy import select
|
|
261
|
+
from paradedb.sqlalchemy import facets
|
|
262
|
+
|
|
263
|
+
# ensure_predicate=False disables automatic search.all(...) injection
|
|
264
|
+
facets.with_rows(
|
|
265
|
+
select(Product.id).order_by(Product.id).limit(10),
|
|
266
|
+
agg=facets.value_count(field="id"),
|
|
267
|
+
key_field=Product.id,
|
|
268
|
+
ensure_predicate=False,
|
|
269
|
+
)
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
### `tokenizer config requires 'tokenizer'`
|
|
273
|
+
|
|
274
|
+
```python
|
|
275
|
+
from paradedb.sqlalchemy import indexing
|
|
276
|
+
|
|
277
|
+
indexing.tokenize.from_config({"filters": ["lowercase"]})
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
## Examples
|
|
281
|
+
|
|
282
|
+
- Quick Start: [setup](examples/quickstart/setup.py), [run](examples/quickstart/quickstart.py)
|
|
283
|
+
- Faceted Search: [setup](examples/faceted_search/setup.py), [run](examples/faceted_search/faceted_search.py)
|
|
284
|
+
- Autocomplete: [setup](examples/autocomplete/setup.py), [run](examples/autocomplete/autocomplete.py)
|
|
285
|
+
- More Like This: [setup](examples/more_like_this/setup.py), [run](examples/more_like_this/more_like_this.py)
|
|
286
|
+
- Hybrid Search (RRF): [setup](examples/hybrid_rrf/setup.py), [run](examples/hybrid_rrf/hybrid_rrf.py)
|
|
287
|
+
- RAG: [setup](examples/rag/setup.py), [run](examples/rag/rag.py)
|
|
288
|
+
|
|
289
|
+
## Testing
|
|
290
|
+
|
|
291
|
+
Use repository script helpers:
|
|
292
|
+
|
|
293
|
+
```bash
|
|
294
|
+
./scripts/run_unit_tests.sh
|
|
295
|
+
./scripts/run_integration_tests.sh
|
|
296
|
+
./scripts/run_examples.sh
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
## Documentation
|
|
300
|
+
|
|
301
|
+
- [ParadeDB Docs](https://docs.paradedb.com)
|
|
302
|
+
- [ParadeDB Website](https://paradedb.com)
|
|
303
|
+
- [Changelog](CHANGELOG.md)
|
|
304
|
+
|
|
305
|
+
## Contributing
|
|
306
|
+
|
|
307
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for setup, linting, tests, and PR workflow.
|
|
308
|
+
|
|
309
|
+
## Support
|
|
310
|
+
|
|
311
|
+
If you find a bug or need a feature, open a [GitHub Issue](https://github.com/paradedb/sqlalchemy-paradedb/issues/new/choose).
|
|
312
|
+
|
|
313
|
+
Community and team support:
|
|
314
|
+
|
|
315
|
+
- [ParadeDB Slack Community](https://paradedb.com/slack)
|
|
316
|
+
- [ParadeDB GitHub Discussions](https://github.com/paradedb/paradedb/discussions)
|
|
317
|
+
- [Commercial support](mailto:sales@paradedb.com)
|
|
318
|
+
|
|
319
|
+
## License
|
|
320
|
+
|
|
321
|
+
sqlalchemy-paradedb is licensed under the [MIT License](https://github.com/paradedb/sqlalchemy-paradedb?tab=MIT-1-ov-file#readme).
|