sqlmodel-ext 0.1.0__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sqlmodel_ext-0.1.0 → sqlmodel_ext-0.2.0}/PKG-INFO +184 -1
- {sqlmodel_ext-0.1.0 → sqlmodel_ext-0.2.0}/README.md +177 -0
- {sqlmodel_ext-0.1.0 → sqlmodel_ext-0.2.0}/README_zh.md +177 -0
- {sqlmodel_ext-0.1.0 → sqlmodel_ext-0.2.0}/pyproject.toml +7 -1
- {sqlmodel_ext-0.1.0 → sqlmodel_ext-0.2.0}/src/sqlmodel_ext/__init__.py +1 -1
- sqlmodel_ext-0.2.0/src/sqlmodel_ext/field_types/dialects/__init__.py +7 -0
- sqlmodel_ext-0.2.0/src/sqlmodel_ext/field_types/dialects/postgresql/__init__.py +38 -0
- sqlmodel_ext-0.2.0/src/sqlmodel_ext/field_types/dialects/postgresql/array.py +103 -0
- sqlmodel_ext-0.2.0/src/sqlmodel_ext/field_types/dialects/postgresql/exceptions.py +38 -0
- sqlmodel_ext-0.2.0/src/sqlmodel_ext/field_types/dialects/postgresql/jsonb_types.py +178 -0
- sqlmodel_ext-0.2.0/src/sqlmodel_ext/field_types/dialects/postgresql/numpy_vector.py +427 -0
- {sqlmodel_ext-0.1.0 → sqlmodel_ext-0.2.0}/.gitignore +0 -0
- {sqlmodel_ext-0.1.0 → sqlmodel_ext-0.2.0}/LICENSE +0 -0
- {sqlmodel_ext-0.1.0 → sqlmodel_ext-0.2.0}/src/sqlmodel_ext/_compat.py +0 -0
- {sqlmodel_ext-0.1.0 → sqlmodel_ext-0.2.0}/src/sqlmodel_ext/_exceptions.py +0 -0
- {sqlmodel_ext-0.1.0 → sqlmodel_ext-0.2.0}/src/sqlmodel_ext/_sa_type.py +0 -0
- {sqlmodel_ext-0.1.0 → sqlmodel_ext-0.2.0}/src/sqlmodel_ext/_utils.py +0 -0
- {sqlmodel_ext-0.1.0 → sqlmodel_ext-0.2.0}/src/sqlmodel_ext/base.py +0 -0
- {sqlmodel_ext-0.1.0 → sqlmodel_ext-0.2.0}/src/sqlmodel_ext/field_types/__init__.py +0 -0
- {sqlmodel_ext-0.1.0 → sqlmodel_ext-0.2.0}/src/sqlmodel_ext/field_types/_internal/__init__.py +0 -0
- {sqlmodel_ext-0.1.0 → sqlmodel_ext-0.2.0}/src/sqlmodel_ext/field_types/_internal/path.py +0 -0
- {sqlmodel_ext-0.1.0 → sqlmodel_ext-0.2.0}/src/sqlmodel_ext/field_types/_ssrf.py +0 -0
- {sqlmodel_ext-0.1.0 → sqlmodel_ext-0.2.0}/src/sqlmodel_ext/field_types/ip_address.py +0 -0
- {sqlmodel_ext-0.1.0 → sqlmodel_ext-0.2.0}/src/sqlmodel_ext/field_types/mixins/__init__.py +0 -0
- {sqlmodel_ext-0.1.0 → sqlmodel_ext-0.2.0}/src/sqlmodel_ext/field_types/mixins/module_name_mixin.py +0 -0
- {sqlmodel_ext-0.1.0 → sqlmodel_ext-0.2.0}/src/sqlmodel_ext/field_types/url.py +0 -0
- {sqlmodel_ext-0.1.0 → sqlmodel_ext-0.2.0}/src/sqlmodel_ext/mixins/__init__.py +0 -0
- {sqlmodel_ext-0.1.0 → sqlmodel_ext-0.2.0}/src/sqlmodel_ext/mixins/info_response.py +0 -0
- {sqlmodel_ext-0.1.0 → sqlmodel_ext-0.2.0}/src/sqlmodel_ext/mixins/optimistic_lock.py +0 -0
- {sqlmodel_ext-0.1.0 → sqlmodel_ext-0.2.0}/src/sqlmodel_ext/mixins/polymorphic.py +0 -0
- {sqlmodel_ext-0.1.0 → sqlmodel_ext-0.2.0}/src/sqlmodel_ext/mixins/relation_preload.py +0 -0
- {sqlmodel_ext-0.1.0 → sqlmodel_ext-0.2.0}/src/sqlmodel_ext/mixins/table.py +0 -0
- {sqlmodel_ext-0.1.0 → sqlmodel_ext-0.2.0}/src/sqlmodel_ext/pagination.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sqlmodel-ext
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Extended SQLModel infrastructure: smart metaclass, async CRUD mixins, polymorphic inheritance, optimistic locking, relation preloading, and reusable field types.
|
|
5
5
|
Project-URL: Homepage, https://github.com/Foxerine/sqlmodel-ext
|
|
6
6
|
Project-URL: Repository, https://github.com/Foxerine/sqlmodel-ext
|
|
@@ -32,6 +32,12 @@ Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
|
32
32
|
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
33
33
|
Provides-Extra: fastapi
|
|
34
34
|
Requires-Dist: fastapi>=0.100.0; extra == 'fastapi'
|
|
35
|
+
Provides-Extra: pgvector
|
|
36
|
+
Requires-Dist: numpy>=1.24; extra == 'pgvector'
|
|
37
|
+
Requires-Dist: orjson>=3.0; extra == 'pgvector'
|
|
38
|
+
Requires-Dist: pgvector>=0.3; extra == 'pgvector'
|
|
39
|
+
Provides-Extra: postgresql
|
|
40
|
+
Requires-Dist: orjson>=3.0; extra == 'postgresql'
|
|
35
41
|
Description-Content-Type: text/markdown
|
|
36
42
|
|
|
37
43
|
# sqlmodel-ext
|
|
@@ -58,6 +64,7 @@ Extended SQLModel infrastructure: smart metaclass, async CRUD mixins, polymorphi
|
|
|
58
64
|
| **RelationPreloadMixin** | Decorator-based automatic relationship preloading (prevents `MissingGreenlet` errors) |
|
|
59
65
|
| **ListResponse[T]** | Generic paginated response model for list endpoints |
|
|
60
66
|
| **Field Types** | Reusable constrained types: `Str64`, `Port`, `IPAddress`, `HttpUrl`, `SafeHttpUrl`, and more |
|
|
67
|
+
| **PostgreSQL Types** | `Array[T]` for native ARRAY, `JSON100K`/`JSONList100K` for size-limited JSONB, `NumpyVector` for pgvector+NumPy |
|
|
61
68
|
| **Info Response DTOs** | Pre-built mixins for API response models with id/timestamp fields |
|
|
62
69
|
|
|
63
70
|
## Installation
|
|
@@ -72,6 +79,18 @@ With [FastAPI](https://fastapi.tiangolo.com/) support (enables `HTTPException` i
|
|
|
72
79
|
pip install sqlmodel-ext[fastapi]
|
|
73
80
|
```
|
|
74
81
|
|
|
82
|
+
With PostgreSQL ARRAY and JSONB types (requires `orjson`):
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
pip install sqlmodel-ext[postgresql]
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
With pgvector + NumPy vector support (includes `[postgresql]`):
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
pip install sqlmodel-ext[pgvector]
|
|
92
|
+
```
|
|
93
|
+
|
|
75
94
|
## Quick Start
|
|
76
95
|
|
|
77
96
|
### Define Models
|
|
@@ -1053,6 +1072,160 @@ class FileRecord(SQLModelBase, UUIDTableBaseMixin, table=True):
|
|
|
1053
1072
|
|
|
1054
1073
|
---
|
|
1055
1074
|
|
|
1075
|
+
### PostgreSQL Types
|
|
1076
|
+
|
|
1077
|
+
PostgreSQL-specific types live in `sqlmodel_ext.field_types.dialects.postgresql`. They are **not** imported from the top-level `sqlmodel_ext` package because they require PostgreSQL-specific dependencies.
|
|
1078
|
+
|
|
1079
|
+
```python
|
|
1080
|
+
from sqlmodel_ext.field_types.dialects.postgresql import (
|
|
1081
|
+
Array, # pip install sqlmodel-ext (uses sqlalchemy.dialects.postgresql)
|
|
1082
|
+
JSON100K, # pip install sqlmodel-ext[postgresql] (requires orjson)
|
|
1083
|
+
JSONList100K, # pip install sqlmodel-ext[postgresql] (requires orjson)
|
|
1084
|
+
NumpyVector, # pip install sqlmodel-ext[pgvector] (requires numpy + pgvector)
|
|
1085
|
+
)
|
|
1086
|
+
```
|
|
1087
|
+
|
|
1088
|
+
#### `Array[T]` -- PostgreSQL ARRAY
|
|
1089
|
+
|
|
1090
|
+
A generic array type that maps Python `list[T]` to PostgreSQL's native `ARRAY` column type.
|
|
1091
|
+
|
|
1092
|
+
```python
|
|
1093
|
+
from sqlmodel import Field
|
|
1094
|
+
from sqlmodel_ext.field_types.dialects.postgresql import Array
|
|
1095
|
+
|
|
1096
|
+
class Article(SQLModelBase, UUIDTableBaseMixin, table=True):
|
|
1097
|
+
tags: Array[str] = Field(default_factory=list)
|
|
1098
|
+
"""String array stored as TEXT[] in PostgreSQL"""
|
|
1099
|
+
|
|
1100
|
+
scores: Array[int] = Field(default_factory=list)
|
|
1101
|
+
"""Integer array stored as INTEGER[] in PostgreSQL"""
|
|
1102
|
+
|
|
1103
|
+
metadata_list: Array[dict] = Field(default_factory=list)
|
|
1104
|
+
"""JSONB array stored as JSONB[] in PostgreSQL"""
|
|
1105
|
+
|
|
1106
|
+
refs: Array[UUID] = Field(default_factory=list)
|
|
1107
|
+
"""UUID array stored as UUID[] in PostgreSQL"""
|
|
1108
|
+
```
|
|
1109
|
+
|
|
1110
|
+
**With max length:**
|
|
1111
|
+
|
|
1112
|
+
```python
|
|
1113
|
+
class Config(SQLModelBase, UUIDTableBaseMixin, table=True):
|
|
1114
|
+
version_vector: Array[dict, 20] = Field(default_factory=list)
|
|
1115
|
+
"""Max 20 elements, validated by Pydantic"""
|
|
1116
|
+
```
|
|
1117
|
+
|
|
1118
|
+
**Supported inner types:**
|
|
1119
|
+
|
|
1120
|
+
| Python Type | PostgreSQL Type |
|
|
1121
|
+
|-------------|----------------|
|
|
1122
|
+
| `str` | `TEXT[]` |
|
|
1123
|
+
| `int` | `INTEGER[]` |
|
|
1124
|
+
| `dict` | `JSONB[]` |
|
|
1125
|
+
| `UUID` | `UUID[]` |
|
|
1126
|
+
| `Enum` subclass | `ENUM[]` |
|
|
1127
|
+
|
|
1128
|
+
#### `JSON100K` / `JSONList100K` -- Size-Limited JSONB
|
|
1129
|
+
|
|
1130
|
+
JSONB types with a 100K character input limit, enforced at the Pydantic validation layer. Uses `orjson` for fast serialization.
|
|
1131
|
+
|
|
1132
|
+
```python
|
|
1133
|
+
from sqlmodel_ext.field_types.dialects.postgresql import JSON100K, JSONList100K
|
|
1134
|
+
|
|
1135
|
+
class Project(SQLModelBase, UUIDTableBaseMixin, table=True):
|
|
1136
|
+
canvas: JSON100K
|
|
1137
|
+
"""Canvas data stored as JSONB (max 100K chars)"""
|
|
1138
|
+
|
|
1139
|
+
messages: JSONList100K
|
|
1140
|
+
"""Message list stored as JSONB (max 100K chars)"""
|
|
1141
|
+
```
|
|
1142
|
+
|
|
1143
|
+
**Behavior:**
|
|
1144
|
+
|
|
1145
|
+
| Feature | `JSON100K` | `JSONList100K` |
|
|
1146
|
+
|---------|-----------|---------------|
|
|
1147
|
+
| Python type | `dict[str, Any]` | `list[dict[str, Any]]` |
|
|
1148
|
+
| Accepts | `dict` or JSON string | `list` or JSON string |
|
|
1149
|
+
| PostgreSQL type | `JSONB` | `JSONB` |
|
|
1150
|
+
| Max input length | 100,000 chars | 100,000 chars |
|
|
1151
|
+
| API serialization | JSON string | JSON string |
|
|
1152
|
+
|
|
1153
|
+
#### `NumpyVector` -- pgvector + NumPy Integration
|
|
1154
|
+
|
|
1155
|
+
Stores vectors as pgvector's `Vector` type in PostgreSQL while exposing them as `numpy.ndarray` in Python. Supports fixed-dimension vectors with dtype enforcement.
|
|
1156
|
+
|
|
1157
|
+
```python
|
|
1158
|
+
import numpy as np
|
|
1159
|
+
from sqlmodel import Field
|
|
1160
|
+
from sqlmodel_ext.field_types.dialects.postgresql import NumpyVector
|
|
1161
|
+
|
|
1162
|
+
class SpeakerInfo(SQLModelBase, UUIDTableBaseMixin, table=True):
|
|
1163
|
+
embedding: NumpyVector[1024, np.float32] = Field(...)
|
|
1164
|
+
"""1024-dimensional float32 embedding vector"""
|
|
1165
|
+
|
|
1166
|
+
# Default dtype is float32
|
|
1167
|
+
class Document(SQLModelBase, UUIDTableBaseMixin, table=True):
|
|
1168
|
+
embedding: NumpyVector[768] = Field(...)
|
|
1169
|
+
"""768-dimensional vector (float32 by default)"""
|
|
1170
|
+
```
|
|
1171
|
+
|
|
1172
|
+
**API serialization format** (base64-encoded for efficiency):
|
|
1173
|
+
|
|
1174
|
+
```json
|
|
1175
|
+
{
|
|
1176
|
+
"dtype": "float32",
|
|
1177
|
+
"shape": 1024,
|
|
1178
|
+
"data_b64": "AAABAAA..."
|
|
1179
|
+
}
|
|
1180
|
+
```
|
|
1181
|
+
|
|
1182
|
+
**Accepted input formats:**
|
|
1183
|
+
|
|
1184
|
+
| Format | Example |
|
|
1185
|
+
|--------|---------|
|
|
1186
|
+
| `numpy.ndarray` | `np.zeros(1024, dtype=np.float32)` |
|
|
1187
|
+
| `list` / `tuple` | `[0.1, 0.2, ...]` |
|
|
1188
|
+
| base64 dict | `{"dtype": "float32", "shape": 1024, "data_b64": "..."}` |
|
|
1189
|
+
| pgvector string | `"[0.1, 0.2, ...]"` (from database) |
|
|
1190
|
+
|
|
1191
|
+
**Vector similarity search** with pgvector operators:
|
|
1192
|
+
|
|
1193
|
+
```python
|
|
1194
|
+
from sqlalchemy import select
|
|
1195
|
+
|
|
1196
|
+
# L2 distance (Euclidean)
|
|
1197
|
+
stmt = select(SpeakerInfo).order_by(
|
|
1198
|
+
SpeakerInfo.embedding.l2_distance(query_vector)
|
|
1199
|
+
).limit(10)
|
|
1200
|
+
|
|
1201
|
+
# Cosine distance
|
|
1202
|
+
stmt = select(SpeakerInfo).order_by(
|
|
1203
|
+
SpeakerInfo.embedding.cosine_distance(query_vector)
|
|
1204
|
+
).limit(10)
|
|
1205
|
+
|
|
1206
|
+
# Max inner product
|
|
1207
|
+
stmt = select(SpeakerInfo).order_by(
|
|
1208
|
+
SpeakerInfo.embedding.max_inner_product(query_vector)
|
|
1209
|
+
).limit(10)
|
|
1210
|
+
```
|
|
1211
|
+
|
|
1212
|
+
**Vector exceptions:**
|
|
1213
|
+
|
|
1214
|
+
| Exception | When |
|
|
1215
|
+
|-----------|------|
|
|
1216
|
+
| `VectorError` | Base class for all vector errors |
|
|
1217
|
+
| `VectorDimensionError` | Array dimensions don't match the declared size |
|
|
1218
|
+
| `VectorDTypeError` | dtype conversion fails |
|
|
1219
|
+
| `VectorDecodeError` | base64 or database format decoding fails |
|
|
1220
|
+
|
|
1221
|
+
```python
|
|
1222
|
+
from sqlmodel_ext.field_types.dialects.postgresql import (
|
|
1223
|
+
VectorError, VectorDimensionError, VectorDTypeError, VectorDecodeError,
|
|
1224
|
+
)
|
|
1225
|
+
```
|
|
1226
|
+
|
|
1227
|
+
---
|
|
1228
|
+
|
|
1056
1229
|
### Info Response DTO Mixins
|
|
1057
1230
|
|
|
1058
1231
|
Pre-built mixins for API response models that always include id and timestamp fields:
|
|
@@ -1101,6 +1274,13 @@ sqlmodel_ext/
|
|
|
1101
1274
|
url.py # Url, HttpUrl, WebSocketUrl, SafeHttpUrl
|
|
1102
1275
|
_internal/path.py # Path type handlers
|
|
1103
1276
|
mixins/ # ModuleNameMixin
|
|
1277
|
+
dialects/
|
|
1278
|
+
postgresql/
|
|
1279
|
+
__init__.py # PostgreSQL type re-exports
|
|
1280
|
+
array.py # Array[T] generic ARRAY type
|
|
1281
|
+
jsonb_types.py # JSON100K, JSONList100K (requires orjson)
|
|
1282
|
+
numpy_vector.py # NumpyVector[dims, dtype] (requires numpy + pgvector)
|
|
1283
|
+
exceptions.py # VectorError hierarchy
|
|
1104
1284
|
```
|
|
1105
1285
|
|
|
1106
1286
|
## Requirements
|
|
@@ -1110,6 +1290,9 @@ sqlmodel_ext/
|
|
|
1110
1290
|
- **pydantic** >= 2.0
|
|
1111
1291
|
- **sqlalchemy** >= 2.0
|
|
1112
1292
|
- (optional) **fastapi** >= 0.100.0
|
|
1293
|
+
- (optional) **orjson** >= 3.0 -- for `JSON100K` / `JSONList100K`
|
|
1294
|
+
- (optional) **numpy** >= 1.24 -- for `NumpyVector`
|
|
1295
|
+
- (optional) **pgvector** >= 0.3 -- for `NumpyVector`
|
|
1113
1296
|
|
|
1114
1297
|
## AI Disclosure
|
|
1115
1298
|
|
|
@@ -22,6 +22,7 @@ Extended SQLModel infrastructure: smart metaclass, async CRUD mixins, polymorphi
|
|
|
22
22
|
| **RelationPreloadMixin** | Decorator-based automatic relationship preloading (prevents `MissingGreenlet` errors) |
|
|
23
23
|
| **ListResponse[T]** | Generic paginated response model for list endpoints |
|
|
24
24
|
| **Field Types** | Reusable constrained types: `Str64`, `Port`, `IPAddress`, `HttpUrl`, `SafeHttpUrl`, and more |
|
|
25
|
+
| **PostgreSQL Types** | `Array[T]` for native ARRAY, `JSON100K`/`JSONList100K` for size-limited JSONB, `NumpyVector` for pgvector+NumPy |
|
|
25
26
|
| **Info Response DTOs** | Pre-built mixins for API response models with id/timestamp fields |
|
|
26
27
|
|
|
27
28
|
## Installation
|
|
@@ -36,6 +37,18 @@ With [FastAPI](https://fastapi.tiangolo.com/) support (enables `HTTPException` i
|
|
|
36
37
|
pip install sqlmodel-ext[fastapi]
|
|
37
38
|
```
|
|
38
39
|
|
|
40
|
+
With PostgreSQL ARRAY and JSONB types (requires `orjson`):
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
pip install sqlmodel-ext[postgresql]
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
With pgvector + NumPy vector support (includes `[postgresql]`):
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
pip install sqlmodel-ext[pgvector]
|
|
50
|
+
```
|
|
51
|
+
|
|
39
52
|
## Quick Start
|
|
40
53
|
|
|
41
54
|
### Define Models
|
|
@@ -1017,6 +1030,160 @@ class FileRecord(SQLModelBase, UUIDTableBaseMixin, table=True):
|
|
|
1017
1030
|
|
|
1018
1031
|
---
|
|
1019
1032
|
|
|
1033
|
+
### PostgreSQL Types
|
|
1034
|
+
|
|
1035
|
+
PostgreSQL-specific types live in `sqlmodel_ext.field_types.dialects.postgresql`. They are **not** imported from the top-level `sqlmodel_ext` package because they require PostgreSQL-specific dependencies.
|
|
1036
|
+
|
|
1037
|
+
```python
|
|
1038
|
+
from sqlmodel_ext.field_types.dialects.postgresql import (
|
|
1039
|
+
Array, # pip install sqlmodel-ext (uses sqlalchemy.dialects.postgresql)
|
|
1040
|
+
JSON100K, # pip install sqlmodel-ext[postgresql] (requires orjson)
|
|
1041
|
+
JSONList100K, # pip install sqlmodel-ext[postgresql] (requires orjson)
|
|
1042
|
+
NumpyVector, # pip install sqlmodel-ext[pgvector] (requires numpy + pgvector)
|
|
1043
|
+
)
|
|
1044
|
+
```
|
|
1045
|
+
|
|
1046
|
+
#### `Array[T]` -- PostgreSQL ARRAY
|
|
1047
|
+
|
|
1048
|
+
A generic array type that maps Python `list[T]` to PostgreSQL's native `ARRAY` column type.
|
|
1049
|
+
|
|
1050
|
+
```python
|
|
1051
|
+
from sqlmodel import Field
|
|
1052
|
+
from sqlmodel_ext.field_types.dialects.postgresql import Array
|
|
1053
|
+
|
|
1054
|
+
class Article(SQLModelBase, UUIDTableBaseMixin, table=True):
|
|
1055
|
+
tags: Array[str] = Field(default_factory=list)
|
|
1056
|
+
"""String array stored as TEXT[] in PostgreSQL"""
|
|
1057
|
+
|
|
1058
|
+
scores: Array[int] = Field(default_factory=list)
|
|
1059
|
+
"""Integer array stored as INTEGER[] in PostgreSQL"""
|
|
1060
|
+
|
|
1061
|
+
metadata_list: Array[dict] = Field(default_factory=list)
|
|
1062
|
+
"""JSONB array stored as JSONB[] in PostgreSQL"""
|
|
1063
|
+
|
|
1064
|
+
refs: Array[UUID] = Field(default_factory=list)
|
|
1065
|
+
"""UUID array stored as UUID[] in PostgreSQL"""
|
|
1066
|
+
```
|
|
1067
|
+
|
|
1068
|
+
**With max length:**
|
|
1069
|
+
|
|
1070
|
+
```python
|
|
1071
|
+
class Config(SQLModelBase, UUIDTableBaseMixin, table=True):
|
|
1072
|
+
version_vector: Array[dict, 20] = Field(default_factory=list)
|
|
1073
|
+
"""Max 20 elements, validated by Pydantic"""
|
|
1074
|
+
```
|
|
1075
|
+
|
|
1076
|
+
**Supported inner types:**
|
|
1077
|
+
|
|
1078
|
+
| Python Type | PostgreSQL Type |
|
|
1079
|
+
|-------------|----------------|
|
|
1080
|
+
| `str` | `TEXT[]` |
|
|
1081
|
+
| `int` | `INTEGER[]` |
|
|
1082
|
+
| `dict` | `JSONB[]` |
|
|
1083
|
+
| `UUID` | `UUID[]` |
|
|
1084
|
+
| `Enum` subclass | `ENUM[]` |
|
|
1085
|
+
|
|
1086
|
+
#### `JSON100K` / `JSONList100K` -- Size-Limited JSONB
|
|
1087
|
+
|
|
1088
|
+
JSONB types with a 100K character input limit, enforced at the Pydantic validation layer. Uses `orjson` for fast serialization.
|
|
1089
|
+
|
|
1090
|
+
```python
|
|
1091
|
+
from sqlmodel_ext.field_types.dialects.postgresql import JSON100K, JSONList100K
|
|
1092
|
+
|
|
1093
|
+
class Project(SQLModelBase, UUIDTableBaseMixin, table=True):
|
|
1094
|
+
canvas: JSON100K
|
|
1095
|
+
"""Canvas data stored as JSONB (max 100K chars)"""
|
|
1096
|
+
|
|
1097
|
+
messages: JSONList100K
|
|
1098
|
+
"""Message list stored as JSONB (max 100K chars)"""
|
|
1099
|
+
```
|
|
1100
|
+
|
|
1101
|
+
**Behavior:**
|
|
1102
|
+
|
|
1103
|
+
| Feature | `JSON100K` | `JSONList100K` |
|
|
1104
|
+
|---------|-----------|---------------|
|
|
1105
|
+
| Python type | `dict[str, Any]` | `list[dict[str, Any]]` |
|
|
1106
|
+
| Accepts | `dict` or JSON string | `list` or JSON string |
|
|
1107
|
+
| PostgreSQL type | `JSONB` | `JSONB` |
|
|
1108
|
+
| Max input length | 100,000 chars | 100,000 chars |
|
|
1109
|
+
| API serialization | JSON string | JSON string |
|
|
1110
|
+
|
|
1111
|
+
#### `NumpyVector` -- pgvector + NumPy Integration
|
|
1112
|
+
|
|
1113
|
+
Stores vectors as pgvector's `Vector` type in PostgreSQL while exposing them as `numpy.ndarray` in Python. Supports fixed-dimension vectors with dtype enforcement.
|
|
1114
|
+
|
|
1115
|
+
```python
|
|
1116
|
+
import numpy as np
|
|
1117
|
+
from sqlmodel import Field
|
|
1118
|
+
from sqlmodel_ext.field_types.dialects.postgresql import NumpyVector
|
|
1119
|
+
|
|
1120
|
+
class SpeakerInfo(SQLModelBase, UUIDTableBaseMixin, table=True):
|
|
1121
|
+
embedding: NumpyVector[1024, np.float32] = Field(...)
|
|
1122
|
+
"""1024-dimensional float32 embedding vector"""
|
|
1123
|
+
|
|
1124
|
+
# Default dtype is float32
|
|
1125
|
+
class Document(SQLModelBase, UUIDTableBaseMixin, table=True):
|
|
1126
|
+
embedding: NumpyVector[768] = Field(...)
|
|
1127
|
+
"""768-dimensional vector (float32 by default)"""
|
|
1128
|
+
```
|
|
1129
|
+
|
|
1130
|
+
**API serialization format** (base64-encoded for efficiency):
|
|
1131
|
+
|
|
1132
|
+
```json
|
|
1133
|
+
{
|
|
1134
|
+
"dtype": "float32",
|
|
1135
|
+
"shape": 1024,
|
|
1136
|
+
"data_b64": "AAABAAA..."
|
|
1137
|
+
}
|
|
1138
|
+
```
|
|
1139
|
+
|
|
1140
|
+
**Accepted input formats:**
|
|
1141
|
+
|
|
1142
|
+
| Format | Example |
|
|
1143
|
+
|--------|---------|
|
|
1144
|
+
| `numpy.ndarray` | `np.zeros(1024, dtype=np.float32)` |
|
|
1145
|
+
| `list` / `tuple` | `[0.1, 0.2, ...]` |
|
|
1146
|
+
| base64 dict | `{"dtype": "float32", "shape": 1024, "data_b64": "..."}` |
|
|
1147
|
+
| pgvector string | `"[0.1, 0.2, ...]"` (from database) |
|
|
1148
|
+
|
|
1149
|
+
**Vector similarity search** with pgvector operators:
|
|
1150
|
+
|
|
1151
|
+
```python
|
|
1152
|
+
from sqlalchemy import select
|
|
1153
|
+
|
|
1154
|
+
# L2 distance (Euclidean)
|
|
1155
|
+
stmt = select(SpeakerInfo).order_by(
|
|
1156
|
+
SpeakerInfo.embedding.l2_distance(query_vector)
|
|
1157
|
+
).limit(10)
|
|
1158
|
+
|
|
1159
|
+
# Cosine distance
|
|
1160
|
+
stmt = select(SpeakerInfo).order_by(
|
|
1161
|
+
SpeakerInfo.embedding.cosine_distance(query_vector)
|
|
1162
|
+
).limit(10)
|
|
1163
|
+
|
|
1164
|
+
# Max inner product
|
|
1165
|
+
stmt = select(SpeakerInfo).order_by(
|
|
1166
|
+
SpeakerInfo.embedding.max_inner_product(query_vector)
|
|
1167
|
+
).limit(10)
|
|
1168
|
+
```
|
|
1169
|
+
|
|
1170
|
+
**Vector exceptions:**
|
|
1171
|
+
|
|
1172
|
+
| Exception | When |
|
|
1173
|
+
|-----------|------|
|
|
1174
|
+
| `VectorError` | Base class for all vector errors |
|
|
1175
|
+
| `VectorDimensionError` | Array dimensions don't match the declared size |
|
|
1176
|
+
| `VectorDTypeError` | dtype conversion fails |
|
|
1177
|
+
| `VectorDecodeError` | base64 or database format decoding fails |
|
|
1178
|
+
|
|
1179
|
+
```python
|
|
1180
|
+
from sqlmodel_ext.field_types.dialects.postgresql import (
|
|
1181
|
+
VectorError, VectorDimensionError, VectorDTypeError, VectorDecodeError,
|
|
1182
|
+
)
|
|
1183
|
+
```
|
|
1184
|
+
|
|
1185
|
+
---
|
|
1186
|
+
|
|
1020
1187
|
### Info Response DTO Mixins
|
|
1021
1188
|
|
|
1022
1189
|
Pre-built mixins for API response models that always include id and timestamp fields:
|
|
@@ -1065,6 +1232,13 @@ sqlmodel_ext/
|
|
|
1065
1232
|
url.py # Url, HttpUrl, WebSocketUrl, SafeHttpUrl
|
|
1066
1233
|
_internal/path.py # Path type handlers
|
|
1067
1234
|
mixins/ # ModuleNameMixin
|
|
1235
|
+
dialects/
|
|
1236
|
+
postgresql/
|
|
1237
|
+
__init__.py # PostgreSQL type re-exports
|
|
1238
|
+
array.py # Array[T] generic ARRAY type
|
|
1239
|
+
jsonb_types.py # JSON100K, JSONList100K (requires orjson)
|
|
1240
|
+
numpy_vector.py # NumpyVector[dims, dtype] (requires numpy + pgvector)
|
|
1241
|
+
exceptions.py # VectorError hierarchy
|
|
1068
1242
|
```
|
|
1069
1243
|
|
|
1070
1244
|
## Requirements
|
|
@@ -1074,6 +1248,9 @@ sqlmodel_ext/
|
|
|
1074
1248
|
- **pydantic** >= 2.0
|
|
1075
1249
|
- **sqlalchemy** >= 2.0
|
|
1076
1250
|
- (optional) **fastapi** >= 0.100.0
|
|
1251
|
+
- (optional) **orjson** >= 3.0 -- for `JSON100K` / `JSONList100K`
|
|
1252
|
+
- (optional) **numpy** >= 1.24 -- for `NumpyVector`
|
|
1253
|
+
- (optional) **pgvector** >= 0.3 -- for `NumpyVector`
|
|
1077
1254
|
|
|
1078
1255
|
## AI Disclosure
|
|
1079
1256
|
|
|
@@ -22,6 +22,7 @@ SQLModel 增强基础设施:智能元类、异步 CRUD Mixin、多态继承、
|
|
|
22
22
|
| **RelationPreloadMixin** | 基于装饰器的关系自动预加载(防止 `MissingGreenlet` 错误) |
|
|
23
23
|
| **ListResponse[T]** | 泛型分页响应模型,适用于列表接口 |
|
|
24
24
|
| **字段类型** | 可复用的约束类型:`Str64`、`Port`、`IPAddress`、`HttpUrl`、`SafeHttpUrl` 等 |
|
|
25
|
+
| **PostgreSQL 类型** | `Array[T]` 原生 ARRAY、`JSON100K`/`JSONList100K` 限长 JSONB、`NumpyVector` pgvector+NumPy 集成 |
|
|
25
26
|
| **响应 DTO Mixin** | 预构建的 API 响应模型 Mixin,包含 id/时间戳字段 |
|
|
26
27
|
|
|
27
28
|
## 安装
|
|
@@ -36,6 +37,18 @@ pip install sqlmodel-ext
|
|
|
36
37
|
pip install sqlmodel-ext[fastapi]
|
|
37
38
|
```
|
|
38
39
|
|
|
40
|
+
使用 PostgreSQL ARRAY 和 JSONB 类型(需要 `orjson`):
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
pip install sqlmodel-ext[postgresql]
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
使用 pgvector + NumPy 向量支持(包含 `[postgresql]`):
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
pip install sqlmodel-ext[pgvector]
|
|
50
|
+
```
|
|
51
|
+
|
|
39
52
|
## 快速开始
|
|
40
53
|
|
|
41
54
|
### 定义模型
|
|
@@ -1017,6 +1030,160 @@ class FileRecord(SQLModelBase, UUIDTableBaseMixin, table=True):
|
|
|
1017
1030
|
|
|
1018
1031
|
---
|
|
1019
1032
|
|
|
1033
|
+
### PostgreSQL 类型
|
|
1034
|
+
|
|
1035
|
+
PostgreSQL 特有的类型位于 `sqlmodel_ext.field_types.dialects.postgresql`。由于依赖 PostgreSQL 特定的库,**不会**从顶层 `sqlmodel_ext` 包导入。
|
|
1036
|
+
|
|
1037
|
+
```python
|
|
1038
|
+
from sqlmodel_ext.field_types.dialects.postgresql import (
|
|
1039
|
+
Array, # pip install sqlmodel-ext(使用 sqlalchemy.dialects.postgresql)
|
|
1040
|
+
JSON100K, # pip install sqlmodel-ext[postgresql](需要 orjson)
|
|
1041
|
+
JSONList100K, # pip install sqlmodel-ext[postgresql](需要 orjson)
|
|
1042
|
+
NumpyVector, # pip install sqlmodel-ext[pgvector](需要 numpy + pgvector)
|
|
1043
|
+
)
|
|
1044
|
+
```
|
|
1045
|
+
|
|
1046
|
+
#### `Array[T]` -- PostgreSQL ARRAY
|
|
1047
|
+
|
|
1048
|
+
泛型数组类型,将 Python `list[T]` 映射到 PostgreSQL 原生 `ARRAY` 列类型。
|
|
1049
|
+
|
|
1050
|
+
```python
|
|
1051
|
+
from sqlmodel import Field
|
|
1052
|
+
from sqlmodel_ext.field_types.dialects.postgresql import Array
|
|
1053
|
+
|
|
1054
|
+
class Article(SQLModelBase, UUIDTableBaseMixin, table=True):
|
|
1055
|
+
tags: Array[str] = Field(default_factory=list)
|
|
1056
|
+
"""字符串数组,PostgreSQL 中存储为 TEXT[]"""
|
|
1057
|
+
|
|
1058
|
+
scores: Array[int] = Field(default_factory=list)
|
|
1059
|
+
"""整数数组,PostgreSQL 中存储为 INTEGER[]"""
|
|
1060
|
+
|
|
1061
|
+
metadata_list: Array[dict] = Field(default_factory=list)
|
|
1062
|
+
"""JSONB 数组,PostgreSQL 中存储为 JSONB[]"""
|
|
1063
|
+
|
|
1064
|
+
refs: Array[UUID] = Field(default_factory=list)
|
|
1065
|
+
"""UUID 数组,PostgreSQL 中存储为 UUID[]"""
|
|
1066
|
+
```
|
|
1067
|
+
|
|
1068
|
+
**带最大长度限制:**
|
|
1069
|
+
|
|
1070
|
+
```python
|
|
1071
|
+
class Config(SQLModelBase, UUIDTableBaseMixin, table=True):
|
|
1072
|
+
version_vector: Array[dict, 20] = Field(default_factory=list)
|
|
1073
|
+
"""最多 20 个元素,由 Pydantic 验证"""
|
|
1074
|
+
```
|
|
1075
|
+
|
|
1076
|
+
**支持的内部类型:**
|
|
1077
|
+
|
|
1078
|
+
| Python 类型 | PostgreSQL 类型 |
|
|
1079
|
+
|-------------|----------------|
|
|
1080
|
+
| `str` | `TEXT[]` |
|
|
1081
|
+
| `int` | `INTEGER[]` |
|
|
1082
|
+
| `dict` | `JSONB[]` |
|
|
1083
|
+
| `UUID` | `UUID[]` |
|
|
1084
|
+
| `Enum` 子类 | `ENUM[]` |
|
|
1085
|
+
|
|
1086
|
+
#### `JSON100K` / `JSONList100K` -- 限长 JSONB
|
|
1087
|
+
|
|
1088
|
+
带 100K 字符输入限制的 JSONB 类型,在 Pydantic 验证层强制执行。使用 `orjson` 进行高速序列化。
|
|
1089
|
+
|
|
1090
|
+
```python
|
|
1091
|
+
from sqlmodel_ext.field_types.dialects.postgresql import JSON100K, JSONList100K
|
|
1092
|
+
|
|
1093
|
+
class Project(SQLModelBase, UUIDTableBaseMixin, table=True):
|
|
1094
|
+
canvas: JSON100K
|
|
1095
|
+
"""画布数据,存储为 JSONB(最大 100K 字符)"""
|
|
1096
|
+
|
|
1097
|
+
messages: JSONList100K
|
|
1098
|
+
"""消息列表,存储为 JSONB(最大 100K 字符)"""
|
|
1099
|
+
```
|
|
1100
|
+
|
|
1101
|
+
**行为说明:**
|
|
1102
|
+
|
|
1103
|
+
| 特性 | `JSON100K` | `JSONList100K` |
|
|
1104
|
+
|------|-----------|---------------|
|
|
1105
|
+
| Python 类型 | `dict[str, Any]` | `list[dict[str, Any]]` |
|
|
1106
|
+
| 接受输入 | `dict` 或 JSON 字符串 | `list` 或 JSON 字符串 |
|
|
1107
|
+
| PostgreSQL 类型 | `JSONB` | `JSONB` |
|
|
1108
|
+
| 最大输入长度 | 100,000 字符 | 100,000 字符 |
|
|
1109
|
+
| API 序列化 | JSON 字符串 | JSON 字符串 |
|
|
1110
|
+
|
|
1111
|
+
#### `NumpyVector` -- pgvector + NumPy 集成
|
|
1112
|
+
|
|
1113
|
+
在 PostgreSQL 中以 pgvector 的 `Vector` 类型存储,在 Python 中以 `numpy.ndarray` 暴露。支持固定维度的向量数据和 dtype 约束。
|
|
1114
|
+
|
|
1115
|
+
```python
|
|
1116
|
+
import numpy as np
|
|
1117
|
+
from sqlmodel import Field
|
|
1118
|
+
from sqlmodel_ext.field_types.dialects.postgresql import NumpyVector
|
|
1119
|
+
|
|
1120
|
+
class SpeakerInfo(SQLModelBase, UUIDTableBaseMixin, table=True):
|
|
1121
|
+
embedding: NumpyVector[1024, np.float32] = Field(...)
|
|
1122
|
+
"""1024 维 float32 嵌入向量"""
|
|
1123
|
+
|
|
1124
|
+
# 默认 dtype 为 float32
|
|
1125
|
+
class Document(SQLModelBase, UUIDTableBaseMixin, table=True):
|
|
1126
|
+
embedding: NumpyVector[768] = Field(...)
|
|
1127
|
+
"""768 维向量(默认 float32)"""
|
|
1128
|
+
```
|
|
1129
|
+
|
|
1130
|
+
**API 序列化格式**(base64 编码,高效传输):
|
|
1131
|
+
|
|
1132
|
+
```json
|
|
1133
|
+
{
|
|
1134
|
+
"dtype": "float32",
|
|
1135
|
+
"shape": 1024,
|
|
1136
|
+
"data_b64": "AAABAAA..."
|
|
1137
|
+
}
|
|
1138
|
+
```
|
|
1139
|
+
|
|
1140
|
+
**支持的输入格式:**
|
|
1141
|
+
|
|
1142
|
+
| 格式 | 示例 |
|
|
1143
|
+
|------|------|
|
|
1144
|
+
| `numpy.ndarray` | `np.zeros(1024, dtype=np.float32)` |
|
|
1145
|
+
| `list` / `tuple` | `[0.1, 0.2, ...]` |
|
|
1146
|
+
| base64 字典 | `{"dtype": "float32", "shape": 1024, "data_b64": "..."}` |
|
|
1147
|
+
| pgvector 字符串 | `"[0.1, 0.2, ...]"`(从数据库加载) |
|
|
1148
|
+
|
|
1149
|
+
**向量相似度搜索**(pgvector 运算符):
|
|
1150
|
+
|
|
1151
|
+
```python
|
|
1152
|
+
from sqlalchemy import select
|
|
1153
|
+
|
|
1154
|
+
# L2 距离(欧几里得距离)
|
|
1155
|
+
stmt = select(SpeakerInfo).order_by(
|
|
1156
|
+
SpeakerInfo.embedding.l2_distance(query_vector)
|
|
1157
|
+
).limit(10)
|
|
1158
|
+
|
|
1159
|
+
# 余弦距离
|
|
1160
|
+
stmt = select(SpeakerInfo).order_by(
|
|
1161
|
+
SpeakerInfo.embedding.cosine_distance(query_vector)
|
|
1162
|
+
).limit(10)
|
|
1163
|
+
|
|
1164
|
+
# 最大内积
|
|
1165
|
+
stmt = select(SpeakerInfo).order_by(
|
|
1166
|
+
SpeakerInfo.embedding.max_inner_product(query_vector)
|
|
1167
|
+
).limit(10)
|
|
1168
|
+
```
|
|
1169
|
+
|
|
1170
|
+
**向量异常:**
|
|
1171
|
+
|
|
1172
|
+
| 异常 | 触发场景 |
|
|
1173
|
+
|------|----------|
|
|
1174
|
+
| `VectorError` | 所有向量错误的基类 |
|
|
1175
|
+
| `VectorDimensionError` | 数组维度与声明的大小不匹配 |
|
|
1176
|
+
| `VectorDTypeError` | dtype 转换失败 |
|
|
1177
|
+
| `VectorDecodeError` | base64 或数据库格式解码失败 |
|
|
1178
|
+
|
|
1179
|
+
```python
|
|
1180
|
+
from sqlmodel_ext.field_types.dialects.postgresql import (
|
|
1181
|
+
VectorError, VectorDimensionError, VectorDTypeError, VectorDecodeError,
|
|
1182
|
+
)
|
|
1183
|
+
```
|
|
1184
|
+
|
|
1185
|
+
---
|
|
1186
|
+
|
|
1020
1187
|
### 响应 DTO Mixin
|
|
1021
1188
|
|
|
1022
1189
|
为 API 响应模型预构建的 Mixin,包含 id 和时间戳字段:
|
|
@@ -1065,6 +1232,13 @@ sqlmodel_ext/
|
|
|
1065
1232
|
url.py # Url、HttpUrl、WebSocketUrl、SafeHttpUrl
|
|
1066
1233
|
_internal/path.py # 路径类型处理器
|
|
1067
1234
|
mixins/ # ModuleNameMixin
|
|
1235
|
+
dialects/
|
|
1236
|
+
postgresql/
|
|
1237
|
+
__init__.py # PostgreSQL 类型重导出
|
|
1238
|
+
array.py # Array[T] 泛型 ARRAY 类型
|
|
1239
|
+
jsonb_types.py # JSON100K、JSONList100K(需要 orjson)
|
|
1240
|
+
numpy_vector.py # NumpyVector[dims, dtype](需要 numpy + pgvector)
|
|
1241
|
+
exceptions.py # VectorError 异常层次
|
|
1068
1242
|
```
|
|
1069
1243
|
|
|
1070
1244
|
## 环境要求
|
|
@@ -1074,6 +1248,9 @@ sqlmodel_ext/
|
|
|
1074
1248
|
- **pydantic** >= 2.0
|
|
1075
1249
|
- **sqlalchemy** >= 2.0
|
|
1076
1250
|
- (可选)**fastapi** >= 0.100.0
|
|
1251
|
+
- (可选)**orjson** >= 3.0 -- 用于 `JSON100K` / `JSONList100K`
|
|
1252
|
+
- (可选)**numpy** >= 1.24 -- 用于 `NumpyVector`
|
|
1253
|
+
- (可选)**pgvector** >= 0.3 -- 用于 `NumpyVector`
|
|
1077
1254
|
|
|
1078
1255
|
## AI 使用披露
|
|
1079
1256
|
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "sqlmodel-ext"
|
|
7
|
-
version = "0.1.0"
|
|
7
|
+
version = "0.2.0"
|
|
8
8
|
description = "Extended SQLModel infrastructure: smart metaclass, async CRUD mixins, polymorphic inheritance, optimistic locking, relation preloading, and reusable field types."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "MIT"
|
|
@@ -35,6 +35,12 @@ dependencies = [
|
|
|
35
35
|
|
|
36
36
|
[project.optional-dependencies]
|
|
37
37
|
fastapi = ["fastapi>=0.100.0"]
|
|
38
|
+
postgresql = ["orjson>=3.0"]
|
|
39
|
+
pgvector = [
|
|
40
|
+
"sqlmodel-ext[postgresql]",
|
|
41
|
+
"numpy>=1.24",
|
|
42
|
+
"pgvector>=0.3",
|
|
43
|
+
]
|
|
38
44
|
dev = [
|
|
39
45
|
"pytest>=8.0",
|
|
40
46
|
"pytest-asyncio>=0.24",
|