sop4py 2.0.23__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sop4py-2.0.23/MANIFEST.in +11 -0
- sop4py-2.0.23/PKG-INFO +219 -0
- sop4py-2.0.23/README.md +208 -0
- sop4py-2.0.23/pyproject.toml +28 -0
- sop4py-2.0.23/setup.cfg +4 -0
- sop4py-2.0.23/sop/__init__.py +10 -0
- sop4py-2.0.23/sop/ai/README.md +56 -0
- sop4py-2.0.23/sop/btree.py +643 -0
- sop4py-2.0.23/sop/call_go.py +470 -0
- sop4py-2.0.23/sop/context.py +46 -0
- sop4py-2.0.23/sop/database.py +86 -0
- sop4py-2.0.23/sop/libjsondb_amd64darwin.dylib +0 -0
- sop4py-2.0.23/sop/libjsondb_amd64darwin.h +143 -0
- sop4py-2.0.23/sop/libjsondb_amd64linux.h +135 -0
- sop4py-2.0.23/sop/libjsondb_amd64linux.so +0 -0
- sop4py-2.0.23/sop/libjsondb_amd64windows.dll +0 -0
- sop4py-2.0.23/sop/libjsondb_amd64windows.h +135 -0
- sop4py-2.0.23/sop/libjsondb_arm64darwin.dylib +0 -0
- sop4py-2.0.23/sop/libjsondb_arm64darwin.h +135 -0
- sop4py-2.0.23/sop/libjsondb_arm64linux.h +135 -0
- sop4py-2.0.23/sop/libjsondb_arm64linux.so +0 -0
- sop4py-2.0.23/sop/logger.py +26 -0
- sop4py-2.0.23/sop/redis.py +38 -0
- sop4py-2.0.23/sop/search.py +48 -0
- sop4py-2.0.23/sop/test_btree.py +458 -0
- sop4py-2.0.23/sop/test_btree_idx.py +89 -0
- sop4py-2.0.23/sop/test_btree_noredis.py +458 -0
- sop4py-2.0.23/sop/test_logging.py +68 -0
- sop4py-2.0.23/sop/transaction.py +193 -0
- sop4py-2.0.23/sop4py.egg-info/PKG-INFO +219 -0
- sop4py-2.0.23/sop4py.egg-info/SOURCES.txt +31 -0
- sop4py-2.0.23/sop4py.egg-info/dependency_links.txt +1 -0
- sop4py-2.0.23/sop4py.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
include sop/libjsondb_amd64darwin.dylib
|
|
2
|
+
include sop/libjsondb_amd64darwin.h
|
|
3
|
+
include sop/libjsondb_arm64darwin.dylib
|
|
4
|
+
include sop/libjsondb_arm64darwin.h
|
|
5
|
+
include sop/libjsondb_amd64linux.so
|
|
6
|
+
include sop/libjsondb_amd64linux.h
|
|
7
|
+
include sop/libjsondb_arm64linux.so
|
|
8
|
+
include sop/libjsondb_arm64linux.h
|
|
9
|
+
include sop/libjsondb_amd64windows.dll
|
|
10
|
+
include sop/libjsondb_amd64windows.h
|
|
11
|
+
include sop/ai/README.md
|
sop4py-2.0.23/PKG-INFO
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sop4py
|
|
3
|
+
Version: 2.0.23
|
|
4
|
+
Summary: Scalable Objects Persistence (SOP) V2 for Python. General Public Availability (GPA) Release
|
|
5
|
+
Author-email: Gerardo Recinto <gerardorecinto@yahoo.com>
|
|
6
|
+
Classifier: Programming Language :: Python :: 3
|
|
7
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
8
|
+
Classifier: Operating System :: OS Independent
|
|
9
|
+
Requires-Python: >=3.7
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
|
|
12
|
+
# SOP for Python (sop4py)
|
|
13
|
+
|
|
14
|
+
**Scalable Objects Persistence (SOP)** is a high-performance, transactional storage engine for Python, powered by a robust Go backend. It combines the raw speed of direct disk I/O with the reliability of ACID transactions and the flexibility of modern AI data management.
|
|
15
|
+
|
|
16
|
+
## Key Features
|
|
17
|
+
|
|
18
|
+
* **Unified Database**: Single entry point for managing Vector, Model, and Key-Value stores.
|
|
19
|
+
* **Transactional B-Tree Store**: Unlimited, persistent B-Tree storage for key-value data.
|
|
20
|
+
* **Vector Database**: Built-in vector search (k-NN) for AI embeddings and similarity search.
|
|
21
|
+
* **Text Search**: Transactional, embedded text search engine (BM25).
|
|
22
|
+
* **AI Model Store**: Versioned storage for machine learning models (B-Tree backed).
|
|
23
|
+
* **ACID Compliance**: Full transaction support (Begin, Commit, Rollback) with isolation.
|
|
24
|
+
* **High Performance**: Written in Go with a lightweight Python wrapper (ctypes).
|
|
25
|
+
* **Caching**: Integrated Redis-backed L1/L2 caching for speed.
|
|
26
|
+
* **Replication**: Optional Erasure Coding (EC) for fault-tolerant storage across drives.
|
|
27
|
+
* **Flexible Deployment**: Supports both **Standalone** (local) and **Clustered** (distributed) modes.
|
|
28
|
+
|
|
29
|
+
## Documentation
|
|
30
|
+
|
|
31
|
+
* **[API Cookbook](COOKBOOK.md)**: Common recipes and patterns (Key-Value, Transactions, AI).
|
|
32
|
+
* **[Examples](examples/)**: Complete runnable scripts.
|
|
33
|
+
|
|
34
|
+
## Prerequisites
|
|
35
|
+
|
|
36
|
+
* **Redis**: Required for caching and transaction coordination (especially in Clustered mode). **Note**: Redis is NOT used for data storage; it is used only for coordination and for the built-in caching.
|
|
37
|
+
* **Storage**: Local disk space (supports multiple drives/folders).
|
|
38
|
+
* **OS**: macOS (Darwin) or Linux on AMD64/ARM64, or Windows on AMD64.
|
|
39
|
+
|
|
40
|
+
## Installation
|
|
41
|
+
|
|
42
|
+
1. **Build the Go Bridge**:
|
|
43
|
+
```bash
|
|
44
|
+
cd jsondb
|
|
45
|
+
go build -o jsondb.so -buildmode=c-shared main/*.go
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
2. **Install Python Dependencies**:
|
|
49
|
+
```bash
|
|
50
|
+
pip install -r jsondb/python/requirements.txt
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
3. **Set PYTHONPATH**:
|
|
54
|
+
```bash
|
|
55
|
+
export PYTHONPATH=$PYTHONPATH:$(pwd)/jsondb/python
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Quick Start Guide
|
|
59
|
+
|
|
60
|
+
SOP uses a unified `Database` object to manage all types of stores (Vector, Model, and B-Tree). All operations are performed within a **Transaction**.
|
|
61
|
+
|
|
62
|
+
### 1. Initialize Database & Context
|
|
63
|
+
|
|
64
|
+
First, create a Context and open a Database connection.
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
from sop import Context, TransactionMode, TransactionOptions, Btree, BtreeOptions, Item
|
|
68
|
+
from sop.ai import Database, DBType, Item as VectorItem
|
|
69
|
+
|
|
70
|
+
# Initialize Context
|
|
71
|
+
ctx = Context()
|
|
72
|
+
|
|
73
|
+
# Open Database (Standalone Mode)
|
|
74
|
+
# This creates/opens a database at the specified path.
|
|
75
|
+
db = Database(ctx, storage_path="data/my_db", db_type=DBType.Standalone)
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### 2. Start a Transaction
|
|
79
|
+
|
|
80
|
+
All data operations (Create, Read, Update, Delete) must happen within a transaction.
|
|
81
|
+
|
|
82
|
+
```python
|
|
83
|
+
# Begin a transaction (Read-Write)
|
|
84
|
+
# You can use a 'with' block for auto-commit/rollback, or manage the transaction manually.
|
|
85
|
+
with db.begin_transaction(ctx, mode=TransactionMode.ForWriting.value) as tx:
|
|
86
|
+
|
|
87
|
+
# --- 3. Vector Store (AI) ---
|
|
88
|
+
# Open a Vector Store named "products"
|
|
89
|
+
vector_store = db.open_vector_store(ctx, tx, "products")
|
|
90
|
+
|
|
91
|
+
# Upsert a Vector Item
|
|
92
|
+
vector_store.upsert(ctx, VectorItem(
|
|
93
|
+
id="prod_101",
|
|
94
|
+
vector=[0.1, 0.5, 0.9],
|
|
95
|
+
payload={"name": "Laptop", "price": 999}
|
|
96
|
+
))
|
|
97
|
+
|
|
98
|
+
# --- 4. Model Store (AI) ---
|
|
99
|
+
# Open a Model Store named "classifiers"
|
|
100
|
+
model_store = db.open_model_store(ctx, tx, "classifiers")
|
|
101
|
+
|
|
102
|
+
# Save a Model
|
|
103
|
+
model_store.save(ctx, "churn", "v1.0", {
|
|
104
|
+
"algorithm": "random_forest",
|
|
105
|
+
"trees": 100
|
|
106
|
+
})
|
|
107
|
+
|
|
108
|
+
# --- 5. General Purpose B-Tree ---
|
|
109
|
+
# Create a new B-Tree store.
|
|
110
|
+
bo = BtreeOptions(name="user_store", is_unique=True)
|
|
111
|
+
user_store = db.new_btree(ctx, "user_store", tx, options=bo)
|
|
112
|
+
|
|
113
|
+
# --- 6. Text Search ---
|
|
114
|
+
# Open a Search Index
|
|
115
|
+
idx = db.open_search(ctx, "articles", tx)
|
|
116
|
+
idx.add("doc1", "The quick brown fox")
|
|
117
|
+
|
|
118
|
+
# Add an item.
|
|
119
|
+
user_store.add(ctx, Item(key="user1", value="John Doe"))
|
|
120
|
+
|
|
121
|
+
# Transaction commits automatically here.
|
|
122
|
+
# If an exception occurs, it rolls back.
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### 7. Querying Data
|
|
126
|
+
|
|
127
|
+
You can perform queries in a separate transaction (e.g., Read-Only).
|
|
128
|
+
|
|
129
|
+
```python
|
|
130
|
+
# Begin a Read-Only transaction (optional optimization)
|
|
131
|
+
with db.begin_transaction(ctx, mode=TransactionMode.ForReading.value) as tx:
|
|
132
|
+
|
|
133
|
+
# --- Vector Search ---
|
|
134
|
+
vs = db.open_vector_store(ctx, tx, "products")
|
|
135
|
+
hits = vs.query(ctx, vector=[0.1, 0.5, 0.8], k=5)
|
|
136
|
+
for hit in hits:
|
|
137
|
+
print(f"Vector Match: {hit.id}, Score: {hit.score}")
|
|
138
|
+
|
|
139
|
+
# --- Model Retrieval ---
|
|
140
|
+
ms = db.open_model_store(ctx, tx, "classifiers")
|
|
141
|
+
model = ms.get(ctx, "churn", "v1.0")
|
|
142
|
+
print(f"Loaded Model: {model['algorithm']}")
|
|
143
|
+
|
|
144
|
+
# --- B-Tree Lookup ---
|
|
145
|
+
us = db.open_btree(ctx, "user_store", tx)
|
|
146
|
+
if us.find(ctx, "user1"):
|
|
147
|
+
# Fetch the current item
|
|
148
|
+
item = us.get_current_item(ctx)
|
|
149
|
+
print(f"User Found: {item.value}")
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
**Performance Tip**: For **Vector Search** workloads that are "Build-Once-Query-Many", use `TransactionMode.NoCheck`. This bypasses transaction overhead for maximum query throughput.
|
|
153
|
+
|
|
154
|
+
```python
|
|
155
|
+
# High-performance Vector Search (No ACID checks)
|
|
156
|
+
with db.begin_transaction(ctx, mode=TransactionMode.NoCheck.value) as tx:
|
|
157
|
+
vs = db.open_vector_store(ctx, tx, "products")
|
|
158
|
+
hits = vs.query(ctx, vector=[0.1, 0.5, 0.8], k=5)
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
## Advanced Configuration
|
|
162
|
+
|
|
163
|
+
### Logging
|
|
164
|
+
|
|
165
|
+
You can configure the internal logging of the SOP engine (Go backend) to output to a file or standard error, and control the verbosity.
|
|
166
|
+
|
|
167
|
+
```python
|
|
168
|
+
from sop import Logger, LogLevel
|
|
169
|
+
|
|
170
|
+
# Configure logging to a file with Debug level
|
|
171
|
+
Logger.configure(LogLevel.Debug, "sop_engine.log")
|
|
172
|
+
|
|
173
|
+
# Or configure logging to stderr (default) with Info level
|
|
174
|
+
Logger.configure(LogLevel.Info)
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
### Transaction Options
|
|
178
|
+
|
|
179
|
+
You can configure timeouts, isolation levels, and more.
|
|
180
|
+
|
|
181
|
+
```python
|
|
182
|
+
from sop import TransactionOptions, TransactionMode
|
|
183
|
+
|
|
184
|
+
opts = TransactionOptions(
|
|
185
|
+
mode=TransactionMode.ForWriting.value,
|
|
186
|
+
max_time=15, # 15 minutes timeout
|
|
187
|
+
)
|
|
188
|
+
|
|
189
|
+
tx = db.begin_transaction(ctx, options=opts)
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
### Clustered Mode
|
|
193
|
+
|
|
194
|
+
For distributed deployments, switch to `DBType.Clustered`. This requires Redis for coordination.
|
|
195
|
+
|
|
196
|
+
```python
|
|
197
|
+
from sop.ai import DBType
|
|
198
|
+
|
|
199
|
+
db = Database(
|
|
200
|
+
ctx,
|
|
201
|
+
storage_path="/mnt/shared_data",
|
|
202
|
+
db_type=DBType.Clustered
|
|
203
|
+
)
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
## Architecture
|
|
207
|
+
|
|
208
|
+
SOP uses a split architecture:
|
|
209
|
+
1. **Core Engine (Go)**: Handles disk I/O, B-Tree algorithms, caching, and transactions. Compiled as a shared library (`.dylib`, `.so`, `.dll`).
|
|
210
|
+
2. **Python Wrapper**: Uses `ctypes` to interface with the Go engine, providing a Pythonic API (`sop` package).
|
|
211
|
+
|
|
212
|
+
## Project Links
|
|
213
|
+
|
|
214
|
+
* **Source Code**: [GitHub - sharedcode/sop](https://github.com/sharedcode/sop)
|
|
215
|
+
* **PyPI**: [sop4py](https://pypi.org/project/sop4py)
|
|
216
|
+
|
|
217
|
+
## Contributing
|
|
218
|
+
|
|
219
|
+
Contributions are welcome! Please check the `CONTRIBUTING.md` file in the repository for guidelines.
|
sop4py-2.0.23/README.md
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
# SOP for Python (sop4py)
|
|
2
|
+
|
|
3
|
+
**Scalable Objects Persistence (SOP)** is a high-performance, transactional storage engine for Python, powered by a robust Go backend. It combines the raw speed of direct disk I/O with the reliability of ACID transactions and the flexibility of modern AI data management.
|
|
4
|
+
|
|
5
|
+
## Key Features
|
|
6
|
+
|
|
7
|
+
* **Unified Database**: Single entry point for managing Vector, Model, and Key-Value stores.
|
|
8
|
+
* **Transactional B-Tree Store**: Unlimited, persistent B-Tree storage for key-value data.
|
|
9
|
+
* **Vector Database**: Built-in vector search (k-NN) for AI embeddings and similarity search.
|
|
10
|
+
* **Text Search**: Transactional, embedded text search engine (BM25).
|
|
11
|
+
* **AI Model Store**: Versioned storage for machine learning models (B-Tree backed).
|
|
12
|
+
* **ACID Compliance**: Full transaction support (Begin, Commit, Rollback) with isolation.
|
|
13
|
+
* **High Performance**: Written in Go with a lightweight Python wrapper (ctypes).
|
|
14
|
+
* **Caching**: Integrated Redis-backed L1/L2 caching for speed.
|
|
15
|
+
* **Replication**: Optional Erasure Coding (EC) for fault-tolerant storage across drives.
|
|
16
|
+
* **Flexible Deployment**: Supports both **Standalone** (local) and **Clustered** (distributed) modes.
|
|
17
|
+
|
|
18
|
+
## Documentation
|
|
19
|
+
|
|
20
|
+
* **[API Cookbook](COOKBOOK.md)**: Common recipes and patterns (Key-Value, Transactions, AI).
|
|
21
|
+
* **[Examples](examples/)**: Complete runnable scripts.
|
|
22
|
+
|
|
23
|
+
## Prerequisites
|
|
24
|
+
|
|
25
|
+
* **Redis**: Required for caching and transaction coordination (especially in Clustered mode). **Note**: Redis is NOT used for data storage; it is used only for coordination and for the built-in caching.
|
|
26
|
+
* **Storage**: Local disk space (supports multiple drives/folders).
|
|
27
|
+
* **OS**: macOS (Darwin) or Linux on AMD64/ARM64, or Windows on AMD64.
|
|
28
|
+
|
|
29
|
+
## Installation
|
|
30
|
+
|
|
31
|
+
1. **Build the Go Bridge**:
|
|
32
|
+
```bash
|
|
33
|
+
cd jsondb
|
|
34
|
+
go build -o jsondb.so -buildmode=c-shared main/*.go
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
2. **Install Python Dependencies**:
|
|
38
|
+
```bash
|
|
39
|
+
pip install -r jsondb/python/requirements.txt
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
3. **Set PYTHONPATH**:
|
|
43
|
+
```bash
|
|
44
|
+
export PYTHONPATH=$PYTHONPATH:$(pwd)/jsondb/python
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Quick Start Guide
|
|
48
|
+
|
|
49
|
+
SOP uses a unified `Database` object to manage all types of stores (Vector, Model, and B-Tree). All operations are performed within a **Transaction**.
|
|
50
|
+
|
|
51
|
+
### 1. Initialize Database & Context
|
|
52
|
+
|
|
53
|
+
First, create a Context and open a Database connection.
|
|
54
|
+
|
|
55
|
+
```python
|
|
56
|
+
from sop import Context, TransactionMode, TransactionOptions, Btree, BtreeOptions, Item
|
|
57
|
+
from sop.ai import Database, DBType, Item as VectorItem
|
|
58
|
+
|
|
59
|
+
# Initialize Context
|
|
60
|
+
ctx = Context()
|
|
61
|
+
|
|
62
|
+
# Open Database (Standalone Mode)
|
|
63
|
+
# This creates/opens a database at the specified path.
|
|
64
|
+
db = Database(ctx, storage_path="data/my_db", db_type=DBType.Standalone)
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### 2. Start a Transaction
|
|
68
|
+
|
|
69
|
+
All data operations (Create, Read, Update, Delete) must happen within a transaction.
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
# Begin a transaction (Read-Write)
|
|
73
|
+
# You can use a 'with' block for auto-commit/rollback, or manage the transaction manually.
|
|
74
|
+
with db.begin_transaction(ctx, mode=TransactionMode.ForWriting.value) as tx:
|
|
75
|
+
|
|
76
|
+
# --- 3. Vector Store (AI) ---
|
|
77
|
+
# Open a Vector Store named "products"
|
|
78
|
+
vector_store = db.open_vector_store(ctx, tx, "products")
|
|
79
|
+
|
|
80
|
+
# Upsert a Vector Item
|
|
81
|
+
vector_store.upsert(ctx, VectorItem(
|
|
82
|
+
id="prod_101",
|
|
83
|
+
vector=[0.1, 0.5, 0.9],
|
|
84
|
+
payload={"name": "Laptop", "price": 999}
|
|
85
|
+
))
|
|
86
|
+
|
|
87
|
+
# --- 4. Model Store (AI) ---
|
|
88
|
+
# Open a Model Store named "classifiers"
|
|
89
|
+
model_store = db.open_model_store(ctx, tx, "classifiers")
|
|
90
|
+
|
|
91
|
+
# Save a Model
|
|
92
|
+
model_store.save(ctx, "churn", "v1.0", {
|
|
93
|
+
"algorithm": "random_forest",
|
|
94
|
+
"trees": 100
|
|
95
|
+
})
|
|
96
|
+
|
|
97
|
+
# --- 5. General Purpose B-Tree ---
|
|
98
|
+
# Create a new B-Tree store.
|
|
99
|
+
bo = BtreeOptions(name="user_store", is_unique=True)
|
|
100
|
+
user_store = db.new_btree(ctx, "user_store", tx, options=bo)
|
|
101
|
+
|
|
102
|
+
# --- 6. Text Search ---
|
|
103
|
+
# Open a Search Index
|
|
104
|
+
idx = db.open_search(ctx, "articles", tx)
|
|
105
|
+
idx.add("doc1", "The quick brown fox")
|
|
106
|
+
|
|
107
|
+
# Add an item.
|
|
108
|
+
user_store.add(ctx, Item(key="user1", value="John Doe"))
|
|
109
|
+
|
|
110
|
+
# Transaction commits automatically here.
|
|
111
|
+
# If an exception occurs, it rolls back.
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### 7. Querying Data
|
|
115
|
+
|
|
116
|
+
You can perform queries in a separate transaction (e.g., Read-Only).
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
# Begin a Read-Only transaction (optional optimization)
|
|
120
|
+
with db.begin_transaction(ctx, mode=TransactionMode.ForReading.value) as tx:
|
|
121
|
+
|
|
122
|
+
# --- Vector Search ---
|
|
123
|
+
vs = db.open_vector_store(ctx, tx, "products")
|
|
124
|
+
hits = vs.query(ctx, vector=[0.1, 0.5, 0.8], k=5)
|
|
125
|
+
for hit in hits:
|
|
126
|
+
print(f"Vector Match: {hit.id}, Score: {hit.score}")
|
|
127
|
+
|
|
128
|
+
# --- Model Retrieval ---
|
|
129
|
+
ms = db.open_model_store(ctx, tx, "classifiers")
|
|
130
|
+
model = ms.get(ctx, "churn", "v1.0")
|
|
131
|
+
print(f"Loaded Model: {model['algorithm']}")
|
|
132
|
+
|
|
133
|
+
# --- B-Tree Lookup ---
|
|
134
|
+
us = db.open_btree(ctx, "user_store", tx)
|
|
135
|
+
if us.find(ctx, "user1"):
|
|
136
|
+
# Fetch the current item
|
|
137
|
+
item = us.get_current_item(ctx)
|
|
138
|
+
print(f"User Found: {item.value}")
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
**Performance Tip**: For **Vector Search** workloads that are "Build-Once-Query-Many", use `TransactionMode.NoCheck`. This bypasses transaction overhead for maximum query throughput.
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
# High-performance Vector Search (No ACID checks)
|
|
145
|
+
with db.begin_transaction(ctx, mode=TransactionMode.NoCheck.value) as tx:
|
|
146
|
+
vs = db.open_vector_store(ctx, tx, "products")
|
|
147
|
+
hits = vs.query(ctx, vector=[0.1, 0.5, 0.8], k=5)
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
## Advanced Configuration
|
|
151
|
+
|
|
152
|
+
### Logging
|
|
153
|
+
|
|
154
|
+
You can configure the internal logging of the SOP engine (Go backend) to output to a file or standard error, and control the verbosity.
|
|
155
|
+
|
|
156
|
+
```python
|
|
157
|
+
from sop import Logger, LogLevel
|
|
158
|
+
|
|
159
|
+
# Configure logging to a file with Debug level
|
|
160
|
+
Logger.configure(LogLevel.Debug, "sop_engine.log")
|
|
161
|
+
|
|
162
|
+
# Or configure logging to stderr (default) with Info level
|
|
163
|
+
Logger.configure(LogLevel.Info)
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
### Transaction Options
|
|
167
|
+
|
|
168
|
+
You can configure timeouts, isolation levels, and more.
|
|
169
|
+
|
|
170
|
+
```python
|
|
171
|
+
from sop import TransactionOptions, TransactionMode
|
|
172
|
+
|
|
173
|
+
opts = TransactionOptions(
|
|
174
|
+
mode=TransactionMode.ForWriting.value,
|
|
175
|
+
max_time=15, # 15 minutes timeout
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
tx = db.begin_transaction(ctx, options=opts)
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
### Clustered Mode
|
|
182
|
+
|
|
183
|
+
For distributed deployments, switch to `DBType.Clustered`. This requires Redis for coordination.
|
|
184
|
+
|
|
185
|
+
```python
|
|
186
|
+
from sop.ai import DBType
|
|
187
|
+
|
|
188
|
+
db = Database(
|
|
189
|
+
ctx,
|
|
190
|
+
storage_path="/mnt/shared_data",
|
|
191
|
+
db_type=DBType.Clustered
|
|
192
|
+
)
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
## Architecture
|
|
196
|
+
|
|
197
|
+
SOP uses a split architecture:
|
|
198
|
+
1. **Core Engine (Go)**: Handles disk I/O, B-Tree algorithms, caching, and transactions. Compiled as a shared library (`.dylib`, `.so`, `.dll`).
|
|
199
|
+
2. **Python Wrapper**: Uses `ctypes` to interface with the Go engine, providing a Pythonic API (`sop` package).
|
|
200
|
+
|
|
201
|
+
## Project Links
|
|
202
|
+
|
|
203
|
+
* **Source Code**: [GitHub - sharedcode/sop](https://github.com/sharedcode/sop)
|
|
204
|
+
* **PyPI**: [sop4py](https://pypi.org/project/sop4py)
|
|
205
|
+
|
|
206
|
+
## Contributing
|
|
207
|
+
|
|
208
|
+
Contributions are welcome! Please check the `CONTRIBUTING.md` file in the repository for guidelines.
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[tool.setuptools]
|
|
6
|
+
include-package-data = true
|
|
7
|
+
packages = ["sop"]
|
|
8
|
+
|
|
9
|
+
[project]
|
|
10
|
+
name = "sop4py"
|
|
11
|
+
version = "2.0.23"
|
|
12
|
+
authors = [
|
|
13
|
+
{ name="Gerardo Recinto", email="gerardorecinto@yahoo.com" },
|
|
14
|
+
]
|
|
15
|
+
description = "Scalable Objects Persistence (SOP) V2 for Python. General Public Availability (GPA) Release"
|
|
16
|
+
readme = "README.md"
|
|
17
|
+
requires-python = ">=3.7"
|
|
18
|
+
classifiers = [
|
|
19
|
+
"Programming Language :: Python :: 3",
|
|
20
|
+
"License :: OSI Approved :: MIT License",
|
|
21
|
+
"Operating System :: OS Independent",
|
|
22
|
+
]
|
|
23
|
+
dependencies = []
|
|
24
|
+
|
|
25
|
+
# activate venv: sop$ source .venv/bin/activate
|
|
26
|
+
# build: python3 -m build
|
|
27
|
+
# upload to pypi using twine: python3 -m twine upload dist/*
|
|
28
|
+
# project link: https://pypi.org/project/sop4py
|
sop4py-2.0.23/setup.cfg
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
__version__="2.0.0"
|
|
4
|
+
|
|
5
|
+
from . import ai
|
|
6
|
+
from .transaction import Transaction, TransactionOptions, TransactionMode
|
|
7
|
+
from .context import Context
|
|
8
|
+
from .btree import Btree, BtreeOptions, Item, PagingInfo, PagingDirection, ValueDataSize
|
|
9
|
+
from .database import Database
|
|
10
|
+
from .logger import Logger, LogLevel
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# SOP AI Python Wrapper
|
|
2
|
+
|
|
3
|
+
This package provides Python bindings for the SOP AI library, enabling you to build Vector Databases and manage AI models using Python.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
Ensure you have the shared library (`libjsondb.so`, `.dll`, or `.dylib`) built and available in your library path.
|
|
8
|
+
|
|
9
|
+
## Usage
|
|
10
|
+
|
|
11
|
+
### Vector Database
|
|
12
|
+
|
|
13
|
+
You can use the unified `sop.ai.Database` class to manage your Vector Stores.
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
import sop
|
|
17
|
+
from sop.ai import Database
|
|
18
|
+
from sop.transaction import ErasureCodingConfig, DBType
|
|
19
|
+
|
|
20
|
+
# 1. Initialize Context
|
|
21
|
+
ctx = sop.Context()
|
|
22
|
+
|
|
23
|
+
# 2. Initialize Database
|
|
24
|
+
# Standalone (Local, No Replication)
|
|
25
|
+
db = Database(ctx, storage_path="./my_vector_db", db_type=DBType.Standalone)
|
|
26
|
+
|
|
27
|
+
# Clustered (Distributed, With Replication)
|
|
28
|
+
ec_config = ErasureCodingConfig(
|
|
29
|
+
data_shards_count=2,
|
|
30
|
+
parity_shards_count=1,
|
|
31
|
+
base_folder_paths_across_drives=["/mnt/d1/sop", "/mnt/d2/sop"],
|
|
32
|
+
repair_corrupted_shards=True
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
clustered_db = Database(
|
|
36
|
+
ctx,
|
|
37
|
+
storage_path="./my_cluster_db",
|
|
38
|
+
db_type=DBType.Clustered,
|
|
39
|
+
erasure_config={"default": ec_config},
|
|
40
|
+
stores_folders=["/mnt/d1/sop", "/mnt/d2/sop"]
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
# 3. Open a Store within a Transaction
|
|
44
|
+
with db.begin_transaction(ctx) as tx:
|
|
45
|
+
store = db.open_vector_store(ctx, tx, "documents")
|
|
46
|
+
|
|
47
|
+
# 4. Upsert Items
|
|
48
|
+
# ...
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### Replication Support
|
|
52
|
+
|
|
53
|
+
Replication (Erasure Coding) is **optional** in all modes.
|
|
54
|
+
|
|
55
|
+
* **Standalone Mode**: Can be configured with replication, but defaults to single-folder storage.
|
|
56
|
+
* **Clustered Mode**: Can be configured with replication for high availability, or used without it for simple distributed access.
|