sop4py 2.0.23__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,11 @@
1
+ include sop/libjsondb_amd64darwin.dylib
2
+ include sop/libjsondb_amd64darwin.h
3
+ include sop/libjsondb_arm64darwin.dylib
4
+ include sop/libjsondb_arm64darwin.h
5
+ include sop/libjsondb_amd64linux.so
6
+ include sop/libjsondb_amd64linux.h
7
+ include sop/libjsondb_arm64linux.so
8
+ include sop/libjsondb_arm64linux.h
9
+ include sop/libjsondb_amd64windows.dll
10
+ include sop/libjsondb_amd64windows.h
11
+ include sop/ai/README.md
sop4py-2.0.23/PKG-INFO ADDED
@@ -0,0 +1,219 @@
1
+ Metadata-Version: 2.4
2
+ Name: sop4py
3
+ Version: 2.0.23
4
+ Summary: Scalable Objects Persistence (SOP) V2 for Python. General Public Availability (GPA) Release
5
+ Author-email: Gerardo Recinto <gerardorecinto@yahoo.com>
6
+ Classifier: Programming Language :: Python :: 3
7
+ Classifier: License :: OSI Approved :: MIT License
8
+ Classifier: Operating System :: OS Independent
9
+ Requires-Python: >=3.7
10
+ Description-Content-Type: text/markdown
11
+
12
+ # SOP for Python (sop4py)
13
+
14
+ **Scalable Objects Persistence (SOP)** is a high-performance, transactional storage engine for Python, powered by a robust Go backend. It combines the raw speed of direct disk I/O with the reliability of ACID transactions and the flexibility of modern AI data management.
15
+
16
+ ## Key Features
17
+
18
+ * **Unified Database**: Single entry point for managing Vector, Model, and Key-Value stores.
19
+ * **Transactional B-Tree Store**: Unlimited, persistent B-Tree storage for key-value data.
20
+ * **Vector Database**: Built-in vector search (k-NN) for AI embeddings and similarity search.
21
+ * **Text Search**: Transactional, embedded text search engine (BM25).
22
+ * **AI Model Store**: Versioned storage for machine learning models (B-Tree backed).
23
+ * **ACID Compliance**: Full transaction support (Begin, Commit, Rollback) with isolation.
24
+ * **High Performance**: Written in Go with a lightweight Python wrapper (ctypes).
25
+ * **Caching**: Integrated Redis-backed L1/L2 caching for speed.
26
+ * **Replication**: Optional Erasure Coding (EC) for fault-tolerant storage across drives.
27
+ * **Flexible Deployment**: Supports both **Standalone** (local) and **Clustered** (distributed) modes.
28
+
29
+ ## Documentation
30
+
31
+ * **[API Cookbook](COOKBOOK.md)**: Common recipes and patterns (Key-Value, Transactions, AI).
32
+ * **[Examples](examples/)**: Complete runnable scripts.
33
+
34
+ ## Prerequisites
35
+
36
+ * **Redis**: Required for caching and transaction coordination (especially in Clustered mode). **Note**: Redis is NOT used for data storage; it is used only for coordination and for the built-in caching.
37
+ * **Storage**: Local disk space (supports multiple drives/folders).
38
+ * **OS**: macOS (Darwin; AMD64 or ARM64), Linux (AMD64 or ARM64), or Windows (AMD64).
39
+
40
+ ## Installation
41
+
42
+ 1. **Build the Go Bridge**:
43
+ ```bash
44
+ cd jsondb
45
+ go build -o jsondb.so -buildmode=c-shared main/*.go
46
+ ```
47
+
48
+ 2. **Install Python Dependencies**:
49
+ ```bash
50
+ pip install -r jsondb/python/requirements.txt
51
+ ```
52
+
53
+ 3. **Set PYTHONPATH**:
54
+ ```bash
55
+ export PYTHONPATH=$PYTHONPATH:$(pwd)/jsondb/python
56
+ ```
57
+
58
+ ## Quick Start Guide
59
+
60
+ SOP uses a unified `Database` object to manage all types of stores (Vector, Model, and B-Tree). All operations are performed within a **Transaction**.
61
+
62
+ ### 1. Initialize Database & Context
63
+
64
+ First, create a Context and open a Database connection.
65
+
66
+ ```python
67
+ from sop import Context, TransactionMode, TransactionOptions, Btree, BtreeOptions, Item
68
+ from sop.ai import Database, DBType, Item as VectorItem
69
+
70
+ # Initialize Context
71
+ ctx = Context()
72
+
73
+ # Open Database (Standalone Mode)
74
+ # This creates/opens a database at the specified path.
75
+ db = Database(ctx, storage_path="data/my_db", db_type=DBType.Standalone)
76
+ ```
77
+
78
+ ### 2. Start a Transaction
79
+
80
+ All data operations (Create, Read, Update, Delete) must happen within a transaction.
81
+
82
+ ```python
83
+ # Begin a transaction (Read-Write)
84
+ # You can use a 'with' block for auto-commit/rollback, or manage the transaction manually.
85
+ with db.begin_transaction(ctx, mode=TransactionMode.ForWriting.value) as tx:
86
+
87
+ # --- 3. Vector Store (AI) ---
88
+ # Open a Vector Store named "products"
89
+ vector_store = db.open_vector_store(ctx, tx, "products")
90
+
91
+ # Upsert a Vector Item
92
+ vector_store.upsert(ctx, VectorItem(
93
+ id="prod_101",
94
+ vector=[0.1, 0.5, 0.9],
95
+ payload={"name": "Laptop", "price": 999}
96
+ ))
97
+
98
+ # --- 4. Model Store (AI) ---
99
+ # Open a Model Store named "classifiers"
100
+ model_store = db.open_model_store(ctx, tx, "classifiers")
101
+
102
+ # Save a Model
103
+ model_store.save(ctx, "churn", "v1.0", {
104
+ "algorithm": "random_forest",
105
+ "trees": 100
106
+ })
107
+
108
+ # --- 5. General Purpose B-Tree ---
109
+ # Create a new B-Tree store.
110
+ bo = BtreeOptions(name="user_store", is_unique=True)
111
+ user_store = db.new_btree(ctx, "user_store", tx, options=bo)
112
+
113
+ # --- 6. Text Search ---
114
+ # Open a Search Index
115
+ idx = db.open_search(ctx, "articles", tx)
116
+ idx.add("doc1", "The quick brown fox")
117
+
118
+ # Add an item to the B-Tree store (user_store) created in step 5.
119
+ user_store.add(ctx, Item(key="user1", value="John Doe"))
120
+
121
+ # Transaction commits automatically here.
122
+ # If an exception occurs, it rolls back.
123
+ ```
124
+
125
+ ### 7. Querying Data
126
+
127
+ You can perform queries in a separate transaction (e.g., Read-Only).
128
+
129
+ ```python
130
+ # Begin a Read-Only transaction (optional optimization)
131
+ with db.begin_transaction(ctx, mode=TransactionMode.ForReading.value) as tx:
132
+
133
+ # --- Vector Search ---
134
+ vs = db.open_vector_store(ctx, tx, "products")
135
+ hits = vs.query(ctx, vector=[0.1, 0.5, 0.8], k=5)
136
+ for hit in hits:
137
+ print(f"Vector Match: {hit.id}, Score: {hit.score}")
138
+
139
+ # --- Model Retrieval ---
140
+ ms = db.open_model_store(ctx, tx, "classifiers")
141
+ model = ms.get(ctx, "churn", "v1.0")
142
+ print(f"Loaded Model: {model['algorithm']}")
143
+
144
+ # --- B-Tree Lookup ---
145
+ us = db.open_btree(ctx, "user_store", tx)
146
+ if us.find(ctx, "user1"):
147
+ # Fetch the current item
148
+ item = us.get_current_item(ctx)
149
+ print(f"User Found: {item.value}")
150
+ ```
151
+
152
+ **Performance Tip**: For **Vector Search** workloads that are "Build-Once-Query-Many", use `TransactionMode.NoCheck`. This bypasses transaction overhead for maximum query throughput.
153
+
154
+ ```python
155
+ # High-performance Vector Search (No ACID checks)
156
+ with db.begin_transaction(ctx, mode=TransactionMode.NoCheck.value) as tx:
157
+ vs = db.open_vector_store(ctx, tx, "products")
158
+ hits = vs.query(ctx, vector=[0.1, 0.5, 0.8], k=5)
159
+ ```
160
+
161
+ ## Advanced Configuration
162
+
163
+ ### Logging
164
+
165
+ You can configure the internal logging of the SOP engine (Go backend) to output to a file or standard error, and control the verbosity.
166
+
167
+ ```python
168
+ from sop import Logger, LogLevel
169
+
170
+ # Configure logging to a file with Debug level
171
+ Logger.configure(LogLevel.Debug, "sop_engine.log")
172
+
173
+ # Or configure logging to stderr (default) with Info level
174
+ Logger.configure(LogLevel.Info)
175
+ ```
176
+
177
+ ### Transaction Options
178
+
179
+ You can configure timeouts, isolation levels, and more.
180
+
181
+ ```python
182
+ from sop import TransactionOptions, TransactionMode
183
+
184
+ opts = TransactionOptions(
185
+ mode=TransactionMode.ForWriting.value,
186
+ max_time=15, # 15 minutes timeout
187
+ )
188
+
189
+ tx = db.begin_transaction(ctx, options=opts)
190
+ ```
191
+
192
+ ### Clustered Mode
193
+
194
+ For distributed deployments, switch to `DBType.Clustered`. This requires Redis for coordination.
195
+
196
+ ```python
197
+ from sop.ai import DBType
198
+
199
+ db = Database(
200
+ ctx,
201
+ storage_path="/mnt/shared_data",
202
+ db_type=DBType.Clustered
203
+ )
204
+ ```
205
+
206
+ ## Architecture
207
+
208
+ SOP uses a split architecture:
209
+ 1. **Core Engine (Go)**: Handles disk I/O, B-Tree algorithms, caching, and transactions. Compiled as a shared library (`.dylib`, `.so`, `.dll`).
210
+ 2. **Python Wrapper**: Uses `ctypes` to interface with the Go engine, providing a Pythonic API (`sop` package).
211
+
212
+ ## Project Links
213
+
214
+ * **Source Code**: [GitHub - sharedcode/sop](https://github.com/sharedcode/sop)
215
+ * **PyPI**: [sop4py](https://pypi.org/project/sop4py)
216
+
217
+ ## Contributing
218
+
219
+ Contributions are welcome! Please check the `CONTRIBUTING.md` file in the repository for guidelines.
@@ -0,0 +1,208 @@
1
+ # SOP for Python (sop4py)
2
+
3
+ **Scalable Objects Persistence (SOP)** is a high-performance, transactional storage engine for Python, powered by a robust Go backend. It combines the raw speed of direct disk I/O with the reliability of ACID transactions and the flexibility of modern AI data management.
4
+
5
+ ## Key Features
6
+
7
+ * **Unified Database**: Single entry point for managing Vector, Model, and Key-Value stores.
8
+ * **Transactional B-Tree Store**: Unlimited, persistent B-Tree storage for key-value data.
9
+ * **Vector Database**: Built-in vector search (k-NN) for AI embeddings and similarity search.
10
+ * **Text Search**: Transactional, embedded text search engine (BM25).
11
+ * **AI Model Store**: Versioned storage for machine learning models (B-Tree backed).
12
+ * **ACID Compliance**: Full transaction support (Begin, Commit, Rollback) with isolation.
13
+ * **High Performance**: Written in Go with a lightweight Python wrapper (ctypes).
14
+ * **Caching**: Integrated Redis-backed L1/L2 caching for speed.
15
+ * **Replication**: Optional Erasure Coding (EC) for fault-tolerant storage across drives.
16
+ * **Flexible Deployment**: Supports both **Standalone** (local) and **Clustered** (distributed) modes.
17
+
18
+ ## Documentation
19
+
20
+ * **[API Cookbook](COOKBOOK.md)**: Common recipes and patterns (Key-Value, Transactions, AI).
21
+ * **[Examples](examples/)**: Complete runnable scripts.
22
+
23
+ ## Prerequisites
24
+
25
+ * **Redis**: Required for caching and transaction coordination (especially in Clustered mode). **Note**: Redis is NOT used for data storage; it is used only for coordination and for the built-in caching.
26
+ * **Storage**: Local disk space (supports multiple drives/folders).
27
+ * **OS**: macOS (Darwin; AMD64 or ARM64), Linux (AMD64 or ARM64), or Windows (AMD64).
28
+
29
+ ## Installation
30
+
31
+ 1. **Build the Go Bridge**:
32
+ ```bash
33
+ cd jsondb
34
+ go build -o jsondb.so -buildmode=c-shared main/*.go
35
+ ```
36
+
37
+ 2. **Install Python Dependencies**:
38
+ ```bash
39
+ pip install -r jsondb/python/requirements.txt
40
+ ```
41
+
42
+ 3. **Set PYTHONPATH**:
43
+ ```bash
44
+ export PYTHONPATH=$PYTHONPATH:$(pwd)/jsondb/python
45
+ ```
46
+
47
+ ## Quick Start Guide
48
+
49
+ SOP uses a unified `Database` object to manage all types of stores (Vector, Model, and B-Tree). All operations are performed within a **Transaction**.
50
+
51
+ ### 1. Initialize Database & Context
52
+
53
+ First, create a Context and open a Database connection.
54
+
55
+ ```python
56
+ from sop import Context, TransactionMode, TransactionOptions, Btree, BtreeOptions, Item
57
+ from sop.ai import Database, DBType, Item as VectorItem
58
+
59
+ # Initialize Context
60
+ ctx = Context()
61
+
62
+ # Open Database (Standalone Mode)
63
+ # This creates/opens a database at the specified path.
64
+ db = Database(ctx, storage_path="data/my_db", db_type=DBType.Standalone)
65
+ ```
66
+
67
+ ### 2. Start a Transaction
68
+
69
+ All data operations (Create, Read, Update, Delete) must happen within a transaction.
70
+
71
+ ```python
72
+ # Begin a transaction (Read-Write)
73
+ # You can use a 'with' block for auto-commit/rollback, or manage the transaction manually.
74
+ with db.begin_transaction(ctx, mode=TransactionMode.ForWriting.value) as tx:
75
+
76
+ # --- 3. Vector Store (AI) ---
77
+ # Open a Vector Store named "products"
78
+ vector_store = db.open_vector_store(ctx, tx, "products")
79
+
80
+ # Upsert a Vector Item
81
+ vector_store.upsert(ctx, VectorItem(
82
+ id="prod_101",
83
+ vector=[0.1, 0.5, 0.9],
84
+ payload={"name": "Laptop", "price": 999}
85
+ ))
86
+
87
+ # --- 4. Model Store (AI) ---
88
+ # Open a Model Store named "classifiers"
89
+ model_store = db.open_model_store(ctx, tx, "classifiers")
90
+
91
+ # Save a Model
92
+ model_store.save(ctx, "churn", "v1.0", {
93
+ "algorithm": "random_forest",
94
+ "trees": 100
95
+ })
96
+
97
+ # --- 5. General Purpose B-Tree ---
98
+ # Create a new B-Tree store.
99
+ bo = BtreeOptions(name="user_store", is_unique=True)
100
+ user_store = db.new_btree(ctx, "user_store", tx, options=bo)
101
+
102
+ # --- 6. Text Search ---
103
+ # Open a Search Index
104
+ idx = db.open_search(ctx, "articles", tx)
105
+ idx.add("doc1", "The quick brown fox")
106
+
107
+ # Add an item to the B-Tree store (user_store) created in step 5.
108
+ user_store.add(ctx, Item(key="user1", value="John Doe"))
109
+
110
+ # Transaction commits automatically here.
111
+ # If an exception occurs, it rolls back.
112
+ ```
113
+
114
+ ### 7. Querying Data
115
+
116
+ You can perform queries in a separate transaction (e.g., Read-Only).
117
+
118
+ ```python
119
+ # Begin a Read-Only transaction (optional optimization)
120
+ with db.begin_transaction(ctx, mode=TransactionMode.ForReading.value) as tx:
121
+
122
+ # --- Vector Search ---
123
+ vs = db.open_vector_store(ctx, tx, "products")
124
+ hits = vs.query(ctx, vector=[0.1, 0.5, 0.8], k=5)
125
+ for hit in hits:
126
+ print(f"Vector Match: {hit.id}, Score: {hit.score}")
127
+
128
+ # --- Model Retrieval ---
129
+ ms = db.open_model_store(ctx, tx, "classifiers")
130
+ model = ms.get(ctx, "churn", "v1.0")
131
+ print(f"Loaded Model: {model['algorithm']}")
132
+
133
+ # --- B-Tree Lookup ---
134
+ us = db.open_btree(ctx, "user_store", tx)
135
+ if us.find(ctx, "user1"):
136
+ # Fetch the current item
137
+ item = us.get_current_item(ctx)
138
+ print(f"User Found: {item.value}")
139
+ ```
140
+
141
+ **Performance Tip**: For **Vector Search** workloads that are "Build-Once-Query-Many", use `TransactionMode.NoCheck`. This bypasses transaction overhead for maximum query throughput.
142
+
143
+ ```python
144
+ # High-performance Vector Search (No ACID checks)
145
+ with db.begin_transaction(ctx, mode=TransactionMode.NoCheck.value) as tx:
146
+ vs = db.open_vector_store(ctx, tx, "products")
147
+ hits = vs.query(ctx, vector=[0.1, 0.5, 0.8], k=5)
148
+ ```
149
+
150
+ ## Advanced Configuration
151
+
152
+ ### Logging
153
+
154
+ You can configure the internal logging of the SOP engine (Go backend) to output to a file or standard error, and control the verbosity.
155
+
156
+ ```python
157
+ from sop import Logger, LogLevel
158
+
159
+ # Configure logging to a file with Debug level
160
+ Logger.configure(LogLevel.Debug, "sop_engine.log")
161
+
162
+ # Or configure logging to stderr (default) with Info level
163
+ Logger.configure(LogLevel.Info)
164
+ ```
165
+
166
+ ### Transaction Options
167
+
168
+ You can configure timeouts, isolation levels, and more.
169
+
170
+ ```python
171
+ from sop import TransactionOptions, TransactionMode
172
+
173
+ opts = TransactionOptions(
174
+ mode=TransactionMode.ForWriting.value,
175
+ max_time=15, # 15 minutes timeout
176
+ )
177
+
178
+ tx = db.begin_transaction(ctx, options=opts)
179
+ ```
180
+
181
+ ### Clustered Mode
182
+
183
+ For distributed deployments, switch to `DBType.Clustered`. This requires Redis for coordination.
184
+
185
+ ```python
186
+ from sop.ai import DBType
187
+
188
+ db = Database(
189
+ ctx,
190
+ storage_path="/mnt/shared_data",
191
+ db_type=DBType.Clustered
192
+ )
193
+ ```
194
+
195
+ ## Architecture
196
+
197
+ SOP uses a split architecture:
198
+ 1. **Core Engine (Go)**: Handles disk I/O, B-Tree algorithms, caching, and transactions. Compiled as a shared library (`.dylib`, `.so`, `.dll`).
199
+ 2. **Python Wrapper**: Uses `ctypes` to interface with the Go engine, providing a Pythonic API (`sop` package).
200
+
201
+ ## Project Links
202
+
203
+ * **Source Code**: [GitHub - sharedcode/sop](https://github.com/sharedcode/sop)
204
+ * **PyPI**: [sop4py](https://pypi.org/project/sop4py)
205
+
206
+ ## Contributing
207
+
208
+ Contributions are welcome! Please check the `CONTRIBUTING.md` file in the repository for guidelines.
@@ -0,0 +1,28 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [tool.setuptools]
6
+ include-package-data = true
7
+ packages = ["sop"]
8
+
9
+ [project]
10
+ name = "sop4py"
11
+ version = "2.0.23"
12
+ authors = [
13
+ { name="Gerardo Recinto", email="gerardorecinto@yahoo.com" },
14
+ ]
15
+ description = "Scalable Objects Persistence (SOP) V2 for Python. General Public Availability (GPA) Release"
16
+ readme = "README.md"
17
+ requires-python = ">=3.7"
18
+ classifiers = [
19
+ "Programming Language :: Python :: 3",
20
+ "License :: OSI Approved :: MIT License",
21
+ "Operating System :: OS Independent",
22
+ ]
23
+ dependencies = []
24
+
25
+ # activate venv: sop$ source .venv/bin/activate
26
+ # build: python3 -m build
27
+ # upload to pypi using twine: python3 -m twine upload dist/*
28
+ # project link: https://pypi.org/project/sop4py
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,10 @@
1
+
2
+
3
+ __version__="2.0.0"
4
+
5
+ from . import ai
6
+ from .transaction import Transaction, TransactionOptions, TransactionMode
7
+ from .context import Context
8
+ from .btree import Btree, BtreeOptions, Item, PagingInfo, PagingDirection, ValueDataSize
9
+ from .database import Database
10
+ from .logger import Logger, LogLevel
@@ -0,0 +1,56 @@
1
+ # SOP AI Python Wrapper
2
+
3
+ This package provides Python bindings for the SOP AI library, enabling you to build Vector Databases and manage AI models using Python.
4
+
5
+ ## Installation
6
+
7
+ Ensure you have the shared library (`libjsondb.so`, `.dll`, or `.dylib`) built and available in your library path.
8
+
9
+ ## Usage
10
+
11
+ ### Vector Database
12
+
13
+ You can use the unified `sop.ai.Database` class to manage your Vector Stores.
14
+
15
+ ```python
16
+ import sop
17
+ from sop.ai import Database
18
+ from sop.transaction import ErasureCodingConfig, DBType
19
+
20
+ # 1. Initialize Context
21
+ ctx = sop.Context()
22
+
23
+ # 2. Initialize Database
24
+ # Standalone (Local, No Replication)
25
+ db = Database(ctx, storage_path="./my_vector_db", db_type=DBType.Standalone)
26
+
27
+ # Clustered (Distributed, With Replication)
28
+ ec_config = ErasureCodingConfig(
29
+ data_shards_count=2,
30
+ parity_shards_count=1,
31
+ base_folder_paths_across_drives=["/mnt/d1/sop", "/mnt/d2/sop"],
32
+ repair_corrupted_shards=True
33
+ )
34
+
35
+ clustered_db = Database(
36
+ ctx,
37
+ storage_path="./my_cluster_db",
38
+ db_type=DBType.Clustered,
39
+ erasure_config={"default": ec_config},
40
+ stores_folders=["/mnt/d1/sop", "/mnt/d2/sop"]
41
+ )
42
+
43
+ # 3. Open a Store within a Transaction
44
+ with db.begin_transaction(ctx) as tx:
45
+ store = db.open_vector_store(ctx, tx, "documents")
46
+
47
+ # 4. Upsert Items
48
+ # ...
49
+ ```
50
+
51
+ ### Replication Support
52
+
53
+ Replication (Erasure Coding) is **optional** in all modes.
54
+
55
+ * **Standalone Mode**: Can be configured with replication, but defaults to single-folder storage.
56
+ * **Clustered Mode**: Can be configured with replication for high availability, or used without it for simple distributed access.