smongo 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smongo-0.2.0/PKG-INFO +454 -0
- smongo-0.2.0/README.md +414 -0
- smongo-0.2.0/pyproject.toml +118 -0
- smongo-0.2.0/rust/Cargo.lock +1533 -0
- smongo-0.2.0/rust/Cargo.toml +53 -0
- smongo-0.2.0/rust/WIREDTIGER_FFI.md +116 -0
- smongo-0.2.0/rust/globtest +0 -0
- smongo-0.2.0/rust/globtest2 +0 -0
- smongo-0.2.0/rust/src/aggregation.rs +1329 -0
- smongo-0.2.0/rust/src/aggregation_joins.rs +464 -0
- smongo-0.2.0/rust/src/bson_helpers.rs +385 -0
- smongo-0.2.0/rust/src/cached_modules.rs +171 -0
- smongo-0.2.0/rust/src/index_encoding.rs +142 -0
- smongo-0.2.0/rust/src/index_manager.rs +1089 -0
- smongo-0.2.0/rust/src/lib.rs +227 -0
- smongo-0.2.0/rust/src/local_collection.rs +1732 -0
- smongo-0.2.0/rust/src/locking.rs +158 -0
- smongo-0.2.0/rust/src/objectid.rs +322 -0
- smongo-0.2.0/rust/src/oplog.rs +601 -0
- smongo-0.2.0/rust/src/paths.rs +267 -0
- smongo-0.2.0/rust/src/query_compiler.rs +628 -0
- smongo-0.2.0/rust/src/query_expressions.rs +1530 -0
- smongo-0.2.0/rust/src/query_planner.rs +697 -0
- smongo-0.2.0/rust/src/query_update.rs +636 -0
- smongo-0.2.0/rust/src/raw_bson.rs +899 -0
- smongo-0.2.0/rust/src/rbac.rs +176 -0
- smongo-0.2.0/rust/src/results.rs +84 -0
- smongo-0.2.0/rust/src/schema.rs +443 -0
- smongo-0.2.0/rust/src/scram.rs +339 -0
- smongo-0.2.0/rust/src/storage.rs +171 -0
- smongo-0.2.0/rust/src/storage_engine.rs +471 -0
- smongo-0.2.0/rust/src/streaming.rs +482 -0
- smongo-0.2.0/rust/src/sync_manager.rs +123 -0
- smongo-0.2.0/rust/src/sync_utils.rs +644 -0
- smongo-0.2.0/rust/src/transaction.rs +100 -0
- smongo-0.2.0/rust/src/wire_codec.rs +226 -0
- smongo-0.2.0/rust/src/wire_commands/admin.rs +1627 -0
- smongo-0.2.0/rust/src/wire_commands/aggregate.rs +182 -0
- smongo-0.2.0/rust/src/wire_commands/crud.rs +1269 -0
- smongo-0.2.0/rust/src/wire_commands/diagnostics.rs +220 -0
- smongo-0.2.0/rust/src/wire_commands/handshake.rs +478 -0
- smongo-0.2.0/rust/src/wire_commands/indexes.rs +293 -0
- smongo-0.2.0/rust/src/wire_commands/mod.rs +296 -0
- smongo-0.2.0/rust/src/wire_commands/sessions.rs +145 -0
- smongo-0.2.0/rust/src/wire_context.rs +628 -0
- smongo-0.2.0/rust/src/wire_cursors.rs +400 -0
- smongo-0.2.0/rust/src/wire_dispatch.rs +396 -0
- smongo-0.2.0/rust/src/wire_errors.rs +57 -0
- smongo-0.2.0/rust/src/wire_msg.rs +573 -0
- smongo-0.2.0/rust/src/wire_profiler.rs +340 -0
- smongo-0.2.0/rust/src/wire_server.rs +671 -0
- smongo-0.2.0/rust/src/wire_sessions.rs +241 -0
- smongo-0.2.0/rust/src/wire_transactions.rs +82 -0
- smongo-0.2.0/rust/src/wt_bridge.rs +672 -0
- smongo-0.2.0/rust/src/wt_safe/connection.rs +128 -0
- smongo-0.2.0/rust/src/wt_safe/cursor.rs +228 -0
- smongo-0.2.0/rust/src/wt_safe/mod.rs +482 -0
- smongo-0.2.0/rust/src/wt_safe/session.rs +146 -0
- smongo-0.2.0/rust/wiredtiger-sys/Cargo.toml +12 -0
- smongo-0.2.0/rust/wiredtiger-sys/build.rs +6 -0
- smongo-0.2.0/rust/wiredtiger-sys/src/lib.rs +617 -0
- smongo-0.2.0/rust/wiredtiger-sys/src/wt_shims.c +61 -0
- smongo-0.2.0/smongo/__init__.py +95 -0
- smongo-0.2.0/smongo/_compat.py +24 -0
- smongo-0.2.0/smongo/_types.py +14 -0
- smongo-0.2.0/smongo/aggregation/__init__.py +29 -0
- smongo-0.2.0/smongo/aggregation/constants.py +248 -0
- smongo-0.2.0/smongo/aggregation/cursor.py +372 -0
- smongo-0.2.0/smongo/aggregation/geo.py +132 -0
- smongo-0.2.0/smongo/aggregation/joins.py +71 -0
- smongo-0.2.0/smongo/aggregation/output.py +98 -0
- smongo-0.2.0/smongo/aggregation/stages.py +200 -0
- smongo-0.2.0/smongo/aggregation/vector.py +119 -0
- smongo-0.2.0/smongo/audit.py +122 -0
- smongo-0.2.0/smongo/client.py +517 -0
- smongo-0.2.0/smongo/index.py +840 -0
- smongo-0.2.0/smongo/logging.py +111 -0
- smongo-0.2.0/smongo/objectid.py +16 -0
- smongo-0.2.0/smongo/oplog.py +328 -0
- smongo-0.2.0/smongo/py.typed +0 -0
- smongo-0.2.0/smongo/query/__init__.py +24 -0
- smongo-0.2.0/smongo/query/compiler.py +52 -0
- smongo-0.2.0/smongo/query/expressions.py +8 -0
- smongo-0.2.0/smongo/query/paths.py +8 -0
- smongo-0.2.0/smongo/query/update.py +8 -0
- smongo-0.2.0/smongo/schema.py +40 -0
- smongo-0.2.0/smongo/storage/__init__.py +47 -0
- smongo-0.2.0/smongo/storage/collection.py +1178 -0
- smongo-0.2.0/smongo/storage/engine.py +169 -0
- smongo-0.2.0/smongo/storage/helpers.py +24 -0
- smongo-0.2.0/smongo/storage/locking.py +8 -0
- smongo-0.2.0/smongo/storage/results.py +8 -0
- smongo-0.2.0/smongo/storage/streaming.py +121 -0
- smongo-0.2.0/smongo/storage/transaction.py +54 -0
- smongo-0.2.0/smongo/sync.py +858 -0
- smongo-0.2.0/smongo/wire/__init__.py +33 -0
- smongo-0.2.0/smongo/wire/__main__.py +34 -0
- smongo-0.2.0/smongo/wire/_types.py +7 -0
- smongo-0.2.0/smongo/wire/bson_codec.py +26 -0
- smongo-0.2.0/smongo/wire/commands/__init__.py +25 -0
- smongo-0.2.0/smongo/wire/commands/_registry.py +248 -0
- smongo-0.2.0/smongo/wire/commands/admin.py +474 -0
- smongo-0.2.0/smongo/wire/commands/aggregation.py +55 -0
- smongo-0.2.0/smongo/wire/commands/crud.py +588 -0
- smongo-0.2.0/smongo/wire/commands/diagnostic.py +192 -0
- smongo-0.2.0/smongo/wire/commands/handshake.py +167 -0
- smongo-0.2.0/smongo/wire/commands/indexes.py +116 -0
- smongo-0.2.0/smongo/wire/commands/sessions.py +75 -0
- smongo-0.2.0/smongo/wire/commands/users.py +106 -0
- smongo-0.2.0/smongo/wire/context.py +253 -0
- smongo-0.2.0/smongo/wire/cursors.py +21 -0
- smongo-0.2.0/smongo/wire/errors.py +8 -0
- smongo-0.2.0/smongo/wire/msg.py +143 -0
- smongo-0.2.0/smongo/wire/profiler.py +22 -0
- smongo-0.2.0/smongo/wire/server.py +418 -0
- smongo-0.2.0/smongo/wire/sessions.py +15 -0
- smongo-0.2.0/smongo/wire/transactions.py +25 -0
smongo-0.2.0/PKG-INFO
ADDED
|
@@ -0,0 +1,454 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: smongo
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Classifier: Development Status :: 4 - Beta
|
|
5
|
+
Classifier: Intended Audience :: Developers
|
|
6
|
+
Classifier: Programming Language :: Python :: 3
|
|
7
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
8
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
10
|
+
Classifier: Topic :: Database
|
|
11
|
+
Classifier: Topic :: Database :: Database Engines/Servers
|
|
12
|
+
Classifier: Typing :: Typed
|
|
13
|
+
Requires-Dist: wiredtiger>=4.0
|
|
14
|
+
Requires-Dist: pymongo>=4.6
|
|
15
|
+
Requires-Dist: flask>=3.0 ; extra == 'all'
|
|
16
|
+
Requires-Dist: numpy>=1.26 ; extra == 'all'
|
|
17
|
+
Requires-Dist: usearch>=2.0 ; extra == 'all'
|
|
18
|
+
Requires-Dist: pytest>=8.0 ; extra == 'dev'
|
|
19
|
+
Requires-Dist: pytest-cov>=5.0 ; extra == 'dev'
|
|
20
|
+
Requires-Dist: pytest-benchmark>=4.0 ; extra == 'dev'
|
|
21
|
+
Requires-Dist: testcontainers[mongodb]>=4.0 ; extra == 'dev'
|
|
22
|
+
Requires-Dist: ruff>=0.8,<0.9 ; extra == 'dev'
|
|
23
|
+
Requires-Dist: mypy~=1.14.1 ; extra == 'dev'
|
|
24
|
+
Requires-Dist: numpy>=1.26 ; extra == 'vector'
|
|
25
|
+
Requires-Dist: usearch>=2.0 ; extra == 'vector'
|
|
26
|
+
Requires-Dist: flask>=3.0 ; extra == 'web'
|
|
27
|
+
Provides-Extra: all
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Provides-Extra: vector
|
|
30
|
+
Provides-Extra: web
|
|
31
|
+
License-File: LICENSE
|
|
32
|
+
Summary: Small MongoDB, big ambitions -- a local-first document engine with WiredTiger and Atlas sync
|
|
33
|
+
Keywords: mongodb,embedded,wiredtiger,database,local-first,sync,smongo
|
|
34
|
+
Author: smongo contributors
|
|
35
|
+
License-Expression: MIT
|
|
36
|
+
Requires-Python: >=3.11
|
|
37
|
+
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
38
|
+
Project-URL: Repository, https://github.com/smongo/smongo
|
|
39
|
+
|
|
40
|
+
# smongo
|
|
41
|
+
|
|
42
|
+
**Small MongoDB. Big ambitions.**
|
|
43
|
+
|
|
44
|
+
MongoDB's document model and MQL are the most productive way to work with data -- but only if you can use them *everywhere*. Cloud, edge, laptop, airplane mode, CI pipeline, embedded device. **smongo** makes that real: a local-first MongoDB engine in Python, powered by WiredTiger (the same storage engine family that runs MongoDB itself), with bidirectional sync to Atlas when you're ready.
|
|
45
|
+
|
|
46
|
+
Write your app once. Run it against a local B-Tree. Ship it against Atlas. The query language never changes. The "S" stands for Small. The rest is all Mongo.
|
|
47
|
+
|
|
48
|
+
```
|
|
49
|
+
"Same everywhere" -- the architectural bet that the local engine, the query
|
|
50
|
+
language, the wire protocol, and the cloud database should all be the same
|
|
51
|
+
thing, with no translation layer in between.
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
```python
|
|
55
|
+
from smongo import MongoClient
|
|
56
|
+
|
|
57
|
+
# Flip the URI -- same code, different backend
|
|
58
|
+
client = MongoClient("local://data") # embedded WiredTiger
|
|
59
|
+
# client = MongoClient("mongodb+srv://...") # Atlas / any mongod
|
|
60
|
+
# client = MongoClient("local://data", sync="mongodb+srv://...") # local-first + auto sync
|
|
61
|
+
|
|
62
|
+
db = client["myapp"]
|
|
63
|
+
users = db["users"]
|
|
64
|
+
|
|
65
|
+
users.insert_one({"name": "Alice", "age": 34, "city": "NYC"})
|
|
66
|
+
users.create_index([("city", 1), ("age", -1)])
|
|
67
|
+
|
|
68
|
+
for doc in users.find({"city": "NYC", "age": {"$gt": 30}}):
|
|
69
|
+
print(doc["name"])
|
|
70
|
+
|
|
71
|
+
results = users.aggregate([
|
|
72
|
+
{"$group": {"_id": "$city", "avg_age": {"$avg": "$age"}}},
|
|
73
|
+
{"$sort": {"avg_age": -1}},
|
|
74
|
+
])
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
## Why smongo?
|
|
80
|
+
|
|
81
|
+
| Problem | How smongo fixes it |
|
|
82
|
+
|---|---|
|
|
83
|
+
| Local dev requires a running `mongod` or Docker container | Embedded WiredTiger -- Rust extension with direct WiredTiger FFI. No `mongod` required |
|
|
84
|
+
| `mongomock` doesn't support real aggregation pipelines | Full pipeline engine: 25+ stages incl. `$facet`, `$merge`, `$out`, `$vectorSearch`, `$lookup` with 17 group accumulators |
|
|
85
|
+
| Edge / offline-first apps need a different DB and query language | Same MQL everywhere -- one codebase, portable across environments |
|
|
86
|
+
| Syncing local state to the cloud is a custom nightmare | Built-in oplog-driven bidirectional sync with metrics, backoff, selective filters, and conflict resolution |
|
|
87
|
+
| Mock databases don't have indexes or query planners | Real B-Tree indexes with a heuristic prefix-scoring query planner that accelerates reads *and* writes |
|
|
88
|
+
| Embedded databases lack ACID writes or thread safety | WiredTiger transactions wrap every write (data + indexes + oplog), per-collection ReadWriteLock allows concurrent reads while serializing writes |
|
|
89
|
+
|
|
90
|
+
---
|
|
91
|
+
|
|
92
|
+
## Architecture
|
|
93
|
+
|
|
94
|
+
```
|
|
95
|
+
┌────────────────────────────────────────────────────────┐
|
|
96
|
+
│ Your Application │
|
|
97
|
+
│ from smongo import MongoClient │
|
|
98
|
+
└────────────────────┬───────────────────────────────────┘
|
|
99
|
+
│ URI routing
|
|
100
|
+
┌──────────┴──────────┐
|
|
101
|
+
▼ ▼
|
|
102
|
+
local://path mongodb://host
|
|
103
|
+
│ │
|
|
104
|
+
┌──────┴──────┐ ┌─────┴─────┐
|
|
105
|
+
│ Rust Engine│ │ PyMongo │
|
|
106
|
+
│ (_smongo_ │ │ Driver │
|
|
107
|
+
│ core) │ └───────────┘
|
|
108
|
+
│ ┌───────┐ │
|
|
109
|
+
│ │ MQL │ │ ◄── compile_query, apply_update (Rust)
|
|
110
|
+
│ │Compiler│ │ $gt $lt $in $ne $or $and ...
|
|
111
|
+
│ └───┬───┘ │
|
|
112
|
+
│ │ │
|
|
113
|
+
│ ┌───┴───┐ │
|
|
114
|
+
│ │ Query │ │ ◄── RustQueryPlanner: prefix-scoring
|
|
115
|
+
│ │Planner│ │ index scan / pk lookup / coll scan
|
|
116
|
+
│ └───┬───┘ │
|
|
117
|
+
│ │ │
|
|
118
|
+
│ ┌───┴───┐ │
|
|
119
|
+
│ │B-Tree │ │ ◄── RustIndexManager: WiredTiger tables
|
|
120
|
+
│ │Indexes│ │ single, compound, unique, sparse
|
|
121
|
+
│ └───┬───┘ │
|
|
122
|
+
│ │ │
|
|
123
|
+
│ ┌───┴───┐ │
|
|
124
|
+
│ │WiredTi│ │ ◄── Direct C FFI via wiredtiger-sys
|
|
125
|
+
│ │ ger │ │ key=_id, value=BSON (transactional)
|
|
126
|
+
│ └───┬───┘ │
|
|
127
|
+
│ │ │
|
|
128
|
+
│ ┌───┴───┐ │ ┌──────────────┐
|
|
129
|
+
│ │ Oplog │ │──────►│ SyncManager │──► Atlas
|
|
130
|
+
│ └───────┘ │ │ push / pull │
|
|
131
|
+
└─────────────┘ │ conflict res │
|
|
132
|
+
└──────────────┘
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### Rust-Powered Engine (Required)
|
|
136
|
+
|
|
137
|
+
The compiled Rust extension (`_smongo_core`) is **required** and provides all performance-critical paths via [PyO3](https://pyo3.rs/). `MongoClient("local://...")` creates a Python `LocalClient` that delegates all storage operations, query compilation, expression evaluation, and update application to Rust:
|
|
138
|
+
|
|
139
|
+
- **Storage Engine** -- `RustLocalClient`, `RustLocalDB`, `RustLocalCollection` with direct WiredTiger C FFI (`wiredtiger-sys` sub-crate, `dlopen`). Every insert, find, update, delete, and index operation flows through Rust.
|
|
140
|
+
- **B-Tree Indexes & Query Planner** -- `RustIndexManager` and `RustQueryPlanner` manage all index types (single, compound, unique, sparse, text, hashed, wildcard) with Rust-native key encoding and plan scoring.
|
|
141
|
+
- **Streaming Cursors** -- `RustStreamingCursor` lazily iterates WiredTiger cursors for collection scan, PK lookup, index-backed, and OR-union paths.
|
|
142
|
+
- **ACID Transactions** -- `RustTransactionSession` with thread-local session override ensures all operations within a transaction route through the same WiredTiger session.
|
|
143
|
+
- **BSON Serialization** -- encode/decode documents using the Rust `bson` crate, eliminating Python tree walks and cutting write time by ~60%
|
|
144
|
+
- **MQL Query Compiler** -- `compile_query` with all 18 query operators, compiled predicate evaluation
|
|
145
|
+
- **Expression Engine** -- `resolve_expr` with all 72 aggregation expression operators
|
|
146
|
+
- **Update Engine** -- `apply_update` with all 14 update operators, positional operators, and pipeline updates
|
|
147
|
+
- **Aggregation Pipeline** -- Full pipeline dispatch in Rust via `aggregate_pipeline`. All 25+ stages including `$group` (17 accumulators), `$lookup` (equality + sub-pipeline), `$graphLookup`, `$facet`. I/O-dominated stages (`$out`, `$merge`, `$unionWith`) and `$vectorSearch` delegate to Python.
|
|
148
|
+
- **Wire Protocol** -- Tokio-based async TCP server with Rust command handlers for all ~77 commands. BSON boundary normalization, cursor registry, session management, and the profiler are all in Rust. On the wire, `find` applies sort, skip, limit, and projection in Rust; `aggregate` dispatches straight into the Rust pipeline (`aggregate_pipeline`). Oplog and admin/metadata WiredTiger work uses typed Rust session/cursor borrows (no Python dispatch on those WT hot paths).
|
|
149
|
+
- **Schema Validation** -- `$jsonSchema` document validation runs entirely in Rust (`schema.rs`). Supports `required`, `properties`, `type`/`bsonType`, numeric/string/array constraints, `enum`, `pattern`, `additionalProperties`, and nested objects with ReDoS-safe regex matching.
|
|
150
|
+
|
|
151
|
+
The Python modules that remain are high-level orchestration (aggregation `Cursor` for the Python API, `SyncManager`) that calls *into* the Rust storage layer. See [BYE-BYE-GIL.md](BYE-BYE-GIL.md) for the full story.
|
|
152
|
+
|
|
153
|
+
- **Free-Threaded Python** -- smongo supports Python 3.13+ free-threaded builds (`python3.13t`). The extension declares `gil_used = false` and uses `PyOnceLock` for deadlock-free initialization. All `unsafe impl Send/Sync` are backed by Rust-native locks, not the GIL. Under the free-threaded interpreter, the wire protocol server can handle concurrent connections with true thread parallelism.
|
|
154
|
+
|
|
155
|
+
---
|
|
156
|
+
|
|
157
|
+
## Features
|
|
158
|
+
|
|
159
|
+
### Storage -- WiredTiger B-Trees with Streaming Reads
|
|
160
|
+
MongoDB acquired WiredTiger in 2014 and made it the default storage engine. smongo uses the same technology locally: documents are stored as **native BSON bytes** in WiredTiger B-Tree tables keyed by `_id`. Every write is wrapped in a **WiredTiger transaction** (data + indexes + oplog in a single atomic unit), a **per-collection ReadWriteLock** ensures thread safety with concurrent reader access, and the **query planner accelerates writes** (update/delete by `_id` or indexed field are O(log n), not O(n)). ACID atomicity, crash recovery, and efficient disk I/O -- for free.
|
|
161
|
+
|
|
162
|
+
**Reads are lazy.** `Collection.find()` returns a chainable `Cursor` backed by a `RustStreamingCursor` that pulls documents from WiredTiger one at a time. The streaming cursor consults the query planner and executes the optimal strategy (PK lookup, index scan, `$in` multi-point scan, `$or`-union, or collection scan) -- all lazily. Chained `.limit(10)` without `.sort()` deserializes only 10 documents from BSON regardless of how many match. `find_one()` and `count_documents()` use the same streaming path so they never build intermediate lists.
|
|
163
|
+
|
|
164
|
+
### MQL Compiler
|
|
165
|
+
A Rust-accelerated compiler translates MongoDB query dictionaries into executable predicates. Supported query operators: `$gt`, `$lt`, `$gte`, `$lte`, `$eq`, `$ne`, `$in`, `$nin`, `$exists`, `$regex`, `$not`, `$nor`, `$all`, `$elemMatch`, `$size`, `$type`, `$or`, `$and`. Update operators include `$set`, `$inc`, `$push`, `$unset`, `$addToSet`, `$pull`, `$pop`, `$min`, `$max`, `$rename`, `$currentDate`, `$mul`. Dot-notation paths work everywhere (`"address.city"`).
|
|
166
|
+
|
|
167
|
+
### Aggregation Pipeline
|
|
168
|
+
In-memory pipeline execution with 25+ stages: `$match`, `$group`, `$project`, `$sort`, `$limit`, `$skip`, `$unwind`, `$lookup`, `$graphLookup`, `$unionWith`, `$addFields`/`$set`, `$count`, `$replaceRoot`/`$replaceWith`, `$sample`, `$bucket`, `$bucketAuto`, `$sortByCount`, `$redact`, `$setWindowFields`, `$unset`, `$vectorSearch`, `$facet`, `$out`, `$merge`. Memory-bounded with spill-to-disk for `$sort` and `$group` when `allowDiskUse=True`. Group accumulators: `$sum`, `$avg`, `$min`, `$max`, `$push`, `$addToSet`, `$first`, `$last`, `$firstN`, `$lastN`, `$stdDevPop`, `$stdDevSamp`, `$mergeObjects`, `$top`, `$bottom`, `$topN`, `$bottomN`.
|
|
169
|
+
|
|
170
|
+
`$vectorSearch` runs fully in memory with:
|
|
171
|
+
- **USearch** (`usearch`) for fast RAM-native vector indexing/search
|
|
172
|
+
- **NumPy** fallback when USearch is unavailable
|
|
173
|
+
|
|
174
|
+
`$facet` runs independent sub-pipelines against the same input. `$out` replaces a target collection's contents. `$merge` upserts into a target collection with `whenMatched`/`whenNotMatched` semantics.
|
|
175
|
+
|
|
176
|
+
Build analytics and similarity queries that run locally with no external vector DB.
|
|
177
|
+
|
|
178
|
+
### B-Tree Indexes & Query Planner
|
|
179
|
+
Create single-field, compound, unique, and sparse indexes backed by dedicated WiredTiger tables. The query planner scores candidate indexes and picks the optimal execution path:
|
|
180
|
+
- **Index Scan** -- range or equality scan on the best-matching index
|
|
181
|
+
- **PK Lookup** -- O(log n) direct `_id` fetch
|
|
182
|
+
- **Collection Scan** -- fallback full-table scan
|
|
183
|
+
|
|
184
|
+
Sortable key encoding (IEEE 754 bit-flipping for numbers, hex inversion for descending fields) ensures correct lexicographic ordering across mixed types.
|
|
185
|
+
|
|
186
|
+
### Oplog (Operations Log)
|
|
187
|
+
Every mutation (insert, update, delete, index create/drop) is append-logged to a dedicated WiredTiger table with timestamps, version counters, and checksums. The oplog supports **compaction** (`compact_oplog(keep=N)`) to bound growth in long-running deployments, and auto-compacts after successful sync push cycles.
|
|
188
|
+
|
|
189
|
+
### Bidirectional Sync
|
|
190
|
+
`SyncManager` syncs local state to any MongoDB-compatible remote:
|
|
191
|
+
- **Push**: tail the oplog, batch `bulk_write` to remote, auto-compact after checkpoint
|
|
192
|
+
- **Pull**: change streams (preferred) or timestamp-based polling, merge remote changes locally
|
|
193
|
+
- **Index sync**: index definitions flow both directions
|
|
194
|
+
- **Conflict resolution**: Last-Write-Wins, local-wins, remote-wins, field-level merge, or a custom callable
|
|
195
|
+
- **Checkpointing**: survives crashes and restarts via a WiredTiger checkpoint table
|
|
196
|
+
- **Auto-sync**: background thread with configurable interval
|
|
197
|
+
- **Hybrid mode**: `MongoClient("local://...", sync="mongodb+srv://...")` auto-registers and starts sync
|
|
198
|
+
- **Exponential backoff**: on consecutive failures, backoff doubles up to 300s
|
|
199
|
+
- **Sync metrics**: `status()` returns `pushed`, `pulled`, `conflicts`, `errors` counters and a `state` field
|
|
200
|
+
- **Selective sync filters**: per-collection MQL filters control which documents are pushed/pulled
|
|
201
|
+
|
|
202
|
+
### Wire Protocol Server
|
|
203
|
+
smongo speaks the real MongoDB binary protocol (OP_MSG, OP_COMPRESSED, OP_QUERY). Point `mongosh`, PyMongo, Compass, or any MongoDB driver at `localhost:27017` and they'll talk to the embedded engine as if it were a real `mongod`. The Docker Compose setup exposes the wire server on port 27018 alongside the web dashboard -- `docker compose up` and connect Compass immediately. Small database, real protocol.
|
|
204
|
+
|
|
205
|
+
### Interactive Web Dashboard
|
|
206
|
+
A full-featured GUI at `localhost:5000` with:
|
|
207
|
+
|
|
208
|
+
| Tab | What it does |
|
|
209
|
+
|---|---|
|
|
210
|
+
| **Shell** | mongosh-compatible terminal -- `db.users.find({})`, `db.users.aggregate([...])`, arrow-key history, execution timing |
|
|
211
|
+
| **Documents** | Browse, insert, delete docs in a rich table with formatted values |
|
|
212
|
+
| **Find & Query** | Clickable query chips, plan badges (INDEX SCAN / COLL SCAN / PK LOOKUP), timing |
|
|
213
|
+
| **Aggregation** | Visual pipeline builder with draggable stages and pre-built example pipelines |
|
|
214
|
+
| **Indexes** | List, create, drop B-Tree indexes; index template chips; query plan tester |
|
|
215
|
+
| **Sync** | Live visualization of local <-> remote, push/pull controls, remote client simulator, conflict metrics |
|
|
216
|
+
| **Oplog** | Color-coded mutation log with timestamps and version numbers |
|
|
217
|
+
|
|
218
|
+
---
|
|
219
|
+
|
|
220
|
+
## Quick Start
|
|
221
|
+
|
|
222
|
+
### Docker Compose (recommended)
|
|
223
|
+
|
|
224
|
+
```bash
|
|
225
|
+
docker compose up --build
|
|
226
|
+
# open http://localhost:5000 -- web dashboard
|
|
227
|
+
# Compass: mongodb://localhost:27018 -- wire protocol (browse with Compass)
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
This starts a MongoDB container (stands in for Atlas), the smongo dashboard, and a wire protocol server. Compass connects to `localhost:27018` out of the box. Sample data is auto-seeded on first run: 10 employees, 5 indexes, everything synced. See [SMONGO-COMPASS.md](SMONGO-COMPASS.md) for the full Compass guide.
|
|
231
|
+
|
|
232
|
+
### Standalone (no Docker, no network)
|
|
233
|
+
|
|
234
|
+
```bash
|
|
235
|
+
pip install -e ".[all]" # installs smongo + builds the Rust extension via maturin
|
|
236
|
+
python demo.py
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
Runs the full embedded engine locally -- indexes, queries, aggregation, oplog -- no MongoDB server. The Rust extension is built automatically by the maturin build backend.
|
|
240
|
+
|
|
241
|
+
---
|
|
242
|
+
|
|
243
|
+
## Wire Protocol Server
|
|
244
|
+
|
|
245
|
+
smongo includes a wire protocol server so that **real drivers** can connect to the embedded engine over TCP.
|
|
246
|
+
|
|
247
|
+
```bash
|
|
248
|
+
# Start the server on the default port
|
|
249
|
+
python -m smongo.wire --port 27017
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
Then connect with any standard MongoDB client:
|
|
253
|
+
|
|
254
|
+
```bash
|
|
255
|
+
mongosh mongodb://localhost:27017/mydb
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
```python
|
|
259
|
+
from pymongo import MongoClient
|
|
260
|
+
client = MongoClient("mongodb://localhost:27017")
|
|
261
|
+
db = client["mydb"]
|
|
262
|
+
db["things"].insert_one({"hello": "wire protocol"})
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
Or use the `WireServer` API directly in Python:
|
|
266
|
+
|
|
267
|
+
```python
|
|
268
|
+
from smongo.wire import WireServer
|
|
269
|
+
|
|
270
|
+
with WireServer("./data", port=27017) as srv:
|
|
271
|
+
input("Press Enter to stop...")
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
**Security features (Rust wire server):**
|
|
275
|
+
- **TLS** via [rustls](https://github.com/rustls/rustls) -- available when using the Rust-native `RustWireServer`
|
|
276
|
+
- **SCRAM-SHA-256** authentication (RFC 7677) -- PBKDF2-hashed credentials persisted in WiredTiger (`table:__users`)
|
|
277
|
+
- **Auth gate** enforces authentication on all commands (handshake commands exempted)
|
|
278
|
+
|
|
279
|
+
> **Note:** TLS and SCRAM authentication are implemented in the Rust wire server (`RustWireServer`). The default Python `WireServer` provides plain TCP without auth. See [WIRE-PROTOCOL.md](WIRE-PROTOCOL.md) for details on both server paths.
|
|
280
|
+
|
|
281
|
+
---
|
|
282
|
+
|
|
283
|
+
## Project Structure
|
|
284
|
+
|
|
285
|
+
```
|
|
286
|
+
smongo/
|
|
287
|
+
__init__.py MongoClient, SyncManager, DuplicateKeyError,
|
|
288
|
+
InsertOne, UpdateOne, UpdateMany,
|
|
289
|
+
DeleteOne, DeleteMany, ReplaceOne, BulkWriteResult
|
|
290
|
+
_smongo_core/ Compiled Rust extension (PyO3) -- the actual engine
|
|
291
|
+
client.py URI-based routing, bulk_write, find_one_and_* facade
|
|
292
|
+
storage/ Storage layer (Python + Rust bridge)
|
|
293
|
+
engine.py LocalClient/LocalDB (Python interface; delegates to Rust)
|
|
294
|
+
collection.py TTLReaper (used by RustLocalCollection)
|
|
295
|
+
locking.py ReadWriteLock (Python fallback; runtime uses Rust)
|
|
296
|
+
results.py InsertResult, UpdateResult, DeleteResult
|
|
297
|
+
streaming.py StreamingCursor (Python fallback; runtime uses RustStreamingCursor)
|
|
298
|
+
helpers.py BSON encode/decode helpers
|
|
299
|
+
query/ MQL compiler package (Rust-accelerated)
|
|
300
|
+
compiler.py compile_query, query operators
|
|
301
|
+
update.py apply_update, positional operators
|
|
302
|
+
expressions.py resolve_expr, 72 expression operators
|
|
303
|
+
paths.py get_value, set_value, unset_value
|
|
304
|
+
aggregation/ Pipeline engine package (25+ stages, Rust-accelerated)
|
|
305
|
+
cursor.py Cursor class (lazy Iterable input), aggregate dispatch
|
|
306
|
+
stages.py Core stages: $match, $group, $sort, etc.
|
|
307
|
+
joins.py $lookup, $graphLookup, $unionWith
|
|
308
|
+
output.py $facet, $out, $merge
|
|
309
|
+
vector.py $vectorSearch (NumPy / USearch)
|
|
310
|
+
index.py Index key encoding, helpers, DuplicateKeyError (runtime: RustIndexManager, RustQueryPlanner)
|
|
311
|
+
oplog.py Append-only operations log with compaction
|
|
312
|
+
sync.py Bidirectional sync with metrics, backoff, selective filters
|
|
313
|
+
objectid.py MongoDB-style ObjectId implementation
|
|
314
|
+
schema.py $jsonSchema validation layer (delegates to Rust)
|
|
315
|
+
wire/ MongoDB binary protocol server (OP_MSG, OP_COMPRESSED)
|
|
316
|
+
commands/ ~77 Rust command handlers (Python fallback for extensions)
|
|
317
|
+
sessions.py Session registry
|
|
318
|
+
transactions.py Transaction state, undo journal
|
|
319
|
+
profiler.py Profiler, OpTracker, TopStats
|
|
320
|
+
|
|
321
|
+
rust/ Rust crate (smongo-core) -- the engine
|
|
322
|
+
src/
|
|
323
|
+
storage_engine.rs RustLocalClient, RustLocalDB
|
|
324
|
+
local_collection.rs RustLocalCollection (CRUD, txns, streaming)
|
|
325
|
+
index_manager.rs RustIndexManager, RustQueryPlanner
|
|
326
|
+
streaming.rs RustStreamingCursor (lazy WiredTiger iteration)
|
|
327
|
+
transaction.rs RustTransactionSession (thread-local session override)
|
|
328
|
+
wt_bridge.rs PyO3 bridge for WiredTiger FFI types
|
|
329
|
+
wt_safe/ Safe RAII wrappers for WiredTiger C API
|
|
330
|
+
wire_commands/ Rust command handlers (~77 commands, typed HandlerFn)
|
|
331
|
+
wire_dispatch.rs Single-downcast command dispatch (ConnectionContext)
|
|
332
|
+
wire_server.rs Tokio async TCP server (TLS via rustls)
|
|
333
|
+
wire_context.rs ConnectionContext, CachedImports (Arc-shared, OnceLock modules)
|
|
334
|
+
cached_modules.rs Process-wide OnceLock cache for stdlib Python modules
|
|
335
|
+
schema.rs $jsonSchema validation engine (ValidationError, validate_document)
|
|
336
|
+
scram.rs SCRAM-SHA-256 authentication (RFC 7677)
|
|
337
|
+
wiredtiger-sys/ Raw FFI bindings for WiredTiger C API (dlopen)
|
|
338
|
+
|
|
339
|
+
web_app.py Flask API + shell endpoint
|
|
340
|
+
templates/
|
|
341
|
+
index.html Single-page dashboard
|
|
342
|
+
static/ CSS, JS assets for dashboard
|
|
343
|
+
|
|
344
|
+
examples/
|
|
345
|
+
basic/
|
|
346
|
+
01_crud.py Insert, find, update, delete, cursor chaining
|
|
347
|
+
02_indexes.py B-tree indexes, query planner, unique constraints
|
|
348
|
+
03_aggregation.py $group, $sort, $project, $unwind, $lookup, $facet
|
|
349
|
+
04_streaming.py Lazy reads: find_one, count, limit short-circuit
|
|
350
|
+
05_schema_validation.py $jsonSchema enforcement on insert and update
|
|
351
|
+
06_bulk_write.py Batch InsertOne, UpdateOne, ReplaceOne, DeleteOne
|
|
352
|
+
07_change_streams.py Real-time watch() + raw oplog inspection
|
|
353
|
+
08_advanced_queries.py $or, $regex, $elemMatch, dot-notation, $not, $all
|
|
354
|
+
patterns/
|
|
355
|
+
ecommerce.py Shopping cart, orders, revenue analytics, dashboards
|
|
356
|
+
iot_timeseries.py 1000+ sensor readings, anomaly detection, facility stats
|
|
357
|
+
content_cms.py Blog CMS: tagging, search, author leaderboard, facets
|
|
358
|
+
|
|
359
|
+
demo.py Standalone CLI demo (no Docker needed)
|
|
360
|
+
Dockerfile Python 3.11 + WiredTiger build deps
|
|
361
|
+
docker-compose.yml App + MongoDB for the full sync experience
|
|
362
|
+
```
|
|
363
|
+
|
|
364
|
+
---
|
|
365
|
+
|
|
366
|
+
## Dev Commands
|
|
367
|
+
|
|
368
|
+
```bash
|
|
369
|
+
make install-test # install test/lint dependencies
|
|
370
|
+
make lint # ruff checks
|
|
371
|
+
make format # ruff formatter
|
|
372
|
+
make test # unit suite (1,090 tests)
|
|
373
|
+
make integration # docker-backed integration suite
|
|
374
|
+
make perf # benchmark suite
|
|
375
|
+
make coverage # coverage report (70% enforced)
|
|
376
|
+
make typecheck # mypy strict
|
|
377
|
+
```
|
|
378
|
+
|
|
379
|
+
---
|
|
380
|
+
|
|
381
|
+
## The API
|
|
382
|
+
|
|
383
|
+
```python
|
|
384
|
+
from smongo import MongoClient, InsertOne, UpdateOne, DeleteOne
|
|
385
|
+
|
|
386
|
+
client = MongoClient("local://data")
|
|
387
|
+
db = client["mydb"]
|
|
388
|
+
coll = db["things"]
|
|
389
|
+
|
|
390
|
+
# CRUD
|
|
391
|
+
coll.insert_one({"x": 1})
|
|
392
|
+
coll.insert_many([{"x": 2}, {"x": 3}])
|
|
393
|
+
coll.find({"x": {"$gt": 1}})
|
|
394
|
+
coll.find_one({"x": 2})
|
|
395
|
+
coll.update_one({"x": 1}, {"$set": {"x": 10}})
|
|
396
|
+
coll.update_many({}, {"$inc": {"x": 1}})
|
|
397
|
+
coll.delete_one({"x": 2})
|
|
398
|
+
coll.delete_many({"x": {"$lt": 5}})
|
|
399
|
+
coll.count_documents({"x": {"$gte": 1}})
|
|
400
|
+
|
|
401
|
+
# Atomic find-and-modify
|
|
402
|
+
coll.find_one_and_update({"x": 1}, {"$set": {"x": 10}}, return_document="after")
|
|
403
|
+
coll.find_one_and_replace({"x": 1}, {"x": 99, "replaced": True})
|
|
404
|
+
coll.find_one_and_delete({"x": 99})
|
|
405
|
+
|
|
406
|
+
# Bulk writes
|
|
407
|
+
coll.bulk_write([
|
|
408
|
+
InsertOne({"x": 100}),
|
|
409
|
+
UpdateOne({"x": 100}, {"$set": {"x": 200}}),
|
|
410
|
+
DeleteOne({"x": 3}),
|
|
411
|
+
])
|
|
412
|
+
|
|
413
|
+
# Indexes
|
|
414
|
+
coll.create_index([("x", 1)])
|
|
415
|
+
coll.create_index("name", unique=True)
|
|
416
|
+
coll.create_index([("city", 1), ("age", -1)])
|
|
417
|
+
coll.list_indexes()
|
|
418
|
+
coll.drop_index("x_1")
|
|
419
|
+
coll.explain({"x": {"$gt": 5}})
|
|
420
|
+
|
|
421
|
+
# Aggregation
|
|
422
|
+
coll.aggregate([
|
|
423
|
+
{"$match": {"status": "active"}},
|
|
424
|
+
{"$group": {"_id": "$dept", "total": {"$sum": "$salary"}}},
|
|
425
|
+
{"$sort": {"total": -1}},
|
|
426
|
+
{"$limit": 10},
|
|
427
|
+
])
|
|
428
|
+
|
|
429
|
+
# $facet -- run parallel sub-pipelines
|
|
430
|
+
coll.aggregate([
|
|
431
|
+
{"$facet": {
|
|
432
|
+
"by_dept": [{"$group": {"_id": "$dept", "count": {"$sum": 1}}}],
|
|
433
|
+
"top_5": [{"$sort": {"salary": -1}}, {"$limit": 5}],
|
|
434
|
+
}},
|
|
435
|
+
])
|
|
436
|
+
|
|
437
|
+
# $merge -- upsert results into another collection
|
|
438
|
+
coll.aggregate([
|
|
439
|
+
{"$group": {"_id": "$dept", "avg_salary": {"$avg": "$salary"}}},
|
|
440
|
+
{"$merge": {"into": "dept_stats", "on": "_id", "whenMatched": "replace"}},
|
|
441
|
+
])
|
|
442
|
+
|
|
443
|
+
# Transparent hybrid sync
|
|
444
|
+
hybrid = MongoClient("local://data", sync="mongodb+srv://user:pass@cluster.mongodb.net")
|
|
445
|
+
hybrid.sync.status() # includes pushed, pulled, conflicts, errors, state
|
|
446
|
+
hybrid.sync.sync_now()
|
|
447
|
+
```
|
|
448
|
+
|
|
449
|
+
---
|
|
450
|
+
|
|
451
|
+
## License
|
|
452
|
+
|
|
453
|
+
See [LICENSE](LICENSE).
|
|
454
|
+
|