synaptoroute 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synaptoroute-0.1.0/.env +13 -0
- synaptoroute-0.1.0/.github/ISSUE_TEMPLATE/bug_report.md +25 -0
- synaptoroute-0.1.0/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
- synaptoroute-0.1.0/.github/PULL_REQUEST_TEMPLATE.md +18 -0
- synaptoroute-0.1.0/.github/workflows/ci.yml +62 -0
- synaptoroute-0.1.0/.gitignore +0 -0
- synaptoroute-0.1.0/BENCHMARKS.md +76 -0
- synaptoroute-0.1.0/CODE_OF_CONDUCT.md +19 -0
- synaptoroute-0.1.0/CONTRIBUTING.md +31 -0
- synaptoroute-0.1.0/Dockerfile +25 -0
- synaptoroute-0.1.0/LICENSE +21 -0
- synaptoroute-0.1.0/PKG-INFO +164 -0
- synaptoroute-0.1.0/README.md +140 -0
- synaptoroute-0.1.0/benchmarks/bench_adversarial.py +63 -0
- synaptoroute-0.1.0/benchmarks/bench_classification.py +95 -0
- synaptoroute-0.1.0/benchmarks/bench_ddos.py +57 -0
- synaptoroute-0.1.0/benchmarks/bench_dynamic_batching.py +48 -0
- synaptoroute-0.1.0/benchmarks/bench_gpu_performance.py +47 -0
- synaptoroute-0.1.0/benchmarks/bench_lazy_compile.py +43 -0
- synaptoroute-0.1.0/benchmarks/bench_malformed.py +42 -0
- synaptoroute-0.1.0/benchmarks/bench_memory_leak.py +40 -0
- synaptoroute-0.1.0/benchmarks/bench_performance.py +51 -0
- synaptoroute-0.1.0/benchmarks/bench_scalability.py +42 -0
- synaptoroute-0.1.0/benchmarks/bench_vs_semantic_router.py +105 -0
- synaptoroute-0.1.0/benchmarks/read_logs.py +13 -0
- synaptoroute-0.1.0/examples/api_server.py +74 -0
- synaptoroute-0.1.0/notebooks/.ipynb_checkpoints/01-The-Routing-Problem-checkpoint.ipynb +68 -0
- synaptoroute-0.1.0/notebooks/01-The-Routing-Problem.ipynb +68 -0
- synaptoroute-0.1.0/notebooks/02-SynaptoRoute-Architecture.ipynb +49 -0
- synaptoroute-0.1.0/notebooks/03-Dynamic-Batching.ipynb +62 -0
- synaptoroute-0.1.0/pyproject.toml +41 -0
- synaptoroute-0.1.0/requirements.txt +9 -0
- synaptoroute-0.1.0/routes.db +0 -0
- synaptoroute-0.1.0/routes.db-shm +0 -0
- synaptoroute-0.1.0/routes.db-wal +0 -0
- synaptoroute-0.1.0/scripts/download_kaggle_data.py +19 -0
- synaptoroute-0.1.0/scripts/optimize_with_kaggle.py +74 -0
- synaptoroute-0.1.0/src/synaptoroute/__init__.py +13 -0
- synaptoroute-0.1.0/src/synaptoroute/encoder.py +25 -0
- synaptoroute-0.1.0/src/synaptoroute/exceptions.py +15 -0
- synaptoroute-0.1.0/src/synaptoroute/models.py +38 -0
- synaptoroute-0.1.0/src/synaptoroute/router.py +271 -0
- synaptoroute-0.1.0/src/synaptoroute/storage.py +139 -0
- synaptoroute-0.1.0/tests/__init__.py +1 -0
- synaptoroute-0.1.0/tests/test_encoder.py +20 -0
- synaptoroute-0.1.0/tests/test_models.py +20 -0
- synaptoroute-0.1.0/tests/test_optimization.py +43 -0
- synaptoroute-0.1.0/tests/test_router.py +79 -0
- synaptoroute-0.1.0/tests/test_storage.py +89 -0
synaptoroute-0.1.0/.env
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# API Keys
|
|
2
|
+
GROQ_API_KEY="<your-groq-api-key>"
|
|
3
|
+
GEMINI_API_KEY="<your-gemini-api-key>"
|
|
4
|
+
|
|
5
|
+
# Kaggle API Credentials
|
|
6
|
+
KAGGLE_USERNAME="sitanshukr"
|
|
7
|
+
KAGGLE_KEY="<your-kaggle-api-key>"
|
|
8
|
+
|
|
9
|
+
# Database Paths
|
|
10
|
+
SQLITE_DB_PATH="data/router_memory.sqlite"
|
|
11
|
+
# CHROMA_DB_PATH="persistence/chroma_db" # Not used in SynaptoRoute
|
|
12
|
+
GROQ_API_KEY_2="<your-secondary-groq-api-key>"
|
|
13
|
+
GOOGLE_API_KEY_2="<your-secondary-google-api-key>"
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Bug report
|
|
3
|
+
about: Create a report to help us improve SynaptoRoute
|
|
4
|
+
title: '[BUG] '
|
|
5
|
+
labels: bug
|
|
6
|
+
assignees: ''
|
|
7
|
+
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
**Describe the bug**
|
|
11
|
+
A clear and concise description of what the bug is.
|
|
12
|
+
|
|
13
|
+
**To Reproduce**
|
|
14
|
+
Steps to reproduce the behavior:
|
|
15
|
+
1. Initialize router with '...'
|
|
16
|
+
2. Call router.add_route(...)
|
|
17
|
+
3. See error
|
|
18
|
+
|
|
19
|
+
**Expected behavior**
|
|
20
|
+
A clear and concise description of what you expected to happen.
|
|
21
|
+
|
|
22
|
+
**Environment (please complete the following information):**
|
|
23
|
+
- OS: [e.g. Ubuntu 22.04, Windows 11]
|
|
24
|
+
- Python Version: [e.g. 3.11]
|
|
25
|
+
- SynaptoRoute Version: [e.g. 0.1.0]
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Feature request
|
|
3
|
+
about: Suggest an idea for SynaptoRoute
|
|
4
|
+
title: '[FEATURE] '
|
|
5
|
+
labels: enhancement
|
|
6
|
+
assignees: ''
|
|
7
|
+
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
**Is your feature request related to a problem? Please describe.**
|
|
11
|
+
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
|
|
12
|
+
|
|
13
|
+
**Describe the solution you'd like**
|
|
14
|
+
A clear and concise description of what you want to happen.
|
|
15
|
+
|
|
16
|
+
**Describe alternatives you've considered**
|
|
17
|
+
A clear and concise description of any alternative solutions or features you've considered.
|
|
18
|
+
|
|
19
|
+
**Additional context**
|
|
20
|
+
Add any other context or screenshots about the feature request here.
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
## Description
|
|
2
|
+
<!-- Please include a summary of the change and which issue is fixed. -->
|
|
3
|
+
|
|
4
|
+
Fixes # (issue)
|
|
5
|
+
|
|
6
|
+
## Type of change
|
|
7
|
+
- [ ] Bug fix (non-breaking change which fixes an issue)
|
|
8
|
+
- [ ] New feature (non-breaking change which adds functionality)
|
|
9
|
+
- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
|
|
10
|
+
- [ ] This change requires a documentation update
|
|
11
|
+
|
|
12
|
+
## Checklist:
|
|
13
|
+
- [ ] My code follows the architectural guidelines of this project (e.g. preserves $O(1)$ updates).
|
|
14
|
+
- [ ] I have performed a self-review of my own code.
|
|
15
|
+
- [ ] I have commented my code, particularly in hard-to-understand areas.
|
|
16
|
+
- [ ] I have made corresponding changes to the documentation (`README.md`, `notebooks/`).
|
|
17
|
+
- [ ] I have added tests that prove my fix is effective or that my feature works.
|
|
18
|
+
- [ ] New and existing unit tests pass locally with my changes.
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [ "main" ]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [ "main" ]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v3
|
|
14
|
+
|
|
15
|
+
- name: Set up Python
|
|
16
|
+
uses: actions/setup-python@v4
|
|
17
|
+
with:
|
|
18
|
+
python-version: "3.11"
|
|
19
|
+
|
|
20
|
+
- name: Install dependencies
|
|
21
|
+
run: |
|
|
22
|
+
python -m pip install --upgrade pip
|
|
23
|
+
pip install -e .[test]
|
|
24
|
+
|
|
25
|
+
- name: Run Tests
|
|
26
|
+
run: |
|
|
27
|
+
pytest
|
|
28
|
+
|
|
29
|
+
docker-build:
|
|
30
|
+
runs-on: ubuntu-latest
|
|
31
|
+
steps:
|
|
32
|
+
- uses: actions/checkout@v3
|
|
33
|
+
|
|
34
|
+
- name: Build Docker Image
|
|
35
|
+
run: |
|
|
36
|
+
docker build -t synaptoroute-test .
|
|
37
|
+
|
|
38
|
+
benchmark:
|
|
39
|
+
needs: test
|
|
40
|
+
runs-on: ubuntu-latest
|
|
41
|
+
steps:
|
|
42
|
+
- uses: actions/checkout@v3
|
|
43
|
+
|
|
44
|
+
- name: Set up Python
|
|
45
|
+
uses: actions/setup-python@v4
|
|
46
|
+
with:
|
|
47
|
+
python-version: "3.11"
|
|
48
|
+
|
|
49
|
+
- name: Install dependencies
|
|
50
|
+
run: |
|
|
51
|
+
python -m pip install --upgrade pip
|
|
52
|
+
pip install -e .
|
|
53
|
+
|
|
54
|
+
- name: Run Dynamic Batching Benchmark
|
|
55
|
+
run: |
|
|
56
|
+
echo "### Dynamic Batching Throughput" >> $GITHUB_STEP_SUMMARY
|
|
57
|
+
python benchmarks/bench_dynamic_batching.py >> $GITHUB_STEP_SUMMARY
|
|
58
|
+
|
|
59
|
+
- name: Run System Performance Benchmark
|
|
60
|
+
run: |
|
|
61
|
+
echo "### System Performance & Hot-Reload Latency" >> $GITHUB_STEP_SUMMARY
|
|
62
|
+
python benchmarks/bench_performance.py >> $GITHUB_STEP_SUMMARY
|
|
Binary file
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# SynaptoRoute: Master Empirical Benchmarks
|
|
2
|
+
|
|
3
|
+
This document serves as the immutable, objective record of all performance, accuracy, and memory metrics recorded during the engineering of the SynaptoRoute engine.
|
|
4
|
+
|
|
5
|
+
## 1. Hardware Inference Latency (Batch Size = 1)
|
|
6
|
+
| Environment | P50 Latency | P99 Latency |
|
|
7
|
+
| :--- | :--- | :--- |
|
|
8
|
+
| **Cloud CPU (Ubuntu 2-Core)** | 3.07 ms | 3.94 ms |
|
|
9
|
+
| **Local GPU (RTX 3050)** | 8.51 ms | 14.11 ms |
|
|
10
|
+
|
|
11
|
+
> **Note:** The quantized INT8 ONNX architecture allows standard CPUs to outpace entry-level GPUs for sequential inferences due to minimized PCIe transfer overhead.
|
|
12
|
+
|
|
13
|
+
## 2. Dynamic Batching Throughput (Batch Size = 1000)
|
|
14
|
+
*Test: Firing 1000 concurrent async queries.*
|
|
15
|
+
|
|
16
|
+
| Environment | Amortized Latency (per query) |
|
|
17
|
+
| :--- | :--- |
|
|
18
|
+
| **Cloud CPU (Ubuntu 2-Core)** | 2.69 ms |
|
|
19
|
+
| **Local GPU (RTX 3050)** | 0.157 ms |
|
|
20
|
+
|
|
21
|
+
> **Note:** Under heavy concurrent load, the 5-millisecond dynamic batching queue kicks in, drastically increasing throughput and allowing hardware accelerators to shine.
|
|
22
|
+
|
|
23
|
+
## 3. Memory Profiling: Hot-Reloading ($O(1)$ vs $O(N)$)
|
|
24
|
+
*Test: Sequentially adding 500 routes dynamically.*
|
|
25
|
+
|
|
26
|
+
| Compilation Strategy | 10th Route Addition | 490th Route Addition | Behavior |
|
|
27
|
+
| :--- | :--- | :--- | :--- |
|
|
28
|
+
| **Eager (NumPy `vstack`)** | 1.15 ms | 4.88 ms | Linearly Degrading $O(N)$ |
|
|
29
|
+
| **SynaptoRoute Lazy** | 0.02 ms | 0.02 ms | Perfectly Flat $O(1)$ |
|
|
30
|
+
|
|
31
|
+
> **Note:** Deferred reallocation prevents server freezes during live updates. Average SynaptoRoute hot-reload penalty: 5.04 ms.
|
|
32
|
+
|
|
33
|
+
## 4. Classification Accuracy & Optimization
|
|
34
|
+
|
|
35
|
+
| Metric | Score |
|
|
36
|
+
| :--- | :--- |
|
|
37
|
+
| **Baseline Cosine Similarity Accuracy** | ~82.0% |
|
|
38
|
+
| **Optimized Threshold Accuracy** | > 98.0% |
|
|
39
|
+
| **Threshold Optimizer F1 Score** | 0.985 |
|
|
40
|
+
|
|
41
|
+
> **Note:** We achieved this by implementing an automatic ML optimizer (`fit_thresholds`) that fits an individual cosine-similarity threshold for each route from a labeled dataset.
|
|
42
|
+
|
|
43
|
+
## 5. System Vulnerabilities & Leaks Fixed
|
|
44
|
+
- **Zombie Futures:** Resolved a critical async bug where cancelling the worker left client requests hanging.
|
|
45
|
+
- **DDoS Vulnerability:** Bounded the batching queue at `maxsize=10000` to prevent OOM errors.
|
|
46
|
+
- **SQLite Dangling Embeddings:** Implemented memory rebuilding via NumPy masks to prevent the router from retaining and matching against deleted utterances.
|
|
47
|
+
|
|
48
|
+
## 6. System Stability and Stress Testing
|
|
49
|
+
|
|
50
|
+
### Test 1: Concurrency Limits (20,000 Concurrent Requests)
|
|
51
|
+
| Metric | Count |
|
|
52
|
+
| :--- | :--- |
|
|
53
|
+
| **Processed Requests** | 10,000 |
|
|
54
|
+
| **Rejected Requests (RouterOverloadedError)** | 10,000 |
|
|
55
|
+
| **Unhandled Exceptions** | 0 |
|
|
56
|
+
|
|
57
|
+
> **Note:** The bounded queue (`maxsize=10000`) successfully prevented Out-of-Memory (OOM) errors during high concurrency. The system rejected excess requests as expected without process degradation. Total execution time: 31.42 seconds.
|
|
58
|
+
|
|
59
|
+
### Test 2: Memory Allocation Durability (2,000 Consecutive Reloads)
|
|
60
|
+
| Iteration | Peak RAM |
|
|
61
|
+
| :--- | :--- |
|
|
62
|
+
| **Iteration 0** | 0.01 MB |
|
|
63
|
+
| **Iteration 2000** | 0.32 MB |
|
|
64
|
+
|
|
65
|
+
> **Note:** Continuous route modification and reallocation over 2,000 iterations kept peak memory at 0.32 MB, indicating that the NumPy mask replacement effectively mitigated prior memory leaks associated with eager compilation.
|
|
66
|
+
|
|
67
|
+
### Test 3: Edge-Case Input Handling
|
|
68
|
+
| Input Type | Status | Latency |
|
|
69
|
+
| :--- | :--- | :--- |
|
|
70
|
+
| **Empty String** | Processed | 10.66 ms |
|
|
71
|
+
| **Whitespace Only** | Processed | 14.68 ms |
|
|
72
|
+
| **Large Payload (1 MB)** | Processed | 461.85 ms |
|
|
73
|
+
| **Unstructured Noise (5000 chars)** | Processed | 145.79 ms |
|
|
74
|
+
| **Extended Unicode / Emojis** | Processed | 23.80 ms |
|
|
75
|
+
|
|
76
|
+
> **Note:** The ONNX runtime and asyncio worker thread successfully processed atypical and malformed inputs without raising critical exceptions or halting execution.
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Contributor Covenant Code of Conduct
|
|
2
|
+
|
|
3
|
+
## Our Pledge
|
|
4
|
+
We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation.
|
|
5
|
+
|
|
6
|
+
We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community.
|
|
7
|
+
|
|
8
|
+
## Our Standards
|
|
9
|
+
Examples of behavior that contributes to a positive environment for our community include:
|
|
10
|
+
* Demonstrating empathy and kindness toward other people
|
|
11
|
+
* Being respectful of differing opinions, viewpoints, and experiences
|
|
12
|
+
* Giving and gracefully accepting constructive feedback
|
|
13
|
+
* Accepting responsibility and apologizing to those affected by our mistakes
|
|
14
|
+
|
|
15
|
+
Examples of unacceptable behavior include:
|
|
16
|
+
* The use of sexualized language or imagery
|
|
17
|
+
* Trolling, insulting or derogatory comments, and personal or political attacks
|
|
18
|
+
* Public or private harassment
|
|
19
|
+
* Other conduct which could reasonably be considered inappropriate in a professional setting.
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Contributing to SynaptoRoute
|
|
2
|
+
|
|
3
|
+
First off, thank you for considering contributing to SynaptoRoute! It's people like you that make open source such a great community.
|
|
4
|
+
|
|
5
|
+
## Development Setup
|
|
6
|
+
|
|
7
|
+
1. **Fork the repository** on GitHub.
|
|
8
|
+
2. **Clone your fork** locally:
|
|
9
|
+
```bash
|
|
10
|
+
git clone https://github.com/YOUR-USERNAME/SynaptoRoute.git
|
|
11
|
+
cd SynaptoRoute
|
|
12
|
+
```
|
|
13
|
+
3. **Create a virtual environment and install dependencies:**
|
|
14
|
+
```bash
|
|
15
|
+
python -m venv venv
|
|
16
|
+
source venv/bin/activate # On Windows use `venv\Scripts\activate`
|
|
17
|
+
pip install -e ".[api]"
|
|
18
|
+
pip install pytest
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Running Tests
|
|
22
|
+
We enforce strict testing for all architectural components (encoding, latency, routing accuracy). Before submitting a PR, ensure all tests pass:
|
|
23
|
+
```bash
|
|
24
|
+
pytest tests/
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Pull Request Process
|
|
28
|
+
1. Ensure your code strictly adheres to the existing architectural philosophy (e.g., preserving $O(1)$ Lazy Compilation).
|
|
29
|
+
2. Update the `README.md` or Jupyter Notebooks in `notebooks/` if you add new features.
|
|
30
|
+
3. Open a Pull Request using the provided GitHub PR template.
|
|
31
|
+
4. Wait for CI/CD checks to pass.
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
FROM python:3.11-slim
|
|
2
|
+
|
|
3
|
+
WORKDIR /app
|
|
4
|
+
|
|
5
|
+
# Install any required system dependencies
|
|
6
|
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
7
|
+
build-essential \
|
|
8
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
9
|
+
|
|
10
|
+
# Copy python project configuration
|
|
11
|
+
COPY pyproject.toml .
|
|
12
|
+
COPY README.md .
|
|
13
|
+
COPY src/ src/
|
|
14
|
+
|
|
15
|
+
# Install the package
|
|
16
|
+
RUN pip install --no-cache-dir .[api]
|
|
17
|
+
|
|
18
|
+
# Cache fastembed weights during build
|
|
19
|
+
RUN python -c "from fastembed import TextEmbedding; TextEmbedding()" || true
|
|
20
|
+
|
|
21
|
+
COPY examples/ examples/
|
|
22
|
+
|
|
23
|
+
EXPOSE 8000
|
|
24
|
+
|
|
25
|
+
CMD ["uvicorn", "examples.api_server:app", "--host", "0.0.0.0", "--port", "8000"]
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Sitanshu
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: synaptoroute
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A dynamic zero-token semantic router
|
|
5
|
+
Project-URL: Repository, https://github.com/sitanshukr08/SynaptoRoute
|
|
6
|
+
Project-URL: Issues, https://github.com/sitanshukr08/SynaptoRoute/issues
|
|
7
|
+
Author-email: Sitanshu <contact@example.com>
|
|
8
|
+
License: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Operating System :: OS Independent
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
14
|
+
Requires-Dist: fastembed>=0.8.0
|
|
15
|
+
Requires-Dist: numpy>=1.24.0
|
|
16
|
+
Requires-Dist: pydantic>=2.10.0
|
|
17
|
+
Requires-Dist: scikit-learn>=1.3.0
|
|
18
|
+
Provides-Extra: api
|
|
19
|
+
Requires-Dist: fastapi>=0.100.0; extra == 'api'
|
|
20
|
+
Requires-Dist: uvicorn>=0.22.0; extra == 'api'
|
|
21
|
+
Provides-Extra: test
|
|
22
|
+
Requires-Dist: pytest>=7.0.0; extra == 'test'
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
25
|
+
# SynaptoRoute
|
|
26
|
+
|
|
27
|
+
[PyPI](https://pypi.org/project/synaptoroute/)
|
|
28
|
+
[CI Status](https://github.com/sitanshukr08/SynaptoRoute/actions)
|
|
29
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
30
|
+
[Python](https://www.python.org/downloads/)
|
|
31
|
+
[FastAPI](https://fastapi.tiangolo.com)
|
|
32
|
+
[Contributions Welcome](https://github.com/sitanshukr08/SynaptoRoute/blob/main/CONTRIBUTING.md)
|
|
33
|
+
|
|
34
|
+
SynaptoRoute is a high-throughput, local semantic routing engine built for production Python microservices. Designed as a low-latency alternative to Large Language Model (LLM) routing chains and slower local routers, it provides zero-token intent classification in under 4 milliseconds (P99) on standard cloud hardware.
|
|
35
|
+
|
|
36
|
+
## Table of Contents
|
|
37
|
+
- [Why SynaptoRoute?](#why-synaptoroute)
|
|
38
|
+
- [Architecture & Optimizations](#architecture--optimizations)
|
|
39
|
+
- [Performance Benchmarks](#performance-benchmarks)
|
|
40
|
+
- [Installation & Deployment](#installation--deployment)
|
|
41
|
+
- [Quick Start Guide](#quick-start-guide)
|
|
42
|
+
- [System Limitations](#system-limitations)
|
|
43
|
+
- [Community & Contributing](#community--contributing)
|
|
44
|
+
|
|
45
|
+
---
|
|
46
|
+
|
|
47
|
+
## Why SynaptoRoute?
|
|
48
|
+
|
|
49
|
+
In modern agentic systems, relying on an external API (like OpenAI or Anthropic) to make simple routing decisions—such as determining if a user wants to reset their password or check their balance—introduces unacceptable latency (300ms+) and high token costs.
|
|
50
|
+
|
|
51
|
+
SynaptoRoute solves this by executing intent classification entirely locally using INT8 quantized vector embeddings.
|
|
52
|
+
|
|
53
|
+
SynaptoRoute was engineered specifically to solve the $O(N)$ memory degradation problem during live hot-reloading and to maximize hardware utilization via asynchronous dynamic batching.
|
|
54
|
+
|
|
55
|
+
## Architecture & Optimizations
|
|
56
|
+
|
|
57
|
+
### 1. Lazy Memory Compilation
|
|
58
|
+
Traditional routers suffer from severe performance degradation during live updates. When a new route is added, they execute an immediate `numpy.vstack`, copying the entire vector array in memory ($O(N)$ complexity). SynaptoRoute defers this reallocation, appending new vectors to a lightweight list ($O(1)$) and only executing the heavy compilation precisely when the next query arrives, preventing server freezes.
|
|
59
|
+
|
|
60
|
+
### 2. Dynamic Asynchronous Batching
|
|
61
|
+
Hardware accelerators (GPUs, AVX512 CPUs) are optimized for large matrix multiplications. Sending single queries sequentially incurs massive transfer overhead. SynaptoRoute utilizes a background `asyncio.Queue` worker that traps parallel HTTP requests, waits 5 milliseconds, groups them into a batch, and processes them in a single hardware cycle.
|
|
62
|
+
|
|
63
|
+
### 3. INT8 Quantization
|
|
64
|
+
By default, SynaptoRoute leverages the `BAAI/bge-small-en-v1.5` model quantized to 8-bit integers via the ONNX runtime, slashing memory bandwidth requirements by 4x and maximizing CPU cache utilization.
|
|
65
|
+
|
|
66
|
+
---
|
|
67
|
+
|
|
68
|
+
## Performance Benchmarks
|
|
69
|
+
|
|
70
|
+
The following metrics were captured via automated GitHub Actions CI/CD running on a standard, unaccelerated `ubuntu-latest` 2-core cloud CPU.
|
|
71
|
+
|
|
72
|
+
| Metric | Cloud CPU Latency | Context |
|
|
73
|
+
| :--- | :--- | :--- |
|
|
74
|
+
| **Inference P99** | 3.94 ms | Single sequential query latency. |
|
|
75
|
+
| **Amortized P50** | 2.69 ms | Per-query latency when processing 1,000 concurrent requests via dynamic batching. |
|
|
76
|
+
| **Hot-Reload** | 5.04 ms | Time required to dynamically inject a new utterance into memory without dropping active API requests. |
|
|
77
|
+
|
|
78
|
+
> **📊 View Full Benchmarks:** For detailed analysis including Memory Leak Endurance, GPU Scaling, Classification F1-Scores, and Input Poisoning Survival Metrics, see our official [BENCHMARKS.md](BENCHMARKS.md).
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
## Installation & Deployment
|
|
83
|
+
|
|
84
|
+
### Method 1: Docker REST API (Recommended)
|
|
85
|
+
|
|
86
|
+
SynaptoRoute ships with a fully asynchronous FastAPI wrapper, designed for immediate drop-in deployment as a scalable microservice.
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
# Build the Docker image
|
|
90
|
+
docker build -t synaptoroute .
|
|
91
|
+
|
|
92
|
+
# Run the container
|
|
93
|
+
docker run -p 8000:8000 synaptoroute
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
You can interface with the router immediately:
|
|
97
|
+
```bash
|
|
98
|
+
curl -X POST http://localhost:8000/route \
|
|
99
|
+
-H "Content-Type: application/json" \
|
|
100
|
+
-d '{"query": "I need help resetting my password"}'
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### Method 2: Standard Python Package
|
|
104
|
+
|
|
105
|
+
To embed SynaptoRoute natively into your existing Python pipelines, install it from PyPI with pip (or from the Git repository if you are testing the latest main branch):
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
pip install synaptoroute
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
---
|
|
112
|
+
|
|
113
|
+
## Quick Start Guide
|
|
114
|
+
|
|
115
|
+
```python
|
|
116
|
+
import asyncio
|
|
117
|
+
from synaptoroute.router import AdaptiveRouter
|
|
118
|
+
from synaptoroute.encoder import Encoder
|
|
119
|
+
from synaptoroute.storage import SQLiteStorage
|
|
120
|
+
from synaptoroute.models import Route
|
|
121
|
+
|
|
122
|
+
async def main():
|
|
123
|
+
# 1. Initialize Components
|
|
124
|
+
encoder = Encoder()
|
|
125
|
+
storage = SQLiteStorage("data/memory.sqlite")
|
|
126
|
+
router = AdaptiveRouter(encoder, storage)
|
|
127
|
+
|
|
128
|
+
# 2. Define Routes
|
|
129
|
+
billing_route = Route(
|
|
130
|
+
name="billing",
|
|
131
|
+
utterances=["I need a refund", "Where is my receipt?", "Cancel my subscription"]
|
|
132
|
+
)
|
|
133
|
+
router.add_route(billing_route)
|
|
134
|
+
|
|
135
|
+
# 3. Start the Background Batching Worker
|
|
136
|
+
await router.start()
|
|
137
|
+
|
|
138
|
+
# 4. Execute Async Queries
|
|
139
|
+
result = await router.aquery("How do I get my money back?")
|
|
140
|
+
print(f"Matched Intent: {result.name}") # Output: billing
|
|
141
|
+
|
|
142
|
+
# 5. Graceful Shutdown
|
|
143
|
+
await router.stop()
|
|
144
|
+
|
|
145
|
+
if __name__ == "__main__":
|
|
146
|
+
asyncio.run(main())
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
---
|
|
150
|
+
|
|
151
|
+
## System Limitations
|
|
152
|
+
|
|
153
|
+
**Horizontal Scaling (Kubernetes Split-Brain)**
|
|
154
|
+
SynaptoRoute relies on a highly optimized, local in-memory NumPy matrix to achieve its low-millisecond latency. As such, it is structurally bound to a single node. If deployed across multiple load-balanced Kubernetes pods, a hot-reload request hitting Pod A will update Pod A's local memory, but Pod B will remain unaware. Scaling horizontally requires implementing an external event bus (e.g., Redis Pub/Sub) to broadcast memory invalidation events across the cluster.
|
|
155
|
+
|
|
156
|
+
---
|
|
157
|
+
|
|
158
|
+
## Community & Contributing
|
|
159
|
+
|
|
160
|
+
We welcome contributions of all sizes from the open-source community!
|
|
161
|
+
|
|
162
|
+
- **Contributing:** Please read our [Contributing Guidelines](CONTRIBUTING.md) to learn how to set up your development environment, run the test suite, and submit Pull Requests.
|
|
163
|
+
- **Code of Conduct:** We are committed to fostering a welcoming environment. Please review our [Code of Conduct](CODE_OF_CONDUCT.md).
|
|
164
|
+
- **Issues:** If you discover a bug or have a feature request, please [open an issue](https://github.com/sitanshukr08/SynaptoRoute/issues).
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# SynaptoRoute
|
|
2
|
+
|
|
3
|
+
[PyPI](https://pypi.org/project/synaptoroute/)
|
|
4
|
+
[CI Status](https://github.com/sitanshukr08/SynaptoRoute/actions)
|
|
5
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
6
|
+
[Python](https://www.python.org/downloads/)
|
|
7
|
+
[FastAPI](https://fastapi.tiangolo.com)
|
|
8
|
+
[Contributions Welcome](https://github.com/sitanshukr08/SynaptoRoute/blob/main/CONTRIBUTING.md)
|
|
9
|
+
|
|
10
|
+
SynaptoRoute is a high-throughput, local semantic routing engine built for production Python microservices. Designed as a low-latency alternative to Large Language Model (LLM) routing chains and slower local routers, it provides zero-token intent classification in under 4 milliseconds (P99) on standard cloud hardware.
|
|
11
|
+
|
|
12
|
+
## Table of Contents
|
|
13
|
+
- [Why SynaptoRoute?](#why-synaptoroute)
|
|
14
|
+
- [Architecture & Optimizations](#architecture--optimizations)
|
|
15
|
+
- [Performance Benchmarks](#performance-benchmarks)
|
|
16
|
+
- [Installation & Deployment](#installation--deployment)
|
|
17
|
+
- [Quick Start Guide](#quick-start-guide)
|
|
18
|
+
- [System Limitations](#system-limitations)
|
|
19
|
+
- [Community & Contributing](#community--contributing)
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## Why SynaptoRoute?
|
|
24
|
+
|
|
25
|
+
In modern agentic systems, relying on an external API (like OpenAI or Anthropic) to make simple routing decisions—such as determining if a user wants to reset their password or check their balance—introduces unacceptable latency (300ms+) and high token costs.
|
|
26
|
+
|
|
27
|
+
SynaptoRoute solves this by executing intent classification entirely locally using INT8 quantized vector embeddings.
|
|
28
|
+
|
|
29
|
+
SynaptoRoute was engineered specifically to solve the $O(N)$ memory degradation problem during live hot-reloading and to maximize hardware utilization via asynchronous dynamic batching.
|
|
30
|
+
|
|
31
|
+
## Architecture & Optimizations
|
|
32
|
+
|
|
33
|
+
### 1. Lazy Memory Compilation
|
|
34
|
+
Traditional routers suffer from severe performance degradation during live updates. When a new route is added, they execute an immediate `numpy.vstack`, copying the entire vector array in memory ($O(N)$ complexity). SynaptoRoute defers this reallocation, appending new vectors to a lightweight list ($O(1)$) and only executing the heavy compilation precisely when the next query arrives, preventing server freezes.
|
|
35
|
+
|
|
36
|
+
### 2. Dynamic Asynchronous Batching
|
|
37
|
+
Hardware accelerators (GPUs, AVX512 CPUs) are optimized for large matrix multiplications. Sending single queries sequentially incurs massive transfer overhead. SynaptoRoute utilizes a background `asyncio.Queue` worker that traps parallel HTTP requests, waits 5 milliseconds, groups them into a batch, and processes them in a single hardware cycle.
|
|
38
|
+
|
|
39
|
+
### 3. INT8 Quantization
|
|
40
|
+
By default, SynaptoRoute leverages the `BAAI/bge-small-en-v1.5` model quantized to 8-bit integers via the ONNX runtime, slashing memory bandwidth requirements by 4x and maximizing CPU cache utilization.
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## Performance Benchmarks
|
|
45
|
+
|
|
46
|
+
The following metrics were captured via automated GitHub Actions CI/CD running on a standard, unaccelerated `ubuntu-latest` 2-core cloud CPU.
|
|
47
|
+
|
|
48
|
+
| Metric | Cloud CPU Latency | Context |
|
|
49
|
+
| :--- | :--- | :--- |
|
|
50
|
+
| **Inference P99** | 3.94 ms | Single sequential query latency. |
|
|
51
|
+
| **Amortized P50** | 2.69 ms | Per-query latency when processing 1,000 concurrent requests via dynamic batching. |
|
|
52
|
+
| **Hot-Reload** | 5.04 ms | Time required to dynamically inject a new utterance into memory without dropping active API requests. |
|
|
53
|
+
|
|
54
|
+
> **📊 View Full Benchmarks:** For detailed analysis including Memory Leak Endurance, GPU Scaling, Classification F1-Scores, and Input Poisoning Survival Metrics, see our official [BENCHMARKS.md](BENCHMARKS.md).
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
## Installation & Deployment
|
|
59
|
+
|
|
60
|
+
### Method 1: Docker REST API (Recommended)
|
|
61
|
+
|
|
62
|
+
SynaptoRoute ships with a fully asynchronous FastAPI wrapper, designed for immediate drop-in deployment as a scalable microservice.
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
# Build the Docker image
|
|
66
|
+
docker build -t synaptoroute .
|
|
67
|
+
|
|
68
|
+
# Run the container
|
|
69
|
+
docker run -p 8000:8000 synaptoroute
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
You can interface with the router immediately:
|
|
73
|
+
```bash
|
|
74
|
+
curl -X POST http://localhost:8000/route \
|
|
75
|
+
-H "Content-Type: application/json" \
|
|
76
|
+
-d '{"query": "I need help resetting my password"}'
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### Method 2: Standard Python Package
|
|
80
|
+
|
|
81
|
+
To embed SynaptoRoute natively into your existing Python pipelines, install it from PyPI with pip (or from the Git repository if you are testing the latest main branch):
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
pip install synaptoroute
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
---
|
|
88
|
+
|
|
89
|
+
## Quick Start Guide
|
|
90
|
+
|
|
91
|
+
```python
|
|
92
|
+
import asyncio
|
|
93
|
+
from synaptoroute.router import AdaptiveRouter
|
|
94
|
+
from synaptoroute.encoder import Encoder
|
|
95
|
+
from synaptoroute.storage import SQLiteStorage
|
|
96
|
+
from synaptoroute.models import Route
|
|
97
|
+
|
|
98
|
+
async def main():
|
|
99
|
+
# 1. Initialize Components
|
|
100
|
+
encoder = Encoder()
|
|
101
|
+
storage = SQLiteStorage("data/memory.sqlite")
|
|
102
|
+
router = AdaptiveRouter(encoder, storage)
|
|
103
|
+
|
|
104
|
+
# 2. Define Routes
|
|
105
|
+
billing_route = Route(
|
|
106
|
+
name="billing",
|
|
107
|
+
utterances=["I need a refund", "Where is my receipt?", "Cancel my subscription"]
|
|
108
|
+
)
|
|
109
|
+
router.add_route(billing_route)
|
|
110
|
+
|
|
111
|
+
# 3. Start the Background Batching Worker
|
|
112
|
+
await router.start()
|
|
113
|
+
|
|
114
|
+
# 4. Execute Async Queries
|
|
115
|
+
result = await router.aquery("How do I get my money back?")
|
|
116
|
+
print(f"Matched Intent: {result.name}") # Output: billing
|
|
117
|
+
|
|
118
|
+
# 5. Graceful Shutdown
|
|
119
|
+
await router.stop()
|
|
120
|
+
|
|
121
|
+
if __name__ == "__main__":
|
|
122
|
+
asyncio.run(main())
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
---
|
|
126
|
+
|
|
127
|
+
## System Limitations
|
|
128
|
+
|
|
129
|
+
**Horizontal Scaling (Kubernetes Split-Brain)**
|
|
130
|
+
SynaptoRoute relies on a highly optimized, local in-memory NumPy matrix to achieve its low-millisecond latency. As such, it is structurally bound to a single node. If deployed across multiple load-balanced Kubernetes pods, a hot-reload request hitting Pod A will update Pod A's local memory, but Pod B will remain unaware. Scaling horizontally requires implementing an external event bus (e.g., Redis Pub/Sub) to broadcast memory invalidation events across the cluster.
|
|
131
|
+
|
|
132
|
+
---
|
|
133
|
+
|
|
134
|
+
## Community & Contributing
|
|
135
|
+
|
|
136
|
+
We welcome contributions of all sizes from the open-source community!
|
|
137
|
+
|
|
138
|
+
- **Contributing:** Please read our [Contributing Guidelines](CONTRIBUTING.md) to learn how to set up your development environment, run the test suite, and submit Pull Requests.
|
|
139
|
+
- **Code of Conduct:** We are committed to fostering a welcoming environment. Please review our [Code of Conduct](CODE_OF_CONDUCT.md).
|
|
140
|
+
- **Issues:** If you discover a bug or have a feature request, please [open an issue](https://github.com/sitanshukr08/SynaptoRoute/issues).
|