synaptoroute 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. synaptoroute-0.1.0/.env +13 -0
  2. synaptoroute-0.1.0/.github/ISSUE_TEMPLATE/bug_report.md +25 -0
  3. synaptoroute-0.1.0/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
  4. synaptoroute-0.1.0/.github/PULL_REQUEST_TEMPLATE.md +18 -0
  5. synaptoroute-0.1.0/.github/workflows/ci.yml +62 -0
  6. synaptoroute-0.1.0/.gitignore +0 -0
  7. synaptoroute-0.1.0/BENCHMARKS.md +76 -0
  8. synaptoroute-0.1.0/CODE_OF_CONDUCT.md +19 -0
  9. synaptoroute-0.1.0/CONTRIBUTING.md +31 -0
  10. synaptoroute-0.1.0/Dockerfile +25 -0
  11. synaptoroute-0.1.0/LICENSE +21 -0
  12. synaptoroute-0.1.0/PKG-INFO +164 -0
  13. synaptoroute-0.1.0/README.md +140 -0
  14. synaptoroute-0.1.0/benchmarks/bench_adversarial.py +63 -0
  15. synaptoroute-0.1.0/benchmarks/bench_classification.py +95 -0
  16. synaptoroute-0.1.0/benchmarks/bench_ddos.py +57 -0
  17. synaptoroute-0.1.0/benchmarks/bench_dynamic_batching.py +48 -0
  18. synaptoroute-0.1.0/benchmarks/bench_gpu_performance.py +47 -0
  19. synaptoroute-0.1.0/benchmarks/bench_lazy_compile.py +43 -0
  20. synaptoroute-0.1.0/benchmarks/bench_malformed.py +42 -0
  21. synaptoroute-0.1.0/benchmarks/bench_memory_leak.py +40 -0
  22. synaptoroute-0.1.0/benchmarks/bench_performance.py +51 -0
  23. synaptoroute-0.1.0/benchmarks/bench_scalability.py +42 -0
  24. synaptoroute-0.1.0/benchmarks/bench_vs_semantic_router.py +105 -0
  25. synaptoroute-0.1.0/benchmarks/read_logs.py +13 -0
  26. synaptoroute-0.1.0/examples/api_server.py +74 -0
  27. synaptoroute-0.1.0/notebooks/.ipynb_checkpoints/01-The-Routing-Problem-checkpoint.ipynb +68 -0
  28. synaptoroute-0.1.0/notebooks/01-The-Routing-Problem.ipynb +68 -0
  29. synaptoroute-0.1.0/notebooks/02-SynaptoRoute-Architecture.ipynb +49 -0
  30. synaptoroute-0.1.0/notebooks/03-Dynamic-Batching.ipynb +62 -0
  31. synaptoroute-0.1.0/pyproject.toml +41 -0
  32. synaptoroute-0.1.0/requirements.txt +9 -0
  33. synaptoroute-0.1.0/routes.db +0 -0
  34. synaptoroute-0.1.0/routes.db-shm +0 -0
  35. synaptoroute-0.1.0/routes.db-wal +0 -0
  36. synaptoroute-0.1.0/scripts/download_kaggle_data.py +19 -0
  37. synaptoroute-0.1.0/scripts/optimize_with_kaggle.py +74 -0
  38. synaptoroute-0.1.0/src/synaptoroute/__init__.py +13 -0
  39. synaptoroute-0.1.0/src/synaptoroute/encoder.py +25 -0
  40. synaptoroute-0.1.0/src/synaptoroute/exceptions.py +15 -0
  41. synaptoroute-0.1.0/src/synaptoroute/models.py +38 -0
  42. synaptoroute-0.1.0/src/synaptoroute/router.py +271 -0
  43. synaptoroute-0.1.0/src/synaptoroute/storage.py +139 -0
  44. synaptoroute-0.1.0/tests/__init__.py +1 -0
  45. synaptoroute-0.1.0/tests/test_encoder.py +20 -0
  46. synaptoroute-0.1.0/tests/test_models.py +20 -0
  47. synaptoroute-0.1.0/tests/test_optimization.py +43 -0
  48. synaptoroute-0.1.0/tests/test_router.py +79 -0
  49. synaptoroute-0.1.0/tests/test_storage.py +89 -0
@@ -0,0 +1,13 @@
1
+ # API Keys
2
+ GROQ_API_KEY="<your-groq-api-key>"
3
+ GEMINI_API_KEY="<your-gemini-api-key>"
4
+
5
+ # Kaggle API Credentials
6
+ KAGGLE_USERNAME="sitanshukr"
7
+ KAGGLE_KEY="<your-kaggle-api-key>"
8
+
9
+ # Database Paths
10
+ SQLITE_DB_PATH="data/router_memory.sqlite"
11
+ # CHROMA_DB_PATH="persistence/chroma_db" # Not used in SynaptoRoute
12
+ GROQ_API_KEY_2="<your-second-groq-api-key>"
13
+ GOOGLE_API_KEY_2="<your-second-google-api-key>"
@@ -0,0 +1,25 @@
1
+ ---
2
+ name: Bug report
3
+ about: Create a report to help us improve SynaptoRoute
4
+ title: '[BUG] '
5
+ labels: bug
6
+ assignees: ''
7
+
8
+ ---
9
+
10
+ **Describe the bug**
11
+ A clear and concise description of what the bug is.
12
+
13
+ **To Reproduce**
14
+ Steps to reproduce the behavior:
15
+ 1. Initialize router with '...'
16
+ 2. Call router.add_route(...)
17
+ 3. See error
18
+
19
+ **Expected behavior**
20
+ A clear and concise description of what you expected to happen.
21
+
22
+ **Environment (please complete the following information):**
23
+ - OS: [e.g. Ubuntu 22.04, Windows 11]
24
+ - Python Version: [e.g. 3.11]
25
+ - SynaptoRoute Version: [e.g. 0.1.0]
@@ -0,0 +1,20 @@
1
+ ---
2
+ name: Feature request
3
+ about: Suggest an idea for SynaptoRoute
4
+ title: '[FEATURE] '
5
+ labels: enhancement
6
+ assignees: ''
7
+
8
+ ---
9
+
10
+ **Is your feature request related to a problem? Please describe.**
11
+ A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12
+
13
+ **Describe the solution you'd like**
14
+ A clear and concise description of what you want to happen.
15
+
16
+ **Describe alternatives you've considered**
17
+ A clear and concise description of any alternative solutions or features you've considered.
18
+
19
+ **Additional context**
20
+ Add any other context or screenshots about the feature request here.
@@ -0,0 +1,18 @@
1
+ ## Description
2
+ <!-- Please include a summary of the change and which issue is fixed. -->
3
+
4
+ Fixes # (issue)
5
+
6
+ ## Type of change
7
+ - [ ] Bug fix (non-breaking change which fixes an issue)
8
+ - [ ] New feature (non-breaking change which adds functionality)
9
+ - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
10
+ - [ ] This change requires a documentation update
11
+
12
+ ## Checklist:
13
+ - [ ] My code follows the architectural guidelines of this project (e.g. preserves $O(1)$ updates).
14
+ - [ ] I have performed a self-review of my own code.
15
+ - [ ] I have commented my code, particularly in hard-to-understand areas.
16
+ - [ ] I have made corresponding changes to the documentation (`README.md`, `notebooks/`).
17
+ - [ ] I have added tests that prove my fix is effective or that my feature works.
18
+ - [ ] New and existing unit tests pass locally with my changes.
@@ -0,0 +1,62 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [ "main" ]
6
+ pull_request:
7
+ branches: [ "main" ]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v3
14
+
15
+ - name: Set up Python
16
+ uses: actions/setup-python@v4
17
+ with:
18
+ python-version: "3.11"
19
+
20
+ - name: Install dependencies
21
+ run: |
22
+ python -m pip install --upgrade pip
23
+ pip install -e .[test]
24
+
25
+ - name: Run Tests
26
+ run: |
27
+ pytest
28
+
29
+ docker-build:
30
+ runs-on: ubuntu-latest
31
+ steps:
32
+ - uses: actions/checkout@v3
33
+
34
+ - name: Build Docker Image
35
+ run: |
36
+ docker build -t synaptoroute-test .
37
+
38
+ benchmark:
39
+ needs: test
40
+ runs-on: ubuntu-latest
41
+ steps:
42
+ - uses: actions/checkout@v3
43
+
44
+ - name: Set up Python
45
+ uses: actions/setup-python@v4
46
+ with:
47
+ python-version: "3.11"
48
+
49
+ - name: Install dependencies
50
+ run: |
51
+ python -m pip install --upgrade pip
52
+ pip install -e .
53
+
54
+ - name: Run Dynamic Batching Benchmark
55
+ run: |
56
+ echo "### Dynamic Batching Throughput" >> $GITHUB_STEP_SUMMARY
57
+ python benchmarks/bench_dynamic_batching.py >> $GITHUB_STEP_SUMMARY
58
+
59
+ - name: Run System Performance Benchmark
60
+ run: |
61
+ echo "### System Performance & Hot-Reload Latency" >> $GITHUB_STEP_SUMMARY
62
+ python benchmarks/bench_performance.py >> $GITHUB_STEP_SUMMARY
Binary file
@@ -0,0 +1,76 @@
1
+ # SynaptoRoute: Master Empirical Benchmarks
2
+
3
+ This document serves as the immutable, objective record of all performance, accuracy, and memory metrics recorded during the engineering of the SynaptoRoute engine.
4
+
5
+ ## 1. Hardware Inference Latency (Batch Size = 1)
6
+ | Environment | P50 Latency | P99 Latency |
7
+ | :--- | :--- | :--- |
8
+ | **Cloud CPU (Ubuntu 2-Core)** | 3.07 ms | 3.94 ms |
9
+ | **Local GPU (RTX 3050)** | 8.51 ms | 14.11 ms |
10
+
11
+ > **Note:** The quantized INT8 ONNX architecture allows standard CPUs to outpace entry-level GPUs for sequential (batch size 1) inference, because CPU execution avoids the host-to-device PCIe transfer overhead incurred on every GPU call.
12
+
13
+ ## 2. Dynamic Batching Throughput (Batch Size = 1000)
14
+ *Test: Firing 1000 concurrent async queries.*
15
+
16
+ | Environment | Amortized Latency (per query) |
17
+ | :--- | :--- |
18
+ | **Cloud CPU (Ubuntu 2-Core)** | 2.69 ms |
19
+ | **Local GPU (RTX 3050)** | 0.157 ms |
20
+
21
+ > **Note:** Under heavy concurrent load, the 5-millisecond dynamic batching queue kicks in, drastically increasing throughput and allowing hardware accelerators to shine.
22
+
23
+ ## 3. Memory Profiling: Hot-Reloading ($O(1)$ vs $O(N)$)
24
+ *Test: Sequentially adding 500 routes dynamically.*
25
+
26
+ | Compilation Strategy | 10th Route Addition | 490th Route Addition | Behavior |
27
+ | :--- | :--- | :--- | :--- |
28
+ | **Eager (NumPy `vstack`)** | 1.15 ms | 4.88 ms | Linearly Degrading $O(N)$ |
29
+ | **SynaptoRoute Lazy** | 0.02 ms | 0.02 ms | Perfectly Flat $O(1)$ |
30
+
31
+ > **Note:** Deferred reallocation prevents server freezes during live updates. Average SynaptoRoute hot-reload penalty: 5.04 ms.
32
+
33
+ ## 4. Classification Accuracy & Optimization
34
+
35
+ | Metric | Score |
36
+ | :--- | :--- |
37
+ | **Baseline Cosine Similarity Accuracy** | ~82.0% |
38
+ | **Optimized Threshold Accuracy** | > 98.0% |
39
+ | **Threshold Optimizer F1 Score** | 0.985 |
40
+
41
+ > **Note:** We achieved this by implementing an automatic ML optimizer (`fit_thresholds`) that calculates, for every individual route, the cosine threshold that performs best on a labeled dataset.
42
+
43
+ ## 5. System Vulnerabilities & Leaks Fixed
44
+ - **Zombie Futures:** Resolved a critical async bug where cancelling the worker left client requests hanging.
45
+ - **DDoS Vulnerability:** Bounded the batching queue at `maxsize=10000` to prevent OOM errors.
46
+ - **SQLite Dangling Embeddings:** Implemented memory rebuilding via NumPy masks to prevent the router from retaining and matching against deleted utterances.
47
+
48
+ ## 6. System Stability and Stress Testing
49
+
50
+ ### Test 1: Concurrency Limits (20,000 Concurrent Requests)
51
+ | Metric | Count |
52
+ | :--- | :--- |
53
+ | **Processed Requests** | 10,000 |
54
+ | **Rejected Requests (RouterOverloadedError)** | 10,000 |
55
+ | **Unhandled Exceptions** | 0 |
56
+
57
+ > **Note:** The bounded queue (`maxsize=10000`) successfully prevented Out-of-Memory (OOM) errors during high concurrency. The system rejected excess requests as expected without process degradation. Total execution time: 31.42 seconds.
58
+
59
+ ### Test 2: Memory Allocation Durability (2,000 Consecutive Reloads)
60
+ | Iteration | Peak RAM |
61
+ | :--- | :--- |
62
+ | **Iteration 0** | 0.01 MB |
63
+ | **Iteration 2000** | 0.32 MB |
64
+
65
+ > **Note:** Continuous route modification and reallocation over 2,000 iterations maintained stable memory usage, confirming that the NumPy mask replacement effectively mitigated prior memory leaks associated with eager compilation.
66
+
67
+ ### Test 3: Edge-Case Input Handling
68
+ | Input Type | Status | Latency |
69
+ | :--- | :--- | :--- |
70
+ | **Empty String** | Processed | 10.66 ms |
71
+ | **Whitespace Only** | Processed | 14.68 ms |
72
+ | **Large Payload (1 MB)** | Processed | 461.85 ms |
73
+ | **Unstructured Noise (5000 chars)** | Processed | 145.79 ms |
74
+ | **Extended Unicode / Emojis** | Processed | 23.80 ms |
75
+
76
+ > **Note:** The ONNX runtime and asyncio worker thread successfully processed atypical and malformed inputs without raising critical exceptions or halting execution.
@@ -0,0 +1,19 @@
1
+ # Contributor Covenant Code of Conduct
2
+
3
+ ## Our Pledge
4
+ We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation.
5
+
6
+ We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community.
7
+
8
+ ## Our Standards
9
+ Examples of behavior that contributes to a positive environment for our community include:
10
+ * Demonstrating empathy and kindness toward other people
11
+ * Being respectful of differing opinions, viewpoints, and experiences
12
+ * Giving and gracefully accepting constructive feedback
13
+ * Accepting responsibility and apologizing to those affected by our mistakes
14
+
15
+ Examples of unacceptable behavior include:
16
+ * The use of sexualized language or imagery
17
+ * Trolling, insulting or derogatory comments, and personal or political attacks
18
+ * Public or private harassment
19
+ * Other conduct which could reasonably be considered inappropriate in a professional setting.
@@ -0,0 +1,31 @@
1
+ # Contributing to SynaptoRoute
2
+
3
+ First off, thank you for considering contributing to SynaptoRoute! It's people like you that make open source such a great community.
4
+
5
+ ## Development Setup
6
+
7
+ 1. **Fork the repository** on GitHub.
8
+ 2. **Clone your fork** locally:
9
+ ```bash
10
+ git clone https://github.com/YOUR-USERNAME/SynaptoRoute.git
11
+ cd SynaptoRoute
12
+ ```
13
+ 3. **Create a virtual environment and install dependencies:**
14
+ ```bash
15
+ python -m venv venv
16
+ source venv/bin/activate # On Windows use `venv\Scripts\activate`
17
+ pip install -e .[api]
18
+ pip install pytest
19
+ ```
20
+
21
+ ## Running Tests
22
+ We enforce strict testing for all architectural components (encoding, latency, routing accuracy). Before submitting a PR, ensure all tests pass:
23
+ ```bash
24
+ pytest tests/
25
+ ```
26
+
27
+ ## Pull Request Process
28
+ 1. Ensure your code strictly adheres to the existing architectural philosophy (e.g., preserving $O(1)$ Lazy Compilation).
29
+ 2. Update the `README.md` or Jupyter Notebooks in `notebooks/` if you add new features.
30
+ 3. Open a Pull Request using the provided GitHub PR template.
31
+ 4. Wait for CI/CD checks to pass.
@@ -0,0 +1,25 @@
1
+ FROM python:3.11-slim
2
+
3
+ WORKDIR /app
4
+
5
+ # Install any required system dependencies
6
+ RUN apt-get update && apt-get install -y --no-install-recommends \
7
+ build-essential \
8
+ && rm -rf /var/lib/apt/lists/*
9
+
10
+ # Copy python project configuration
11
+ COPY pyproject.toml .
12
+ COPY README.md .
13
+ COPY src/ src/
14
+
15
+ # Install the package
16
+ RUN pip install --no-cache-dir .[api]
17
+
18
+ # Cache fastembed weights during build
19
+ RUN python -c "from fastembed import TextEmbedding; TextEmbedding()" || true
20
+
21
+ COPY examples/ examples/
22
+
23
+ EXPOSE 8000
24
+
25
+ CMD ["uvicorn", "examples.api_server:app", "--host", "0.0.0.0", "--port", "8000"]
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Sitanshu
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,164 @@
1
+ Metadata-Version: 2.4
2
+ Name: synaptoroute
3
+ Version: 0.1.0
4
+ Summary: A dynamic zero-token semantic router
5
+ Project-URL: Repository, https://github.com/sitanshukr08/SynaptoRoute
6
+ Project-URL: Issues, https://github.com/sitanshukr08/SynaptoRoute/issues
7
+ Author-email: Sitanshu <contact@example.com>
8
+ License: MIT
9
+ License-File: LICENSE
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: OS Independent
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
14
+ Requires-Dist: fastembed>=0.8.0
15
+ Requires-Dist: numpy>=1.24.0
16
+ Requires-Dist: pydantic>=2.10.0
17
+ Requires-Dist: scikit-learn>=1.3.0
18
+ Provides-Extra: api
19
+ Requires-Dist: fastapi>=0.100.0; extra == 'api'
20
+ Requires-Dist: uvicorn>=0.22.0; extra == 'api'
21
+ Provides-Extra: test
22
+ Requires-Dist: pytest>=7.0.0; extra == 'test'
23
+ Description-Content-Type: text/markdown
24
+
25
+ # SynaptoRoute
26
+
27
+ [![PyPI version](https://badge.fury.io/py/synaptoroute.svg)](https://pypi.org/project/synaptoroute/)
28
+ [![CI/CD Pipeline](https://github.com/sitanshukr08/SynaptoRoute/actions/workflows/ci.yml/badge.svg)](https://github.com/sitanshukr08/SynaptoRoute/actions)
29
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
30
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
31
+ [![FastAPI](https://img.shields.io/badge/FastAPI-0.100+-green.svg)](https://fastapi.tiangolo.com)
32
+ [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](https://github.com/sitanshukr08/SynaptoRoute/blob/main/CONTRIBUTING.md)
33
+
34
+ SynaptoRoute is a high-throughput, local semantic routing engine built for production Python microservices. Designed as a mathematically optimal alternative to Large Language Model (LLM) routing chains and slower local routers, it provides zero-token intent classification in under 4 milliseconds (P99, single query) on standard cloud hardware.
35
+
36
+ ## Table of Contents
37
+ - [Why SynaptoRoute?](#why-synaptoroute)
38
+ - [Architecture & Optimizations](#architecture--optimizations)
39
+ - [Performance Benchmarks](#performance-benchmarks)
40
+ - [Installation & Deployment](#installation--deployment)
41
+ - [Quick Start Guide](#quick-start-guide)
42
+ - [System Limitations](#system-limitations)
43
+ - [Community & Contributing](#community--contributing)
44
+
45
+ ---
46
+
47
+ ## Why SynaptoRoute?
48
+
49
+ In modern agentic systems, relying on an external API (like OpenAI or Anthropic) to make simple routing decisions—such as determining if a user wants to reset their password or check their balance—introduces unacceptable latency (300ms+) and high token costs.
50
+
51
+ SynaptoRoute solves this by executing intent classification entirely locally using INT8 quantized vector embeddings.
52
+
53
+ SynaptoRoute was engineered specifically to solve the $O(N)$ memory degradation problem during live hot-reloading and to maximize hardware utilization via asynchronous dynamic batching.
54
+
55
+ ## Architecture & Optimizations
56
+
57
+ ### 1. Lazy Memory Compilation
58
+ Traditional routers suffer from severe performance degradation during live updates. When a new route is added, they execute an immediate `numpy.vstack`, copying the entire vector array in memory ($O(N)$ complexity). SynaptoRoute defers this reallocation, appending new vectors to a lightweight list ($O(1)$) and only executing the heavy compilation precisely when the next query arrives, preventing server freezes.
59
+
60
+ ### 2. Dynamic Asynchronous Batching
61
+ Hardware accelerators (GPUs, AVX512 CPUs) are optimized for large matrix multiplications. Sending single queries sequentially incurs massive transfer overhead. SynaptoRoute utilizes a background `asyncio.Queue` worker that traps parallel HTTP requests, waits 5 milliseconds, groups them into a batch, and processes them in a single hardware cycle.
62
+
63
+ ### 3. INT8 Quantization
64
+ By default, SynaptoRoute leverages the `BAAI/bge-small-en-v1.5` model quantized to 8-bit integers via the ONNX runtime, slashing memory bandwidth requirements by 4x and maximizing CPU cache utilization.
65
+
66
+ ---
67
+
68
+ ## Performance Benchmarks
69
+
70
+ The following metrics were captured via automated GitHub Actions CI/CD running on a standard, unaccelerated `ubuntu-latest` 2-core cloud CPU.
71
+
72
+ | Metric | Cloud CPU Latency | Context |
73
+ | :--- | :--- | :--- |
74
+ | **Inference P99** | 3.94 ms | Single sequential query latency. |
75
+ | **Amortized P50** | 2.69 ms | Per-query latency when processing 1,000 concurrent requests via dynamic batching. |
76
+ | **Hot-Reload** | 5.04 ms | Time required to dynamically inject a new utterance into memory without dropping active API requests. |
77
+
78
+ > **📊 View Full Benchmarks:** For detailed analysis including Memory Leak Endurance, GPU Scaling, Classification F1-Scores, and Input Poisoning Survival Metrics, see our official [BENCHMARKS.md](BENCHMARKS.md).
79
+
80
+ ---
81
+
82
+ ## Installation & Deployment
83
+
84
+ ### Method 1: Docker REST API (Recommended)
85
+
86
+ SynaptoRoute ships with a fully asynchronous FastAPI wrapper, designed for immediate drop-in deployment as a scalable microservice.
87
+
88
+ ```bash
89
+ # Build the Docker image
90
+ docker build -t synaptoroute .
91
+
92
+ # Run the container
93
+ docker run -p 8000:8000 synaptoroute
94
+ ```
95
+
96
+ You can interface with the router immediately:
97
+ ```bash
98
+ curl -X POST http://localhost:8000/route \
99
+ -H "Content-Type: application/json" \
100
+ -d '{"query": "I need help resetting my password"}'
101
+ ```
102
+
103
+ ### Method 2: Standard Python Package
104
+
105
+ To embed SynaptoRoute natively into your existing Python pipelines, install it from PyPI with pip (or from the Git repository if you are testing the latest main branch):
106
+
107
+ ```bash
108
+ pip install synaptoroute
109
+ ```
110
+
111
+ ---
112
+
113
+ ## Quick Start Guide
114
+
115
+ ```python
116
+ import asyncio
117
+ from synaptoroute.router import AdaptiveRouter
118
+ from synaptoroute.encoder import Encoder
119
+ from synaptoroute.storage import SQLiteStorage
120
+ from synaptoroute.models import Route
121
+
122
+ async def main():
123
+ # 1. Initialize Components
124
+ encoder = Encoder()
125
+ storage = SQLiteStorage("data/memory.sqlite")
126
+ router = AdaptiveRouter(encoder, storage)
127
+
128
+ # 2. Define Routes
129
+ billing_route = Route(
130
+ name="billing",
131
+ utterances=["I need a refund", "Where is my receipt?", "Cancel my subscription"]
132
+ )
133
+ router.add_route(billing_route)
134
+
135
+ # 3. Start the Background Batching Worker
136
+ await router.start()
137
+
138
+ # 4. Execute Async Queries
139
+ result = await router.aquery("How do I get my money back?")
140
+ print(f"Matched Intent: {result.name}") # Output: billing
141
+
142
+ # 5. Graceful Shutdown
143
+ await router.stop()
144
+
145
+ if __name__ == "__main__":
146
+ asyncio.run(main())
147
+ ```
148
+
149
+ ---
150
+
151
+ ## System Limitations
152
+
153
+ **Horizontal Scaling (Kubernetes Split-Brain)**
154
+ SynaptoRoute relies on a highly optimized, local in-memory NumPy matrix to achieve its low-millisecond latency. As such, it is structurally bound to a single node. If deployed across multiple load-balanced Kubernetes pods, a hot-reload request hitting Pod A will update Pod A's local memory, but Pod B will remain unaware. Scaling horizontally requires implementing an external event bus (e.g., Redis Pub/Sub) to broadcast memory invalidation events across the cluster.
155
+
156
+ ---
157
+
158
+ ## Community & Contributing
159
+
160
+ We welcome contributions of all sizes from the open-source community!
161
+
162
+ - **Contributing:** Please read our [Contributing Guidelines](CONTRIBUTING.md) to learn how to set up your development environment, run the test suite, and submit Pull Requests.
163
+ - **Code of Conduct:** We are committed to fostering a welcoming environment. Please review our [Code of Conduct](CODE_OF_CONDUCT.md).
164
+ - **Issues:** If you discover a bug or have a feature request, please [open an issue](https://github.com/sitanshukr08/SynaptoRoute/issues).
@@ -0,0 +1,140 @@
1
+ # SynaptoRoute
2
+
3
+ [![PyPI version](https://badge.fury.io/py/synaptoroute.svg)](https://pypi.org/project/synaptoroute/)
4
+ [![CI/CD Pipeline](https://github.com/sitanshukr08/SynaptoRoute/actions/workflows/ci.yml/badge.svg)](https://github.com/sitanshukr08/SynaptoRoute/actions)
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
6
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
7
+ [![FastAPI](https://img.shields.io/badge/FastAPI-0.100+-green.svg)](https://fastapi.tiangolo.com)
8
+ [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](https://github.com/sitanshukr08/SynaptoRoute/blob/main/CONTRIBUTING.md)
9
+
10
+ SynaptoRoute is a high-throughput, local semantic routing engine built for production Python microservices. Designed as a mathematically optimal alternative to Large Language Model (LLM) routing chains and slower local routers, it provides zero-token intent classification in under 4 milliseconds (P99, single query) on standard cloud hardware.
11
+
12
+ ## Table of Contents
13
+ - [Why SynaptoRoute?](#why-synaptoroute)
14
+ - [Architecture & Optimizations](#architecture--optimizations)
15
+ - [Performance Benchmarks](#performance-benchmarks)
16
+ - [Installation & Deployment](#installation--deployment)
17
+ - [Quick Start Guide](#quick-start-guide)
18
+ - [System Limitations](#system-limitations)
19
+ - [Community & Contributing](#community--contributing)
20
+
21
+ ---
22
+
23
+ ## Why SynaptoRoute?
24
+
25
+ In modern agentic systems, relying on an external API (like OpenAI or Anthropic) to make simple routing decisions—such as determining if a user wants to reset their password or check their balance—introduces unacceptable latency (300ms+) and high token costs.
26
+
27
+ SynaptoRoute solves this by executing intent classification entirely locally using INT8 quantized vector embeddings.
28
+
29
+ SynaptoRoute was engineered specifically to solve the $O(N)$ memory degradation problem during live hot-reloading and to maximize hardware utilization via asynchronous dynamic batching.
30
+
31
+ ## Architecture & Optimizations
32
+
33
+ ### 1. Lazy Memory Compilation
34
+ Traditional routers suffer from severe performance degradation during live updates. When a new route is added, they execute an immediate `numpy.vstack`, copying the entire vector array in memory ($O(N)$ complexity). SynaptoRoute defers this reallocation, appending new vectors to a lightweight list ($O(1)$) and only executing the heavy compilation precisely when the next query arrives, preventing server freezes.
35
+
36
+ ### 2. Dynamic Asynchronous Batching
37
+ Hardware accelerators (GPUs, AVX512 CPUs) are optimized for large matrix multiplications. Sending single queries sequentially incurs massive transfer overhead. SynaptoRoute utilizes a background `asyncio.Queue` worker that traps parallel HTTP requests, waits 5 milliseconds, groups them into a batch, and processes them in a single hardware cycle.
38
+
39
+ ### 3. INT8 Quantization
40
+ By default, SynaptoRoute leverages the `BAAI/bge-small-en-v1.5` model quantized to 8-bit integers via the ONNX runtime, slashing memory bandwidth requirements by 4x and maximizing CPU cache utilization.
41
+
42
+ ---
43
+
44
+ ## Performance Benchmarks
45
+
46
+ The following metrics were captured via automated GitHub Actions CI/CD running on a standard, unaccelerated `ubuntu-latest` 2-core cloud CPU.
47
+
48
+ | Metric | Cloud CPU Latency | Context |
49
+ | :--- | :--- | :--- |
50
+ | **Inference P99** | 3.94 ms | Single sequential query latency. |
51
+ | **Amortized P50** | 2.69 ms | Per-query latency when processing 1,000 concurrent requests via dynamic batching. |
52
+ | **Hot-Reload** | 5.04 ms | Time required to dynamically inject a new utterance into memory without dropping active API requests. |
53
+
54
+ > **📊 View Full Benchmarks:** For detailed analysis including Memory Leak Endurance, GPU Scaling, Classification F1-Scores, and Input Poisoning Survival Metrics, see our official [BENCHMARKS.md](BENCHMARKS.md).
55
+
56
+ ---
57
+
58
+ ## Installation & Deployment
59
+
60
+ ### Method 1: Docker REST API (Recommended)
61
+
62
+ SynaptoRoute ships with a fully asynchronous FastAPI wrapper, designed for immediate drop-in deployment as a scalable microservice.
63
+
64
+ ```bash
65
+ # Build the Docker image
66
+ docker build -t synaptoroute .
67
+
68
+ # Run the container
69
+ docker run -p 8000:8000 synaptoroute
70
+ ```
71
+
72
+ You can interface with the router immediately:
73
+ ```bash
74
+ curl -X POST http://localhost:8000/route \
75
+ -H "Content-Type: application/json" \
76
+ -d '{"query": "I need help resetting my password"}'
77
+ ```
78
+
79
+ ### Method 2: Standard Python Package
80
+
81
+ To embed SynaptoRoute natively into your existing Python pipelines, install it from PyPI with pip (or from the Git repository if you are testing the latest main branch):
82
+
83
+ ```bash
84
+ pip install synaptoroute
85
+ ```
86
+
87
+ ---
88
+
89
+ ## Quick Start Guide
90
+
91
+ ```python
92
+ import asyncio
93
+ from synaptoroute.router import AdaptiveRouter
94
+ from synaptoroute.encoder import Encoder
95
+ from synaptoroute.storage import SQLiteStorage
96
+ from synaptoroute.models import Route
97
+
98
+ async def main():
99
+ # 1. Initialize Components
100
+ encoder = Encoder()
101
+ storage = SQLiteStorage("data/memory.sqlite")
102
+ router = AdaptiveRouter(encoder, storage)
103
+
104
+ # 2. Define Routes
105
+ billing_route = Route(
106
+ name="billing",
107
+ utterances=["I need a refund", "Where is my receipt?", "Cancel my subscription"]
108
+ )
109
+ router.add_route(billing_route)
110
+
111
+ # 3. Start the Background Batching Worker
112
+ await router.start()
113
+
114
+ # 4. Execute Async Queries
115
+ result = await router.aquery("How do I get my money back?")
116
+ print(f"Matched Intent: {result.name}") # Output: billing
117
+
118
+ # 5. Graceful Shutdown
119
+ await router.stop()
120
+
121
+ if __name__ == "__main__":
122
+ asyncio.run(main())
123
+ ```
124
+
125
+ ---
126
+
127
+ ## System Limitations
128
+
129
+ **Horizontal Scaling (Kubernetes Split-Brain)**
130
+ SynaptoRoute relies on a highly optimized, local in-memory NumPy matrix to achieve its low-millisecond latency. As such, it is structurally bound to a single node. If deployed across multiple load-balanced Kubernetes pods, a hot-reload request hitting Pod A will update Pod A's local memory, but Pod B will remain unaware. Scaling horizontally requires implementing an external event bus (e.g., Redis Pub/Sub) to broadcast memory invalidation events across the cluster.
131
+
132
+ ---
133
+
134
+ ## Community & Contributing
135
+
136
+ We welcome contributions of all sizes from the open-source community!
137
+
138
+ - **Contributing:** Please read our [Contributing Guidelines](CONTRIBUTING.md) to learn how to set up your development environment, run the test suite, and submit Pull Requests.
139
+ - **Code of Conduct:** We are committed to fostering a welcoming environment. Please review our [Code of Conduct](CODE_OF_CONDUCT.md).
140
+ - **Issues:** If you discover a bug or have a feature request, please [open an issue](https://github.com/sitanshukr08/SynaptoRoute/issues).