synaptoroute 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synaptoroute/__init__.py +18 -0
- synaptoroute/encoder.py +136 -0
- synaptoroute/exceptions.py +19 -0
- synaptoroute/index.py +296 -0
- synaptoroute/integrations/__init__.py +3 -0
- synaptoroute/integrations/langchain.py +25 -0
- synaptoroute/integrations/llamaindex.py +47 -0
- synaptoroute/metrics.py +72 -0
- synaptoroute/models.py +39 -0
- synaptoroute/profile.py +31 -0
- synaptoroute/reranker.py +64 -0
- synaptoroute/router.py +544 -0
- synaptoroute/storage.py +200 -0
- synaptoroute/sync.py +157 -0
- synaptoroute/trainer.py +77 -0
- synaptoroute-0.3.0.dist-info/METADATA +229 -0
- synaptoroute-0.3.0.dist-info/RECORD +19 -0
- synaptoroute-0.3.0.dist-info/WHEEL +4 -0
- synaptoroute-0.3.0.dist-info/licenses/LICENSE +21 -0
synaptoroute/__init__.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""
|
|
2
|
+
SynaptoRoute
|
|
3
|
+
A high-throughput, local semantic routing engine.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import importlib.metadata
|
|
7
|
+
|
|
8
|
+
# Read the version of the installed "synaptoroute" distribution from package
# metadata; fall back to "unknown" when that distribution cannot be found
# (PackageNotFoundError).
try:
    __version__ = importlib.metadata.version("synaptoroute")
except importlib.metadata.PackageNotFoundError:
    __version__ = "unknown"
|
|
12
|
+
|
|
13
|
+
from synaptoroute.router import AdaptiveRouter
|
|
14
|
+
from synaptoroute.encoder import Encoder
|
|
15
|
+
from synaptoroute.storage import BaseStorage, SQLiteStorage
|
|
16
|
+
from synaptoroute.models import Route
|
|
17
|
+
|
|
18
|
+
__all__ = ["AdaptiveRouter", "Encoder", "BaseStorage", "SQLiteStorage", "Route", "__version__"]
|
synaptoroute/encoder.py
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
import numpy as np
|
|
3
|
+
import numpy.typing as npt
|
|
4
|
+
from typing import List, Optional
|
|
5
|
+
|
|
6
|
+
class BaseEncoder(abc.ABC):
    """
    Interface shared by every SynaptoRoute encoder.

    A concrete encoder must provide the two read-only properties
    ``requires_lock`` and ``dim`` and the two embedding methods ``encode`` and
    ``encode_batch``; the class cannot be instantiated until all four exist.
    """

    @property
    @abc.abstractmethod
    def requires_lock(self) -> bool:
        """Boolean flag reported by the encoder.

        NOTE(review): presumably tells the caller whether calls into the
        encoder must be serialised -- confirm against the router.
        """

    @property
    @abc.abstractmethod
    def dim(self) -> int:
        """Width of the embedding vectors produced by this encoder."""

    @abc.abstractmethod
    def encode(self, text: str) -> npt.NDArray[np.float32]:
        """Embed a single text and return its vector."""

    @abc.abstractmethod
    def encode_batch(self, texts: List[str]) -> npt.NDArray[np.float32]:
        """Embed several texts and return their vectors as one array."""
|
|
27
|
+
|
|
28
|
+
class FastEmbedEncoder(BaseEncoder):
    """
    Handles local intent embeddings using fastembed ONNX models.
    """

    def __init__(self, model_name: str = "BAAI/bge-small-en-v1.5", providers: Optional[List[str]] = None, threads: Optional[int] = None):
        """Load the fastembed model and record its embedding width.

        Args:
            model_name: Name of the fastembed text-embedding model to load.
            providers: Execution providers forwarded to ``TextEmbedding``;
                defaults to ``["CPUExecutionProvider"]`` when omitted.
            threads: Thread count forwarded to ``TextEmbedding``.
        """
        # Imported here so that importing this module does not itself import fastembed.
        from fastembed import TextEmbedding

        if providers is None:
            providers = ["CPUExecutionProvider"]
        self.model = TextEmbedding(model_name=model_name, providers=providers, threads=threads)
        # Probe dimensionality directly from the model using a dummy token
        self._dim = len(list(self.model.embed(["test"]))[0])

    @property
    def requires_lock(self) -> bool:
        """Always ``True`` for this encoder."""
        return True

    @property
    def dim(self) -> int:
        """Embedding width measured from the model at construction time."""
        return self._dim

    def encode(self, text: str) -> npt.NDArray[np.float32]:
        """Embed one text and return its vector as float32."""
        embeddings = list(self.model.embed([text]))
        # Cast so the result honours the declared float32 return type, the same
        # way encode_batch does; this is a no-op when the model already yields float32.
        return np.asarray(embeddings[0], dtype=np.float32)

    def encode_batch(self, texts: List[str]) -> npt.NDArray[np.float32]:
        """Embed many texts, returning an array with one float32 row per text.

        An empty input yields an empty ``(0, dim)`` array without calling the model.
        """
        if not texts:
            # Dynamically use the model's true dimensionality
            return np.empty((0, self.dim), dtype=np.float32)

        # The model is fed at most this many texts per embed() call.
        chunk_size = 500
        all_embeddings = []
        for i in range(0, len(texts), chunk_size):
            all_embeddings.extend(self.model.embed(texts[i:i + chunk_size]))

        return np.array(all_embeddings, dtype=np.float32)
|
|
65
|
+
|
|
66
|
+
class OpenAIEncoder(BaseEncoder):
    """
    Remote encoder that obtains intent embeddings from the OpenAI embeddings API.
    """

    def __init__(self, model_name: str = "text-embedding-3-small", dim: Optional[int] = None, dimensions: Optional[int] = None, client=None):
        """Store the client and model, and work out the embedding width.

        The width is taken from ``dimensions`` if given, else from ``dim``,
        else from a table of known model names.

        Raises:
            RuntimeError: the ``openai`` package is not installed.
            ValueError: no width was supplied and the model is not a known one.
        """
        try:
            import openai
        except ImportError as e:
            raise RuntimeError("Please install synaptoroute[openai] to use the OpenAIEncoder. Run `pip install synaptoroute[openai]`.") from e

        self.model_name = model_name
        self.client = client or openai.OpenAI()
        self.dimensions = dimensions

        if dimensions is not None:
            self._dim = dimensions
            return
        if dim is not None:
            self._dim = dim
            return

        # Hardcode based on known models to save an API call
        known_dims = {
            "text-embedding-3-small": 1536,
            "text-embedding-3-large": 3072,
            "text-embedding-ada-002": 1536,
        }
        if model_name not in known_dims:
            raise ValueError("dim must be provided explicitly for unknown models.")
        self._dim = known_dims[model_name]

    @property
    def requires_lock(self) -> bool:
        """Always ``False`` for this encoder."""
        return False

    @property
    def dim(self) -> int:
        """Embedding width resolved in ``__init__``."""
        return self._dim

    def _request_kwargs(self, inputs: List[str]) -> dict:
        """Build the keyword arguments for one ``embeddings.create`` call."""
        request = {"input": inputs, "model": self.model_name}
        if self.dimensions is not None:
            request["dimensions"] = self.dimensions
        return request

    def encode(self, text: str) -> npt.NDArray[np.float32]:
        """Embed one text; OpenAI errors are re-raised as ``SynaptoRouteError``."""
        import openai
        from synaptoroute.exceptions import SynaptoRouteError

        request = self._request_kwargs([text])
        try:
            response = self.client.embeddings.create(**request)
            return np.array(response.data[0].embedding, dtype=np.float32)
        except openai.OpenAIError as e:
            raise SynaptoRouteError(f"OpenAI API Error: {e}") from e

    def encode_batch(self, texts: List[str]) -> npt.NDArray[np.float32]:
        """Embed many texts, sending them to the API in slices of 2048.

        An empty input yields an empty ``(0, dim)`` array without any API call;
        OpenAI errors are re-raised as ``SynaptoRouteError``.
        """
        if not texts:
            return np.empty((0, self.dim), dtype=np.float32)

        import openai
        from synaptoroute.exceptions import SynaptoRouteError

        per_request = 2048
        try:
            vectors = []
            for start in range(0, len(texts), per_request):
                request = self._request_kwargs(texts[start:start + per_request])
                response = self.client.embeddings.create(**request)
                vectors.extend([item.embedding for item in response.data])
            return np.array(vectors, dtype=np.float32)
        except openai.OpenAIError as e:
            raise SynaptoRouteError(f"OpenAI API Error: {e}") from e
|
|
134
|
+
|
|
135
|
+
# Preserve backwards compatibility: `Encoder` remains importable as an alias
# of FastEmbedEncoder (the package __init__ imports `Encoder` from this module).
Encoder = FastEmbedEncoder
|
|
synaptoroute/exceptions.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
class SynaptoRouteError(Exception):
    """Root of the SynaptoRoute exception hierarchy."""


class RouteNotFoundError(SynaptoRouteError):
    """A route that was asked for does not exist."""


class ModelLoadError(SynaptoRouteError):
    """An embedding model could not be loaded."""


class RouterOverloadedError(SynaptoRouteError):
    """The dynamic batching queue is full (DDoS protection)."""


class RouterCapacityError(SynaptoRouteError):
    """The router's maximum capacity has been exceeded."""
|
synaptoroute/index.py
ADDED
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import threading
|
|
3
|
+
from typing import List, Tuple
|
|
4
|
+
|
|
5
|
+
try:
|
|
6
|
+
import faiss
|
|
7
|
+
HAS_FAISS = True
|
|
8
|
+
except ImportError:
|
|
9
|
+
HAS_FAISS = False
|
|
10
|
+
|
|
11
|
+
class NumpyIndex:
    """
    Dense cosine-similarity index kept in one preallocated numpy matrix.

    Returned by ``get_index`` when FAISS is not installed. Vectors are
    L2-normalised on insertion and written into the next free rows of
    ``self.embeddings``. Deleting a route only tombstones its row ids; the
    rows are reclaimed when ``rebuild`` re-creates the matrix.
    """

    def __init__(self, dim: int, max_capacity: int = 50000):
        """Allocate a zeroed ``(max_capacity, dim)`` float32 matrix and empty bookkeeping."""
        self.dim = dim
        self.lock = threading.Lock()
        self.embeddings = np.zeros((max_capacity, dim), dtype=np.float32)
        self.tombstones = set()
        # Array mirror of `tombstones`, refreshed on delete, used to mask rows in search.
        self._tombstone_array = np.array([], dtype=int)
        self._id_to_route = {}
        self._route_to_ids = {}
        self._next_id = 0
        self.max_capacity = max_capacity
        self.ntotal = 0

    def _add_unlocked(self, embeddings: np.ndarray, route_name: str):
        """Append ``embeddings`` for ``route_name``; the caller must hold ``self.lock``.

        Raises:
            ValueError: the rows would not fit in the remaining capacity.
        """
        num_embs = embeddings.shape[0]
        if self._next_id + num_embs > self.max_capacity:
            # Message kept verbatim in case callers match on it.
            raise ValueError("ID_OVERFLOW")

        if embeddings.dtype != np.float32:
            embeddings = embeddings.astype(np.float32)

        norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
        norms[norms == 0] = 1  # avoid dividing all-zero vectors by zero
        embeddings = embeddings / norms

        start = self._next_id
        self.embeddings[start:start + num_embs] = embeddings
        ids = list(range(start, start + num_embs))

        self._route_to_ids.setdefault(route_name, []).extend(ids)
        for i in ids:
            self._id_to_route[i] = route_name

        self._next_id += num_embs
        self.ntotal += num_embs

    def add(self, embeddings: np.ndarray, route_name: str):
        """Thread-safe insertion of ``embeddings`` under ``route_name``.

        Each vector is stored exactly once. The previous implementation
        repeated the insertion after calling ``_add_unlocked``, which stored
        every vector twice.

        Raises:
            ValueError: the rows would not fit in the remaining capacity.
        """
        with self.lock:
            self._add_unlocked(embeddings, route_name)

    def delete(self, route_name: str):
        """Tombstone every vector of ``route_name``; unknown routes are ignored."""
        with self.lock:
            if route_name in self._route_to_ids:
                ids = self._route_to_ids[route_name]
                self.tombstones.update(ids)
                self._tombstone_array = np.array(list(self.tombstones), dtype=int)
                for i in ids:
                    self._id_to_route.pop(i, None)
                del self._route_to_ids[route_name]

    def search(self, query_embeddings: np.ndarray, top_k: int = 1) -> List[List[Tuple[float, str]]]:
        """Return, per query row, up to ``top_k`` ``(score, route_name)`` pairs, best first.

        Scores are dot products between the L2-normalised queries and the
        stored (already normalised) vectors. Tombstoned rows are excluded.
        """
        with self.lock:
            if self.ntotal == 0 or self._next_id == 0:
                return [[] for _ in range(query_embeddings.shape[0])]

            if query_embeddings.dtype != np.float32:
                query_embeddings = query_embeddings.astype(np.float32)

            norms = np.linalg.norm(query_embeddings, axis=1, keepdims=True)
            norms[norms == 0] = 1
            query_embeddings = query_embeddings / norms

            valid_mask = np.ones(self._next_id, dtype=bool)
            if self._tombstone_array.size > 0:
                valid_mask[self._tombstone_array] = False

            if not np.any(valid_mask):
                return [[] for _ in range(query_embeddings.shape[0])]

            scores = np.dot(query_embeddings, self.embeddings[:self._next_id].T)
            # Row ids of the live vectors; identical for every query, so computed once.
            valid_indices = np.arange(self._next_id)[valid_mask]

            results = []
            for i in range(scores.shape[0]):
                valid_scores = scores[i][valid_mask]

                num_results = min(top_k, len(valid_scores))
                if num_results <= 0:
                    results.append([])
                    continue

                # Sort descending; argpartition avoids a full sort when only
                # a subset of the scores is needed.
                if len(valid_scores) > num_results:
                    sort_idx = np.argpartition(valid_scores, -num_results)[-num_results:]
                    sort_idx = sort_idx[np.argsort(valid_scores[sort_idx])[::-1]]
                else:
                    sort_idx = np.argsort(valid_scores)[::-1]

                query_results = []
                for idx in sort_idx:
                    real_idx = int(valid_indices[idx])
                    route_name = self._id_to_route[real_idx]
                    query_results.append((float(valid_scores[idx]), route_name))
                results.append(query_results)
            return results

    @property
    def total_vectors(self) -> int:
        """Number of stored vectors that have not been tombstoned."""
        return self.ntotal - len(self.tombstones)

    def rebuild(self, route_map: dict, embeddings_map: dict):
        """Reset the index and re-insert the vectors of every route in ``route_map``.

        ``embeddings_map[route_name]`` is read as an iterable of raw float32
        buffers (``None`` entries are skipped). Routes that are missing from
        ``embeddings_map`` or have no usable buffers are left out.
        """
        with self.lock:
            self.embeddings = np.zeros((self.max_capacity, self.dim), dtype=np.float32)
            self._route_to_ids = {}
            self._id_to_route = {}
            self._next_id = 0
            self.tombstones.clear()
            self._tombstone_array = np.array([], dtype=int)
            self.ntotal = 0

            for route_name in route_map:
                if route_name not in embeddings_map:
                    continue
                embs_data = embeddings_map[route_name]
                if not embs_data:
                    continue
                valid_embs = [
                    np.frombuffer(e_bytes, dtype=np.float32)
                    for e_bytes in embs_data
                    if e_bytes is not None
                ]
                if not valid_embs:
                    continue
                self._add_unlocked(np.stack(valid_embs), route_name)
|
|
162
|
+
|
|
163
|
+
def get_index(dim: int, max_capacity: int = 50000):
    """Build the vector index for ``dim``-wide embeddings.

    Returns a ``FaissIndex`` when the ``faiss`` import succeeded at module
    load, otherwise a ``NumpyIndex``. ``max_capacity`` is only passed to the
    numpy index.
    """
    if not HAS_FAISS:
        return NumpyIndex(dim, max_capacity)
    return FaissIndex(dim)
|
|
168
|
+
|
|
169
|
+
class FaissIndex:
    """
    A FAISS-based vector index utilizing HNSW for sub-linear search latency.
    Employs a Tombstone architecture: deleting a route only marks its ids as
    dead, and ``rebuild`` later reconstructs the index without them.
    """

    def __init__(self, dim: int):
        """Create an empty inner-product HNSW index for ``dim``-wide vectors."""
        self.dim = dim
        self.lock = threading.Lock()

        self.index = self._new_index()

        self.tombstones = set()

        # Bidirectional mapping
        self._id_to_route = {}
        self._route_to_ids = {}
        self._next_id = 0

    def _new_index(self):
        """Return a fresh, empty ID-mapped HNSW index."""
        # Inner Product (Cosine Similarity for normalized embeddings);
        # 32 is the HNSW `M` argument of IndexHNSWFlat.
        base_index = faiss.IndexHNSWFlat(self.dim, 32, faiss.METRIC_INNER_PRODUCT)
        return faiss.IndexIDMap(base_index)

    @staticmethod
    def _normalized_copy(vectors: np.ndarray) -> np.ndarray:
        """Return an L2-normalised, C-contiguous float32 copy of ``vectors``.

        ``faiss.normalize_L2`` works in place, so it is applied to a private
        copy to leave the caller's array untouched.
        """
        out = np.array(vectors, dtype=np.float32, order="C")
        faiss.normalize_L2(out)
        return out

    def add(self, embeddings: np.ndarray, route_name: str):
        """Insert ``embeddings`` under ``route_name``; the input array is not modified."""
        with self.lock:
            num_embs = embeddings.shape[0]
            ids = np.arange(self._next_id, self._next_id + num_embs, dtype=np.int64)

            self.index.add_with_ids(self._normalized_copy(embeddings), ids)

            self._route_to_ids.setdefault(route_name, []).extend(ids.tolist())
            for i in ids:
                self._id_to_route[int(i)] = route_name

            self._next_id += num_embs

    def delete(self, route_name: str):
        """Tombstone every vector of ``route_name``; unknown routes are ignored."""
        with self.lock:
            if route_name in self._route_to_ids:
                ids = self._route_to_ids[route_name]
                self.tombstones.update(ids)
                for i in ids:
                    self._id_to_route.pop(i, None)
                del self._route_to_ids[route_name]

    def search(self, query_embeddings: np.ndarray, top_k: int = 1) -> List[List[Tuple[float, str]]]:
        """Return, per query row, up to ``top_k`` live ``(score, route_name)`` hits.

        Hits are taken in the order FAISS returns them, skipping empty slots
        (id ``-1``) and tombstoned ids. The input array is not modified.
        """
        with self.lock:
            # Overfetch to account for tombstones
            search_k = min(self.index.ntotal, max(top_k + len(self.tombstones) * 2, 2048))

            if search_k == 0:
                return [[] for _ in range(query_embeddings.shape[0])]

            queries = self._normalized_copy(query_embeddings)
            distances, indices = self.index.search(queries, search_k)

            results = []
            for row_scores, row_ids in zip(distances, indices):
                query_results = []
                for score, raw_id in zip(row_scores, row_ids):
                    idx = int(raw_id)
                    if idx != -1 and idx not in self.tombstones:
                        query_results.append((float(score), self._id_to_route[idx]))
                        if len(query_results) == top_k:
                            break
                results.append(query_results)

            return results

    @property
    def total_vectors(self) -> int:
        """Vectors held by the FAISS index minus the tombstoned ones."""
        return self.index.ntotal - len(self.tombstones)

    def rebuild(self, route_map: dict, embeddings_map: dict):
        """Garbage Collection: Completely reconstructs the HNSW index to flush dead vectors.

        ``embeddings_map[route_name]`` is read as an iterable of raw float32
        buffers (``None`` entries are skipped). Routes that are missing from
        ``embeddings_map`` or have no usable buffers are left out.
        """
        with self.lock:
            # Build everything on the side, then swap it in below.
            new_index = self._new_index()
            new_route_to_ids = {}
            new_id_to_route = {}
            next_id = 0

            for route_name in route_map:
                if route_name not in embeddings_map:
                    continue

                embs_data = embeddings_map[route_name]
                if not embs_data:
                    continue

                # Collect embeddings for this route
                valid_embs = [
                    np.frombuffer(e_bytes, dtype=np.float32)
                    for e_bytes in embs_data
                    if e_bytes is not None
                ]
                if not valid_embs:
                    continue

                # np.stack allocates a new array, so normalising in place is safe here.
                embeddings = np.stack(valid_embs)
                num_embs = embeddings.shape[0]
                ids = np.arange(next_id, next_id + num_embs, dtype=np.int64)

                faiss.normalize_L2(embeddings)
                new_index.add_with_ids(embeddings, ids)

                new_route_to_ids[route_name] = ids.tolist()
                for i in ids:
                    new_id_to_route[int(i)] = route_name

                next_id += num_embs

            # Swap in the rebuilt state while still holding the lock
            self.index = new_index
            self._route_to_ids = new_route_to_ids
            self._id_to_route = new_id_to_route
            self._next_id = next_id
            self.tombstones.clear()
|
|
synaptoroute/integrations/langchain.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from typing import Any, Optional
|
|
2
|
+
from langchain_core.runnables import Runnable, RunnableConfig
|
|
3
|
+
|
|
4
|
+
from synaptoroute.router import AdaptiveRouter
|
|
5
|
+
|
|
6
|
+
class SynaptoRouteChain(Runnable):
    """
    LangChain Runnable that maps a string input to a SynaptoRoute route name.

    Yields the matched route's ``name``, or the literal 'default' when the
    router returns nothing usable.
    """

    def __init__(self, router: AdaptiveRouter):
        self.router = router

    @staticmethod
    def _name_or_default(route: Any) -> str:
        """Return ``route.name`` for a truthy result that has a name, else 'default'."""
        matched = bool(route) and hasattr(route, 'name')
        return route.name if matched else "default"

    def invoke(self, input: str, config: Optional[RunnableConfig] = None, **kwargs: Any) -> str:
        """Route ``input`` by calling the router; ``config`` and ``kwargs`` are unused."""
        return self._name_or_default(self.router(input))

    async def ainvoke(self, input: str, config: Optional[RunnableConfig] = None, **kwargs: Any) -> str:
        """Async variant of ``invoke`` that awaits ``router.aquery``."""
        return self._name_or_default(await self.router.aquery(input))
|
|
synaptoroute/integrations/llamaindex.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
from typing import Sequence, Any
|
|
2
|
+
|
|
3
|
+
from llama_index.core.schema import QueryBundle
|
|
4
|
+
from llama_index.core.selectors import BaseSelector, SelectorResult, SingleSelection
|
|
5
|
+
from llama_index.core.tools.types import ToolMetadata
|
|
6
|
+
|
|
7
|
+
from synaptoroute.router import AdaptiveRouter
|
|
8
|
+
|
|
9
|
+
class SynaptoRouteSelector(BaseSelector):
    """
    LlamaIndex selector that picks a choice by asking SynaptoRoute's AdaptiveRouter.

    The router's route name (or 'default' when it returns nothing) is matched
    against the ``name`` of each offered choice.
    """
    router: Any

    def __init__(self, router: AdaptiveRouter):
        super().__init__()
        self.router = router

    def _get_prompts(self) -> dict:
        """This selector has no prompts."""
        return {}

    def _update_prompts(self, prompts: dict) -> None:
        """Nothing to update; this selector has no prompts."""
        pass

    @staticmethod
    def _result_for(route_name: str, choices: Sequence[ToolMetadata]) -> SelectorResult:
        """Select the first choice named ``route_name``.

        Raises:
            ValueError: none of the choices carries that name.
        """
        for position, choice in enumerate(choices):
            if choice.name == route_name:
                return SelectorResult(
                    selections=[SingleSelection(index=position, reason=f"Matched route: {route_name}")]
                )
        raise ValueError(f"No matching choice found for route: {route_name}")

    def _select(self, choices: Sequence[ToolMetadata], query: QueryBundle) -> SelectorResult:
        """Route ``query.query_str`` synchronously and select the matching choice."""
        result = self.router(query.query_str)
        route_name = result.name if result else "default"
        return self._result_for(route_name, choices)

    async def _aselect(self, choices: Sequence[ToolMetadata], query: QueryBundle) -> SelectorResult:
        """Async variant of ``_select`` that awaits ``router.aquery``."""
        result = await self.router.aquery(query.query_str)
        route_name = result.name if result else "default"
        return self._result_for(route_name, choices)
|
synaptoroute/metrics.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
try:
|
|
2
|
+
from prometheus_client import Histogram, Gauge, Counter, generate_latest, CollectorRegistry
|
|
3
|
+
HAS_PROMETHEUS = True
|
|
4
|
+
except ImportError:
|
|
5
|
+
HAS_PROMETHEUS = False
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
import collections
|
|
9
|
+
|
|
10
|
+
class _MockHistogram:
    """Histogram stand-in that remembers at most the 1000 most recent observations."""

    def __init__(self, name):
        self.name = name
        # Bounded deque: once full, appending drops the oldest observation.
        self.observations = collections.deque(maxlen=1000)

    def observe(self, amount):
        """Record one observation."""
        samples = self.observations
        samples.append(amount)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class _MockGauge:
    """Gauge stand-in holding a single float that can be set, raised or lowered."""

    def __init__(self, name):
        self.name = name
        self.value = 0.0

    def set(self, value):
        """Overwrite the gauge with ``value`` converted to float."""
        self.value = float(value)

    def inc(self, amount=1.0):
        """Raise the gauge by ``amount``."""
        self.value = self.value + float(amount)

    def dec(self, amount=1.0):
        """Lower the gauge by ``amount``."""
        self.value = self.value - float(amount)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class _MockCounter:
    """Counter stand-in holding a running float total."""

    def __init__(self, name):
        self.name = name
        self.value = 0.0

    def inc(self, amount=1.0):
        """Add ``amount`` to the running total."""
        self.value = self.value + float(amount)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class MetricsRegistry:
    """Holds the router's metrics, backed by prometheus_client when it is importable.

    Without prometheus_client the same attribute names are bound to the
    in-process mock metric classes and ``registry`` is ``None``.
    """

    def __init__(self):
        if HAS_PROMETHEUS:
            # A dedicated registry is created and every metric is registered on it.
            reg = CollectorRegistry()
            self.registry = reg
            self.inference_latency_seconds = Histogram("inference_latency_seconds", "Latency of routing", registry=reg)
            self.batch_size = Histogram("batch_size", "Batch size of queries processed", registry=reg)
            self.queue_depth = Gauge("queue_depth", "Depth of the query queue", registry=reg)
            self.capacity_usage = Gauge("capacity_usage", "Number of vectors stored in capacity", registry=reg)
            self.gc_errors = Counter("gc_errors_total", "Number of failed background index rebuilds", registry=reg)
            return

        self.registry = None
        self.inference_latency_seconds = _MockHistogram("inference_latency_seconds")
        self.batch_size = _MockHistogram("batch_size")
        self.queue_depth = _MockGauge("queue_depth")
        self.capacity_usage = _MockGauge("capacity_usage")
        self.gc_errors = _MockCounter("gc_errors_total")

    def export_metrics(self) -> str:
        """Render all metrics as text.

        With prometheus_client this is the output of ``generate_latest``
        decoded as UTF-8. Otherwise it is one line per mock metric, keyed by
        attribute name: ``count``/``avg`` for histograms, ``value`` for gauges
        and counters.
        """
        if HAS_PROMETHEUS:
            return generate_latest(self.registry).decode("utf-8")

        rendered = []
        for attr, metric in vars(self).items():
            if isinstance(metric, _MockHistogram):
                samples = metric.observations
                mean = sum(samples) / len(samples) if samples else 0
                rendered.append(f"{attr}: count={len(samples)} avg={mean}")
                continue
            if isinstance(metric, (_MockGauge, _MockCounter)):
                rendered.append(f"{attr}: value={metric.value}")
        return "\n".join(rendered)
|
synaptoroute/models.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from typing import List, Optional, Dict, Any
|
|
3
|
+
from pydantic import BaseModel, Field, field_validator, StringConstraints, ConfigDict
|
|
4
|
+
from typing_extensions import Annotated
|
|
5
|
+
|
|
6
|
+
NonEmptyString = Annotated[str, StringConstraints(strip_whitespace=True, min_length=1)]
|
|
7
|
+
|
|
8
|
+
class Route(BaseModel):
    """
    Represents a single semantic route or intent.
    """
    # Validators also run when a field is assigned after construction.
    model_config = ConfigDict(validate_assignment=True)
    # Non-empty; only letters, digits, underscore and hyphen are accepted.
    name: str = Field(..., min_length=1, pattern=r"^[a-zA-Z0-9_-]+$")
    # At least one entry; each is whitespace-stripped and must be non-empty.
    utterances: List[NonEmptyString] = Field(..., min_length=1)
    # Bounded to [-1.0, 1.0], default 0.5.
    threshold: float = Field(0.5, ge=-1.0, le=1.0)
    # Optional free-form dict; must be JSON serializable.
    metadata: Optional[Dict[str, Any]] = None

    @field_validator('metadata')
    @classmethod
    def validate_metadata_serializable(cls, v: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
        """Reject metadata that ``json.dumps`` cannot serialize; ``None`` passes through.

        Raises:
            ValueError: the metadata is not JSON serializable.
        """
        if v is None:
            return v
        try:
            json.dumps(v)
        except (TypeError, ValueError) as e:
            # Chain the original error so the root cause stays visible.
            raise ValueError(f"Metadata must be JSON serializable: {e}") from e
        return v

    @field_validator('utterances')
    @classmethod
    def deduplicate_utterances(cls, v: List[str]) -> List[str]:
        """Drop repeated utterances, keeping the first occurrence of each in order.

        Raises:
            ValueError: nothing is left after deduplication.
        """
        # dict preserves insertion order, so this is an order-preserving dedupe.
        deduped = list(dict.fromkeys(v))
        if not deduped:
            raise ValueError("Route must have at least one valid utterance.")
        return deduped
|
synaptoroute/profile.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from enum import Enum
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
class ProfileType(Enum):
    """The two optimization profiles understood by ``get_profile``."""

    LATENCY = "latency"
    THROUGHPUT = "throughput"
|
|
8
|
+
|
|
9
|
+
@dataclass
class OptimizationProfile:
    """Tuning values bundled for one ``ProfileType``, as produced by ``get_profile``."""

    # Which profile these settings belong to.
    type: ProfileType
    # Thread count chosen for the profile.
    threads: int
    # Batch size chosen for the profile.
    batch_size: int
    # Batch timeout chosen for the profile -- presumably seconds; TODO confirm.
    batch_timeout: float
|
|
15
|
+
|
|
16
|
+
def get_profile(profile_type: ProfileType = ProfileType.THROUGHPUT) -> OptimizationProfile:
    """Return the preset ``OptimizationProfile`` for ``profile_type``.

    ``ProfileType.LATENCY`` gives batch size 1, no batch timeout and all but
    one of the CPUs as threads (at least 1; 4 CPUs are assumed when the count
    is unknown). Any other value gives the throughput preset: 1 thread, batch
    size 32 and a 0.005 batch timeout.
    """
    cpu_count = os.cpu_count() or 4
    wants_latency = profile_type == ProfileType.LATENCY

    if not wants_latency:
        return OptimizationProfile(
            type=ProfileType.THROUGHPUT,
            threads=1,
            batch_size=32,
            batch_timeout=0.005,
        )

    worker_threads = max(1, cpu_count - 1)
    return OptimizationProfile(
        type=ProfileType.LATENCY,
        threads=worker_threads,
        batch_size=1,
        batch_timeout=0.0,
    )
|