tidevec 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tidevec/__init__.py +20 -0
- tidevec/client.py +602 -0
- tidevec/cortexdb_pb2.py +123 -0
- tidevec/cortexdb_pb2_grpc.py +635 -0
- tidevec/tidevec_pb2.py +123 -0
- tidevec/tidevec_pb2_grpc.py +635 -0
- tidevec-0.1.0.dist-info/METADATA +159 -0
- tidevec-0.1.0.dist-info/RECORD +9 -0
- tidevec-0.1.0.dist-info/WHEEL +4 -0
tidevec/__init__.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""TideVec Python SDK — Temporally-aware causal vector database."""
|
|
2
|
+
|
|
3
|
+
from .client import (
|
|
4
|
+
TideVec,
|
|
5
|
+
AsyncTideVec,
|
|
6
|
+
SearchHit,
|
|
7
|
+
SearchResponse,
|
|
8
|
+
CollectionInfo,
|
|
9
|
+
HalfLife,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
# Version string of this SDK release.
__version__ = "0.1.0"

# Public names re-exported from ``.client``; this is what
# ``from tidevec import *`` exposes.
__all__ = [
    "TideVec", "AsyncTideVec",
    "SearchHit", "SearchResponse",
    "CollectionInfo",
    "HalfLife",
]
|
tidevec/client.py
ADDED
|
@@ -0,0 +1,602 @@
|
|
|
1
|
+
"""
|
|
2
|
+
TideVec Python SDK
|
|
3
|
+
====================
|
|
4
|
+
Temporally-aware causal vector database client.
|
|
5
|
+
|
|
6
|
+
Install:
|
|
7
|
+
pip install tidevec-py
|
|
8
|
+
|
|
9
|
+
Quick start:
|
|
10
|
+
from tidevec import TideVec
|
|
11
|
+
|
|
12
|
+
db = TideVec("localhost:6399")
|
|
13
|
+
|
|
14
|
+
db.create_collection("docs", dim=768)
|
|
15
|
+
db.upsert("docs", [{"id": "v1", "embedding": [0.1, ...], "payload": {"src": "wiki"}}])
|
|
16
|
+
results = db.search("docs", query_vector=[0.1, ...], top_k=10, temporal_blend=0.3)
|
|
17
|
+
for r in results:
|
|
18
|
+
print(r.id, r.score, r.temporal_score)
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import time
|
|
24
|
+
import warnings
|
|
25
|
+
from dataclasses import dataclass, field
|
|
26
|
+
from typing import Any, Dict, Iterator, List, Optional, Union
|
|
27
|
+
|
|
28
|
+
import grpc
|
|
29
|
+
|
|
30
|
+
try:
|
|
31
|
+
from . import tidevec_pb2 as pb
|
|
32
|
+
from . import tidevec_pb2_grpc as pb_grpc
|
|
33
|
+
except ImportError:
|
|
34
|
+
import tidevec_pb2 as pb # type: ignore
|
|
35
|
+
import tidevec_pb2_grpc as pb_grpc # type: ignore
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# ================================================================
|
|
39
|
+
# Typed result objects
|
|
40
|
+
# ================================================================
|
|
41
|
+
|
|
42
|
+
@dataclass
class SearchHit:
    """A single match returned by a vector search.

    Instances are normally built with :meth:`_from_proto`, which copies the
    same-named fields off one search-result message.
    """

    id: str
    score: float
    vector_score: float
    temporal_score: float
    payload: Dict[str, str]
    created_at: int
    # The remaining fields are optional so a hit can be built by hand
    # without any staleness / graph information.
    staleness_warning: bool = False
    staleness_reason: str = ""
    causal_neighbors: List[str] = field(default_factory=list)
    contradicted_by: List[str] = field(default_factory=list)

    @classmethod
    def _from_proto(cls, r) -> "SearchHit":
        """Create a ``SearchHit`` from a result message ``r``.

        ``r`` only needs to expose attributes named like the fields of this
        class. ``payload`` is copied into a plain ``dict`` and the two id
        collections into plain ``list`` objects, so the hit does not keep
        referring to the message's own containers.
        """
        return cls(
            r.id,
            r.score,
            r.vector_score,
            r.temporal_score,
            dict(r.payload),
            r.created_at,
            staleness_warning=r.staleness_warning,
            staleness_reason=r.staleness_reason,
            causal_neighbors=list(r.causal_neighbors),
            contradicted_by=list(r.contradicted_by),
        )

    def __repr__(self) -> str:
        """Short summary: id, score (4 decimals) and temporal score (3 decimals)."""
        ident = f"id={self.id!r}"
        score = f"score={self.score:.4f}"
        temporal = f"temporal={self.temporal_score:.3f}"
        return f"SearchHit({ident}, {score}, {temporal})"
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
@dataclass
class SearchResponse:
    """Everything returned by one search call.

    The object behaves like a read-only sequence over ``hits``: it can be
    iterated, measured with ``len()`` and indexed.
    """

    hits: List[SearchHit]
    count: int
    # Trace-related values; they keep these defaults unless the caller
    # constructing the response supplies them.
    latency_ms: float = 0.0
    query_id: str = ""
    strategy: str = ""
    staleness_warnings: List[dict] = field(default_factory=list)
    contradiction_alerts: List[dict] = field(default_factory=list)

    def __iter__(self) -> Iterator[SearchHit]:
        """Iterate over the hits in their stored order."""
        hits = self.hits
        return iter(hits)

    def __len__(self) -> int:
        """Number of hits held (``len(self.hits)``, independent of ``count``)."""
        hits = self.hits
        return len(hits)

    def __getitem__(self, idx: int) -> SearchHit:
        """Return ``self.hits[idx]``."""
        hits = self.hits
        return hits[idx]
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@dataclass
class CollectionInfo:
    """Descriptive metadata for one collection.

    All six fields are required and are positional in the order declared
    below.
    """

    name: str        # collection name
    n_vectors: int   # vector count reported for the collection
    n_shards: int    # shard count reported for the collection
    dim: int         # embedding dimensionality
    index_type: str  # index type name
    metric: str      # distance metric, stored as a string
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
# ================================================================
|
|
108
|
+
# Temporal preset helpers
|
|
109
|
+
# ================================================================
|
|
110
|
+
|
|
111
|
+
class HalfLife:
    """Named half-life durations, expressed in milliseconds.

    The values are intended for ``half_life_ms`` arguments. A month is taken
    as 30 days and a year as 365 days.
    """

    # Base durations, each derived from the previous one.
    ONE_HOUR = 60 * 60 * 1000
    ONE_DAY = 24 * ONE_HOUR
    ONE_WEEK = 7 * ONE_DAY
    ONE_MONTH = 30 * ONE_DAY
    ONE_YEAR = 365 * ONE_DAY

    # Use-case aliases for the durations above.
    AGENT_SESSION = ONE_HOUR
    NEWS_FEED = ONE_DAY
    SUPPORT_TICKET = ONE_MONTH
    DOCUMENT_STORE = ONE_YEAR
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
# ================================================================
|
|
126
|
+
# Main client
|
|
127
|
+
# ================================================================
|
|
128
|
+
|
|
129
|
+
class TideVec:
    """
    Synchronous TideVec client over gRPC.

    Every public method issues one RPC on the stub created at construction
    time, passing the configured timeout and, when an API key is set, an
    ``x-api-key`` metadata entry.

    Args:
        host:     Server host:port (default "localhost:6399")
        api_key:  Optional API key (sent as ``x-api-key`` call metadata)
        timeout:  Default RPC timeout in seconds
        use_grpc: Open the gRPC channel on construction (default True).
                  This class implements no other transport, so with
                  ``use_grpc=False`` the client stays unconnected and every
                  RPC method raises ``RuntimeError``.
        tls:      Use TLS for gRPC (default False for local dev)
    """

    def __init__(
        self,
        host: str = "localhost:6399",
        api_key: str = "",
        timeout: float = 30.0,
        use_grpc: bool = True,
        tls: bool = False,
    ):
        self._host = host
        self._api_key = api_key
        self._timeout = timeout
        self._channel: Optional[grpc.Channel] = None
        self._stub: Optional[pb_grpc.TideVecStub] = None

        if use_grpc:
            self._connect_grpc(tls)

    def _connect_grpc(self, tls: bool) -> None:
        """Create the channel (secure when ``tls`` is true) and the stub."""
        options = [
            ("grpc.max_send_message_length", 256 * 1024 * 1024),
            ("grpc.max_receive_message_length", 256 * 1024 * 1024),
            ("grpc.keepalive_time_ms", 30_000),
        ]
        if tls:
            self._channel = grpc.secure_channel(
                self._host, grpc.ssl_channel_credentials(), options)
        else:
            self._channel = grpc.insecure_channel(self._host, options)
        self._stub = pb_grpc.TideVecStub(self._channel)

    # ---- Internal helpers --------------------------------------

    def _meta(self) -> List[tuple]:
        """Return per-call metadata: the API key header, or nothing."""
        if self._api_key:
            return [("x-api-key", self._api_key)]
        return []

    def _call_opts(self) -> Dict[str, Any]:
        """Keyword arguments shared by every RPC invocation."""
        return {"timeout": self._timeout, "metadata": self._meta()}

    def _require_stub(self):
        """Return the gRPC stub, raising ``RuntimeError`` if there is none.

        Without this check an unconnected client fails with an opaque
        ``AttributeError`` on ``None``.
        """
        if self._stub is None:
            raise RuntimeError(
                "TideVec client is not connected: construct it with "
                "use_grpc=True before calling RPC methods")
        return self._stub

    @staticmethod
    def _edge_type_map() -> Dict[str, Any]:
        """Edge-type names accepted in edge dicts, mapped to protobuf enums."""
        return {
            "CAUSES": pb.EdgeType.CAUSES,
            "CONTRADICTS": pb.EdgeType.CONTRADICTS,
            "UPDATES": pb.EdgeType.UPDATES,
            "RELATED_TO": pb.EdgeType.RELATED_TO,
            "ENTITY_OF": pb.EdgeType.ENTITY_OF,
            "SUPPORTS": pb.EdgeType.SUPPORTS,
        }

    @staticmethod
    def _metric_enum(metric: str):
        """Map a metric name (case-insensitive) to its enum; unknown -> COSINE."""
        return {
            "cosine": pb.Metric.COSINE,
            "l2": pb.Metric.L2,
            "dot": pb.Metric.DOT,
        }.get(metric.lower(), pb.Metric.COSINE)

    @staticmethod
    def _device_enum(device: str):
        """Map a device name (case-insensitive) to its enum; unknown -> AUTO."""
        return {
            "auto": pb.Device.AUTO,
            "cpu": pb.Device.CPU,
            "gpu": pb.Device.GPU,
            "tpu": pb.Device.TPU,
        }.get(device.lower(), pb.Device.AUTO)

    @staticmethod
    def _collection_info(info) -> CollectionInfo:
        """Convert a collection-info message into a ``CollectionInfo``."""
        return CollectionInfo(
            name=info.name,
            n_vectors=info.n_vectors,
            n_shards=info.n_shards,
            dim=info.dim,
            index_type=info.index_type,
            metric=str(info.metric),
        )

    @staticmethod
    def _to_pb_vector(v: Dict[str, Any], edge_types: Dict[str, Any]):
        """Convert one vector dict (see :meth:`upsert`) into ``pb.Vector``."""
        edges = [
            pb.CausalEdge(
                target_id=e["target_id"],
                type=edge_types.get(e.get("type", "RELATED_TO"),
                                    pb.EdgeType.RELATED_TO),
                weight=float(e.get("weight", 1.0)),
            )
            for e in v.get("edges", [])
        ]
        return pb.Vector(
            id=v["id"],
            embedding=v["embedding"],
            # Keys and values are stringified before being sent.
            payload={str(k): str(val)
                     for k, val in v.get("payload", {}).items()},
            ttl_seconds=v.get("ttl_seconds", 0),
            edges=edges,
        )

    # ---- Lifecycle ---------------------------------------------

    def close(self) -> None:
        """Close the underlying channel, if one was opened."""
        if self._channel:
            self._channel.close()

    def __enter__(self) -> "TideVec":
        return self

    def __exit__(self, *_) -> None:
        self.close()

    # ---- Health ------------------------------------------------

    def health(self) -> dict:
        """Check server health and return the response fields as a dict."""
        resp = self._require_stub().Health(
            pb.HealthRequest(), **self._call_opts())
        return {
            "status": resp.status,
            "version": resp.version,
            "collections": resp.collections,
            "timestamp_ms": resp.timestamp_ms,
            "gpu_available": resp.gpu_available,
            "tpu_available": resp.tpu_available,
        }

    def ping(self) -> bool:
        """Return True if a health call succeeds, False on ``grpc.RpcError``."""
        try:
            self.health()
        except grpc.RpcError:
            return False
        return True

    # ---- Collections -------------------------------------------

    def create_collection(
        self,
        name: str,
        dim: int,
        index_type: str = "tvindex",
        metric: str = "cosine",
        n_shards: int = 4,
        n_replicas: int = 1,
        write_quorum: int = 1,
        half_life_ms: int = HalfLife.ONE_MONTH,
        temporal_blend: float = 0.3,
    ) -> str:
        """Create a new collection and return the name the server reports.

        ``metric`` is one of "cosine", "l2" or "dot" (case-insensitive);
        any other value falls back to cosine. All other arguments are
        forwarded to the request unchanged.
        """
        req = pb.CreateCollectionRequest(
            name=name,
            dim=dim,
            index_type=index_type,
            metric=self._metric_enum(metric),
            n_shards=n_shards,
            n_replicas=n_replicas,
            write_quorum=write_quorum,
            temporal=pb.TemporalConfig(
                half_life_ms=half_life_ms,
                temporal_blend=temporal_blend,
            ),
        )
        resp = self._require_stub().CreateCollection(req, **self._call_opts())
        return resp.name

    def drop_collection(self, name: str) -> bool:
        """Drop a collection; True when the response status is ``"ok"``."""
        resp = self._require_stub().DropCollection(
            pb.DropCollectionRequest(name=name), **self._call_opts())
        return resp.status == "ok"

    def get_collection(self, name: str) -> CollectionInfo:
        """Fetch metadata for one collection."""
        resp = self._require_stub().GetCollection(
            pb.GetCollectionRequest(name=name), **self._call_opts())
        return self._collection_info(resp.info)

    def list_collections(self) -> List[CollectionInfo]:
        """Fetch metadata for every collection the server returns."""
        resp = self._require_stub().ListCollections(
            pb.ListCollectionsRequest(), **self._call_opts())
        return [self._collection_info(c) for c in resp.collections]

    def set_temporal(
        self, name: str,
        half_life_ms: int,
        temporal_blend: float = 0.3,
    ) -> None:
        """Update temporal decay settings for a collection."""
        self._require_stub().UpdateTemporal(
            pb.UpdateTemporalRequest(
                name=name,
                config=pb.TemporalConfig(
                    half_life_ms=half_life_ms,
                    temporal_blend=temporal_blend,
                ),
            ),
            **self._call_opts())

    # ---- Vectors -----------------------------------------------

    def upsert(
        self,
        collection: str,
        vectors: List[Dict[str, Any]],
    ) -> int:
        """
        Upsert vectors into a collection.

        Each vector dict:
            {
              "id":          str,
              "embedding":   List[float],
              "payload":     Dict[str, str]   (optional),
              "ttl_seconds": int              (optional, default 0),
              "edges":       [{"target_id":str, "type":"CAUSES", "weight":0.9}]
            }

        Payload keys and values are converted with ``str()``. An edge whose
        ``type`` is missing or unrecognised is sent as RELATED_TO; a missing
        ``weight`` defaults to 1.0.

        Returns:
            The ``inserted`` count from the server response.
        """
        edge_types = self._edge_type_map()
        pb_vecs = [self._to_pb_vector(v, edge_types) for v in vectors]
        resp = self._require_stub().Upsert(
            pb.UpsertRequest(collection=collection, vectors=pb_vecs),
            **self._call_opts())
        return resp.inserted

    def delete(self, collection: str, ids: List[str]) -> int:
        """Delete vectors by id; returns the ``deleted`` count from the server."""
        resp = self._require_stub().Delete(
            pb.DeleteRequest(collection=collection, ids=ids),
            **self._call_opts())
        return resp.deleted

    # ---- Search ------------------------------------------------

    def search(
        self,
        collection: str,
        query_vector: List[float],
        top_k: int = 10,
        temporal_blend: float = 0.3,
        mode: str = "vector_only",
        causal_hops: int = 1,
        filter: str = "",
        metric: str = "cosine",
        include_trace: bool = False,
        include_staleness_warnings: bool = True,
        device: str = "auto",
    ) -> SearchResponse:
        """
        Search for nearest neighbours with temporal scoring.

        Args:
            collection:      Collection name
            query_vector:    Query embedding (must match collection dim)
            top_k:           Number of results
            temporal_blend:  0.0 = pure vector, 1.0 = pure temporal
            mode:            "vector_only" | "causal_expand" |
                             "contradiction_check" | "entity_resolve"
                             (case-insensitive; unknown -> vector_only)
            causal_hops:     Forwarded as ``causal_hops`` in the options
            filter:          "key=value" payload filter
            metric:          "cosine" | "l2" | "dot" (case-insensitive;
                             unknown -> cosine)
            include_trace:   Return RetrievalTrace metadata
            include_staleness_warnings: Forwarded in the options
            device:          "auto" | "cpu" | "gpu" | "tpu"
                             (case-insensitive; unknown -> auto)

        Returns:
            SearchResponse with .hits list of SearchHit. ``latency_ms``,
            ``query_id`` and ``strategy`` are filled from the response trace
            only when ``include_trace`` is true.
        """
        mode_map = {
            "vector_only": pb.QueryMode.VECTOR_ONLY,
            "causal_expand": pb.QueryMode.CAUSAL_EXPAND,
            "contradiction_check": pb.QueryMode.CONTRADICTION_CHECK,
            "entity_resolve": pb.QueryMode.ENTITY_RESOLVE,
        }

        opts = pb.SearchOptions(
            top_k=top_k,
            temporal_blend=temporal_blend,
            mode=mode_map.get(mode.lower(), pb.QueryMode.VECTOR_ONLY),
            causal_hops=causal_hops,
            filter=filter,
            # Honour the caller's metric; it used to be hard-coded to COSINE.
            metric=self._metric_enum(metric),
            include_trace=include_trace,
            include_staleness_warnings=include_staleness_warnings,
            device_hint=self._device_enum(device),
        )

        resp = self._require_stub().Search(
            pb.SearchRequest(
                collection=collection,
                vector=query_vector,
                options=opts,
            ),
            **self._call_opts())

        hits = [SearchHit._from_proto(r) for r in resp.results]
        if include_trace:
            trace = resp.trace
            return SearchResponse(
                hits=hits,
                count=resp.count,
                latency_ms=trace.latency_ms,
                query_id=trace.query_id,
                strategy=trace.strategy,
            )
        return SearchResponse(hits=hits, count=resp.count)

    def batch_search(
        self,
        collection: str,
        query_vectors: List[List[float]],
        top_k: int = 10,
        temporal_blend: float = 0.3,
        device: str = "auto",
    ) -> List[SearchResponse]:
        """Send several queries in one ``BatchSearch`` RPC.

        ``device`` accepts the same names as :meth:`search` ("auto", "cpu",
        "gpu", "tpu"). Returns one ``SearchResponse`` per entry in the
        server's ``responses``.
        """
        device_hint = self._device_enum(device)
        requests = [
            pb.SearchRequest(
                collection=collection,
                vector=q,
                options=pb.SearchOptions(
                    top_k=top_k,
                    temporal_blend=temporal_blend,
                    device_hint=device_hint,
                ),
            )
            for q in query_vectors
        ]
        resp = self._require_stub().BatchSearch(
            pb.BatchSearchRequest(collection=collection, queries=requests),
            **self._call_opts())
        return [
            SearchResponse(
                hits=[SearchHit._from_proto(r) for r in sr.results],
                count=sr.count,
            )
            for sr in resp.responses
        ]

    def search_stream(
        self,
        collection: str,
        query_vector: List[float],
        top_k: int = 10,
    ) -> Iterator[SearchHit]:
        """Server-streaming search; yields each hit as it is received."""
        opts = pb.SearchOptions(top_k=top_k)
        stream = self._require_stub().SearchStream(
            pb.SearchRequest(
                collection=collection, vector=query_vector, options=opts),
            **self._call_opts())
        for result in stream:
            yield SearchHit._from_proto(result)

    # ---- Graph -------------------------------------------------

    def add_edges(
        self,
        collection: str,
        edges: List[Dict[str, Any]],
    ) -> int:
        """
        Add causal edges between vectors.

        edges: [{"src": "v1", "tgt": "v2", "type": "CAUSES", "weight": 0.9}]

        A missing or unrecognised ``type`` is sent as RELATED_TO; a missing
        ``weight`` defaults to 1.0. Returns the ``added`` count from the
        server response.
        """
        edge_types = self._edge_type_map()
        pb_edges = [
            pb.Edge(
                src=e["src"],
                tgt=e["tgt"],
                type=edge_types.get(e.get("type", "RELATED_TO"),
                                    pb.EdgeType.RELATED_TO),
                weight=float(e.get("weight", 1.0)),
            )
            for e in edges
        ]
        resp = self._require_stub().AddEdges(
            pb.AddEdgesRequest(collection=collection, edges=pb_edges),
            **self._call_opts())
        return resp.added

    # ---- Convenience wrappers ----------------------------------

    def upsert_one(
        self,
        collection: str,
        id: str,
        embedding: List[float],
        payload: Optional[Dict[str, str]] = None,
        ttl_seconds: int = 0,
    ) -> None:
        """Upsert a single vector (thin wrapper around :meth:`upsert`)."""
        self.upsert(collection, [{
            "id": id,
            "embedding": embedding,
            "payload": payload or {},
            "ttl_seconds": ttl_seconds,
        }])

    def search_one(
        self,
        collection: str,
        query_vector: List[float],
        top_k: int = 1,
        temporal_blend: float = 0.3,
    ) -> Optional[SearchHit]:
        """Search and return the first hit, or None when there are no hits."""
        resp = self.search(collection, query_vector, top_k=top_k,
                           temporal_blend=temporal_blend)
        return resp.hits[0] if resp.hits else None
|
|
526
|
+
|
|
527
|
+
|
|
528
|
+
# ================================================================
|
|
529
|
+
# Async client (asyncio)
|
|
530
|
+
# ================================================================
|
|
531
|
+
|
|
532
|
+
class AsyncTideVec:
    """
    Async TideVec client for use with asyncio.

    The channel is opened lazily on first use (or on ``__aenter__``) as an
    insecure ``grpc.aio`` channel.

    Usage:
        async with AsyncTideVec("localhost:6399") as db:
            await db.upsert("docs", [...])
            results = await db.search("docs", query_vector=[...])

    Args:
        host:    Server host:port (default "localhost:6399")
        api_key: Optional API key (sent as ``x-api-key`` call metadata)
        timeout: Optional per-RPC timeout in seconds; ``None`` (default)
                 sets no deadline on the calls.
    """

    def __init__(self, host: str = "localhost:6399", api_key: str = "",
                 timeout: Optional[float] = None):
        self._host = host
        self._api_key = api_key
        self._timeout = timeout
        self._channel = None
        self._stub = None

    async def _ensure_connected(self):
        """Open the channel and create the stub if there is no stub yet."""
        if self._stub is None:
            self._channel = grpc.aio.insecure_channel(self._host)
            self._stub = pb_grpc.TideVecStub(self._channel)

    async def close(self) -> None:
        """Close the channel, if any, and forget the stub.

        Clearing both attributes lets ``_ensure_connected`` open a fresh
        channel if the client is used again after being closed.
        """
        channel, self._channel, self._stub = self._channel, None, None
        if channel is not None:
            await channel.close()

    async def __aenter__(self) -> "AsyncTideVec":
        await self._ensure_connected()
        return self

    async def __aexit__(self, *_) -> None:
        await self.close()

    def _meta(self):
        """Return per-call metadata: the API key header, or nothing."""
        return [("x-api-key", self._api_key)] if self._api_key else []

    @staticmethod
    def _to_pb_vector(v: Dict[str, Any]):
        """Convert one vector dict into ``pb.Vector``.

        Accepts the same keys as the synchronous client's ``upsert``:
        ``id``, ``embedding`` and optional ``payload``, ``ttl_seconds`` and
        ``edges``. Unknown or missing edge types are sent as RELATED_TO.
        """
        edge_types = {
            "CAUSES": pb.EdgeType.CAUSES,
            "CONTRADICTS": pb.EdgeType.CONTRADICTS,
            "UPDATES": pb.EdgeType.UPDATES,
            "RELATED_TO": pb.EdgeType.RELATED_TO,
            "ENTITY_OF": pb.EdgeType.ENTITY_OF,
            "SUPPORTS": pb.EdgeType.SUPPORTS,
        }
        edges = [
            pb.CausalEdge(
                target_id=e["target_id"],
                type=edge_types.get(e.get("type", "RELATED_TO"),
                                    pb.EdgeType.RELATED_TO),
                weight=float(e.get("weight", 1.0)),
            )
            for e in v.get("edges", [])
        ]
        return pb.Vector(
            id=v["id"],
            embedding=v["embedding"],
            payload={str(k): str(val)
                     for k, val in v.get("payload", {}).items()},
            ttl_seconds=v.get("ttl_seconds", 0),
            edges=edges,
        )

    async def health(self) -> dict:
        """Check server health and return the response fields as a dict."""
        await self._ensure_connected()
        resp = await self._stub.Health(
            pb.HealthRequest(), timeout=self._timeout, metadata=self._meta())
        return {
            "status": resp.status,
            "version": resp.version,
            "collections": resp.collections,
            "timestamp_ms": resp.timestamp_ms,
            "gpu_available": resp.gpu_available,
            "tpu_available": resp.tpu_available,
        }

    async def upsert(self, collection: str, vectors: List[Dict]) -> int:
        """Upsert vectors; returns the ``inserted`` count from the server.

        ``ttl_seconds`` and ``edges`` in each vector dict are sent along
        with ``id``, ``embedding`` and ``payload``.
        """
        await self._ensure_connected()
        pb_vecs = [self._to_pb_vector(v) for v in vectors]
        resp = await self._stub.Upsert(
            pb.UpsertRequest(collection=collection, vectors=pb_vecs),
            timeout=self._timeout, metadata=self._meta())
        return resp.inserted

    async def search(
        self,
        collection: str,
        query_vector: List[float],
        top_k: int = 10,
        temporal_blend: float = 0.3,
    ) -> SearchResponse:
        """Run one search and wrap the results in a ``SearchResponse``.

        Only ``top_k`` and ``temporal_blend`` are set in the search options.
        """
        await self._ensure_connected()
        opts = pb.SearchOptions(top_k=top_k, temporal_blend=temporal_blend)
        resp = await self._stub.Search(
            pb.SearchRequest(collection=collection,
                             vector=query_vector, options=opts),
            timeout=self._timeout, metadata=self._meta())
        return SearchResponse(
            hits=[SearchHit._from_proto(r) for r in resp.results],
            count=resp.count,
        )
|