tidevec 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tidevec/__init__.py ADDED
@@ -0,0 +1,20 @@
1
+ """TideVec Python SDK — Temporally-aware causal vector database."""
2
+
3
+ from .client import (
4
+ TideVec,
5
+ AsyncTideVec,
6
+ SearchHit,
7
+ SearchResponse,
8
+ CollectionInfo,
9
+ HalfLife,
10
+ )
11
+
12
# Version string of the SDK package.
__version__ = "0.1.0"

# Public names re-exported from ``tidevec.client``; this list is what
# ``from tidevec import *`` exposes.
__all__ = [
    "TideVec", "AsyncTideVec",
    "SearchHit", "SearchResponse",
    "CollectionInfo", "HalfLife",
]
tidevec/client.py ADDED
@@ -0,0 +1,602 @@
1
+ """
2
+ TideVec Python SDK
3
+ ====================
4
+ Temporally-aware causal vector database client.
5
+
6
+ Install:
7
+ pip install tidevec
8
+
9
+ Quick start:
10
+ from tidevec import TideVec
11
+
12
+ db = TideVec("localhost:6399")
13
+
14
+ db.create_collection("docs", dim=768)
15
+ db.upsert("docs", [{"id": "v1", "embedding": [0.1, ...], "payload": {"src": "wiki"}}])
16
+ results = db.search("docs", query_vector=[0.1, ...], top_k=10, temporal_blend=0.3)
17
+ for r in results:
18
+ print(r.id, r.score, r.temporal_score)
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import time
24
+ import warnings
25
+ from dataclasses import dataclass, field
26
+ from typing import Any, Dict, Iterator, List, Optional, Union
27
+
28
+ import grpc
29
+
30
+ try:
31
+ from . import tidevec_pb2 as pb
32
+ from . import tidevec_pb2_grpc as pb_grpc
33
+ except ImportError:
34
+ import tidevec_pb2 as pb # type: ignore
35
+ import tidevec_pb2_grpc as pb_grpc # type: ignore
36
+
37
+
38
+ # ================================================================
39
+ # Typed result objects
40
+ # ================================================================
41
+
42
@dataclass
class SearchHit:
    """A single scored match produced by a search call.

    Instances are normally created from a protobuf result message through
    :meth:`_from_proto`; the attributes mirror the fields read there.
    The last four attributes are optional and default to "nothing flagged".
    """
    id: str
    score: float
    vector_score: float
    temporal_score: float
    payload: Dict[str, str]
    created_at: int
    staleness_warning: bool = False
    staleness_reason: str = ""
    causal_neighbors: List[str] = field(default_factory=list)
    contradicted_by: List[str] = field(default_factory=list)

    @classmethod
    def _from_proto(cls, r) -> "SearchHit":
        """Convert a protobuf result message ``r`` into a ``SearchHit``.

        The map field and the two repeated fields are copied into a plain
        ``dict`` / ``list`` so the hit does not keep protobuf containers.
        """
        # Arguments are passed positionally, in dataclass field order.
        return cls(
            r.id,
            r.score,
            r.vector_score,
            r.temporal_score,
            dict(r.payload),
            r.created_at,
            r.staleness_warning,
            r.staleness_reason,
            list(r.causal_neighbors),
            list(r.contradicted_by),
        )

    def __repr__(self) -> str:
        """Short representation showing only id, blended score and temporal score."""
        head = f"SearchHit(id={self.id!r}, score={self.score:.4f}, "
        tail = f"temporal={self.temporal_score:.3f})"
        return head + tail
74
+
75
+
76
@dataclass
class SearchResponse:
    """Result container for one search call.

    Acts as a thin sequence wrapper around ``hits``: it can be iterated,
    measured with ``len()`` and indexed.  ``count`` is stored as given and is
    independent of ``len(hits)``.
    """
    hits: List[SearchHit]
    count: int
    latency_ms: float = 0.0
    query_id: str = ""
    strategy: str = ""
    staleness_warnings: List[dict] = field(default_factory=list)
    contradiction_alerts: List[dict] = field(default_factory=list)

    def __len__(self) -> int:
        """Return how many hits are held."""
        held = self.hits
        return len(held)

    def __getitem__(self, idx: int) -> SearchHit:
        """Return the hit at position ``idx`` (delegates to the ``hits`` list)."""
        held = self.hits
        return held[idx]

    def __iter__(self) -> Iterator[SearchHit]:
        """Iterate over the hits in their stored order."""
        held = self.hits
        return iter(held)
95
+
96
+
97
@dataclass
class CollectionInfo:
    """Descriptive record for one collection.

    A plain value object: all six fields are required and are stored
    exactly as passed in.
    """
    name: str         # collection name
    n_vectors: int    # vector count reported for the collection
    n_shards: int     # shard count reported for the collection
    dim: int          # embedding dimensionality
    index_type: str   # index type identifier
    metric: str       # metric, stored as a string
105
+
106
+
107
+ # ================================================================
108
+ # Temporal preset helpers
109
+ # ================================================================
110
+
111
class HalfLife:
    """Named half-life durations, expressed in milliseconds.

    The base units are derived from one another (a month is 30 days, a year
    is 365 days); the use-case aliases below simply point at a base unit.
    """
    # Base units.
    ONE_HOUR = 60 * 60 * 1000
    ONE_DAY = 24 * ONE_HOUR
    ONE_WEEK = 7 * ONE_DAY
    ONE_MONTH = 30 * ONE_DAY
    ONE_YEAR = 365 * ONE_DAY

    # Use-case aliases.
    AGENT_SESSION = ONE_HOUR
    NEWS_FEED = ONE_DAY
    SUPPORT_TICKET = ONE_MONTH
    DOCUMENT_STORE = ONE_YEAR
123
+
124
+
125
+ # ================================================================
126
+ # Main client
127
+ # ================================================================
128
+
129
class TideVec:
    """
    Synchronous TideVec client backed by a gRPC channel.

    Every public method issues one RPC on the generated ``TideVecStub`` using
    the client-wide timeout and, when an API key is configured, an
    ``x-api-key`` call-metadata entry.

    Only the gRPC transport is implemented by this class.  Constructing the
    client with ``use_grpc=False`` opens no channel; RPC methods then raise
    ``RuntimeError`` until a channel has been established.

    The client is a context manager; the channel is closed on exit.

    String options (``metric``, ``mode``, ``device``, edge ``type``) are
    matched case-insensitively.  An unrecognised value falls back to the
    documented default and emits a ``UserWarning`` instead of being
    silently replaced.

    Args:
        host:     Server host:port (default "localhost:6399")
        api_key:  Optional API key, sent as ``x-api-key`` call metadata
        timeout:  Timeout in seconds applied to every RPC
        use_grpc: Open the gRPC channel at construction time (default True)
        tls:      Use a TLS-secured channel (default False, i.e. plaintext)
    """

    def __init__(
        self,
        host: str = "localhost:6399",
        api_key: str = "",
        timeout: float = 30.0,
        use_grpc: bool = True,
        tls: bool = False,
    ):
        self._host = host
        self._api_key = api_key
        self._timeout = timeout
        self._channel: Optional[grpc.Channel] = None
        self._stub: Optional[pb_grpc.TideVecStub] = None

        if use_grpc:
            self._connect_grpc(tls)

    def _connect_grpc(self, tls: bool) -> None:
        """Open the channel to ``self._host`` and create the service stub."""
        options = [
            ("grpc.max_send_message_length", 256 * 1024 * 1024),
            ("grpc.max_receive_message_length", 256 * 1024 * 1024),
            ("grpc.keepalive_time_ms", 30_000),
        ]
        if tls:
            creds = grpc.ssl_channel_credentials()
            self._channel = grpc.secure_channel(self._host, creds, options)
        else:
            self._channel = grpc.insecure_channel(self._host, options)
        self._stub = pb_grpc.TideVecStub(self._channel)

    def _meta(self) -> List[tuple]:
        """Return the call metadata: the API key entry, or an empty list."""
        if self._api_key:
            return [("x-api-key", self._api_key)]
        return []

    def _call_kwargs(self) -> Dict[str, Any]:
        """Keyword arguments shared by every RPC invocation."""
        return {"timeout": self._timeout, "metadata": self._meta()}

    def _require_stub(self):
        """Return the stub, or raise ``RuntimeError`` if no channel was opened."""
        if self._stub is None:
            raise RuntimeError(
                "TideVec client has no gRPC channel; construct it with "
                "use_grpc=True (only the gRPC transport is implemented)")
        return self._stub

    # ---- Option parsing ----------------------------------------

    @staticmethod
    def _pick(table: Dict[str, Any], key: Any, fallback: Any, what: str) -> Any:
        """Case-insensitive lookup of ``key`` in ``table``.

        Unknown keys return ``fallback`` and emit a warning so that a typo
        such as ``metric="euclidean"`` does not change behavior unnoticed.
        """
        name = str(key).lower()
        if name in table:
            return table[name]
        # stacklevel skips this helper and the enum helper that called it.
        warnings.warn(
            f"Unknown {what} {key!r}; falling back to the default",
            stacklevel=4,
        )
        return fallback

    @staticmethod
    def _metric_enum(metric: str):
        """Translate "cosine" | "l2" | "dot" into ``pb.Metric`` (default COSINE)."""
        table = {
            "cosine": pb.Metric.COSINE,
            "l2":     pb.Metric.L2,
            "dot":    pb.Metric.DOT,
        }
        return TideVec._pick(table, metric, pb.Metric.COSINE, "metric")

    @staticmethod
    def _device_enum(device: str):
        """Translate "auto" | "cpu" | "gpu" | "tpu" into ``pb.Device`` (default AUTO)."""
        table = {
            "auto": pb.Device.AUTO, "cpu": pb.Device.CPU,
            "gpu":  pb.Device.GPU,  "tpu": pb.Device.TPU,
        }
        return TideVec._pick(table, device, pb.Device.AUTO, "device")

    @staticmethod
    def _mode_enum(mode: str):
        """Translate a query-mode name into ``pb.QueryMode`` (default VECTOR_ONLY)."""
        table = {
            "vector_only":         pb.QueryMode.VECTOR_ONLY,
            "causal_expand":       pb.QueryMode.CAUSAL_EXPAND,
            "contradiction_check": pb.QueryMode.CONTRADICTION_CHECK,
            "entity_resolve":      pb.QueryMode.ENTITY_RESOLVE,
        }
        return TideVec._pick(table, mode, pb.QueryMode.VECTOR_ONLY, "mode")

    @staticmethod
    def _edge_type_enum(edge_type: str):
        """Translate an edge-type name into ``pb.EdgeType`` (default RELATED_TO)."""
        table = {
            "causes":     pb.EdgeType.CAUSES,
            "contradicts": pb.EdgeType.CONTRADICTS,
            "updates":    pb.EdgeType.UPDATES,
            "related_to": pb.EdgeType.RELATED_TO,
            "entity_of":  pb.EdgeType.ENTITY_OF,
            "supports":   pb.EdgeType.SUPPORTS,
        }
        return TideVec._pick(table, edge_type, pb.EdgeType.RELATED_TO, "edge type")

    @staticmethod
    def _collection_info(msg) -> CollectionInfo:
        """Convert a protobuf collection description into ``CollectionInfo``."""
        return CollectionInfo(
            name=msg.name, n_vectors=msg.n_vectors,
            n_shards=msg.n_shards, dim=msg.dim,
            # The enum value is stringified as-is, matching earlier releases.
            index_type=msg.index_type, metric=str(msg.metric),
        )

    # ---- Lifecycle ---------------------------------------------

    def close(self) -> None:
        """Close the underlying channel, if one was opened."""
        if self._channel:
            self._channel.close()

    def __enter__(self) -> "TideVec":
        return self

    def __exit__(self, *_) -> None:
        self.close()

    # ---- Health ------------------------------------------------

    def health(self) -> dict:
        """Check server health and return the response fields as a dict."""
        resp = self._require_stub().Health(
            pb.HealthRequest(), **self._call_kwargs())
        return {
            "status":        resp.status,
            "version":       resp.version,
            "collections":   resp.collections,
            "timestamp_ms":  resp.timestamp_ms,
            "gpu_available": resp.gpu_available,
            "tpu_available": resp.tpu_available,
        }

    def ping(self) -> bool:
        """Return True if the health RPC succeeds, False on ``grpc.RpcError``."""
        try:
            self.health()
        except grpc.RpcError:
            return False
        return True

    # ---- Collections -------------------------------------------

    def create_collection(
        self,
        name: str,
        dim: int,
        index_type: str = "tvindex",
        metric: str = "cosine",
        n_shards: int = 4,
        n_replicas: int = 1,
        write_quorum: int = 1,
        half_life_ms: int = HalfLife.ONE_MONTH,
        temporal_blend: float = 0.3,
    ) -> str:
        """
        Create a new collection.

        Args:
            name:           Collection name
            dim:            Embedding dimensionality
            index_type:     Index type identifier, forwarded unchanged
            metric:         "cosine" | "l2" | "dot" (case-insensitive)
            n_shards:       Forwarded to the create request
            n_replicas:     Forwarded to the create request
            write_quorum:   Forwarded to the create request
            half_life_ms:   Temporal half-life in milliseconds
            temporal_blend: Temporal blend stored in the collection config

        Returns:
            The collection name echoed by the server response.
        """
        req = pb.CreateCollectionRequest(
            name=name,
            dim=dim,
            index_type=index_type,
            metric=self._metric_enum(metric),
            n_shards=n_shards,
            n_replicas=n_replicas,
            write_quorum=write_quorum,
            temporal=pb.TemporalConfig(
                half_life_ms=half_life_ms,
                temporal_blend=temporal_blend,
            ),
        )
        resp = self._require_stub().CreateCollection(req, **self._call_kwargs())
        return resp.name

    def drop_collection(self, name: str) -> bool:
        """Drop a collection; True when the response status is ``"ok"``."""
        resp = self._require_stub().DropCollection(
            pb.DropCollectionRequest(name=name), **self._call_kwargs())
        return resp.status == "ok"

    def get_collection(self, name: str) -> CollectionInfo:
        """Fetch the description of a single collection."""
        resp = self._require_stub().GetCollection(
            pb.GetCollectionRequest(name=name), **self._call_kwargs())
        return self._collection_info(resp.info)

    def list_collections(self) -> List[CollectionInfo]:
        """Fetch the descriptions of all collections in the response."""
        resp = self._require_stub().ListCollections(
            pb.ListCollectionsRequest(), **self._call_kwargs())
        return [self._collection_info(c) for c in resp.collections]

    def set_temporal(
        self, name: str,
        half_life_ms: int,
        temporal_blend: float = 0.3,
    ) -> None:
        """Update temporal decay settings for a collection."""
        self._require_stub().UpdateTemporal(
            pb.UpdateTemporalRequest(
                name=name,
                config=pb.TemporalConfig(
                    half_life_ms=half_life_ms,
                    temporal_blend=temporal_blend,
                ),
            ),
            **self._call_kwargs())

    # ---- Vectors -----------------------------------------------

    def upsert(
        self,
        collection: str,
        vectors: List[Dict[str, Any]],
    ) -> int:
        """
        Upsert vectors into a collection.

        Each vector dict:
            {
              "id":          str,
              "embedding":   List[float],
              "payload":     Dict[str, str]   (optional),
              "ttl_seconds": int              (optional),
              "edges":       [{"target_id":str, "type":"CAUSES", "weight":0.9}]
            }

        Payload keys and values are converted with ``str()``.  Optional keys
        may be omitted or set to ``None``.

        Returns:
            The ``inserted`` count from the server response.
        """
        pb_vecs = []
        for v in vectors:
            edges = [
                pb.CausalEdge(
                    target_id=e["target_id"],
                    type=self._edge_type_enum(e.get("type", "RELATED_TO")),
                    weight=float(e.get("weight", 1.0)),
                )
                # ``or`` also covers an explicit None for an optional key.
                for e in (v.get("edges") or [])
            ]
            payload = v.get("payload") or {}
            pb_vecs.append(pb.Vector(
                id=v["id"],
                embedding=v["embedding"],
                payload={str(k): str(val) for k, val in payload.items()},
                ttl_seconds=v.get("ttl_seconds") or 0,
                edges=edges,
            ))

        resp = self._require_stub().Upsert(
            pb.UpsertRequest(collection=collection, vectors=pb_vecs),
            **self._call_kwargs())
        return resp.inserted

    def delete(self, collection: str, ids: List[str]) -> int:
        """Delete vectors by id; returns the ``deleted`` count from the response."""
        resp = self._require_stub().Delete(
            pb.DeleteRequest(collection=collection, ids=ids),
            **self._call_kwargs())
        return resp.deleted

    # ---- Search ------------------------------------------------

    def search(
        self,
        collection: str,
        query_vector: List[float],
        top_k: int = 10,
        temporal_blend: float = 0.3,
        mode: str = "vector_only",
        causal_hops: int = 1,
        filter: str = "",
        metric: str = "cosine",
        include_trace: bool = False,
        include_staleness_warnings: bool = True,
        device: str = "auto",
    ) -> SearchResponse:
        """
        Search for nearest neighbours with temporal scoring.

        Args:
            collection:     Collection name
            query_vector:   Query embedding (must match collection dim)
            top_k:          Number of results
            temporal_blend: 0.0 = pure vector, 1.0 = pure temporal
            mode:           "vector_only" | "causal_expand" |
                            "contradiction_check" | "entity_resolve"
            causal_hops:    Forwarded to the search options
            filter:         "key=value" payload filter
            metric:         "cosine" | "l2" | "dot"; sent with the request
            include_trace:  Return RetrievalTrace metadata
            include_staleness_warnings: Forwarded to the search options
            device:         "auto" | "cpu" | "gpu" | "tpu"

        Returns:
            SearchResponse with .hits list of SearchHit.  The trace fields
            (latency_ms, query_id, strategy) are filled only when
            ``include_trace`` is True.
        """
        opts = pb.SearchOptions(
            top_k=top_k,
            temporal_blend=temporal_blend,
            mode=self._mode_enum(mode),
            causal_hops=causal_hops,
            filter=filter,
            # Honour the caller's metric; it used to be pinned to COSINE.
            metric=self._metric_enum(metric),
            include_trace=include_trace,
            include_staleness_warnings=include_staleness_warnings,
            device_hint=self._device_enum(device),
        )

        resp = self._require_stub().Search(
            pb.SearchRequest(
                collection=collection,
                vector=query_vector,
                options=opts,
            ),
            **self._call_kwargs())

        hits = [SearchHit._from_proto(r) for r in resp.results]
        return SearchResponse(
            hits=hits,
            count=resp.count,
            latency_ms=resp.trace.latency_ms if include_trace else 0.0,
            query_id=resp.trace.query_id if include_trace else "",
            strategy=resp.trace.strategy if include_trace else "",
        )

    def batch_search(
        self,
        collection: str,
        query_vectors: List[List[float]],
        top_k: int = 10,
        temporal_blend: float = 0.3,
        device: str = "auto",
    ) -> List[SearchResponse]:
        """
        Send several queries in a single BatchSearch RPC.

        All queries share ``top_k``, ``temporal_blend`` and ``device``
        ("auto" | "cpu" | "gpu" | "tpu").  Returns one SearchResponse per
        entry in the server response.
        """
        # Resolve once: the hint is identical for every query in the batch.
        device_hint = self._device_enum(device)
        requests = [
            pb.SearchRequest(
                collection=collection,
                vector=q,
                options=pb.SearchOptions(
                    top_k=top_k,
                    temporal_blend=temporal_blend,
                    device_hint=device_hint,
                ),
            ) for q in query_vectors
        ]
        resp = self._require_stub().BatchSearch(
            pb.BatchSearchRequest(collection=collection, queries=requests),
            **self._call_kwargs())
        return [
            SearchResponse(
                hits=[SearchHit._from_proto(r) for r in sr.results],
                count=sr.count,
            ) for sr in resp.responses
        ]

    def search_stream(
        self,
        collection: str,
        query_vector: List[float],
        top_k: int = 10,
    ) -> Iterator[SearchHit]:
        """Server-streaming search — yields each hit as it is received."""
        opts = pb.SearchOptions(top_k=top_k)
        stream = self._require_stub().SearchStream(
            pb.SearchRequest(
                collection=collection, vector=query_vector, options=opts),
            **self._call_kwargs())
        for result in stream:
            yield SearchHit._from_proto(result)

    # ---- Graph -------------------------------------------------

    def add_edges(
        self,
        collection: str,
        edges: List[Dict[str, Any]],
    ) -> int:
        """
        Add causal edges between vectors.

        edges: [{"src": "v1", "tgt": "v2", "type": "CAUSES", "weight": 0.9}]

        ``type`` defaults to "RELATED_TO" and ``weight`` to 1.0.  Returns the
        ``added`` count from the server response.
        """
        pb_edges = [
            pb.Edge(
                src=e["src"],
                tgt=e["tgt"],
                type=self._edge_type_enum(e.get("type", "RELATED_TO")),
                weight=float(e.get("weight", 1.0)),
            ) for e in edges
        ]
        resp = self._require_stub().AddEdges(
            pb.AddEdgesRequest(collection=collection, edges=pb_edges),
            **self._call_kwargs())
        return resp.added

    # ---- Convenience wrappers ----------------------------------

    def upsert_one(
        self,
        collection: str,
        id: str,
        embedding: List[float],
        payload: Optional[Dict[str, str]] = None,
        ttl_seconds: int = 0,
    ) -> None:
        """Upsert a single vector (thin wrapper around :meth:`upsert`)."""
        self.upsert(collection, [{
            "id": id, "embedding": embedding,
            "payload": payload or {},
            "ttl_seconds": ttl_seconds,
        }])

    def search_one(
        self,
        collection: str,
        query_vector: List[float],
        top_k: int = 1,
        temporal_blend: float = 0.3,
    ) -> Optional[SearchHit]:
        """Search and return the first hit, or None when there are no hits."""
        resp = self.search(collection, query_vector, top_k=top_k,
                           temporal_blend=temporal_blend)
        return resp.hits[0] if resp.hits else None
526
+
527
+
528
+ # ================================================================
529
+ # Async client (asyncio)
530
+ # ================================================================
531
+
532
class AsyncTideVec:
    """
    Async TideVec client for use with asyncio.

    The channel is opened lazily on first use (or on ``__aenter__``) with
    ``grpc.aio``.  After :meth:`close` the next call opens a fresh channel.

    Usage:
        async with AsyncTideVec("localhost:6399") as db:
            await db.upsert("docs", [...])
            results = await db.search("docs", query_vector=[...])

    Args:
        host:    Server host:port (default "localhost:6399")
        api_key: Optional API key, sent as ``x-api-key`` call metadata
        timeout: Per-RPC timeout in seconds; ``None`` (default) sets no
                 deadline, which matches the behavior of earlier releases
        tls:     Use a TLS-secured channel (default False, i.e. plaintext)
    """

    def __init__(
        self,
        host: str = "localhost:6399",
        api_key: str = "",
        timeout: Optional[float] = None,
        tls: bool = False,
    ):
        self._host = host
        self._api_key = api_key
        self._timeout = timeout
        self._tls = tls
        self._channel = None
        self._stub = None

    async def _ensure_connected(self):
        """Create the channel and stub if they do not exist yet."""
        if self._stub is None:
            if self._tls:
                self._channel = grpc.aio.secure_channel(
                    self._host, grpc.ssl_channel_credentials())
            else:
                self._channel = grpc.aio.insecure_channel(self._host)
            self._stub = pb_grpc.TideVecStub(self._channel)

    async def close(self) -> None:
        """Close the channel, if any, and forget the stub bound to it."""
        # Clear both references first so a stale stub is never reused, even
        # if closing the channel raises.
        channel, self._channel, self._stub = self._channel, None, None
        if channel is not None:
            await channel.close()

    async def __aenter__(self) -> "AsyncTideVec":
        await self._ensure_connected()
        return self

    async def __aexit__(self, *_) -> None:
        await self.close()

    def _meta(self):
        """Return the call metadata: the API key entry, or an empty list."""
        return [("x-api-key", self._api_key)] if self._api_key else []

    async def health(self) -> dict:
        """Call the Health RPC and return its ``status`` and ``version``."""
        await self._ensure_connected()
        resp = await self._stub.Health(
            pb.HealthRequest(), timeout=self._timeout, metadata=self._meta())
        return {"status": resp.status, "version": resp.version}

    async def upsert(self, collection: str, vectors: List[Dict]) -> int:
        """
        Upsert vectors into a collection.

        Accepts the same vector dicts as the synchronous client:
        ``id`` and ``embedding`` are required; ``payload``, ``ttl_seconds``
        and ``edges`` (``[{"target_id", "type", "weight"}]``) are optional
        and may be omitted or ``None``.  Payload keys and values are
        converted with ``str()``.

        Returns:
            The ``inserted`` count from the server response.
        """
        await self._ensure_connected()
        edge_type_map = {
            "CAUSES": pb.EdgeType.CAUSES, "CONTRADICTS": pb.EdgeType.CONTRADICTS,
            "UPDATES": pb.EdgeType.UPDATES, "RELATED_TO": pb.EdgeType.RELATED_TO,
            "ENTITY_OF": pb.EdgeType.ENTITY_OF, "SUPPORTS": pb.EdgeType.SUPPORTS,
        }

        pb_vecs = []
        for v in vectors:
            edges = [
                pb.CausalEdge(
                    target_id=e["target_id"],
                    type=edge_type_map.get(e.get("type", "RELATED_TO"),
                                           pb.EdgeType.RELATED_TO),
                    weight=float(e.get("weight", 1.0)),
                )
                for e in (v.get("edges") or [])
            ]
            payload = v.get("payload") or {}
            pb_vecs.append(pb.Vector(
                id=v["id"],
                embedding=v["embedding"],
                payload={str(k): str(val) for k, val in payload.items()},
                # These two were dropped before, so TTLs and edges passed
                # to the async client never reached the server.
                ttl_seconds=v.get("ttl_seconds") or 0,
                edges=edges,
            ))

        resp = await self._stub.Upsert(
            pb.UpsertRequest(collection=collection, vectors=pb_vecs),
            timeout=self._timeout, metadata=self._meta())
        return resp.inserted

    async def search(
        self,
        collection: str,
        query_vector: List[float],
        top_k: int = 10,
        temporal_blend: float = 0.3,
    ) -> SearchResponse:
        """
        Search a collection and return the hits.

        Only ``top_k`` and ``temporal_blend`` are sent as search options.
        The returned SearchResponse carries ``hits`` and ``count``; trace
        fields keep their defaults.
        """
        await self._ensure_connected()
        opts = pb.SearchOptions(top_k=top_k, temporal_blend=temporal_blend)
        resp = await self._stub.Search(
            pb.SearchRequest(collection=collection,
                             vector=query_vector, options=opts),
            timeout=self._timeout, metadata=self._meta())
        return SearchResponse(
            hits=[SearchHit._from_proto(r) for r in resp.results],
            count=resp.count,
        )