vedana-core 0.1.0.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vedana_core/vts.py ADDED
@@ -0,0 +1,167 @@
1
+ import abc
2
+ import logging
3
+ from typing import Sequence
4
+
5
+ import numpy as np
6
+ from neo4j import AsyncGraphDatabase, Record, RoutingControl
7
+ from opentelemetry import trace
8
+ from sqlalchemy import RowMapping, select
9
+ from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
10
+ from vedana_etl.catalog import edges, nodes, rag_anchor_embeddings, rag_edge_embeddings
11
+
12
+ from vedana_core.db import get_sessionmaker
13
+
14
+ logger = logging.getLogger(__name__)
15
+ tracer = trace.get_tracer(__name__)
16
+
17
+
18
+ class VectorStore(abc.ABC):
19
+ async def vector_search(
20
+ self,
21
+ label: str,
22
+ prop_type: str,
23
+ prop_name: str,
24
+ embedding: list[float],
25
+ threshold: float,
26
+ top_n: int = 5,
27
+ ):
28
+ raise NotImplementedError
29
+
30
+
31
class MemgraphVectorStore(VectorStore):
    """
    Vector store that delegates to Memgraph's ``vector_search`` procedures.

    The vector indices are not created here; they have to exist already.
    """

    # Edge search: also returns the two endpoint nodes of every matching edge.
    _EDGE_QUERY = (
        "CALL vector_search.search_edges($idx_name, $top_n, $embedding) "
        "YIELD similarity, edge "
        "WITH similarity, edge "
        "WHERE similarity > $threshold "
        "RETURN similarity, edge, startNode(edge) AS start, endNode(edge) AS end;"
    )
    # Node search: returns every yielded column that passes the threshold.
    _NODE_QUERY = (
        "CALL vector_search.search($idx_name, $top_n, $embedding) "
        "YIELD similarity, node "
        "WITH similarity, node WHERE similarity > $threshold RETURN *"
    )

    def __init__(self, uri: str, user: str, pwd: str, db_name: str = "") -> None:
        """Create the async driver and remember the URI and credentials used for it."""
        credentials = (user, pwd)
        # Connectivity is not verified here: verify_connectivity() must be awaited
        # and this constructor is synchronous.
        self.driver = AsyncGraphDatabase.driver(uri, auth=credentials, database=db_name)
        self.driver_uri = uri
        self.auth = credentials

    async def vector_search(
        self,
        label: str,
        prop_type: str,
        prop_name: str,
        embedding: np.ndarray | list[float],
        threshold: float,
        top_n: int = 5,
    ) -> list[Record]:
        """
        Query the ``{label}_{prop_name}_embed_idx`` vector index (spaces become underscores).

        ``prop_type == "edge"`` runs the edge procedure; every other value runs the
        node procedure. Only records with ``similarity > threshold`` are returned.
        """
        with tracer.start_as_current_span("memgraph.vector_search") as span:
            traced = (
                ("memgraph.label", label),
                ("memgraph.prop_type", prop_type),
                ("memgraph.prop_name", prop_name),
                ("memgraph.top_n", top_n),
                ("memgraph.threshold", threshold),
            )
            for attr_key, attr_value in traced:
                span.set_attribute(attr_key, attr_value)

            cypher = self._EDGE_QUERY if prop_type == "edge" else self._NODE_QUERY
            span.set_attribute("memgraph.query", cypher)

            index_name = f"{label}_{prop_name}_embed_idx".replace(" ", "_")
            result = await self.driver.execute_query(
                cypher,
                idx_name=index_name,
                top_n=top_n,
                embedding=embedding,
                threshold=threshold,
                routing_=RoutingControl.READ,
            )
            return result.records
85
+
86
+
87
class PGVectorStore(VectorStore):
    """
    Vector store that runs similarity search as SQL over the embedding tables.

    Embedding rows are read from the ``rag_anchor_embeddings`` / ``rag_edge_embeddings``
    catalog tables and joined to the ``nodes`` / ``edges`` tables to fetch attributes.
    """

    def __init__(
        self,
        sessionmaker: async_sessionmaker[AsyncSession] | None = None,
    ) -> None:
        """Store the session factory (defaulting to ``get_sessionmaker()``) and the catalog tables."""
        self._sessionmaker: async_sessionmaker[AsyncSession] = sessionmaker or get_sessionmaker()
        self.rag_anchor_embeddings_table = rag_anchor_embeddings.store.data_table  # type: ignore[attr-defined]
        self.rag_edge_embeddings_table = rag_edge_embeddings.store.data_table  # type: ignore[attr-defined]
        self.node_table = nodes.store.data_table  # type: ignore[attr-defined]
        self.edge_table = edges.store.data_table  # type: ignore[attr-defined]

    def _edge_search_stmt(
        self,
        label: str,
        prop_name: str,
        embedding: np.ndarray | list[float],
        threshold: float,
        top_n: int,
    ):
        """Build the SELECT for edge embeddings joined to their edge rows."""
        emb = self.rag_edge_embeddings_table
        edge = self.edge_table

        distance = emb.c.embedding.cosine_distance(embedding)
        similarity = (1 - distance).label("similarity")

        # An embedding row belongs to the edge with the same endpoints and label.
        same_edge = (
            (emb.c.from_node_id == edge.c.from_node_id)
            & (emb.c.to_node_id == edge.c.to_node_id)
            & (emb.c.edge_label == edge.c.edge_label)
        )
        columns = select(
            similarity,
            edge.c.from_node_id,
            edge.c.to_node_id,
            edge.c.edge_label,
            edge.c.attributes.label("edge"),
        )
        return (
            columns.select_from(emb.join(edge, same_edge))
            .where(emb.c.edge_label == label)
            .where(emb.c.attribute_name == prop_name)
            .where(similarity > threshold)
            .order_by(distance)
            .limit(top_n)
        )

    def _node_search_stmt(
        self,
        label: str,
        prop_name: str,
        embedding: np.ndarray | list[float],
        threshold: float,
        top_n: int,
    ):
        """Build the SELECT for anchor embeddings joined to their node rows."""
        emb = self.rag_anchor_embeddings_table
        node = self.node_table

        distance = emb.c.embedding.cosine_distance(embedding)
        similarity = (1 - distance).label("similarity")

        columns = select(
            similarity,
            node.c.node_id,
            node.c.node_type,
            node.c.attributes.label("node"),
        )
        return (
            columns.select_from(emb.join(node, emb.c.node_id == node.c.node_id))
            .where(emb.c.label == label)
            .where(emb.c.attribute_name == prop_name)
            .where(similarity > threshold)
            .order_by(distance)
            .limit(top_n)
        )

    async def vector_search(
        self,
        label: str,
        prop_type: str,
        prop_name: str,
        embedding: np.ndarray | list[float],
        threshold: float,
        top_n: int = 5,
    ) -> Sequence[RowMapping]:
        """
        Return up to ``top_n`` row mappings ordered by ascending cosine distance.

        Similarity is ``1 - cosine_distance``; only rows with ``similarity > threshold``
        are kept. ``prop_type == "edge"`` searches edge embeddings, every other value
        searches anchor (node) embeddings.
        """
        with tracer.start_as_current_span("pgvector.vector_search") as span:
            span.set_attribute("pgvector.label", label)
            span.set_attribute("pgvector.prop_type", prop_type)
            span.set_attribute("pgvector.prop_name", prop_name)
            span.set_attribute("pgvector.top_n", top_n)
            span.set_attribute("pgvector.threshold", threshold)

            async with self._sessionmaker() as session:
                build_stmt = self._edge_search_stmt if prop_type == "edge" else self._node_search_stmt
                stmt = build_stmt(label, prop_name, embedding, threshold, top_n)

                span.set_attribute("pgvector.query", str(stmt))
                rows = await session.execute(stmt)
                return rows.mappings().all()
@@ -0,0 +1,29 @@
1
+ Metadata-Version: 2.4
2
+ Name: vedana-core
3
+ Version: 0.1.0.dev3
4
+ Summary: Semantic Graph RAG App
5
+ Author-email: Andrey Tatarinov <a@tatarinov.co>, Timur Sheydaev <tsheyd@epoch8.co>
6
+ Requires-Python: >=3.12
7
+ Requires-Dist: aiohttp>=3.11.18
8
+ Requires-Dist: aioitertools>=0.12.0
9
+ Requires-Dist: aiosqlite>=0.21.0
10
+ Requires-Dist: alembic>=1.13.0
11
+ Requires-Dist: async-lru>=2.0.5
12
+ Requires-Dist: asyncpg>=0.29.0
13
+ Requires-Dist: grist-api>=0.1.1
14
+ Requires-Dist: jims-backoffice
15
+ Requires-Dist: jims-core
16
+ Requires-Dist: jims-telegram
17
+ Requires-Dist: jims-tui
18
+ Requires-Dist: litellm>=1.79.0
19
+ Requires-Dist: neo4j>=5.28.1
20
+ Requires-Dist: openai>=2.8.0
21
+ Requires-Dist: pandas>=2.2.3
22
+ Requires-Dist: psycopg2-binary>=2.9.10
23
+ Requires-Dist: pydantic-settings>=2.9.1
24
+ Requires-Dist: sentry-sdk[fastapi,opentelemetry]>=2.32.0
25
+ Requires-Dist: sqlalchemy>=2.0.41
26
+ Requires-Dist: vedana-etl
27
+ Description-Content-Type: text/markdown
28
+
29
+ # Vedana
@@ -0,0 +1,17 @@
1
+ vedana_core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ vedana_core/app.py,sha256=LmolsEf6_INsDWpeSaYDxoZ9v029K-hmlRvUV_65VT0,1893
3
+ vedana_core/data_model.py,sha256=O24Rzwh712fhj4al71S2UM9RUFbg1UV5fojvPooH6UI,17589
4
+ vedana_core/data_provider.py,sha256=r3yVKKm7Bo6tN0ZnmlIyo-Ndd5KNhBRPaC0wb895Xl0,18711
5
+ vedana_core/db.py,sha256=Wi71gXBrqPv6N6MIjaaHHFVmGIjr3LiRs5AeduDbGLc,1143
6
+ vedana_core/graph.py,sha256=uYYRdfgX5rXI30cdNvXpbevDo0fEzRcTrwIu6h11Q2k,10800
7
+ vedana_core/llm.py,sha256=oRPPoQPOLhf17tipekOMcFjKTHUhryshmvnpC9NA5lQ,8948
8
+ vedana_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ vedana_core/rag_agent.py,sha256=kYYgVe7tLghLaaeaLUPJU8hS-LEZ1knx7As3oH_Wfks,8321
10
+ vedana_core/rag_pipeline.py,sha256=eOnWYph-2qZufw4LzS6SeKWf69I7IKX_ynTd5H4noNY,13750
11
+ vedana_core/settings.py,sha256=X78c46zppz6kuFIJx4xGUoaerlomPYz8XrYqeIOMy8c,892
12
+ vedana_core/start_pipeline.py,sha256=SJWBCbod88z1Qv8st3bPAsqgGK7cRSwxOJcoWGRXvLM,597
13
+ vedana_core/utils.py,sha256=blNd44rHLfVjWvKJGHx5q6RJPHHGBHE_8DYRUdYUX24,1189
14
+ vedana_core/vts.py,sha256=JzIhb2bw1Z4fJrY7nn3FewIzRm2zZZVOUD8kgcgiucU,6981
15
+ vedana_core-0.1.0.dev3.dist-info/METADATA,sha256=ctotRu_F0biJqNDOFV1b2A4Z_MJmNM8mhGNgLk7tACI,899
16
+ vedana_core-0.1.0.dev3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
17
+ vedana_core-0.1.0.dev3.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.28.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any