vedana-core 0.1.0.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vedana_core/__init__.py +0 -0
- vedana_core/app.py +78 -0
- vedana_core/data_model.py +465 -0
- vedana_core/data_provider.py +513 -0
- vedana_core/db.py +41 -0
- vedana_core/graph.py +300 -0
- vedana_core/llm.py +192 -0
- vedana_core/py.typed +0 -0
- vedana_core/rag_agent.py +234 -0
- vedana_core/rag_pipeline.py +326 -0
- vedana_core/settings.py +35 -0
- vedana_core/start_pipeline.py +17 -0
- vedana_core/utils.py +31 -0
- vedana_core/vts.py +167 -0
- vedana_core-0.1.0.dev3.dist-info/METADATA +29 -0
- vedana_core-0.1.0.dev3.dist-info/RECORD +17 -0
- vedana_core-0.1.0.dev3.dist-info/WHEEL +4 -0
vedana_core/vts.py
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
import logging
|
|
3
|
+
from typing import Sequence
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
from neo4j import AsyncGraphDatabase, Record, RoutingControl
|
|
7
|
+
from opentelemetry import trace
|
|
8
|
+
from sqlalchemy import RowMapping, select
|
|
9
|
+
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
|
10
|
+
from vedana_etl.catalog import edges, nodes, rag_anchor_embeddings, rag_edge_embeddings
|
|
11
|
+
|
|
12
|
+
from vedana_core.db import get_sessionmaker
|
|
13
|
+
|
|
14
|
+
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
15
|
+
# OpenTelemetry tracer for this module; the vector stores in this file open their spans from it.
tracer = trace.get_tracer(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class VectorStore(abc.ABC):
|
|
19
|
+
async def vector_search(
|
|
20
|
+
self,
|
|
21
|
+
label: str,
|
|
22
|
+
prop_type: str,
|
|
23
|
+
prop_name: str,
|
|
24
|
+
embedding: list[float],
|
|
25
|
+
threshold: float,
|
|
26
|
+
top_n: int = 5,
|
|
27
|
+
):
|
|
28
|
+
raise NotImplementedError
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class MemgraphVectorStore(VectorStore):
    """
    Vector store that queries Memgraph's ``vector_search`` procedures.

    The vector indices are not created here; they must already exist and be
    named ``{label}_{prop_name}_embed_idx`` (spaces replaced by underscores).
    """

    def __init__(self, uri: str, user: str, pwd: str, db_name: str = "") -> None:
        credentials = (user, pwd)
        # Only the driver object is built here; connectivity is not verified
        # at construction time.
        self.driver = AsyncGraphDatabase.driver(uri, auth=credentials, database=db_name)
        self.driver_uri = uri
        self.auth = credentials

    @staticmethod
    def _search_query(prop_type: str) -> str:
        """Return the Cypher text for an edge search (``"edge"``) or, otherwise, a node search."""
        if prop_type != "edge":
            return " ".join(
                [
                    "CALL vector_search.search($idx_name, $top_n, $embedding)",
                    "YIELD similarity, node",
                    "WITH similarity, node WHERE similarity > $threshold RETURN *",
                ]
            )
        return " ".join(
            [
                "CALL vector_search.search_edges($idx_name, $top_n, $embedding)",
                "YIELD similarity, edge",
                "WITH similarity, edge",
                "WHERE similarity > $threshold",
                "RETURN similarity, edge, startNode(edge) AS start, endNode(edge) AS end;",
            ]
        )

    async def vector_search(
        self,
        label: str,
        prop_type: str,
        prop_name: str,
        embedding: np.ndarray | list[float],
        threshold: float,
        top_n: int = 5,
    ) -> list[Record]:
        """
        Run a vector index search in Memgraph and return the matching records.

        ``prop_type == "edge"`` searches edges (each record also carries the
        edge's start and end nodes); any other value searches nodes. Only rows
        with ``similarity > threshold`` are returned. The call is wrapped in a
        ``memgraph.vector_search`` span that records the search parameters and
        the query text.
        """
        with tracer.start_as_current_span("memgraph.vector_search") as span:
            traced_params = (
                ("memgraph.label", label),
                ("memgraph.prop_type", prop_type),
                ("memgraph.prop_name", prop_name),
                ("memgraph.top_n", top_n),
                ("memgraph.threshold", threshold),
            )
            for attr_key, attr_value in traced_params:
                span.set_attribute(attr_key, attr_value)

            cypher = self._search_query(prop_type)
            span.set_attribute("memgraph.query", cypher)

            # Index names cannot contain the spaces a label or property may have.
            index_name = f"{label}_{prop_name}_embed_idx".replace(" ", "_")
            result = await self.driver.execute_query(
                cypher,
                idx_name=index_name,
                top_n=top_n,
                embedding=embedding,
                threshold=threshold,
                routing_=RoutingControl.READ,
            )
            return result.records
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class PGVectorStore(VectorStore):
    """
    Vector store that searches the embedding tables from ``vedana_etl.catalog`` through SQLAlchemy.

    Similarity is computed as ``1 - cosine_distance`` between the stored
    embedding column and the query vector.
    """

    def __init__(
        self,
        sessionmaker: async_sessionmaker[AsyncSession] | None = None,
    ) -> None:
        # Fall back to the project-wide session factory when none is supplied.
        if not sessionmaker:
            sessionmaker = get_sessionmaker()
        self._sessionmaker: async_sessionmaker[AsyncSession] = sessionmaker

        self.rag_anchor_embeddings_table = rag_anchor_embeddings.store.data_table  # type: ignore[attr-defined]
        self.rag_edge_embeddings_table = rag_edge_embeddings.store.data_table  # type: ignore[attr-defined]
        self.node_table = nodes.store.data_table  # type: ignore[attr-defined]
        self.edge_table = edges.store.data_table  # type: ignore[attr-defined]

    def _edge_search_stmt(self, label, prop_name, embedding, threshold, top_n):
        """Build the SELECT that ranks edge embeddings against ``embedding``."""
        emb = self.rag_edge_embeddings_table
        edge = self.edge_table

        distance = emb.c.embedding.cosine_distance(embedding)
        similarity = (1 - distance).label("similarity")

        # An edge is identified by its endpoints plus its label.
        same_edge = (
            (emb.c.from_node_id == edge.c.from_node_id)
            & (emb.c.to_node_id == edge.c.to_node_id)
            & (emb.c.edge_label == edge.c.edge_label)
        )
        columns = select(
            similarity,
            edge.c.from_node_id,
            edge.c.to_node_id,
            edge.c.edge_label,
            edge.c.attributes.label("edge"),
        )
        return (
            columns.select_from(emb.join(edge, same_edge))
            .where(emb.c.edge_label == label)
            .where(emb.c.attribute_name == prop_name)
            .where(similarity > threshold)
            .order_by(distance)
            .limit(top_n)
        )

    def _node_search_stmt(self, label, prop_name, embedding, threshold, top_n):
        """Build the SELECT that ranks node (anchor) embeddings against ``embedding``."""
        emb = self.rag_anchor_embeddings_table
        node = self.node_table

        distance = emb.c.embedding.cosine_distance(embedding)
        similarity = (1 - distance).label("similarity")

        columns = select(
            similarity,
            node.c.node_id,
            node.c.node_type,
            node.c.attributes.label("node"),
        )
        return (
            columns.select_from(emb.join(node, emb.c.node_id == node.c.node_id))
            .where(emb.c.label == label)
            .where(emb.c.attribute_name == prop_name)
            .where(similarity > threshold)
            .order_by(distance)
            .limit(top_n)
        )

    async def vector_search(
        self,
        label: str,
        prop_type: str,
        prop_name: str,
        embedding: np.ndarray | list[float],
        threshold: float,
        top_n: int = 5,
    ) -> Sequence[RowMapping]:
        """
        Return up to ``top_n`` rows whose similarity to ``embedding`` exceeds ``threshold``.

        ``prop_type == "edge"`` searches edge embeddings (rows expose
        ``similarity``, ``from_node_id``, ``to_node_id``, ``edge_label`` and
        ``edge``); any other value searches node embeddings (rows expose
        ``similarity``, ``node_id``, ``node_type`` and ``node``). Rows are
        ordered by cosine distance. The call is wrapped in a
        ``pgvector.vector_search`` span that records the search parameters and
        the statement text.
        """
        with tracer.start_as_current_span("pgvector.vector_search") as span:
            traced_params = (
                ("pgvector.label", label),
                ("pgvector.prop_type", prop_type),
                ("pgvector.prop_name", prop_name),
                ("pgvector.top_n", top_n),
                ("pgvector.threshold", threshold),
            )
            for attr_key, attr_value in traced_params:
                span.set_attribute(attr_key, attr_value)

            async with self._sessionmaker() as session:
                if prop_type == "edge":
                    build_stmt = self._edge_search_stmt
                else:  # node
                    build_stmt = self._node_search_stmt
                stmt = build_stmt(label, prop_name, embedding, threshold, top_n)

                span.set_attribute("pgvector.query", str(stmt))
                result = await session.execute(stmt)
                return result.mappings().all()
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: vedana-core
|
|
3
|
+
Version: 0.1.0.dev3
|
|
4
|
+
Summary: Semantic Graph RAG App
|
|
5
|
+
Author-email: Andrey Tatarinov <a@tatarinov.co>, Timur Sheydaev <tsheyd@epoch8.co>
|
|
6
|
+
Requires-Python: >=3.12
|
|
7
|
+
Requires-Dist: aiohttp>=3.11.18
|
|
8
|
+
Requires-Dist: aioitertools>=0.12.0
|
|
9
|
+
Requires-Dist: aiosqlite>=0.21.0
|
|
10
|
+
Requires-Dist: alembic>=1.13.0
|
|
11
|
+
Requires-Dist: async-lru>=2.0.5
|
|
12
|
+
Requires-Dist: asyncpg>=0.29.0
|
|
13
|
+
Requires-Dist: grist-api>=0.1.1
|
|
14
|
+
Requires-Dist: jims-backoffice
|
|
15
|
+
Requires-Dist: jims-core
|
|
16
|
+
Requires-Dist: jims-telegram
|
|
17
|
+
Requires-Dist: jims-tui
|
|
18
|
+
Requires-Dist: litellm>=1.79.0
|
|
19
|
+
Requires-Dist: neo4j>=5.28.1
|
|
20
|
+
Requires-Dist: openai>=2.8.0
|
|
21
|
+
Requires-Dist: pandas>=2.2.3
|
|
22
|
+
Requires-Dist: psycopg2-binary>=2.9.10
|
|
23
|
+
Requires-Dist: pydantic-settings>=2.9.1
|
|
24
|
+
Requires-Dist: sentry-sdk[fastapi,opentelemetry]>=2.32.0
|
|
25
|
+
Requires-Dist: sqlalchemy>=2.0.41
|
|
26
|
+
Requires-Dist: vedana-etl
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
|
|
29
|
+
# Vedana
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
vedana_core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
vedana_core/app.py,sha256=LmolsEf6_INsDWpeSaYDxoZ9v029K-hmlRvUV_65VT0,1893
|
|
3
|
+
vedana_core/data_model.py,sha256=O24Rzwh712fhj4al71S2UM9RUFbg1UV5fojvPooH6UI,17589
|
|
4
|
+
vedana_core/data_provider.py,sha256=r3yVKKm7Bo6tN0ZnmlIyo-Ndd5KNhBRPaC0wb895Xl0,18711
|
|
5
|
+
vedana_core/db.py,sha256=Wi71gXBrqPv6N6MIjaaHHFVmGIjr3LiRs5AeduDbGLc,1143
|
|
6
|
+
vedana_core/graph.py,sha256=uYYRdfgX5rXI30cdNvXpbevDo0fEzRcTrwIu6h11Q2k,10800
|
|
7
|
+
vedana_core/llm.py,sha256=oRPPoQPOLhf17tipekOMcFjKTHUhryshmvnpC9NA5lQ,8948
|
|
8
|
+
vedana_core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
+
vedana_core/rag_agent.py,sha256=kYYgVe7tLghLaaeaLUPJU8hS-LEZ1knx7As3oH_Wfks,8321
|
|
10
|
+
vedana_core/rag_pipeline.py,sha256=eOnWYph-2qZufw4LzS6SeKWf69I7IKX_ynTd5H4noNY,13750
|
|
11
|
+
vedana_core/settings.py,sha256=X78c46zppz6kuFIJx4xGUoaerlomPYz8XrYqeIOMy8c,892
|
|
12
|
+
vedana_core/start_pipeline.py,sha256=SJWBCbod88z1Qv8st3bPAsqgGK7cRSwxOJcoWGRXvLM,597
|
|
13
|
+
vedana_core/utils.py,sha256=blNd44rHLfVjWvKJGHx5q6RJPHHGBHE_8DYRUdYUX24,1189
|
|
14
|
+
vedana_core/vts.py,sha256=JzIhb2bw1Z4fJrY7nn3FewIzRm2zZZVOUD8kgcgiucU,6981
|
|
15
|
+
vedana_core-0.1.0.dev3.dist-info/METADATA,sha256=ctotRu_F0biJqNDOFV1b2A4Z_MJmNM8mhGNgLk7tACI,899
|
|
16
|
+
vedana_core-0.1.0.dev3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
17
|
+
vedana_core-0.1.0.dev3.dist-info/RECORD,,
|