vedana-core 0.1.0.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
vedana_core/app.py ADDED
@@ -0,0 +1,78 @@
1
+ from dataclasses import dataclass
2
+
3
+ import sqlalchemy.ext.asyncio as sa_aio
4
+ from async_lru import alru_cache
5
+ from jims_core.app import JimsApp
6
+ from loguru import logger
7
+
8
+ from vedana_core.data_model import DataModel
9
+ from vedana_core.db import get_sessionmaker
10
+ from vedana_core.graph import Graph, MemgraphGraph
11
+ from vedana_core.vts import VectorStore, PGVectorStore
12
+ from vedana_core.rag_pipeline import RagPipeline
13
+ from vedana_core.start_pipeline import StartPipeline
14
+ from vedana_core.settings import settings as core_settings
15
+
16
+
17
@dataclass
class VedanaApp:
    """Container for the components that make_vedana_app() wires together."""

    # Async SQLAlchemy session factory; the same object is handed to the
    # vector store and the data model by make_vedana_app().
    sessionmaker: sa_aio.async_sessionmaker[sa_aio.AsyncSession]

    # Graph backend (make_vedana_app() supplies a MemgraphGraph).
    graph: Graph
    # Vector store (make_vedana_app() supplies a PGVectorStore).
    vts: VectorStore
    # Data model accessor.
    data_model: DataModel
    # Pipeline that make_jims_app() passes to JimsApp as `pipeline`.
    pipeline: RagPipeline
    # Pipeline that make_jims_app() passes to JimsApp as `conversation_start_pipeline`.
    start_pipeline: StartPipeline
26
+
27
+
28
@alru_cache
async def make_vedana_app() -> VedanaApp:
    """Build the VedanaApp component bundle from ``core_settings``.

    The function takes no arguments and is wrapped in ``alru_cache``, so the
    components are constructed on the first await and the cached bundle is
    reused by later calls.

    Returns:
        A VedanaApp in which the vector store and the data model share one
        sessionmaker, and both pipelines share that data model.
    """
    session_factory = get_sessionmaker()

    # Graph connection parameters come from the core settings object.
    memgraph = MemgraphGraph(
        core_settings.memgraph_uri,
        core_settings.memgraph_user,
        core_settings.memgraph_pwd,
    )
    vector_store = PGVectorStore(sessionmaker=session_factory)
    model = DataModel(sessionmaker=session_factory)

    rag_pipeline = RagPipeline(
        graph=memgraph,
        vts=vector_store,
        data_model=model,
        logger=logger,
        threshold=0.8,
    )
    conversation_start = StartPipeline(data_model=model)

    return VedanaApp(
        sessionmaker=session_factory,
        graph=memgraph,
        vts=vector_store,
        data_model=model,
        pipeline=rag_pipeline,
        start_pipeline=conversation_start,
    )
62
+
63
+
64
@alru_cache
async def make_jims_app() -> JimsApp:
    """Create the JimsApp on top of the cached VedanaApp components.

    Returns:
        A JimsApp that uses the VedanaApp's sessionmaker, its RAG pipeline as
        the main pipeline, and its start pipeline as the conversation-start
        pipeline.
    """
    components = await make_vedana_app()
    return JimsApp(
        sessionmaker=components.sessionmaker,
        pipeline=components.pipeline,
        conversation_start_pipeline=components.start_pipeline,
    )
75
+
76
+
77
+ # This creates a coroutine object, which will be evaluated later in the event loop
78
+ app = make_jims_app()
@@ -0,0 +1,465 @@
1
+ import logging
2
+ from dataclasses import dataclass
3
+
4
+ import sqlalchemy.ext.asyncio as sa_aio
5
+ from sqlalchemy import select
6
+ from vedana_etl.catalog import (
7
+ dm_anchor_attributes,
8
+ dm_anchors,
9
+ dm_conversation_lifecycle,
10
+ dm_link_attributes,
11
+ dm_links,
12
+ dm_prompts,
13
+ dm_queries,
14
+ )
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
@dataclass
class Attribute:
    """One attribute of an anchor or of a link, as read from the attribute tables."""

    # Attribute name (filled from the `attribute_name` column).
    name: str
    description: str
    # Sample value (filled from the `data_example` column).
    example: str
    dtype: str
    # Rendered by the default description templates as the query for retrieving the attribute.
    query: str
    # Only embeddable attributes are reported by DataModel.vector_indices().
    embeddable: bool = False
    # NOTE(review): the default here is 0, while DataModel falls back to 1.0 when the
    # `embed_threshold` column is NULL — confirm which default is intended.
    embed_threshold: float = 0
28
+
29
+
30
@dataclass
class Anchor:
    """A data-model anchor (rendered under the "nodes" heading), keyed by its noun."""

    # Unique key of the anchor; DataModel groups rows and resolves links by it.
    noun: str
    description: str
    id_example: str
    query: str
    # Attributes joined in from the anchor-attributes table; empty when there are none.
    attributes: list[Attribute]

    def __str__(self) -> str:
        """Return the anchor's noun, so str(anchor) is its name."""
        return self.noun
40
+
41
+
42
@dataclass
class Link:
    """A relation between two anchors, keyed by its sentence."""

    # Resolved from the `anchor1` column of the links table.
    anchor_from: Anchor
    # Resolved from the `anchor2` column of the links table.
    anchor_to: Anchor
    # Key of the link; DataModel groups rows and joins link attributes by it.
    sentence: str
    description: str
    query: str
    # Attributes joined in from the link-attributes table; empty when there are none.
    attributes: list[Attribute]
    has_direction: bool = False
    # Filled from `anchor1_link_column_name`; DataModel.anchor_links() only matches
    # the "from" side when this is non-empty.
    anchor_from_link_attr_name: str = ""
    # Filled from `anchor2_link_column_name`; DataModel.anchor_links() only matches
    # the "to" side when this is non-empty.
    anchor_to_link_attr_name: str = ""
53
+
54
+
55
@dataclass
class Query:
    """A named example query, as read from the queries table."""

    # Filled from the `query_name` column.
    name: str
    # Filled from the `query_example` column.
    example: str
59
+
60
+
61
@dataclass
class ConversationLifecycleEvent:
    """Text associated with a conversation lifecycle event."""

    # Event identifier; used as the key by DataModel.conversation_lifecycle_events().
    event: str
    # Text stored for that event.
    text: str
65
+
66
+
67
@dataclass
class Prompt:
    """A named prompt/template text, as read from the prompts table."""

    # Prompt name; used as the key by DataModel.prompt_templates().
    name: str
    # Prompt body.
    text: str
71
+
72
+
73
class DataModel:
    """
    DataModel, read from SQL tables at runtime.

    Nothing is cached on the instance: every accessor opens a session from
    ``sessionmaker`` and queries the ``dm_*`` catalog tables again.
    """

    def __init__(self, sessionmaker: sa_aio.async_sessionmaker[sa_aio.AsyncSession]) -> None:
        self.sessionmaker = sessionmaker

    @classmethod
    def create(cls, sessionmaker) -> "DataModel":
        """Alternate constructor; same as ``DataModel(sessionmaker=sessionmaker)``."""
        return cls(sessionmaker=sessionmaker)

    @staticmethod
    def _attribute_from_row(row) -> Attribute:
        """Build an Attribute from a joined row, replacing NULL columns with defaults."""
        return Attribute(
            name=row.attribute_name,
            description=row.attr_description or "",
            example=row.data_example or "",
            embeddable=row.embeddable if row.embeddable is not None else False,
            query=row.attr_query or "",
            dtype=row.dtype or "",
            embed_threshold=row.embed_threshold if row.embed_threshold is not None else 1.0,
        )

    async def get_anchors(self) -> list[Anchor]:
        """Read anchors from dm_anchors table, together with their attributes."""
        # .data_table is TableStoreDB's attribute
        anchors_table = dm_anchors.store.data_table  # type: ignore[attr-defined]
        anchors_attr_table = dm_anchor_attributes.store.data_table  # type: ignore[attr-defined]

        async with self.sessionmaker() as session:
            join_query = select(
                anchors_table.c.noun,
                anchors_table.c.description.label("anchor_description"),
                anchors_table.c.id_example,
                anchors_table.c.query.label("anchor_query"),
                anchors_attr_table.c.attribute_name,
                anchors_attr_table.c.description.label("attr_description"),
                anchors_attr_table.c.data_example,
                anchors_attr_table.c.embeddable,
                anchors_attr_table.c.query.label("attr_query"),
                anchors_attr_table.c.dtype,
                anchors_attr_table.c.embed_threshold,
            ).select_from(
                anchors_table.join(  # left join
                    anchors_attr_table,
                    anchors_table.c.noun == anchors_attr_table.c.anchor,
                    isouter=True,
                )
            )
            result = (await session.execute(join_query)).fetchall()

        # The join yields one row per (anchor, attribute) pair; fold them by noun.
        anchors: dict[str, Anchor] = {}
        for row in result:
            noun = row.noun
            if noun not in anchors:
                anchors[noun] = Anchor(
                    noun=noun,
                    description=row.anchor_description,
                    id_example=row.id_example,
                    query=row.anchor_query,
                    attributes=[],
                )

            # Add attribute if it exists (attribute_name will be None for anchors without attributes)
            if row.attribute_name is not None:
                anchors[noun].attributes.append(self._attribute_from_row(row))

        return list(anchors.values())

    async def get_links(self, anchors_dict: dict[str, Anchor] | None = None) -> list[Link]:
        """Read links from dm_links table, together with their attributes.

        Args:
            anchors_dict: Anchors keyed by noun, used to resolve both ends of each
                link. When None, the anchors are loaded with get_anchors().

        Returns:
            The links whose two anchors were both found in ``anchors_dict``; a link
            with an unknown anchor is logged as an error and left out.
        """
        links_table = dm_links.store.data_table  # type: ignore[attr-defined]
        links_attr_table = dm_link_attributes.store.data_table  # type: ignore[attr-defined]

        if anchors_dict is None:
            anchors = await self.get_anchors()
            anchors_dict = {anchor.noun: anchor for anchor in anchors}

        async with self.sessionmaker() as session:
            join_query = select(
                links_table.c.anchor1,
                links_table.c.anchor2,
                links_table.c.sentence,
                links_table.c.description.label("link_description"),
                links_table.c.query.label("link_query"),
                links_table.c.anchor1_link_column_name,
                links_table.c.anchor2_link_column_name,
                links_table.c.has_direction,
                links_attr_table.c.attribute_name,
                links_attr_table.c.description.label("attr_description"),
                links_attr_table.c.data_example,
                links_attr_table.c.embeddable,
                links_attr_table.c.query.label("attr_query"),
                links_attr_table.c.dtype,
                links_attr_table.c.embed_threshold,
            ).select_from(
                links_table.join(  # left join
                    links_attr_table,
                    links_table.c.sentence == links_attr_table.c.link,
                    isouter=True,
                )
            )

            result = (await session.execute(join_query)).fetchall()

        # The join yields one row per (link, attribute) pair; fold them by sentence.
        links: dict[str, Link] = {}
        for row in result:
            sentence = row.sentence
            if sentence not in links:
                anchor_from = anchors_dict.get(row.anchor1)
                anchor_to = anchors_dict.get(row.anchor2)
                if anchor_from is None or anchor_to is None:
                    logger.error(
                        'Link %s has invalid connection "%s - %s"',
                        sentence,
                        row.anchor1,
                        row.anchor2,
                    )
                    continue

                links[sentence] = Link(
                    anchor_from=anchor_from,
                    anchor_to=anchor_to,
                    anchor_from_link_attr_name=row.anchor1_link_column_name,
                    anchor_to_link_attr_name=row.anchor2_link_column_name,
                    sentence=sentence,
                    description=row.link_description,
                    query=row.link_query,
                    has_direction=bool(row.has_direction) if row.has_direction is not None else False,
                    attributes=[],
                )

            # Add attribute if it exists (attribute_name will be None for links without attributes)
            if row.attribute_name is not None:
                links[sentence].attributes.append(self._attribute_from_row(row))

        return list(links.values())

    async def get_queries(self) -> list[Query]:
        """Read example queries from dm_queries; returns [] if they cannot be read."""
        try:
            queries_table = dm_queries.store.data_table  # type: ignore[attr-defined]
            async with self.sessionmaker() as session:
                result = (await session.execute(select(queries_table))).fetchall()
            return [Query(name=row.query_name, example=row.query_example) for row in result]
        except Exception:
            # Stay best-effort (callers get an empty list), but leave a trace of the failure.
            logger.warning("Failed to read dm_queries; returning no queries", exc_info=True)
            return []

    async def get_conversation_lifecycle_events(self) -> list[ConversationLifecycleEvent]:
        """Read lifecycle events from dm_conversation_lifecycle; returns [] if they cannot be read."""
        try:
            lifecycle_table = dm_conversation_lifecycle.store.data_table  # type: ignore[attr-defined]
            async with self.sessionmaker() as session:
                result = (await session.execute(select(lifecycle_table))).fetchall()
            return [ConversationLifecycleEvent(event=row.event, text=row.text) for row in result]
        except Exception:
            # Stay best-effort (callers get an empty list), but leave a trace of the failure.
            logger.warning(
                "Failed to read dm_conversation_lifecycle; returning no events",
                exc_info=True,
            )
            return []

    async def conversation_lifecycle_events(self) -> dict[str, str]:
        """Return the lifecycle events as an ``event -> text`` mapping."""
        cl = await self.get_conversation_lifecycle_events()
        return {c.event: c.text for c in cl}

    async def get_prompts(self) -> list[Prompt]:
        """Read prompts from dm_prompts; returns [] if they cannot be read."""
        try:
            prompts_table = dm_prompts.store.data_table  # type: ignore[attr-defined]
            async with self.sessionmaker() as session:
                result = (await session.execute(select(prompts_table))).fetchall()
            return [Prompt(name=row.name, text=row.text) for row in result]
        except Exception:
            # Stay best-effort (callers get an empty list), but leave a trace of the failure.
            logger.warning("Failed to read dm_prompts; returning no prompts", exc_info=True)
            return []

    async def prompt_templates(self) -> dict[str, str]:
        """Return the prompts as a ``name -> text`` mapping."""
        prompts = await self.get_prompts()
        return {p.name: p.text for p in prompts}

    async def vector_indices(self) -> list[tuple[str, str, str, float]]:
        """
        returns list
        ("anchor", anchor.noun, anchor.attribute, anchor.th) +
        ("edge", link.sentence, link.attribute, link.th)
        for all embeddable attributes
        """
        anchors = await self.get_anchors()
        links = await self.get_links(anchors_dict={a.noun: a for a in anchors})

        a_i = [
            ("anchor", anchor.noun, attr.name, attr.embed_threshold)
            for anchor in anchors
            for attr in anchor.attributes
            if attr.embeddable
        ]
        l_i = [
            ("edge", link.sentence, attr.name, attr.embed_threshold)
            for link in links
            for attr in link.attributes
            if attr.embeddable
        ]
        return a_i + l_i

    async def anchor_links(self, anchor_noun: str) -> list[Link]:
        """all links that connect to/from this anchor"""
        links = await self.get_links()
        # A side only counts when its link-attribute name is set (non-empty).
        return [
            link
            for link in links
            if (link.anchor_from.noun == anchor_noun and link.anchor_from_link_attr_name)
            or (link.anchor_to.noun == anchor_noun and link.anchor_to_link_attr_name)
        ]

    async def to_text_descr(
        self,
        anchor_nouns: list[str] | None = None,
        link_sentences: list[str] | None = None,
        anchor_attribute_names: list[str] | None = None,
        link_attribute_names: list[str] | None = None,
        query_names: list[str] | None = None,
    ) -> str:
        """Create a text description of the data model, optionally filtered.

        Templates are taken from the stored prompts when a prompt with the
        template's name exists, otherwise from the module-level defaults.

        Args:
            anchor_nouns: List of anchor nouns to include. If None, includes all.
            link_sentences: List of link sentences to include. If None, includes all.
            anchor_attribute_names: List of anchor attribute names to include. If None, includes all.
            link_attribute_names: List of link attribute names to include. If None, includes all.
            query_names: List of query names to include. If None, includes all.

        Returns:
            A formatted string description of the data model.
        """
        anchors = await self.get_anchors()
        links = await self.get_links(anchors_dict={a.noun: a for a in anchors})
        queries = await self.get_queries()
        dm_templates = await self.prompt_templates()

        # Convert to sets for efficient lookup, None means include all
        anchor_set = set(anchor_nouns) if anchor_nouns is not None else None
        link_set = set(link_sentences) if link_sentences is not None else None
        anchor_attr_set = set(anchor_attribute_names) if anchor_attribute_names is not None else None
        link_attr_set = set(link_attribute_names) if link_attribute_names is not None else None
        query_set = set(query_names) if query_names is not None else None

        # Filter anchors
        filtered_anchors = [anchor for anchor in anchors if anchor_set is None or anchor.noun in anchor_set]

        # Nouns that survived the anchor filter (for link filtering)
        kept_nouns = {a.noun for a in filtered_anchors}

        # Filter links (only include if both anchors are in filtered set)
        filtered_links = [
            link
            for link in links
            if (link_set is None or link.sentence in link_set)
            and link.anchor_from.noun in kept_nouns
            and link.anchor_to.noun in kept_nouns
        ]

        filtered_queries = [q for q in queries if query_set is None or q.name in query_set]

        # Resolve every template once instead of once per rendered item.
        anchor_tpl = dm_templates.get("dm_anchor_descr_template", dm_anchor_descr_template)
        attr_tpl = dm_templates.get("dm_attr_descr_template", dm_attr_descr_template)
        link_tpl = dm_templates.get("dm_link_descr_template", dm_link_descr_template)
        link_attr_tpl = dm_templates.get("dm_link_attr_descr_template", dm_link_attr_descr_template)
        query_tpl = dm_templates.get("dm_query_descr_template", dm_query_descr_template)
        dm_template = dm_templates.get("dm_descr_template", dm_descr_template)

        anchor_descr = "\n".join(anchor_tpl.format(anchor=anchor) for anchor in filtered_anchors)

        anchor_attrs_descr = "\n".join(
            attr_tpl.format(anchor=anchor, attr=attr)
            for anchor in filtered_anchors
            for attr in anchor.attributes
            if anchor_attr_set is None or attr.name in anchor_attr_set
        )

        link_descr = "\n".join(link_tpl.format(link=link) for link in filtered_links)

        link_attrs_descr = "\n".join(
            link_attr_tpl.format(link=link, attr=attr)
            for link in filtered_links
            for attr in link.attributes
            if link_attr_set is None or attr.name in link_attr_set
        )

        queries_descr = "\n".join(query_tpl.format(query=query) for query in filtered_queries)

        return dm_template.format(
            anchors=anchor_descr,
            anchor_attrs=anchor_attrs_descr,
            links=link_descr,
            link_attrs=link_attrs_descr,
            queries=queries_descr,
        )

    async def to_compact_json(self) -> dict:
        """Return a compact, JSON-serialisable summary of the data model.

        Returns:
            A dict with three keys: ``"anchors"`` (name, description, example and
            attribute names/descriptions), ``"links"`` (the nouns of both end
            anchors, sentence, description and attribute names/descriptions) and
            ``"queries"`` (query names keyed by their 1-based position).
        """
        anchors = await self.get_anchors()
        links = await self.get_links(anchors_dict={a.noun: a for a in anchors})
        queries = await self.get_queries()

        descr = {
            "anchors": [
                {
                    "name": a.noun,
                    "description": a.description,
                    "example": a.id_example,
                    "attributes": [
                        {
                            "attr_name": aa.name,
                            "attr_description": aa.description,
                        }
                        for aa in a.attributes
                    ],
                }
                for a in anchors
            ],
            "links": [
                {
                    # Emit the anchor nouns: Anchor instances are not JSON-serialisable,
                    # and the "anchors" section already identifies anchors by name.
                    "from": li.anchor_from.noun,
                    "to": li.anchor_to.noun,
                    "sentence": li.sentence,
                    "description": li.description,
                    "attributes": [
                        {
                            "attr_name": la.name,
                            "attr_description": la.description,
                        }
                        for la in li.attributes
                    ],
                }
                for li in links
            ],
            "queries": {i: q.name for i, q in enumerate(queries, start=1)},
        }
        return descr
411
+
412
+
413
# Default templates. DataModel.to_text_descr() uses each of these unless a stored
# prompt with the same name overrides it. The template texts are in Russian; the
# trailing comments give the English meaning of the fixed wording.
dm_descr_template = (
    "## Узлы:\n"  # "Nodes"
    "{anchors}\n"
    "\n"
    "## Атрибуты узлов:\n"  # "Node attributes"
    "{anchor_attrs}\n"
    "\n"
    "## Связи между узлами:\n"  # "Links between nodes"
    "{links}\n"
    "\n"
    "## Атрибуты связей:\n"  # "Link attributes"
    "{link_attrs}\n"
    "\n"
    "## Типичные вопросы:\n"  # "Typical questions"
    "{queries}\n"
)

# One line per anchor: description, "ID example", "query to retrieve".
dm_anchor_descr_template = (
    "- {anchor.noun}: {anchor.description}; "
    "пример ID: {anchor.id_example}; "
    "запрос для получения: {anchor.query}"
)
# One line per anchor attribute: description, "example", "query to retrieve".
dm_attr_descr_template = (
    "- {anchor.noun}.{attr.name}: {attr.description}; "
    "пример: {attr.example}; "
    "запрос для получения: {attr.query}"
)
# One line per link: description, "query example".
dm_link_descr_template = (
    "- {link.sentence}: {link.description}; "
    "пример запроса: {link.query}"
)
# One line per link attribute: description, "example", "query to retrieve".
dm_link_attr_descr_template = (
    "- {link.sentence}.{attr.name}: {attr.description}; "
    "пример: {attr.example}; "
    "запрос для получения: {attr.query}"
)
# Query name on one line, its example on the next.
dm_query_descr_template = "- {query.name}\n{query.example}"
442
+
443
# Compact templates (without cypher queries). The template texts are in Russian;
# the trailing comments give the English meaning of the headings.
dm_compact_descr_template = (
    "## Узлы:\n"  # "Nodes"
    "{anchors}\n"
    "\n"
    "## Атрибуты узлов:\n"  # "Node attributes"
    "{anchor_attrs}\n"
    "\n"
    "## Связи между узлами:\n"  # "Links between nodes"
    "{links}\n"
    "\n"
    "## Атрибуты связей:\n"  # "Link attributes"
    "{link_attrs}\n"
    "\n"
    "## Сценарии вопросов:\n"  # "Question scenarios"
    "{queries}\n"
)

# Per-item lines: name and description only, no example or query parts.
dm_compact_anchor_descr_template = "- {anchor.noun}: {anchor.description}"
dm_compact_attr_descr_template = "- {anchor.noun}.{attr.name}: {attr.description}"
dm_compact_link_descr_template = "- {link.sentence}: {link.description}"
dm_compact_link_attr_descr_template = "- {link.sentence}.{attr.name}: {attr.description}"
dm_compact_query_descr_template = "- {query.name}"