typeagent-py 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- typeagent/aitools/auth.py +61 -0
- typeagent/aitools/embeddings.py +232 -0
- typeagent/aitools/utils.py +244 -0
- typeagent/aitools/vectorbase.py +175 -0
- typeagent/knowpro/answer_context_schema.py +49 -0
- typeagent/knowpro/answer_response_schema.py +34 -0
- typeagent/knowpro/answers.py +577 -0
- typeagent/knowpro/collections.py +759 -0
- typeagent/knowpro/common.py +9 -0
- typeagent/knowpro/convknowledge.py +112 -0
- typeagent/knowpro/convsettings.py +94 -0
- typeagent/knowpro/convutils.py +49 -0
- typeagent/knowpro/date_time_schema.py +32 -0
- typeagent/knowpro/field_helpers.py +87 -0
- typeagent/knowpro/fuzzyindex.py +144 -0
- typeagent/knowpro/interfaces.py +818 -0
- typeagent/knowpro/knowledge.py +88 -0
- typeagent/knowpro/kplib.py +125 -0
- typeagent/knowpro/query.py +1128 -0
- typeagent/knowpro/search.py +628 -0
- typeagent/knowpro/search_query_schema.py +165 -0
- typeagent/knowpro/searchlang.py +729 -0
- typeagent/knowpro/searchlib.py +345 -0
- typeagent/knowpro/secindex.py +100 -0
- typeagent/knowpro/serialization.py +390 -0
- typeagent/knowpro/textlocindex.py +179 -0
- typeagent/knowpro/utils.py +17 -0
- typeagent/mcp/server.py +139 -0
- typeagent/podcasts/podcast.py +473 -0
- typeagent/podcasts/podcast_import.py +105 -0
- typeagent/storage/__init__.py +25 -0
- typeagent/storage/memory/__init__.py +13 -0
- typeagent/storage/memory/collections.py +68 -0
- typeagent/storage/memory/convthreads.py +81 -0
- typeagent/storage/memory/messageindex.py +178 -0
- typeagent/storage/memory/propindex.py +289 -0
- typeagent/storage/memory/provider.py +84 -0
- typeagent/storage/memory/reltermsindex.py +318 -0
- typeagent/storage/memory/semrefindex.py +660 -0
- typeagent/storage/memory/timestampindex.py +176 -0
- typeagent/storage/sqlite/__init__.py +31 -0
- typeagent/storage/sqlite/collections.py +362 -0
- typeagent/storage/sqlite/messageindex.py +382 -0
- typeagent/storage/sqlite/propindex.py +119 -0
- typeagent/storage/sqlite/provider.py +293 -0
- typeagent/storage/sqlite/reltermsindex.py +328 -0
- typeagent/storage/sqlite/schema.py +248 -0
- typeagent/storage/sqlite/semrefindex.py +156 -0
- typeagent/storage/sqlite/timestampindex.py +146 -0
- typeagent/storage/utils.py +41 -0
- typeagent_py-0.1.0.dist-info/METADATA +28 -0
- typeagent_py-0.1.0.dist-info/RECORD +55 -0
- typeagent_py-0.1.0.dist-info/WHEEL +5 -0
- typeagent_py-0.1.0.dist-info/licenses/LICENSE +21 -0
- typeagent_py-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,660 @@
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
2
|
+
# Licensed under the MIT License.
|
3
|
+
|
4
|
+
from __future__ import annotations # TODO: Avoid
|
5
|
+
|
6
|
+
from collections.abc import AsyncIterable, Callable
|
7
|
+
|
8
|
+
from typechat import Failure
|
9
|
+
|
10
|
+
from ...knowpro import convknowledge, kplib, secindex
|
11
|
+
from ...knowpro.convsettings import ConversationSettings
|
12
|
+
from ...knowpro.convsettings import SemanticRefIndexSettings
|
13
|
+
from ...knowpro.interfaces import (
|
14
|
+
# Interfaces.
|
15
|
+
IConversation,
|
16
|
+
IMessage,
|
17
|
+
ISemanticRefCollection,
|
18
|
+
ITermToSemanticRefIndex,
|
19
|
+
# Other imports.
|
20
|
+
Knowledge,
|
21
|
+
KnowledgeType,
|
22
|
+
MessageOrdinal,
|
23
|
+
SemanticRefOrdinal,
|
24
|
+
ScoredSemanticRefOrdinal,
|
25
|
+
SemanticRef,
|
26
|
+
TermToSemanticRefIndexItemData,
|
27
|
+
TermToSemanticRefIndexData,
|
28
|
+
TextLocation,
|
29
|
+
TextRange,
|
30
|
+
Topic,
|
31
|
+
)
|
32
|
+
from ...knowpro.utils import text_range_from_message_chunk
|
33
|
+
from ...knowpro.knowledge import extract_knowledge_from_text_batch
|
34
|
+
|
35
|
+
|
36
|
+
def text_range_from_location(
    message_ordinal: MessageOrdinal,
    chunk_ordinal: int = 0,
) -> TextRange:
    """Build a text range that starts at one message chunk and has no end.

    Args:
        message_ordinal: Ordinal of the message the range starts in.
        chunk_ordinal: Ordinal of the chunk within that message (default 0).

    Returns:
        A TextRange whose start is the given location and whose end is None.
    """
    start_location = TextLocation(message_ordinal, chunk_ordinal)
    return TextRange(start=start_location, end=None)
|
44
|
+
|
45
|
+
|
46
|
+
type KnowledgeValidator = Callable[
|
47
|
+
[
|
48
|
+
KnowledgeType, # knowledge_type
|
49
|
+
Knowledge, # knowledge
|
50
|
+
],
|
51
|
+
bool,
|
52
|
+
]
|
53
|
+
|
54
|
+
|
55
|
+
async def add_batch_to_semantic_ref_index[
|
56
|
+
TMessage: IMessage, TTermToSemanticRefIndex: ITermToSemanticRefIndex
|
57
|
+
](
|
58
|
+
conversation: IConversation[TMessage, TTermToSemanticRefIndex],
|
59
|
+
batch: list[TextLocation],
|
60
|
+
knowledge_extractor: convknowledge.KnowledgeExtractor,
|
61
|
+
terms_added: set[str] | None = None,
|
62
|
+
) -> None:
|
63
|
+
messages = conversation.messages
|
64
|
+
|
65
|
+
text_batch = [
|
66
|
+
(await messages.get_item(tl.message_ordinal))
|
67
|
+
.text_chunks[tl.chunk_ordinal]
|
68
|
+
.strip()
|
69
|
+
for tl in batch
|
70
|
+
]
|
71
|
+
|
72
|
+
knowledge_results = await extract_knowledge_from_text_batch(
|
73
|
+
knowledge_extractor,
|
74
|
+
text_batch,
|
75
|
+
len(text_batch),
|
76
|
+
)
|
77
|
+
for i, knowledge_result in enumerate(knowledge_results):
|
78
|
+
if isinstance(knowledge_result, Failure):
|
79
|
+
raise RuntimeError(
|
80
|
+
f"Knowledge extraction failed: {knowledge_result.message}"
|
81
|
+
)
|
82
|
+
text_location = batch[i]
|
83
|
+
knowledge = knowledge_result.value
|
84
|
+
await add_knowledge_to_semantic_ref_index(
|
85
|
+
conversation,
|
86
|
+
text_location.message_ordinal,
|
87
|
+
text_location.chunk_ordinal,
|
88
|
+
knowledge,
|
89
|
+
terms_added,
|
90
|
+
)
|
91
|
+
|
92
|
+
|
93
|
+
async def add_entity_to_index(
    entity: kplib.ConcreteEntity,
    semantic_refs: ISemanticRefCollection,
    semantic_ref_index: ITermToSemanticRefIndex,
    message_ordinal: MessageOrdinal,
    chunk_ordinal: int = 0,
) -> None:
    """Append a semantic ref for an entity and index its terms.

    The entity name, each of its types, and each of its facets are indexed
    against the ordinal of the newly appended semantic ref.

    Args:
        entity: The concrete entity to add.
        semantic_refs: Collection the new semantic ref is appended to.
        semantic_ref_index: Index the terms are added to.
        message_ordinal: Ordinal of the message containing the entity.
        chunk_ordinal: Ordinal of the chunk within the message (default 0).
    """
    # The new ref's ordinal is the collection size before the append.
    ordinal = await semantic_refs.size()
    entity_ref = SemanticRef(
        semantic_ref_ordinal=ordinal,
        range=text_range_from_location(message_ordinal, chunk_ordinal),
        knowledge=entity,
    )
    await semantic_refs.append(entity_ref)

    await semantic_ref_index.add_term(entity.name, ordinal)

    # Every type is its own term.
    for type_name in entity.type:
        await semantic_ref_index.add_term(type_name, ordinal)

    # Every facet is indexed through add_facet.
    facets = entity.facets
    if facets:
        for facet in facets:
            await add_facet(facet, ordinal, semantic_ref_index)
|
116
|
+
|
117
|
+
|
118
|
+
async def add_term_to_index(
    index: ITermToSemanticRefIndex,
    term: str,
    semantic_ref_ordinal: SemanticRefOrdinal,
    terms_added: set[str] | None = None,
) -> None:
    """Index a term and optionally record what the index stored.

    Args:
        index: The index to add the term to.
        term: The term to add.
        semantic_ref_ordinal: Ordinal of the semantic reference.
        terms_added: Optional set; when given, the term as returned by
            ``index.add_term`` is added to it.
    """
    stored_term = await index.add_term(term, semantic_ref_ordinal)
    if terms_added is None:
        return
    terms_added.add(stored_term)
|
135
|
+
|
136
|
+
|
137
|
+
async def add_entity(
    entity: kplib.ConcreteEntity,
    semantic_refs: ISemanticRefCollection,
    semantic_ref_index: ITermToSemanticRefIndex,
    message_ordinal: MessageOrdinal,
    chunk_ordinal: int,
    terms_added: set[str] | None = None,
) -> None:
    """Add an entity to the semantic reference index.

    A new SemanticRef for the entity is appended to ``semantic_refs``; the
    entity name, each of its types, and each of its facets are then indexed
    against that reference's ordinal.

    Args:
        entity: The concrete entity to add
        semantic_refs: Collection of semantic references to add to
        semantic_ref_index: Index to add terms to
        message_ordinal: Ordinal of the message containing the entity
        chunk_ordinal: Ordinal of the chunk within the message
        terms_added: Optional set to track terms added to the index
    """
    # The new ref's ordinal is the collection size before the append.
    semantic_ref_ordinal = await semantic_refs.size()
    await semantic_refs.append(
        SemanticRef(
            semantic_ref_ordinal=semantic_ref_ordinal,
            range=text_range_from_message_chunk(message_ordinal, chunk_ordinal),
            knowledge=entity,
        )
    )
    await add_term_to_index(
        semantic_ref_index,
        entity.name,
        semantic_ref_ordinal,
        terms_added,
    )

    # Add each type as a separate term
    for type_name in entity.type:
        await add_term_to_index(
            semantic_ref_index, type_name, semantic_ref_ordinal, terms_added
        )

    # Add every facet as separate terms. terms_added is forwarded so facet
    # terms are tracked like the name and type terms above.
    if entity.facets:
        for facet in entity.facets:
            await add_facet(
                facet, semantic_ref_ordinal, semantic_ref_index, terms_added
            )
|
180
|
+
|
181
|
+
|
182
|
+
async def add_facet(
    facet: kplib.Facet | None,
    semantic_ref_ordinal: SemanticRefOrdinal,
    semantic_ref_index: ITermToSemanticRefIndex,
    terms_added: set[str] | None = None,
) -> None:
    """Index a facet's name and, when present, its stringified value.

    Args:
        facet: The facet to index; nothing happens when it is None.
        semantic_ref_ordinal: Ordinal of the semantic reference the terms
            point at.
        semantic_ref_index: Index to add terms to.
        terms_added: Optional set to track terms added to the index.
    """
    if facet is None:
        return

    await add_term_to_index(
        semantic_ref_index, facet.name, semantic_ref_ordinal, terms_added
    )

    facet_value = facet.value
    if facet_value is None:
        return

    # Values are indexed by their str() form.
    await add_term_to_index(
        semantic_ref_index, str(facet_value), semantic_ref_ordinal, terms_added
    )
|
204
|
+
|
205
|
+
|
206
|
+
async def add_topic(
    topic: Topic,
    semantic_refs: ISemanticRefCollection,
    semantic_ref_index: ITermToSemanticRefIndex,
    message_ordinal: MessageOrdinal,
    chunk_ordinal: int,
    terms_added: set[str] | None = None,
) -> None:
    """Record a topic as a new semantic reference and index its text.

    Args:
        topic: The topic to add.
        semantic_refs: Collection the new semantic ref is appended to.
        semantic_ref_index: Index the topic text is added to.
        message_ordinal: Ordinal of the message containing the topic.
        chunk_ordinal: Ordinal of the chunk within the message.
        terms_added: Optional set to track terms added to the index.
    """
    # The new ref's ordinal is the collection size before the append.
    ordinal = await semantic_refs.size()
    topic_ref = SemanticRef(
        semantic_ref_ordinal=ordinal,
        range=text_range_from_message_chunk(message_ordinal, chunk_ordinal),
        knowledge=topic,
    )
    await semantic_refs.append(topic_ref)

    await add_term_to_index(semantic_ref_index, topic.text, ordinal, terms_added)
|
239
|
+
|
240
|
+
|
241
|
+
async def add_action(
    action: kplib.Action,
    semantic_refs: ISemanticRefCollection,
    semantic_ref_index: ITermToSemanticRefIndex,
    message_ordinal: MessageOrdinal,
    chunk_ordinal: int,
    terms_added: set[str] | None = None,
) -> None:
    """Record an action as a new semantic reference and index its terms.

    Indexed terms, in order: the verbs joined by a space; the subject,
    object and indirect object entity names (each skipped when it equals
    "none"); the action params; and the subject entity facet.

    Args:
        action: The action to add.
        semantic_refs: Collection the new semantic ref is appended to.
        semantic_ref_index: Index to add terms to.
        message_ordinal: Ordinal of the message containing the action.
        chunk_ordinal: Ordinal of the chunk within the message.
        terms_added: Optional set to track terms added to the index.
    """
    # The new ref's ordinal is the collection size before the append.
    ordinal = await semantic_refs.size()
    action_ref = SemanticRef(
        semantic_ref_ordinal=ordinal,
        range=text_range_from_message_chunk(message_ordinal, chunk_ordinal),
        knowledge=action,
    )
    await semantic_refs.append(action_ref)

    async def index_term(term: str) -> None:
        # Every term of this action points at the same semantic ref.
        await add_term_to_index(semantic_ref_index, term, ordinal, terms_added)

    await index_term(" ".join(action.verbs))

    # "none" is the placeholder for an absent entity name.
    for attr_name in (
        "subject_entity_name",
        "object_entity_name",
        "indirect_object_entity_name",
    ):
        entity_name = getattr(action, attr_name)
        if entity_name != "none":
            await index_term(entity_name)

    params = action.params
    if params:
        for param in params:
            if isinstance(param, str):
                await index_term(param)
                continue
            # Non-string params contribute their name, plus their value
            # only when that value is a string.
            await index_term(param.name)
            if isinstance(param.value, str):
                await index_term(param.value)

    await add_facet(
        action.subject_entity_facet,
        ordinal,
        semantic_ref_index,
        terms_added,
    )
|
329
|
+
|
330
|
+
|
331
|
+
# TODO: add_tag
|
332
|
+
# TODO: KnowledgeValidator
|
333
|
+
|
334
|
+
|
335
|
+
async def add_knowledge_to_semantic_ref_index(
    conversation: IConversation,
    message_ordinal: MessageOrdinal,
    chunk_ordinal: int,
    knowledge: kplib.KnowledgeResponse,
    terms_added: set[str] | None = None,
) -> None:
    """Index every entity, action, inverse action and topic of a response.

    Entities that fail validate_entity are skipped; topics are wrapped in
    a Topic object before being added.

    Args:
        conversation: The conversation whose semantic refs and index are
            updated.
        message_ordinal: Ordinal of the message containing the knowledge.
        chunk_ordinal: Ordinal of the chunk within the message.
        knowledge: Knowledge response containing entities, actions,
            inverse actions and topics.
        terms_added: Optional set to track terms added to the index.
    """
    verify_has_semantic_ref_index(conversation)

    refs = conversation.semantic_refs
    assert refs is not None
    index = conversation.semantic_ref_index
    assert index is not None

    for entity in knowledge.entities:
        if not validate_entity(entity):
            continue
        await add_entity(
            entity, refs, index, message_ordinal, chunk_ordinal, terms_added
        )

    for action in knowledge.actions:
        await add_action(
            action, refs, index, message_ordinal, chunk_ordinal, terms_added
        )

    for inverse_action in knowledge.inverse_actions:
        await add_action(
            inverse_action, refs, index, message_ordinal, chunk_ordinal, terms_added
        )

    for topic_text in knowledge.topics:
        await add_topic(
            Topic(text=topic_text),
            refs,
            index,
            message_ordinal,
            chunk_ordinal,
            terms_added,
        )
|
399
|
+
|
400
|
+
|
401
|
+
def validate_entity(entity: kplib.ConcreteEntity) -> bool:
    """Return True when the entity has a truthy (non-empty) name."""
    return True if entity.name else False
|
403
|
+
|
404
|
+
|
405
|
+
async def add_topic_to_index(
    topic: Topic | str,
    semantic_refs: ISemanticRefCollection,
    semantic_ref_index: ITermToSemanticRefIndex,
    message_ordinal: MessageOrdinal,
    chunk_ordinal: int = 0,
) -> None:
    """Append a semantic ref for a topic and index the topic text.

    Args:
        topic: A Topic, or a plain string that is wrapped in a Topic.
        semantic_refs: Collection the new semantic ref is appended to.
        semantic_ref_index: Index the topic text is added to.
        message_ordinal: Ordinal of the message containing the topic.
        chunk_ordinal: Ordinal of the chunk within the message (default 0).
    """
    topic_obj = Topic(text=topic) if isinstance(topic, str) else topic

    # The new ref's ordinal is the collection size before the append.
    ordinal = await semantic_refs.size()
    topic_ref = SemanticRef(
        semantic_ref_ordinal=ordinal,
        range=text_range_from_location(message_ordinal, chunk_ordinal),
        knowledge=topic_obj,
    )
    await semantic_refs.append(topic_ref)
    await semantic_ref_index.add_term(topic_obj.text, ordinal)
|
423
|
+
|
424
|
+
|
425
|
+
async def add_action_to_index(
    action: kplib.Action,
    semantic_refs: ISemanticRefCollection,
    semantic_ref_index: ITermToSemanticRefIndex,
    message_ordinal: int,
    chunk_ordinal: int = 0,
) -> None:
    """Append a semantic ref for an action and index its terms.

    Indexed terms, in order: the verbs joined by a space; the subject,
    object and indirect object entity names (each skipped when it equals
    "none"); the action params; and the subject entity facet.

    Args:
        action: The action to add.
        semantic_refs: Collection the new semantic ref is appended to.
        semantic_ref_index: Index to add terms to.
        message_ordinal: Ordinal of the message containing the action.
        chunk_ordinal: Ordinal of the chunk within the message (default 0).
    """
    # The new ref's ordinal is the collection size before the append.
    ordinal = await semantic_refs.size()
    action_ref = SemanticRef(
        semantic_ref_ordinal=ordinal,
        range=text_range_from_location(message_ordinal, chunk_ordinal),
        knowledge=action,
    )
    await semantic_refs.append(action_ref)

    await semantic_ref_index.add_term(" ".join(action.verbs), ordinal)

    # "none" is the placeholder for an absent entity name.
    for attr_name in (
        "subject_entity_name",
        "object_entity_name",
        "indirect_object_entity_name",
    ):
        entity_name = getattr(action, attr_name)
        if entity_name != "none":
            await semantic_ref_index.add_term(entity_name, ordinal)

    params = action.params
    if params:
        for param in params:
            if isinstance(param, str):
                await semantic_ref_index.add_term(param, ordinal)
                continue
            # Non-string params contribute their name, plus their value
            # only when that value is a string.
            await semantic_ref_index.add_term(param.name, ordinal)
            if isinstance(param.value, str):
                await semantic_ref_index.add_term(param.value, ordinal)

    await add_facet(action.subject_entity_facet, ordinal, semantic_ref_index)
|
458
|
+
|
459
|
+
|
460
|
+
async def add_knowledge_to_index(
    semantic_refs: ISemanticRefCollection,
    semantic_ref_index: ITermToSemanticRefIndex,
    message_ordinal: MessageOrdinal,
    knowledge: kplib.KnowledgeResponse,
) -> None:
    """Index all entities, actions, inverse actions and topics of a response.

    No chunk ordinal is passed on, so each helper uses its default.

    Args:
        semantic_refs: Collection the new semantic refs are appended to.
        semantic_ref_index: Index to add terms to.
        message_ordinal: Ordinal of the message the knowledge belongs to.
        knowledge: Knowledge response to index.
    """
    for entity in knowledge.entities:
        await add_entity_to_index(
            entity, semantic_refs, semantic_ref_index, message_ordinal
        )

    for action in knowledge.actions:
        await add_action_to_index(
            action, semantic_refs, semantic_ref_index, message_ordinal
        )

    for inverse_action in knowledge.inverse_actions:
        await add_action_to_index(
            inverse_action, semantic_refs, semantic_ref_index, message_ordinal
        )

    for topic_text in knowledge.topics:
        await add_topic_to_index(
            topic_text, semantic_refs, semantic_ref_index, message_ordinal
        )
|
482
|
+
|
483
|
+
|
484
|
+
async def add_metadata_to_index[TMessage: IMessage](
|
485
|
+
messages: AsyncIterable[TMessage],
|
486
|
+
semantic_refs: ISemanticRefCollection,
|
487
|
+
semantic_ref_index: ITermToSemanticRefIndex,
|
488
|
+
knowledge_validator: KnowledgeValidator | None = None,
|
489
|
+
) -> None:
|
490
|
+
i = 0
|
491
|
+
async for msg in messages:
|
492
|
+
knowledge_response = msg.get_knowledge()
|
493
|
+
for entity in knowledge_response.entities:
|
494
|
+
if knowledge_validator is None or knowledge_validator("entity", entity):
|
495
|
+
await add_entity_to_index(entity, semantic_refs, semantic_ref_index, i)
|
496
|
+
for action in knowledge_response.actions:
|
497
|
+
if knowledge_validator is None or knowledge_validator("action", action):
|
498
|
+
await add_action_to_index(action, semantic_refs, semantic_ref_index, i)
|
499
|
+
for topic_response in knowledge_response.topics:
|
500
|
+
topic = Topic(text=topic_response)
|
501
|
+
if knowledge_validator is None or knowledge_validator("topic", topic):
|
502
|
+
await add_topic_to_index(topic, semantic_refs, semantic_ref_index, i)
|
503
|
+
i += 1
|
504
|
+
|
505
|
+
|
506
|
+
class TermToSemanticRefIndex(ITermToSemanticRefIndex):
    """In-memory index from terms to scored semantic ref ordinals.

    Terms are lower-cased before they are stored, looked up or removed, so
    the index is case-insensitive.
    """

    # Maps each normalized term to the scored ordinals recorded for it.
    _map: dict[str, list[ScoredSemanticRefOrdinal]]

    def __init__(self):
        super().__init__()
        self._map = {}

    async def size(self) -> int:
        """Return the number of distinct terms in the index."""
        return len(self._map)

    async def get_terms(self) -> list[str]:
        """Return all (normalized) terms currently in the index."""
        return list(self._map)

    async def clear(self) -> None:
        """Remove every term from the index."""
        self._clear()

    def _clear(self) -> None:
        # Synchronous variant shared by clear() and deserialize().
        self._map.clear()

    async def add_term(
        self,
        term: str,
        semantic_ref_ordinal: SemanticRefOrdinal | ScoredSemanticRefOrdinal,
    ) -> str:
        """Record a semantic ref ordinal under a term.

        Args:
            term: The term to index; an empty term is ignored.
            semantic_ref_ordinal: Either a ready-made scored ordinal or a
                plain ordinal, which is wrapped with a score of 1.0.

        Returns:
            The normalized term that was stored, or the unchanged input
            when it was empty.
        """
        if not term:
            return term
        if not isinstance(semantic_ref_ordinal, ScoredSemanticRefOrdinal):
            semantic_ref_ordinal = ScoredSemanticRefOrdinal(semantic_ref_ordinal, 1.0)
        term = self._prepare_term(term)
        self._map.setdefault(term, []).append(semantic_ref_ordinal)
        return term

    async def lookup_term(self, term: str) -> list[ScoredSemanticRefOrdinal] | None:
        """Return the scored ordinals for a term, or an empty list."""
        return self._map.get(self._prepare_term(term)) or []

    async def remove_term(
        self, term: str, semantic_ref_ordinal: SemanticRefOrdinal
    ) -> None:
        """Remove one semantic ref ordinal from a term's entry.

        The term itself is dropped once no ordinals remain for it.
        """
        term = self._prepare_term(term)
        scored_refs = self._map.get(term)
        if scored_refs is None:
            return
        # Remove only the specific semantic ref ordinal, not the entire term
        remaining = [
            ref
            for ref in scored_refs
            if ref.semantic_ref_ordinal != semantic_ref_ordinal
        ]
        if remaining:
            self._map[term] = remaining
        else:
            # Clean up empty terms
            del self._map[term]

    async def serialize(self) -> TermToSemanticRefIndexData:
        """Return the index contents as serializable data."""
        items: list[TermToSemanticRefIndexItemData] = [
            TermToSemanticRefIndexItemData(
                term=term,
                semanticRefOrdinals=[s.serialize() for s in scored_refs],
            )
            for term, scored_refs in self._map.items()
        ]
        return TermToSemanticRefIndexData(items=items)

    async def deserialize(self, data: TermToSemanticRefIndexData) -> None:
        """Replace the index contents with previously serialized data.

        Items without a term (missing, None or empty) are skipped, in line
        with add_term ignoring empty terms.
        """
        self._clear()
        for index_item_data in data["items"]:
            term = index_item_data.get("term")
            if not term:
                # A missing term cannot be normalized or looked up later.
                continue
            term = self._prepare_term(term)
            scored_refs_data = index_item_data["semanticRefOrdinals"]
            self._map[term] = [
                ScoredSemanticRefOrdinal.deserialize(s) for s in scored_refs_data
            ]

    def _prepare_term(self, term: str) -> str:
        # Normalization used for all storage and lookup keys.
        return term.lower()
|
587
|
+
|
588
|
+
|
589
|
+
# ...
|
590
|
+
|
591
|
+
|
592
|
+
async def build_semantic_ref[TMessage: IMessage](
|
593
|
+
conversation: IConversation[TMessage, ITermToSemanticRefIndex],
|
594
|
+
conversation_settings: ConversationSettings,
|
595
|
+
) -> None:
|
596
|
+
await build_semantic_ref_index(
|
597
|
+
conversation,
|
598
|
+
conversation_settings.semantic_ref_index_settings,
|
599
|
+
)
|
600
|
+
if conversation.semantic_ref_index is not None:
|
601
|
+
await secindex.build_secondary_indexes(
|
602
|
+
conversation,
|
603
|
+
conversation_settings,
|
604
|
+
)
|
605
|
+
|
606
|
+
|
607
|
+
async def build_semantic_ref_index[TM: IMessage](
|
608
|
+
conversation: IConversation[TM, ITermToSemanticRefIndex],
|
609
|
+
settings: SemanticRefIndexSettings,
|
610
|
+
) -> None:
|
611
|
+
await add_to_semantic_ref_index(conversation, settings, 0)
|
612
|
+
|
613
|
+
|
614
|
+
async def add_to_semantic_ref_index[
|
615
|
+
TMessage: IMessage, TTermToSemanticRefIndex: ITermToSemanticRefIndex
|
616
|
+
](
|
617
|
+
conversation: IConversation[TMessage, TTermToSemanticRefIndex],
|
618
|
+
settings: SemanticRefIndexSettings,
|
619
|
+
message_ordinal_start_at: MessageOrdinal,
|
620
|
+
terms_added: list[str] | None = None,
|
621
|
+
) -> None:
|
622
|
+
"""Add semantic references to the conversation's semantic reference index."""
|
623
|
+
|
624
|
+
# Only create knowledge extractor if auto extraction is enabled
|
625
|
+
knowledge_extractor = None
|
626
|
+
if settings.auto_extract_knowledge:
|
627
|
+
knowledge_extractor = (
|
628
|
+
settings.knowledge_extractor or convknowledge.KnowledgeExtractor()
|
629
|
+
)
|
630
|
+
|
631
|
+
# TODO: get_message_chunk_batch
|
632
|
+
# for text_location_batch in get_message_chunk_batch(
|
633
|
+
# conversation.messages,
|
634
|
+
# message_ordinal_start_at,
|
635
|
+
# settings.batch_size,
|
636
|
+
# ):
|
637
|
+
# await add_batch_to_semantic_ref_index(
|
638
|
+
# conversation,
|
639
|
+
# text_location_batch,
|
640
|
+
# knowledge_extractor,
|
641
|
+
# terms_added,
|
642
|
+
# )
|
643
|
+
|
644
|
+
|
645
|
+
def verify_has_semantic_ref_index(conversation: IConversation) -> None:
    """Check that the conversation carries what indexing needs.

    Args:
        conversation: The conversation to check.

    Raises:
        ValueError: If ``secondary_indexes``, ``semantic_refs`` or
            ``semantic_ref_index`` is None.
    """
    # semantic_ref_index is checked too, so a missing index raises the
    # documented ValueError here instead of failing a later assert.
    if (
        conversation.secondary_indexes is None
        or conversation.semantic_refs is None
        or conversation.semantic_ref_index is None
    ):
        raise ValueError("Conversation does not have an index")
|
648
|
+
|
649
|
+
|
650
|
+
async def dump(
    semantic_ref_index: TermToSemanticRefIndex, semantic_refs: ISemanticRefCollection
) -> None:
    """Print the index map and the semantic refs to stdout for debugging.

    Args:
        semantic_ref_index: Index whose internal term map is printed.
        semantic_refs: Collection whose items are printed one per line.
    """
    print("semantic_ref_index = {")
    for k, v in semantic_ref_index._map.items():
        print(f" {k!r}: {v},")
    print("}\n")
    # Open the list here; it is closed by the "]" printed after the loop.
    print("semantic_refs = [")
    async for semantic_ref in semantic_refs:
        print(f" {semantic_ref},")
    print("]\n")
|