typeagent-py 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. typeagent/aitools/auth.py +61 -0
  2. typeagent/aitools/embeddings.py +232 -0
  3. typeagent/aitools/utils.py +244 -0
  4. typeagent/aitools/vectorbase.py +175 -0
  5. typeagent/knowpro/answer_context_schema.py +49 -0
  6. typeagent/knowpro/answer_response_schema.py +34 -0
  7. typeagent/knowpro/answers.py +577 -0
  8. typeagent/knowpro/collections.py +759 -0
  9. typeagent/knowpro/common.py +9 -0
  10. typeagent/knowpro/convknowledge.py +112 -0
  11. typeagent/knowpro/convsettings.py +94 -0
  12. typeagent/knowpro/convutils.py +49 -0
  13. typeagent/knowpro/date_time_schema.py +32 -0
  14. typeagent/knowpro/field_helpers.py +87 -0
  15. typeagent/knowpro/fuzzyindex.py +144 -0
  16. typeagent/knowpro/interfaces.py +818 -0
  17. typeagent/knowpro/knowledge.py +88 -0
  18. typeagent/knowpro/kplib.py +125 -0
  19. typeagent/knowpro/query.py +1128 -0
  20. typeagent/knowpro/search.py +628 -0
  21. typeagent/knowpro/search_query_schema.py +165 -0
  22. typeagent/knowpro/searchlang.py +729 -0
  23. typeagent/knowpro/searchlib.py +345 -0
  24. typeagent/knowpro/secindex.py +100 -0
  25. typeagent/knowpro/serialization.py +390 -0
  26. typeagent/knowpro/textlocindex.py +179 -0
  27. typeagent/knowpro/utils.py +17 -0
  28. typeagent/mcp/server.py +139 -0
  29. typeagent/podcasts/podcast.py +473 -0
  30. typeagent/podcasts/podcast_import.py +105 -0
  31. typeagent/storage/__init__.py +25 -0
  32. typeagent/storage/memory/__init__.py +13 -0
  33. typeagent/storage/memory/collections.py +68 -0
  34. typeagent/storage/memory/convthreads.py +81 -0
  35. typeagent/storage/memory/messageindex.py +178 -0
  36. typeagent/storage/memory/propindex.py +289 -0
  37. typeagent/storage/memory/provider.py +84 -0
  38. typeagent/storage/memory/reltermsindex.py +318 -0
  39. typeagent/storage/memory/semrefindex.py +660 -0
  40. typeagent/storage/memory/timestampindex.py +176 -0
  41. typeagent/storage/sqlite/__init__.py +31 -0
  42. typeagent/storage/sqlite/collections.py +362 -0
  43. typeagent/storage/sqlite/messageindex.py +382 -0
  44. typeagent/storage/sqlite/propindex.py +119 -0
  45. typeagent/storage/sqlite/provider.py +293 -0
  46. typeagent/storage/sqlite/reltermsindex.py +328 -0
  47. typeagent/storage/sqlite/schema.py +248 -0
  48. typeagent/storage/sqlite/semrefindex.py +156 -0
  49. typeagent/storage/sqlite/timestampindex.py +146 -0
  50. typeagent/storage/utils.py +41 -0
  51. typeagent_py-0.1.0.dist-info/METADATA +28 -0
  52. typeagent_py-0.1.0.dist-info/RECORD +55 -0
  53. typeagent_py-0.1.0.dist-info/WHEEL +5 -0
  54. typeagent_py-0.1.0.dist-info/licenses/LICENSE +21 -0
  55. typeagent_py-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,660 @@
1
+ # Copyright (c) Microsoft Corporation.
2
+ # Licensed under the MIT License.
3
+
4
+ from __future__ import annotations # TODO: Avoid
5
+
6
+ from collections.abc import AsyncIterable, Callable
7
+
8
+ from typechat import Failure
9
+
10
+ from ...knowpro import convknowledge, kplib, secindex
11
+ from ...knowpro.convsettings import ConversationSettings
12
+ from ...knowpro.convsettings import SemanticRefIndexSettings
13
+ from ...knowpro.interfaces import (
14
+ # Interfaces.
15
+ IConversation,
16
+ IMessage,
17
+ ISemanticRefCollection,
18
+ ITermToSemanticRefIndex,
19
+ # Other imports.
20
+ Knowledge,
21
+ KnowledgeType,
22
+ MessageOrdinal,
23
+ SemanticRefOrdinal,
24
+ ScoredSemanticRefOrdinal,
25
+ SemanticRef,
26
+ TermToSemanticRefIndexItemData,
27
+ TermToSemanticRefIndexData,
28
+ TextLocation,
29
+ TextRange,
30
+ Topic,
31
+ )
32
+ from ...knowpro.utils import text_range_from_message_chunk
33
+ from ...knowpro.knowledge import extract_knowledge_from_text_batch
34
+
35
+
36
def text_range_from_location(
    message_ordinal: MessageOrdinal,
    chunk_ordinal: int = 0,
) -> TextRange:
    """Return a TextRange that starts at the given message chunk.

    The range's start is TextLocation(message_ordinal, chunk_ordinal) and
    its end is left as None.
    """
    start_location = TextLocation(message_ordinal, chunk_ordinal)
    return TextRange(start=start_location, end=None)
44
+
45
+
46
+ type KnowledgeValidator = Callable[
47
+ [
48
+ KnowledgeType, # knowledge_type
49
+ Knowledge, # knowledge
50
+ ],
51
+ bool,
52
+ ]
53
+
54
+
55
+ async def add_batch_to_semantic_ref_index[
56
+ TMessage: IMessage, TTermToSemanticRefIndex: ITermToSemanticRefIndex
57
+ ](
58
+ conversation: IConversation[TMessage, TTermToSemanticRefIndex],
59
+ batch: list[TextLocation],
60
+ knowledge_extractor: convknowledge.KnowledgeExtractor,
61
+ terms_added: set[str] | None = None,
62
+ ) -> None:
63
+ messages = conversation.messages
64
+
65
+ text_batch = [
66
+ (await messages.get_item(tl.message_ordinal))
67
+ .text_chunks[tl.chunk_ordinal]
68
+ .strip()
69
+ for tl in batch
70
+ ]
71
+
72
+ knowledge_results = await extract_knowledge_from_text_batch(
73
+ knowledge_extractor,
74
+ text_batch,
75
+ len(text_batch),
76
+ )
77
+ for i, knowledge_result in enumerate(knowledge_results):
78
+ if isinstance(knowledge_result, Failure):
79
+ raise RuntimeError(
80
+ f"Knowledge extraction failed: {knowledge_result.message}"
81
+ )
82
+ text_location = batch[i]
83
+ knowledge = knowledge_result.value
84
+ await add_knowledge_to_semantic_ref_index(
85
+ conversation,
86
+ text_location.message_ordinal,
87
+ text_location.chunk_ordinal,
88
+ knowledge,
89
+ terms_added,
90
+ )
91
+
92
+
93
async def add_entity_to_index(
    entity: kplib.ConcreteEntity,
    semantic_refs: ISemanticRefCollection,
    semantic_ref_index: ITermToSemanticRefIndex,
    message_ordinal: MessageOrdinal,
    chunk_ordinal: int = 0,
) -> None:
    """Append a SemanticRef for an entity and index its terms.

    The entity name, each of its types, and each facet (via add_facet) are
    added as terms pointing at the new semantic ref ordinal.

    Args:
        entity: The concrete entity to add
        semantic_refs: Collection the new SemanticRef is appended to
        semantic_ref_index: Index to add terms to
        message_ordinal: Ordinal of the message containing the entity
        chunk_ordinal: Ordinal of the chunk within the message
    """
    # The new ref's ordinal is the collection size before appending.
    new_ordinal = await semantic_refs.size()
    entity_ref = SemanticRef(
        semantic_ref_ordinal=new_ordinal,
        range=text_range_from_location(message_ordinal, chunk_ordinal),
        knowledge=entity,
    )
    await semantic_refs.append(entity_ref)

    await semantic_ref_index.add_term(entity.name, new_ordinal)
    # Every type is its own term.
    for type_name in entity.type:
        await semantic_ref_index.add_term(type_name, new_ordinal)
    # Every facet contributes terms as well.
    for facet in entity.facets or ():
        await add_facet(facet, new_ordinal, semantic_ref_index)
116
+
117
+
118
async def add_term_to_index(
    index: ITermToSemanticRefIndex,
    term: str,
    semantic_ref_ordinal: SemanticRefOrdinal,
    terms_added: set[str] | None = None,
) -> None:
    """Add one term to the index and optionally record it.

    Args:
        index: The index to add the term to
        term: The term to add
        semantic_ref_ordinal: Ordinal of the semantic reference
        terms_added: Optional set that receives the value returned by
            index.add_term (i.e. the term as the index reports it)
    """
    stored_term = await index.add_term(term, semantic_ref_ordinal)
    if terms_added is None:
        return
    terms_added.add(stored_term)
135
+
136
+
137
async def add_entity(
    entity: kplib.ConcreteEntity,
    semantic_refs: ISemanticRefCollection,
    semantic_ref_index: ITermToSemanticRefIndex,
    message_ordinal: MessageOrdinal,
    chunk_ordinal: int,
    terms_added: set[str] | None = None,
) -> None:
    """Add an entity to the semantic reference index.

    A SemanticRef for the entity is appended to semantic_refs, then the
    entity name, each of its types, and each of its facets are indexed as
    terms pointing at that ref.

    Args:
        entity: The concrete entity to add
        semantic_refs: Collection of semantic references to add to
        semantic_ref_index: Index to add terms to
        message_ordinal: Ordinal of the message containing the entity
        chunk_ordinal: Ordinal of the chunk within the message
        terms_added: Optional set to track terms added to the index
    """
    # The new ref's ordinal is the collection size before appending.
    semantic_ref_ordinal = await semantic_refs.size()
    await semantic_refs.append(
        SemanticRef(
            semantic_ref_ordinal=semantic_ref_ordinal,
            range=text_range_from_message_chunk(message_ordinal, chunk_ordinal),
            knowledge=entity,
        )
    )
    await add_term_to_index(
        semantic_ref_index,
        entity.name,
        semantic_ref_ordinal,
        terms_added,
    )

    # Add each type as a separate term
    for type_name in entity.type:
        await add_term_to_index(
            semantic_ref_index, type_name, semantic_ref_ordinal, terms_added
        )

    # Add every facet name as a separate term. terms_added is forwarded so
    # facet terms are tracked like the name and type terms above.
    if entity.facets:
        for facet in entity.facets:
            await add_facet(
                facet,
                semantic_ref_ordinal,
                semantic_ref_index,
                terms_added,
            )
180
+
181
+
182
async def add_facet(
    facet: kplib.Facet | None,
    semantic_ref_ordinal: SemanticRefOrdinal,
    semantic_ref_index: ITermToSemanticRefIndex,
    terms_added: set[str] | None = None,
) -> None:
    """Index a facet's name and, when present, its stringified value.

    Does nothing when facet is None.

    Args:
        facet: The facet to index, or None
        semantic_ref_ordinal: Ordinal of the semantic reference
        semantic_ref_index: Index to add terms to
        terms_added: Optional set to track terms added to the index
    """
    if facet is None:
        return

    await add_term_to_index(
        semantic_ref_index, facet.name, semantic_ref_ordinal, terms_added
    )

    if facet.value is None:
        return

    # The value may not be a string, so it is indexed via str().
    await add_term_to_index(
        semantic_ref_index, str(facet.value), semantic_ref_ordinal, terms_added
    )
204
+
205
+
206
async def add_topic(
    topic: Topic,
    semantic_refs: ISemanticRefCollection,
    semantic_ref_index: ITermToSemanticRefIndex,
    message_ordinal: MessageOrdinal,
    chunk_ordinal: int,
    terms_added: set[str] | None = None,
) -> None:
    """Append a SemanticRef for a topic and index the topic text.

    Args:
        topic: The topic to add
        semantic_refs: Collection of semantic references to add to
        semantic_ref_index: Index to add terms to
        message_ordinal: Ordinal of the message containing the topic
        chunk_ordinal: Ordinal of the chunk within the message
        terms_added: Optional set to track terms added to the index
    """
    # The new ref's ordinal is the collection size before appending.
    new_ordinal = await semantic_refs.size()
    topic_ref = SemanticRef(
        semantic_ref_ordinal=new_ordinal,
        range=text_range_from_message_chunk(message_ordinal, chunk_ordinal),
        knowledge=topic,
    )
    await semantic_refs.append(topic_ref)

    await add_term_to_index(semantic_ref_index, topic.text, new_ordinal, terms_added)
239
+
240
+
241
async def add_action(
    action: kplib.Action,
    semantic_refs: ISemanticRefCollection,
    semantic_ref_index: ITermToSemanticRefIndex,
    message_ordinal: MessageOrdinal,
    chunk_ordinal: int,
    terms_added: set[str] | None = None,
) -> None:
    """Append a SemanticRef for an action and index its terms.

    Terms are added in this order: the verbs joined by a space, the
    subject/object/indirect-object entity names (each skipped when equal to
    the string "none"), the params, and finally the subject entity facet.

    Args:
        action: The action to add
        semantic_refs: Collection of semantic references to add to
        semantic_ref_index: Index to add terms to
        message_ordinal: Ordinal of the message containing the action
        chunk_ordinal: Ordinal of the chunk within the message
        terms_added: Optional set to track terms added to the index
    """
    # The new ref's ordinal is the collection size before appending.
    new_ordinal = await semantic_refs.size()
    action_ref = SemanticRef(
        semantic_ref_ordinal=new_ordinal,
        range=text_range_from_message_chunk(message_ordinal, chunk_ordinal),
        knowledge=action,
    )
    await semantic_refs.append(action_ref)

    async def index_term(term: str) -> None:
        # All terms of this action point at the same new semantic ref.
        await add_term_to_index(semantic_ref_index, term, new_ordinal, terms_added)

    await index_term(" ".join(action.verbs))

    # "none" is the placeholder used for an absent entity name.
    for role in (
        "subject_entity_name",
        "object_entity_name",
        "indirect_object_entity_name",
    ):
        entity_name = getattr(action, role)
        if entity_name != "none":
            await index_term(entity_name)

    for param in action.params or ():
        if isinstance(param, str):
            await index_term(param)
            continue
        # Structured param: index its name, and its value only if a string.
        await index_term(param.name)
        if isinstance(param.value, str):
            await index_term(param.value)

    await add_facet(
        action.subject_entity_facet,
        new_ordinal,
        semantic_ref_index,
        terms_added,
    )
329
+
330
+
331
+ # TODO: add_tag
332
+ # TODO: KnowledgeValidator
333
+
334
+
335
async def add_knowledge_to_semantic_ref_index(
    conversation: IConversation,
    message_ordinal: MessageOrdinal,
    chunk_ordinal: int,
    knowledge: kplib.KnowledgeResponse,
    terms_added: set[str] | None = None,
) -> None:
    """Index all knowledge extracted from one message chunk.

    Entities (only those passing validate_entity), actions, inverse actions
    and topics are added, in that order, to the conversation's semantic refs
    and semantic ref index.

    Args:
        conversation: The conversation to add knowledge to
        message_ordinal: Ordinal of the message containing the knowledge
        chunk_ordinal: Ordinal of the chunk within the message
        knowledge: Knowledge response containing entities, actions and topics
        terms_added: Optional set to track terms added to the index

    Raises:
        ValueError: From verify_has_semantic_ref_index when the conversation
            fails its check.
    """
    verify_has_semantic_ref_index(conversation)

    semantic_refs = conversation.semantic_refs
    assert semantic_refs is not None
    semantic_ref_index = conversation.semantic_ref_index
    assert semantic_ref_index is not None

    # Trailing arguments shared by every add_* call below.
    destination = (
        semantic_refs,
        semantic_ref_index,
        message_ordinal,
        chunk_ordinal,
        terms_added,
    )

    for entity in knowledge.entities:
        if not validate_entity(entity):
            continue
        await add_entity(entity, *destination)

    for action in knowledge.actions:
        await add_action(action, *destination)

    for inverse_action in knowledge.inverse_actions:
        await add_action(inverse_action, *destination)

    # Topics arrive as plain text and are wrapped in Topic before indexing.
    for topic_text in knowledge.topics:
        await add_topic(Topic(text=topic_text), *destination)
399
+
400
+
401
def validate_entity(entity: kplib.ConcreteEntity) -> bool:
    """Return True when the entity has a truthy (non-empty) name."""
    return True if entity.name else False
403
+
404
+
405
async def add_topic_to_index(
    topic: Topic | str,
    semantic_refs: ISemanticRefCollection,
    semantic_ref_index: ITermToSemanticRefIndex,
    message_ordinal: MessageOrdinal,
    chunk_ordinal: int = 0,
) -> None:
    """Append a SemanticRef for a topic and index the topic text.

    Args:
        topic: A Topic, or plain text that is wrapped in a Topic
        semantic_refs: Collection the new SemanticRef is appended to
        semantic_ref_index: Index to add the topic text to
        message_ordinal: Ordinal of the message containing the topic
        chunk_ordinal: Ordinal of the chunk within the message
    """
    topic_knowledge = Topic(text=topic) if isinstance(topic, str) else topic
    # The new ref's ordinal is the collection size before appending.
    new_ordinal = await semantic_refs.size()
    topic_ref = SemanticRef(
        semantic_ref_ordinal=new_ordinal,
        range=text_range_from_location(message_ordinal, chunk_ordinal),
        knowledge=topic_knowledge,
    )
    await semantic_refs.append(topic_ref)
    await semantic_ref_index.add_term(topic_knowledge.text, new_ordinal)
423
+
424
+
425
async def add_action_to_index(
    action: kplib.Action,
    semantic_refs: ISemanticRefCollection,
    semantic_ref_index: ITermToSemanticRefIndex,
    message_ordinal: int,
    chunk_ordinal: int = 0,
) -> None:
    """Append a SemanticRef for an action and index its terms.

    Terms are added in this order: the verbs joined by a space, the
    subject/object/indirect-object entity names (each skipped when equal to
    the string "none"), the params, and finally the subject entity facet.

    Args:
        action: The action to add
        semantic_refs: Collection the new SemanticRef is appended to
        semantic_ref_index: Index to add terms to
        message_ordinal: Ordinal of the message containing the action
        chunk_ordinal: Ordinal of the chunk within the message
    """
    # The new ref's ordinal is the collection size before appending.
    new_ordinal = await semantic_refs.size()
    action_ref = SemanticRef(
        semantic_ref_ordinal=new_ordinal,
        range=text_range_from_location(message_ordinal, chunk_ordinal),
        knowledge=action,
    )
    await semantic_refs.append(action_ref)

    async def index_term(term: str) -> None:
        # All terms of this action point at the same new semantic ref.
        await semantic_ref_index.add_term(term, new_ordinal)

    await index_term(" ".join(action.verbs))

    # "none" is the placeholder used for an absent entity name.
    for role in (
        "subject_entity_name",
        "object_entity_name",
        "indirect_object_entity_name",
    ):
        entity_name = getattr(action, role)
        if entity_name != "none":
            await index_term(entity_name)

    for param in action.params or ():
        if isinstance(param, str):
            await index_term(param)
            continue
        # Structured param: index its name, and its value only if a string.
        await index_term(param.name)
        if isinstance(param.value, str):
            await index_term(param.value)

    await add_facet(action.subject_entity_facet, new_ordinal, semantic_ref_index)
458
+
459
+
460
async def add_knowledge_to_index(
    semantic_refs: ISemanticRefCollection,
    semantic_ref_index: ITermToSemanticRefIndex,
    message_ordinal: MessageOrdinal,
    knowledge: kplib.KnowledgeResponse,
) -> None:
    """Index every entity, action, inverse action and topic of a response.

    Items are added in that order; each helper is called without a chunk
    ordinal, so its default applies.

    Args:
        semantic_refs: Collection new SemanticRefs are appended to
        semantic_ref_index: Index to add terms to
        message_ordinal: Ordinal of the message the knowledge belongs to
        knowledge: Knowledge response containing entities, actions and topics
    """
    # Trailing arguments shared by every helper call below.
    destination = (semantic_refs, semantic_ref_index, message_ordinal)

    for entity in knowledge.entities:
        await add_entity_to_index(entity, *destination)
    for action in knowledge.actions:
        await add_action_to_index(action, *destination)
    for inverse_action in knowledge.inverse_actions:
        await add_action_to_index(inverse_action, *destination)
    for topic in knowledge.topics:
        await add_topic_to_index(topic, *destination)
482
+
483
+
484
+ async def add_metadata_to_index[TMessage: IMessage](
485
+ messages: AsyncIterable[TMessage],
486
+ semantic_refs: ISemanticRefCollection,
487
+ semantic_ref_index: ITermToSemanticRefIndex,
488
+ knowledge_validator: KnowledgeValidator | None = None,
489
+ ) -> None:
490
+ i = 0
491
+ async for msg in messages:
492
+ knowledge_response = msg.get_knowledge()
493
+ for entity in knowledge_response.entities:
494
+ if knowledge_validator is None or knowledge_validator("entity", entity):
495
+ await add_entity_to_index(entity, semantic_refs, semantic_ref_index, i)
496
+ for action in knowledge_response.actions:
497
+ if knowledge_validator is None or knowledge_validator("action", action):
498
+ await add_action_to_index(action, semantic_refs, semantic_ref_index, i)
499
+ for topic_response in knowledge_response.topics:
500
+ topic = Topic(text=topic_response)
501
+ if knowledge_validator is None or knowledge_validator("topic", topic):
502
+ await add_topic_to_index(topic, semantic_refs, semantic_ref_index, i)
503
+ i += 1
504
+
505
+
506
class TermToSemanticRefIndex(ITermToSemanticRefIndex):
    """In-memory index from terms to lists of scored semantic ref ordinals.

    Terms are normalized with _prepare_term (lowercased) before they are
    stored or looked up.
    """

    # Normalized term -> scored ordinals, in insertion order.
    _map: dict[str, list[ScoredSemanticRefOrdinal]]

    def __init__(self):
        super().__init__()
        self._map = {}

    async def size(self) -> int:
        """Return the number of distinct terms in the index."""
        return len(self._map)

    async def get_terms(self) -> list[str]:
        """Return all (normalized) terms currently in the index."""
        return list(self._map)

    async def clear(self) -> None:
        """Remove every term from the index."""
        self._clear()

    def _clear(self) -> None:
        # Synchronous variant so deserialize can reset without awaiting.
        self._map.clear()

    async def add_term(
        self,
        term: str,
        semantic_ref_ordinal: SemanticRefOrdinal | ScoredSemanticRefOrdinal,
    ) -> str:
        """Record that a term refers to a semantic ref.

        Args:
            term: The term to add; an empty term is ignored
            semantic_ref_ordinal: A plain ordinal (stored with score 1.0) or
                an already scored ordinal

        Returns:
            The normalized term that was stored, or the term unchanged when
            it was empty and nothing was stored.
        """
        if not term:
            return term
        if not isinstance(semantic_ref_ordinal, ScoredSemanticRefOrdinal):
            semantic_ref_ordinal = ScoredSemanticRefOrdinal(semantic_ref_ordinal, 1.0)
        term = self._prepare_term(term)
        self._map.setdefault(term, []).append(semantic_ref_ordinal)
        return term

    async def lookup_term(self, term: str) -> list[ScoredSemanticRefOrdinal] | None:
        """Return the scored ordinals for a term, or an empty list if unknown."""
        return self._map.get(self._prepare_term(term)) or []

    async def remove_term(
        self, term: str, semantic_ref_ordinal: SemanticRefOrdinal
    ) -> None:
        """Remove one semantic ref from a term's entry.

        Only entries with the given ordinal are dropped; the term itself is
        deleted once no entries remain. Unknown terms are ignored.
        """
        term = self._prepare_term(term)
        scored_refs = self._map.get(term)
        if scored_refs is None:
            return
        remaining = [
            ref for ref in scored_refs if ref.semantic_ref_ordinal != semantic_ref_ordinal
        ]
        if remaining:
            self._map[term] = remaining
        else:
            # Clean up empty terms
            del self._map[term]

    async def serialize(self) -> TermToSemanticRefIndexData:
        """Return the index contents as TermToSemanticRefIndexData."""
        items: list[TermToSemanticRefIndexItemData] = [
            TermToSemanticRefIndexItemData(
                term=term,
                semanticRefOrdinals=[s.serialize() for s in scored_refs],
            )
            for term, scored_refs in self._map.items()
        ]
        return TermToSemanticRefIndexData(items=items)

    async def deserialize(self, data: TermToSemanticRefIndexData) -> None:
        """Replace the index contents with the given serialized data.

        Items with a missing or empty term are skipped, matching add_term,
        which also ignores empty terms. Items whose terms normalize to the
        same key are merged instead of the later one replacing the earlier.
        """
        self._clear()
        for index_item_data in data["items"]:
            raw_term = index_item_data.get("term")
            if not raw_term:
                continue
            term = self._prepare_term(raw_term)
            scored_refs = [
                ScoredSemanticRefOrdinal.deserialize(s)
                for s in index_item_data["semanticRefOrdinals"]
            ]
            # extend (not assign) so colliding terms keep all their refs.
            self._map.setdefault(term, []).extend(scored_refs)

    def _prepare_term(self, term: str) -> str:
        # Normalization applied to every stored and looked-up term.
        return term.lower()
587
+
588
+
589
+ # ...
590
+
591
+
592
+ async def build_semantic_ref[TMessage: IMessage](
593
+ conversation: IConversation[TMessage, ITermToSemanticRefIndex],
594
+ conversation_settings: ConversationSettings,
595
+ ) -> None:
596
+ await build_semantic_ref_index(
597
+ conversation,
598
+ conversation_settings.semantic_ref_index_settings,
599
+ )
600
+ if conversation.semantic_ref_index is not None:
601
+ await secindex.build_secondary_indexes(
602
+ conversation,
603
+ conversation_settings,
604
+ )
605
+
606
+
607
+ async def build_semantic_ref_index[TM: IMessage](
608
+ conversation: IConversation[TM, ITermToSemanticRefIndex],
609
+ settings: SemanticRefIndexSettings,
610
+ ) -> None:
611
+ await add_to_semantic_ref_index(conversation, settings, 0)
612
+
613
+
614
+ async def add_to_semantic_ref_index[
615
+ TMessage: IMessage, TTermToSemanticRefIndex: ITermToSemanticRefIndex
616
+ ](
617
+ conversation: IConversation[TMessage, TTermToSemanticRefIndex],
618
+ settings: SemanticRefIndexSettings,
619
+ message_ordinal_start_at: MessageOrdinal,
620
+ terms_added: list[str] | None = None,
621
+ ) -> None:
622
+ """Add semantic references to the conversation's semantic reference index."""
623
+
624
+ # Only create knowledge extractor if auto extraction is enabled
625
+ knowledge_extractor = None
626
+ if settings.auto_extract_knowledge:
627
+ knowledge_extractor = (
628
+ settings.knowledge_extractor or convknowledge.KnowledgeExtractor()
629
+ )
630
+
631
+ # TODO: get_message_chunk_batch
632
+ # for text_location_batch in get_message_chunk_batch(
633
+ # conversation.messages,
634
+ # message_ordinal_start_at,
635
+ # settings.batch_size,
636
+ # ):
637
+ # await add_batch_to_semantic_ref_index(
638
+ # conversation,
639
+ # text_location_batch,
640
+ # knowledge_extractor,
641
+ # terms_added,
642
+ # )
643
+
644
+
645
def verify_has_semantic_ref_index(conversation: IConversation) -> None:
    """Raise ValueError unless the conversation passes the index check.

    The check requires both conversation.secondary_indexes and
    conversation.semantic_refs to be set (not None).

    NOTE(review): despite the name, conversation.semantic_ref_index itself
    is not checked here -- confirm whether that is intended.
    """
    if (
        conversation.secondary_indexes is not None
        and conversation.semantic_refs is not None
    ):
        return
    raise ValueError("Conversation does not have an index")
648
+
649
+
650
async def dump(
    semantic_ref_index: TermToSemanticRefIndex, semantic_refs: ISemanticRefCollection
) -> None:
    """Print the index map and the semantic refs to stdout for debugging.

    Args:
        semantic_ref_index: Index whose internal term map is printed
        semantic_refs: Collection that is iterated asynchronously and printed
    """
    print("semantic_ref_index = {")
    for k, v in semantic_ref_index._map.items():
        print(f" {k!r}: {v},")
    print("}\n")
    # Open the list with "[" so it pairs with the closing "]" printed below.
    print("semantic_refs = [")
    async for semantic_ref in semantic_refs:
        print(f" {semantic_ref},")
    print("]\n")