typeagent-py 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. typeagent/aitools/auth.py +61 -0
  2. typeagent/aitools/embeddings.py +232 -0
  3. typeagent/aitools/utils.py +244 -0
  4. typeagent/aitools/vectorbase.py +175 -0
  5. typeagent/knowpro/answer_context_schema.py +49 -0
  6. typeagent/knowpro/answer_response_schema.py +34 -0
  7. typeagent/knowpro/answers.py +577 -0
  8. typeagent/knowpro/collections.py +759 -0
  9. typeagent/knowpro/common.py +9 -0
  10. typeagent/knowpro/convknowledge.py +112 -0
  11. typeagent/knowpro/convsettings.py +94 -0
  12. typeagent/knowpro/convutils.py +49 -0
  13. typeagent/knowpro/date_time_schema.py +32 -0
  14. typeagent/knowpro/field_helpers.py +87 -0
  15. typeagent/knowpro/fuzzyindex.py +144 -0
  16. typeagent/knowpro/interfaces.py +818 -0
  17. typeagent/knowpro/knowledge.py +88 -0
  18. typeagent/knowpro/kplib.py +125 -0
  19. typeagent/knowpro/query.py +1128 -0
  20. typeagent/knowpro/search.py +628 -0
  21. typeagent/knowpro/search_query_schema.py +165 -0
  22. typeagent/knowpro/searchlang.py +729 -0
  23. typeagent/knowpro/searchlib.py +345 -0
  24. typeagent/knowpro/secindex.py +100 -0
  25. typeagent/knowpro/serialization.py +390 -0
  26. typeagent/knowpro/textlocindex.py +179 -0
  27. typeagent/knowpro/utils.py +17 -0
  28. typeagent/mcp/server.py +139 -0
  29. typeagent/podcasts/podcast.py +473 -0
  30. typeagent/podcasts/podcast_import.py +105 -0
  31. typeagent/storage/__init__.py +25 -0
  32. typeagent/storage/memory/__init__.py +13 -0
  33. typeagent/storage/memory/collections.py +68 -0
  34. typeagent/storage/memory/convthreads.py +81 -0
  35. typeagent/storage/memory/messageindex.py +178 -0
  36. typeagent/storage/memory/propindex.py +289 -0
  37. typeagent/storage/memory/provider.py +84 -0
  38. typeagent/storage/memory/reltermsindex.py +318 -0
  39. typeagent/storage/memory/semrefindex.py +660 -0
  40. typeagent/storage/memory/timestampindex.py +176 -0
  41. typeagent/storage/sqlite/__init__.py +31 -0
  42. typeagent/storage/sqlite/collections.py +362 -0
  43. typeagent/storage/sqlite/messageindex.py +382 -0
  44. typeagent/storage/sqlite/propindex.py +119 -0
  45. typeagent/storage/sqlite/provider.py +293 -0
  46. typeagent/storage/sqlite/reltermsindex.py +328 -0
  47. typeagent/storage/sqlite/schema.py +248 -0
  48. typeagent/storage/sqlite/semrefindex.py +156 -0
  49. typeagent/storage/sqlite/timestampindex.py +146 -0
  50. typeagent/storage/utils.py +41 -0
  51. typeagent_py-0.1.0.dist-info/METADATA +28 -0
  52. typeagent_py-0.1.0.dist-info/RECORD +55 -0
  53. typeagent_py-0.1.0.dist-info/WHEEL +5 -0
  54. typeagent_py-0.1.0.dist-info/licenses/LICENSE +21 -0
  55. typeagent_py-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,818 @@
1
+ # Copyright (c) Microsoft Corporation.
2
+ # Licensed under the MIT License.
3
+
4
+ from abc import ABC, abstractmethod
5
+ from collections.abc import AsyncIterable, Iterable, Sequence
6
+ from datetime import (
7
+ datetime as Datetime, # For export.
8
+ timedelta as Timedelta, # For export.
9
+ )
10
+ from typing import (
11
+ Any,
12
+ ClassVar,
13
+ Literal,
14
+ NotRequired,
15
+ Protocol,
16
+ Self,
17
+ TypedDict,
18
+ runtime_checkable,
19
+ )
20
+
21
+ from pydantic.dataclasses import dataclass
22
+ from pydantic import Field, AliasChoices
23
+ import typechat
24
+
25
+ from ..aitools.embeddings import NormalizedEmbeddings
26
+ from . import kplib
27
+ from .field_helpers import CamelCaseField
28
+
29
+
30
class IKnowledgeSource(Protocol):
    """Protocol for any object from which knowledge can be obtained.

    Implementations (e.g. messages) expose their extracted knowledge
    through ``get_knowledge()``.
    """

    def get_knowledge(self) -> kplib.KnowledgeResponse:
        """Return the knowledge carried by this source."""
        ...
36
+
37
+
38
class IKnowledgeExtractor(Protocol):
    """Protocol for components that extract structured knowledge from text."""

    async def extract(self, message: str) -> typechat.Result[kplib.KnowledgeResponse]:
        """Extract knowledge from a single message's text."""
        ...
44
+
45
+
46
+ @dataclass
47
+ class DeletionInfo:
48
+ timestamp: str
49
+ reason: str | None = None
50
+
51
+
52
# Messages are referenced by their sequential ordinal numbers.
# (0-based position of the message within its conversation's collection.)
type MessageOrdinal = int
54
+
55
+
56
class IMessageMetadata(Protocol):
    """Metadata associated with a message.

    Both attributes may be a single name, a list of names, or None
    when the information is unavailable.
    """

    # The source ("senders") of the message.
    source: str | list[str] | None = None

    # The dest ("recipients") of the message.
    dest: str | list[str] | None = None
64
+
65
+
66
class IMessage[TMetadata: IMessageMetadata](IKnowledgeSource, Protocol):
    """A message in a conversation.

    A Message contains one or more text chunks. Being an
    IKnowledgeSource, it can also report the knowledge it contains.
    """

    # The text of the message, split into chunks.
    text_chunks: list[str]

    # (Optional) tags associated with the message.
    tags: list[str]

    # The (optional) timestamp of the message.
    timestamp: str | None = None

    # (Future) Information about the deletion of the message.
    deletion_info: DeletionInfo | None = None

    # Metadata associated with the message such as its source.
    metadata: TMetadata | None = None
86
+
87
+
88
# Semantic references are addressed by sequential ordinal, like messages.
type SemanticRefOrdinal = int
89
+
90
+
91
@dataclass
class ScoredSemanticRefOrdinal:
    """A semantic-ref ordinal paired with a relevance score."""

    semantic_ref_ordinal: SemanticRefOrdinal = CamelCaseField(
        "The ordinal of the semantic reference"
    )
    score: float = CamelCaseField("The relevance score")

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({self.semantic_ref_ordinal}, {self.score})"

    def serialize(self) -> "ScoredSemanticRefOrdinalData":
        """Serialize to a camelCase dict (ScoredSemanticRefOrdinalData shape)."""
        return self.__pydantic_serializer__.to_python(self, by_alias=True)  # type: ignore

    @staticmethod
    def deserialize(data: "ScoredSemanticRefOrdinalData") -> "ScoredSemanticRefOrdinal":
        """Reconstruct an instance from a dict produced by serialize()."""
        return ScoredSemanticRefOrdinal.__pydantic_validator__.validate_python(data)  # type: ignore
107
+
108
+
109
@dataclass
class ScoredMessageOrdinal:
    """A message ordinal paired with a relevance score."""

    message_ordinal: MessageOrdinal
    score: float
113
+
114
+
115
class ITermToSemanticRefIndex(Protocol):
    """Async index mapping term text to scored semantic-ref ordinals."""

    async def size(self) -> int: ...

    async def get_terms(self) -> list[str]: ...

    async def add_term(
        self,
        term: str,
        semantic_ref_ordinal: SemanticRefOrdinal | ScoredSemanticRefOrdinal,
    ) -> str: ...

    async def remove_term(
        self, term: str, semantic_ref_ordinal: SemanticRefOrdinal
    ) -> None: ...

    # Returns None when the term is not present in the index.
    async def lookup_term(self, term: str) -> list[ScoredSemanticRefOrdinal] | None: ...

    async def clear(self) -> None: ...

    async def serialize(self) -> Any: ...

    async def deserialize(self, data: Any) -> None: ...
137
+
138
+
139
# Discriminator for the kinds of knowledge a SemanticRef may carry.
type KnowledgeType = Literal["entity", "action", "topic", "tag"]
140
+
141
+
142
@dataclass
class Topic:
    """A conversation topic, stored as plain text."""

    # Discriminator used by knowledge (de)serialization; not an instance field.
    knowledge_type: ClassVar[Literal["topic"]] = "topic"
    text: str
146
+
147
+
148
@dataclass
class Tag:
    """A free-form tag, stored as plain text."""

    # Discriminator used by knowledge (de)serialization; not an instance field.
    knowledge_type: ClassVar[Literal["tag"]] = "tag"
    text: str
152
+
153
+
154
# Union of every knowledge payload a SemanticRef can reference.
type Knowledge = kplib.ConcreteEntity | kplib.Action | Topic | Tag
155
+
156
+
157
# Serialized (camelCase) form of TextLocation.
class TextLocationData(TypedDict):
    messageOrdinal: MessageOrdinal
    chunkOrdinal: int
160
+
161
+
162
@dataclass(order=True)
class TextLocation:
    """A position within a conversation: a message ordinal plus a chunk ordinal.

    order=True gives lexicographic comparison: message_ordinal first,
    then chunk_ordinal.
    """

    # The ordinal of the message.
    message_ordinal: MessageOrdinal = CamelCaseField("The ordinal of the message")
    # The ordinal of the chunk.
    # In the end of a TextRange, 1 + ordinal of the last chunk in the range.
    chunk_ordinal: int = CamelCaseField(
        "The ordinal of the chunk; in the end of a TextRange, 1 + ordinal of the last chunk in the range",
        default=0,
    )

    def __repr__(self) -> str:
        return (
            f"{self.__class__.__name__}({self.message_ordinal}, {self.chunk_ordinal})"
        )

    def serialize(self) -> TextLocationData:
        """Serialize to a camelCase dict (TextLocationData shape)."""
        return self.__pydantic_serializer__.to_python(self, by_alias=True)  # type: ignore

    @staticmethod
    def deserialize(data: TextLocationData) -> "TextLocation":
        """Reconstruct an instance from a dict produced by serialize()."""
        return TextLocation.__pydantic_validator__.validate_python(data)  # type: ignore
184
+
185
+
186
# Serialized (camelCase) form of TextRange; `end` is omitted for point ranges.
class TextRangeData(TypedDict):
    start: TextLocationData
    end: NotRequired[TextLocationData | None]
189
+
190
+
191
# A text range within a session.
# TODO: Are TextRanges totally ordered?
@dataclass
class TextRange:
    """A contiguous range of text within a conversation.

    The range covers [start, end) with `end` exclusive. When `end` is
    None the range is a single point covering exactly one chunk.
    All comparisons normalize point ranges to an explicit one-chunk
    range via _effective_end(), so TextRange(loc) compares equal to
    TextRange(loc, loc_plus_one_chunk).
    """

    # The start of the range.
    start: TextLocation
    # The end of the range (exclusive). If None, the range is a single point.
    end: TextLocation | None = None

    def __repr__(self) -> str:
        if self.end is None:
            return f"{self.__class__.__name__}({self.start})"
        else:
            return f"{self.__class__.__name__}({self.start}, {self.end})"

    def _effective_end(self) -> TextLocation:
        """Return the exclusive end, synthesizing one for point ranges.

        Previously this computation was duplicated at every comparison
        site; keeping it in one place avoids drift.
        """
        if self.end is not None:
            return self.end
        return TextLocation(self.start.message_ordinal, self.start.chunk_ordinal + 1)

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, TextRange):
            return NotImplemented
        return (
            self.start == other.start
            and self._effective_end() == other._effective_end()
        )

    def __lt__(self, other: Self) -> bool:
        if self.start != other.start:
            return self.start < other.start
        return self._effective_end() < other._effective_end()

    def __gt__(self, other: Self) -> bool:
        return other.__lt__(self)

    def __ge__(self, other: Self) -> bool:
        return not self.__lt__(other)

    def __le__(self, other: Self) -> bool:
        return not other.__lt__(self)

    def __contains__(self, other: Self) -> bool:
        """True iff *other* lies entirely within this range."""
        return (
            self.start <= other.start
            and other._effective_end() <= self._effective_end()
        )

    def serialize(self) -> TextRangeData:
        """Serialize to a camelCase dict, omitting a None end."""
        return self.__pydantic_serializer__.to_python(self, by_alias=True, exclude_none=True)  # type: ignore

    @staticmethod
    def deserialize(data: TextRangeData) -> "TextRange":
        """Reconstruct an instance from a dict produced by serialize()."""
        return TextRange.__pydantic_validator__.validate_python(data)  # type: ignore
258
+
259
+
260
# TODO: Implement serializing KnowledgeData (or import from kplib).
class KnowledgeData(TypedDict):
    pass


# Serialized (camelCase) form of SemanticRef.
class SemanticRefData(TypedDict):
    semanticRefOrdinal: SemanticRefOrdinal
    range: TextRangeData
    knowledgeType: KnowledgeType
    knowledge: KnowledgeData
270
+
271
+
272
@dataclass
class SemanticRef:
    """A piece of knowledge anchored to a text range in the conversation."""

    semantic_ref_ordinal: SemanticRefOrdinal = CamelCaseField(
        "The ordinal of the semantic reference"
    )
    range: TextRange = CamelCaseField("The text range of the semantic reference")
    knowledge: Knowledge = CamelCaseField(
        "The knowledge associated with this semantic reference"
    )

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({self.semantic_ref_ordinal}, {self.range}, {self.knowledge.knowledge_type!r}, {self.knowledge})"

    def serialize(self) -> SemanticRefData:
        """Serialize to a camelCase dict (SemanticRefData shape).

        Imported lazily to avoid a circular import with serialization.
        """
        from . import serialization

        return SemanticRefData(
            semanticRefOrdinal=self.semantic_ref_ordinal,
            range=self.range.serialize(),
            knowledgeType=self.knowledge.knowledge_type,
            knowledge=serialization.serialize_object(self.knowledge),
        )

    @staticmethod
    def deserialize(data: SemanticRefData) -> "SemanticRef":
        """Reconstruct an instance, dispatching knowledge by knowledgeType."""
        from . import serialization

        knowledge = serialization.deserialize_knowledge(
            data["knowledgeType"], data["knowledge"]
        )
        return SemanticRef(
            semantic_ref_ordinal=data["semanticRefOrdinal"],
            range=TextRange.deserialize(data["range"]),
            knowledge=knowledge,
        )
307
+
308
+
309
+ @dataclass
310
+ class DateRange:
311
+ start: Datetime
312
+ # Inclusive. If None, the range is unbounded.
313
+ end: Datetime | None = None
314
+
315
+ def __repr__(self) -> str:
316
+ if self.end is None:
317
+ return f"{self.__class__.__name__}({self.start!r})"
318
+ else:
319
+ return f"{self.__class__.__name__}({self.start!r}, {self.end!r})"
320
+
321
+ def __contains__(self, datetime: Datetime) -> bool:
322
+ if self.end is None:
323
+ return self.start <= datetime
324
+ return self.start <= datetime <= self.end
325
+
326
+
327
+ # Term must be hashable to allow using it as a dict key or set member.
328
+ @dataclass(unsafe_hash=True)
329
+ class Term:
330
+ text: str
331
+ # Optional weighting for these matches.
332
+ weight: float | None = None
333
+
334
+ def __repr__(self) -> str:
335
+ if self.weight is None:
336
+ return f"{self.__class__.__name__}({self.text!r})"
337
+ else:
338
+ return f"{self.__class__.__name__}({self.text!r}, {self.weight:.4g})"
339
+
340
+ def serialize(self) -> "TermData":
341
+ return self.__pydantic_serializer__.to_python(self, by_alias=True, exclude_none=True) # type: ignore
342
+
343
+
344
# Allows for faster retrieval of name, value properties
@runtime_checkable
class IPropertyToSemanticRefIndex(Protocol):
    """Async index from (property name, value) pairs to scored semantic refs."""

    async def size(self) -> int: ...

    async def get_values(self) -> list[str]: ...

    async def add_property(
        self,
        property_name: str,
        value: str,
        semantic_ref_ordinal: SemanticRefOrdinal | ScoredSemanticRefOrdinal,
    ) -> None: ...

    # Returns None when no entries match the (name, value) pair.
    async def lookup_property(
        self, property_name: str, value: str
    ) -> list[ScoredSemanticRefOrdinal] | None: ...

    async def clear(self) -> None: ...

    async def remove_property(self, prop_name: str, semref_id: int) -> None: ...

    async def remove_all_for_semref(self, semref_id: int) -> None: ...
367
+
368
+
369
@dataclass
class TimestampedTextRange:
    """A text range paired with the timestamp of its message."""

    timestamp: str
    range: TextRange
373
+
374
+
375
# Return text ranges in the given date range.
class ITimestampToTextRangeIndex(Protocol):
    """Async index from message timestamps to text ranges."""

    # Contract (stable across providers):
    # - Timestamps must be ISO-8601 strings sortable lexicographically.
    # - lookup_range(DateRange) returns items with start <= t < end (end exclusive).
    #   If end is None, treat as a point query with end = start + epsilon.
    async def size(self) -> int: ...

    # Returns whether the timestamp was recorded.
    async def add_timestamp(
        self, message_ordinal: MessageOrdinal, timestamp: str
    ) -> bool: ...

    async def add_timestamps(
        self, message_timestamps: list[tuple[MessageOrdinal, str]]
    ) -> None: ...

    async def lookup_range(
        self, date_range: DateRange
    ) -> list[TimestampedTextRange]: ...
394
+
395
+
396
class ITermToRelatedTerms(Protocol):
    """Async exact-match (alias) mapping from a term to related terms."""

    # Returns None when no related terms are recorded for the text.
    async def lookup_term(self, text: str) -> list[Term] | None: ...

    async def size(self) -> int: ...

    async def is_empty(self) -> bool: ...

    async def clear(self) -> None: ...

    async def add_related_term(
        self, text: str, related_terms: Term | list[Term]
    ) -> None: ...

    async def remove_term(self, text: str) -> None: ...

    async def serialize(self) -> "TermToRelatedTermsData": ...

    async def deserialize(self, data: "TermToRelatedTermsData | None") -> None: ...


class ITermToRelatedTermsFuzzy(Protocol):
    """Async fuzzy (similarity-based) lookup of related terms."""

    async def size(self) -> int: ...

    async def add_terms(self, texts: list[str]) -> None: ...

    # max_hits / min_score bound the number and quality of results.
    async def lookup_term(
        self,
        text: str,
        max_hits: int | None = None,
        min_score: float | None = None,
    ) -> list[Term]: ...

    # Batch variant: one result list per input text, in order.
    async def lookup_terms(
        self,
        texts: list[str],
        max_hits: int | None = None,
        min_score: float | None = None,
    ) -> list[list[Term]]: ...


class ITermToRelatedTermsIndex(Protocol):
    """Combined related-terms index: exact aliases plus optional fuzzy lookup."""

    # Providers may implement aliases and fuzzy via separate tables, but must
    # expose them through these properties.
    @property
    def aliases(self) -> ITermToRelatedTerms: ...

    @property
    def fuzzy_index(self) -> ITermToRelatedTermsFuzzy | None: ...

    async def serialize(self) -> "TermsToRelatedTermsIndexData": ...

    async def deserialize(self, data: "TermsToRelatedTermsIndexData") -> None: ...
448
+
449
+
450
# Serialized form of Thread.
class ThreadData(TypedDict):
    description: str
    ranges: list[TextRangeData]


# A Thread is a set of text ranges in a conversation.
@dataclass
class Thread:
    """A described set of text ranges within a conversation."""

    description: str
    ranges: Sequence[TextRange]

    def serialize(self) -> ThreadData:
        """Serialize to a camelCase dict (ThreadData shape)."""
        return self.__pydantic_serializer__.to_python(self, by_alias=True)  # type: ignore

    @staticmethod
    def deserialize(data: ThreadData) -> "Thread":
        """Reconstruct an instance from a dict produced by serialize()."""
        return Thread.__pydantic_validator__.validate_python(data)  # type: ignore


# Threads are referenced by their sequential ordinal numbers.
type ThreadOrdinal = int


@dataclass
class ScoredThreadOrdinal:
    """A thread ordinal paired with a relevance score."""

    thread_ordinal: ThreadOrdinal
    score: float


class IConversationThreads(Protocol):
    """Protocol for storing and fuzzily looking up conversation threads."""

    threads: list[Thread]

    async def add_thread(self, thread: Thread) -> None: ...

    # Returns None when no thread matches the description well enough.
    async def lookup_thread(
        self,
        thread_description: str,
        max_matches: int | None = None,
        threshold_score: float | None = None,
    ) -> list[ScoredThreadOrdinal] | None: ...

    def serialize(self) -> "ConversationThreadData[ThreadDataItem]": ...

    def deserialize(self, data: "ConversationThreadData[ThreadDataItem]") -> None: ...
493
+
494
+
495
@runtime_checkable
class IMessageTextIndex[TMessage: IMessage](Protocol):
    """Async index for similarity search over message text."""

    async def add_messages(
        self,
        messages: Iterable[TMessage],
    ) -> None: ...

    # Index messages whose ordinals start at start_message_ordinal.
    async def add_messages_starting_at(
        self,
        start_message_ordinal: int,
        messages: list[TMessage],
    ) -> None: ...

    async def lookup_messages(
        self,
        message_text: str,
        max_matches: int | None = None,
        threshold_score: float | None = None,
    ) -> list[ScoredMessageOrdinal]: ...

    # Like lookup_messages, but restricted to the given ordinals.
    async def lookup_messages_in_subset(
        self,
        message_text: str,
        ordinals_to_search: list[MessageOrdinal],
        max_matches: int | None = None,
        threshold_score: float | None = None,
    ) -> list[ScoredMessageOrdinal]: ...

    # Async alternatives to __len__ and __bool__
    async def size(self) -> int: ...

    async def is_empty(self) -> bool: ...

    # TODO: Others?

    async def serialize(self) -> "MessageTextIndexData": ...

    async def deserialize(self, data: "MessageTextIndexData") -> None: ...
534
+
535
+
536
class IConversationSecondaryIndexes[TMessage: IMessage](Protocol):
    """Bundle of optional secondary indexes for a conversation."""

    property_to_semantic_ref_index: IPropertyToSemanticRefIndex | None
    timestamp_index: ITimestampToTextRangeIndex | None
    term_to_related_terms_index: ITermToRelatedTermsIndex | None
    threads: IConversationThreads | None = None
    message_index: IMessageTextIndex[TMessage] | None = None


class IConversation[
    TMessage: IMessage,
    TTermToSemanticRefIndex: ITermToSemanticRefIndex,
](Protocol):
    """A conversation: messages, semantic refs, and their indexes."""

    name_tag: str
    tags: list[str]
    messages: "IMessageCollection[TMessage]"
    semantic_refs: "ISemanticRefCollection"
    semantic_ref_index: TTermToSemanticRefIndex
    secondary_indexes: IConversationSecondaryIndexes[TMessage] | None
554
+
555
+
556
+ # -------------
557
+ # Search Types
558
+ # -------------
559
+
560
+
561
@dataclass
class SearchTerm:
    """Represents a term being searched for.

    Attributes:
        term: The term being searched for.
        related_terms: Additional terms related to the term. These can be supplied
            from synonym tables and so on.
            - An empty list indicates no related matches for this term.
            - `None` indicates that the search processor may try to resolve related
              terms from any available secondary indexes (e.g., ITermToRelatedTermsIndex).
    """

    term: Term
    related_terms: list[Term] | None = CamelCaseField(
        "Additional terms related to the term. These can be supplied from synonym tables and so on",
        default=None,
    )
579
+
580
+
581
# Well-known knowledge properties.
type KnowledgePropertyName = Literal[
    "name",  # the name of an entity
    "type",  # the type of an entity
    "verb",  # the verb of an action
    "subject",  # the subject of an action
    "object",  # the object of an action
    "indirectObject",  # the indirect object of an action
    "tag",  # tag
    "topic",  # topic
]
592
+
593
+
594
@dataclass
class PropertySearchTerm:
    """PropertySearch terms let you match named property values.

    - You can match a well-known property name (e.g., name("Bach"), type("book")).
    - Or you can provide a SearchTerm as a propertyName.
      For example, to match hue(red):
      - propertyName as SearchTerm, set to 'hue'
      - propertyValue as SearchTerm, set to 'red'
      We also want hue(red) to match any facets called color(red).

    SearchTerms can include related terms:
    - For example, you could include "color" as a related term for the
      propertyName "hue", or 'crimson' for red.

    The query processor can also resolve related terms using a
    related terms secondary index, if one is available.
    """

    property_name: KnowledgePropertyName | SearchTerm = CamelCaseField(
        "The property name to search for"
    )
    property_value: SearchTerm = CamelCaseField("The property value to search for")
617
+
618
+
619
@dataclass
class SearchTermGroup:
    """A group of search terms combined by a boolean operation."""

    boolean_op: Literal["and", "or", "or_max"] = CamelCaseField(
        "The boolean operation to apply to the terms"
    )
    terms: list["SearchTermGroupTypes"] = CamelCaseField(
        "The list of search terms in this group", default_factory=list
    )


# Any member of a SearchTermGroup; groups may nest recursively.
type SearchTermGroupTypes = SearchTerm | PropertySearchTerm | SearchTermGroup
632
+
633
+
634
@dataclass
class WhenFilter:
    """Additional constraints on when a SemanticRef is considered a match.

    A SemanticRef matching a term is actually considered a match
    when the following optional conditions are met (if present, must match):
        knowledgeType matches, e.g. knowledgeType == 'entity'
        dateRange matches, e.g. (Jan 3rd to Jan 10th)
        Semantic Refs are within supplied SCOPE,
        i.e. only Semantic Refs from a 'scoping' set of text ranges will match
    """

    knowledge_type: KnowledgeType | None = None
    date_range: DateRange | None = None
    thread_description: str | None = None
    tags: list[str] | None = None

    # SCOPE DEFINITION

    # Search terms whose matching text ranges supply the scope for this query
    scope_defining_terms: SearchTermGroup | None = None
    # Additional scoping ranges separately computed by caller
    text_ranges_in_scope: list[TextRange] | None = None
657
+
658
+
659
@dataclass
class SearchSelectExpr:
    """An expression used to select structured contents of a conversation."""

    # Term group that matches information.
    search_term_group: SearchTermGroup = CamelCaseField(
        "Term group that matches information"
    )
    # Filter that scopes what information to match.
    when: WhenFilter | None = None


@dataclass
class SemanticRefSearchResult:
    """Result of a semantic reference search."""

    # The raw term texts that produced matches.
    term_matches: set[str]
    # Matching semantic refs, with relevance scores.
    semantic_ref_matches: list[ScoredSemanticRefOrdinal]
675
+
676
+
677
# --------------------------------------------------
# Serialization formats use TypedDict and camelCase
# --------------------------------------------------


class ThreadDataItem(TypedDict):
    thread: ThreadData
    embedding: list[float] | None  # TODO: Why not NormalizedEmbedding?


class ConversationThreadData[TThreadDataItem: ThreadDataItem](TypedDict):
    threads: list[TThreadDataItem] | None


# Serialized form of Term; weight omitted when None.
class TermData(TypedDict):
    text: str
    weight: NotRequired[float | None]


class TermsToRelatedTermsDataItem(TypedDict):
    termText: str
    relatedTerms: list[TermData]


class TermToRelatedTermsData(TypedDict):
    relatedTerms: NotRequired[list[TermsToRelatedTermsDataItem] | None]


class TextEmbeddingIndexData(TypedDict):
    textItems: list[str]
    embeddings: NormalizedEmbeddings | None


class TermsToRelatedTermsIndexData(TypedDict):
    aliasData: NotRequired[TermToRelatedTermsData]
    textEmbeddingData: NotRequired[TextEmbeddingIndexData]


class ScoredSemanticRefOrdinalData(TypedDict):
    semanticRefOrdinal: SemanticRefOrdinal
    score: float


class TermToSemanticRefIndexItemData(TypedDict):
    term: str
    semanticRefOrdinals: list[ScoredSemanticRefOrdinalData]


# Persistent form of a term index.
class TermToSemanticRefIndexData(TypedDict):
    items: list[TermToSemanticRefIndexItemData]


# Persistent form of a whole conversation (messages + semantic refs).
class ConversationData[TMessageData](TypedDict):
    nameTag: str
    messages: list[TMessageData]
    tags: list[str]
    semanticRefs: list[SemanticRefData] | None
    semanticIndexData: NotRequired[TermToSemanticRefIndexData | None]


class TextToTextLocationIndexData(TypedDict):
    textLocations: list[TextLocationData]
    embeddings: NormalizedEmbeddings | None


class MessageTextIndexData(TypedDict):
    indexData: NotRequired[TextToTextLocationIndexData | None]


# ConversationData plus every optional secondary-index payload.
class ConversationDataWithIndexes[TMessageData](ConversationData[TMessageData]):
    relatedTermsIndexData: NotRequired[TermsToRelatedTermsIndexData | None]
    threadData: NotRequired[ConversationThreadData[ThreadDataItem] | None]
    messageIndexData: NotRequired[MessageTextIndexData | None]
751
+
752
+
753
+ # --------------------------------
754
+ # Indexing helper data structures
755
+ # --------------------------------
756
+
757
+
758
+ # --------
759
+ # Storage
760
+ # --------
761
+
762
+
763
class IReadonlyCollection[T, TOrdinal](AsyncIterable[T], Protocol):
    """Read-only async collection addressed by ordinal."""

    async def size(self) -> int: ...

    async def get_item(self, arg: TOrdinal) -> T: ...

    # Half-open slice [start, stop), like Python slicing.
    async def get_slice(self, start: int, stop: int) -> list[T]: ...

    async def get_multiple(self, arg: list[TOrdinal]) -> list[T]: ...
771
+
772
+
773
class ICollection[T, TOrdinal](IReadonlyCollection[T, TOrdinal], Protocol):
    """An APPEND-ONLY collection."""

    # True when items survive process restart (backed by durable storage).
    @property
    def is_persistent(self) -> bool: ...

    async def append(self, item: T) -> None: ...

    async def extend(self, items: Iterable[T]) -> None:
        """Append multiple items to the collection."""
        # The default implementation just calls append for each item;
        # providers may override with a batched implementation.
        for item in items:
            await self.append(item)
786
+
787
+
788
class IMessageCollection[TMessage: IMessage](
    ICollection[TMessage, MessageOrdinal], Protocol
):
    """A collection of Messages."""


class ISemanticRefCollection(ICollection[SemanticRef, SemanticRefOrdinal], Protocol):
    """A collection of SemanticRefs."""
796
+
797
+
798
class IStorageProvider[TMessage: IMessage](Protocol):
    """API spec for storage providers -- maybe in-memory or persistent."""

    async def get_message_collection(self) -> IMessageCollection[TMessage]: ...

    async def get_semantic_ref_collection(self) -> ISemanticRefCollection: ...

    # Index getters - ALL 6 index types for this conversation
    async def get_semantic_ref_index(self) -> ITermToSemanticRefIndex: ...

    async def get_property_index(self) -> IPropertyToSemanticRefIndex: ...

    async def get_timestamp_index(self) -> ITimestampToTextRangeIndex: ...

    async def get_message_text_index(self) -> IMessageTextIndex[TMessage]: ...

    async def get_related_terms_index(self) -> ITermToRelatedTermsIndex: ...

    async def get_conversation_threads(self) -> IConversationThreads: ...

    # Release any underlying resources (e.g. database connections).
    async def close(self) -> None: ...