typeagent-py 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. typeagent/aitools/auth.py +61 -0
  2. typeagent/aitools/embeddings.py +232 -0
  3. typeagent/aitools/utils.py +244 -0
  4. typeagent/aitools/vectorbase.py +175 -0
  5. typeagent/knowpro/answer_context_schema.py +49 -0
  6. typeagent/knowpro/answer_response_schema.py +34 -0
  7. typeagent/knowpro/answers.py +577 -0
  8. typeagent/knowpro/collections.py +759 -0
  9. typeagent/knowpro/common.py +9 -0
  10. typeagent/knowpro/convknowledge.py +112 -0
  11. typeagent/knowpro/convsettings.py +94 -0
  12. typeagent/knowpro/convutils.py +49 -0
  13. typeagent/knowpro/date_time_schema.py +32 -0
  14. typeagent/knowpro/field_helpers.py +87 -0
  15. typeagent/knowpro/fuzzyindex.py +144 -0
  16. typeagent/knowpro/interfaces.py +818 -0
  17. typeagent/knowpro/knowledge.py +88 -0
  18. typeagent/knowpro/kplib.py +125 -0
  19. typeagent/knowpro/query.py +1128 -0
  20. typeagent/knowpro/search.py +628 -0
  21. typeagent/knowpro/search_query_schema.py +165 -0
  22. typeagent/knowpro/searchlang.py +729 -0
  23. typeagent/knowpro/searchlib.py +345 -0
  24. typeagent/knowpro/secindex.py +100 -0
  25. typeagent/knowpro/serialization.py +390 -0
  26. typeagent/knowpro/textlocindex.py +179 -0
  27. typeagent/knowpro/utils.py +17 -0
  28. typeagent/mcp/server.py +139 -0
  29. typeagent/podcasts/podcast.py +473 -0
  30. typeagent/podcasts/podcast_import.py +105 -0
  31. typeagent/storage/__init__.py +25 -0
  32. typeagent/storage/memory/__init__.py +13 -0
  33. typeagent/storage/memory/collections.py +68 -0
  34. typeagent/storage/memory/convthreads.py +81 -0
  35. typeagent/storage/memory/messageindex.py +178 -0
  36. typeagent/storage/memory/propindex.py +289 -0
  37. typeagent/storage/memory/provider.py +84 -0
  38. typeagent/storage/memory/reltermsindex.py +318 -0
  39. typeagent/storage/memory/semrefindex.py +660 -0
  40. typeagent/storage/memory/timestampindex.py +176 -0
  41. typeagent/storage/sqlite/__init__.py +31 -0
  42. typeagent/storage/sqlite/collections.py +362 -0
  43. typeagent/storage/sqlite/messageindex.py +382 -0
  44. typeagent/storage/sqlite/propindex.py +119 -0
  45. typeagent/storage/sqlite/provider.py +293 -0
  46. typeagent/storage/sqlite/reltermsindex.py +328 -0
  47. typeagent/storage/sqlite/schema.py +248 -0
  48. typeagent/storage/sqlite/semrefindex.py +156 -0
  49. typeagent/storage/sqlite/timestampindex.py +146 -0
  50. typeagent/storage/utils.py +41 -0
  51. typeagent_py-0.1.0.dist-info/METADATA +28 -0
  52. typeagent_py-0.1.0.dist-info/RECORD +55 -0
  53. typeagent_py-0.1.0.dist-info/WHEEL +5 -0
  54. typeagent_py-0.1.0.dist-info/licenses/LICENSE +21 -0
  55. typeagent_py-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,175 @@
1
+ # Copyright (c) Microsoft Corporation.
2
+ # Licensed under the MIT License.
3
+
4
+ from collections.abc import Callable, Iterable
5
+ from dataclasses import dataclass
6
+
7
+ import numpy as np
8
+
9
+ from .embeddings import AsyncEmbeddingModel, NormalizedEmbedding, NormalizedEmbeddings
10
+
11
+
12
@dataclass
class ScoredInt:
    """An ordinal (`item`) into some collection, paired with its similarity `score`."""

    item: int  # Ordinal into the owning index (e.g. a row of VectorBase).
    score: float  # Similarity score; higher is better.
16
+
17
+
18
@dataclass
class TextEmbeddingIndexSettings:
    """Configuration for a text embedding index (see VectorBase).

    Attributes:
        embedding_model: Model used to compute embeddings.
        embedding_size: Width of each embedding; always equals
            embedding_model.embedding_size.
        min_score: Default minimum similarity for fuzzy matches.
        max_matches: Default cap on fuzzy-match results (None = no cap).
        retry_max_attempts: Retry attempts for embedding requests.
        retry_delay: Delay between retries, in seconds.
        batch_size: Number of texts per embedding request batch.
    """

    embedding_model: AsyncEmbeddingModel
    embedding_size: int  # Always embedding_model.embedding_size
    min_score: float
    max_matches: int | None
    retry_max_attempts: int = 2
    retry_delay: float = 2.0  # Seconds
    batch_size: int = 8

    def __init__(
        self,
        embedding_model: AsyncEmbeddingModel | None = None,
        embedding_size: int | None = None,
        min_score: float | None = None,
        max_matches: int | None = None,
    ):
        """Initialize settings, constructing a default model if none is given.

        Raises:
            ValueError: If an explicit embedding_size conflicts with the
                model's actual embedding size.
        """
        self.embedding_model = embedding_model or AsyncEmbeddingModel(embedding_size)
        self.embedding_size = self.embedding_model.embedding_size
        # Validate with a real exception; a bare `assert` would be silently
        # stripped when Python runs with -O.
        if embedding_size is not None and self.embedding_size != embedding_size:
            raise ValueError(
                f"Given embedding size {embedding_size} doesn't match "
                f"model's embedding size {self.embedding_size}"
            )
        self.min_score = min_score if min_score is not None else 0.85
        self.max_matches = max_matches
42
+
43
+
44
class VectorBase:
    """An in-memory index of normalized embeddings with fuzzy (similarity) lookup.

    Rows of `_vectors` are embeddings; similarity between the query and each
    row is computed with a dot product (cosine similarity, assuming the
    embeddings are normalized -- see NormalizedEmbedding).
    """

    settings: TextEmbeddingIndexSettings
    _vectors: NormalizedEmbeddings  # 2D float32 array, shape (len(self), embedding_size).
    _model: AsyncEmbeddingModel
    _embedding_size: int

    def __init__(self, settings: TextEmbeddingIndexSettings):
        self.settings = settings
        self._model = settings.embedding_model
        self._embedding_size = self._model.embedding_size
        self.clear()

    async def get_embedding(self, key: str, cache: bool = True) -> NormalizedEmbedding:
        """Compute the embedding for one key (optionally via the model's cache)."""
        if cache:
            return await self._model.get_embedding(key)
        else:
            return await self._model.get_embedding_nocache(key)

    async def get_embeddings(
        self, keys: list[str], cache: bool = True
    ) -> NormalizedEmbeddings:
        """Compute embeddings for many keys (optionally via the model's cache)."""
        if cache:
            return await self._model.get_embeddings(keys)
        else:
            return await self._model.get_embeddings_nocache(keys)

    def __len__(self) -> int:
        return len(self._vectors)

    # Needed because otherwise an empty index would be falsy.
    def __bool__(self) -> bool:
        return True

    def add_embedding(
        self, key: str | None, embedding: NormalizedEmbedding | list[float]
    ) -> None:
        """Append one embedding; if `key` is given, also cache it in the model."""
        if isinstance(embedding, list):
            embedding = np.array(embedding, dtype=np.float32)
        embeddings = embedding.reshape(1, -1)  # Make it 2D: 1xN
        self._vectors = np.concatenate((self._vectors, embeddings), axis=0)
        if key is not None:
            self._model.add_embedding(key, embedding)

    def add_embeddings(self, embeddings: NormalizedEmbeddings) -> None:
        """Append a 2D batch of embeddings whose width matches this index."""
        assert embeddings.ndim == 2
        assert embeddings.shape[1] == self._embedding_size
        self._vectors = np.concatenate((self._vectors, embeddings), axis=0)

    async def add_key(self, key: str, cache: bool = True) -> None:
        """Embed `key` and append the resulting embedding."""
        embeddings = (await self.get_embedding(key, cache=cache)).reshape(1, -1)
        self._vectors = np.concatenate((self._vectors, embeddings), axis=0)

    async def add_keys(self, keys: list[str], cache: bool = True) -> None:
        """Embed all `keys` and append the resulting embeddings."""
        embeddings = await self.get_embeddings(keys, cache=cache)
        self._vectors = np.concatenate((self._vectors, embeddings), axis=0)

    def fuzzy_lookup_embedding(
        self,
        embedding: NormalizedEmbedding,
        max_hits: int | None = None,
        min_score: float | None = None,
        predicate: Callable[[int], bool] | None = None,
    ) -> list[ScoredInt]:
        """Return the best-scoring ordinals for `embedding`, highest score first.

        Args:
            embedding: The (normalized) query embedding.
            max_hits: Maximum number of results (default 10).
            min_score: Minimum similarity to include (default 0.0).
            predicate: Optional filter; only ordinals for which it returns
                True are considered.
        """
        if max_hits is None:
            max_hits = 10
        if min_score is None:
            min_score = 0.0
        # This line does most of the work: a single matrix-vector product
        # yields the similarity of the query to every stored embedding.
        scores: Iterable[float] = np.dot(self._vectors, embedding)
        scored_ordinals = [
            ScoredInt(i, score)
            for i, score in enumerate(scores)
            if score >= min_score and (predicate is None or predicate(i))
        ]
        scored_ordinals.sort(key=lambda x: x.score, reverse=True)
        return scored_ordinals[:max_hits]

    # TODO: Make this and fuzzy_lookup_embedding() more similar.
    def fuzzy_lookup_embedding_in_subset(
        self,
        embedding: NormalizedEmbedding,
        ordinals_of_subset: list[int],
        max_hits: int | None = None,
        min_score: float | None = None,
    ) -> list[ScoredInt]:
        """Like fuzzy_lookup_embedding(), but only considers the given ordinals."""
        # Build a set once so the predicate's membership test is O(1) per row
        # instead of O(len(ordinals_of_subset)).
        subset = set(ordinals_of_subset)
        return self.fuzzy_lookup_embedding(
            embedding, max_hits, min_score, lambda i: i in subset
        )

    async def fuzzy_lookup(
        self,
        key: str,
        max_hits: int | None = None,
        min_score: float | None = None,
        predicate: Callable[[int], bool] | None = None,
    ) -> list[ScoredInt]:
        """Embed `key` and look it up; defaults come from `self.settings`."""
        if max_hits is None:
            max_hits = self.settings.max_matches
        if min_score is None:
            min_score = self.settings.min_score
        embedding = await self.get_embedding(key)
        return self.fuzzy_lookup_embedding(
            embedding, max_hits=max_hits, min_score=min_score, predicate=predicate
        )

    def clear(self) -> None:
        """Remove all embeddings, leaving an empty (0, embedding_size) array."""
        self._vectors = np.empty((0, self._embedding_size), dtype=np.float32)

    def get_embedding_at(self, pos: int) -> NormalizedEmbedding:
        """Return the embedding at `pos`, raising IndexError if out of bounds."""
        if 0 <= pos < len(self._vectors):
            return self._vectors[pos]
        raise IndexError(
            f"Index {pos} out of bounds for embedding index of size {len(self)}"
        )

    def serialize_embedding_at(self, pos: int) -> NormalizedEmbedding | None:
        """Return the embedding at `pos`, or None if out of bounds."""
        return self._vectors[pos] if 0 <= pos < len(self._vectors) else None

    def serialize(self) -> NormalizedEmbeddings:
        """Return the raw embeddings array for persistence."""
        assert self._vectors.shape == (len(self._vectors), self._embedding_size)
        return self._vectors  # TODO: Should we make a copy?

    def deserialize(self, data: NormalizedEmbeddings | None) -> None:
        """Replace the index contents with `data`; None clears the index.

        Raises:
            ValueError: If `data` is not 2D with this index's embedding size.
        """
        if data is None:
            self.clear()
            return
        # Raise a real exception rather than assert: the data typically comes
        # from external storage, and asserts are stripped under -O.
        if data.shape != (len(data), self._embedding_size):
            raise ValueError(
                f"Bad embeddings shape {data.shape}; "
                f"expected (N, {self._embedding_size})"
            )
        self._vectors = data  # TODO: Should we make a copy?
@@ -0,0 +1,49 @@
1
+ # Copyright (c) Microsoft Corporation.
2
+ # Licensed under the MIT License.
3
+
4
+ # TODO: Are we sure this isn't used as a translator schema class?
5
+
6
+ from dataclasses import dataclass
7
+ from typing import Annotated, Any, Union
8
+ from typing_extensions import Doc
9
+
10
+ from ..knowpro.interfaces import DateRange
11
+
12
+ EntityNames = Union[str, list[str]]
13
+
14
+
15
# Schema dataclass: one piece of knowledge judged relevant to a user question,
# with optional provenance (who said it / who heard it) and a time range.
# NOTE(review): the Doc(...) strings appear to be model-facing schema text --
# presumably surfaced to the LLM; verify before editing them.
@dataclass
class RelevantKnowledge:
    knowledge: Annotated[Any, Doc("The actual knowledge")]
    origin: Annotated[
        EntityNames | None, Doc("Entity or entities who mentioned the knowledge")
    ] = None
    audience: Annotated[
        EntityNames | None,
        Doc("Entity or entities who received or consumed this knowledge"),
    ] = None
    time_range: Annotated[
        DateRange | None, Doc("Time period during which this knowledge was gathered")
    ] = None
28
+
29
+
30
# Schema dataclass: a message deemed relevant to the user question.
# NOTE(review): field names use camelCase (`messageText`) and a trailing
# underscore (`from_`, since `from` is a keyword) -- these names look like
# part of the wire/schema format; do not rename without checking consumers.
@dataclass
class RelevantMessage:
    from_: Annotated[EntityNames | None, Doc("Sender(s) of the message")]
    to: Annotated[EntityNames | None, Doc("Recipient(s) of the message")]
    timestamp: Annotated[str | None, Doc("Timestamp of the message in ISO format")]
    messageText: Annotated[str | list[str] | None, Doc("Text chunks in this message")]
36
+
37
+
38
# Aggregate schema dataclass: the full context handed to the answer generator.
# All three fields are required lists; per the docstring, callers pass empty
# lists (not None) for categories they don't need.
@dataclass
class AnswerContext:
    """Use empty lists for unneeded properties."""

    entities: Annotated[
        list[RelevantKnowledge],
        Doc(
            "Relevant entities. Use the 'name' and 'type' properties of entities to PRECISELY identify those that answer the user question."
        ),
    ]
    topics: Annotated[list[RelevantKnowledge], Doc("Relevant topics")]
    messages: Annotated[list[RelevantMessage], Doc("Relevant messages")]
@@ -0,0 +1,34 @@
1
+ # Copyright (c) Microsoft Corporation.
2
+ # Licensed under the MIT License.
3
+
4
+ from typing import Literal, Annotated
5
+ from typing_extensions import Doc
6
+ from pydantic.dataclasses import dataclass
7
+
8
# Closed set of outcome tags for AnswerResponse.type.
AnswerType = Literal[
    "NoAnswer",  # If question cannot be accurately answered from [ANSWER CONTEXT]
    "Answered",  # Fully answer question
    # TODO: Add a category for outright errors, e.g. network errors
]
13
+
14
+
15
# Pydantic dataclass describing the answer-generation model's response.
# `answer` is filled when type == "Answered"; `whyNoAnswer` when "NoAnswer"
# (per the Doc() strings below -- presumably enforced only by the prompt,
# not by validation; verify if stronger guarantees are needed).
# NOTE(review): field names (e.g. camelCase `whyNoAnswer`) and the Doc()
# strings look like model-facing schema text; do not edit them casually.
@dataclass
class AnswerResponse:
    type: Annotated[
        AnswerType,
        Doc(
            'use "NoAnswer" if no highly relevant answer found in the [ANSWER CONTEXT]'
        ),
    ]
    answer: Annotated[
        str | None,
        Doc(
            "the answer to display if [ANSWER CONTEXT] is highly relevant and can be used to answer the user's question"
        ),
    ] = None
    whyNoAnswer: Annotated[
        str | None,
        Doc(
            "If NoAnswer, explain why..\nparticularly explain why you didn't use any supplied entities"
        ),
    ] = None