typeagent-py 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- typeagent/aitools/auth.py +61 -0
- typeagent/aitools/embeddings.py +232 -0
- typeagent/aitools/utils.py +244 -0
- typeagent/aitools/vectorbase.py +175 -0
- typeagent/knowpro/answer_context_schema.py +49 -0
- typeagent/knowpro/answer_response_schema.py +34 -0
- typeagent/knowpro/answers.py +577 -0
- typeagent/knowpro/collections.py +759 -0
- typeagent/knowpro/common.py +9 -0
- typeagent/knowpro/convknowledge.py +112 -0
- typeagent/knowpro/convsettings.py +94 -0
- typeagent/knowpro/convutils.py +49 -0
- typeagent/knowpro/date_time_schema.py +32 -0
- typeagent/knowpro/field_helpers.py +87 -0
- typeagent/knowpro/fuzzyindex.py +144 -0
- typeagent/knowpro/interfaces.py +818 -0
- typeagent/knowpro/knowledge.py +88 -0
- typeagent/knowpro/kplib.py +125 -0
- typeagent/knowpro/query.py +1128 -0
- typeagent/knowpro/search.py +628 -0
- typeagent/knowpro/search_query_schema.py +165 -0
- typeagent/knowpro/searchlang.py +729 -0
- typeagent/knowpro/searchlib.py +345 -0
- typeagent/knowpro/secindex.py +100 -0
- typeagent/knowpro/serialization.py +390 -0
- typeagent/knowpro/textlocindex.py +179 -0
- typeagent/knowpro/utils.py +17 -0
- typeagent/mcp/server.py +139 -0
- typeagent/podcasts/podcast.py +473 -0
- typeagent/podcasts/podcast_import.py +105 -0
- typeagent/storage/__init__.py +25 -0
- typeagent/storage/memory/__init__.py +13 -0
- typeagent/storage/memory/collections.py +68 -0
- typeagent/storage/memory/convthreads.py +81 -0
- typeagent/storage/memory/messageindex.py +178 -0
- typeagent/storage/memory/propindex.py +289 -0
- typeagent/storage/memory/provider.py +84 -0
- typeagent/storage/memory/reltermsindex.py +318 -0
- typeagent/storage/memory/semrefindex.py +660 -0
- typeagent/storage/memory/timestampindex.py +176 -0
- typeagent/storage/sqlite/__init__.py +31 -0
- typeagent/storage/sqlite/collections.py +362 -0
- typeagent/storage/sqlite/messageindex.py +382 -0
- typeagent/storage/sqlite/propindex.py +119 -0
- typeagent/storage/sqlite/provider.py +293 -0
- typeagent/storage/sqlite/reltermsindex.py +328 -0
- typeagent/storage/sqlite/schema.py +248 -0
- typeagent/storage/sqlite/semrefindex.py +156 -0
- typeagent/storage/sqlite/timestampindex.py +146 -0
- typeagent/storage/utils.py +41 -0
- typeagent_py-0.1.0.dist-info/METADATA +28 -0
- typeagent_py-0.1.0.dist-info/RECORD +55 -0
- typeagent_py-0.1.0.dist-info/WHEEL +5 -0
- typeagent_py-0.1.0.dist-info/licenses/LICENSE +21 -0
- typeagent_py-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,175 @@
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
2
|
+
# Licensed under the MIT License.
|
3
|
+
|
4
|
+
from collections.abc import Callable, Iterable
|
5
|
+
from dataclasses import dataclass
|
6
|
+
|
7
|
+
import numpy as np
|
8
|
+
|
9
|
+
from .embeddings import AsyncEmbeddingModel, NormalizedEmbedding, NormalizedEmbeddings
|
10
|
+
|
11
|
+
|
12
|
+
@dataclass
class ScoredInt:
    """An integer item paired with the score it was given."""

    # The integer being scored.
    item: int
    # The score attached to ``item``.
    score: float
|
16
|
+
|
17
|
+
|
18
|
+
@dataclass
class TextEmbeddingIndexSettings:
    """Settings for an embedding-based text index.

    The hand-written ``__init__`` below is the constructor in effect; it
    only assigns ``embedding_model``, ``embedding_size``, ``min_score`` and
    ``max_matches``.  The remaining fields are not constructor arguments and
    keep the defaults declared here unless assigned afterwards.
    """

    embedding_model: AsyncEmbeddingModel
    embedding_size: int  # Always embedding_model.embedding_size
    min_score: float
    max_matches: int | None
    retry_max_attempts: int = 2
    retry_delay: float = 2.0  # Seconds
    batch_size: int = 8

    def __init__(
        self,
        embedding_model: AsyncEmbeddingModel | None = None,
        embedding_size: int | None = None,
        min_score: float | None = None,
        max_matches: int | None = None,
    ):
        """Initialize the settings.

        Args:
            embedding_model: Model to use.  When falsy, a new
                ``AsyncEmbeddingModel(embedding_size)`` is created.
            embedding_size: Optional expected size; when given it must equal
                the model's ``embedding_size``.
            min_score: Minimum score; 0.85 when None.
            max_matches: Maximum number of matches; stored as given.

        Raises:
            AssertionError: If ``embedding_size`` is given and differs from
                the model's embedding size.
        """
        # Fall back to a freshly constructed model when none was supplied.
        if not embedding_model:
            embedding_model = AsyncEmbeddingModel(embedding_size)
        self.embedding_model = embedding_model
        # The model is the source of truth for the size.
        self.embedding_size = self.embedding_model.embedding_size
        assert (
            embedding_size is None or self.embedding_size == embedding_size
        ), f"Given embedding size {embedding_size} doesn't match model's embedding size {self.embedding_size}"
        self.min_score = 0.85 if min_score is None else min_score
        self.max_matches = max_matches
|
42
|
+
|
43
|
+
|
44
|
+
class VectorBase:
    """Growable collection of embedding vectors with dot-product lookup.

    Vectors are stored as the rows of a 2-D NumPy array.  The row index
    (ordinal) of a vector is what the fuzzy lookup methods report as
    ``ScoredInt.item``.
    """

    settings: TextEmbeddingIndexSettings
    _vectors: NormalizedEmbeddings
    _model: AsyncEmbeddingModel
    _embedding_size: int

    def __init__(self, settings: TextEmbeddingIndexSettings):
        """Bind to the model in *settings* and start with no vectors."""
        self.settings = settings
        self._model = settings.embedding_model
        self._embedding_size = self._model.embedding_size
        self.clear()

    async def get_embedding(self, key: str, cache: bool = True) -> NormalizedEmbedding:
        """Return the model's embedding for *key*.

        With ``cache=False`` the model's ``get_embedding_nocache`` is used.
        """
        if cache:
            return await self._model.get_embedding(key)
        else:
            return await self._model.get_embedding_nocache(key)

    async def get_embeddings(
        self, keys: list[str], cache: bool = True
    ) -> NormalizedEmbeddings:
        """Return the model's embeddings for all *keys*.

        With ``cache=False`` the model's ``get_embeddings_nocache`` is used.
        """
        if cache:
            return await self._model.get_embeddings(keys)
        else:
            return await self._model.get_embeddings_nocache(keys)

    def __len__(self) -> int:
        """Return the number of stored vectors."""
        return len(self._vectors)

    # Needed because otherwise an empty index would be falsy.
    def __bool__(self) -> bool:
        return True

    def add_embedding(
        self, key: str | None, embedding: NormalizedEmbedding | list[float]
    ) -> None:
        """Append one embedding as a new row.

        A plain list is converted to a float32 array first.  When *key* is
        not None the embedding is also handed to the model's
        ``add_embedding``.
        """
        if isinstance(embedding, list):
            embedding = np.array(embedding, dtype=np.float32)
        embeddings = embedding.reshape(1, -1)  # Make it 2D: 1xN
        self._vectors = np.append(self._vectors, embeddings, axis=0)
        if key is not None:
            self._model.add_embedding(key, embedding)

    def add_embeddings(self, embeddings: NormalizedEmbeddings) -> None:
        """Append every row of a 2-D *embeddings* array.

        Raises:
            AssertionError: If *embeddings* is not 2-D or its row width
                differs from the embedding size.
        """
        assert embeddings.ndim == 2
        assert embeddings.shape[1] == self._embedding_size
        self._vectors = np.concatenate((self._vectors, embeddings), axis=0)

    async def add_key(self, key: str, cache: bool = True) -> None:
        """Fetch the embedding for *key* and append it as a new row."""
        embeddings = (await self.get_embedding(key, cache=cache)).reshape(1, -1)
        self._vectors = np.append(self._vectors, embeddings, axis=0)

    async def add_keys(self, keys: list[str], cache: bool = True) -> None:
        """Fetch the embeddings for *keys* and append them as new rows."""
        embeddings = await self.get_embeddings(keys, cache=cache)
        self._vectors = np.concatenate((self._vectors, embeddings), axis=0)

    def fuzzy_lookup_embedding(
        self,
        embedding: NormalizedEmbedding,
        max_hits: int | None = None,
        min_score: float | None = None,
        predicate: Callable[[int], bool] | None = None,
    ) -> list[ScoredInt]:
        """Score every stored vector against *embedding* and return the best.

        Args:
            embedding: Query vector; each row's score is its dot product
                with this vector.
            max_hits: Maximum number of results; 10 when None.
            min_score: Rows scoring below this are dropped; 0.0 when None.
            predicate: Optional filter called with the row ordinal; rows for
                which it returns a falsy value are dropped.

        Returns:
            Up to *max_hits* ``ScoredInt(ordinal, score)`` entries, highest
            score first.
        """
        if max_hits is None:
            max_hits = 10
        if min_score is None:
            min_score = 0.0
        # This line does most of the work:
        scores: Iterable[float] = np.dot(self._vectors, embedding)
        scored_ordinals = [
            ScoredInt(i, score)
            for i, score in enumerate(scores)
            if score >= min_score and (predicate is None or predicate(i))
        ]
        scored_ordinals.sort(key=lambda x: x.score, reverse=True)
        return scored_ordinals[:max_hits]

    # TODO: Make this and fuzzy_lookup_embedding() more similar.
    def fuzzy_lookup_embedding_in_subset(
        self,
        embedding: NormalizedEmbedding,
        ordinals_of_subset: list[int],
        max_hits: int | None = None,
        min_score: float | None = None,
    ) -> list[ScoredInt]:
        """Like ``fuzzy_lookup_embedding``, restricted to the given ordinals.

        Only rows whose ordinal appears in *ordinals_of_subset* can be
        returned.
        """
        # Build the membership set once so each per-row test is a hash
        # lookup rather than a linear scan of the list.
        subset = frozenset(ordinals_of_subset)
        return self.fuzzy_lookup_embedding(
            embedding, max_hits, min_score, lambda i: i in subset
        )

    async def fuzzy_lookup(
        self,
        key: str,
        max_hits: int | None = None,
        min_score: float | None = None,
        predicate: Callable[[int], bool] | None = None,
    ) -> list[ScoredInt]:
        """Embed *key* and run ``fuzzy_lookup_embedding`` on the result.

        When None, *max_hits* and *min_score* default to
        ``settings.max_matches`` and ``settings.min_score``.  If
        ``settings.max_matches`` is itself None, ``fuzzy_lookup_embedding``
        applies its own default of 10.
        """
        if max_hits is None:
            max_hits = self.settings.max_matches
        if min_score is None:
            min_score = self.settings.min_score
        embedding = await self.get_embedding(key)
        return self.fuzzy_lookup_embedding(
            embedding, max_hits=max_hits, min_score=min_score, predicate=predicate
        )

    def clear(self) -> None:
        """Drop all vectors, leaving an empty (0, embedding_size) matrix."""
        # Create the empty 2-D array directly instead of assigning to
        # ``ndarray.shape`` in place, which NumPy discourages.
        self._vectors = np.empty((0, self._embedding_size), dtype=np.float32)

    def get_embedding_at(self, pos: int) -> NormalizedEmbedding:
        """Return the vector at row *pos*.

        Raises:
            IndexError: If *pos* is negative or not less than the number of
                stored vectors.
        """
        if 0 <= pos < len(self._vectors):
            return self._vectors[pos]
        raise IndexError(
            f"Index {pos} out of bounds for embedding index of size {len(self)}"
        )

    def serialize_embedding_at(self, pos: int) -> NormalizedEmbedding | None:
        """Return the vector at row *pos*, or None when out of range."""
        return self._vectors[pos] if 0 <= pos < len(self._vectors) else None

    def serialize(self) -> NormalizedEmbeddings:
        """Return the stored matrix itself (not a copy)."""
        assert self._vectors.shape == (len(self._vectors), self._embedding_size)
        return self._vectors  # TODO: Should we make a copy?

    def deserialize(self, data: NormalizedEmbeddings | None) -> None:
        """Replace the stored matrix with *data*; None clears the index.

        Raises:
            AssertionError: If *data* is not shaped
                ``(len(data), embedding_size)``.
        """
        if data is None:
            self.clear()
            return
        assert data.shape == (len(data), self._embedding_size), [
            data.shape,
            self._embedding_size,
        ]
        self._vectors = data  # TODO: Should we make a copy?
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
2
|
+
# Licensed under the MIT License.
|
3
|
+
|
4
|
+
# TODO: Are we sure this isn't used as a translator schema class?
|
5
|
+
|
6
|
+
from dataclasses import dataclass
|
7
|
+
from typing import Annotated, Any, Union
|
8
|
+
from typing_extensions import Doc
|
9
|
+
|
10
|
+
from ..knowpro.interfaces import DateRange
|
11
|
+
|
12
|
+
# Either a single entity name or a list of entity names.
EntityNames = Union[str, list[str]]
|
13
|
+
|
14
|
+
|
15
|
+
# A piece of knowledge plus optional origin, audience and time-range details.
# Documented with comments rather than a docstring so that the class's
# ``__doc__`` stays exactly as it was.
@dataclass
class RelevantKnowledge:
    # Required: the knowledge payload itself (any type).
    knowledge: Annotated[
        Any,
        Doc("The actual knowledge"),
    ]
    # Optional: who mentioned it.
    origin: Annotated[
        EntityNames | None,
        Doc("Entity or entities who mentioned the knowledge"),
    ] = None
    # Optional: who received or consumed it.
    audience: Annotated[
        EntityNames | None,
        Doc("Entity or entities who received or consumed this knowledge"),
    ] = None
    # Optional: when it was gathered.
    time_range: Annotated[
        DateRange | None,
        Doc("Time period during which this knowledge was gathered"),
    ] = None
|
28
|
+
|
29
|
+
|
30
|
+
# A message with its sender(s), recipient(s), timestamp and text.
# All four fields are required constructor arguments (none has a default),
# although each accepts None.
# Documented with comments rather than a docstring so that the class's
# ``__doc__`` stays exactly as it was.
@dataclass
class RelevantMessage:
    from_: Annotated[
        EntityNames | None,
        Doc("Sender(s) of the message"),
    ]
    to: Annotated[
        EntityNames | None,
        Doc("Recipient(s) of the message"),
    ]
    timestamp: Annotated[
        str | None,
        Doc("Timestamp of the message in ISO format"),
    ]
    messageText: Annotated[
        str | list[str] | None,
        Doc("Text chunks in this message"),
    ]
|
36
|
+
|
37
|
+
|
38
|
+
# Groups the entities, topics and messages relevant to a question.
# All three fields are required lists.
@dataclass
class AnswerContext:
    """Use empty lists for unneeded properties."""

    entities: Annotated[
        list[RelevantKnowledge],
        Doc(
            "Relevant entities. Use the 'name' and 'type' properties of entities to PRECISELY identify those that answer the user question."
        ),
    ]
    topics: Annotated[
        list[RelevantKnowledge],
        Doc("Relevant topics"),
    ]
    messages: Annotated[
        list[RelevantMessage],
        Doc("Relevant messages"),
    ]
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
2
|
+
# Licensed under the MIT License.
|
3
|
+
|
4
|
+
from typing import Literal, Annotated
|
5
|
+
from typing_extensions import Doc
|
6
|
+
from pydantic.dataclasses import dataclass
|
7
|
+
|
8
|
+
# Allowed values for AnswerResponse.type:
#   "NoAnswer" - If question cannot be accurately answered from [ANSWER CONTEXT]
#   "Answered" - Fully answer question
# TODO: Add a category for outright errors, e.g. network errors
AnswerType = Literal["NoAnswer", "Answered"]
|
13
|
+
|
14
|
+
|
15
|
+
# Response carrying an answer type plus either an answer or a reason why
# there is none.  ``type`` is required; ``answer`` and ``whyNoAnswer``
# default to None.
# Documented with comments rather than a docstring so that the class's
# ``__doc__`` stays exactly as it was.
@dataclass
class AnswerResponse:
    type: Annotated[
        AnswerType,
        Doc('use "NoAnswer" if no highly relevant answer found in the [ANSWER CONTEXT]'),
    ]
    answer: Annotated[
        str | None,
        Doc(
            "the answer to display if [ANSWER CONTEXT] is highly relevant and can be used to answer the user's question"
        ),
    ] = None
    whyNoAnswer: Annotated[
        str | None,
        Doc(
            "If NoAnswer, explain why..\nparticularly explain why you didn't use any supplied entities"
        ),
    ] = None
|