typeagent-py 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- typeagent/aitools/auth.py +61 -0
- typeagent/aitools/embeddings.py +232 -0
- typeagent/aitools/utils.py +244 -0
- typeagent/aitools/vectorbase.py +175 -0
- typeagent/knowpro/answer_context_schema.py +49 -0
- typeagent/knowpro/answer_response_schema.py +34 -0
- typeagent/knowpro/answers.py +577 -0
- typeagent/knowpro/collections.py +759 -0
- typeagent/knowpro/common.py +9 -0
- typeagent/knowpro/convknowledge.py +112 -0
- typeagent/knowpro/convsettings.py +94 -0
- typeagent/knowpro/convutils.py +49 -0
- typeagent/knowpro/date_time_schema.py +32 -0
- typeagent/knowpro/field_helpers.py +87 -0
- typeagent/knowpro/fuzzyindex.py +144 -0
- typeagent/knowpro/interfaces.py +818 -0
- typeagent/knowpro/knowledge.py +88 -0
- typeagent/knowpro/kplib.py +125 -0
- typeagent/knowpro/query.py +1128 -0
- typeagent/knowpro/search.py +628 -0
- typeagent/knowpro/search_query_schema.py +165 -0
- typeagent/knowpro/searchlang.py +729 -0
- typeagent/knowpro/searchlib.py +345 -0
- typeagent/knowpro/secindex.py +100 -0
- typeagent/knowpro/serialization.py +390 -0
- typeagent/knowpro/textlocindex.py +179 -0
- typeagent/knowpro/utils.py +17 -0
- typeagent/mcp/server.py +139 -0
- typeagent/podcasts/podcast.py +473 -0
- typeagent/podcasts/podcast_import.py +105 -0
- typeagent/storage/__init__.py +25 -0
- typeagent/storage/memory/__init__.py +13 -0
- typeagent/storage/memory/collections.py +68 -0
- typeagent/storage/memory/convthreads.py +81 -0
- typeagent/storage/memory/messageindex.py +178 -0
- typeagent/storage/memory/propindex.py +289 -0
- typeagent/storage/memory/provider.py +84 -0
- typeagent/storage/memory/reltermsindex.py +318 -0
- typeagent/storage/memory/semrefindex.py +660 -0
- typeagent/storage/memory/timestampindex.py +176 -0
- typeagent/storage/sqlite/__init__.py +31 -0
- typeagent/storage/sqlite/collections.py +362 -0
- typeagent/storage/sqlite/messageindex.py +382 -0
- typeagent/storage/sqlite/propindex.py +119 -0
- typeagent/storage/sqlite/provider.py +293 -0
- typeagent/storage/sqlite/reltermsindex.py +328 -0
- typeagent/storage/sqlite/schema.py +248 -0
- typeagent/storage/sqlite/semrefindex.py +156 -0
- typeagent/storage/sqlite/timestampindex.py +146 -0
- typeagent/storage/utils.py +41 -0
- typeagent_py-0.1.0.dist-info/METADATA +28 -0
- typeagent_py-0.1.0.dist-info/RECORD +55 -0
- typeagent_py-0.1.0.dist-info/WHEEL +5 -0
- typeagent_py-0.1.0.dist-info/licenses/LICENSE +21 -0
- typeagent_py-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,628 @@
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
2
|
+
# Licensed under the MIT License.
|
3
|
+
|
4
|
+
from collections.abc import Callable
|
5
|
+
from pydantic.dataclasses import dataclass
|
6
|
+
from pydantic import Field, AliasChoices
|
7
|
+
from typing import TypeGuard, cast, Annotated
|
8
|
+
|
9
|
+
from .collections import MessageAccumulator, SemanticRefAccumulator
|
10
|
+
from .field_helpers import CamelCaseField
|
11
|
+
from .interfaces import (
|
12
|
+
IConversation,
|
13
|
+
IConversationSecondaryIndexes,
|
14
|
+
KnowledgeType,
|
15
|
+
PropertySearchTerm,
|
16
|
+
ScoredMessageOrdinal,
|
17
|
+
ScoredSemanticRefOrdinal,
|
18
|
+
SearchSelectExpr,
|
19
|
+
SearchTerm,
|
20
|
+
SearchTermGroup,
|
21
|
+
SemanticRef,
|
22
|
+
SemanticRefSearchResult,
|
23
|
+
Term,
|
24
|
+
WhenFilter,
|
25
|
+
)
|
26
|
+
from .kplib import ConcreteEntity
|
27
|
+
from ..storage.memory.messageindex import IMessageTextEmbeddingIndex
|
28
|
+
from .searchlib import create_tag_search_term_group
|
29
|
+
from .query import (
|
30
|
+
BooleanOp,
|
31
|
+
CompiledSearchTerm,
|
32
|
+
CompiledTermGroup,
|
33
|
+
GetScopeExpr,
|
34
|
+
GetScoredMessagesExpr,
|
35
|
+
GroupByKnowledgeTypeExpr,
|
36
|
+
GroupSearchResultsExpr,
|
37
|
+
IQueryOpExpr,
|
38
|
+
IQuerySemanticRefPredicate,
|
39
|
+
IQueryTextRangeSelector,
|
40
|
+
KnowledgeTypePredicate,
|
41
|
+
MatchMessagesAndExpr,
|
42
|
+
MatchMessagesBooleanExpr,
|
43
|
+
MatchMessagesOrExpr,
|
44
|
+
MatchMessagesOrMaxExpr,
|
45
|
+
MatchPropertySearchTermExpr,
|
46
|
+
MatchSearchTermExpr,
|
47
|
+
MatchTagExpr,
|
48
|
+
MatchTermsAndExpr,
|
49
|
+
MatchTermsBooleanExpr,
|
50
|
+
MatchTermsOrExpr,
|
51
|
+
MatchTermsOrMaxExpr,
|
52
|
+
MatchTopicExpr,
|
53
|
+
MessagesFromKnowledgeExpr,
|
54
|
+
NoOpExpr,
|
55
|
+
QueryEvalContext,
|
56
|
+
RankMessagesBySimilarityExpr,
|
57
|
+
SelectMessagesInCharBudget,
|
58
|
+
SelectTopNExpr,
|
59
|
+
SelectTopNKnowledgeGroupExpr,
|
60
|
+
TextRangeSelector,
|
61
|
+
TextRangesFromMessagesSelector,
|
62
|
+
TextRangesInDateRangeSelector,
|
63
|
+
ThreadSelector,
|
64
|
+
WhereSemanticRefExpr,
|
65
|
+
is_conversation_searchable,
|
66
|
+
match_entity_name_or_type,
|
67
|
+
to_non_required_search_term,
|
68
|
+
to_required_search_term,
|
69
|
+
)
|
70
|
+
from ..storage.memory.reltermsindex import resolve_related_terms
|
71
|
+
|
72
|
+
|
73
|
+
@dataclass
class SearchQueryExpr:
    """A search query: a list of select expressions plus optional raw query text."""

    # One entry per selection; run_search_query() runs one conversation search
    # for each entry in this list.
    select_expressions: list[SearchSelectExpr] = CamelCaseField(
        "List of selection expressions for the search"
    )
    # Raw text of the query, if available. run_search_query() falls back to it
    # when the caller does not pass its own original query text.
    raw_query: str | None = None
|
79
|
+
|
80
|
+
|
81
|
+
@dataclass
|
82
|
+
class SearchOptions:
|
83
|
+
max_knowledge_matches: int | None = None
|
84
|
+
exact_match: bool = False
|
85
|
+
max_message_matches: int | None = None
|
86
|
+
# The maximum # of total message characters to select
|
87
|
+
# The query processor will ensure that the cumulative character count of message matches
|
88
|
+
# is less than this number
|
89
|
+
max_chars_in_budget: int | None = None
|
90
|
+
threshold_score: float | None = None
|
91
|
+
|
92
|
+
def __repr__(self):
|
93
|
+
parts = []
|
94
|
+
for key in dir(self):
|
95
|
+
if not key.startswith("_"):
|
96
|
+
value = getattr(self, key)
|
97
|
+
if value is not None:
|
98
|
+
parts.append(f"{key}={value!r}")
|
99
|
+
return f"{self.__class__.__name__}({', '.join(parts)})"
|
100
|
+
|
101
|
+
|
102
|
+
@dataclass
class ConversationSearchResult:
    """Outcome of one conversation search: matching messages and matching knowledge."""

    # Scored message ordinals produced by the message query.
    message_matches: list[ScoredMessageOrdinal]
    # Knowledge search results, keyed by knowledge type.
    knowledge_matches: dict[KnowledgeType, SemanticRefSearchResult]
    # Raw query text the search was run with, if any.
    raw_query_text: str | None = None
|
107
|
+
|
108
|
+
|
109
|
+
async def search_conversation(
    conversation: IConversation,
    search_term_group: SearchTermGroup,
    when_filter: WhenFilter | None = None,
    options: SearchOptions | None = None,
    raw_search_query: str | None = None,
) -> ConversationSearchResult | None:
    """Search a conversation for matching knowledge and the messages derived from it.

    The knowledge search runs first; its matches are then compiled into a
    message query which is evaluated against the same conversation.

    Returns None when the knowledge search returns None; otherwise a
    ConversationSearchResult holding the message matches, the knowledge
    matches and ``raw_search_query``.
    """
    effective_options = options or SearchOptions()
    knowledge = await search_conversation_knowledge(
        conversation, search_term_group, when_filter, effective_options
    )
    if knowledge is None:
        return None

    # Future: Combine knowledge and message queries into single query tree.
    message_query = await QueryCompiler(
        conversation, conversation.secondary_indexes
    ).compile_message_query(knowledge, effective_options, raw_search_query)
    scored_messages: list[ScoredMessageOrdinal] = await run_query(
        conversation, effective_options, message_query
    )
    return ConversationSearchResult(scored_messages, knowledge, raw_search_query)
|
134
|
+
|
135
|
+
|
136
|
+
async def search_conversation_knowledge(
    conversation: IConversation,
    search_term_group: SearchTermGroup,
    when_filter: WhenFilter | None = None,
    options: SearchOptions | None = None,
) -> dict[KnowledgeType, SemanticRefSearchResult] | None:
    """Search a conversation for knowledge matching the given terms and filter.

    Returns None when the conversation is not searchable; otherwise the result
    of evaluating the compiled knowledge query.
    """
    effective_options = options or SearchOptions()
    if not is_conversation_searchable(conversation):
        return None

    indexes = conversation.secondary_indexes
    assert (
        indexes is not None
    ), "Conversation secondary indexes must be initialized before searching"

    knowledge_query = await QueryCompiler(conversation, indexes).compile_knowledge_query(
        search_term_group, when_filter, effective_options
    )
    return await run_query(conversation, effective_options, knowledge_query)
|
154
|
+
|
155
|
+
|
156
|
+
# TODO: search_conversation_by_text_similarity
|
157
|
+
|
158
|
+
|
159
|
+
async def run_search_query(
    conversation: IConversation,
    query: SearchQueryExpr,
    options: SearchOptions | None = None,
    original_query_text: str | None = None,
) -> list[ConversationSearchResult]:
    """Run one conversation search per select expression in ``query``.

    The raw query text passed to each search is ``original_query_text`` when
    given, else ``query.raw_query``. Searches that return None are left out
    of the returned list.
    """
    effective_options = options or SearchOptions()
    found: list[ConversationSearchResult] = []
    for select_expr in query.select_expressions:
        outcome = await search_conversation(
            conversation,
            search_term_group=select_expr.search_term_group,
            when_filter=select_expr.when,
            options=effective_options,
            raw_search_query=original_query_text or query.raw_query,
        )
        if outcome is None:
            continue
        found.append(outcome)
    return found
|
178
|
+
|
179
|
+
|
180
|
+
# TODO: run_search_query_by_text_similarity
|
181
|
+
|
182
|
+
|
183
|
+
async def run_query[T](
|
184
|
+
conversation: IConversation,
|
185
|
+
options: SearchOptions | None,
|
186
|
+
query: IQueryOpExpr[T],
|
187
|
+
) -> T:
|
188
|
+
secondary_indexes = conversation.secondary_indexes
|
189
|
+
assert (
|
190
|
+
secondary_indexes is not None
|
191
|
+
), "Conversation secondary indexes must be initialized before running queries"
|
192
|
+
return await query.eval(
|
193
|
+
QueryEvalContext(
|
194
|
+
conversation,
|
195
|
+
secondary_indexes.property_to_semantic_ref_index,
|
196
|
+
secondary_indexes.timestamp_index,
|
197
|
+
)
|
198
|
+
)
|
199
|
+
|
200
|
+
|
201
|
+
# NOTE: QueryCompiler instances are stateful, and not thread-safe.
|
202
|
+
# Create a new one for each query.
|
203
|
+
class QueryCompiler:
|
204
|
+
def __init__(
|
205
|
+
self,
|
206
|
+
conversation: IConversation,
|
207
|
+
secondary_indexes: IConversationSecondaryIndexes | None,
|
208
|
+
entity_term_match_weight: float = 100.0,
|
209
|
+
default_term_match_weight: float = 10.0,
|
210
|
+
related_is_exact_threshold: float = 0.95,
|
211
|
+
):
|
212
|
+
self.conversation = conversation
|
213
|
+
self.secondary_indexes = secondary_indexes
|
214
|
+
self.entity_term_match_weight = entity_term_match_weight
|
215
|
+
self.default_term_match_weight = default_term_match_weight
|
216
|
+
self.related_is_exact_threshold = related_is_exact_threshold
|
217
|
+
# All SearchTerms used which compiling the 'select' portion of the query.
|
218
|
+
self.all_search_terms: list[CompiledTermGroup] = []
|
219
|
+
# All search terms used while compiling predicates in the query.
|
220
|
+
self.all_predicate_search_terms: list[CompiledTermGroup] = []
|
221
|
+
self.all_scope_search_terms: list[CompiledTermGroup] = []
|
222
|
+
|
223
|
+
# NOTE: Everything is async because we sometimes use embeddings.
|
224
|
+
|
225
|
+
async def compile_knowledge_query(
|
226
|
+
self,
|
227
|
+
terms: SearchTermGroup,
|
228
|
+
filter: WhenFilter | None = None,
|
229
|
+
options: SearchOptions | None = None,
|
230
|
+
) -> GroupSearchResultsExpr:
|
231
|
+
query = await self.compile_query(terms, filter, options)
|
232
|
+
|
233
|
+
exact_match = (
|
234
|
+
options.exact_match
|
235
|
+
if options is not None and options.exact_match is not None
|
236
|
+
else False
|
237
|
+
)
|
238
|
+
if not exact_match:
|
239
|
+
await self.resolve_related_terms(self.all_search_terms, True)
|
240
|
+
await self.resolve_related_terms(self.all_predicate_search_terms, False)
|
241
|
+
await self.resolve_related_terms(self.all_scope_search_terms, False)
|
242
|
+
|
243
|
+
return GroupSearchResultsExpr(query)
|
244
|
+
|
245
|
+
async def compile_message_query(
|
246
|
+
self,
|
247
|
+
knowledge: (
|
248
|
+
IQueryOpExpr[dict[KnowledgeType, SemanticRefSearchResult]]
|
249
|
+
| dict[KnowledgeType, SemanticRefSearchResult]
|
250
|
+
),
|
251
|
+
options: SearchOptions | None = None,
|
252
|
+
raw_query_text: str | None = None,
|
253
|
+
) -> GetScoredMessagesExpr:
|
254
|
+
query: IQueryOpExpr = MessagesFromKnowledgeExpr(knowledge)
|
255
|
+
if options is not None:
|
256
|
+
query = await self.compile_message_re_rank(
|
257
|
+
query,
|
258
|
+
raw_query_text,
|
259
|
+
options,
|
260
|
+
)
|
261
|
+
if options.max_chars_in_budget and options.max_chars_in_budget > 0:
|
262
|
+
query = SelectMessagesInCharBudget(
|
263
|
+
query,
|
264
|
+
options.max_chars_in_budget,
|
265
|
+
)
|
266
|
+
|
267
|
+
return GetScoredMessagesExpr(query)
|
268
|
+
|
269
|
+
# TODO: compile_message_similarity_query
|
270
|
+
|
271
|
+
async def compile_query(
|
272
|
+
self,
|
273
|
+
search_term_group: SearchTermGroup,
|
274
|
+
filter: WhenFilter | None = None,
|
275
|
+
options: SearchOptions | None = None,
|
276
|
+
) -> IQueryOpExpr[dict[KnowledgeType, SemanticRefAccumulator]]:
|
277
|
+
select_expr = self.compile_select(
|
278
|
+
search_term_group,
|
279
|
+
await self.compile_scope(search_term_group, filter),
|
280
|
+
options,
|
281
|
+
)
|
282
|
+
# Constrain the select with scopes and 'where'.
|
283
|
+
if filter:
|
284
|
+
select_expr = WhereSemanticRefExpr(
|
285
|
+
select_expr,
|
286
|
+
self.compile_where(filter),
|
287
|
+
)
|
288
|
+
# And lastly, select 'TopN' and group knowledge by type.
|
289
|
+
tmp = GroupByKnowledgeTypeExpr(select_expr)
|
290
|
+
return SelectTopNKnowledgeGroupExpr(
|
291
|
+
tmp,
|
292
|
+
(
|
293
|
+
options.max_knowledge_matches
|
294
|
+
if options
|
295
|
+
and hasattr(options, "max_knowledge_matches")
|
296
|
+
and options.max_knowledge_matches
|
297
|
+
else None
|
298
|
+
),
|
299
|
+
)
|
300
|
+
|
301
|
+
def compile_select(
|
302
|
+
self,
|
303
|
+
term_group: SearchTermGroup,
|
304
|
+
scope_expr: GetScopeExpr | None = None,
|
305
|
+
options: SearchOptions | None = None,
|
306
|
+
) -> IQueryOpExpr[SemanticRefAccumulator]:
|
307
|
+
search_terms_used, select_expr = self.compile_search_group_terms(
|
308
|
+
term_group, scope_expr
|
309
|
+
)
|
310
|
+
self.all_search_terms.extend(search_terms_used)
|
311
|
+
return select_expr
|
312
|
+
|
313
|
+
def compile_search_group_terms(
|
314
|
+
self,
|
315
|
+
search_group: SearchTermGroup,
|
316
|
+
scope_expr: GetScopeExpr | None = None,
|
317
|
+
) -> tuple[list[CompiledTermGroup], IQueryOpExpr[SemanticRefAccumulator]]:
|
318
|
+
return self.compile_search_group(
|
319
|
+
search_group,
|
320
|
+
create_match_terms_boolean_expr,
|
321
|
+
scope_expr,
|
322
|
+
)
|
323
|
+
|
324
|
+
def compile_search_group_messages(
|
325
|
+
self,
|
326
|
+
search_group: SearchTermGroup,
|
327
|
+
) -> tuple[list[CompiledTermGroup], IQueryOpExpr[MessageAccumulator]]:
|
328
|
+
return self.compile_search_group(
|
329
|
+
search_group, create_match_messages_boolean_expr
|
330
|
+
)
|
331
|
+
|
332
|
+
def compile_search_group(
|
333
|
+
self,
|
334
|
+
search_group: SearchTermGroup,
|
335
|
+
create_op: Callable[
|
336
|
+
[list[IQueryOpExpr], BooleanOp, GetScopeExpr | None],
|
337
|
+
IQueryOpExpr[SemanticRefAccumulator | MessageAccumulator],
|
338
|
+
],
|
339
|
+
scope_expr: GetScopeExpr | None = None,
|
340
|
+
) -> tuple[list[CompiledTermGroup], IQueryOpExpr]:
|
341
|
+
t0_terms: list[CompiledSearchTerm] = []
|
342
|
+
compiled_terms: list[CompiledTermGroup] = [
|
343
|
+
CompiledTermGroup(boolean_op=search_group.boolean_op, terms=t0_terms)
|
344
|
+
]
|
345
|
+
term_expressions: list[IQueryOpExpr[SemanticRefAccumulator | None]] = []
|
346
|
+
for term in search_group.terms:
|
347
|
+
if isinstance(term, PropertySearchTerm):
|
348
|
+
term_expressions.append(self.compile_property_term(term))
|
349
|
+
if not isinstance(term.property_name, str):
|
350
|
+
t0_terms.append(to_required_search_term(term.property_name))
|
351
|
+
t0_terms.append(to_required_search_term(term.property_value))
|
352
|
+
elif isinstance(term, SearchTermGroup):
|
353
|
+
nested_terms, group_expr = self.compile_search_group(term, create_op)
|
354
|
+
compiled_terms.extend(nested_terms)
|
355
|
+
term_expressions.append(group_expr)
|
356
|
+
else:
|
357
|
+
term_expressions.append(self.compile_search_term(term))
|
358
|
+
t0_terms.append(to_non_required_search_term(term))
|
359
|
+
bool_expr = create_op(term_expressions, search_group.boolean_op, scope_expr)
|
360
|
+
return (compiled_terms, bool_expr)
|
361
|
+
|
362
|
+
def compile_search_term(
|
363
|
+
self,
|
364
|
+
term: SearchTerm,
|
365
|
+
) -> IQueryOpExpr[SemanticRefAccumulator | None]:
|
366
|
+
boost_weight = self.entity_term_match_weight / self.default_term_match_weight
|
367
|
+
return MatchSearchTermExpr(
|
368
|
+
term,
|
369
|
+
lambda term, sr, scored: self.boost_entities(
|
370
|
+
term, sr, scored, boost_weight
|
371
|
+
),
|
372
|
+
)
|
373
|
+
|
374
|
+
def compile_property_term(
|
375
|
+
self,
|
376
|
+
term: PropertySearchTerm,
|
377
|
+
) -> IQueryOpExpr[SemanticRefAccumulator | None]:
|
378
|
+
match term.property_name:
|
379
|
+
case "tag":
|
380
|
+
return MatchTagExpr(term.property_value)
|
381
|
+
case "topic":
|
382
|
+
return MatchTopicExpr(term.property_value)
|
383
|
+
case _:
|
384
|
+
if term.property_name in ("name", "type"):
|
385
|
+
tpvt = term.property_value.term
|
386
|
+
if tpvt.weight is None:
|
387
|
+
tpvt.weight = self.entity_term_match_weight
|
388
|
+
return MatchPropertySearchTermExpr(term)
|
389
|
+
|
390
|
+
async def compile_scope(
|
391
|
+
self,
|
392
|
+
term_group: SearchTermGroup | None = None,
|
393
|
+
filter: WhenFilter | None = None,
|
394
|
+
) -> GetScopeExpr | None:
|
395
|
+
scope_selectors: list[IQueryTextRangeSelector] = []
|
396
|
+
|
397
|
+
# First, use any provided date ranges to select scope
|
398
|
+
if filter and filter.date_range:
|
399
|
+
scope_selectors.append(TextRangesInDateRangeSelector(filter.date_range))
|
400
|
+
|
401
|
+
# Apply 'OUTER' scope
|
402
|
+
# If specific scoping terms were provided
|
403
|
+
if filter and filter.scope_defining_terms is not None:
|
404
|
+
self.add_terms_scope_selector(filter.scope_defining_terms, scope_selectors)
|
405
|
+
elif term_group is not None:
|
406
|
+
# Treat any actions as inherently scope selecting
|
407
|
+
action_terms_group = self.get_action_terms_from_search_group(term_group)
|
408
|
+
if action_terms_group is not None:
|
409
|
+
self.add_terms_scope_selector(action_terms_group, scope_selectors)
|
410
|
+
|
411
|
+
# Include any ranges directly provided by the caller
|
412
|
+
if filter and filter.text_ranges_in_scope:
|
413
|
+
scope_selectors.append(TextRangeSelector(filter.text_ranges_in_scope))
|
414
|
+
|
415
|
+
# Tags...
|
416
|
+
if filter and filter.tags:
|
417
|
+
self.add_terms_scope_selector(
|
418
|
+
create_tag_search_term_group(filter.tags), scope_selectors
|
419
|
+
)
|
420
|
+
|
421
|
+
# If a thread index is available...
|
422
|
+
threads = None
|
423
|
+
if self.secondary_indexes:
|
424
|
+
threads = self.secondary_indexes.threads
|
425
|
+
if filter and filter.thread_description and threads:
|
426
|
+
threads_in_scope = await threads.lookup_thread(filter.thread_description)
|
427
|
+
if threads_in_scope:
|
428
|
+
scope_selectors.append(
|
429
|
+
ThreadSelector(
|
430
|
+
[threads.threads[t.thread_ordinal] for t in threads_in_scope]
|
431
|
+
)
|
432
|
+
)
|
433
|
+
|
434
|
+
return GetScopeExpr(scope_selectors) if scope_selectors else None
|
435
|
+
|
436
|
+
def add_terms_scope_selector(
|
437
|
+
self,
|
438
|
+
term_group: SearchTermGroup,
|
439
|
+
scope_selectors: list[IQueryTextRangeSelector],
|
440
|
+
) -> None:
|
441
|
+
if term_group.terms:
|
442
|
+
search_terms_used, select_expr = self.compile_search_group_messages(
|
443
|
+
term_group
|
444
|
+
)
|
445
|
+
scope_selectors.append(TextRangesFromMessagesSelector(select_expr))
|
446
|
+
self.all_scope_search_terms.extend(search_terms_used)
|
447
|
+
|
448
|
+
def compile_where(self, filter: WhenFilter) -> list[IQuerySemanticRefPredicate]:
|
449
|
+
predicates: list[IQuerySemanticRefPredicate] = []
|
450
|
+
if filter.knowledge_type:
|
451
|
+
predicates.append(KnowledgeTypePredicate(filter.knowledge_type))
|
452
|
+
return predicates
|
453
|
+
|
454
|
+
async def compile_message_re_rank(
|
455
|
+
self,
|
456
|
+
src_expr: IQueryOpExpr,
|
457
|
+
raw_query_text: str | None = None,
|
458
|
+
options: SearchOptions | None = None,
|
459
|
+
) -> IQueryOpExpr:
|
460
|
+
message_index = (
|
461
|
+
self.conversation.secondary_indexes.message_index
|
462
|
+
if self.conversation.secondary_indexes
|
463
|
+
else None
|
464
|
+
)
|
465
|
+
if (
|
466
|
+
raw_query_text is not None
|
467
|
+
and isinstance(message_index, IMessageTextEmbeddingIndex)
|
468
|
+
and not await message_index.is_empty()
|
469
|
+
):
|
470
|
+
embedding = await message_index.generate_embedding(raw_query_text)
|
471
|
+
return RankMessagesBySimilarityExpr(
|
472
|
+
src_expr,
|
473
|
+
embedding,
|
474
|
+
options.max_message_matches if options else None,
|
475
|
+
options.threshold_score if options else None,
|
476
|
+
)
|
477
|
+
elif (
|
478
|
+
options
|
479
|
+
and options.max_message_matches is not None
|
480
|
+
and options.max_message_matches > 0
|
481
|
+
):
|
482
|
+
return SelectTopNExpr(src_expr, options.max_message_matches)
|
483
|
+
else:
|
484
|
+
return NoOpExpr(src_expr)
|
485
|
+
|
486
|
+
# TODO: compile_message_similarity
|
487
|
+
|
488
|
+
def get_action_terms_from_search_group(
|
489
|
+
self,
|
490
|
+
search_group: SearchTermGroup,
|
491
|
+
) -> SearchTermGroup | None:
|
492
|
+
action_group: SearchTermGroup | None = None
|
493
|
+
for term in search_group.terms:
|
494
|
+
if isinstance(term, PropertySearchTerm) and is_action_property_term(term):
|
495
|
+
action_group = action_group or SearchTermGroup(boolean_op="and")
|
496
|
+
action_group.terms.append(term)
|
497
|
+
return action_group
|
498
|
+
|
499
|
+
async def resolve_related_terms(
|
500
|
+
self,
|
501
|
+
compiled_terms: list[CompiledTermGroup],
|
502
|
+
dedupe: bool,
|
503
|
+
filter: WhenFilter | None = None,
|
504
|
+
) -> None:
|
505
|
+
if not compiled_terms:
|
506
|
+
return
|
507
|
+
for ct in compiled_terms:
|
508
|
+
self.validate_and_prepare_search_terms(ct.terms)
|
509
|
+
if (
|
510
|
+
self.secondary_indexes is not None
|
511
|
+
and self.secondary_indexes.term_to_related_terms_index is not None
|
512
|
+
):
|
513
|
+
await resolve_related_terms(
|
514
|
+
self.secondary_indexes.term_to_related_terms_index,
|
515
|
+
compiled_terms,
|
516
|
+
dedupe,
|
517
|
+
)
|
518
|
+
# Ensure that the resolved terms are valid etc.
|
519
|
+
for ct in compiled_terms:
|
520
|
+
self.validate_and_prepare_search_terms(ct.terms)
|
521
|
+
|
522
|
+
def validate_and_prepare_search_terms(
|
523
|
+
self, terms: list[CompiledSearchTerm]
|
524
|
+
) -> None:
|
525
|
+
for term in terms:
|
526
|
+
self.validate_and_prepare_search_term(term)
|
527
|
+
|
528
|
+
def validate_and_prepare_search_term(self, search_term: CompiledSearchTerm) -> bool:
|
529
|
+
if not self.validate_and_prepare_term(search_term.term):
|
530
|
+
return False
|
531
|
+
# Matching the term - exact match - counts for more than matching related terms
|
532
|
+
# Therefore, we boost any matches where the term matches directly...
|
533
|
+
if search_term.term.weight is None:
|
534
|
+
search_term.term.weight = self.default_term_match_weight
|
535
|
+
if search_term.related_terms is not None:
|
536
|
+
for related_term in search_term.related_terms:
|
537
|
+
if not self.validate_and_prepare_term(related_term):
|
538
|
+
return False
|
539
|
+
# If related term is *really* similar to the main term, score it the same
|
540
|
+
if (
|
541
|
+
related_term.weight is not None
|
542
|
+
and related_term.weight >= self.related_is_exact_threshold
|
543
|
+
):
|
544
|
+
related_term.weight = self.default_term_match_weight
|
545
|
+
return True
|
546
|
+
|
547
|
+
# Currently, just changes the case of a term
|
548
|
+
# But here, we may do other things like:
|
549
|
+
# - Check for noise terms
|
550
|
+
# - Do additional rewriting
|
551
|
+
# - Additional checks that *reject* certain search terms
|
552
|
+
# Return false if the term should be rejected
|
553
|
+
def validate_and_prepare_term(self, term: Term | None) -> bool:
|
554
|
+
if term:
|
555
|
+
term.text = term.text.lower()
|
556
|
+
return True
|
557
|
+
|
558
|
+
def boost_entities(
|
559
|
+
self,
|
560
|
+
search_term: SearchTerm,
|
561
|
+
sr: SemanticRef,
|
562
|
+
scored_ref: ScoredSemanticRefOrdinal,
|
563
|
+
boost_weight: float,
|
564
|
+
) -> ScoredSemanticRefOrdinal:
|
565
|
+
if sr.knowledge.knowledge_type == "entity" and match_entity_name_or_type(
|
566
|
+
search_term, cast(ConcreteEntity, sr.knowledge)
|
567
|
+
):
|
568
|
+
return ScoredSemanticRefOrdinal(
|
569
|
+
scored_ref.semantic_ref_ordinal,
|
570
|
+
scored_ref.score * boost_weight,
|
571
|
+
)
|
572
|
+
else:
|
573
|
+
return scored_ref
|
574
|
+
|
575
|
+
|
576
|
+
def has_conversation_results(results: list[ConversationSearchResult]) -> bool:
    """Return True if at least one result has knowledge matches or message matches."""
    for result in results:
        if result.knowledge_matches or result.message_matches:
            return True
    return False
|
578
|
+
|
579
|
+
|
580
|
+
def has_conversation_result(result: ConversationSearchResult) -> bool:
    """Return True if the result has any knowledge matches or any message matches."""
    if result.knowledge_matches:
        return True
    return bool(result.message_matches)
|
582
|
+
|
583
|
+
|
584
|
+
# TODO: Move to compilelib.py
|
585
|
+
def create_match_terms_boolean_expr(
    term_expressions: list[IQueryOpExpr[SemanticRefAccumulator | None]],
    boolean_op: BooleanOp,
    scope_expr: GetScopeExpr | None = None,
) -> MatchTermsBooleanExpr:
    """Build the term-matching expression for the given boolean operator.

    Raises:
        ValueError: If ``boolean_op`` is not "and", "or" or "or_max".
    """
    if boolean_op == "and":
        return MatchTermsAndExpr(term_expressions, scope_expr)
    if boolean_op == "or":
        return MatchTermsOrExpr(term_expressions, scope_expr)
    if boolean_op == "or_max":
        return MatchTermsOrMaxExpr(term_expressions, scope_expr)
    raise ValueError(f"Unknown boolean op: {boolean_op}")
|
599
|
+
|
600
|
+
|
601
|
+
# TODO: Move to compilelib.py
|
602
|
+
def create_match_messages_boolean_expr(
    term_expressions: list[
        IQueryOpExpr[SemanticRefAccumulator | MessageAccumulator | None]
    ],
    boolean_op: BooleanOp,
    scope_expr: GetScopeExpr | None = None,
) -> MatchMessagesBooleanExpr:
    """Build the message-matching expression for the given boolean operator.

    ``scope_expr`` is accepted for signature parity with the term variant but
    is not passed to the message expressions.

    Raises:
        ValueError: If ``boolean_op`` is not "and", "or" or "or_max".
    """
    if boolean_op == "and":
        return MatchMessagesAndExpr(term_expressions)
    if boolean_op == "or":
        return MatchMessagesOrExpr(term_expressions)
    if boolean_op == "or_max":
        return MatchMessagesOrMaxExpr(term_expressions)
    raise ValueError(f"Unknown boolean op: {boolean_op}")
|
618
|
+
|
619
|
+
|
620
|
+
# TODO: Move to compilelib.py
|
621
|
+
# TODO: Just call isinstance!
|
622
|
+
def is_property_term(term: SearchTerm) -> TypeGuard[PropertySearchTerm]:
    """Type guard: True when ``term`` is a PropertySearchTerm instance."""
    term_is_property = isinstance(term, PropertySearchTerm)
    return term_is_property
|
624
|
+
|
625
|
+
|
626
|
+
# TODO: Move to compilelib.py
|
627
|
+
def is_action_property_term(term: PropertySearchTerm) -> bool:
    """Return True when the term's property name is one of the action properties."""
    # Kept as a tuple: membership is tested with ==, so a property name that is
    # not a string (and may not be hashable) is handled without error.
    action_property_names = ("subject", "verb", "object", "indirectObject")
    return term.property_name in action_property_names
|