typeagent-py 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- typeagent/aitools/auth.py +61 -0
- typeagent/aitools/embeddings.py +232 -0
- typeagent/aitools/utils.py +244 -0
- typeagent/aitools/vectorbase.py +175 -0
- typeagent/knowpro/answer_context_schema.py +49 -0
- typeagent/knowpro/answer_response_schema.py +34 -0
- typeagent/knowpro/answers.py +577 -0
- typeagent/knowpro/collections.py +759 -0
- typeagent/knowpro/common.py +9 -0
- typeagent/knowpro/convknowledge.py +112 -0
- typeagent/knowpro/convsettings.py +94 -0
- typeagent/knowpro/convutils.py +49 -0
- typeagent/knowpro/date_time_schema.py +32 -0
- typeagent/knowpro/field_helpers.py +87 -0
- typeagent/knowpro/fuzzyindex.py +144 -0
- typeagent/knowpro/interfaces.py +818 -0
- typeagent/knowpro/knowledge.py +88 -0
- typeagent/knowpro/kplib.py +125 -0
- typeagent/knowpro/query.py +1128 -0
- typeagent/knowpro/search.py +628 -0
- typeagent/knowpro/search_query_schema.py +165 -0
- typeagent/knowpro/searchlang.py +729 -0
- typeagent/knowpro/searchlib.py +345 -0
- typeagent/knowpro/secindex.py +100 -0
- typeagent/knowpro/serialization.py +390 -0
- typeagent/knowpro/textlocindex.py +179 -0
- typeagent/knowpro/utils.py +17 -0
- typeagent/mcp/server.py +139 -0
- typeagent/podcasts/podcast.py +473 -0
- typeagent/podcasts/podcast_import.py +105 -0
- typeagent/storage/__init__.py +25 -0
- typeagent/storage/memory/__init__.py +13 -0
- typeagent/storage/memory/collections.py +68 -0
- typeagent/storage/memory/convthreads.py +81 -0
- typeagent/storage/memory/messageindex.py +178 -0
- typeagent/storage/memory/propindex.py +289 -0
- typeagent/storage/memory/provider.py +84 -0
- typeagent/storage/memory/reltermsindex.py +318 -0
- typeagent/storage/memory/semrefindex.py +660 -0
- typeagent/storage/memory/timestampindex.py +176 -0
- typeagent/storage/sqlite/__init__.py +31 -0
- typeagent/storage/sqlite/collections.py +362 -0
- typeagent/storage/sqlite/messageindex.py +382 -0
- typeagent/storage/sqlite/propindex.py +119 -0
- typeagent/storage/sqlite/provider.py +293 -0
- typeagent/storage/sqlite/reltermsindex.py +328 -0
- typeagent/storage/sqlite/schema.py +248 -0
- typeagent/storage/sqlite/semrefindex.py +156 -0
- typeagent/storage/sqlite/timestampindex.py +146 -0
- typeagent/storage/utils.py +41 -0
- typeagent_py-0.1.0.dist-info/METADATA +28 -0
- typeagent_py-0.1.0.dist-info/RECORD +55 -0
- typeagent_py-0.1.0.dist-info/WHEEL +5 -0
- typeagent_py-0.1.0.dist-info/licenses/LICENSE +21 -0
- typeagent_py-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,729 @@
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
2
|
+
# Licensed under the MIT License.
|
3
|
+
|
4
|
+
from collections.abc import Callable
|
5
|
+
import copy
|
6
|
+
from dataclasses import dataclass, replace
|
7
|
+
import datetime
|
8
|
+
from typing import Literal, TypeGuard, cast
|
9
|
+
|
10
|
+
import typechat
|
11
|
+
|
12
|
+
from ..knowpro.collections import PropertyTermSet
|
13
|
+
from ..knowpro.convutils import get_time_range_prompt_section_for_conversation
|
14
|
+
from ..knowpro.interfaces import (
|
15
|
+
DateRange,
|
16
|
+
Datetime,
|
17
|
+
IConversation,
|
18
|
+
KnowledgePropertyName,
|
19
|
+
KnowledgeType,
|
20
|
+
PropertySearchTerm,
|
21
|
+
SearchSelectExpr,
|
22
|
+
SearchTerm,
|
23
|
+
SearchTermGroup,
|
24
|
+
SearchTermGroupTypes,
|
25
|
+
Term,
|
26
|
+
WhenFilter,
|
27
|
+
)
|
28
|
+
from ..storage.memory.propindex import PropertyNames
|
29
|
+
from ..knowpro.search import (
|
30
|
+
ConversationSearchResult,
|
31
|
+
SearchOptions,
|
32
|
+
SearchQueryExpr,
|
33
|
+
has_conversation_result,
|
34
|
+
has_conversation_results,
|
35
|
+
run_search_query,
|
36
|
+
)
|
37
|
+
from ..knowpro.searchlib import create_property_search_term
|
38
|
+
|
39
|
+
from .date_time_schema import DateTime, DateTimeRange
|
40
|
+
from .search_query_schema import (
|
41
|
+
ActionTerm,
|
42
|
+
EntityTerm,
|
43
|
+
SearchExpr,
|
44
|
+
SearchFilter,
|
45
|
+
SearchQuery,
|
46
|
+
VerbsTerm,
|
47
|
+
)
|
48
|
+
|
49
|
+
# APIs for searching with Natural Language.
|
50
|
+
# Work in progress; frequent improvements/tweaks.
|
51
|
+
|
52
|
+
|
53
|
+
type SearchQueryTranslator = typechat.TypeChatJsonTranslator[SearchQuery]
|
54
|
+
|
55
|
+
|
56
|
+
@dataclass
class LanguageSearchFilter:
    """Optional filter settings that accompany a natural-language search.

    Every field defaults to ``None``.
    """

    # NOTE(review): these camelCase names are part of the public interface and
    # are kept as-is; presumably they mirror the TypeScript API -- confirm.
    knowledgeType: KnowledgeType | None = None
    threadDescription: str | None = None
    tags: list[str] | None = None
|
63
|
+
|
64
|
+
|
65
|
+
@dataclass
class LanguageQueryExpr:
    """A natural-language query together with its translated and compiled forms."""

    # The natural-language text of the query.
    query_text: str
    # The structured search query that query_text was translated to.
    query: SearchQuery
    # The search query expressions the structured query was compiled to.
    query_expressions: list[SearchQueryExpr]
|
71
|
+
|
72
|
+
|
73
|
+
@dataclass
class LanguageQueryCompileOptions:
    """Switches that control how a ``SearchQuery`` is compiled."""

    # Passed as the exact-match flag for entity names used in scopes; when True
    # no relaxed fallback query is compiled.
    exact_scope: bool = False
    # Whether action verbs take part in scope matching.
    verb_scope: bool = True
    # Predicate returning True for terms worth searching; used to ignore noise terms.
    term_filter: Callable[[str], bool] | None = None
    # Debug flag: set to False to turn off scope matching entirely.
    apply_scope: bool = True
|
80
|
+
|
81
|
+
|
82
|
+
@dataclass
class LanguageSearchOptions(SearchOptions):
    """``SearchOptions`` extended with settings for natural-language search."""

    # Options handed to the query compiler; None means compiler defaults.
    compile_options: LanguageQueryCompileOptions | None = None
    fallback_rag_options: None = None  # Don't need LanguageSearchRagOptions yet
    # Extra prompt sections placed before the query when it is translated.
    model_instructions: list[typechat.PromptSection] | None = None

    def __repr__(self) -> str:
        """Return a compact repr listing only the fields that are not None.

        Only dataclass fields are reported. Walking ``dir(self)`` would also
        pick up public methods, properties and class-level attributes.
        Field names are sorted, which matches the order ``dir()`` produced.
        """
        from dataclasses import fields  # Local import keeps this block self-contained.

        parts = []
        for name in sorted(f.name for f in fields(self)):
            if name.startswith("_"):
                continue
            value = getattr(self, name)
            if value is not None:
                parts.append(f"{name}={value!r}")
        return f"{self.__class__.__name__}({', '.join(parts)})"
|
96
|
+
|
97
|
+
|
98
|
+
@dataclass
class LanguageSearchDebugContext:
    """Holds intermediate search results and optional overrides for debugging."""

    # Outputs, filled in while a search runs.

    # Query returned by the LLM:
    search_query: SearchQuery | None = None
    # What search_query was compiled into:
    search_query_expr: list[SearchQueryExpr] | None = None
    # For each expr in search_query_expr, whether a raw text similarity match was used:
    # TODO: used_similarity_fallback: list[bool] | None = None

    # Inputs: values that override the search query.

    # When set, used instead of translating the query text.
    use_search_query: SearchQuery | None = None
    # When set, used instead of translating and compiling.
    use_compiled_search_query_exprs: list[SearchQueryExpr] | None = None
|
110
|
+
|
111
|
+
|
112
|
+
# NOTE: Arguments 2 and 3 are reversed compared to the TypeScript version
|
113
|
+
# for consistency with other similar functions in this file.
|
114
|
+
async def search_conversation_with_language(
    conversation: IConversation,
    query_translator: SearchQueryTranslator,
    search_text: str,
    options: LanguageSearchOptions | None = None,
    lang_search_filter: LanguageSearchFilter | None = None,
    debug_context: LanguageSearchDebugContext | None = None,
) -> typechat.Result[list[ConversationSearchResult]]:
    """Search a conversation using a natural-language query.

    The text is translated to a structured query, compiled into search query
    expressions, and each expression is run in turn. When an expression yields
    no results and a relaxed fallback expression exists for it, the fallback
    is run instead.

    Args:
        conversation: The conversation to search.
        query_translator: Translator that turns text into a ``SearchQuery``.
        search_text: The natural-language query.
        options: Search and compile options; defaults are used when omitted.
        lang_search_filter: Optional filter passed to the query compiler.
        debug_context: Optional object that receives the compiled expressions
            and may supply a query and precompiled expressions to use instead
            of translating.

    Returns:
        ``typechat.Success`` holding the combined results of all expressions,
        or the translation failure result unchanged.
    """
    options = options or LanguageSearchOptions()
    if debug_context and debug_context.use_compiled_search_query_exprs:
        search_query = debug_context.use_search_query
        search_query_exprs = debug_context.use_compiled_search_query_exprs
    else:
        lang_query_result = await search_query_expr_from_language(
            conversation,
            query_translator,
            search_text,
            options,
            lang_search_filter,
            debug_context,
        )
        if not isinstance(lang_query_result, typechat.Success):
            return lang_query_result
        search_query = lang_query_result.value.query
        search_query_exprs = lang_query_result.value.query_expressions

    if debug_context:
        debug_context.search_query_expr = search_query_exprs
        # TODO: debug_context.used_similarity_fallback = [False] * len(search_query_exprs)

    fallback_query_exprs: list[SearchQueryExpr] | None = None
    if search_query:
        fallback_query_exprs = _compile_fallback_query(
            conversation,
            search_query,
            options.compile_options or LanguageQueryCompileOptions(),
            lang_search_filter,
        )

    search_results: list[ConversationSearchResult] = []
    for i, search_query_expr in enumerate(search_query_exprs):
        # The fallback list can be shorter than the primary list when the debug
        # context supplies precompiled expressions, so guard the index.
        fallback_query = (
            fallback_query_exprs[i]
            if fallback_query_exprs and i < len(fallback_query_exprs)
            else None
        )
        query_result = await run_search_query(conversation, search_query_expr, options)
        if fallback_query and not has_conversation_results(query_result):
            # Rerun the query but with verb matching turned off for scopes.
            query_result = await run_search_query(
                conversation,
                fallback_query,
                options,
            )
        # TODO: If no matches and fallback enabled... run the raw query. (RAG fallback)
        search_results.extend(query_result)

    return typechat.Success(search_results)
|
169
|
+
|
170
|
+
|
171
|
+
def _compile_fallback_query(
    conversation: IConversation,
    query: SearchQuery,
    compile_options: LanguageQueryCompileOptions,
    lang_search_filter: LanguageSearchFilter | None = None,
) -> list[SearchQueryExpr] | None:
    """Compile a more forgiving version of ``query``, if the options allow one.

    Scoped queries are precise, but LLMs vary in how they translate an
    utterance, especially its verbs. The verbs may not match the action verbs
    in the index, and related terms may miss the similarity cutoff.
    When ``compile_options.exact_scope`` is False and verb scoping is on, the
    query is recompiled with verb matching disabled. This improves recall while
    still scoping on the action subject and object.

    Returns None when no fallback query is possible.
    """
    # A fallback only makes sense when verb scoping is on and exact scoping is off.
    if compile_options.exact_scope or not compile_options.verb_scope:
        return None

    relaxed_options = replace(compile_options, verb_scope=False)
    return compile_search_query(
        conversation,
        query,
        relaxed_options,
        lang_search_filter,
    )
|
199
|
+
|
200
|
+
|
201
|
+
async def search_query_expr_from_language(
    conversation: IConversation,
    translator: SearchQueryTranslator,
    query_text: str,
    options: LanguageSearchOptions | None = None,
    lang_search_filter: LanguageSearchFilter | None = None,
    debug_context: LanguageSearchDebugContext | None = None,
) -> typechat.Result[LanguageQueryExpr]:
    """Translate ``query_text`` to a structured query and compile it.

    If ``debug_context.use_search_query`` is set, that query is used and no
    translation takes place. A failed translation is returned unchanged.
    """
    active_options = options if options else LanguageSearchOptions()

    preset_query = debug_context.use_search_query if debug_context else None
    if preset_query:
        # The debug context supplies the query, so skip translation.
        query = preset_query
    else:
        translation = await search_query_from_language(
            conversation,
            translator,
            query_text,
            active_options.model_instructions,
        )
        if not isinstance(translation, typechat.Success):
            return translation
        query = translation.value

    # NOTE(review): the reviewed source had lost its indentation; confirm that
    # this assignment belongs after the if/else rather than inside the else.
    if debug_context:
        debug_context.search_query = query

    compiled = compile_search_query(
        conversation,
        query,
        active_options.compile_options,
        lang_search_filter,
    )
    return typechat.Success(LanguageQueryExpr(query_text, query, compiled))
|
239
|
+
|
240
|
+
|
241
|
+
def compile_search_query(
    conversation: IConversation,
    query: SearchQuery,
    options: LanguageQueryCompileOptions | None = None,
    lang_search_filter: LanguageSearchFilter | None = None,
) -> list[SearchQueryExpr]:
    """Compile ``query`` into search query expressions using a fresh compiler.

    Default compile options are used when ``options`` is not given.
    """
    effective_options = options or LanguageQueryCompileOptions()
    return SearchQueryCompiler(
        conversation,
        effective_options,
        lang_search_filter,
    ).compile_query(query)
|
253
|
+
|
254
|
+
|
255
|
+
def compile_search_filter(
    conversation: IConversation,
    search_filter: SearchFilter,
    options: LanguageQueryCompileOptions | None = None,
    lang_search_filter: LanguageSearchFilter | None = None,
) -> SearchSelectExpr:
    """Compile one ``SearchFilter`` into a select expression using a fresh compiler.

    Default compile options are used when ``options`` is not given.
    """
    effective_options = options or LanguageQueryCompileOptions()
    return SearchQueryCompiler(
        conversation,
        effective_options,
        lang_search_filter,
    ).compile_search_filter(search_filter)
|
267
|
+
|
268
|
+
|
269
|
+
class SearchQueryCompiler:
|
270
|
+
|
271
|
+
def __init__(
|
272
|
+
self,
|
273
|
+
conversation: IConversation,
|
274
|
+
options: LanguageQueryCompileOptions | None = None,
|
275
|
+
lang_search_filter: LanguageSearchFilter | None = None,
|
276
|
+
):
|
277
|
+
self.conversation = conversation
|
278
|
+
self.options = options = options or LanguageQueryCompileOptions()
|
279
|
+
self.lang_search_filter = lang_search_filter or LanguageSearchFilter()
|
280
|
+
self.exact_scope = options.exact_scope
|
281
|
+
self.verb_scope = options.verb_scope
|
282
|
+
self.term_filter = options.term_filter
|
283
|
+
self.apply_scope = options.apply_scope
|
284
|
+
|
285
|
+
self.entity_terms_added = PropertyTermSet()
|
286
|
+
self.dedupe = True
|
287
|
+
|
288
|
+
def compile_query(self, query: SearchQuery) -> list[SearchQueryExpr]:
|
289
|
+
query = copy.copy(query) # Shallow copy so we can modify it
|
290
|
+
query_expressions: list[SearchQueryExpr] = []
|
291
|
+
for search_expr in query.search_expressions:
|
292
|
+
query_expressions.append(self.compile_search_expr(search_expr))
|
293
|
+
return query_expressions
|
294
|
+
|
295
|
+
# Every searchExpr has one or more filters.
|
296
|
+
# Each filter is compiled into a selectExpr.
|
297
|
+
def compile_search_expr(self, search_expr: SearchExpr) -> SearchQueryExpr:
|
298
|
+
query_expr = SearchQueryExpr(select_expressions=[])
|
299
|
+
if search_expr.filters:
|
300
|
+
for filter in search_expr.filters:
|
301
|
+
query_expr.select_expressions.append(self.compile_search_filter(filter))
|
302
|
+
query_expr.raw_query = search_expr.rewritten_query
|
303
|
+
return query_expr
|
304
|
+
|
305
|
+
def compile_search_filter(self, filter: SearchFilter) -> SearchSelectExpr:
|
306
|
+
search_term_group = self.compile_term_group(filter)
|
307
|
+
when = self.compile_when(filter)
|
308
|
+
return SearchSelectExpr(
|
309
|
+
search_term_group,
|
310
|
+
when,
|
311
|
+
)
|
312
|
+
|
313
|
+
def compile_term_group(self, filter: SearchFilter) -> SearchTermGroup:
|
314
|
+
term_group = SearchTermGroup(boolean_op="or", terms=[])
|
315
|
+
self.entity_terms_added.clear()
|
316
|
+
terms = filter.entity_search_terms
|
317
|
+
if is_entity_term_list(terms):
|
318
|
+
self.compile_entity_terms(terms, term_group)
|
319
|
+
if filter.action_search_term:
|
320
|
+
# term_group.terms.append("filter.actionSearchTerm, false, true")
|
321
|
+
self.compile_action_term_as_search_terms(
|
322
|
+
filter.action_search_term, term_group, False
|
323
|
+
)
|
324
|
+
if filter.search_terms is not None:
|
325
|
+
self.compile_search_terms(filter.search_terms, term_group)
|
326
|
+
elif len(term_group.terms) == 0:
|
327
|
+
# Summary
|
328
|
+
term_group.terms.append(create_property_search_term("topic", "*"))
|
329
|
+
return term_group
|
330
|
+
|
331
|
+
def compile_when(self, filter: SearchFilter) -> WhenFilter | None:
|
332
|
+
when: WhenFilter | None = None
|
333
|
+
action_term = filter.action_search_term
|
334
|
+
if (
|
335
|
+
self.apply_scope
|
336
|
+
and action_term is not None
|
337
|
+
and self.should_add_scope(action_term)
|
338
|
+
):
|
339
|
+
scope_defining_terms = self.compile_scope(
|
340
|
+
action_term,
|
341
|
+
include_additional_entities=False,
|
342
|
+
include_verbs=self.verb_scope if self.verb_scope is not None else True,
|
343
|
+
)
|
344
|
+
if scope_defining_terms.terms:
|
345
|
+
if when is None:
|
346
|
+
when = WhenFilter()
|
347
|
+
when.scope_defining_terms = scope_defining_terms
|
348
|
+
if filter.time_range is not None:
|
349
|
+
if when is None:
|
350
|
+
when = WhenFilter()
|
351
|
+
when.date_range = date_range_from_datetime_range(filter.time_range)
|
352
|
+
return when
|
353
|
+
|
354
|
+
def compile_action_term_as_search_terms(
|
355
|
+
self,
|
356
|
+
action_term: ActionTerm,
|
357
|
+
term_group: SearchTermGroup | None = None,
|
358
|
+
use_or_max: bool = True,
|
359
|
+
) -> SearchTermGroup:
|
360
|
+
if term_group is None:
|
361
|
+
term_group = SearchTermGroup("or")
|
362
|
+
action_group = SearchTermGroup("or_max") if use_or_max else term_group
|
363
|
+
if action_term.action_verbs is not None:
|
364
|
+
for verb in action_term.action_verbs.words:
|
365
|
+
self.add_property_term_to_group("topic", verb, action_group)
|
366
|
+
if is_entity_term_list(action_term.actor_entities):
|
367
|
+
self.compile_entity_terms_as_search_terms(
|
368
|
+
action_term.actor_entities, action_group
|
369
|
+
)
|
370
|
+
if is_entity_term_list(action_term.target_entities):
|
371
|
+
self.compile_entity_terms_as_search_terms(
|
372
|
+
action_term.target_entities, action_group
|
373
|
+
)
|
374
|
+
if is_entity_term_list(action_term.additional_entities):
|
375
|
+
self.compile_entity_terms_as_search_terms(
|
376
|
+
action_term.additional_entities, action_group
|
377
|
+
)
|
378
|
+
return term_group
|
379
|
+
|
380
|
+
def compile_search_terms(
|
381
|
+
self, search_terms: list[str], term_group: SearchTermGroup | None = None
|
382
|
+
) -> SearchTermGroup:
|
383
|
+
if term_group is None:
|
384
|
+
term_group = SearchTermGroup(boolean_op="or", terms=[])
|
385
|
+
for search_term in search_terms:
|
386
|
+
term_group.terms.append(SearchTerm(Term(search_term)))
|
387
|
+
return term_group
|
388
|
+
|
389
|
+
def compile_entity_terms(
|
390
|
+
self,
|
391
|
+
entity_terms: list[EntityTerm],
|
392
|
+
term_group: SearchTermGroup,
|
393
|
+
use_or_max: bool = True,
|
394
|
+
) -> None:
|
395
|
+
if use_or_max:
|
396
|
+
save_dedupe = self.dedupe
|
397
|
+
self.dedupe = False
|
398
|
+
for term in entity_terms:
|
399
|
+
or_max = SearchTermGroup(
|
400
|
+
boolean_op="or_max",
|
401
|
+
terms=[],
|
402
|
+
)
|
403
|
+
self.add_entity_term_to_group(term, or_max)
|
404
|
+
term_group.terms.append(optimize_or_max(or_max))
|
405
|
+
self.dedupe = save_dedupe
|
406
|
+
else:
|
407
|
+
for term in entity_terms:
|
408
|
+
self.add_entity_term_to_group(term, term_group)
|
409
|
+
# Also search for topics.
|
410
|
+
for term in entity_terms:
|
411
|
+
self.add_entity_name_to_group(term, PropertyNames.Topic, term_group)
|
412
|
+
if term.facets is not None:
|
413
|
+
for facet in term.facets:
|
414
|
+
if facet.facet_value not in (None, "*"):
|
415
|
+
self.add_property_term_to_group(
|
416
|
+
facet.facet_value, "topic", term_group
|
417
|
+
)
|
418
|
+
|
419
|
+
def compile_entity_terms_as_search_terms(
|
420
|
+
self,
|
421
|
+
entity_terms: list[EntityTerm],
|
422
|
+
term_group: SearchTermGroup,
|
423
|
+
) -> None:
|
424
|
+
for term in entity_terms:
|
425
|
+
self.add_entity_term_as_search_terms_to_group(term, term_group)
|
426
|
+
|
427
|
+
def compile_scope(
|
428
|
+
self,
|
429
|
+
action_term: ActionTerm,
|
430
|
+
include_additional_entities: bool = True,
|
431
|
+
include_verbs: bool = True,
|
432
|
+
) -> SearchTermGroup:
|
433
|
+
save_dedupe = self.dedupe
|
434
|
+
self.dedupe = False
|
435
|
+
|
436
|
+
term_group = self.compile_action_term(action_term, True, include_verbs)
|
437
|
+
if include_additional_entities and is_entity_term_list(
|
438
|
+
action_term.additional_entities
|
439
|
+
):
|
440
|
+
self.add_entity_names_to_group(
|
441
|
+
action_term.additional_entities,
|
442
|
+
PropertyNames.EntityName,
|
443
|
+
term_group,
|
444
|
+
self.exact_scope,
|
445
|
+
)
|
446
|
+
|
447
|
+
self.dedupe = save_dedupe
|
448
|
+
return term_group
|
449
|
+
|
450
|
+
def compile_action_term(
|
451
|
+
self,
|
452
|
+
action_term: ActionTerm,
|
453
|
+
use_and: bool,
|
454
|
+
include_verbs: bool,
|
455
|
+
) -> SearchTermGroup:
|
456
|
+
save_dedupe = self.dedupe
|
457
|
+
self.dedupe = False
|
458
|
+
|
459
|
+
term_group: SearchTermGroup
|
460
|
+
if is_entity_term_list(action_term.target_entities):
|
461
|
+
term_group = SearchTermGroup("and" if use_and else "or")
|
462
|
+
for entity in action_term.target_entities:
|
463
|
+
# S.V.O. == Subject, Verb, Object
|
464
|
+
svo_term_group = (
|
465
|
+
self.compile_subject_and_verb(action_term)
|
466
|
+
if include_verbs
|
467
|
+
else self.compile_subject(action_term)
|
468
|
+
)
|
469
|
+
# A target can be the name of an object of an action OR the name of an entity.
|
470
|
+
object_term_group = self.compile_object(entity)
|
471
|
+
if object_term_group.terms:
|
472
|
+
svo_term_group.terms.append(object_term_group)
|
473
|
+
term_group.terms.append(svo_term_group)
|
474
|
+
if len(term_group.terms) == 1:
|
475
|
+
term_group = cast(SearchTermGroup, term_group.terms[0])
|
476
|
+
else:
|
477
|
+
term_group = self.compile_subject_and_verb(action_term)
|
478
|
+
|
479
|
+
self.dedupe = save_dedupe
|
480
|
+
return term_group
|
481
|
+
|
482
|
+
def compile_subject_and_verb(self, action_term: ActionTerm) -> SearchTermGroup:
|
483
|
+
term_group = SearchTermGroup("and")
|
484
|
+
self.add_subject_to_group(action_term, term_group)
|
485
|
+
if action_term.action_verbs is not None:
|
486
|
+
self.add_verbs_to_group(action_term.action_verbs, term_group)
|
487
|
+
return term_group
|
488
|
+
|
489
|
+
def compile_subject(self, action_term: ActionTerm) -> SearchTermGroup:
|
490
|
+
term_group = SearchTermGroup("and")
|
491
|
+
self.add_subject_to_group(action_term, term_group)
|
492
|
+
return term_group
|
493
|
+
|
494
|
+
def add_subject_to_group(
|
495
|
+
self,
|
496
|
+
action_term: ActionTerm,
|
497
|
+
term_group: SearchTermGroup,
|
498
|
+
) -> None:
|
499
|
+
if is_entity_term_list(action_term.actor_entities):
|
500
|
+
self.add_entity_names_to_group(
|
501
|
+
action_term.actor_entities, PropertyNames.Subject, term_group
|
502
|
+
)
|
503
|
+
|
504
|
+
def compile_object(self, entity: EntityTerm) -> SearchTermGroup:
|
505
|
+
# A target can be the name of an object of an action OR the name of an entity.
|
506
|
+
term_group = SearchTermGroup("or")
|
507
|
+
self.add_entity_name_to_group(entity, PropertyNames.Object, term_group)
|
508
|
+
self.add_entity_name_to_group(
|
509
|
+
entity, PropertyNames.EntityName, term_group, self.exact_scope
|
510
|
+
)
|
511
|
+
self.add_entity_name_to_group(
|
512
|
+
entity, PropertyNames.Topic, term_group, self.exact_scope
|
513
|
+
)
|
514
|
+
return term_group
|
515
|
+
|
516
|
+
def add_verbs_to_group(
|
517
|
+
self,
|
518
|
+
verbs: VerbsTerm,
|
519
|
+
term_group: SearchTermGroup,
|
520
|
+
) -> None:
|
521
|
+
for verb in verbs.words:
|
522
|
+
self.add_property_term_to_group("verb", verb, term_group)
|
523
|
+
|
524
|
+
def add_entity_term_as_search_terms_to_group(
|
525
|
+
self, entity_term: EntityTerm, term_group: SearchTermGroup
|
526
|
+
) -> None:
|
527
|
+
if entity_term.is_name_pronoun:
|
528
|
+
return
|
529
|
+
self.add_search_term_to_group(entity_term.name, term_group)
|
530
|
+
if entity_term.type:
|
531
|
+
for type in entity_term.type:
|
532
|
+
self.add_search_term_to_group(type, term_group)
|
533
|
+
if entity_term.facets:
|
534
|
+
for facet in entity_term.facets:
|
535
|
+
self.add_search_term_to_group(facet.facet_name, term_group)
|
536
|
+
self.add_search_term_to_group(facet.facet_value, term_group)
|
537
|
+
|
538
|
+
def add_search_term_to_group(
|
539
|
+
self,
|
540
|
+
term: str,
|
541
|
+
term_group: SearchTermGroup,
|
542
|
+
) -> None:
|
543
|
+
if self.is_searchable_string(term):
|
544
|
+
term_group.terms.append(SearchTerm(Term(term)))
|
545
|
+
|
546
|
+
def add_entity_term_to_group(
|
547
|
+
self,
|
548
|
+
entity_term: EntityTerm,
|
549
|
+
term_group: SearchTermGroup,
|
550
|
+
exact_match_name=False,
|
551
|
+
) -> None:
|
552
|
+
self.add_property_term_to_group(
|
553
|
+
PropertyNames.EntityName.value,
|
554
|
+
entity_term.name,
|
555
|
+
term_group,
|
556
|
+
exact_match_name,
|
557
|
+
)
|
558
|
+
if entity_term.type:
|
559
|
+
for type in entity_term.type:
|
560
|
+
self.add_property_term_to_group(
|
561
|
+
PropertyNames.EntityType.value, type, term_group
|
562
|
+
)
|
563
|
+
if entity_term.facets:
|
564
|
+
for facet in entity_term.facets:
|
565
|
+
name_is_wildcard = facet.facet_name == "*"
|
566
|
+
value_is_wildcard = facet.facet_value == "*"
|
567
|
+
match name_is_wildcard, value_is_wildcard:
|
568
|
+
case False, False:
|
569
|
+
self.add_property_term_to_group(
|
570
|
+
facet.facet_name,
|
571
|
+
facet.facet_value,
|
572
|
+
term_group,
|
573
|
+
)
|
574
|
+
case False, True:
|
575
|
+
self.add_property_term_to_group(
|
576
|
+
PropertyNames.FacetName.value,
|
577
|
+
facet.facet_name,
|
578
|
+
term_group,
|
579
|
+
)
|
580
|
+
case True, False:
|
581
|
+
self.add_property_term_to_group(
|
582
|
+
PropertyNames.FacetValue.value,
|
583
|
+
facet.facet_value,
|
584
|
+
term_group,
|
585
|
+
)
|
586
|
+
case True, True:
|
587
|
+
pass
|
588
|
+
|
589
|
+
def add_entity_names_to_group(
|
590
|
+
self,
|
591
|
+
entity_terms: list[EntityTerm],
|
592
|
+
property_name: PropertyNames,
|
593
|
+
term_group: SearchTermGroup,
|
594
|
+
exact_match_value: bool = False,
|
595
|
+
) -> None:
|
596
|
+
for entity_term in entity_terms:
|
597
|
+
self.add_entity_name_to_group(
|
598
|
+
entity_term, property_name, term_group, exact_match_value
|
599
|
+
)
|
600
|
+
|
601
|
+
def add_entity_name_to_group(
|
602
|
+
self,
|
603
|
+
entity_term: EntityTerm,
|
604
|
+
property_name: PropertyNames,
|
605
|
+
term_group: SearchTermGroup,
|
606
|
+
exact_match_value: bool = False,
|
607
|
+
) -> None:
|
608
|
+
if not entity_term.is_name_pronoun:
|
609
|
+
self.add_property_term_to_group(
|
610
|
+
property_name.value,
|
611
|
+
entity_term.name,
|
612
|
+
term_group,
|
613
|
+
exact_match_value,
|
614
|
+
)
|
615
|
+
|
616
|
+
def add_search_term_to_groupadd_entity_name_to_group(
|
617
|
+
self,
|
618
|
+
entity_term: EntityTerm,
|
619
|
+
property_name: PropertyNames,
|
620
|
+
term_group: SearchTermGroup,
|
621
|
+
exact_match_value: bool = False,
|
622
|
+
) -> None:
|
623
|
+
if not entity_term.is_name_pronoun:
|
624
|
+
self.add_property_term_to_group(
|
625
|
+
property_name.value,
|
626
|
+
entity_term.name,
|
627
|
+
term_group,
|
628
|
+
exact_match_value,
|
629
|
+
)
|
630
|
+
|
631
|
+
def add_property_term_to_group(
|
632
|
+
self,
|
633
|
+
property_name: str,
|
634
|
+
property_value: str,
|
635
|
+
term_group: SearchTermGroup,
|
636
|
+
exact_match_value=False,
|
637
|
+
) -> None:
|
638
|
+
if not self.is_searchable_string(property_name):
|
639
|
+
return
|
640
|
+
if not self.is_searchable_string(property_value):
|
641
|
+
return
|
642
|
+
if self.is_noise_term(property_value):
|
643
|
+
return
|
644
|
+
# Dedupe any terms already added to the group earlier.
|
645
|
+
if not self.dedupe or not self.entity_terms_added.has(
|
646
|
+
property_name, property_value
|
647
|
+
):
|
648
|
+
search_term = create_property_search_term(
|
649
|
+
property_name, property_value, exact_match_value
|
650
|
+
)
|
651
|
+
term_group.terms.append(search_term)
|
652
|
+
self.entity_terms_added.add(property_name, search_term.property_value.term)
|
653
|
+
|
654
|
+
def is_searchable_string(self, value: str) -> bool:
|
655
|
+
if not value or value == "*":
|
656
|
+
return False
|
657
|
+
return self.term_filter is None or self.term_filter(value)
|
658
|
+
|
659
|
+
def is_noise_term(self, value: str) -> bool:
|
660
|
+
return value.lower() in ("thing", "object", "concept", "idea", "entity")
|
661
|
+
|
662
|
+
def should_add_scope(self, action_term: ActionTerm) -> bool:
|
663
|
+
if not action_term or action_term.is_informational:
|
664
|
+
return False
|
665
|
+
if self.exact_scope:
|
666
|
+
return True
|
667
|
+
# If the action has no subject, disable scope.
|
668
|
+
return is_entity_term_list(action_term.actor_entities)
|
669
|
+
|
670
|
+
|
671
|
+
# Miscellaneous helper functions.
|
672
|
+
|
673
|
+
|
674
|
+
# A type guard makes a promise to the type checker when it returns True.
|
675
|
+
def is_entity_term_list(
|
676
|
+
terms: list[EntityTerm] | Literal["*"] | None,
|
677
|
+
) -> TypeGuard[list[EntityTerm]]:
|
678
|
+
return isinstance(terms, list)
|
679
|
+
|
680
|
+
|
681
|
+
def optimize_or_max(term_group: SearchTermGroup) -> SearchTermGroupTypes:
    """Unwrap a group that holds exactly one term; otherwise return it unchanged."""
    members = term_group.terms
    return members[0] if len(members) == 1 else term_group
|
685
|
+
|
686
|
+
|
687
|
+
def date_range_from_datetime_range(date_time_range: DateTimeRange) -> DateRange:
    """Convert a schema ``DateTimeRange`` into a ``DateRange``.

    The end is None when the range has no stop date.
    """
    range_start = datetime_from_date_time(date_time_range.start_date)
    range_end = None
    if date_time_range.stop_date:
        range_end = datetime_from_date_time(date_time_range.stop_date)
    return DateRange(start=range_start, end=range_end)
|
696
|
+
|
697
|
+
|
698
|
+
def datetime_from_date_time(date_time: DateTime) -> Datetime:
    """Convert a schema ``DateTime`` into a ``Datetime`` tagged as UTC.

    A missing time part is treated as midnight.
    """
    # We ASSUME that the LLM gave the DateTime in UTC.
    # If it didn't, well, how would we know???
    day_part = date_time.date
    time_part = date_time.time
    if time_part:
        hour, minute, second = time_part.hour, time_part.minute, time_part.seconds
    else:
        hour, minute, second = 0, 0, 0
    return Datetime(
        year=day_part.year,
        month=day_part.month,
        day=day_part.day,
        hour=hour,
        minute=minute,
        second=second,
        tzinfo=datetime.timezone.utc,
    )
|
711
|
+
|
712
|
+
|
713
|
+
# TODO: Move to searchquerytranslator.py?
|
714
|
+
async def search_query_from_language(
    conversation: IConversation,
    translator: SearchQueryTranslator,
    query_text: str,
    model_instructions: list[typechat.PromptSection] | None = None,
) -> typechat.Result[SearchQuery]:
    """Translate ``query_text`` into a structured ``SearchQuery``.

    The prompt preamble holds the optional ``model_instructions`` followed by
    the conversation's time-range section, when there is one.
    """
    time_range = await get_time_range_prompt_section_for_conversation(conversation)
    prompt_preamble: list[typechat.PromptSection] = list(model_instructions or ())
    if time_range:
        prompt_preamble.append(time_range)
    translation = await translator.translate(
        query_text, prompt_preamble=prompt_preamble
    )
    return translation
|