typeagent-py 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- typeagent/aitools/auth.py +61 -0
- typeagent/aitools/embeddings.py +232 -0
- typeagent/aitools/utils.py +244 -0
- typeagent/aitools/vectorbase.py +175 -0
- typeagent/knowpro/answer_context_schema.py +49 -0
- typeagent/knowpro/answer_response_schema.py +34 -0
- typeagent/knowpro/answers.py +577 -0
- typeagent/knowpro/collections.py +759 -0
- typeagent/knowpro/common.py +9 -0
- typeagent/knowpro/convknowledge.py +112 -0
- typeagent/knowpro/convsettings.py +94 -0
- typeagent/knowpro/convutils.py +49 -0
- typeagent/knowpro/date_time_schema.py +32 -0
- typeagent/knowpro/field_helpers.py +87 -0
- typeagent/knowpro/fuzzyindex.py +144 -0
- typeagent/knowpro/interfaces.py +818 -0
- typeagent/knowpro/knowledge.py +88 -0
- typeagent/knowpro/kplib.py +125 -0
- typeagent/knowpro/query.py +1128 -0
- typeagent/knowpro/search.py +628 -0
- typeagent/knowpro/search_query_schema.py +165 -0
- typeagent/knowpro/searchlang.py +729 -0
- typeagent/knowpro/searchlib.py +345 -0
- typeagent/knowpro/secindex.py +100 -0
- typeagent/knowpro/serialization.py +390 -0
- typeagent/knowpro/textlocindex.py +179 -0
- typeagent/knowpro/utils.py +17 -0
- typeagent/mcp/server.py +139 -0
- typeagent/podcasts/podcast.py +473 -0
- typeagent/podcasts/podcast_import.py +105 -0
- typeagent/storage/__init__.py +25 -0
- typeagent/storage/memory/__init__.py +13 -0
- typeagent/storage/memory/collections.py +68 -0
- typeagent/storage/memory/convthreads.py +81 -0
- typeagent/storage/memory/messageindex.py +178 -0
- typeagent/storage/memory/propindex.py +289 -0
- typeagent/storage/memory/provider.py +84 -0
- typeagent/storage/memory/reltermsindex.py +318 -0
- typeagent/storage/memory/semrefindex.py +660 -0
- typeagent/storage/memory/timestampindex.py +176 -0
- typeagent/storage/sqlite/__init__.py +31 -0
- typeagent/storage/sqlite/collections.py +362 -0
- typeagent/storage/sqlite/messageindex.py +382 -0
- typeagent/storage/sqlite/propindex.py +119 -0
- typeagent/storage/sqlite/provider.py +293 -0
- typeagent/storage/sqlite/reltermsindex.py +328 -0
- typeagent/storage/sqlite/schema.py +248 -0
- typeagent/storage/sqlite/semrefindex.py +156 -0
- typeagent/storage/sqlite/timestampindex.py +146 -0
- typeagent/storage/utils.py +41 -0
- typeagent_py-0.1.0.dist-info/METADATA +28 -0
- typeagent_py-0.1.0.dist-info/RECORD +55 -0
- typeagent_py-0.1.0.dist-info/WHEEL +5 -0
- typeagent_py-0.1.0.dist-info/licenses/LICENSE +21 -0
- typeagent_py-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,88 @@
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
2
|
+
# Licensed under the MIT License.
|
3
|
+
|
4
|
+
from typechat import Result, TypeChatLanguageModel
|
5
|
+
|
6
|
+
from . import convknowledge
|
7
|
+
from . import kplib
|
8
|
+
|
9
|
+
|
10
|
+
def create_knowledge_extractor(
    chat_model: TypeChatLanguageModel | None = None,
) -> convknowledge.KnowledgeExtractor:
    """Build a KnowledgeExtractor around a TypeChat language model.

    If no chat_model is passed (or a falsy one), a model is obtained from
    convknowledge.create_typechat_model(). The extractor is created with
    max_chars_per_chunk=4096 and merge_action_knowledge=False.
    """
    if not chat_model:
        chat_model = convknowledge.create_typechat_model()
    return convknowledge.KnowledgeExtractor(
        chat_model,
        max_chars_per_chunk=4096,
        merge_action_knowledge=False,
    )
|
19
|
+
|
20
|
+
|
21
|
+
async def extract_knowledge_from_text(
    knowledge_extractor: convknowledge.KnowledgeExtractor,
    text: str,
    max_retries: int,
) -> Result[kplib.KnowledgeResponse]:
    """Extract knowledge from a single text input.

    The extractor's extract() coroutine is awaited once and its result is
    returned as-is.

    NOTE: max_retries is accepted for API stability but is not used yet;
    no retry is performed by this function.
    """
    # TODO: Add a retry mechanism to handle transient errors.
    extraction = await knowledge_extractor.extract(text)
    return extraction
|
29
|
+
|
30
|
+
|
31
|
+
async def extract_knowledge_from_text_batch(
    knowledge_extractor: convknowledge.KnowledgeExtractor,
    text_batch: list[str],
    concurrency: int = 2,
    max_retries: int = 3,
) -> list[Result[kplib.KnowledgeResponse]]:
    """Extract knowledge from a batch of text inputs concurrently.

    Args:
        knowledge_extractor: Extractor used for every text in the batch.
        text_batch: Texts to process.
        concurrency: Maximum number of extractions in flight at once.
            Values below 1 are treated as 1 (sequential processing).
        max_retries: Forwarded to extract_knowledge_from_text() for each text.

    Returns:
        One result per input text, in the same order as text_batch.

    Raises:
        Whatever extract_knowledge_from_text() raises; on the first failure
        the extractions still pending are cancelled and the exception is
        re-raised unchanged.
    """
    # Function-scope import so this block does not depend on the module's
    # import section.
    import asyncio

    if not text_batch:
        return []

    # A non-positive limit would deadlock (0) or raise (negative), so clamp.
    limiter = asyncio.Semaphore(max(1, concurrency))

    async def extract_one(text: str) -> Result[kplib.KnowledgeResponse]:
        async with limiter:
            return await extract_knowledge_from_text(
                knowledge_extractor, text, max_retries
            )

    tasks = [asyncio.create_task(extract_one(text)) for text in text_batch]
    try:
        # gather() yields results in argument order, not completion order.
        return list(await asyncio.gather(*tasks))
    except BaseException:
        # Do not leave sibling extractions running after a failure or cancel.
        for task in tasks:
            task.cancel()
        raise
|
46
|
+
|
47
|
+
|
48
|
+
def merge_concrete_entities(
    entities: list[kplib.ConcreteEntity],
) -> list[kplib.ConcreteEntity]:
    """Merge a list of concrete entities into a single list of merged entities.

    Not implemented yet: calling this always raises NotImplementedError.
    """
    # TODO: Intended approach (sketched, helpers not yet available):
    #   1. merged_entities = concrete_to_merged_entities(entities)
    #   2. convert each value of merged_entities back with
    #      merged_to_concrete_entity() and return the resulting list.
    raise NotImplementedError("TODO")
|
59
|
+
|
60
|
+
|
61
|
+
def merge_topics(topics: list[str]) -> list[str]:
    """Merge a list of topics into a unique list of topics.

    Duplicates are detected by exact string equality. Each topic keeps the
    position of its first occurrence, so the output is deterministic
    (a plain set() would yield an arbitrary order).

    Args:
        topics: Topics, possibly containing duplicates.

    Returns:
        A new list with duplicates removed, in first-occurrence order.
    """
    # dict preserves insertion order, which makes fromkeys() an ordered dedupe.
    return list(dict.fromkeys(topics))
|
66
|
+
|
67
|
+
|
68
|
+
async def extract_knowledge_for_text_batch_q(
    knowledge_extractor: convknowledge.KnowledgeExtractor,
    text_batch: list[str],
    concurrency: int = 2,
    max_retries: int = 3,
) -> list[Result[kplib.KnowledgeResponse]]:
    """Extract knowledge for a batch of text inputs using a task queue.

    Not implemented yet: awaiting this always raises NotImplementedError.
    """
    # TODO: BatchTask etc. Intended approach (sketched, helpers not yet
    # available):
    #   1. wrap each text in a BatchTask(task=text);
    #   2. await run_in_batches(tasks, <extract_knowledge_from_text with
    #      knowledge_extractor and max_retries>, concurrency);
    #   3. return task.result for each task, or Failure("No result") when a
    #      task has no result.
    raise NotImplementedError("TODO")
|
@@ -0,0 +1,125 @@
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
2
|
+
# Licensed under the MIT License.
|
3
|
+
|
4
|
+
"""This file defines the data types generated by knowledge extraction.
|
5
|
+
|
6
|
+
It also doubles as the schema for the extraction process.
|
7
|
+
Comments that should go into the schema are in docstrings and Doc() annotations.
|
8
|
+
"""
|
9
|
+
|
10
|
+
from pydantic.dataclasses import dataclass
|
11
|
+
from pydantic import Field, AliasChoices
|
12
|
+
from typing import Annotated, ClassVar, Literal
|
13
|
+
from typing_extensions import Doc
|
14
|
+
|
15
|
+
from .field_helpers import CamelCaseField
|
16
|
+
|
17
|
+
|
18
|
+
@dataclass
class Quantity:
    # A numeric amount paired with its units.
    # Documented with "#" comments on purpose: per the module docstring,
    # docstrings and Doc() annotations become part of the extraction schema.
    amount: float
    units: str

    def __str__(self) -> str:
        # "g" is the general number format, e.g. 4.0 -> "4", 2.5 -> "2.5".
        amount_text = format(self.amount, "g")
        return f"{amount_text} {self.units}"
|
25
|
+
|
26
|
+
|
27
|
+
# The value types accepted by Facet.value and ActionParam.value.
type Value = str | float | bool | Quantity
|
28
|
+
|
29
|
+
|
30
|
+
@dataclass
class Facet:
    # A named property with a value (see ConcreteEntity.facets).
    # Documented with "#" comments on purpose: per the module docstring,
    # docstrings and Doc() annotations become part of the extraction schema.
    name: str
    value: Annotated[Value, Doc("Very concise values.")]

    def __repr__(self) -> str:
        # Compact positional form: ClassName(name, value).
        class_name = self.__class__.__name__
        return f"{class_name}({self.name!r}, {self.value!r})"
|
37
|
+
|
38
|
+
|
39
|
+
@dataclass
class ConcreteEntity:
    """Specific, tangible people, places, institutions or things only."""

    # Class-level tag (not a dataclass field) identifying this knowledge kind.
    knowledge_type: ClassVar[Literal["entity"]] = "entity"

    # The Doc() texts below are schema content; only the way the adjacent
    # string literals are split differs, the concatenated text is unchanged.
    name: Annotated[
        str,
        Doc(
            "The name of the entity or thing such as 'Bach', 'Great Gatsby', 'frog' or 'piano'."
        ),
    ]
    type: Annotated[
        list[str],
        Doc(
            "The types of the entity such as 'speaker', 'person', 'artist', 'animal', "
            "'object', 'instrument', 'school', 'room', 'museum', 'food' etc. "
            "An entity can have multiple types; entity types should be single words."
        ),
    ]
    facets: Annotated[
        list[Facet] | None,
        Doc(
            "A specific, inherent, defining, or non-immediate facet of the entity such as "
            "'blue', 'old', 'famous', 'sister', 'aunt_of', 'weight: 4 kg'. "
            "Trivial actions or state changes are not facets. Facets are concise 'properties'."
        ),
    ] = None

    def __repr__(self) -> str:
        # Compact positional form: ClassName(name, type, facets).
        class_name = self.__class__.__name__
        return f"{class_name}({self.name!r}, {self.type}, {self.facets})"
|
72
|
+
|
73
|
+
|
74
|
+
@dataclass
class ActionParam:
    # A named parameter of an Action (see Action.params).
    # Documented with "#" comments on purpose: per the module docstring,
    # docstrings and Doc() annotations become part of the extraction schema.
    name: str
    value: Value
|
78
|
+
|
79
|
+
|
80
|
+
# The tense labels allowed for Action.verb_tense.
type VerbTense = Literal["past", "present", "future"]
|
81
|
+
|
82
|
+
|
83
|
+
@dataclass
class Action:
    # Schema type for one extracted action: its verbs, tense, and the names
    # of the entities taking part in it.
    # Documented with "#" comments on purpose: per the module docstring,
    # docstrings and Doc() annotations become part of the extraction schema.
    # NOTE(review): CamelCaseField comes from .field_helpers; presumably it
    # attaches the given description and a camelCase alias to the field --
    # confirm there.

    # Class-level tag (not a dataclass field) identifying this knowledge kind.
    knowledge_type: ClassVar[Literal["action"]] = "action"

    verbs: Annotated[list[str], Doc("Each verb is typically a word.")]
    verb_tense: VerbTense = CamelCaseField("The tense of the verb")
    # The three entity-name fields default to the literal string "none".
    subject_entity_name: str | Literal["none"] = CamelCaseField(
        "The name of the subject entity", default="none"
    )
    object_entity_name: str | Literal["none"] = CamelCaseField(
        "The name of the object entity", default="none"
    )
    indirect_object_entity_name: str | Literal["none"] = CamelCaseField(
        "The name of the indirect object entity", default="none"
    )
    # Each parameter is either a bare string or a named ActionParam.
    params: list[str | ActionParam] | None = None
    subject_entity_facet: Facet | None = CamelCaseField(
        "If the action implies this additional facet or property of the subject entity, such as hobbies, activities, interests, personality",
        default=None,
    )
|
103
|
+
|
104
|
+
|
105
|
+
@dataclass
class KnowledgeResponse:
    """Detailed and comprehensive knowledge response."""

    # Top-level container for extracted knowledge: entities, actions,
    # inverse actions and topics. All four fields are required.
    # The class docstring and Doc() texts are schema content (see the module
    # docstring), so they are kept verbatim.

    entities: list[ConcreteEntity]
    actions: Annotated[
        list[Action],
        Doc(
            "The 'subject_entity_name' and 'object_entity_name' must correspond "
            "to the 'name' of an entity listed in the 'entities' array."
        ),
    ]
    inverse_actions: Annotated[
        list[Action],
        Doc(
            "Some actions can ALSO be expressed in a reverse way... "
            "E.g. (A give to B) --> (B receive from A) and vice versa. "
            "If so, also return the reverse form of the action, full filled out."
        ),
    ]
    topics: Annotated[list[str], Doc("Detailed, descriptive topics and keywords.")]
|