typeagent-py 0.1.0 (typeagent_py-0.1.0-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- typeagent/aitools/auth.py +61 -0
- typeagent/aitools/embeddings.py +232 -0
- typeagent/aitools/utils.py +244 -0
- typeagent/aitools/vectorbase.py +175 -0
- typeagent/knowpro/answer_context_schema.py +49 -0
- typeagent/knowpro/answer_response_schema.py +34 -0
- typeagent/knowpro/answers.py +577 -0
- typeagent/knowpro/collections.py +759 -0
- typeagent/knowpro/common.py +9 -0
- typeagent/knowpro/convknowledge.py +112 -0
- typeagent/knowpro/convsettings.py +94 -0
- typeagent/knowpro/convutils.py +49 -0
- typeagent/knowpro/date_time_schema.py +32 -0
- typeagent/knowpro/field_helpers.py +87 -0
- typeagent/knowpro/fuzzyindex.py +144 -0
- typeagent/knowpro/interfaces.py +818 -0
- typeagent/knowpro/knowledge.py +88 -0
- typeagent/knowpro/kplib.py +125 -0
- typeagent/knowpro/query.py +1128 -0
- typeagent/knowpro/search.py +628 -0
- typeagent/knowpro/search_query_schema.py +165 -0
- typeagent/knowpro/searchlang.py +729 -0
- typeagent/knowpro/searchlib.py +345 -0
- typeagent/knowpro/secindex.py +100 -0
- typeagent/knowpro/serialization.py +390 -0
- typeagent/knowpro/textlocindex.py +179 -0
- typeagent/knowpro/utils.py +17 -0
- typeagent/mcp/server.py +139 -0
- typeagent/podcasts/podcast.py +473 -0
- typeagent/podcasts/podcast_import.py +105 -0
- typeagent/storage/__init__.py +25 -0
- typeagent/storage/memory/__init__.py +13 -0
- typeagent/storage/memory/collections.py +68 -0
- typeagent/storage/memory/convthreads.py +81 -0
- typeagent/storage/memory/messageindex.py +178 -0
- typeagent/storage/memory/propindex.py +289 -0
- typeagent/storage/memory/provider.py +84 -0
- typeagent/storage/memory/reltermsindex.py +318 -0
- typeagent/storage/memory/semrefindex.py +660 -0
- typeagent/storage/memory/timestampindex.py +176 -0
- typeagent/storage/sqlite/__init__.py +31 -0
- typeagent/storage/sqlite/collections.py +362 -0
- typeagent/storage/sqlite/messageindex.py +382 -0
- typeagent/storage/sqlite/propindex.py +119 -0
- typeagent/storage/sqlite/provider.py +293 -0
- typeagent/storage/sqlite/reltermsindex.py +328 -0
- typeagent/storage/sqlite/schema.py +248 -0
- typeagent/storage/sqlite/semrefindex.py +156 -0
- typeagent/storage/sqlite/timestampindex.py +146 -0
- typeagent/storage/utils.py +41 -0
- typeagent_py-0.1.0.dist-info/METADATA +28 -0
- typeagent_py-0.1.0.dist-info/RECORD +55 -0
- typeagent_py-0.1.0.dist-info/WHEEL +5 -0
- typeagent_py-0.1.0.dist-info/licenses/LICENSE +21 -0
- typeagent_py-0.1.0.dist-info/top_level.txt +1 -0
typeagent/storage/sqlite/schema.py
@@ -0,0 +1,248 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""SQLite database schema definitions."""

import sqlite3
from dataclasses import dataclass
from datetime import datetime, timezone
import typing

import numpy as np

from ...aitools.embeddings import NormalizedEmbedding

# Constants
CONVERSATION_SCHEMA_VERSION = "0.1"

MESSAGES_SCHEMA = """
CREATE TABLE IF NOT EXISTS Messages (
    msg_id INTEGER PRIMARY KEY AUTOINCREMENT,
    -- Messages can store chunks directly in JSON or reference external storage via URI
    chunks JSON NULL,  -- JSON array of text chunks, or NULL if using chunk_uri
    chunk_uri TEXT NULL,  -- URI for external chunk storage, or NULL if using chunks
    start_timestamp TEXT NULL,  -- ISO format with Z timezone
    tags JSON NULL,  -- JSON array of tags
    metadata JSON NULL,  -- Message metadata (source, dest, etc.)
    extra JSON NULL,  -- Extra message fields that were serialized

    CONSTRAINT chunks_xor_chunkuri CHECK (
        (chunks IS NOT NULL AND chunk_uri IS NULL) OR
        (chunks IS NULL AND chunk_uri IS NOT NULL)
    )
);
"""

TIMESTAMP_INDEX_SCHEMA = """
CREATE INDEX IF NOT EXISTS idx_messages_start_timestamp ON Messages(start_timestamp);
"""

# Conversation metadata table (single row)
CONVERSATION_METADATA_SCHEMA = """
CREATE TABLE IF NOT EXISTS ConversationMetadata (
    name_tag TEXT NOT NULL,  -- User-defined name for this conversation
    schema_version TEXT NOT NULL,  -- Version of the metadata schema
    created_at TEXT NOT NULL,  -- UTC timestamp when conversation was created
    updated_at TEXT NOT NULL,  -- UTC timestamp when metadata was last updated
    tags JSON NOT NULL,  -- JSON array of string tags
    extra JSON NOT NULL  -- JSON object for additional metadata
);
"""

SEMANTIC_REFS_SCHEMA = """
CREATE TABLE IF NOT EXISTS SemanticRefs (
    semref_id INTEGER PRIMARY KEY,
    range_json JSON NOT NULL,  -- JSON of the TextRange object
    knowledge_type TEXT NOT NULL,  -- Required to distinguish JSON types (entity, topic, etc.)
    knowledge_json JSON NOT NULL  -- JSON of the Knowledge object
);
"""

SEMANTIC_REF_INDEX_SCHEMA = """
CREATE TABLE IF NOT EXISTS SemanticRefIndex (
    term TEXT NOT NULL,  -- lowercased, not-unique/normalized
    semref_id INTEGER NOT NULL,

    FOREIGN KEY (semref_id) REFERENCES SemanticRefs(semref_id) ON DELETE CASCADE
);
"""

SEMANTIC_REF_INDEX_TERM_INDEX = """
CREATE INDEX IF NOT EXISTS idx_semantic_ref_index_term ON SemanticRefIndex(term);
"""

MESSAGE_TEXT_INDEX_SCHEMA = """
CREATE TABLE IF NOT EXISTS MessageTextIndex (
    msg_id INTEGER NOT NULL,
    chunk_ordinal INTEGER NOT NULL,
    embedding BLOB NOT NULL,  -- Serialized embedding (numpy array as bytes)
    index_position INTEGER,  -- Position in VectorBase index for fast lookup

    PRIMARY KEY (msg_id, chunk_ordinal),
    FOREIGN KEY (msg_id) REFERENCES Messages(msg_id) ON DELETE CASCADE
);
"""

MESSAGE_TEXT_INDEX_MESSAGE_INDEX = """
CREATE INDEX IF NOT EXISTS idx_message_text_index_message ON MessageTextIndex(msg_id, chunk_ordinal);
"""

MESSAGE_TEXT_INDEX_POSITION_INDEX = """
CREATE INDEX IF NOT EXISTS idx_message_text_index_position ON MessageTextIndex(index_position);
"""

PROPERTY_INDEX_SCHEMA = """
CREATE TABLE IF NOT EXISTS PropertyIndex (
    prop_name TEXT NOT NULL,
    value_str TEXT NOT NULL,
    score REAL NOT NULL DEFAULT 1.0,
    semref_id INTEGER NOT NULL,

    FOREIGN KEY (semref_id) REFERENCES SemanticRefs(semref_id) ON DELETE CASCADE
);
"""

PROPERTY_INDEX_PROP_NAME_INDEX = """
CREATE INDEX IF NOT EXISTS idx_property_index_prop_name ON PropertyIndex(prop_name);
"""

PROPERTY_INDEX_VALUE_STR_INDEX = """
CREATE INDEX IF NOT EXISTS idx_property_index_value_str ON PropertyIndex(value_str);
"""

PROPERTY_INDEX_COMBINED_INDEX = """
CREATE INDEX IF NOT EXISTS idx_property_index_combined ON PropertyIndex(prop_name, value_str);
"""

RELATED_TERMS_ALIASES_SCHEMA = """
CREATE TABLE IF NOT EXISTS RelatedTermsAliases (
    term TEXT NOT NULL,
    alias TEXT NOT NULL,

    PRIMARY KEY (term, alias)
);
"""

RELATED_TERMS_ALIASES_TERM_INDEX = """
CREATE INDEX IF NOT EXISTS idx_related_aliases_term ON RelatedTermsAliases(term);
"""

RELATED_TERMS_ALIASES_ALIAS_INDEX = """
CREATE INDEX IF NOT EXISTS idx_related_aliases_alias ON RelatedTermsAliases(alias);
"""

RELATED_TERMS_FUZZY_SCHEMA = """
CREATE TABLE IF NOT EXISTS RelatedTermsFuzzy (
    term TEXT NOT NULL PRIMARY KEY,
    term_embedding BLOB NOT NULL  -- Serialized embedding for the term
);
"""

RELATED_TERMS_FUZZY_TERM_INDEX = """
CREATE INDEX IF NOT EXISTS idx_related_fuzzy_term ON RelatedTermsFuzzy(term);
"""

# Type aliases for database row tuples
type ShreddedMessage = tuple[
    str | None, str | None, str | None, str | None, str | None, str | None
]
type ShreddedSemanticRef = tuple[int, str, str, str]

type ShreddedMessageText = tuple[int, int, str, bytes | None]
type ShreddedPropertyIndex = tuple[str, str, float, int]
type ShreddedRelatedTermsAlias = tuple[str, str]
type ShreddedRelatedTermsFuzzy = tuple[str, float, bytes]


@dataclass
class ConversationMetadata:
    """Metadata for the current conversation stored in SQLite."""

    name_tag: str
    schema_version: str
    created_at: datetime
    updated_at: datetime
    tags: list[str]
    extra: dict[str, typing.Any]


@typing.overload
def serialize_embedding(embedding: NormalizedEmbedding) -> bytes: ...


@typing.overload
def serialize_embedding(embedding: None) -> None: ...


def serialize_embedding(embedding: NormalizedEmbedding | None) -> bytes | None:
    """Serialize a numpy embedding array to bytes for SQLite storage."""
    if embedding is None:
        return None
    return embedding.tobytes()


@typing.overload
def deserialize_embedding(blob: bytes) -> NormalizedEmbedding: ...


@typing.overload
def deserialize_embedding(blob: None) -> None: ...


def deserialize_embedding(blob: bytes | None) -> NormalizedEmbedding | None:
    """Deserialize bytes back to numpy embedding array."""
    if blob is None:
        return None
    return np.frombuffer(blob, dtype=np.float32)


def _create_default_metadata() -> ConversationMetadata:
    """Create default conversation metadata."""
    current_time = datetime.now(timezone.utc)
    return ConversationMetadata(
        name_tag="",
        schema_version=CONVERSATION_SCHEMA_VERSION,
        tags=[],
        extra={},
        created_at=current_time,
        updated_at=current_time,
    )


def init_db_schema(db: sqlite3.Connection) -> None:
    """Initialize the database schema with all required tables."""
    cursor = db.cursor()

    # Create all tables
    cursor.execute(CONVERSATION_METADATA_SCHEMA)
    cursor.execute(MESSAGES_SCHEMA)
    cursor.execute(SEMANTIC_REFS_SCHEMA)
    cursor.execute(SEMANTIC_REF_INDEX_SCHEMA)
    cursor.execute(MESSAGE_TEXT_INDEX_SCHEMA)
    cursor.execute(PROPERTY_INDEX_SCHEMA)
    cursor.execute(RELATED_TERMS_ALIASES_SCHEMA)
    cursor.execute(RELATED_TERMS_FUZZY_SCHEMA)
    cursor.execute(TIMESTAMP_INDEX_SCHEMA)

    # Create additional indexes
    cursor.execute(SEMANTIC_REF_INDEX_TERM_INDEX)
    cursor.execute(MESSAGE_TEXT_INDEX_MESSAGE_INDEX)
    cursor.execute(MESSAGE_TEXT_INDEX_POSITION_INDEX)
    cursor.execute(RELATED_TERMS_ALIASES_TERM_INDEX)
    cursor.execute(RELATED_TERMS_ALIASES_ALIAS_INDEX)
    cursor.execute(RELATED_TERMS_FUZZY_TERM_INDEX)


def get_db_schema_version(db: sqlite3.Connection) -> str:
    """Get the database schema version."""
    try:
        cursor = db.cursor()
        cursor.execute("SELECT schema_version FROM ConversationMetadata LIMIT 1")
        row = cursor.fetchone()
        return row[0] if row else CONVERSATION_SCHEMA_VERSION
    except sqlite3.OperationalError:
        # Table doesn't exist, return current version
        return CONVERSATION_SCHEMA_VERSION


# Schema aliases for backward compatibility
CONVERSATIONS_SCHEMA = CONVERSATION_METADATA_SCHEMA
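As a quick orientation to the module above, here is a minimal sketch (not part of the package) that exercises init_db_schema and the embedding helpers from typeagent/storage/sqlite/schema.py against an in-memory SQLite database, assuming the wheel is installed so the typeagent package is importable:

import sqlite3

import numpy as np

from typeagent.storage.sqlite.schema import (
    deserialize_embedding,
    get_db_schema_version,
    init_db_schema,
    serialize_embedding,
)

# Create all tables and indexes in a throwaway in-memory database.
db = sqlite3.connect(":memory:")
init_db_schema(db)
print(get_db_schema_version(db))  # "0.1" while no metadata row exists yet

# Round-trip a float32 embedding through the BLOB helpers.
embedding = np.arange(4, dtype=np.float32)
blob = serialize_embedding(embedding)
assert np.array_equal(deserialize_embedding(blob), embedding)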
typeagent/storage/sqlite/semrefindex.py
@@ -0,0 +1,156 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""SQLite-based semantic reference index implementation."""

import re
import sqlite3
import unicodedata

from ...knowpro import interfaces
from ...knowpro.interfaces import ScoredSemanticRefOrdinal


class SqliteTermToSemanticRefIndex(interfaces.ITermToSemanticRefIndex):
    """SQLite-backed implementation of term to semantic ref index."""

    def __init__(self, db: sqlite3.Connection):
        self.db = db

    async def size(self) -> int:
        cursor = self.db.cursor()
        cursor.execute("SELECT COUNT(DISTINCT term) FROM SemanticRefIndex")
        return cursor.fetchone()[0]

    async def get_terms(self) -> list[str]:
        cursor = self.db.cursor()
        cursor.execute("SELECT DISTINCT term FROM SemanticRefIndex ORDER BY term")
        return [row[0] for row in cursor.fetchall()]

    async def add_term(
        self,
        term: str,
        semantic_ref_ordinal: (
            interfaces.SemanticRefOrdinal | interfaces.ScoredSemanticRefOrdinal
        ),
    ) -> str:
        if not term:
            return term

        term = self._prepare_term(term)

        # Extract semref_id from the ordinal
        if isinstance(semantic_ref_ordinal, interfaces.ScoredSemanticRefOrdinal):
            semref_id = semantic_ref_ordinal.semantic_ref_ordinal
        else:
            semref_id = semantic_ref_ordinal

        cursor = self.db.cursor()
        cursor.execute(
            """
            INSERT OR IGNORE INTO SemanticRefIndex (term, semref_id)
            VALUES (?, ?)
            """,
            (term, semref_id),
        )

        return term

    async def remove_term(
        self, term: str, semantic_ref_ordinal: interfaces.SemanticRefOrdinal
    ) -> None:
        term = self._prepare_term(term)
        cursor = self.db.cursor()
        cursor.execute(
            "DELETE FROM SemanticRefIndex WHERE term = ? AND semref_id = ?",
            (term, semantic_ref_ordinal),
        )

    async def lookup_term(
        self, term: str
    ) -> list[interfaces.ScoredSemanticRefOrdinal] | None:
        term = self._prepare_term(term)
        cursor = self.db.cursor()
        cursor.execute(
            "SELECT semref_id FROM SemanticRefIndex WHERE term = ?",
            (term,),
        )

        # Return as ScoredSemanticRefOrdinal with default score of 1.0
        results = []
        for row in cursor.fetchall():
            semref_id = row[0]
            results.append(ScoredSemanticRefOrdinal(semref_id, 1.0))
        return results

    async def clear(self) -> None:
        """Clear all terms from the semantic ref index."""
        cursor = self.db.cursor()
        cursor.execute("DELETE FROM SemanticRefIndex")

    async def serialize(self) -> interfaces.TermToSemanticRefIndexData:
        """Serialize the index data for compatibility with in-memory version."""
        cursor = self.db.cursor()
        cursor.execute(
            "SELECT term, semref_id FROM SemanticRefIndex ORDER BY term, semref_id"
        )

        # Group by term
        term_to_semrefs: dict[str, list[interfaces.ScoredSemanticRefOrdinalData]] = {}
        for term, semref_id in cursor.fetchall():
            if term not in term_to_semrefs:
                term_to_semrefs[term] = []
            scored_ref = ScoredSemanticRefOrdinal(semref_id, 1.0)
            term_to_semrefs[term].append(scored_ref.serialize())

        # Convert to the expected format
        items = []
        for term, semref_ordinals in term_to_semrefs.items():
            items.append(
                interfaces.TermToSemanticRefIndexItemData(
                    term=term, semanticRefOrdinals=semref_ordinals
                )
            )

        return interfaces.TermToSemanticRefIndexData(items=items)

    async def deserialize(self, data: interfaces.TermToSemanticRefIndexData) -> None:
        """Deserialize index data by populating the SQLite table."""
        cursor = self.db.cursor()

        # Clear existing data
        cursor.execute("DELETE FROM SemanticRefIndex")

        # Prepare all insertion data for bulk operation
        insertion_data = []
        for item in data["items"]:
            if item and item["term"]:
                term = self._prepare_term(item["term"])
                for semref_ordinal_data in item["semanticRefOrdinals"]:
                    if isinstance(semref_ordinal_data, dict):
                        semref_id = semref_ordinal_data["semanticRefOrdinal"]
                    else:
                        # Fallback for direct integer
                        semref_id = semref_ordinal_data
                    insertion_data.append((term, semref_id))

        # Bulk insert all the data
        if insertion_data:
            cursor.executemany(
                "INSERT OR IGNORE INTO SemanticRefIndex (term, semref_id) VALUES (?, ?)",
                insertion_data,
            )

    def _prepare_term(self, term: str) -> str:
        """Normalize term by converting to lowercase, stripping whitespace, and normalizing Unicode."""
        # Strip leading/trailing whitespace
        term = term.strip()

        # Normalize Unicode to NFC form (canonical composition)
        term = unicodedata.normalize("NFC", term)

        # Collapse multiple whitespace characters to single space
        term = re.sub(r"\s+", " ", term)

        # Convert to lowercase
        return term.lower()
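A minimal usage sketch (not part of the package) for SqliteTermToSemanticRefIndex, assuming the schema from typeagent/storage/sqlite/schema.py has already been applied; it shows that terms are normalized before storage and that lookups return default-scored ordinals:

import asyncio
import sqlite3

from typeagent.storage.sqlite.schema import init_db_schema
from typeagent.storage.sqlite.semrefindex import SqliteTermToSemanticRefIndex


async def main() -> None:
    db = sqlite3.connect(":memory:")
    init_db_schema(db)

    index = SqliteTermToSemanticRefIndex(db)
    # _prepare_term strips, NFC-normalizes, collapses whitespace, and lowercases,
    # so both spellings below refer to the same row.
    stored = await index.add_term("  Climate   CHANGE ", 7)
    print(stored)                                     # "climate change"
    print(await index.lookup_term("climate change"))  # one ordinal with score 1.0
    print(await index.size())                         # 1


asyncio.run(main())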
typeagent/storage/sqlite/timestampindex.py
@@ -0,0 +1,146 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""SQLite-based timestamp index implementation."""

import sqlite3

from ...knowpro import interfaces


class SqliteTimestampToTextRangeIndex(interfaces.ITimestampToTextRangeIndex):
    """SQL-based timestamp index that queries Messages table directly."""

    def __init__(self, db: sqlite3.Connection):
        self.db = db

    async def size(self) -> int:
        return self._size()

    def _size(self) -> int:
        cursor = self.db.cursor()
        cursor.execute(
            "SELECT COUNT(*) FROM Messages WHERE start_timestamp IS NOT NULL"
        )
        return cursor.fetchone()[0]

    async def add_timestamp(
        self, message_ordinal: interfaces.MessageOrdinal, timestamp: str
    ) -> bool:
        return self._add_timestamp(message_ordinal, timestamp)

    def _add_timestamp(
        self, message_ordinal: interfaces.MessageOrdinal, timestamp: str
    ) -> bool:
        """Add timestamp to Messages table start_timestamp column."""
        cursor = self.db.cursor()
        cursor.execute(
            "UPDATE Messages SET start_timestamp = ? WHERE msg_id = ?",
            (timestamp, message_ordinal),
        )
        return cursor.rowcount > 0

    async def get_timestamp_ranges(
        self, start_timestamp: str, end_timestamp: str | None = None
    ) -> list[interfaces.TimestampedTextRange]:
        """Get timestamp ranges from Messages table."""
        cursor = self.db.cursor()

        if end_timestamp is None:
            # Single timestamp query
            cursor.execute(
                """
                SELECT msg_id, start_timestamp
                FROM Messages
                WHERE start_timestamp = ?
                ORDER BY msg_id
                """,
                (start_timestamp,),
            )
        else:
            # Range query
            cursor.execute(
                """
                SELECT msg_id, start_timestamp
                FROM Messages
                WHERE start_timestamp >= ? AND start_timestamp <= ?
                ORDER BY msg_id
                """,
                (start_timestamp, end_timestamp),
            )

        results = []
        for msg_id, timestamp in cursor.fetchall():
            # Create text range for message
            from ...knowpro.interfaces import TextLocation, TextRange

            text_range = TextRange(
                start=TextLocation(message_ordinal=msg_id, chunk_ordinal=0)
            )
            results.append(
                interfaces.TimestampedTextRange(range=text_range, timestamp=timestamp)
            )

        return results

    async def add_timestamps(
        self, message_timestamps: list[tuple[interfaces.MessageOrdinal, str]]
    ) -> None:
        """Add multiple timestamps."""
        cursor = self.db.cursor()
        for message_ordinal, timestamp in message_timestamps:
            cursor.execute(
                "UPDATE Messages SET start_timestamp = ? WHERE msg_id = ?",
                (timestamp, message_ordinal),
            )

    async def lookup_range(
        self, date_range: interfaces.DateRange
    ) -> list[interfaces.TimestampedTextRange]:
        """Lookup messages in a date range."""
        cursor = self.db.cursor()

        # Convert datetime objects to ISO format strings for comparison
        start_timestamp = date_range.start.isoformat().replace("+00:00", "Z")
        end_timestamp = (
            date_range.end.isoformat().replace("+00:00", "Z")
            if date_range.end
            else None
        )

        if date_range.end is None:
            # Point query
            cursor.execute(
                """
                SELECT msg_id, start_timestamp, chunks
                FROM Messages
                WHERE start_timestamp = ?
                ORDER BY msg_id
                """,
                (start_timestamp,),
            )
        else:
            # Range query
            cursor.execute(
                """
                SELECT msg_id, start_timestamp, chunks
                FROM Messages
                WHERE start_timestamp >= ? AND start_timestamp < ?
                ORDER BY msg_id
                """,
                (start_timestamp, end_timestamp),
            )

        results = []
        for msg_id, timestamp, chunks in cursor.fetchall():
            text_location = interfaces.TextLocation(
                message_ordinal=msg_id, chunk_ordinal=0
            )
            text_range = interfaces.TextRange(
                start=text_location, end=None  # Point range
            )
            results.append(
                interfaces.TimestampedTextRange(timestamp=timestamp, range=text_range)
            )

        return results
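A minimal sketch (not part of the package) of the timestamp index above. Because SqliteTimestampToTextRangeIndex only updates existing Messages rows, the example first inserts a message row directly, then tags it with a timestamp and queries a string range via get_timestamp_ranges:

import asyncio
import sqlite3

from typeagent.storage.sqlite.schema import init_db_schema
from typeagent.storage.sqlite.timestampindex import SqliteTimestampToTextRangeIndex


async def main() -> None:
    db = sqlite3.connect(":memory:")
    init_db_schema(db)
    # add_timestamp issues an UPDATE, so a Messages row must exist first.
    db.execute("""INSERT INTO Messages (msg_id, chunks) VALUES (1, '["hello"]')""")

    index = SqliteTimestampToTextRangeIndex(db)
    await index.add_timestamp(1, "2024-06-01T12:00:00Z")
    ranges = await index.get_timestamp_ranges(
        "2024-06-01T00:00:00Z", "2024-06-02T00:00:00Z"
    )
    for r in ranges:
        print(r.timestamp, r.range.start.message_ordinal)  # 2024-06-01T12:00:00Z 1


asyncio.run(main())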
typeagent/storage/utils.py
@@ -0,0 +1,41 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""Storage provider utilities.

This module provides utility functions for creating storage providers
without circular import issues.
"""

from ..knowpro.interfaces import IMessage, IStorageProvider
from ..knowpro.convsettings import MessageTextIndexSettings, RelatedTermIndexSettings


async def create_storage_provider[TMessage: IMessage](
    message_text_settings: MessageTextIndexSettings,
    related_terms_settings: RelatedTermIndexSettings,
    dbname: str | None = None,
    message_type: type[TMessage] | None = None,
) -> IStorageProvider[TMessage]:
    """Create a storage provider.

    MemoryStorageProvider if dbname is None, SqliteStorageProvider otherwise.
    """
    if dbname is None:
        from .memory import MemoryStorageProvider

        return MemoryStorageProvider(message_text_settings, related_terms_settings)
    else:
        from .sqlite import SqliteStorageProvider

        if message_type is None:
            raise ValueError("Message type must be specified for SQLite storage")

        # Create the new provider directly (constructor is now synchronous)
        provider = SqliteStorageProvider(
            db_path=dbname,
            message_type=message_type,
            message_text_index_settings=message_text_settings,
            related_term_index_settings=related_terms_settings,
        )
        return provider
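A minimal sketch (not part of the package) of how create_storage_provider selects a backend. The settings classes come from typeagent.knowpro.convsettings and their constructors are not shown in this diff, so the settings objects and the concrete message class are treated here as values supplied by the caller:

from typeagent.knowpro.convsettings import MessageTextIndexSettings, RelatedTermIndexSettings
from typeagent.knowpro.interfaces import IMessage
from typeagent.storage.utils import create_storage_provider


async def open_provider(
    message_text_settings: MessageTextIndexSettings,
    related_terms_settings: RelatedTermIndexSettings,
    message_type: type[IMessage],
    dbname: str | None,
):
    # dbname=None selects the in-memory provider; a path selects SQLite,
    # which additionally requires the concrete message type.
    return await create_storage_provider(
        message_text_settings,
        related_terms_settings,
        dbname=dbname,
        message_type=message_type,
    )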
typeagent_py-0.1.0.dist-info/METADATA
@@ -0,0 +1,28 @@
Metadata-Version: 2.4
Name: typeagent-py
Version: 0.1.0
Summary: TypeAgent implements an agentic memory framework.
Author: Steven Lucco, Umesh Madan, Guido van Rossum
Author-email: Guido van Rossum <gvanrossum@microsoft.com>
Project-URL: Homepage, https://github.com/microsoft/TypeAgent/tree/main/python/ta
Requires-Python: <3.14,>=3.12
License-File: LICENSE
Requires-Dist: azure-identity>=1.22.0
Requires-Dist: black>=25.1.0
Requires-Dist: build>=1.2.2.post1
Requires-Dist: colorama>=0.4.6
Requires-Dist: coverage[toml]>=7.9.1
Requires-Dist: logfire>=4.1.0
Requires-Dist: mcp[cli]>=1.12.1
Requires-Dist: numpy>=2.2.6
Requires-Dist: openai>=1.81.0
Requires-Dist: opentelemetry-instrumentation-httpx>=0.57b0
Requires-Dist: pydantic>=2.11.4
Requires-Dist: pydantic-ai-slim[openai]>=0.5.0
Requires-Dist: pyright>=1.1.405
Requires-Dist: pytest>=8.3.5
Requires-Dist: pytest-asyncio>=0.26.0
Requires-Dist: pytest-mock>=3.14.0
Requires-Dist: python-dotenv>=1.1.0
Requires-Dist: typechat
Dynamic: license-file
typeagent_py-0.1.0.dist-info/RECORD
@@ -0,0 +1,55 @@
typeagent/aitools/auth.py,sha256=bnHqU8G3hDW2A50ulwmFeUfz0OzaGzyVKQUMXoUmqao,1659
typeagent/aitools/embeddings.py,sha256=Bveu0SXXUvsGDPoyLeCB6DE86o8-PQnaR9KJ1z7oIo0,9490
typeagent/aitools/utils.py,sha256=3EQVBf3uFEdqN_xQdHZbf8PVt6vic4XCPST7QjT4NC8,8027
typeagent/aitools/vectorbase.py,sha256=zdTM1mKkD49pKcUC5xeXmNcQZzUCX73SOugGwU8FeSc,6340
typeagent/knowpro/answer_context_schema.py,sha256=dIlTmUC3VT0Q_5ybVTDY2jVPA6KH1407EBBCsKYpqb8,1627
typeagent/knowpro/answer_response_schema.py,sha256=bo-owIba8WaeKPve_NQx86kHIrnDujlUPcCVBP34-aI,990
typeagent/knowpro/answers.py,sha256=P4urCHqkLZa-h57HLadrnvwIHeSKWz1mYCVVcKKKQwo,20131
typeagent/knowpro/collections.py,sha256=llBt1mmPm9DRc9OIIKcCpzpNQapSP6jA3ojlohfFgPs,25663
typeagent/knowpro/common.py,sha256=H087hBsHhPrwVM2m3PIIbAKNkwjfWl00R13JB-nTyQ4,261
typeagent/knowpro/convknowledge.py,sha256=jAoXRrd_6g8NOxjAXPPz_SFMhsZDnBsnxn1xOXxwHyA,4165
typeagent/knowpro/convsettings.py,sha256=o_f2-PFXUpzNi7WYOKaickOaBTVAj4aaYUpR13g8I0k,3514
typeagent/knowpro/convutils.py,sha256=jiUtbhSDbKe4wi_2uglVI7p_DsdJNd9uTo-wvgKEMS8,1564
typeagent/knowpro/date_time_schema.py,sha256=42HdjWsyDR6REwpZSiNz-4TwMHWQHpGtPu4TFUSzHxU,536
typeagent/knowpro/field_helpers.py,sha256=El2pOEsRUwJDQ-mohW7O78bcnbEnciCtV_RpZqFFoCU,3109
typeagent/knowpro/fuzzyindex.py,sha256=CewYKAM-OqEuhtqAmhSTRfnSKxAIOxOLy0yRwKjWlgI,4935
typeagent/knowpro/interfaces.py,sha256=wSbgE3RATBmN5y9cimHOKOSJ4mg-zisOB8-xYA1TdtU,24087
typeagent/knowpro/knowledge.py,sha256=lbEMOTZZmKpS-ldFyV9RXb--g9eZhLAuJnFKqHRzX6s,3004
typeagent/knowpro/kplib.py,sha256=CIW2LsS2ZFUV5w1cQG1DBOg9qgkgPJDaEnZv_nYh5rQ,3819
typeagent/knowpro/query.py,sha256=8i1fY4fxt9WMbqYx5-t_9VEjpZzy1-bzHeZVKuaB21M,39726
typeagent/knowpro/search.py,sha256=iUbTv53XBQYw5hqsedouEpi6KfLs9VSIZZ-M752Si5Y,22992
typeagent/knowpro/search_query_schema.py,sha256=Xn9n11M8f4jFc-ICz8i_3cf2LBLwUzl52ctneqbKa6g,5583
typeagent/knowpro/searchlang.py,sha256=bYEetPVgdEmkc3Nmw1bPmJ59QonkUpZwKclRKXrt3Z4,26731
typeagent/knowpro/searchlib.py,sha256=p9nDFQwI9aZVqltJ78WVXO08OwBPCPQvg9yxgX-S6AU,9703
typeagent/knowpro/secindex.py,sha256=KPweoLG2ozx5sayfD16dZ1X3BP5Fc4BhIar86WAU1ak,3706
typeagent/knowpro/serialization.py,sha256=46_uX3yJf29uWaXWuT6fARUvdC1BmsgPcxQxNzunvso,13065
typeagent/knowpro/textlocindex.py,sha256=Esy67bXYtyqWdj5dwfAsVBaA-0FzhiZHekYfyhtpzpw,5955
typeagent/knowpro/utils.py,sha256=UqilFoWBi507hDEDFxmFNm-QlihTdPZa-ESIohP7xws,474
typeagent/mcp/server.py,sha256=pvQRekhD28nYt-3iV5-2I7hPgjnGIHoPbcILkUh_Z-o,4573
typeagent/podcasts/podcast.py,sha256=WHpzYo1Kz-rkyhNBRy4oc38pQaQy3dVTmy7SCShgPgM,18488
typeagent/podcasts/podcast_import.py,sha256=Au9a2qaiEZT9RmRP6gCUNqLTiJJ2fRDh7OYGVM7uouE,3694
typeagent/storage/__init__.py,sha256=nzXmnjCld060avN92QgkkbKSkunMr7hhrhUY9g9r2dY,589
typeagent/storage/utils.py,sha256=EF2wzp94hF4qqOGhiJIUq94SNbYoElyF6NhomDk0nJ8,1437
typeagent/storage/memory/__init__.py,sha256=6NtHL0sOIRxALBb28wjKXc80PF_Vn3b1HNXldQXiajs,351
typeagent/storage/memory/collections.py,sha256=alhOo8proevJKINBF5iqPtmRdSKZ2ROkD2D7L3vnnUc,1993
typeagent/storage/memory/convthreads.py,sha256=PKbtJr7MPq4325UNb-DNWbxTJWFkcBQoo9P76ZqIhL8,2685
typeagent/storage/memory/messageindex.py,sha256=zd10U6E-cB7Bg2G9ivJU3ua51UeHI-twZPOd5ler3iM,6803
typeagent/storage/memory/propindex.py,sha256=fpH7296L2B6JQ0dplHZY8KYDXxY55sup3bZrvY2Pzp0,9797
typeagent/storage/memory/provider.py,sha256=HH3v1UAdfL_k-0BRDPPoUZsMdayAFyTAZ14xaxM_eCE,3241
typeagent/storage/memory/reltermsindex.py,sha256=Y_SpZYcXvRkhp3AThfbtNe7RLgbHvYsDjM7t1rdq0jY,11813
typeagent/storage/memory/semrefindex.py,sha256=kUxw7s9boj5cHLz-ywrohKTmxPUzXRCG7xHL_fwgNW0,21279
typeagent/storage/memory/timestampindex.py,sha256=6OFJnncKjvVhMpfaQBOWhG1ozuV1PRTdoFGtPoQk-uw,6361
typeagent/storage/sqlite/__init__.py,sha256=eriIYr7iDFctxax-OmI1eGlrBqoYL3Jta6vruishPfM,947
typeagent/storage/sqlite/collections.py,sha256=5O_fUICw1Cdr510ek8sIgLqSw1JPVqoKxV0TPyeQrrs,12817
typeagent/storage/sqlite/messageindex.py,sha256=VkXPPGnqgK42HX0jCsQBhC16lbe9I-wopkUY707Yy-0,14328
typeagent/storage/sqlite/propindex.py,sha256=nWfVPH4XBtNGp6rrAsbwgclXn6pL9W0SquSdcPwRv3k,4240
typeagent/storage/sqlite/provider.py,sha256=XrUAdi6NMNdYzG0F96WvIFi8QxQ6wHjGSmGLHUrUwL8,11552
typeagent/storage/sqlite/reltermsindex.py,sha256=VwmUH-awNZ5YeMZTuFVfKP-8G0WQQ1klFq2I4KEOykY,12568
typeagent/storage/sqlite/schema.py,sha256=c5-dff8wdIA37SegPOI-_h-w2eCPSnpnPQAC3vcNzYo,8061
typeagent/storage/sqlite/semrefindex.py,sha256=eqHrQMyVdFS9HOXV1dLvp0bMs8JKoPQLmV46Cs0HQJM,5456
typeagent/storage/sqlite/timestampindex.py,sha256=gnmmwgRKCwFi2iGzGJVe7Zz12rblB-5-5WZkqpDgySM,4764
typeagent_py-0.1.0.dist-info/licenses/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
typeagent_py-0.1.0.dist-info/METADATA,sha256=BA3AfIIF4hAKz9m-WlqH3bC82lGGL6jaFS__1SyLuxs,1002
typeagent_py-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
typeagent_py-0.1.0.dist-info/top_level.txt,sha256=uXuso6jrsvRIIZsh6WfAvTjk5wOgClsFUiiuo1hpFZ8,10
typeagent_py-0.1.0.dist-info/RECORD,,