typeagent_py-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. typeagent/aitools/auth.py +61 -0
  2. typeagent/aitools/embeddings.py +232 -0
  3. typeagent/aitools/utils.py +244 -0
  4. typeagent/aitools/vectorbase.py +175 -0
  5. typeagent/knowpro/answer_context_schema.py +49 -0
  6. typeagent/knowpro/answer_response_schema.py +34 -0
  7. typeagent/knowpro/answers.py +577 -0
  8. typeagent/knowpro/collections.py +759 -0
  9. typeagent/knowpro/common.py +9 -0
  10. typeagent/knowpro/convknowledge.py +112 -0
  11. typeagent/knowpro/convsettings.py +94 -0
  12. typeagent/knowpro/convutils.py +49 -0
  13. typeagent/knowpro/date_time_schema.py +32 -0
  14. typeagent/knowpro/field_helpers.py +87 -0
  15. typeagent/knowpro/fuzzyindex.py +144 -0
  16. typeagent/knowpro/interfaces.py +818 -0
  17. typeagent/knowpro/knowledge.py +88 -0
  18. typeagent/knowpro/kplib.py +125 -0
  19. typeagent/knowpro/query.py +1128 -0
  20. typeagent/knowpro/search.py +628 -0
  21. typeagent/knowpro/search_query_schema.py +165 -0
  22. typeagent/knowpro/searchlang.py +729 -0
  23. typeagent/knowpro/searchlib.py +345 -0
  24. typeagent/knowpro/secindex.py +100 -0
  25. typeagent/knowpro/serialization.py +390 -0
  26. typeagent/knowpro/textlocindex.py +179 -0
  27. typeagent/knowpro/utils.py +17 -0
  28. typeagent/mcp/server.py +139 -0
  29. typeagent/podcasts/podcast.py +473 -0
  30. typeagent/podcasts/podcast_import.py +105 -0
  31. typeagent/storage/__init__.py +25 -0
  32. typeagent/storage/memory/__init__.py +13 -0
  33. typeagent/storage/memory/collections.py +68 -0
  34. typeagent/storage/memory/convthreads.py +81 -0
  35. typeagent/storage/memory/messageindex.py +178 -0
  36. typeagent/storage/memory/propindex.py +289 -0
  37. typeagent/storage/memory/provider.py +84 -0
  38. typeagent/storage/memory/reltermsindex.py +318 -0
  39. typeagent/storage/memory/semrefindex.py +660 -0
  40. typeagent/storage/memory/timestampindex.py +176 -0
  41. typeagent/storage/sqlite/__init__.py +31 -0
  42. typeagent/storage/sqlite/collections.py +362 -0
  43. typeagent/storage/sqlite/messageindex.py +382 -0
  44. typeagent/storage/sqlite/propindex.py +119 -0
  45. typeagent/storage/sqlite/provider.py +293 -0
  46. typeagent/storage/sqlite/reltermsindex.py +328 -0
  47. typeagent/storage/sqlite/schema.py +248 -0
  48. typeagent/storage/sqlite/semrefindex.py +156 -0
  49. typeagent/storage/sqlite/timestampindex.py +146 -0
  50. typeagent/storage/utils.py +41 -0
  51. typeagent_py-0.1.0.dist-info/METADATA +28 -0
  52. typeagent_py-0.1.0.dist-info/RECORD +55 -0
  53. typeagent_py-0.1.0.dist-info/WHEEL +5 -0
  54. typeagent_py-0.1.0.dist-info/licenses/LICENSE +21 -0
  55. typeagent_py-0.1.0.dist-info/top_level.txt +1 -0

typeagent/storage/memory/timestampindex.py
@@ -0,0 +1,176 @@
+ # Copyright (c) Microsoft Corporation.
+ # Licensed under the MIT License.
+
+ # Timestamp-to-text-range in-memory index (pre-SQLite prep).
+ #
+ # Contract (stable regardless of backing store):
+ # - add_timestamp(s) accepts ISO 8601 timestamps that are lexicographically sortable
+ #   (Datetime.isoformat). Missing/None timestamps are ignored.
+ # - lookup_range(DateRange) returns items whose ISO timestamp t satisfies
+ #   start <= t < end (end is exclusive). If end is None, treat as a point
+ #   query with end = start + epsilon.
+ # - Results are sorted ascending by timestamp; stability across runs is expected.
+ #
+ # SQLite plan (no behavior change now):
+ # - This in-memory structure will be replaced by direct queries over a Messages table
+ #   with a timestamp column (or start/end timestamps if ranges are later needed).
+ # - The public methods and semantics here define the contract for the future provider
+ #   implementation; callers should not rely on internal list layout or mutability.
+
+
+ import bisect
+ from collections.abc import AsyncIterable, Callable
+ from typing import Any
+
+ from ...knowpro.interfaces import (
+     DateRange,
+     Datetime,
+     IConversation,
+     IMessage,
+     ITimestampToTextRangeIndex,
+     MessageOrdinal,
+     TimestampedTextRange,
+ )
+ from ...knowpro.utils import text_range_from_message_chunk
+
+
+ class TimestampToTextRangeIndex(ITimestampToTextRangeIndex):
+     # In-memory implementation of ITimestampToTextRangeIndex.
+     #
+     # Notes for SQLite implementation:
+     # - add_timestamp(s): will translate to inserting/updating rows in the Messages
+     #   storage (or a dedicated index table) keyed by message ordinal with an ISO
+     #   timestamp column indexed for range scans.
+     # - lookup_range(): will map to a single indexed range query on the timestamp
+     #   column and project the corresponding text ranges.
+     def __init__(self):
+         self._ranges: list[TimestampedTextRange] = []
+
+     async def size(self) -> int:
+         return self._size()
+
+     def _size(self) -> int:
+         return len(self._ranges)
+
+     async def lookup_range(self, date_range: DateRange) -> list[TimestampedTextRange]:
+         return self._lookup_range(date_range)
+
+     def _lookup_range(self, date_range: DateRange) -> list[TimestampedTextRange]:
+         start_at = date_range.start.isoformat()
+         stop_at = None if date_range.end is None else date_range.end.isoformat()
+         return get_in_range(
+             self._ranges,
+             start_at,
+             stop_at,
+             key=lambda x: x.timestamp,
+         )
+
+     async def add_timestamp(
+         self,
+         message_ordinal: MessageOrdinal,
+         timestamp: str,
+     ) -> bool:
+         return self._add_timestamp(message_ordinal, timestamp)
+
+     def _add_timestamp(
+         self,
+         message_ordinal: MessageOrdinal,
+         timestamp: str,
+     ) -> bool:
+         return self._insert_timestamp(message_ordinal, timestamp, True)
+
+     async def add_timestamps(
+         self,
+         message_timestamps: list[tuple[MessageOrdinal, str]],
+     ) -> None:
+         self._add_timestamps(message_timestamps)
+
+     def _add_timestamps(
+         self,
+         message_timestamps: list[tuple[MessageOrdinal, str]],
+     ) -> None:
+         for message_ordinal, timestamp in message_timestamps:
+             self._insert_timestamp(message_ordinal, timestamp, False)
+         self._ranges.sort(key=lambda x: x.timestamp)
+
+     def _insert_timestamp(
+         self,
+         message_ordinal: MessageOrdinal,
+         timestamp: str | None,
+         in_order: bool,
+     ) -> bool:
+         if not timestamp:
+             return False
+         timestamp_datetime = Datetime.fromisoformat(timestamp)
+         entry: TimestampedTextRange = TimestampedTextRange(
+             range=text_range_from_message_chunk(message_ordinal),
+             # This string is formatted to be lexically sortable.
+             timestamp=timestamp_datetime.isoformat(),
+         )
+         if in_order:
+             where = bisect.bisect_left(
+                 self._ranges, entry.timestamp, key=lambda x: x.timestamp
+             )
+             self._ranges.insert(where, entry)
+         else:
+             self._ranges.append(entry)
+         return True
+
+
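
For orientation, a minimal usage sketch of the class above (not part of the wheel). It assumes DateRange is constructed with start/end datetimes, matching how the lookup code reads date_range.start and date_range.end; interfaces.py is not included in this excerpt, so that constructor is an assumption.

    import asyncio
    from datetime import datetime

    from typeagent.knowpro.interfaces import DateRange
    from typeagent.storage.memory.timestampindex import TimestampToTextRangeIndex


    async def main() -> None:
        index = TimestampToTextRangeIndex()
        # Ordinals and ISO timestamps below are made-up sample data.
        await index.add_timestamps([(0, "2024-01-01T09:00:00"), (1, "2024-01-01T10:30:00")])
        # Assumption: DateRange takes start/end keyword arguments.
        hits = await index.lookup_range(
            DateRange(start=datetime(2024, 1, 1, 9, 0), end=datetime(2024, 1, 1, 10, 0))
        )
        print([hit.timestamp for hit in hits])  # only the 09:00 message falls in [start, end)


    asyncio.run(main())
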
+ def get_in_range[T, S: Any](
+     values: list[T],
+     start_at: S,
+     stop_at: S | None,
+     key: Callable[[T], S],
+ ) -> list[T]:
+     # Return the sublist of values with key in [start_at, stop_at), sorted.
+     # Details:
+     # - End is exclusive: values with key == stop_at are not returned.
+     # - If stop_at is None, treat as a point query with end = start_at + epsilon.
+     # - Requires that values are already sorted by the provided key.
+     istart = bisect.bisect_left(values, start_at, key=key)
+     if istart == len(values):
+         return []
+     if stop_at is None:
+         # Point query: include only items exactly equal to start_at
+         istop = bisect.bisect_right(values, start_at, istart, key=key)
+         return values[istart:istop]
+     # End-exclusive: do not include items with key == stop_at
+     istop = bisect.bisect_left(values, stop_at, istart, key=key)
+     return values[istart:istop]
+
+
+ async def build_timestamp_index(conversation: IConversation) -> None:
+     if conversation.messages is not None and conversation.secondary_indexes is not None:
+         # There's nothing to do if there are no messages
+         if await conversation.messages.size() == 0:
+             return
+
+         # There's nothing to do for persistent collections; the timestamp index
+         # is created implicitly (as an index over the message collection)
+         if conversation.messages.is_persistent:
+             return
+
+         # Caller must have established the timestamp index
+         assert conversation.secondary_indexes.timestamp_index is not None
+
+         await add_to_timestamp_index(
+             conversation.secondary_indexes.timestamp_index,
+             conversation.messages,
+             0,
+         )
+
+
+ async def add_to_timestamp_index(
+     timestamp_index: ITimestampToTextRangeIndex,
+     messages: AsyncIterable[IMessage],
+     base_message_ordinal: int,
+ ) -> None:
+     message_timestamps: list[tuple[int, str]] = []
+     i = 0
+     async for message in messages:
+         timestamp = message.timestamp
+         if timestamp:
+             message_timestamps.append((base_message_ordinal + i, timestamp))
+         i += 1
+     await timestamp_index.add_timestamps(message_timestamps)
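
The half-open and point-query semantics of get_in_range can be checked directly. A small self-contained sketch (the rows below are invented sample data, not part of the wheel):

    from typeagent.storage.memory.timestampindex import get_in_range

    # Rows must already be sorted by the key, here an ISO timestamp string.
    rows = [
        ("2024-01-01T00:00:00", 0),
        ("2024-01-01T12:00:00", 1),
        ("2024-01-02T00:00:00", 2),
    ]
    # Half-open [start_at, stop_at): the 2024-01-02 row is excluded.
    assert get_in_range(
        rows, "2024-01-01T00:00:00", "2024-01-02T00:00:00", key=lambda r: r[0]
    ) == rows[:2]
    # stop_at=None is a point query: only exact matches of start_at are returned.
    assert get_in_range(rows, "2024-01-01T12:00:00", None, key=lambda r: r[0]) == [rows[1]]
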

typeagent/storage/sqlite/__init__.py
@@ -0,0 +1,31 @@
+ # Copyright (c) Microsoft Corporation.
+ # Licensed under the MIT License.
+
+ """SQLite-based storage implementations."""
+
+ from .collections import SqliteMessageCollection, SqliteSemanticRefCollection
+ from .messageindex import SqliteMessageTextIndex
+ from .propindex import SqlitePropertyIndex
+ from .reltermsindex import SqliteRelatedTermsIndex
+ from .semrefindex import SqliteTermToSemanticRefIndex
+ from .timestampindex import SqliteTimestampToTextRangeIndex
+ from .provider import SqliteStorageProvider
+ from .schema import (
+     ConversationMetadata,
+     init_db_schema,
+     get_db_schema_version,
+ )
+
+ __all__ = [
+     "SqliteMessageCollection",
+     "SqliteSemanticRefCollection",
+     "SqliteMessageTextIndex",
+     "SqlitePropertyIndex",
+     "SqliteRelatedTermsIndex",
+     "SqliteTermToSemanticRefIndex",
+     "SqliteTimestampToTextRangeIndex",
+     "SqliteStorageProvider",
+     "ConversationMetadata",
+     "init_db_schema",
+     "get_db_schema_version",
+ ]
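
A hedged orientation sketch of the import surface above. schema.py and provider.py are not included in this excerpt, so the init_db_schema call below is an assumption about its signature, not confirmed API:

    import sqlite3

    from typeagent.storage.sqlite import init_db_schema

    db = sqlite3.connect("conversation.db")
    # Assumption: init_db_schema takes the open connection and creates the tables
    # (Messages, SemanticRefs, ...) that the collections in collections.py expect.
    init_db_schema(db)
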

typeagent/storage/sqlite/collections.py
@@ -0,0 +1,362 @@
+ # Copyright (c) Microsoft Corporation.
+ # Licensed under the MIT License.
+
+ """SQLite-based collection implementations."""
+
+ import json
+ import sqlite3
+ import typing
+
+ from .schema import ShreddedMessage, ShreddedSemanticRef
+ from ...knowpro import interfaces
+ from ...knowpro import serialization
+
+
+ class SqliteMessageCollection[TMessage: interfaces.IMessage](
+     interfaces.IMessageCollection[TMessage]
+ ):
+     """SQLite-backed message collection."""
+
+     def __init__(
+         self,
+         db: sqlite3.Connection,
+         message_type: type[TMessage] | None = None,
+         message_text_index: "interfaces.IMessageTextIndex[TMessage] | None" = None,
+     ):
+         self.db = db
+         self.message_type = message_type
+         self.message_text_index = message_text_index
+
+     def set_message_text_index(
+         self, message_text_index: "interfaces.IMessageTextIndex[TMessage]"
+     ) -> None:
+         """Set the message text index for automatic indexing of new messages."""
+         self.message_text_index = message_text_index
+
+     @property
+     def is_persistent(self) -> bool:
+         return True
+
+     async def size(self) -> int:
+         cursor = self.db.cursor()
+         cursor.execute("SELECT COUNT(*) FROM Messages")
+         return cursor.fetchone()[0]
+
+     def __aiter__(self) -> typing.AsyncGenerator[TMessage, None]:
+         return self._async_iterator()
+
+     async def _async_iterator(self) -> typing.AsyncGenerator[TMessage, None]:
+         cursor = self.db.cursor()
+         cursor.execute(
+             """
+             SELECT chunks, chunk_uri, start_timestamp, tags, metadata, extra
+             FROM Messages ORDER BY msg_id
+             """
+         )
+         for row in cursor:
+             message = self._deserialize_message_from_row(row)
+             yield message
+
+     def _deserialize_message_from_row(self, row: ShreddedMessage) -> TMessage:
+         """Rehydrate a message from database row columns."""
+         (
+             chunks_json,
+             chunk_uri,
+             start_timestamp,
+             tags_json,
+             metadata_json,
+             extra_json,
+         ) = row
+
+         # Parse JSON fields and build a JSON object using camelCase.
+         message_data = json.loads(extra_json) if extra_json else {}
+         message_data["textChunks"] = json.loads(chunks_json) if chunks_json else []
+         message_data["timestamp"] = start_timestamp
+         message_data["tags"] = json.loads(tags_json) if tags_json else []
+         message_data["metadata"] = json.loads(metadata_json) if metadata_json else {}
+
+         # The serialization.deserialize_object will convert to snake_case Python attributes.
+         if self.message_type is None:
+             raise ValueError(
+                 "Deserialization requires message_type passed to SqliteMessageCollection"
+             )
+         return serialization.deserialize_object(self.message_type, message_data)
+
+     def _serialize_message_to_row(self, message: TMessage) -> ShreddedMessage:
+         """Shred a message object into database columns."""
+         # Serialize the message to JSON first (this uses camelCase)
+         message_data = serialization.serialize_object(message)
+
+         # Extract shredded fields (JSON uses camelCase)
+         chunks_json = json.dumps(message_data.pop("textChunks", []))
+         chunk_uri = None  # For now, we're not using chunk URIs
+         start_timestamp = message_data.pop("timestamp", None)
+         tags_json = json.dumps(message_data.pop("tags", []))
+         metadata_json = json.dumps(message_data.pop("metadata", {}))
+
+         # What's left in message_data becomes 'extra'.
+         extra_json = json.dumps(message_data) if message_data else None
+
+         return (
+             chunks_json,
+             chunk_uri,
+             start_timestamp,
+             tags_json,
+             metadata_json,
+             extra_json,
+         )
+
+     async def get_item(self, arg: int) -> TMessage:
+         if not isinstance(arg, int):
+             raise TypeError(f"Index must be an int, not {type(arg).__name__}")
+         cursor = self.db.cursor()
+         cursor.execute(
+             """
+             SELECT chunks, chunk_uri, start_timestamp, tags, metadata, extra
+             FROM Messages WHERE msg_id = ?
+             """,
+             (arg,),
+         )
+         row = cursor.fetchone()
+         if row:
+             return self._deserialize_message_from_row(row)
+         raise IndexError("Message not found")
+
+     async def get_slice(self, start: int, stop: int) -> list[TMessage]:
+         if stop <= start:
+             return []
+         cursor = self.db.cursor()
+         cursor.execute(
+             """
+             SELECT chunks, chunk_uri, start_timestamp, tags, metadata, extra
+             FROM Messages WHERE msg_id >= ? AND msg_id < ? ORDER BY msg_id
+             """,
+             (start, stop),
+         )
+         rows = cursor.fetchall()
+         return [self._deserialize_message_from_row(row) for row in rows]
+
+     async def get_multiple(self, arg: list[int]) -> list[TMessage]:
+         results = []
+         for i in arg:
+             results.append(await self.get_item(i))
+         return results
+
+     async def append(self, item: TMessage) -> None:
+         cursor = self.db.cursor()
+         (
+             chunks_json,
+             chunk_uri,
+             start_timestamp,
+             tags_json,
+             metadata_json,
+             extra_json,
+         ) = self._serialize_message_to_row(item)
+         # Use the current size as the ID to maintain 0-based indexing like the old implementation
+         msg_id = await self.size()
+         cursor.execute(
+             """
+             INSERT INTO Messages (msg_id, chunks, chunk_uri, start_timestamp, tags, metadata, extra)
+             VALUES (?, ?, ?, ?, ?, ?, ?)
+             """,
+             (
+                 msg_id,
+                 chunks_json,
+                 chunk_uri,
+                 start_timestamp,
+                 tags_json,
+                 metadata_json,
+                 extra_json,
+             ),
+         )
+
+         # Also add to message text index if available
+         if self.message_text_index is not None:
+             await self.message_text_index.add_messages_starting_at(msg_id, [item])
+
+     async def extend(self, items: typing.Iterable[TMessage]) -> None:
+         items_list = list(items)  # Convert to list to iterate twice
+         if not items_list:
+             return
+
+         # Get the starting ordinal before adding any messages
+         current_size = await self.size()
+
+         # Prepare all insertion data for bulk operation
+         insertion_data = []
+         for msg_id, item in enumerate(items_list, current_size):
+             (
+                 chunks_json,
+                 chunk_uri,
+                 start_timestamp,
+                 tags_json,
+                 metadata_json,
+                 extra_json,
+             ) = self._serialize_message_to_row(item)
+             insertion_data.append(
+                 (
+                     msg_id,
+                     chunks_json,
+                     chunk_uri,
+                     start_timestamp,
+                     tags_json,
+                     metadata_json,
+                     extra_json,
+                 )
+             )
+
+         # Bulk insert all messages
+         cursor = self.db.cursor()
+         if insertion_data:
+             cursor.executemany(
+                 """
+                 INSERT INTO Messages (msg_id, chunks, chunk_uri, start_timestamp, tags, metadata, extra)
+                 VALUES (?, ?, ?, ?, ?, ?, ?)
+                 """,
+                 insertion_data,
+             )
+
+         # Also add to message text index if available
+         if self.message_text_index is not None:
+             await self.message_text_index.add_messages_starting_at(
+                 current_size, items_list
+             )
+
+
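
The shredding above maps the camelCase JSON from serialization.serialize_object onto fixed Messages columns, with any leftover fields preserved in the extra column. A standalone sketch of that mapping (the sample dict, including the sourceUrl field, is invented for illustration):

    import json

    message_data = {
        "textChunks": ["Hello world"],
        "timestamp": "2024-01-01T00:00:00",
        "tags": ["greeting"],
        "metadata": {"speaker": "Alice"},
        "sourceUrl": "https://example.com",  # unrecognized fields end up in extra
    }
    chunks_json = json.dumps(message_data.pop("textChunks", []))
    start_timestamp = message_data.pop("timestamp", None)
    tags_json = json.dumps(message_data.pop("tags", []))
    metadata_json = json.dumps(message_data.pop("metadata", {}))
    extra_json = json.dumps(message_data) if message_data else None
    row = (chunks_json, None, start_timestamp, tags_json, metadata_json, extra_json)
    # row matches the (chunks, chunk_uri, start_timestamp, tags, metadata, extra)
    # columns named in the INSERT INTO Messages statements above.
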
+ class SqliteSemanticRefCollection(interfaces.ISemanticRefCollection):
+     """SQLite-backed semantic reference collection."""
+
+     def __init__(self, db: sqlite3.Connection):
+         self.db = db
+
+     def _deserialize_semantic_ref_from_row(
+         self, row: ShreddedSemanticRef
+     ) -> interfaces.SemanticRef:
+         """Deserialize a semantic ref from database row columns."""
+         semref_id, range_json, knowledge_type, knowledge_json = row
+
+         # Build semantic ref data using camelCase (JSON format)
+         semantic_ref_data = interfaces.SemanticRefData(
+             semanticRefOrdinal=semref_id,
+             range=json.loads(range_json),
+             knowledgeType=knowledge_type,  # type: ignore
+             knowledge=json.loads(knowledge_json),
+         )
+
+         return interfaces.SemanticRef.deserialize(semantic_ref_data)
+
+     def _serialize_semantic_ref_to_row(
+         self, semantic_ref: interfaces.SemanticRef
+     ) -> ShreddedSemanticRef:
+         """Serialize a semantic ref object into database columns."""
+         # Serialize the semantic ref to JSON first (this uses camelCase)
+         semantic_ref_data = semantic_ref.serialize()
+
+         # Extract shredded fields (JSON uses camelCase)
+         semref_id = semantic_ref_data["semanticRefOrdinal"]
+         range_json = json.dumps(semantic_ref_data["range"])
+         knowledge_type = semantic_ref_data["knowledgeType"]
+         knowledge_json = json.dumps(semantic_ref_data["knowledge"])
+
+         return (semref_id, range_json, knowledge_type, knowledge_json)
+
+     @property
+     def is_persistent(self) -> bool:
+         return True
+
+     async def size(self) -> int:
+         return self._size()
+
+     def _size(self) -> int:
+         cursor = self.db.cursor()
+         cursor.execute("SELECT COUNT(*) FROM SemanticRefs")
+         return cursor.fetchone()[0]
+
+     async def __aiter__(self) -> typing.AsyncGenerator[interfaces.SemanticRef, None]:
+         cursor = self.db.cursor()
+         cursor.execute(
+             """
+             SELECT semref_id, range_json, knowledge_type, knowledge_json
+             FROM SemanticRefs ORDER BY semref_id
+             """
+         )
+         for row in cursor:
+             yield self._deserialize_semantic_ref_from_row(row)
+
+     async def get_item(self, arg: int) -> interfaces.SemanticRef:
+         if not isinstance(arg, int):
+             raise TypeError(f"Index must be an int, not {type(arg).__name__}")
+         cursor = self.db.cursor()
+         cursor.execute(
+             """
+             SELECT semref_id, range_json, knowledge_type, knowledge_json
+             FROM SemanticRefs WHERE semref_id = ?
+             """,
+             (arg,),
+         )
+         row = cursor.fetchone()
+         if row:
+             return self._deserialize_semantic_ref_from_row(row)
+         raise IndexError("SemanticRef not found")
+
+     async def get_slice(self, start: int, stop: int) -> list[interfaces.SemanticRef]:
+         if stop <= start:
+             return []
+         cursor = self.db.cursor()
+         cursor.execute(
+             """
+             SELECT semref_id, range_json, knowledge_type, knowledge_json
+             FROM SemanticRefs WHERE semref_id >= ? AND semref_id < ?
+             ORDER BY semref_id
+             """,
+             (start, stop),
+         )
+         rows = cursor.fetchall()
+         return [self._deserialize_semantic_ref_from_row(row) for row in rows]
+
+     async def get_multiple(self, arg: list[int]) -> list[interfaces.SemanticRef]:
+         # TODO: Do we really want to support this?
+         # If so, we should probably try to optimize it.
+         results = []
+         for i in arg:
+             results.append(await self.get_item(i))
+         return results
+
+     async def append(self, item: interfaces.SemanticRef) -> None:
+         cursor = self.db.cursor()
+         semref_id, range_json, knowledge_type, knowledge_json = (
+             self._serialize_semantic_ref_to_row(item)
+         )
+         cursor.execute(
+             """
+             INSERT INTO SemanticRefs (semref_id, range_json, knowledge_type, knowledge_json)
+             VALUES (?, ?, ?, ?)
+             """,
+             (semref_id, range_json, knowledge_type, knowledge_json),
+         )
+
+     async def extend(self, items: typing.Iterable[interfaces.SemanticRef]) -> None:
+         items_list = list(items)
+         if not items_list:
+             return
+
+         # Prepare all insertion data for bulk operation
+         insertion_data = []
+         for item in items_list:
+             semref_id, range_json, knowledge_type, knowledge_json = (
+                 self._serialize_semantic_ref_to_row(item)
+             )
+             insertion_data.append(
+                 (semref_id, range_json, knowledge_type, knowledge_json)
+             )
+
+         # Bulk insert all semantic refs
+         cursor = self.db.cursor()
+         if insertion_data:
+             cursor.executemany(
+                 """
+                 INSERT INTO SemanticRefs (semref_id, range_json, knowledge_type, knowledge_json)
+                 VALUES (?, ?, ?, ?)
+                 """,
+                 insertion_data,
+             )
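
Both collections assume Messages and SemanticRefs tables defined in schema.py, which this excerpt does not include. A self-contained sketch against a minimal, assumed table shape that merely matches the columns these queries touch (SqliteMessageCollection is omitted because it also needs a concrete message class for deserialization):

    import asyncio
    import sqlite3

    from typeagent.storage.sqlite import SqliteSemanticRefCollection

    db = sqlite3.connect(":memory:")
    # Assumed, minimal DDL; the real schema (types, constraints, other tables) lives in schema.py.
    db.execute(
        "CREATE TABLE SemanticRefs ("
        "semref_id INTEGER PRIMARY KEY, range_json TEXT NOT NULL, "
        "knowledge_type TEXT NOT NULL, knowledge_json TEXT NOT NULL)"
    )


    async def main() -> None:
        semantic_refs = SqliteSemanticRefCollection(db)
        assert semantic_refs.is_persistent
        assert await semantic_refs.size() == 0  # fresh table, nothing inserted yet


    asyncio.run(main())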