typeagent-py 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- typeagent/aitools/auth.py +61 -0
- typeagent/aitools/embeddings.py +232 -0
- typeagent/aitools/utils.py +244 -0
- typeagent/aitools/vectorbase.py +175 -0
- typeagent/knowpro/answer_context_schema.py +49 -0
- typeagent/knowpro/answer_response_schema.py +34 -0
- typeagent/knowpro/answers.py +577 -0
- typeagent/knowpro/collections.py +759 -0
- typeagent/knowpro/common.py +9 -0
- typeagent/knowpro/convknowledge.py +112 -0
- typeagent/knowpro/convsettings.py +94 -0
- typeagent/knowpro/convutils.py +49 -0
- typeagent/knowpro/date_time_schema.py +32 -0
- typeagent/knowpro/field_helpers.py +87 -0
- typeagent/knowpro/fuzzyindex.py +144 -0
- typeagent/knowpro/interfaces.py +818 -0
- typeagent/knowpro/knowledge.py +88 -0
- typeagent/knowpro/kplib.py +125 -0
- typeagent/knowpro/query.py +1128 -0
- typeagent/knowpro/search.py +628 -0
- typeagent/knowpro/search_query_schema.py +165 -0
- typeagent/knowpro/searchlang.py +729 -0
- typeagent/knowpro/searchlib.py +345 -0
- typeagent/knowpro/secindex.py +100 -0
- typeagent/knowpro/serialization.py +390 -0
- typeagent/knowpro/textlocindex.py +179 -0
- typeagent/knowpro/utils.py +17 -0
- typeagent/mcp/server.py +139 -0
- typeagent/podcasts/podcast.py +473 -0
- typeagent/podcasts/podcast_import.py +105 -0
- typeagent/storage/__init__.py +25 -0
- typeagent/storage/memory/__init__.py +13 -0
- typeagent/storage/memory/collections.py +68 -0
- typeagent/storage/memory/convthreads.py +81 -0
- typeagent/storage/memory/messageindex.py +178 -0
- typeagent/storage/memory/propindex.py +289 -0
- typeagent/storage/memory/provider.py +84 -0
- typeagent/storage/memory/reltermsindex.py +318 -0
- typeagent/storage/memory/semrefindex.py +660 -0
- typeagent/storage/memory/timestampindex.py +176 -0
- typeagent/storage/sqlite/__init__.py +31 -0
- typeagent/storage/sqlite/collections.py +362 -0
- typeagent/storage/sqlite/messageindex.py +382 -0
- typeagent/storage/sqlite/propindex.py +119 -0
- typeagent/storage/sqlite/provider.py +293 -0
- typeagent/storage/sqlite/reltermsindex.py +328 -0
- typeagent/storage/sqlite/schema.py +248 -0
- typeagent/storage/sqlite/semrefindex.py +156 -0
- typeagent/storage/sqlite/timestampindex.py +146 -0
- typeagent/storage/utils.py +41 -0
- typeagent_py-0.1.0.dist-info/METADATA +28 -0
- typeagent_py-0.1.0.dist-info/RECORD +55 -0
- typeagent_py-0.1.0.dist-info/WHEEL +5 -0
- typeagent_py-0.1.0.dist-info/licenses/LICENSE +21 -0
- typeagent_py-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,176 @@
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
2
|
+
# Licensed under the MIT License.
|
3
|
+
|
4
|
+
# Timestamp-to-text-range in-memory index (pre-SQLite prep).
|
5
|
+
#
|
6
|
+
# Contract (stable regardless of backing store):
|
7
|
+
# - add_timestamp(s) accepts ISO 8601 timestamps that are lexicographically sortable
|
8
|
+
# (Datetime.isoformat). Missing/None timestamps are ignored.
|
9
|
+
# - lookup_range(DateRange) returns items whose ISO timestamp t satisfies
|
10
|
+
# start <= t < end (end is exclusive). If end is None, treat as a point
|
11
|
+
# query with end = start + epsilon.
|
12
|
+
# - Results are sorted ascending by timestamp; stability across runs is expected.
|
13
|
+
#
|
14
|
+
# SQLite plan (no behavior change now):
|
15
|
+
# - This in-memory structure will be replaced by direct queries over a Messages table
|
16
|
+
# with a timestamp column (or start/end timestamps if ranges are later needed).
|
17
|
+
# - The public methods and semantics here define the contract for the future provider
|
18
|
+
# implementation; callers should not rely on internal list layout or mutability.
|
19
|
+
|
20
|
+
|
21
|
+
import bisect
|
22
|
+
from collections.abc import AsyncIterable, Callable
|
23
|
+
from typing import Any
|
24
|
+
|
25
|
+
from ...knowpro.interfaces import (
|
26
|
+
DateRange,
|
27
|
+
Datetime,
|
28
|
+
IConversation,
|
29
|
+
IMessage,
|
30
|
+
ITimestampToTextRangeIndex,
|
31
|
+
MessageOrdinal,
|
32
|
+
TimestampedTextRange,
|
33
|
+
)
|
34
|
+
from ...knowpro.utils import text_range_from_message_chunk
|
35
|
+
|
36
|
+
|
37
|
+
class TimestampToTextRangeIndex(ITimestampToTextRangeIndex):
    """In-memory timestamp -> text-range index.

    Notes for the SQLite implementation:
    - add_timestamp(s) will translate to inserting/updating rows keyed by
      message ordinal with an indexed ISO timestamp column.
    - lookup_range() will map to a single indexed range scan on that column,
      projecting the corresponding text ranges.
    """

    def __init__(self):
        # Entries are kept sorted ascending by their ISO timestamp string.
        self._ranges: list[TimestampedTextRange] = []

    async def size(self) -> int:
        return self._size()

    def _size(self) -> int:
        return len(self._ranges)

    async def lookup_range(self, date_range: DateRange) -> list[TimestampedTextRange]:
        return self._lookup_range(date_range)

    def _lookup_range(self, date_range: DateRange) -> list[TimestampedTextRange]:
        # ISO 8601 strings sort lexicographically, so string comparison is
        # equivalent to time comparison here.
        lower = date_range.start.isoformat()
        upper = date_range.end.isoformat() if date_range.end is not None else None
        return get_in_range(
            self._ranges,
            lower,
            upper,
            key=lambda entry: entry.timestamp,
        )

    async def add_timestamp(
        self,
        message_ordinal: MessageOrdinal,
        timestamp: str,
    ) -> bool:
        return self._add_timestamp(message_ordinal, timestamp)

    def _add_timestamp(
        self,
        message_ordinal: MessageOrdinal,
        timestamp: str,
    ) -> bool:
        # Single insertion: keep the list sorted as we go.
        return self._insert_timestamp(message_ordinal, timestamp, True)

    async def add_timestamps(
        self,
        message_timestamps: list[tuple[MessageOrdinal, str]],
    ) -> None:
        self._add_timestamps(message_timestamps)

    def _add_timestamps(
        self,
        message_timestamps: list[tuple[MessageOrdinal, str]],
    ) -> None:
        # Bulk load: append everything first, then sort once at the end.
        for ordinal, ts in message_timestamps:
            self._insert_timestamp(ordinal, ts, False)
        self._ranges.sort(key=lambda entry: entry.timestamp)

    def _insert_timestamp(
        self,
        message_ordinal: MessageOrdinal,
        timestamp: str | None,
        in_order: bool,
    ) -> bool:
        """Insert one entry; return False when the timestamp is missing/empty."""
        if not timestamp:
            return False
        # Round-trip through Datetime to validate and normalize the string so
        # it stays lexically sortable.
        normalized = Datetime.fromisoformat(timestamp).isoformat()
        entry = TimestampedTextRange(
            range=text_range_from_message_chunk(message_ordinal),
            timestamp=normalized,
        )
        if not in_order:
            self._ranges.append(entry)
        else:
            index = bisect.bisect_left(
                self._ranges, entry.timestamp, key=lambda item: item.timestamp
            )
            self._ranges.insert(index, entry)
        return True
|
119
|
+
|
120
|
+
def get_in_range[T, S: Any](
|
121
|
+
values: list[T],
|
122
|
+
start_at: S,
|
123
|
+
stop_at: S | None,
|
124
|
+
key: Callable[[T], S],
|
125
|
+
) -> list[T]:
|
126
|
+
# Return the sublist of values with key in [start_at, stop_at), sorted.
|
127
|
+
# Details:
|
128
|
+
# - End is exclusive: values with key == stop_at are not returned.
|
129
|
+
# - If stop_at is None, treat as a point query with end = start_at + epsilon.
|
130
|
+
# - Requires that values are already sorted by the provided key.
|
131
|
+
istart = bisect.bisect_left(values, start_at, key=key)
|
132
|
+
if istart == len(values):
|
133
|
+
return []
|
134
|
+
if stop_at is None:
|
135
|
+
# Point query: include only items exactly equal to start_at
|
136
|
+
istop = bisect.bisect_right(values, start_at, istart, key=key)
|
137
|
+
return values[istart:istop]
|
138
|
+
# End-exclusive: do not include items with key == stop_at
|
139
|
+
istop = bisect.bisect_left(values, stop_at, istart, key=key)
|
140
|
+
return values[istart:istop]
|
141
|
+
|
142
|
+
|
143
|
+
async def build_timestamp_index(conversation: IConversation) -> None:
    """Populate the conversation's timestamp index from its messages.

    No-op when the conversation lacks messages/secondary indexes, when there
    are no messages, or when the message collection is persistent (persistent
    stores index timestamps implicitly over the message collection).
    """
    if conversation.messages is None or conversation.secondary_indexes is None:
        return

    # Nothing to index for an empty conversation.
    if await conversation.messages.size() == 0:
        return

    # Persistent collections get the timestamp index implicitly.
    if conversation.messages.is_persistent:
        return

    timestamp_index = conversation.secondary_indexes.timestamp_index
    # Caller must have established the timestamp index.
    assert timestamp_index is not None

    await add_to_timestamp_index(timestamp_index, conversation.messages, 0)
|
163
|
+
|
164
|
+
async def add_to_timestamp_index(
    timestamp_index: ITimestampToTextRangeIndex,
    messages: AsyncIterable[IMessage],
    base_message_ordinal: int,
) -> None:
    """Bulk-add the timestamps of *messages* to *timestamp_index*.

    Ordinals are assigned sequentially starting at base_message_ordinal;
    a message without a timestamp still consumes an ordinal but contributes
    no index entry.
    """
    pairs: list[tuple[int, str]] = []
    ordinal = base_message_ordinal
    async for message in messages:
        if message.timestamp:
            pairs.append((ordinal, message.timestamp))
        ordinal += 1
    await timestamp_index.add_timestamps(pairs)
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
2
|
+
# Licensed under the MIT License.
|
3
|
+
|
4
|
+
"""SQLite-based storage implementations."""
|
5
|
+
|
6
|
+
from .collections import SqliteMessageCollection, SqliteSemanticRefCollection
|
7
|
+
from .messageindex import SqliteMessageTextIndex
|
8
|
+
from .propindex import SqlitePropertyIndex
|
9
|
+
from .reltermsindex import SqliteRelatedTermsIndex
|
10
|
+
from .semrefindex import SqliteTermToSemanticRefIndex
|
11
|
+
from .timestampindex import SqliteTimestampToTextRangeIndex
|
12
|
+
from .provider import SqliteStorageProvider
|
13
|
+
from .schema import (
|
14
|
+
ConversationMetadata,
|
15
|
+
init_db_schema,
|
16
|
+
get_db_schema_version,
|
17
|
+
)
|
18
|
+
|
19
|
+
__all__ = [
|
20
|
+
"SqliteMessageCollection",
|
21
|
+
"SqliteSemanticRefCollection",
|
22
|
+
"SqliteMessageTextIndex",
|
23
|
+
"SqlitePropertyIndex",
|
24
|
+
"SqliteRelatedTermsIndex",
|
25
|
+
"SqliteTermToSemanticRefIndex",
|
26
|
+
"SqliteTimestampToTextRangeIndex",
|
27
|
+
"SqliteStorageProvider",
|
28
|
+
"ConversationMetadata",
|
29
|
+
"init_db_schema",
|
30
|
+
"get_db_schema_version",
|
31
|
+
]
|
@@ -0,0 +1,362 @@
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
2
|
+
# Licensed under the MIT License.
|
3
|
+
|
4
|
+
"""SQLite-based collection implementations."""
|
5
|
+
|
6
|
+
import json
|
7
|
+
import sqlite3
|
8
|
+
import typing
|
9
|
+
|
10
|
+
from .schema import ShreddedMessage, ShreddedSemanticRef
|
11
|
+
from ...knowpro import interfaces
|
12
|
+
from ...knowpro import serialization
|
13
|
+
|
14
|
+
|
15
|
+
class SqliteMessageCollection[TMessage: interfaces.IMessage](
    interfaces.IMessageCollection[TMessage]
):
    """SQLite-backed message collection.

    Each message is "shredded" into columns of the Messages table
    (chunks, chunk_uri, start_timestamp, tags, metadata, extra), keyed by a
    0-based msg_id ordinal. When a message text index is attached, newly
    appended messages are also fed into it.
    """

    def __init__(
        self,
        db: sqlite3.Connection,
        message_type: type[TMessage] | None = None,
        message_text_index: "interfaces.IMessageTextIndex[TMessage] | None" = None,
    ):
        # message_type is needed only for deserialization; reads raise
        # ValueError if it was not provided.
        self.db = db
        self.message_type = message_type
        self.message_text_index = message_text_index

    def set_message_text_index(
        self, message_text_index: "interfaces.IMessageTextIndex[TMessage]"
    ) -> None:
        """Set the message text index for automatic indexing of new messages."""
        self.message_text_index = message_text_index

    @property
    def is_persistent(self) -> bool:
        # Data lives in SQLite, not process memory.
        return True

    async def size(self) -> int:
        """Return the number of messages in the Messages table."""
        cursor = self.db.cursor()
        cursor.execute("SELECT COUNT(*) FROM Messages")
        return cursor.fetchone()[0]

    def __aiter__(self) -> typing.AsyncGenerator[TMessage, None]:
        # Delegate to an async generator so __aiter__ itself stays synchronous.
        return self._async_iterator()

    async def _async_iterator(self) -> typing.AsyncGenerator[TMessage, None]:
        """Yield all messages in msg_id (ordinal) order."""
        cursor = self.db.cursor()
        cursor.execute(
            """
            SELECT chunks, chunk_uri, start_timestamp, tags, metadata, extra
            FROM Messages ORDER BY msg_id
            """
        )
        for row in cursor:
            message = self._deserialize_message_from_row(row)
            yield message

    def _deserialize_message_from_row(self, row: ShreddedMessage) -> TMessage:
        """Rehydrate a message from database row columns.

        Raises ValueError if no message_type was supplied at construction.
        """
        (
            chunks_json,
            chunk_uri,
            start_timestamp,
            tags_json,
            metadata_json,
            extra_json,
        ) = row

        # Parse JSON fields and build a JSON object using camelCase.
        # 'extra' holds any fields not shredded into dedicated columns.
        message_data = json.loads(extra_json) if extra_json else {}
        message_data["textChunks"] = json.loads(chunks_json) if chunks_json else []
        message_data["timestamp"] = start_timestamp
        message_data["tags"] = json.loads(tags_json) if tags_json else []
        message_data["metadata"] = json.loads(metadata_json) if metadata_json else {}

        # The serialization.deserialize_object will convert to snake_case Python attributes.
        if self.message_type is None:
            raise ValueError(
                "Deserialization requires message_type passed to SqliteMessageCollection"
            )
        return serialization.deserialize_object(self.message_type, message_data)

    def _serialize_message_to_row(self, message: TMessage) -> ShreddedMessage:
        """Shred a message object into database columns.

        Inverse of _deserialize_message_from_row: dedicated fields are popped
        out; whatever remains is stored as the 'extra' JSON column.
        """
        # Serialize the message to JSON first (this uses camelCase)
        message_data = serialization.serialize_object(message)

        # Extract shredded fields (JSON uses camelCase)
        chunks_json = json.dumps(message_data.pop("textChunks", []))
        chunk_uri = None  # For now, we're not using chunk URIs
        start_timestamp = message_data.pop("timestamp", None)
        tags_json = json.dumps(message_data.pop("tags", []))
        metadata_json = json.dumps(message_data.pop("metadata", {}))

        # What's left in message_data becomes 'extra'.
        extra_json = json.dumps(message_data) if message_data else None

        return (
            chunks_json,
            chunk_uri,
            start_timestamp,
            tags_json,
            metadata_json,
            extra_json,
        )

    async def get_item(self, arg: int) -> TMessage:
        """Return the message with ordinal *arg*.

        Raises TypeError for non-int arguments and IndexError when absent.
        """
        if not isinstance(arg, int):
            raise TypeError(f"Index must be an int, not {type(arg).__name__}")
        cursor = self.db.cursor()
        cursor.execute(
            """
            SELECT chunks, chunk_uri, start_timestamp, tags, metadata, extra
            FROM Messages WHERE msg_id = ?
            """,
            (arg,),
        )
        row = cursor.fetchone()
        if row:
            return self._deserialize_message_from_row(row)
        raise IndexError("Message not found")

    async def get_slice(self, start: int, stop: int) -> list[TMessage]:
        """Return messages with ordinals in [start, stop), in order."""
        if stop <= start:
            return []
        cursor = self.db.cursor()
        cursor.execute(
            """
            SELECT chunks, chunk_uri, start_timestamp, tags, metadata, extra
            FROM Messages WHERE msg_id >= ? AND msg_id < ? ORDER BY msg_id
            """,
            (start, stop),
        )
        rows = cursor.fetchall()
        return [self._deserialize_message_from_row(row) for row in rows]

    async def get_multiple(self, arg: list[int]) -> list[TMessage]:
        """Return messages for the given ordinals (one query per ordinal)."""
        results = []
        for i in arg:
            results.append(await self.get_item(i))
        return results

    async def append(self, item: TMessage) -> None:
        """Append one message, assigning the next sequential msg_id."""
        cursor = self.db.cursor()
        (
            chunks_json,
            chunk_uri,
            start_timestamp,
            tags_json,
            metadata_json,
            extra_json,
        ) = self._serialize_message_to_row(item)
        # Use the current size as the ID to maintain 0-based indexing like the old implementation
        # NOTE(review): assumes a single writer; concurrent appends could
        # compute the same msg_id -- confirm if concurrency is ever expected.
        msg_id = await self.size()
        cursor.execute(
            """
            INSERT INTO Messages (msg_id, chunks, chunk_uri, start_timestamp, tags, metadata, extra)
            VALUES (?, ?, ?, ?, ?, ?, ?)
            """,
            (
                msg_id,
                chunks_json,
                chunk_uri,
                start_timestamp,
                tags_json,
                metadata_json,
                extra_json,
            ),
        )

        # Also add to message text index if available
        if self.message_text_index is not None:
            await self.message_text_index.add_messages_starting_at(msg_id, [item])

    async def extend(self, items: typing.Iterable[TMessage]) -> None:
        """Append many messages with sequential msg_ids via one bulk insert."""
        items_list = list(items)  # Convert to list to iterate twice
        if not items_list:
            return

        # Get the starting ordinal before adding any messages
        current_size = await self.size()

        # Prepare all insertion data for bulk operation
        insertion_data = []
        for msg_id, item in enumerate(items_list, current_size):
            (
                chunks_json,
                chunk_uri,
                start_timestamp,
                tags_json,
                metadata_json,
                extra_json,
            ) = self._serialize_message_to_row(item)
            insertion_data.append(
                (
                    msg_id,
                    chunks_json,
                    chunk_uri,
                    start_timestamp,
                    tags_json,
                    metadata_json,
                    extra_json,
                )
            )

        # Bulk insert all messages
        cursor = self.db.cursor()
        # insertion_data is always non-empty here (early return above);
        # the guard is redundant but harmless.
        if insertion_data:
            cursor.executemany(
                """
                INSERT INTO Messages (msg_id, chunks, chunk_uri, start_timestamp, tags, metadata, extra)
                VALUES (?, ?, ?, ?, ?, ?, ?)
                """,
                insertion_data,
            )

        # Also add to message text index if available
        if self.message_text_index is not None:
            await self.message_text_index.add_messages_starting_at(
                current_size, items_list
            )
+
|
225
|
+
|
226
|
+
class SqliteSemanticRefCollection(interfaces.ISemanticRefCollection):
    """SQLite-backed semantic reference collection.

    Rows in the SemanticRefs table hold (semref_id, range_json,
    knowledge_type, knowledge_json). Unlike messages, the semref_id comes
    from the semantic ref's own semanticRefOrdinal rather than being
    assigned by the collection.
    """

    def __init__(self, db: sqlite3.Connection):
        self.db = db

    def _deserialize_semantic_ref_from_row(
        self, row: ShreddedSemanticRef
    ) -> interfaces.SemanticRef:
        """Deserialize a semantic ref from database row columns."""
        semref_id, range_json, knowledge_type, knowledge_json = row

        # Build semantic ref data using camelCase (JSON format)
        semantic_ref_data = interfaces.SemanticRefData(
            semanticRefOrdinal=semref_id,
            range=json.loads(range_json),
            knowledgeType=knowledge_type,  # type: ignore
            knowledge=json.loads(knowledge_json),
        )

        return interfaces.SemanticRef.deserialize(semantic_ref_data)

    def _serialize_semantic_ref_to_row(
        self, semantic_ref: interfaces.SemanticRef
    ) -> ShreddedSemanticRef:
        """Serialize a semantic ref object into database columns.

        Inverse of _deserialize_semantic_ref_from_row.
        """
        # Serialize the semantic ref to JSON first (this uses camelCase)
        semantic_ref_data = semantic_ref.serialize()

        # Extract shredded fields (JSON uses camelCase)
        semref_id = semantic_ref_data["semanticRefOrdinal"]
        range_json = json.dumps(semantic_ref_data["range"])
        knowledge_type = semantic_ref_data["knowledgeType"]
        knowledge_json = json.dumps(semantic_ref_data["knowledge"])

        return (semref_id, range_json, knowledge_type, knowledge_json)

    @property
    def is_persistent(self) -> bool:
        # Data lives in SQLite, not process memory.
        return True

    async def size(self) -> int:
        return self._size()

    def _size(self) -> int:
        """Return the number of rows in the SemanticRefs table."""
        cursor = self.db.cursor()
        cursor.execute("SELECT COUNT(*) FROM SemanticRefs")
        return cursor.fetchone()[0]

    # NOTE: 'async def' with 'yield' makes this an async generator function,
    # so calling __aiter__ returns an async generator directly (no await
    # needed). The message collection splits this into two methods instead.
    async def __aiter__(self) -> typing.AsyncGenerator[interfaces.SemanticRef, None]:
        cursor = self.db.cursor()
        cursor.execute(
            """
            SELECT semref_id, range_json, knowledge_type, knowledge_json
            FROM SemanticRefs ORDER BY semref_id
            """
        )
        for row in cursor:
            yield self._deserialize_semantic_ref_from_row(row)

    async def get_item(self, arg: int) -> interfaces.SemanticRef:
        """Return the semantic ref with ordinal *arg*.

        Raises TypeError for non-int arguments and IndexError when absent.
        """
        if not isinstance(arg, int):
            raise TypeError(f"Index must be an int, not {type(arg).__name__}")
        cursor = self.db.cursor()
        cursor.execute(
            """
            SELECT semref_id, range_json, knowledge_type, knowledge_json
            FROM SemanticRefs WHERE semref_id = ?
            """,
            (arg,),
        )
        row = cursor.fetchone()
        if row:
            return self._deserialize_semantic_ref_from_row(row)
        raise IndexError("SemanticRef not found")

    async def get_slice(self, start: int, stop: int) -> list[interfaces.SemanticRef]:
        """Return semantic refs with ordinals in [start, stop), in order."""
        if stop <= start:
            return []
        cursor = self.db.cursor()
        cursor.execute(
            """
            SELECT semref_id, range_json, knowledge_type, knowledge_json
            FROM SemanticRefs WHERE semref_id >= ? AND semref_id < ?
            ORDER BY semref_id
            """,
            (start, stop),
        )
        rows = cursor.fetchall()
        return [self._deserialize_semantic_ref_from_row(row) for row in rows]

    async def get_multiple(self, arg: list[int]) -> list[interfaces.SemanticRef]:
        """Return semantic refs for the given ordinals (one query each)."""
        # TODO: Do we really want to support this?
        # If so, we should probably try to optimize it.
        results = []
        for i in arg:
            results.append(await self.get_item(i))
        return results

    async def append(self, item: interfaces.SemanticRef) -> None:
        """Insert one semantic ref, keyed by its own semanticRefOrdinal."""
        cursor = self.db.cursor()
        semref_id, range_json, knowledge_type, knowledge_json = (
            self._serialize_semantic_ref_to_row(item)
        )
        cursor.execute(
            """
            INSERT INTO SemanticRefs (semref_id, range_json, knowledge_type, knowledge_json)
            VALUES (?, ?, ?, ?)
            """,
            (semref_id, range_json, knowledge_type, knowledge_json),
        )

    async def extend(self, items: typing.Iterable[interfaces.SemanticRef]) -> None:
        """Insert many semantic refs via one bulk executemany call."""
        items_list = list(items)
        if not items_list:
            return

        # Prepare all insertion data for bulk operation
        insertion_data = []
        for item in items_list:
            semref_id, range_json, knowledge_type, knowledge_json = (
                self._serialize_semantic_ref_to_row(item)
            )
            insertion_data.append(
                (semref_id, range_json, knowledge_type, knowledge_json)
            )

        # Bulk insert all semantic refs
        cursor = self.db.cursor()
        # insertion_data is always non-empty here (early return above);
        # the guard is redundant but harmless.
        if insertion_data:
            cursor.executemany(
                """
                INSERT INTO SemanticRefs (semref_id, range_json, knowledge_type, knowledge_json)
                VALUES (?, ?, ?, ?)
                """,
                insertion_data,
            )