unique_toolkit 0.8.14__py3-none-any.whl → 0.8.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. unique_toolkit/_common/default_language_model.py +6 -0
  2. unique_toolkit/_common/token/image_token_counting.py +67 -0
  3. unique_toolkit/_common/token/token_counting.py +196 -0
  4. unique_toolkit/evals/config.py +36 -0
  5. unique_toolkit/evals/context_relevancy/prompts.py +56 -0
  6. unique_toolkit/evals/context_relevancy/schema.py +88 -0
  7. unique_toolkit/evals/context_relevancy/service.py +241 -0
  8. unique_toolkit/evals/hallucination/constants.py +61 -0
  9. unique_toolkit/evals/hallucination/hallucination_evaluation.py +92 -0
  10. unique_toolkit/evals/hallucination/prompts.py +79 -0
  11. unique_toolkit/evals/hallucination/service.py +57 -0
  12. unique_toolkit/evals/hallucination/utils.py +213 -0
  13. unique_toolkit/evals/output_parser.py +48 -0
  14. unique_toolkit/evals/tests/test_context_relevancy_service.py +252 -0
  15. unique_toolkit/evals/tests/test_output_parser.py +80 -0
  16. unique_toolkit/history_manager/history_construction_with_contents.py +307 -0
  17. unique_toolkit/history_manager/history_manager.py +80 -111
  18. unique_toolkit/history_manager/loop_token_reducer.py +457 -0
  19. unique_toolkit/language_model/schemas.py +8 -0
  20. unique_toolkit/reference_manager/reference_manager.py +15 -2
  21. {unique_toolkit-0.8.14.dist-info → unique_toolkit-0.8.16.dist-info}/METADATA +7 -1
  22. {unique_toolkit-0.8.14.dist-info → unique_toolkit-0.8.16.dist-info}/RECORD +24 -7
  23. {unique_toolkit-0.8.14.dist-info → unique_toolkit-0.8.16.dist-info}/LICENSE +0 -0
  24. {unique_toolkit-0.8.14.dist-info → unique_toolkit-0.8.16.dist-info}/WHEEL +0 -0

unique_toolkit/evals/tests/test_context_relevancy_service.py
@@ -0,0 +1,252 @@
+ from unittest.mock import MagicMock, patch
+
+ import pytest
+ from unique_toolkit.app.schemas import ChatEvent
+ from unique_toolkit.chat.service import LanguageModelName
+ from unique_toolkit.language_model.infos import (
+     LanguageModelInfo,
+ )
+ from unique_toolkit.language_model.schemas import (
+     LanguageModelAssistantMessage,
+     LanguageModelCompletionChoice,
+     LanguageModelMessages,
+ )
+ from unique_toolkit.language_model.service import LanguageModelResponse
+ from unique_toolkit.evals.config import EvaluationMetricConfig
+ from unique_toolkit.evals.context_relevancy.prompts import (
+     CONTEXT_RELEVANCY_METRIC_SYSTEM_MSG,
+ )
+ from unique_toolkit.evals.context_relevancy.schema import (
+     EvaluationSchemaStructuredOutput,
+ )
+ from unique_toolkit.evals.context_relevancy.service import (
+     ContextRelevancyEvaluator,
+ )
+ from unique_toolkit.evals.exception import EvaluatorException
+ from unique_toolkit.evals.schemas import (
+     EvaluationMetricInput,
+     EvaluationMetricName,
+     EvaluationMetricResult,
+ )
+
+
+ @pytest.fixture
+ def event():
+     event = MagicMock(spec=ChatEvent)
+     event.payload = MagicMock()
+     event.payload.user_message = MagicMock()
+     event.payload.user_message.text = "Test query"
+     event.user_id = "user_0"
+     event.company_id = "company_0"
+     return event
+
+
+ @pytest.fixture
+ def evaluator(event):
+     return ContextRelevancyEvaluator(event)
+
+
+ @pytest.fixture
+ def basic_config():
+     return EvaluationMetricConfig(
+         enabled=True,
+         name=EvaluationMetricName.CONTEXT_RELEVANCY,
+         language_model=LanguageModelInfo.from_name(
+             LanguageModelName.AZURE_GPT_4o_2024_0806
+         ),
+     )
+
+
+ @pytest.fixture
+ def structured_config(basic_config):
+     model_info = LanguageModelInfo.from_name(LanguageModelName.AZURE_GPT_4o_2024_0806)
+     return EvaluationMetricConfig(
+         enabled=True,
+         name=EvaluationMetricName.CONTEXT_RELEVANCY,
+         language_model=model_info,
+     )
+
+
+ @pytest.fixture
+ def sample_input():
+     return EvaluationMetricInput(
+         input_text="test query",
+         context_texts=["test context 1", "test context 2"],
+     )
+
+
+ @pytest.mark.asyncio
+ async def test_analyze_disabled(evaluator, sample_input, basic_config):
+     basic_config.enabled = False
+     result = await evaluator.analyze(sample_input, basic_config)
+     assert result is None
+
+
+ @pytest.mark.asyncio
+ async def test_analyze_empty_context(evaluator, basic_config):
+     input_with_empty_context = EvaluationMetricInput(
+         input_text="test query", context_texts=[]
+     )
+
+     with pytest.raises(EvaluatorException) as exc_info:
+         await evaluator.analyze(input_with_empty_context, basic_config)
+
+     assert "No context texts provided." in str(exc_info.value)
+
+
+ @pytest.mark.asyncio
+ async def test_analyze_regular_output(evaluator, sample_input, basic_config):
+     mock_result = LanguageModelResponse(
+         choices=[
+             LanguageModelCompletionChoice(
+                 index=0,
+                 message=LanguageModelAssistantMessage(
+                     content="""{
+                         "value": "high",
+                         "reason": "Test reason"
+                     }"""
+                 ),
+                 finish_reason="stop",
+             )
+         ]
+     )
+
+     with patch.object(
+         evaluator.language_model_service,
+         "complete_async",
+         return_value=mock_result,
+     ) as mock_complete:
+         result = await evaluator.analyze(sample_input, basic_config)
+
+         assert isinstance(result, EvaluationMetricResult)
+         assert result.value.lower() == "high"
+         mock_complete.assert_called_once()
+
+
+ @pytest.mark.asyncio
+ async def test_analyze_structured_output(evaluator, sample_input, structured_config):
+     mock_result = LanguageModelResponse(
+         choices=[
+             LanguageModelCompletionChoice(
+                 index=0,
+                 message=LanguageModelAssistantMessage(
+                     content="HIGH",
+                     parsed={"value": "high", "reason": "Test reason"},
+                 ),
+                 finish_reason="stop",
+             )
+         ]
+     )
+
+     structured_output_schema = EvaluationSchemaStructuredOutput
+
+     with patch.object(
+         evaluator.language_model_service,
+         "complete_async",
+         return_value=mock_result,
+     ) as mock_complete:
+         result = await evaluator.analyze(
+             sample_input, structured_config, structured_output_schema
+         )
+         assert isinstance(result, EvaluationMetricResult)
+         assert result.value.lower() == "high"
+         mock_complete.assert_called_once()
+
+
+ @pytest.mark.asyncio
+ async def test_analyze_structured_output_validation_error(
+     evaluator, sample_input, structured_config
+ ):
+     mock_result = LanguageModelResponse(
+         choices=[
+             LanguageModelCompletionChoice(
+                 index=0,
+                 message=LanguageModelAssistantMessage(
+                     content="HIGH", parsed={"invalid": "data"}
+                 ),
+                 finish_reason="stop",
+             )
+         ]
+     )
+
+     structured_output_schema = EvaluationSchemaStructuredOutput
+
+     with patch.object(
+         evaluator.language_model_service,
+         "complete_async",
+         return_value=mock_result,
+     ):
+         with pytest.raises(EvaluatorException) as exc_info:
+             await evaluator.analyze(
+                 sample_input, structured_config, structured_output_schema
+             )
+         assert "Error occurred during structured output validation" in str(
+             exc_info.value
+         )
+
+
+ @pytest.mark.asyncio
+ async def test_analyze_regular_output_empty_response(
+     evaluator, sample_input, basic_config
+ ):
+     mock_result = LanguageModelResponse(
+         choices=[
+             LanguageModelCompletionChoice(
+                 index=0,
+                 message=LanguageModelAssistantMessage(content=""),
+                 finish_reason="stop",
+             )
+         ]
+     )
+
+     with patch.object(
+         evaluator.language_model_service,
+         "complete_async",
+         return_value=mock_result,
+     ):
+         with pytest.raises(EvaluatorException) as exc_info:
+             await evaluator.analyze(sample_input, basic_config)
+         assert "did not return a result" in str(exc_info.value)
+
+
+ def test_compose_msgs_regular(evaluator, sample_input, basic_config):
+     messages = evaluator._compose_msgs(
+         sample_input, basic_config, enable_structured_output=False
+     )
+
+     assert isinstance(messages, LanguageModelMessages)
+     assert messages.root[0].content == CONTEXT_RELEVANCY_METRIC_SYSTEM_MSG
+     assert isinstance(messages.root[1].content, str)
+     assert "test query" in messages.root[1].content
+     assert "test context 1" in messages.root[1].content
+     assert "test context 2" in messages.root[1].content
+
+
+ def test_compose_msgs_structured(evaluator, sample_input, structured_config):
+     messages = evaluator._compose_msgs(
+         sample_input, structured_config, enable_structured_output=True
+     )
+
+     assert isinstance(messages, LanguageModelMessages)
+     assert len(messages.root) == 2
+     assert (
+         messages.root[0].content != CONTEXT_RELEVANCY_METRIC_SYSTEM_MSG
+     )  # Should use structured output prompt
+     assert isinstance(messages.root[1].content, str)
+     assert "test query" in messages.root[1].content
+     assert "test context 1" in messages.root[1].content
+     assert "test context 2" in messages.root[1].content
+
+
+ @pytest.mark.asyncio
+ async def test_analyze_unknown_error(evaluator, sample_input, basic_config):
+     with patch.object(
+         evaluator.language_model_service,
+         "complete_async",
+         side_effect=Exception("Unknown error"),
+     ):
+         with pytest.raises(EvaluatorException) as exc_info:
+             await evaluator.analyze(sample_input, basic_config)
+         assert "Unknown error occurred during context relevancy metric analysis" in str(
+             exc_info.value
+         )
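
The tests above drive the new ContextRelevancyEvaluator through its analyze() API with both plain JSON and structured output. Purely as orientation, here is a minimal usage sketch inferred from those fixtures; the ChatEvent wiring and the example query/context strings are assumptions, not part of this diff:

from unique_toolkit.chat.service import LanguageModelName
from unique_toolkit.evals.config import EvaluationMetricConfig
from unique_toolkit.evals.context_relevancy.service import ContextRelevancyEvaluator
from unique_toolkit.evals.schemas import EvaluationMetricInput, EvaluationMetricName
from unique_toolkit.language_model.infos import LanguageModelInfo


async def score_context_relevancy(event):
    # `event` is a ChatEvent provided by the surrounding app; its construction is assumed here.
    evaluator = ContextRelevancyEvaluator(event)
    config = EvaluationMetricConfig(
        enabled=True,
        name=EvaluationMetricName.CONTEXT_RELEVANCY,
        language_model=LanguageModelInfo.from_name(
            LanguageModelName.AZURE_GPT_4o_2024_0806
        ),
    )
    metric_input = EvaluationMetricInput(
        input_text="What does the travel policy cover?",  # hypothetical query
        context_texts=["Employees may book economy-class flights."],  # hypothetical context
    )
    # Per the tests, analyze() yields an EvaluationMetricResult (value + reason),
    # or None when the metric is disabled in the config.
    return await evaluator.analyze(metric_input, config)
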

unique_toolkit/evals/tests/test_output_parser.py
@@ -0,0 +1,80 @@
+ import pytest
+
+ from unique_toolkit.evals.context_relevancy.schema import EvaluationSchemaStructuredOutput, Fact
+ from unique_toolkit.evals.exception import EvaluatorException
+ from unique_toolkit.evals.output_parser import parse_eval_metric_result, parse_eval_metric_result_structured_output
+ from unique_toolkit.evals.schemas import EvaluationMetricName, EvaluationMetricResult
+
+
+
+
+ def test_parse_eval_metric_result_success():
+     # Test successful parsing with all fields
+     result = '{"value": "high", "reason": "Test reason"}'
+     parsed = parse_eval_metric_result(result, EvaluationMetricName.CONTEXT_RELEVANCY)
+
+     assert isinstance(parsed, EvaluationMetricResult)
+     assert parsed.name == EvaluationMetricName.CONTEXT_RELEVANCY
+     assert parsed.value == "high"
+     assert parsed.reason == "Test reason"
+     assert parsed.fact_list == []
+
+
+ def test_parse_eval_metric_result_missing_fields():
+     # Test parsing with missing fields (should use default "None")
+     result = '{"value": "high"}'
+     parsed = parse_eval_metric_result(result, EvaluationMetricName.CONTEXT_RELEVANCY)
+
+     assert isinstance(parsed, EvaluationMetricResult)
+     assert parsed.name == EvaluationMetricName.CONTEXT_RELEVANCY
+     assert parsed.value == "high"
+     assert parsed.reason == "None"
+     assert parsed.fact_list == []
+
+
+ def test_parse_eval_metric_result_invalid_json():
+     # Test parsing with invalid JSON
+     result = "invalid json"
+     with pytest.raises(EvaluatorException) as exc_info:
+         parse_eval_metric_result(result, EvaluationMetricName.CONTEXT_RELEVANCY)
+
+     assert "Error occurred during parsing the evaluation metric result" in str(
+         exc_info.value
+     )
+
+
+ def test_parse_eval_metric_result_structured_output_basic():
+     # Test basic structured output without fact list
+     result = EvaluationSchemaStructuredOutput(value="high", reason="Test reason")
+     parsed = parse_eval_metric_result_structured_output(
+         result, EvaluationMetricName.CONTEXT_RELEVANCY
+     )
+
+     assert isinstance(parsed, EvaluationMetricResult)
+     assert parsed.name == EvaluationMetricName.CONTEXT_RELEVANCY
+     assert parsed.value == "high"
+     assert parsed.reason == "Test reason"
+     assert parsed.fact_list == []
+
+
+ def test_parse_eval_metric_result_structured_output_with_facts():
+     # Test structured output with fact list
+     result = EvaluationSchemaStructuredOutput(
+         value="high",
+         reason="Test reason",
+         fact_list=[
+             Fact(fact="Fact 1"),
+             Fact(fact="Fact 2"),
+         ],
+     )
+     parsed = parse_eval_metric_result_structured_output(
+         result, EvaluationMetricName.CONTEXT_RELEVANCY
+     )
+
+     assert isinstance(parsed, EvaluationMetricResult)
+     assert parsed.name == EvaluationMetricName.CONTEXT_RELEVANCY
+     assert parsed.value == "high"
+     assert parsed.reason == "Test reason"
+     assert parsed.fact_list == ["Fact 1", "Fact 2"]
+     assert isinstance(parsed.fact_list, list)
+     assert len(parsed.fact_list) == 2  # None fact should be filtered out

unique_toolkit/history_manager/history_construction_with_contents.py
@@ -0,0 +1,307 @@
+ import base64
+ import mimetypes
+
+ from datetime import datetime
+ from enum import StrEnum
+
+ import numpy as np
+ import tiktoken
+
+ from pydantic import RootModel
+
+ from _common.token.token_counting import num_tokens_per_language_model_message
+ from chat.service import ChatService
+ from content.service import ContentService
+ from language_model.schemas import LanguageModelMessages
+ from unique_toolkit.app import ChatEventUserMessage
+ from unique_toolkit.chat.schemas import ChatMessage
+ from unique_toolkit.chat.schemas import ChatMessageRole as ChatRole
+ from unique_toolkit.content.schemas import Content
+ from unique_toolkit.language_model import LanguageModelMessageRole as LLMRole
+ from unique_toolkit.language_model.infos import EncoderName
+
+
+
+ # TODO: Test this once it moves into the unique toolkit
+
+ map_chat_llm_message_role = {
+     ChatRole.USER: LLMRole.USER,
+     ChatRole.ASSISTANT: LLMRole.ASSISTANT,
+ }
+
+
+ class ImageMimeType(StrEnum):
+     JPEG = "image/jpeg"
+     PNG = "image/png"
+     GIF = "image/gif"
+     BMP = "image/bmp"
+     WEBP = "image/webp"
+     TIFF = "image/tiff"
+     SVG = "image/svg+xml"
+
+
+ class FileMimeType(StrEnum):
+     PDF = "application/pdf"
+     DOCX = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
+     DOC = "application/msword"
+     XLSX = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+     XLS = "application/vnd.ms-excel"
+     PPTX = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
+     CSV = "text/csv"
+     HTML = "text/html"
+     MD = "text/markdown"
+     TXT = "text/plain"
+
+
+ class ChatMessageWithContents(ChatMessage):
+     contents: list[Content] = []
+
+
+ class ChatHistoryWithContent(RootModel):
+     root: list[ChatMessageWithContents]
+
+     @classmethod
+     def from_chat_history_and_contents(
+         cls,
+         chat_history: list[ChatMessage],
+         chat_contents: list[Content],
+     ):
+         combined = chat_contents + chat_history
+         combined.sort(key=lambda x: x.created_at or datetime.min)
+
+         grouped_elements = []
+         content_container = []
+
+         # Content is collected and added to the next chat message
+         for c in combined:
+             if isinstance(c, ChatMessage):
+                 grouped_elements.append(
+                     ChatMessageWithContents(
+                         contents=content_container.copy(),
+                         **c.model_dump(),
+                     ),
+                 )
+                 content_container.clear()
+             else:
+                 content_container.append(c)
+
+         return cls(root=grouped_elements)
+
+     def __iter__(self):
+         return iter(self.root)
+
+     def __getitem__(self, item):
+         return self.root[item]
+
+
+ def is_image_content(filename: str) -> bool:
+     mimetype, _ = mimetypes.guess_type(filename)
+
+     if not mimetype:
+         return False
+
+     return mimetype in ImageMimeType.__members__.values()
+
+
+ def is_file_content(filename: str) -> bool:
+     mimetype, _ = mimetypes.guess_type(filename)
+
+     if not mimetype:
+         return False
+
+     return mimetype in FileMimeType.__members__.values()
+
+
+ def get_chat_history_with_contents(
+     user_message: ChatEventUserMessage,
+     chat_id: str,
+     chat_history: list[ChatMessage],
+     content_service: ContentService,
+ ) -> ChatHistoryWithContent:
+     last_user_message = ChatMessage(
+         id=user_message.id,
+         chat_id=chat_id,
+         text=user_message.text,
+         originalText=user_message.original_text,
+         role=ChatRole.USER,
+         gpt_request=None,
+         created_at=datetime.fromisoformat(user_message.created_at),
+     )
+     if len(chat_history) > 0 and last_user_message.id == chat_history[-1].id:
+         pass
+     else:
+         chat_history.append(last_user_message)
+
+     chat_contents = content_service.search_contents(
+         where={
+             "ownerId": {
+                 "equals": chat_id,
+             },
+         },
+     )
+
+     return ChatHistoryWithContent.from_chat_history_and_contents(
+         chat_history,
+         chat_contents,
+     )
+
+
+ def download_encoded_images(
+     contents: list[Content],
+     content_service: ContentService,
+     chat_id: str,
+ ) -> list[str]:
+     base64_encoded_images = []
+     for im in contents:
+         if is_image_content(im.key):
+             try:
+                 file_bytes = content_service.download_content_to_bytes(
+                     content_id=im.id,
+                     chat_id=chat_id,
+                 )
+
+                 mime_type, _ = mimetypes.guess_type(im.key)
+                 encoded_string = base64.b64encode(file_bytes).decode("utf-8")
+                 image_string = f"data:{mime_type};base64," + encoded_string
+                 base64_encoded_images.append(image_string)
+             except Exception as e:
+                 print(e)
+     return base64_encoded_images
+
+
+ class FileContentSerialization(StrEnum):
+     NONE = "none"
+     FILE_NAME = "file_name"
+
+
+ class ImageContentInclusion(StrEnum):
+     NONE = "none"
+     ALL = "all"
+
+
+ def file_content_serialization(
+     file_contents: list[Content],
+     file_content_serialization: FileContentSerialization,
+ ) -> str:
+     match file_content_serialization:
+         case FileContentSerialization.NONE:
+             return ""
+         case FileContentSerialization.FILE_NAME:
+             file_names = [
+                 f"- Uploaded file: {f.key} at {f.created_at}"
+                 for f in file_contents
+             ]
+             return "\n".join(
+                 [
+                     "Files Uploaded to Chat can be accessed by internal search tool if available:\n",
+                 ]
+                 + file_names,
+             )
+
+
+ def get_full_history_with_contents(
+     user_message: ChatEventUserMessage,
+     chat_id: str,
+     chat_service: ChatService,
+     content_service: ContentService,
+     include_images: ImageContentInclusion = ImageContentInclusion.ALL,
+     file_content_serialization_type: FileContentSerialization = FileContentSerialization.FILE_NAME,
+ ) -> LanguageModelMessages:
+     grouped_elements = get_chat_history_with_contents(
+         user_message=user_message,
+         chat_id=chat_id,
+         chat_history=chat_service.get_full_history(),
+         content_service=content_service,
+     )
+
+     builder = LanguageModelMessages([]).builder()
+     for c in grouped_elements:
+         # LanguageModelUserMessage has no field original_content
+         text = c.original_content if c.original_content else c.content
+         if text is None:
+             if c.role == ChatRole.USER:
+                 raise ValueError(
+                     "Content or original_content of LanguageModelMessages should exist.",
+                 )
+             text = ""
+
+         if len(c.contents) > 0:
+             file_contents = [
+                 co for co in c.contents if is_file_content(co.key)
+             ]
+             image_contents = [
+                 co for co in c.contents if is_image_content(co.key)
+             ]
+
+             content = (
+                 text
+                 + "\n\n"
+                 + file_content_serialization(
+                     file_contents,
+                     file_content_serialization_type,
+                 )
+             )
+             content = content.strip()
+
+             if include_images and len(image_contents) > 0:
+                 builder.image_message_append(
+                     content=content,
+                     images=download_encoded_images(
+                         contents=image_contents,
+                         content_service=content_service,
+                         chat_id=chat_id,
+                     ),
+                     role=map_chat_llm_message_role[c.role],
+                 )
+             else:
+                 builder.message_append(
+                     role=map_chat_llm_message_role[c.role],
+                     content=content,
+                 )
+         else:
+             builder.message_append(
+                 role=map_chat_llm_message_role[c.role],
+                 content=text,
+             )
+     return builder.build()
+
+
+ def get_full_history_as_llm_messages(
+     chat_service: ChatService,
+ ) -> LanguageModelMessages:
+     chat_history = chat_service.get_full_history()
+
+     map_chat_llm_message_role = {
+         ChatRole.USER: LLMRole.USER,
+         ChatRole.ASSISTANT: LLMRole.ASSISTANT,
+     }
+
+     builder = LanguageModelMessages([]).builder()
+     for c in chat_history:
+         builder.message_append(
+             role=map_chat_llm_message_role[c.role],
+             content=c.content or "",
+         )
+     return builder.build()
+
+
+
+ def limit_to_token_window(
+     messages: LanguageModelMessages,
+     token_limit: int,
+     encoding_name: EncoderName = EncoderName.O200K_BASE,
+ ) -> LanguageModelMessages:
+     encoder = tiktoken.get_encoding(encoding_name)
+     token_per_message_reversed = num_tokens_per_language_model_message(
+         messages,
+         encode=encoder.encode,
+     )
+
+     to_take: list[bool] = (
+         np.cumsum(token_per_message_reversed) < token_limit
+     ).tolist()
+     to_take.reverse()
+
+     return LanguageModelMessages(
+         root=[m for m, tt in zip(messages, to_take, strict=False) if tt],
+     )
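
For orientation only: limit_to_token_window appears to keep the most recent messages whose cumulative token count (counted in reverse chronological order) stays below the limit. A minimal sketch of combining it with get_full_history_as_llm_messages, assuming a configured ChatService instance and that the module is importable under the path listed above (unique_toolkit.history_manager.history_construction_with_contents); the budget value is an assumption:

from unique_toolkit.history_manager.history_construction_with_contents import (
    get_full_history_as_llm_messages,
    limit_to_token_window,
)


def build_trimmed_history(chat_service):
    # chat_service: a configured ChatService instance (construction not shown in this diff).
    messages = get_full_history_as_llm_messages(chat_service)
    # Keep only the most recent messages that fit into an (assumed) 8k-token budget.
    return limit_to_token_window(messages, token_limit=8_000)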