unique_toolkit-1.45.5-py3-none-any.whl → unique_toolkit-1.45.7-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unique_toolkit/agentic/evaluation/config.py +25 -6
- unique_toolkit/agentic/evaluation/context_relevancy/prompts/__init__.py +13 -0
- unique_toolkit/agentic/evaluation/context_relevancy/{prompts.py → prompts/system_prompt.j2} +11 -43
- unique_toolkit/agentic/evaluation/context_relevancy/prompts/user_prompt.j2 +15 -0
- unique_toolkit/agentic/evaluation/context_relevancy/service.py +24 -56
- unique_toolkit/agentic/evaluation/hallucination/constants.py +26 -15
- unique_toolkit/agentic/evaluation/hallucination/prompts/__init__.py +13 -0
- unique_toolkit/agentic/evaluation/hallucination/prompts/system_prompt.j2 +35 -0
- unique_toolkit/agentic/evaluation/hallucination/prompts/user_prompt.j2 +27 -0
- unique_toolkit/agentic/evaluation/hallucination/utils.py +153 -102
- unique_toolkit/agentic/evaluation/tests/fixtures.py +102 -0
- unique_toolkit/agentic/evaluation/tests/test_config.py +247 -0
- unique_toolkit/agentic/evaluation/tests/test_context_relevancy_service.py +141 -121
- unique_toolkit/agentic/evaluation/tests/test_hallucination_constants.py +600 -0
- unique_toolkit/agentic/evaluation/tests/test_hallucination_utils.py +1009 -0
- unique_toolkit/agentic/evaluation/tests/test_output_parser.py +82 -23
- unique_toolkit/agentic/evaluation/tests/test_prompt_loaders.py +348 -0
- unique_toolkit/agentic/evaluation/utils.py +8 -0
- unique_toolkit/chat/responses_api.py +49 -45
- {unique_toolkit-1.45.5.dist-info → unique_toolkit-1.45.7.dist-info}/METADATA +9 -1
- {unique_toolkit-1.45.5.dist-info → unique_toolkit-1.45.7.dist-info}/RECORD +23 -13
- unique_toolkit/agentic/evaluation/hallucination/prompts.py +0 -79
- {unique_toolkit-1.45.5.dist-info → unique_toolkit-1.45.7.dist-info}/LICENSE +0 -0
- {unique_toolkit-1.45.5.dist-info → unique_toolkit-1.45.7.dist-info}/WHEEL +0 -0
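The listing above shows the context-relevancy and hallucination prompts moving from Python string constants (`prompts.py`) into packaged Jinja2 templates (`prompts/system_prompt.j2`, `prompts/user_prompt.j2`) with a small `prompts/__init__.py` alongside them. That loader module is not rendered in this diff, so the sketch below is only a guess at its shape: the function names, template variables, and the use of `importlib.resources` with `jinja2.Template` are assumptions, not the package's actual API.

```python
# Hypothetical sketch of a Jinja2-backed prompt loader such as
# context_relevancy/prompts/__init__.py might contain. Names and
# template variables are assumptions; only the .j2 file names come
# from the file listing above.
from importlib.resources import files

from jinja2 import Template

_PROMPTS_PACKAGE = "unique_toolkit.agentic.evaluation.context_relevancy.prompts"


def _load_template(name: str) -> Template:
    """Read a .j2 template bundled with the package and compile it."""
    source = files(_PROMPTS_PACKAGE).joinpath(name).read_text(encoding="utf-8")
    return Template(source)


def render_system_prompt(**variables: str) -> str:
    """Render system_prompt.j2 with the given template variables."""
    return _load_template("system_prompt.j2").render(**variables)


def render_user_prompt(input_text: str, context_texts: list[str]) -> str:
    """Render user_prompt.j2 with the query and the retrieved contexts."""
    return _load_template("user_prompt.j2").render(
        input_text=input_text, context_texts=context_texts
    )
```

Shipping prompts as `.j2` resources keeps them editable without touching Python code, and the same loader pattern can serve both the context-relevancy and hallucination evaluators, whose `prompts/__init__.py` files appear in the listing.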
```diff
--- a/unique_toolkit/agentic/evaluation/tests/test_context_relevancy_service.py
+++ b/unique_toolkit/agentic/evaluation/tests/test_context_relevancy_service.py
@@ -1,103 +1,88 @@
+"""Tests for context relevancy evaluation service."""
+
 from unittest.mock import MagicMock, patch
 
 import pytest
 
 from unique_toolkit.agentic.evaluation.config import EvaluationMetricConfig
-from unique_toolkit.agentic.evaluation.context_relevancy.prompts import (
-    CONTEXT_RELEVANCY_METRIC_SYSTEM_MSG,
-)
 from unique_toolkit.agentic.evaluation.context_relevancy.schema import (
     EvaluationSchemaStructuredOutput,
 )
-from unique_toolkit.agentic.evaluation.context_relevancy.service import (
-    ContextRelevancyEvaluator,
-)
 from unique_toolkit.agentic.evaluation.exception import EvaluatorException
 from unique_toolkit.agentic.evaluation.schemas import (
     EvaluationMetricInput,
-    EvaluationMetricName,
     EvaluationMetricResult,
 )
-from unique_toolkit.app.schemas import ChatEvent
-from unique_toolkit.chat.service import LanguageModelName
-from unique_toolkit.language_model.infos import (
-    LanguageModelInfo,
-)
 from unique_toolkit.language_model.schemas import (
     LanguageModelAssistantMessage,
     LanguageModelCompletionChoice,
-    LanguageModelMessages,
 )
 from unique_toolkit.language_model.service import LanguageModelResponse
 
 
-@pytest.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    return EvaluationMetricConfig(
-        enabled=True,
-        name=EvaluationMetricName.CONTEXT_RELEVANCY,
-        language_model=LanguageModelInfo.from_name(
-            LanguageModelName.AZURE_GPT_4o_2024_0806
-        ),
-    )
-
-
-@pytest.fixture
-def structured_config(basic_config):
-    model_info = LanguageModelInfo.from_name(LanguageModelName.AZURE_GPT_4o_2024_0806)
-    return EvaluationMetricConfig(
-        enabled=True,
-        name=EvaluationMetricName.CONTEXT_RELEVANCY,
-        language_model=model_info,
-    )
-
-
-@pytest.fixture
-def sample_input():
-    return EvaluationMetricInput(
-        input_text="test query",
-        context_texts=["test context 1", "test context 2"],
+@pytest.mark.ai
+@pytest.mark.asyncio
+async def test_analyze__returns_none__when_disabled(
+    context_relevancy_evaluator: MagicMock,
+    sample_evaluation_input: EvaluationMetricInput,
+    basic_evaluation_config: EvaluationMetricConfig,
+) -> None:
+    """
+    Purpose: Verify that analyze returns None when evaluation is disabled in config.
+    Why this matters: Ensures evaluation can be toggled off without errors or side effects.
+    Setup summary: Set config.enabled=False, call analyze, assert None returned.
+    """
+    # Arrange
+    basic_evaluation_config.enabled = False
+
+    # Act
+    result = await context_relevancy_evaluator.analyze(
+        sample_evaluation_input, basic_evaluation_config
     )
 
-
-@pytest.mark.asyncio
-async def test_analyze_disabled(evaluator, sample_input, basic_config):
-    basic_config.enabled = False
-    result = await evaluator.analyze(sample_input, basic_config)
+    # Assert
     assert result is None
 
 
+@pytest.mark.ai
 @pytest.mark.asyncio
-async def
-
+async def test_analyze__raises_evaluator_exception__with_empty_context(
+    context_relevancy_evaluator: MagicMock,
+    basic_evaluation_config: EvaluationMetricConfig,
+) -> None:
+    """
+    Purpose: Verify that analyze raises exception when context texts are empty.
+    Why this matters: Context relevancy evaluation requires at least one context.
+    Setup summary: Create input with empty context_texts, assert EvaluatorException raised.
+    """
+    # Arrange
+    input_with_empty_context: EvaluationMetricInput = EvaluationMetricInput(
         input_text="test query", context_texts=[]
     )
 
+    # Act & Assert
     with pytest.raises(EvaluatorException) as exc_info:
-        await
+        await context_relevancy_evaluator.analyze(
+            input_with_empty_context, basic_evaluation_config
+        )
 
     assert "No context texts provided." in str(exc_info.value)
 
 
+@pytest.mark.ai
 @pytest.mark.asyncio
-async def
-
+async def test_analyze__returns_valid_result__with_regular_output(
+    context_relevancy_evaluator: MagicMock,
+    sample_evaluation_input: EvaluationMetricInput,
+    basic_evaluation_config: EvaluationMetricConfig,
+) -> None:
+    """
+    Purpose: Verify analyze successfully processes regular (non-structured) output from LLM.
+    Why this matters: Core functionality for evaluation with standard JSON responses.
+    Setup summary: Mock LLM response with JSON, call analyze, assert correct result parsing.
+    """
+    # Arrange
+    mock_result: LanguageModelResponse = LanguageModelResponse(
         choices=[
             LanguageModelCompletionChoice(
                 index=0,
@@ -112,21 +97,36 @@ async def test_analyze_regular_output(evaluator, sample_input, basic_config):
         ]
     )
 
+    # Act
     with patch.object(
-
+        context_relevancy_evaluator.language_model_service,
         "complete_async",
         return_value=mock_result,
     ) as mock_complete:
-        result = await
+        result: EvaluationMetricResult = await context_relevancy_evaluator.analyze(
+            sample_evaluation_input, basic_evaluation_config
+        )
 
+    # Assert
     assert isinstance(result, EvaluationMetricResult)
    assert result.value.lower() == "high"
     mock_complete.assert_called_once()
 
 
+@pytest.mark.ai
 @pytest.mark.asyncio
-async def
-
+async def test_analyze__returns_valid_result__with_structured_output(
+    context_relevancy_evaluator: MagicMock,
+    sample_evaluation_input: EvaluationMetricInput,
+    structured_evaluation_config: EvaluationMetricConfig,
+) -> None:
+    """
+    Purpose: Verify analyze successfully processes structured output from LLM.
+    Why this matters: Structured output provides more reliable parsing for evaluation results.
+    Setup summary: Mock LLM response with structured output, call analyze with schema, assert parsing.
+    """
+    # Arrange
+    mock_result: LanguageModelResponse = LanguageModelResponse(
         choices=[
             LanguageModelCompletionChoice(
                 index=0,
@@ -138,27 +138,42 @@ async def test_analyze_structured_output(evaluator, sample_input, structured_con
             )
         ]
     )
+    structured_output_schema: type[EvaluationSchemaStructuredOutput] = (
+        EvaluationSchemaStructuredOutput
+    )
 
-
-
+    # Act
     with patch.object(
-
+        context_relevancy_evaluator.language_model_service,
         "complete_async",
         return_value=mock_result,
     ) as mock_complete:
-        result = await
-
+        result: EvaluationMetricResult = await context_relevancy_evaluator.analyze(
+            sample_evaluation_input,
+            structured_evaluation_config,
+            structured_output_schema,
        )
+
+    # Assert
     assert isinstance(result, EvaluationMetricResult)
     assert result.value.lower() == "high"
     mock_complete.assert_called_once()
 
 
+@pytest.mark.ai
 @pytest.mark.asyncio
-async def
-
-
-
+async def test_analyze__raises_evaluator_exception__with_invalid_structured_output(
+    context_relevancy_evaluator: MagicMock,
+    sample_evaluation_input: EvaluationMetricInput,
+    structured_evaluation_config: EvaluationMetricConfig,
+) -> None:
+    """
+    Purpose: Verify analyze raises exception when structured output fails validation.
+    Why this matters: Invalid structured output should fail fast with clear error message.
+    Setup summary: Mock LLM response with invalid schema data, assert EvaluatorException raised.
+    """
+    # Arrange
+    mock_result: LanguageModelResponse = LanguageModelResponse(
         choices=[
             LanguageModelCompletionChoice(
                 index=0,
@@ -169,28 +184,42 @@ async def test_analyze_structured_output_validation_error(
             )
         ]
     )
+    structured_output_schema: type[EvaluationSchemaStructuredOutput] = (
+        EvaluationSchemaStructuredOutput
+    )
 
-
-
+    # Act & Assert
     with patch.object(
-
+        context_relevancy_evaluator.language_model_service,
         "complete_async",
         return_value=mock_result,
     ):
         with pytest.raises(EvaluatorException) as exc_info:
-            await
-
+            await context_relevancy_evaluator.analyze(
+                sample_evaluation_input,
+                structured_evaluation_config,
+                structured_output_schema,
             )
+
     assert "Error occurred during structured output validation" in str(
         exc_info.value
     )
 
 
+@pytest.mark.ai
 @pytest.mark.asyncio
-async def
-
-
-
+async def test_analyze__raises_evaluator_exception__with_empty_response(
+    context_relevancy_evaluator: MagicMock,
+    sample_evaluation_input: EvaluationMetricInput,
+    basic_evaluation_config: EvaluationMetricConfig,
+) -> None:
+    """
+    Purpose: Verify analyze raises exception when LLM returns empty response content.
+    Why this matters: Empty responses should fail fast with clear error message.
+    Setup summary: Mock LLM response with empty content, assert EvaluatorException raised.
+    """
+    # Arrange
+    mock_result: LanguageModelResponse = LanguageModelResponse(
         choices=[
             LanguageModelCompletionChoice(
                 index=0,
@@ -200,54 +229,45 @@ async def test_analyze_regular_output_empty_response(
         ]
     )
 
+    # Act & Assert
     with patch.object(
-
+        context_relevancy_evaluator.language_model_service,
         "complete_async",
         return_value=mock_result,
     ):
         with pytest.raises(EvaluatorException) as exc_info:
-            await
-
-
-
-def test_compose_msgs_regular(evaluator, sample_input, basic_config):
-    messages = evaluator._compose_msgs(
-        sample_input, basic_config, enable_structured_output=False
-    )
-
-    assert isinstance(messages, LanguageModelMessages)
-    assert messages.root[0].content == CONTEXT_RELEVANCY_METRIC_SYSTEM_MSG
-    assert isinstance(messages.root[1].content, str)
-    assert "test query" in messages.root[1].content
-    assert "test context 1" in messages.root[1].content
-    assert "test context 2" in messages.root[1].content
-
-
-def test_compose_msgs_structured(evaluator, sample_input, structured_config):
-    messages = evaluator._compose_msgs(
-        sample_input, structured_config, enable_structured_output=True
-    )
+            await context_relevancy_evaluator.analyze(
+                sample_evaluation_input, basic_evaluation_config
+            )
 
-
-    assert len(messages.root) == 2
-    assert (
-        messages.root[0].content != CONTEXT_RELEVANCY_METRIC_SYSTEM_MSG
-    )  # Should use structured output prompt
-    assert isinstance(messages.root[1].content, str)
-    assert "test query" in messages.root[1].content
-    assert "test context 1" in messages.root[1].content
-    assert "test context 2" in messages.root[1].content
+    assert "did not return a result" in str(exc_info.value)
 
 
+@pytest.mark.ai
 @pytest.mark.asyncio
-async def
+async def test_analyze__raises_evaluator_exception__with_unknown_error(
+    context_relevancy_evaluator: MagicMock,
+    sample_evaluation_input: EvaluationMetricInput,
+    basic_evaluation_config: EvaluationMetricConfig,
+) -> None:
+    """
+    Purpose: Verify analyze handles unexpected errors gracefully with wrapped exception.
+    Why this matters: Provides consistent error handling for all failure modes.
+    Setup summary: Mock LLM to raise generic exception, assert EvaluatorException wrapper.
+    """
+    # Arrange - No additional setup needed
+
+    # Act & Assert
     with patch.object(
-
+        context_relevancy_evaluator.language_model_service,
         "complete_async",
         side_effect=Exception("Unknown error"),
     ):
         with pytest.raises(EvaluatorException) as exc_info:
-            await
+            await context_relevancy_evaluator.analyze(
+                sample_evaluation_input, basic_evaluation_config
+            )
+
     assert "Unknown error occurred during context relevancy metric analysis" in str(
         exc_info.value
     )
```