unique_toolkit 1.45.5__tar.gz → 1.45.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/CHANGELOG.md +3 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/PKG-INFO +4 -1
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/pyproject.toml +1 -1
- unique_toolkit-1.45.6/unique_toolkit/agentic/evaluation/config.py +47 -0
- unique_toolkit-1.45.6/unique_toolkit/agentic/evaluation/context_relevancy/prompts/__init__.py +13 -0
- unique_toolkit-1.45.5/unique_toolkit/agentic/evaluation/context_relevancy/prompts.py → unique_toolkit-1.45.6/unique_toolkit/agentic/evaluation/context_relevancy/prompts/system_prompt.j2 +11 -43
- unique_toolkit-1.45.6/unique_toolkit/agentic/evaluation/context_relevancy/prompts/user_prompt.j2 +15 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/evaluation/context_relevancy/service.py +24 -56
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/evaluation/hallucination/constants.py +26 -15
- unique_toolkit-1.45.6/unique_toolkit/agentic/evaluation/hallucination/prompts/__init__.py +13 -0
- unique_toolkit-1.45.6/unique_toolkit/agentic/evaluation/hallucination/prompts/system_prompt.j2 +35 -0
- unique_toolkit-1.45.6/unique_toolkit/agentic/evaluation/hallucination/prompts/user_prompt.j2 +27 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/evaluation/hallucination/utils.py +153 -102
- unique_toolkit-1.45.6/unique_toolkit/agentic/evaluation/tests/fixtures.py +102 -0
- unique_toolkit-1.45.6/unique_toolkit/agentic/evaluation/tests/test_config.py +247 -0
- unique_toolkit-1.45.6/unique_toolkit/agentic/evaluation/tests/test_context_relevancy_service.py +273 -0
- unique_toolkit-1.45.6/unique_toolkit/agentic/evaluation/tests/test_hallucination_constants.py +600 -0
- unique_toolkit-1.45.6/unique_toolkit/agentic/evaluation/tests/test_hallucination_utils.py +1009 -0
- unique_toolkit-1.45.6/unique_toolkit/agentic/evaluation/tests/test_output_parser.py +146 -0
- unique_toolkit-1.45.6/unique_toolkit/agentic/evaluation/tests/test_prompt_loaders.py +348 -0
- unique_toolkit-1.45.6/unique_toolkit/agentic/evaluation/utils.py +8 -0
- unique_toolkit-1.45.5/unique_toolkit/agentic/evaluation/config.py +0 -28
- unique_toolkit-1.45.5/unique_toolkit/agentic/evaluation/hallucination/prompts.py +0 -79
- unique_toolkit-1.45.5/unique_toolkit/agentic/evaluation/tests/test_context_relevancy_service.py +0 -253
- unique_toolkit-1.45.5/unique_toolkit/agentic/evaluation/tests/test_output_parser.py +0 -87
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/LICENSE +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/README.md +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/_base_service.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/_time_utils.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/api_calling/human_verification_manager.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/base_model_type_attribute.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/chunk_relevancy_sorter/config.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/chunk_relevancy_sorter/exception.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/chunk_relevancy_sorter/schemas.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/chunk_relevancy_sorter/service.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/chunk_relevancy_sorter/tests/test_service.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/default_language_model.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/docx_generator/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/docx_generator/config.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/docx_generator/schemas.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/docx_generator/service.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/docx_generator/template/Doc Template.docx +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/endpoint_builder.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/endpoint_requestor.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/exception.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/execution.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/experimental/endpoint_builder.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/experimental/endpoint_requestor.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/experimental/write_up_agent/README.md +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/experimental/write_up_agent/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/experimental/write_up_agent/agent.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/experimental/write_up_agent/config.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/experimental/write_up_agent/examples/data.csv +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/experimental/write_up_agent/examples/example_usage.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/experimental/write_up_agent/schemas.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/experimental/write_up_agent/services/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/experimental/write_up_agent/services/dataframe_handler/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/experimental/write_up_agent/services/dataframe_handler/exceptions.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/experimental/write_up_agent/services/dataframe_handler/service.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/experimental/write_up_agent/services/dataframe_handler/utils.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/experimental/write_up_agent/services/generation_handler/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/experimental/write_up_agent/services/generation_handler/config.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/experimental/write_up_agent/services/generation_handler/exceptions.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/experimental/write_up_agent/services/generation_handler/prompts/config.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/experimental/write_up_agent/services/generation_handler/prompts/system_prompt.j2 +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/experimental/write_up_agent/services/generation_handler/prompts/user_prompt.j2 +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/experimental/write_up_agent/services/generation_handler/service.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/experimental/write_up_agent/services/template_handler/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/experimental/write_up_agent/services/template_handler/default_template.j2 +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/experimental/write_up_agent/services/template_handler/exceptions.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/experimental/write_up_agent/services/template_handler/service.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/experimental/write_up_agent/services/template_handler/utils.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/experimental/write_up_agent/utils.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/feature_flags/schema.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/pydantic/rjsf_tags.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/pydantic_helpers.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/referencing.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/string_utilities.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/tests/test_referencing.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/tests/test_string_utilities.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/token/image_token_counting.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/token/token_counting.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/utils/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/utils/files.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/utils/image/encode.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/utils/jinja/helpers.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/utils/jinja/render.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/utils/jinja/schema.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/utils/jinja/utils.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/utils/structured_output/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/utils/structured_output/schema.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/utils/write_configuration.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/validate_required_values.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/_common/validators.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/debug_info_manager/debug_info_manager.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/debug_info_manager/test/test_debug_info_manager.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/evaluation/context_relevancy/schema.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/evaluation/evaluation_manager.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/evaluation/exception.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/evaluation/hallucination/hallucination_evaluation.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/evaluation/hallucination/service.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/evaluation/output_parser.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/evaluation/schemas.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/feature_flags/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/feature_flags/feature_flags.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/history_manager/history_construction_with_contents.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/history_manager/history_manager.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/history_manager/loop_token_reducer.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/history_manager/utils.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/loop_runner/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/loop_runner/_iteration_handler_utils.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/loop_runner/_responses_iteration_handler_utils.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/loop_runner/_responses_stream_handler_utils.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/loop_runner/_stream_handler_utils.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/loop_runner/base.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/loop_runner/middleware/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/loop_runner/middleware/planning/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/loop_runner/middleware/planning/planning.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/loop_runner/middleware/planning/schema.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/loop_runner/runners/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/loop_runner/runners/basic.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/loop_runner/runners/qwen/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/loop_runner/runners/qwen/helpers.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/loop_runner/runners/qwen/qwen_runner.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/message_log_manager/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/message_log_manager/service.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/postprocessor/postprocessor_manager.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/reference_manager/reference_manager.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/responses_api/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/responses_api/postprocessors/code_display.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/responses_api/postprocessors/generated_files.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/responses_api/stream_handler.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/short_term_memory_manager/persistent_short_term_memory_manager.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/thinking_manager/thinking_manager.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/a2a/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/a2a/config.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/a2a/evaluation/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/a2a/evaluation/_utils.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/a2a/evaluation/config.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/a2a/evaluation/evaluator.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/a2a/evaluation/summarization_user_message.j2 +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/a2a/manager.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/a2a/postprocessing/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/a2a/postprocessing/_display_utils.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/a2a/postprocessing/_ref_utils.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/a2a/postprocessing/config.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/a2a/postprocessing/display.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/a2a/postprocessing/references.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/a2a/postprocessing/test/test_display.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/a2a/postprocessing/test/test_display_utils.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/a2a/postprocessing/test/test_ref_utils.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/a2a/prompts.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/a2a/response_watcher/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/a2a/response_watcher/service.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/a2a/tool/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/a2a/tool/_memory.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/a2a/tool/_schema.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/a2a/tool/config.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/a2a/tool/service.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/a2a/tool/test/test_service_utils.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/agent_chunks_hanlder.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/config.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/factory.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/mcp/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/mcp/manager.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/mcp/models.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/mcp/tool_wrapper.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/openai_builtin/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/openai_builtin/base.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/openai_builtin/code_interpreter/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/openai_builtin/code_interpreter/config.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/openai_builtin/code_interpreter/service.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/openai_builtin/manager.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/schemas.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/test/test_mcp_manager.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/test/test_tool_progress_reporter.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/tool.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/tool_manager.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/tool_progress_reporter.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/utils/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/utils/source_handling/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/utils/source_handling/schema.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/utils/source_handling/source_formatting.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic/tools/utils/source_handling/tests/test_source_formatting.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic_table/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic_table/schemas.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/agentic_table/service.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/app/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/app/dev_util.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/app/fast_api_factory.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/app/init_logging.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/app/init_sdk.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/app/performance/async_tasks.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/app/performance/async_wrapper.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/app/schemas.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/app/unique_settings.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/app/verification.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/app/webhook.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/chat/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/chat/constants.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/chat/deprecated/service.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/chat/functions.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/chat/rendering.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/chat/responses_api.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/chat/schemas.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/chat/service.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/chat/state.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/chat/utils.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/content/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/content/constants.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/content/functions.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/content/schemas.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/content/service.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/content/smart_rules.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/content/utils.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/data_extraction/README.md +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/data_extraction/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/data_extraction/augmented/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/data_extraction/augmented/service.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/data_extraction/base.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/data_extraction/basic/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/data_extraction/basic/config.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/data_extraction/basic/prompt.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/data_extraction/basic/service.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/embedding/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/embedding/constants.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/embedding/functions.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/embedding/schemas.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/embedding/service.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/embedding/utils.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/framework_utilities/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/framework_utilities/langchain/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/framework_utilities/langchain/client.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/framework_utilities/langchain/history.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/framework_utilities/openai/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/framework_utilities/openai/client.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/framework_utilities/openai/message_builder.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/framework_utilities/utils.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/language_model/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/language_model/_responses_api_utils.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/language_model/builder.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/language_model/constants.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/language_model/default_language_model.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/language_model/functions.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/language_model/infos.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/language_model/prompt.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/language_model/reference.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/language_model/schemas.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/language_model/service.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/language_model/utils.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/protocols/support.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/services/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/services/chat_service.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/services/knowledge_base.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/short_term_memory/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/short_term_memory/constants.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/short_term_memory/functions.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/short_term_memory/schemas.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/short_term_memory/service.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/smart_rules/__init__.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/smart_rules/compile.py +0 -0
- {unique_toolkit-1.45.5 → unique_toolkit-1.45.6}/unique_toolkit/test_utilities/events.py +0 -0
|
@@ -5,6 +5,9 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [1.45.6] - 2026-01-30
|
|
9
|
+
- hallucination evaluator: Use original response to retrieve referenced chunk
|
|
10
|
+
|
|
8
11
|
## [1.45.5] - 2026-01-29
|
|
9
12
|
- Add HTML rendering support for code interpreter generated files
|
|
10
13
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: unique_toolkit
|
|
3
|
-
Version: 1.45.5
|
|
3
|
+
Version: 1.45.6
|
|
4
4
|
Summary:
|
|
5
5
|
License: Proprietary
|
|
6
6
|
Author: Cedric Klinkert
|
|
@@ -125,6 +125,9 @@ All notable changes to this project will be documented in this file.
|
|
|
125
125
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
126
126
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
127
127
|
|
|
128
|
+
## [1.45.6] - 2026-01-30
|
|
129
|
+
- hallucination evaluator: Use original response to retrieve referenced chunk
|
|
130
|
+
|
|
128
131
|
## [1.45.5] - 2026-01-29
|
|
129
132
|
- Add HTML rendering support for code interpreter generated files
|
|
130
133
|
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
from typing import Annotated, Any
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel, Field
|
|
4
|
+
from pydantic.json_schema import SkipJsonSchema
|
|
5
|
+
|
|
6
|
+
from unique_toolkit._common.pydantic.rjsf_tags import RJSFMetaTag
|
|
7
|
+
from unique_toolkit._common.pydantic_helpers import get_configuration_dict
|
|
8
|
+
from unique_toolkit._common.validators import LMI
|
|
9
|
+
from unique_toolkit.agentic.evaluation.schemas import (
|
|
10
|
+
EvaluationMetricName,
|
|
11
|
+
)
|
|
12
|
+
from unique_toolkit.agentic.tools.schemas import BaseToolConfig
|
|
13
|
+
from unique_toolkit.language_model.default_language_model import DEFAULT_GPT_4o
|
|
14
|
+
from unique_toolkit.language_model.infos import LanguageModelInfo
|
|
15
|
+
|
|
16
|
+
PromptType = Annotated[str, RJSFMetaTag.StringWidget.textarea(rows=5)]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class EvaluationMetricPromptsConfig(BaseModel):
|
|
20
|
+
model_config = get_configuration_dict()
|
|
21
|
+
|
|
22
|
+
system_prompt_template: PromptType = Field(
|
|
23
|
+
default="",
|
|
24
|
+
description="The system prompt for the evaluation metric.",
|
|
25
|
+
)
|
|
26
|
+
user_prompt_template: PromptType = Field(
|
|
27
|
+
default="",
|
|
28
|
+
description="The user prompt for the evaluation metric.",
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class EvaluationMetricConfig(BaseToolConfig):
|
|
33
|
+
enabled: SkipJsonSchema[bool] = False
|
|
34
|
+
name: SkipJsonSchema[EvaluationMetricName]
|
|
35
|
+
language_model: LMI = LanguageModelInfo.from_name(
|
|
36
|
+
DEFAULT_GPT_4o,
|
|
37
|
+
)
|
|
38
|
+
additional_llm_options: dict[str, Any] = Field(
|
|
39
|
+
default={},
|
|
40
|
+
description="Additional options to pass to the language model.",
|
|
41
|
+
)
|
|
42
|
+
prompts_config: EvaluationMetricPromptsConfig = Field(
|
|
43
|
+
default_factory=EvaluationMetricPromptsConfig,
|
|
44
|
+
description="The prompts config for the evaluation metric.",
|
|
45
|
+
)
|
|
46
|
+
score_to_label: dict[str, str] = {}
|
|
47
|
+
score_to_title: dict[str, str] = {}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from unique_toolkit.agentic.evaluation.utils import load_template
|
|
4
|
+
|
|
5
|
+
CONTEXT_RELEVANCY_PROMPTS_DIR = Path(__file__).parent
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def system_prompt_loader():
|
|
9
|
+
return load_template(CONTEXT_RELEVANCY_PROMPTS_DIR, "system_prompt.j2")
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def user_prompt_loader():
|
|
13
|
+
return load_template(CONTEXT_RELEVANCY_PROMPTS_DIR, "user_prompt.j2")
|
|
@@ -1,4 +1,13 @@
|
|
|
1
|
-
|
|
1
|
+
{% if structured_output %}
|
|
2
|
+
You will receive an input and a set of contexts.
|
|
3
|
+
Your task is to evaluate how relevant the contexts are to the input text.
|
|
4
|
+
Further you should extract relevant facts from the contexts.
|
|
5
|
+
|
|
6
|
+
# Output Format
|
|
7
|
+
- Generate data according to the provided data schema.
|
|
8
|
+
- Ensure the output adheres to the format required by the pydantic object.
|
|
9
|
+
- All necessary fields should be populated as per the data schema guidelines.
|
|
10
|
+
{% else %}
|
|
2
11
|
You will receive an input and a set of contexts.
|
|
3
12
|
Your task is to evaluate how relevant the contexts are to the input text.
|
|
4
13
|
|
|
@@ -12,45 +21,4 @@ Your answer must be in JSON format:
|
|
|
12
21
|
"reason": Your explanation of your judgement of the evaluation,
|
|
13
22
|
"value": decision, must be one of the following ["low", "medium", "high"]
|
|
14
23
|
}
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
CONTEXT_RELEVANCY_METRIC_SYSTEM_MSG_STRUCTURED_OUTPUT = """
|
|
18
|
-
You will receive an input and a set of contexts.
|
|
19
|
-
Your task is to evaluate how relevant the contexts are to the input text.
|
|
20
|
-
Further you should extract relevant facts from the contexts.
|
|
21
|
-
|
|
22
|
-
# Output Format
|
|
23
|
-
- Generate data according to the provided data schema.
|
|
24
|
-
- Ensure the output adheres to the format required by the pydantic object.
|
|
25
|
-
- All necessary fields should be populated as per the data schema guidelines.
|
|
26
|
-
"""
|
|
27
|
-
|
|
28
|
-
CONTEXT_RELEVANCY_METRIC_USER_MSG = """
|
|
29
|
-
Here is the data:
|
|
30
|
-
|
|
31
|
-
Input:
|
|
32
|
-
'''
|
|
33
|
-
$input_text
|
|
34
|
-
'''
|
|
35
|
-
|
|
36
|
-
Contexts:
|
|
37
|
-
'''
|
|
38
|
-
$context_texts
|
|
39
|
-
'''
|
|
40
|
-
|
|
41
|
-
Answer as JSON:
|
|
42
|
-
"""
|
|
43
|
-
|
|
44
|
-
CONTEXT_RELEVANCY_METRIC_USER_MSG_STRUCTURED_OUTPUT = """
|
|
45
|
-
Here is the data:
|
|
46
|
-
|
|
47
|
-
Input:
|
|
48
|
-
'''
|
|
49
|
-
$input_text
|
|
50
|
-
'''
|
|
51
|
-
|
|
52
|
-
Contexts:
|
|
53
|
-
'''
|
|
54
|
-
$context_texts
|
|
55
|
-
'''
|
|
56
|
-
"""
|
|
24
|
+
{% endif %}
|
|
@@ -4,10 +4,14 @@ from typing import overload
|
|
|
4
4
|
from pydantic import BaseModel, ValidationError
|
|
5
5
|
from typing_extensions import deprecated
|
|
6
6
|
|
|
7
|
+
from unique_toolkit._common.utils.jinja.render import render_template
|
|
7
8
|
from unique_toolkit._common.validate_required_values import (
|
|
8
9
|
validate_required_values,
|
|
9
10
|
)
|
|
10
|
-
from unique_toolkit.agentic.evaluation.config import EvaluationMetricConfig
|
|
11
|
+
from unique_toolkit.agentic.evaluation.config import (
|
|
12
|
+
EvaluationMetricConfig,
|
|
13
|
+
EvaluationMetricPromptsConfig,
|
|
14
|
+
)
|
|
11
15
|
from unique_toolkit.agentic.evaluation.context_relevancy.schema import (
|
|
12
16
|
EvaluationSchemaStructuredOutput,
|
|
13
17
|
)
|
|
@@ -28,32 +32,25 @@ from unique_toolkit.language_model.infos import (
|
|
|
28
32
|
LanguageModelInfo,
|
|
29
33
|
ModelCapabilities,
|
|
30
34
|
)
|
|
31
|
-
from unique_toolkit.language_model.prompt import Prompt
|
|
32
35
|
from unique_toolkit.language_model.schemas import (
|
|
33
36
|
LanguageModelMessages,
|
|
37
|
+
LanguageModelSystemMessage,
|
|
38
|
+
LanguageModelUserMessage,
|
|
34
39
|
)
|
|
35
40
|
from unique_toolkit.language_model.service import (
|
|
36
41
|
LanguageModelService,
|
|
37
42
|
)
|
|
38
43
|
|
|
39
|
-
from .prompts import (
|
|
40
|
-
CONTEXT_RELEVANCY_METRIC_SYSTEM_MSG,
|
|
41
|
-
CONTEXT_RELEVANCY_METRIC_SYSTEM_MSG_STRUCTURED_OUTPUT,
|
|
42
|
-
CONTEXT_RELEVANCY_METRIC_USER_MSG,
|
|
43
|
-
CONTEXT_RELEVANCY_METRIC_USER_MSG_STRUCTURED_OUTPUT,
|
|
44
|
-
)
|
|
45
|
-
|
|
46
|
-
SYSTEM_MSG_KEY = "systemPrompt"
|
|
47
|
-
USER_MSG_KEY = "userPrompt"
|
|
44
|
+
from .prompts import system_prompt_loader, user_prompt_loader
|
|
48
45
|
|
|
49
46
|
default_config = EvaluationMetricConfig(
|
|
50
47
|
enabled=False,
|
|
51
48
|
name=EvaluationMetricName.CONTEXT_RELEVANCY,
|
|
52
49
|
language_model=LanguageModelInfo.from_name(DEFAULT_GPT_4o),
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
50
|
+
prompts_config=EvaluationMetricPromptsConfig(
|
|
51
|
+
system_prompt_template=system_prompt_loader(),
|
|
52
|
+
user_prompt_template=user_prompt_loader(),
|
|
53
|
+
),
|
|
57
54
|
)
|
|
58
55
|
|
|
59
56
|
relevancy_required_input_fields = [
|
|
@@ -225,49 +222,20 @@ class ContextRelevancyEvaluator:
|
|
|
225
222
|
"""
|
|
226
223
|
Composes the messages for the relevancy metric.
|
|
227
224
|
"""
|
|
228
|
-
|
|
229
|
-
|
|
225
|
+
# Render system message
|
|
226
|
+
system_msg_content = render_template(
|
|
227
|
+
config.prompts_config.system_prompt_template,
|
|
228
|
+
structured_output=enable_structured_output,
|
|
229
|
+
)
|
|
230
|
+
system_msg = LanguageModelSystemMessage(content=system_msg_content)
|
|
230
231
|
|
|
231
|
-
|
|
232
|
-
|
|
232
|
+
# Render user message
|
|
233
|
+
user_msg_content = render_template(
|
|
234
|
+
config.prompts_config.user_prompt_template,
|
|
233
235
|
input_text=input.input_text,
|
|
234
236
|
context_texts=input.get_joined_context_texts(),
|
|
235
|
-
|
|
237
|
+
structured_output=enable_structured_output,
|
|
238
|
+
)
|
|
239
|
+
user_msg = LanguageModelUserMessage(content=user_msg_content)
|
|
236
240
|
|
|
237
241
|
return LanguageModelMessages([system_msg, user_msg])
|
|
238
|
-
|
|
239
|
-
def _get_system_prompt(
|
|
240
|
-
self,
|
|
241
|
-
config: EvaluationMetricConfig,
|
|
242
|
-
enable_structured_output: bool,
|
|
243
|
-
):
|
|
244
|
-
if (
|
|
245
|
-
enable_structured_output
|
|
246
|
-
and ModelCapabilities.STRUCTURED_OUTPUT
|
|
247
|
-
in config.language_model.capabilities
|
|
248
|
-
):
|
|
249
|
-
return config.custom_prompts.setdefault(
|
|
250
|
-
SYSTEM_MSG_KEY,
|
|
251
|
-
CONTEXT_RELEVANCY_METRIC_SYSTEM_MSG_STRUCTURED_OUTPUT,
|
|
252
|
-
)
|
|
253
|
-
else:
|
|
254
|
-
return config.custom_prompts.setdefault(
|
|
255
|
-
SYSTEM_MSG_KEY,
|
|
256
|
-
CONTEXT_RELEVANCY_METRIC_SYSTEM_MSG,
|
|
257
|
-
)
|
|
258
|
-
|
|
259
|
-
def _get_user_prompt(
|
|
260
|
-
self,
|
|
261
|
-
config: EvaluationMetricConfig,
|
|
262
|
-
enable_structured_output: bool,
|
|
263
|
-
):
|
|
264
|
-
if enable_structured_output:
|
|
265
|
-
return config.custom_prompts.setdefault(
|
|
266
|
-
USER_MSG_KEY,
|
|
267
|
-
CONTEXT_RELEVANCY_METRIC_USER_MSG_STRUCTURED_OUTPUT,
|
|
268
|
-
)
|
|
269
|
-
else:
|
|
270
|
-
return config.custom_prompts.setdefault(
|
|
271
|
-
USER_MSG_KEY,
|
|
272
|
-
CONTEXT_RELEVANCY_METRIC_USER_MSG,
|
|
273
|
-
)
|
|
@@ -1,15 +1,18 @@
|
|
|
1
|
+
from enum import StrEnum
|
|
1
2
|
from typing import Any
|
|
2
3
|
|
|
3
4
|
from pydantic import Field
|
|
4
5
|
from pydantic.json_schema import SkipJsonSchema
|
|
5
6
|
|
|
6
7
|
from unique_toolkit._common.validators import LMI
|
|
7
|
-
from unique_toolkit.agentic.evaluation.config import EvaluationMetricConfig
|
|
8
|
+
from unique_toolkit.agentic.evaluation.config import (
|
|
9
|
+
EvaluationMetricConfig,
|
|
10
|
+
EvaluationMetricPromptsConfig,
|
|
11
|
+
PromptType,
|
|
12
|
+
)
|
|
8
13
|
from unique_toolkit.agentic.evaluation.hallucination.prompts import (
|
|
9
|
-
HALLUCINATION_METRIC_SYSTEM_MSG,
|
|
10
|
-
HALLUCINATION_METRIC_SYSTEM_MSG_DEFAULT,
|
|
11
|
-
HALLUCINATION_METRIC_USER_MSG,
|
|
12
|
-
HALLUCINATION_METRIC_USER_MSG_DEFAULT,
|
|
14
|
+
system_prompt_loader,
|
|
15
|
+
user_prompt_loader,
|
|
13
16
|
)
|
|
14
17
|
from unique_toolkit.agentic.evaluation.schemas import (
|
|
15
18
|
EvaluationMetricInputFieldName,
|
|
@@ -18,28 +21,36 @@ from unique_toolkit.agentic.evaluation.schemas import (
|
|
|
18
21
|
from unique_toolkit.language_model.default_language_model import DEFAULT_GPT_4o
|
|
19
22
|
from unique_toolkit.language_model.infos import LanguageModelInfo
|
|
20
23
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
24
|
+
|
|
25
|
+
class SourceSelectionMode(StrEnum):
|
|
26
|
+
FROM_IDS = "FROM_IDS"
|
|
27
|
+
FROM_ORDER = "FROM_ORDER"
|
|
28
|
+
FROM_ORIGINAL_RESPONSE = "FROM_ORIGINAL_RESPONSE"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class HallucinationPromptsConfig(EvaluationMetricPromptsConfig):
|
|
32
|
+
system_prompt_template: PromptType = Field(default_factory=system_prompt_loader)
|
|
33
|
+
user_prompt_template: PromptType = Field(default_factory=user_prompt_loader)
|
|
25
34
|
|
|
26
35
|
|
|
27
36
|
class HallucinationConfig(EvaluationMetricConfig):
|
|
37
|
+
source_selection_mode: SourceSelectionMode = Field(
|
|
38
|
+
default=SourceSelectionMode.FROM_ORIGINAL_RESPONSE
|
|
39
|
+
)
|
|
40
|
+
reference_pattern: str = Field(default=r"[\[<]?source(\d+)[>\]]?")
|
|
28
41
|
enabled: SkipJsonSchema[bool] = False
|
|
29
42
|
name: SkipJsonSchema[EvaluationMetricName] = EvaluationMetricName.HALLUCINATION
|
|
30
43
|
language_model: LMI = LanguageModelInfo.from_name(
|
|
31
44
|
DEFAULT_GPT_4o,
|
|
32
45
|
)
|
|
46
|
+
prompts_config: HallucinationPromptsConfig = Field( # type: ignore[assignment]
|
|
47
|
+
default_factory=HallucinationPromptsConfig,
|
|
48
|
+
description="The prompts config for the hallucination metric",
|
|
49
|
+
)
|
|
33
50
|
additional_llm_options: dict[str, Any] = Field(
|
|
34
51
|
default={},
|
|
35
52
|
description="Additional options to pass to the language model.",
|
|
36
53
|
)
|
|
37
|
-
custom_prompts: dict = {
|
|
38
|
-
SYSTEM_MSG_KEY: HALLUCINATION_METRIC_SYSTEM_MSG,
|
|
39
|
-
USER_MSG_KEY: HALLUCINATION_METRIC_USER_MSG,
|
|
40
|
-
SYSTEM_MSG_DEFAULT_KEY: HALLUCINATION_METRIC_SYSTEM_MSG_DEFAULT,
|
|
41
|
-
USER_MSG_DEFAULT_KEY: HALLUCINATION_METRIC_USER_MSG_DEFAULT,
|
|
42
|
-
}
|
|
43
54
|
score_to_label: dict = {
|
|
44
55
|
"LOW": "GREEN",
|
|
45
56
|
"MEDIUM": "YELLOW",
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from unique_toolkit.agentic.evaluation.utils import load_template
|
|
4
|
+
|
|
5
|
+
HALLUCINATION_PROMPTS_DIR = Path(__file__).parent
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def system_prompt_loader():
|
|
9
|
+
return load_template(HALLUCINATION_PROMPTS_DIR, "system_prompt.j2")
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def user_prompt_loader():
|
|
13
|
+
return load_template(HALLUCINATION_PROMPTS_DIR, "user_prompt.j2")
|
unique_toolkit-1.45.6/unique_toolkit/agentic/evaluation/hallucination/prompts/system_prompt.j2
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{% if has_context %}
|
|
2
|
+
You will receive a question, references, a conversation between a user and an agent, and an output.
|
|
3
|
+
The output is the answer to the question.
|
|
4
|
+
Your task is to evaluate whether the FACTS stated in the output are grounded in and supported by the provided references and conversation. Provide your reasoning in 2 sentences.
|
|
5
|
+
|
|
6
|
+
IMPORTANT: You are NOT evaluating completeness. An incomplete answer that is grounded in the sources is acceptable. You are ONLY checking if the facts that ARE present are supported by the sources.
|
|
7
|
+
|
|
8
|
+
Use the following hallucination scale:
|
|
9
|
+
[low] - All facts and claims in the output are directly supported by or grounded in the references/conversation. The answer may be incomplete, but what IS stated is accurate to the sources.
|
|
10
|
+
[medium] - Most facts are supported, but there are some claims or details that go beyond what the references/conversation state or make reasonable inferences not directly supported.
|
|
11
|
+
[high] - The output contains facts that contradict the references/conversation, or makes claims that have no basis in the provided sources.
|
|
12
|
+
|
|
13
|
+
Rules:
|
|
14
|
+
- Do NOT penalize incomplete answers - if the sources have more info but the output doesn't mention it, that's fine
|
|
15
|
+
- Do NOT use external knowledge - only evaluate based on the provided sources
|
|
16
|
+
- DO penalize when the output states facts that are not found in the sources (unsupported claims)
|
|
17
|
+
- DO penalize when the output contradicts what the sources say
|
|
18
|
+
- Ignore references to plots, charts, or visual elements in your evaluation
|
|
19
|
+
{% else %}
|
|
20
|
+
You will receive a question and an output.
|
|
21
|
+
The situation is that NO references were found to answer the question. Your task is to evaluate if the output inappropriately provides factual information despite having no sources to ground it in. Provide your reasoning in 2 sentences, mentioning that no references were provided.
|
|
22
|
+
|
|
23
|
+
Use the following hallucination scale:
|
|
24
|
+
[low] - The output appropriately indicates it cannot answer due to lack of information, or provides no substantive factual claims.
|
|
25
|
+
[medium] - The output provides some limited factual information or makes cautious claims without strong source support.
|
|
26
|
+
[high] - The output confidently provides detailed factual information despite having no sources to support it.
|
|
27
|
+
|
|
28
|
+
Rule: Without sources, providing factual answers constitutes hallucination regardless of external correctness.
|
|
29
|
+
{% endif %}
|
|
30
|
+
|
|
31
|
+
Your answer must be in JSON format:
|
|
32
|
+
{
|
|
33
|
+
"reason": Your explanation of your {{ 'judgement' if has_context else 'reasoning' }} of the evaluation,
|
|
34
|
+
"value": decision, must be one of the following: ["high", "medium", "low"]
|
|
35
|
+
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
Here is the data:
|
|
2
|
+
|
|
3
|
+
Input:
|
|
4
|
+
'''
|
|
5
|
+
{{ input_text }}
|
|
6
|
+
'''
|
|
7
|
+
{% if contexts_text %}
|
|
8
|
+
|
|
9
|
+
References:
|
|
10
|
+
'''
|
|
11
|
+
{{ contexts_text }}
|
|
12
|
+
'''
|
|
13
|
+
{% endif %}
|
|
14
|
+
{% if history_messages_text %}
|
|
15
|
+
|
|
16
|
+
Conversation:
|
|
17
|
+
'''
|
|
18
|
+
{{ history_messages_text }}
|
|
19
|
+
'''
|
|
20
|
+
{% endif %}
|
|
21
|
+
|
|
22
|
+
Output:
|
|
23
|
+
'''
|
|
24
|
+
{{ output_text }}
|
|
25
|
+
'''
|
|
26
|
+
|
|
27
|
+
Answer as JSON:
|