unique_toolkit 0.8.30__tar.gz → 0.8.32__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/CHANGELOG.md +16 -0
  2. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/PKG-INFO +17 -1
  3. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/pyproject.toml +1 -1
  4. unique_toolkit-0.8.32/unique_toolkit/_common/chunk_relevancy_sorter/config.py +45 -0
  5. unique_toolkit-0.8.32/unique_toolkit/_common/chunk_relevancy_sorter/exception.py +5 -0
  6. unique_toolkit-0.8.32/unique_toolkit/_common/chunk_relevancy_sorter/schemas.py +46 -0
  7. unique_toolkit-0.8.32/unique_toolkit/_common/chunk_relevancy_sorter/service.py +372 -0
  8. unique_toolkit-0.8.32/unique_toolkit/_common/chunk_relevancy_sorter/tests/test_service.py +273 -0
  9. unique_toolkit-0.8.32/unique_toolkit/_common/feature_flags/schema.py +13 -0
  10. unique_toolkit-0.8.32/unique_toolkit/_common/utils/structured_output/schema.py +5 -0
  11. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/evals/context_relevancy/schema.py +2 -5
  12. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/evals/context_relevancy/service.py +42 -10
  13. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/language_model/infos.py +156 -0
  14. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/tools/a2a/config.py +1 -3
  15. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/tools/a2a/manager.py +1 -2
  16. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/tools/a2a/memory.py +0 -1
  17. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/tools/a2a/service.py +6 -8
  18. unique_toolkit-0.8.32/unique_toolkit/tools/agent_chunks_hanlder.py +65 -0
  19. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/tools/mcp/models.py +1 -0
  20. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/tools/test/test_mcp_manager.py +10 -19
  21. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/tools/tool.py +2 -0
  22. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/tools/tool_manager.py +1 -3
  23. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/LICENSE +0 -0
  24. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/README.md +0 -0
  25. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/__init__.py +0 -0
  26. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/_common/_base_service.py +0 -0
  27. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/_common/_time_utils.py +0 -0
  28. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/_common/default_language_model.py +0 -0
  29. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/_common/endpoint_builder.py +0 -0
  30. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/_common/exception.py +0 -0
  31. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/_common/token/image_token_counting.py +0 -0
  32. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/_common/token/token_counting.py +0 -0
  33. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/_common/validate_required_values.py +0 -0
  34. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/_common/validators.py +0 -0
  35. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/app/__init__.py +0 -0
  36. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/app/dev_util.py +0 -0
  37. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/app/init_logging.py +0 -0
  38. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/app/init_sdk.py +0 -0
  39. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/app/performance/async_tasks.py +0 -0
  40. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/app/performance/async_wrapper.py +0 -0
  41. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/app/schemas.py +0 -0
  42. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/app/unique_settings.py +0 -0
  43. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/app/verification.py +0 -0
  44. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/chat/__init__.py +0 -0
  45. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/chat/constants.py +0 -0
  46. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/chat/functions.py +0 -0
  47. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/chat/schemas.py +0 -0
  48. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/chat/service.py +0 -0
  49. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/chat/state.py +0 -0
  50. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/chat/utils.py +0 -0
  51. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/content/__init__.py +0 -0
  52. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/content/constants.py +0 -0
  53. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/content/functions.py +0 -0
  54. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/content/schemas.py +0 -0
  55. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/content/service.py +0 -0
  56. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/content/utils.py +0 -0
  57. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/debug_info_manager/debug_info_manager.py +0 -0
  58. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/embedding/__init__.py +0 -0
  59. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/embedding/constants.py +0 -0
  60. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/embedding/functions.py +0 -0
  61. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/embedding/schemas.py +0 -0
  62. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/embedding/service.py +0 -0
  63. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/embedding/utils.py +0 -0
  64. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/evals/config.py +0 -0
  65. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/evals/context_relevancy/prompts.py +0 -0
  66. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/evals/evaluation_manager.py +0 -0
  67. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/evals/exception.py +0 -0
  68. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/evals/hallucination/constants.py +0 -0
  69. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/evals/hallucination/hallucination_evaluation.py +0 -0
  70. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/evals/hallucination/prompts.py +0 -0
  71. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/evals/hallucination/service.py +0 -0
  72. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/evals/hallucination/utils.py +0 -0
  73. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/evals/output_parser.py +0 -0
  74. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/evals/schemas.py +0 -0
  75. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/evals/tests/test_context_relevancy_service.py +0 -0
  76. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/evals/tests/test_output_parser.py +0 -0
  77. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/evaluators/__init__.py +0 -0
  78. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/evaluators/config.py +0 -0
  79. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/evaluators/constants.py +0 -0
  80. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/evaluators/context_relevancy/constants.py +0 -0
  81. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/evaluators/context_relevancy/prompts.py +0 -0
  82. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/evaluators/context_relevancy/service.py +0 -0
  83. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/evaluators/context_relevancy/utils.py +0 -0
  84. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/evaluators/exception.py +0 -0
  85. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/evaluators/hallucination/constants.py +0 -0
  86. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/evaluators/hallucination/prompts.py +0 -0
  87. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/evaluators/hallucination/service.py +0 -0
  88. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/evaluators/hallucination/utils.py +0 -0
  89. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/evaluators/output_parser.py +0 -0
  90. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/evaluators/schemas.py +0 -0
  91. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/framework_utilities/__init__.py +0 -0
  92. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/framework_utilities/langchain/client.py +0 -0
  93. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/framework_utilities/langchain/history.py +0 -0
  94. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/framework_utilities/openai/__init__.py +0 -0
  95. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/framework_utilities/openai/client.py +0 -0
  96. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/framework_utilities/openai/message_builder.py +0 -0
  97. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/framework_utilities/utils.py +0 -0
  98. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/history_manager/history_construction_with_contents.py +0 -0
  99. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/history_manager/history_manager.py +0 -0
  100. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/history_manager/loop_token_reducer.py +0 -0
  101. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/history_manager/utils.py +0 -0
  102. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/language_model/__init__.py +0 -0
  103. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/language_model/builder.py +0 -0
  104. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/language_model/constants.py +0 -0
  105. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/language_model/functions.py +0 -0
  106. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/language_model/prompt.py +0 -0
  107. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/language_model/reference.py +0 -0
  108. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/language_model/schemas.py +0 -0
  109. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/language_model/service.py +0 -0
  110. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/language_model/utils.py +0 -0
  111. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/postprocessor/postprocessor_manager.py +0 -0
  112. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/protocols/support.py +0 -0
  113. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/reference_manager/reference_manager.py +0 -0
  114. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/short_term_memory/__init__.py +0 -0
  115. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/short_term_memory/constants.py +0 -0
  116. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/short_term_memory/functions.py +0 -0
  117. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/short_term_memory/persistent_short_term_memory_manager.py +0 -0
  118. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/short_term_memory/schemas.py +0 -0
  119. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/short_term_memory/service.py +0 -0
  120. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/smart_rules/__init__.py +0 -0
  121. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/smart_rules/compile.py +0 -0
  122. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/thinking_manager/thinking_manager.py +0 -0
  123. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/tools/a2a/__init__.py +0 -0
  124. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/tools/a2a/schema.py +0 -0
  125. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/tools/config.py +0 -0
  126. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/tools/factory.py +0 -0
  127. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/tools/mcp/__init__.py +0 -0
  128. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/tools/mcp/manager.py +0 -0
  129. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/tools/mcp/tool_wrapper.py +0 -0
  130. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/tools/schemas.py +0 -0
  131. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/tools/test/test_tool_progress_reporter.py +0 -0
  132. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/tools/tool_progress_reporter.py +0 -0
  133. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/tools/utils/execution/execution.py +0 -0
  134. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/tools/utils/source_handling/schema.py +0 -0
  135. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/tools/utils/source_handling/source_formatting.py +0 -0
  136. {unique_toolkit-0.8.30 → unique_toolkit-0.8.32}/unique_toolkit/tools/utils/source_handling/tests/test_source_formatting.py +0 -0
@@ -5,6 +5,22 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+
9
+ ## [0.8.32] - 2025-08-30
10
+
11
+ - Moved over general packages for `web_search`
12
+
13
+ ## [0.8.31] - 2025-08-29
14
+ - Add various openai models to supported model list
15
+ - o1
16
+ - o3
17
+ - o3-deep-research
18
+ - o3-pro
19
+ - o4-mini
20
+ - o4-mini-deep-research
21
+ - gpt-4-1-mini
22
+ - gpt-4-1-nano
23
+
8
24
  ## [0.8.30] - 2025-08-28
9
25
  - Added A2A manager
10
26
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: unique_toolkit
3
- Version: 0.8.30
3
+ Version: 0.8.32
4
4
  Summary:
5
5
  License: Proprietary
6
6
  Author: Martin Fadler
@@ -115,6 +115,22 @@ All notable changes to this project will be documented in this file.
115
115
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
116
116
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
117
117
 
118
+
119
+ ## [0.8.32] - 2025-08-30
120
+
121
+ - Moved over general packages for `web_search`
122
+
123
+ ## [0.8.31] - 2025-08-29
124
+ - Add various openai models to supported model list
125
+ - o1
126
+ - o3
127
+ - o3-deep-research
128
+ - o3-pro
129
+ - o4-mini
130
+ - o4-mini-deep-research
131
+ - gpt-4-1-mini
132
+ - gpt-4-1-nano
133
+
118
134
  ## [0.8.30] - 2025-08-28
119
135
  - Added A2A manager
120
136
 
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "unique_toolkit"
3
- version = "0.8.30"
3
+ version = "0.8.32"
4
4
  description = ""
5
5
  authors = [
6
6
  "Martin Fadler <martin.fadler@unique.ch>",
@@ -0,0 +1,45 @@
1
+ from typing import Any
2
+
3
+ from pydantic import BaseModel, Field
4
+ from pydantic.json_schema import SkipJsonSchema
5
+
6
+ from unique_toolkit._common.default_language_model import DEFAULT_GPT_35_TURBO
7
+ from unique_toolkit._common.validators import LMI, get_LMI_default_field
8
+ from unique_toolkit.evals.context_relevancy.schema import StructuredOutputConfig
9
+ from unique_toolkit.tools.config import get_configuration_dict
10
+
11
+
12
class ChunkRelevancySortConfig(BaseModel):
    # Configuration for the chunk relevancy sort.
    # NOTE: documented with `#` comments on purpose; a class docstring would be
    # picked up by pydantic as the JSON-schema description of the model.
    model_config = get_configuration_dict()

    # Master switch for the feature.
    enabled: bool = Field(
        description="Whether to enable the chunk relevancy sort.",
        default=False,
    )

    # Relevancy labels that are kept; chunks with other labels are filtered out.
    relevancy_levels_to_consider: list[str] = Field(
        description="The relevancy levels to consider.",
        default=["high", "medium", "low"],
    )

    # Sort rank per relevancy label (lower rank sorts first).
    relevancy_level_order: dict[str, int] = Field(
        description="The relevancy level order.",
        default={"high": 0, "medium": 1, "low": 2},
    )

    # Primary model used to judge the relevancy of each chunk.
    language_model: LMI = get_LMI_default_field(
        DEFAULT_GPT_35_TURBO,
        description="The language model to use for the chunk relevancy sort.",
    )

    # Model used when the evaluation with the primary model fails.
    fallback_language_model: LMI = get_LMI_default_field(
        DEFAULT_GPT_35_TURBO,
        description="The language model to use as a fallback.",
    )

    # Extra options forwarded to the language model call.
    additional_llm_options: dict[str, Any] = Field(
        description="Additional options to pass to the language model.",
        default={},
    )

    # Controls whether and how structured output is requested from the model.
    structured_output_config: StructuredOutputConfig = Field(
        description="The configuration for the structured output.",
        default_factory=StructuredOutputConfig,
    )

    # Upper bound for concurrently running evaluation tasks.
    max_tasks: int | SkipJsonSchema[None] = Field(
        description="The maximum number of tasks to run in parallel.",
        default=1000,
    )
@@ -0,0 +1,5 @@
1
+ from unique_toolkit._common.exception import CommonException
2
+
3
+
4
class ChunkRelevancySorterException(CommonException):
    """Raised when sorting chunks by relevancy fails."""
@@ -0,0 +1,46 @@
1
+ from typing import Optional
2
+
3
+ from pydantic import BaseModel
4
+
5
+ from unique_toolkit.content.schemas import ContentChunk
6
+ from unique_toolkit.evals.schemas import EvaluationMetricResult
7
+
8
+
9
class ChunkRelevancy(BaseModel):
    # Pairs a content chunk with the (optional) result of its relevancy evaluation.
    # NOTE: `#` comments are used instead of a class docstring so that the
    # generated JSON schema of the model is not changed.
    chunk: ContentChunk
    relevancy: EvaluationMetricResult | None = None

    def get_document_name(self) -> str:
        """Return the document name: the chunk key (or title) up to the first ':'."""
        title = self.chunk.key or self.chunk.title or "Unknown"
        return title.split(":")[0]

    def get_page_number(self) -> str:
        """
        Return the page (range) of the chunk as a string.

        Returns:
            "Unknown Page" when neither bound is set, the single known page when
            only one bound is set or both bounds are equal, otherwise
            "<start>-<end>".
        """
        start_page = self.chunk.start_page
        end_page = self.chunk.end_page

        if start_page is None and end_page is None:
            return "Unknown Page"
        if start_page is None:
            return str(end_page)
        # Explicit `is None` checks (instead of `or`) keep a page number of 0
        # from being treated as missing, and the result is always a string.
        if end_page is None or start_page == end_page:
            return str(start_page)
        return f"{start_page}-{end_page}"

    def get_facts(self):
        """Return the fact list of the relevancy result, or [] when not evaluated."""
        if self.relevancy is None:
            return []
        return self.relevancy.fact_list
32
+
33
+
34
class ChunkRelevancySorterResult(BaseModel):
    # Result of a relevancy sort: the chunk relevancies in their final order
    # plus an optional message describing the run.
    relevancies: list[ChunkRelevancy]
    user_message: Optional[str] = None

    @staticmethod
    def from_chunks(chunks: list[ContentChunk]):
        """Wrap plain chunks into a result without any relevancy evaluation."""
        unevaluated = []
        for content_chunk in chunks:
            unevaluated.append(ChunkRelevancy(chunk=content_chunk))
        return ChunkRelevancySorterResult(relevancies=unevaluated)

    @property
    def content_chunks(self):
        """The content chunks of all relevancies, in the order they are stored."""
        return [entry.chunk for entry in self.relevancies]
@@ -0,0 +1,372 @@
1
+ import logging
2
+ import time
3
+ from collections import Counter
4
+ from typing import Any, overload
5
+
6
+ from typing_extensions import deprecated
7
+
8
+ from unique_toolkit._common.chunk_relevancy_sorter.config import (
9
+ ChunkRelevancySortConfig,
10
+ )
11
+ from unique_toolkit._common.chunk_relevancy_sorter.exception import (
12
+ ChunkRelevancySorterException,
13
+ )
14
+ from unique_toolkit._common.chunk_relevancy_sorter.schemas import (
15
+ ChunkRelevancy,
16
+ ChunkRelevancySorterResult,
17
+ )
18
+ from unique_toolkit._common.validate_required_values import validate_required_values
19
+ from unique_toolkit.app.performance.async_tasks import run_async_tasks_parallel
20
+ from unique_toolkit.app.schemas import BaseEvent, ChatEvent
21
+ from unique_toolkit.content.schemas import ContentChunk
22
+ from unique_toolkit.evals.config import EvaluationMetricConfig
23
+ from unique_toolkit.evals.context_relevancy.schema import (
24
+ EvaluationSchemaStructuredOutput,
25
+ StructuredOutputConfig,
26
+ )
27
+ from unique_toolkit.evals.context_relevancy.service import ContextRelevancyEvaluator
28
+ from unique_toolkit.evals.exception import EvaluatorException
29
+ from unique_toolkit.evals.schemas import (
30
+ EvaluationMetricInput,
31
+ EvaluationMetricName,
32
+ EvaluationMetricResult,
33
+ )
34
+ from unique_toolkit.language_model.infos import LanguageModelInfo
35
+
36
+
37
class ChunkRelevancySorter:
    """
    Re-orders search chunks by their relevancy to an input text.

    Each chunk is evaluated with a `ContextRelevancyEvaluator`, filtered by the
    configured relevancy levels and sorted by the configured level order while
    keeping the original order within each level.
    """

    @deprecated(
        "Use __init__ with company_id and user_id instead or use the classmethod `from_event`"
    )
    @overload
    def __init__(self, event: ChatEvent | BaseEvent):
        """
        Initialize the ChunkRelevancySorter with an event (deprecated)
        """

    @overload
    def __init__(self, *, company_id: str, user_id: str):
        """
        Initialize the ChunkRelevancySorter with a company_id and user_id
        """

    def __init__(
        self,
        event: ChatEvent | BaseEvent | None = None,
        company_id: str | None = None,
        user_id: str | None = None,
    ):
        if isinstance(event, (ChatEvent, BaseEvent)):
            self.chunk_relevancy_evaluator = ContextRelevancyEvaluator.from_event(event)
        else:
            # Without an event both identifiers are mandatory.
            [company_id, user_id] = validate_required_values([company_id, user_id])
            self.chunk_relevancy_evaluator = ContextRelevancyEvaluator(
                company_id=company_id, user_id=user_id
            )
        module_name = "ChunkRelevancySorter"
        self.logger = logging.getLogger(f"{module_name}.{__name__}")

    @classmethod
    def from_event(cls, event: ChatEvent | BaseEvent):
        """Create a sorter from the company and user IDs of an event."""
        return cls(company_id=event.company_id, user_id=event.user_id)

    async def run(
        self,
        input_text: str,
        chunks: list[ContentChunk],
        config: ChunkRelevancySortConfig,
    ) -> ChunkRelevancySorterResult:
        """
        Resorts the search chunks by classifying each chunk into High, Medium, Low depending on the relevancy to the user input, then
        sorts the chunks based on the classification while preserving the original order.

        Args:
            input_text (str): The text the chunks are compared against.
            chunks (list[ContentChunk]): The list of search chunks to be reranked.
            config (ChunkRelevancySortConfig): The configuration of the sort. If it is
                not enabled, the chunks are returned unchanged.

        Returns:
            ChunkRelevancySorterResult: The result of the chunk relevancy sort.

        Raises:
            ChunkRelevancySorterException: If an error occurs while sorting the chunks.
        """

        if not config.enabled:
            self.logger.info("Chunk relevancy sort is disabled.")
            return ChunkRelevancySorterResult.from_chunks(chunks)

        self.logger.info("Running chunk relevancy sort.")
        return await self._run_chunk_relevancy_sort(input_text, chunks, config)

    async def _run_chunk_relevancy_sort(
        self,
        input_text: str,
        chunks: list[ContentChunk],
        config: ChunkRelevancySortConfig,
    ) -> ChunkRelevancySorterResult:
        """
        Evaluate, filter and sort the chunks and report the duration.

        Raises:
            ChunkRelevancySorterException: If evaluation, validation or sorting fails.
        """
        start_time = time.time()

        try:
            self.logger.info(f"Resorting {len(chunks)} chunks based on relevancy...")
            chunk_relevancies = await self._evaluate_chunks_relevancy(
                input_text,
                chunks,
                config,
            )
            resorted_relevancies = await self._validate_and_sort_relevant_chunks(
                config,
                chunk_relevancies,
            )
        except ChunkRelevancySorterException as e:
            self.logger.error(e.error_message)
            raise
        except Exception as e:
            unknown_error_msg = "Unknown error occurred while resorting search results"
            raise ChunkRelevancySorterException(
                user_message=f"{unknown_error_msg}. Fallback to original search results.",
                error_message=f"{unknown_error_msg}: {e}",
                exception=e,
            ) from e

        # The success path is deliberately NOT placed in a `finally` block: a
        # `return` inside `finally` would discard the exceptions raised above
        # and report a successful sort with zero chunks.
        duration = time.time() - start_time
        total_chunks = len(resorted_relevancies)
        success_msg = f"Resorted {total_chunks} chunks in {duration:.2f} seconds."
        self.logger.info(success_msg)
        return ChunkRelevancySorterResult(
            relevancies=resorted_relevancies,
            user_message=success_msg,
        )

    async def _evaluate_chunks_relevancy(
        self,
        input_text: str,
        chunks: list[ContentChunk],
        config: ChunkRelevancySortConfig,
    ) -> list[ChunkRelevancy]:
        """
        Evaluates the relevancy of the chunks.

        Raises:
            ChunkRelevancySorterException: If the evaluation of any chunk returned an exception.
        """
        self.logger.info(
            f"Processing chunk relevancy for {len(chunks)} chunks with {config.language_model.name}. "
            f"(Structured output: {config.structured_output_config.enabled}. Extract fact list: {config.structured_output_config.extract_fact_list})",
        )

        # Evaluate the relevancy of each chunk
        tasks = [
            self._process_relevancy_evaluation(input_text, chunk=chunk, config=config)
            for chunk in chunks
        ]
        chunk_relevancies = await run_async_tasks_parallel(
            tasks=tasks,
            max_tasks=config.max_tasks,
            logger=self.logger,
        )

        # handle exceptions
        for chunk_relevancy in chunk_relevancies:
            if isinstance(chunk_relevancy, Exception):
                error_msg = "Error occurred while evaluating context relevancy of a specific chunk"
                raise ChunkRelevancySorterException(
                    user_message=f"{error_msg}. Fallback to original search results.",
                    error_message=f"{error_msg}: {chunk_relevancy}",
                    exception=chunk_relevancy,
                )

        # This check is currently necessary for typing purposes only
        # as the run_async_tasks_parallel function does not enforce the return type
        # TODO fix return type in run_async_tasks_parallel
        return [
            chunk_relevancy
            for chunk_relevancy in chunk_relevancies
            if isinstance(chunk_relevancy, ChunkRelevancy)
        ]

    async def _evaluate_chunk_relevancy(
        self,
        input_text: str,
        langugage_model: LanguageModelInfo,
        chunk: ContentChunk,
        structured_output_config: StructuredOutputConfig,
        additional_llm_options: dict[str, Any],
    ) -> EvaluationMetricResult | None:
        """
        Gets the relevancy score of the chunk compared to the user message txt.

        NOTE: the parameter name `langugage_model` is misspelled; it is kept as is
        because callers pass it by keyword.
        """
        # Only request a structured output schema when it is enabled in the config.
        structured_output_schema = (
            EvaluationSchemaStructuredOutput.get_with_descriptions(
                structured_output_config
            )
            if structured_output_config.enabled
            else None
        )

        metric_config = EvaluationMetricConfig(
            enabled=True,
            name=EvaluationMetricName.CONTEXT_RELEVANCY,
            language_model=langugage_model,
            additional_llm_options=additional_llm_options,
        )
        relevancy_input = EvaluationMetricInput(
            input_text=input_text,
            context_texts=[chunk.text],
        )

        return await self.chunk_relevancy_evaluator.analyze(
            input=relevancy_input,
            config=metric_config,
            structured_output_schema=structured_output_schema,
        )

    async def _process_relevancy_evaluation(
        self,
        input_text: str,
        chunk: ContentChunk,
        config: ChunkRelevancySortConfig,
    ) -> ChunkRelevancy:
        """
        Evaluate one chunk with the configured model, retrying with the fallback model.

        The fallback model is only tried when the `EvaluatorException` carries an
        underlying exception; otherwise the exception is re-raised unchanged.

        Raises:
            EvaluatorException: If the evaluation fails without an underlying
                exception, or if the fallback evaluation fails as well.
            ChunkRelevancySorterException: If any other error occurs during the
                evaluation with the primary model.
        """
        model = config.language_model
        fallback_model = config.fallback_language_model
        try:
            relevancy = await self._evaluate_chunk_relevancy(
                input_text=input_text,
                langugage_model=model,
                chunk=chunk,
                structured_output_config=config.structured_output_config,
                additional_llm_options=config.additional_llm_options,
            )
        except EvaluatorException as e:
            if not e.exception:
                raise
            self.logger.warning(
                "Error evaluating chunk ID %s with model %s. Trying fallback model %s.",
                chunk.chunk_id,
                model.name,
                fallback_model.name,
                exc_info=e.exception,
            )
            relevancy = await self._evaluate_chunk_relevancy(
                input_text=input_text,
                langugage_model=fallback_model,
                chunk=chunk,
                structured_output_config=config.structured_output_config,
                additional_llm_options=config.additional_llm_options,
            )
        except Exception as e:
            raise ChunkRelevancySorterException(
                user_message="Error occurred while evaluating context relevancy of a specific chunk.",
                error_message=f"Error in _process_relevancy_evaluation: {e}",
                exception=e,
            ) from e

        # Returned for both the primary and the fallback path, so that a chunk
        # evaluated by the fallback model is not silently dropped.
        return ChunkRelevancy(
            chunk=chunk,
            relevancy=relevancy,
        )

    async def _validate_and_sort_relevant_chunks(
        self,
        config: ChunkRelevancySortConfig,
        chunk_relevancies: list[ChunkRelevancy],
    ) -> list[ChunkRelevancy]:
        """
        Validates, filters and sorts the chunk relevancies.

        Raises:
            ChunkRelevancySorterException: If a chunk has no relevancy value.
        """

        # Check that all chunk relevancies have a relevancy level
        await self._validate_chunk_relevancies(chunk_relevancies)

        # Filter the chunks based on the relevancy levels to consider
        chunk_relevancies = await self._filter_chunks_by_relevancy_levels(
            config,
            chunk_relevancies,
        )

        # Sort the chunks based on the relevancy levels
        return await self._sort_chunk_relevancies_by_relevancy_and_chunk(
            config,
            chunk_relevancies,
        )

    async def _validate_chunk_relevancies(
        self,
        chunk_relevancies: list[ChunkRelevancy],
    ):
        """Raise a ChunkRelevancySorterException if any chunk lacks a relevancy value."""
        for chunk_relevancy in chunk_relevancies:
            if not chunk_relevancy.relevancy or not chunk_relevancy.relevancy.value:
                raise ChunkRelevancySorterException(
                    user_message="Error occurred while evaluating chunk relevancy.",
                    error_message=f"No relevancy level returned for chunk ID {chunk_relevancy.chunk.chunk_id}.",
                )

    async def _sort_chunk_relevancies_by_relevancy_and_chunk(
        self,
        config: ChunkRelevancySortConfig,
        chunk_relevancies: list[ChunkRelevancy],
    ):
        """
        Sort by the configured relevancy level order, keeping the input order per level.

        A relevancy value that is missing from `config.relevancy_level_order`
        raises a KeyError.
        """
        # Define the custom sorting order for relevancy
        relevancy_level_order = config.relevancy_level_order

        # `sorted` is stable, so chunks of the same relevancy level keep their
        # original relative order. No tie-breaker keyed by chunk ID is used, as
        # repeated chunk IDs would map to the same position and reorder chunks.
        sorted_chunk_relevancies = sorted(
            chunk_relevancies,
            key=lambda obj: relevancy_level_order[obj.relevancy.value.lower()],  # type: ignore
        )

        # Count and log the distinct values of relevancy
        self._count_distinct_values(sorted_chunk_relevancies)

        return sorted_chunk_relevancies

    async def _filter_chunks_by_relevancy_levels(
        self,
        config: ChunkRelevancySortConfig,
        chunk_relevancies: list[ChunkRelevancy],
    ) -> list[ChunkRelevancy]:
        """Keep only chunks whose relevancy is in `config.relevancy_levels_to_consider` (case-insensitive)."""
        levels_to_consider = [
            relevancy_level.lower()
            for relevancy_level in config.relevancy_levels_to_consider
        ]
        if not levels_to_consider:
            self.logger.warning("No relevancy levels defined, including all levels.")
            return chunk_relevancies

        self.logger.info(
            "Filtering chunks by relevancy levels: %s.", levels_to_consider
        )
        return [
            chunk_relevancy
            for chunk_relevancy in chunk_relevancies
            if chunk_relevancy.relevancy.value.lower() in levels_to_consider  # type: ignore
        ]

    def _count_distinct_values(self, chunk_relevancies: list[ChunkRelevancy]):
        """Log and return how often each relevancy value occurs."""
        # Extract the values from the relevancy field
        values = [
            cr.relevancy.value
            for cr in chunk_relevancies
            if cr.relevancy and cr.relevancy.value
        ]

        # Use Counter to count occurrences
        value_counts = Counter(values)

        self.logger.info("Count of distinct relevancy values:")
        for value, count in value_counts.items():
            self.logger.info(f"Relevancy: {value}, Count: {count}")

        return value_counts