unique_toolkit 0.7.7__py3-none-any.whl → 1.23.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
Potentially problematic release: this version of unique_toolkit might be problematic.
- unique_toolkit/__init__.py +28 -1
- unique_toolkit/_common/api_calling/human_verification_manager.py +343 -0
- unique_toolkit/_common/base_model_type_attribute.py +303 -0
- unique_toolkit/_common/chunk_relevancy_sorter/config.py +49 -0
- unique_toolkit/_common/chunk_relevancy_sorter/exception.py +5 -0
- unique_toolkit/_common/chunk_relevancy_sorter/schemas.py +46 -0
- unique_toolkit/_common/chunk_relevancy_sorter/service.py +374 -0
- unique_toolkit/_common/chunk_relevancy_sorter/tests/test_service.py +275 -0
- unique_toolkit/_common/default_language_model.py +12 -0
- unique_toolkit/_common/docx_generator/__init__.py +7 -0
- unique_toolkit/_common/docx_generator/config.py +12 -0
- unique_toolkit/_common/docx_generator/schemas.py +80 -0
- unique_toolkit/_common/docx_generator/service.py +252 -0
- unique_toolkit/_common/docx_generator/template/Doc Template.docx +0 -0
- unique_toolkit/_common/endpoint_builder.py +305 -0
- unique_toolkit/_common/endpoint_requestor.py +430 -0
- unique_toolkit/_common/exception.py +24 -0
- unique_toolkit/_common/feature_flags/schema.py +9 -0
- unique_toolkit/_common/pydantic/rjsf_tags.py +936 -0
- unique_toolkit/_common/pydantic_helpers.py +154 -0
- unique_toolkit/_common/referencing.py +53 -0
- unique_toolkit/_common/string_utilities.py +140 -0
- unique_toolkit/_common/tests/test_referencing.py +521 -0
- unique_toolkit/_common/tests/test_string_utilities.py +506 -0
- unique_toolkit/_common/token/image_token_counting.py +67 -0
- unique_toolkit/_common/token/token_counting.py +204 -0
- unique_toolkit/_common/utils/__init__.py +1 -0
- unique_toolkit/_common/utils/files.py +43 -0
- unique_toolkit/_common/utils/structured_output/__init__.py +1 -0
- unique_toolkit/_common/utils/structured_output/schema.py +5 -0
- unique_toolkit/_common/utils/write_configuration.py +51 -0
- unique_toolkit/_common/validators.py +101 -4
- unique_toolkit/agentic/__init__.py +1 -0
- unique_toolkit/agentic/debug_info_manager/debug_info_manager.py +28 -0
- unique_toolkit/agentic/debug_info_manager/test/test_debug_info_manager.py +278 -0
- unique_toolkit/agentic/evaluation/config.py +36 -0
- unique_toolkit/{evaluators → agentic/evaluation}/context_relevancy/prompts.py +25 -0
- unique_toolkit/agentic/evaluation/context_relevancy/schema.py +80 -0
- unique_toolkit/agentic/evaluation/context_relevancy/service.py +273 -0
- unique_toolkit/agentic/evaluation/evaluation_manager.py +218 -0
- unique_toolkit/agentic/evaluation/hallucination/constants.py +61 -0
- unique_toolkit/agentic/evaluation/hallucination/hallucination_evaluation.py +111 -0
- unique_toolkit/{evaluators → agentic/evaluation}/hallucination/prompts.py +1 -1
- unique_toolkit/{evaluators → agentic/evaluation}/hallucination/service.py +16 -15
- unique_toolkit/{evaluators → agentic/evaluation}/hallucination/utils.py +30 -20
- unique_toolkit/{evaluators → agentic/evaluation}/output_parser.py +20 -2
- unique_toolkit/{evaluators → agentic/evaluation}/schemas.py +27 -7
- unique_toolkit/agentic/evaluation/tests/test_context_relevancy_service.py +253 -0
- unique_toolkit/agentic/evaluation/tests/test_output_parser.py +87 -0
- unique_toolkit/agentic/history_manager/history_construction_with_contents.py +297 -0
- unique_toolkit/agentic/history_manager/history_manager.py +242 -0
- unique_toolkit/agentic/history_manager/loop_token_reducer.py +484 -0
- unique_toolkit/agentic/history_manager/utils.py +96 -0
- unique_toolkit/agentic/postprocessor/postprocessor_manager.py +212 -0
- unique_toolkit/agentic/reference_manager/reference_manager.py +103 -0
- unique_toolkit/agentic/responses_api/__init__.py +19 -0
- unique_toolkit/agentic/responses_api/postprocessors/code_display.py +63 -0
- unique_toolkit/agentic/responses_api/postprocessors/generated_files.py +145 -0
- unique_toolkit/agentic/responses_api/stream_handler.py +15 -0
- unique_toolkit/agentic/short_term_memory_manager/persistent_short_term_memory_manager.py +141 -0
- unique_toolkit/agentic/thinking_manager/thinking_manager.py +103 -0
- unique_toolkit/agentic/tools/__init__.py +1 -0
- unique_toolkit/agentic/tools/a2a/__init__.py +36 -0
- unique_toolkit/agentic/tools/a2a/config.py +17 -0
- unique_toolkit/agentic/tools/a2a/evaluation/__init__.py +15 -0
- unique_toolkit/agentic/tools/a2a/evaluation/_utils.py +66 -0
- unique_toolkit/agentic/tools/a2a/evaluation/config.py +55 -0
- unique_toolkit/agentic/tools/a2a/evaluation/evaluator.py +260 -0
- unique_toolkit/agentic/tools/a2a/evaluation/summarization_user_message.j2 +9 -0
- unique_toolkit/agentic/tools/a2a/manager.py +55 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/__init__.py +21 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/_display_utils.py +185 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/_ref_utils.py +73 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/config.py +45 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/display.py +180 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/references.py +101 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/test/test_display_utils.py +1335 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/test/test_ref_utils.py +603 -0
- unique_toolkit/agentic/tools/a2a/prompts.py +46 -0
- unique_toolkit/agentic/tools/a2a/response_watcher/__init__.py +6 -0
- unique_toolkit/agentic/tools/a2a/response_watcher/service.py +91 -0
- unique_toolkit/agentic/tools/a2a/tool/__init__.py +4 -0
- unique_toolkit/agentic/tools/a2a/tool/_memory.py +26 -0
- unique_toolkit/agentic/tools/a2a/tool/_schema.py +9 -0
- unique_toolkit/agentic/tools/a2a/tool/config.py +73 -0
- unique_toolkit/agentic/tools/a2a/tool/service.py +306 -0
- unique_toolkit/agentic/tools/agent_chunks_hanlder.py +65 -0
- unique_toolkit/agentic/tools/config.py +167 -0
- unique_toolkit/agentic/tools/factory.py +44 -0
- unique_toolkit/agentic/tools/mcp/__init__.py +4 -0
- unique_toolkit/agentic/tools/mcp/manager.py +71 -0
- unique_toolkit/agentic/tools/mcp/models.py +28 -0
- unique_toolkit/agentic/tools/mcp/tool_wrapper.py +234 -0
- unique_toolkit/agentic/tools/openai_builtin/__init__.py +11 -0
- unique_toolkit/agentic/tools/openai_builtin/base.py +30 -0
- unique_toolkit/agentic/tools/openai_builtin/code_interpreter/__init__.py +8 -0
- unique_toolkit/agentic/tools/openai_builtin/code_interpreter/config.py +57 -0
- unique_toolkit/agentic/tools/openai_builtin/code_interpreter/service.py +230 -0
- unique_toolkit/agentic/tools/openai_builtin/manager.py +62 -0
- unique_toolkit/agentic/tools/schemas.py +141 -0
- unique_toolkit/agentic/tools/test/test_mcp_manager.py +536 -0
- unique_toolkit/agentic/tools/test/test_tool_progress_reporter.py +445 -0
- unique_toolkit/agentic/tools/tool.py +183 -0
- unique_toolkit/agentic/tools/tool_manager.py +523 -0
- unique_toolkit/agentic/tools/tool_progress_reporter.py +285 -0
- unique_toolkit/agentic/tools/utils/__init__.py +19 -0
- unique_toolkit/agentic/tools/utils/execution/__init__.py +1 -0
- unique_toolkit/agentic/tools/utils/execution/execution.py +286 -0
- unique_toolkit/agentic/tools/utils/source_handling/__init__.py +0 -0
- unique_toolkit/agentic/tools/utils/source_handling/schema.py +21 -0
- unique_toolkit/agentic/tools/utils/source_handling/source_formatting.py +207 -0
- unique_toolkit/agentic/tools/utils/source_handling/tests/test_source_formatting.py +216 -0
- unique_toolkit/app/__init__.py +6 -0
- unique_toolkit/app/dev_util.py +180 -0
- unique_toolkit/app/init_sdk.py +32 -1
- unique_toolkit/app/schemas.py +198 -31
- unique_toolkit/app/unique_settings.py +367 -0
- unique_toolkit/chat/__init__.py +8 -1
- unique_toolkit/chat/deprecated/service.py +232 -0
- unique_toolkit/chat/functions.py +642 -77
- unique_toolkit/chat/rendering.py +34 -0
- unique_toolkit/chat/responses_api.py +461 -0
- unique_toolkit/chat/schemas.py +133 -2
- unique_toolkit/chat/service.py +115 -767
- unique_toolkit/content/functions.py +153 -4
- unique_toolkit/content/schemas.py +122 -15
- unique_toolkit/content/service.py +278 -44
- unique_toolkit/content/smart_rules.py +301 -0
- unique_toolkit/content/utils.py +8 -3
- unique_toolkit/embedding/service.py +102 -11
- unique_toolkit/framework_utilities/__init__.py +1 -0
- unique_toolkit/framework_utilities/langchain/client.py +71 -0
- unique_toolkit/framework_utilities/langchain/history.py +19 -0
- unique_toolkit/framework_utilities/openai/__init__.py +6 -0
- unique_toolkit/framework_utilities/openai/client.py +83 -0
- unique_toolkit/framework_utilities/openai/message_builder.py +229 -0
- unique_toolkit/framework_utilities/utils.py +23 -0
- unique_toolkit/language_model/__init__.py +3 -0
- unique_toolkit/language_model/builder.py +27 -11
- unique_toolkit/language_model/default_language_model.py +3 -0
- unique_toolkit/language_model/functions.py +327 -43
- unique_toolkit/language_model/infos.py +992 -50
- unique_toolkit/language_model/reference.py +242 -0
- unique_toolkit/language_model/schemas.py +475 -48
- unique_toolkit/language_model/service.py +228 -27
- unique_toolkit/protocols/support.py +145 -0
- unique_toolkit/services/__init__.py +7 -0
- unique_toolkit/services/chat_service.py +1630 -0
- unique_toolkit/services/knowledge_base.py +861 -0
- unique_toolkit/short_term_memory/service.py +178 -41
- unique_toolkit/smart_rules/__init__.py +0 -0
- unique_toolkit/smart_rules/compile.py +56 -0
- unique_toolkit/test_utilities/events.py +197 -0
- {unique_toolkit-0.7.7.dist-info → unique_toolkit-1.23.0.dist-info}/METADATA +606 -7
- unique_toolkit-1.23.0.dist-info/RECORD +182 -0
- unique_toolkit/evaluators/__init__.py +0 -1
- unique_toolkit/evaluators/config.py +0 -35
- unique_toolkit/evaluators/constants.py +0 -1
- unique_toolkit/evaluators/context_relevancy/constants.py +0 -32
- unique_toolkit/evaluators/context_relevancy/service.py +0 -53
- unique_toolkit/evaluators/context_relevancy/utils.py +0 -142
- unique_toolkit/evaluators/hallucination/constants.py +0 -41
- unique_toolkit-0.7.7.dist-info/RECORD +0 -64
- /unique_toolkit/{evaluators → agentic/evaluation}/exception.py +0 -0
- {unique_toolkit-0.7.7.dist-info → unique_toolkit-1.23.0.dist-info}/LICENSE +0 -0
- {unique_toolkit-0.7.7.dist-info → unique_toolkit-1.23.0.dist-info}/WHEEL +0 -0
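The rename entries above ({evaluators → agentic/evaluation}) move the evaluator modules under the new agentic package. A minimal migration sketch, assuming the moved modules keep their public names (the new-path import below appears verbatim in the test file that follows):

# 0.7.7 layout (removed in this release):
# from unique_toolkit.evaluators.schemas import EvaluationMetricName, EvaluationMetricResult

# 1.23.0 layout:
from unique_toolkit.agentic.evaluation.schemas import (
    EvaluationMetricName,
    EvaluationMetricResult,
)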
unique_toolkit/_common/chunk_relevancy_sorter/tests/test_service.py
@@ -0,0 +1,275 @@
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from unique_toolkit._common.chunk_relevancy_sorter.config import (
+    ChunkRelevancySortConfig,
+)
+from unique_toolkit._common.chunk_relevancy_sorter.exception import (
+    ChunkRelevancySorterException,
+)
+from unique_toolkit._common.chunk_relevancy_sorter.schemas import (
+    ChunkRelevancy,
+    ChunkRelevancySorterResult,
+)
+from unique_toolkit._common.chunk_relevancy_sorter.service import ChunkRelevancySorter
+from unique_toolkit.agentic.evaluation.context_relevancy.schema import (
+    StructuredOutputConfig,
+)
+from unique_toolkit.agentic.evaluation.schemas import (
+    EvaluationMetricName,
+    EvaluationMetricResult,
+)
+from unique_toolkit.app.schemas import ChatEvent
+from unique_toolkit.content.schemas import ContentChunk
+from unique_toolkit.language_model.default_language_model import DEFAULT_GPT_4o
+from unique_toolkit.language_model.infos import LanguageModelInfo
+
+
+@pytest.fixture
+def event():
+    event = MagicMock(spec=ChatEvent)
+    event.payload = MagicMock()
+    event.payload.user_message = MagicMock()
+    event.payload.user_message.text = "Test query"
+    event.user_id = "user_0"
+    event.company_id = "company_0"
+    return event
+
+
+@pytest.fixture
+def mock_chunks():
+    return [
+        ContentChunk(
+            id=f"chunk_{i}",
+            order=i,
+            chunk_id=f"chunk_{i}",
+            text=f"Test content {i}",
+        )
+        for i in range(3)
+    ]
+
+
+@pytest.fixture
+def config():
+    return ChunkRelevancySortConfig(
+        enabled=True,
+        relevancy_levels_to_consider=["high", "medium", "low"],
+        relevancy_level_order={"high": 0, "medium": 1, "low": 2},
+        language_model=LanguageModelInfo.from_name(DEFAULT_GPT_4o),
+        fallback_language_model=LanguageModelInfo.from_name(DEFAULT_GPT_4o),
+        structured_output_config=StructuredOutputConfig(
+            enabled=False,
+            extract_fact_list=False,
+        ),
+    )
+
+
+@pytest.fixture
+def chunk_relevancy_sorter(event):
+    return ChunkRelevancySorter(event)
+
+
+@pytest.mark.asyncio
+async def test_run_disabled_config(chunk_relevancy_sorter, mock_chunks, config):
+    config.enabled = False
+    result = await chunk_relevancy_sorter.run("test input", mock_chunks, config)
+
+    assert isinstance(result, ChunkRelevancySorterResult)
+    assert result.content_chunks == mock_chunks
+    assert len(result.content_chunks) == len(mock_chunks)
+
+
+@pytest.mark.asyncio
+async def test_run_enabled_config(chunk_relevancy_sorter, mock_chunks, config):
+    with patch.object(chunk_relevancy_sorter, "_run_chunk_relevancy_sort") as mock_sort:
+        mock_sort.return_value = ChunkRelevancySorterResult.from_chunks(mock_chunks)
+
+        result = await chunk_relevancy_sorter.run("test input", mock_chunks, config)
+
+        assert isinstance(result, ChunkRelevancySorterResult)
+        assert result.content_chunks == mock_chunks
+        mock_sort.assert_called_once_with("test input", mock_chunks, config)
+
+
+@pytest.mark.asyncio
+async def test_evaluate_chunks_relevancy(chunk_relevancy_sorter, mock_chunks, config):
+    mock_relevancy = EvaluationMetricResult(
+        value="high",
+        name=EvaluationMetricName.CONTEXT_RELEVANCY,
+        reason="Test reason",
+    )
+
+    with patch.object(
+        chunk_relevancy_sorter, "_process_relevancy_evaluation"
+    ) as mock_process:
+        mock_process.return_value = ChunkRelevancy(
+            chunk=mock_chunks[0], relevancy=mock_relevancy
+        )
+
+        result = await chunk_relevancy_sorter._evaluate_chunks_relevancy(
+            "test input", mock_chunks, config
+        )
+
+        assert len(result) == len(mock_chunks)
+        assert all(isinstance(r, ChunkRelevancy) for r in result)
+        assert mock_process.call_count == len(mock_chunks)
+
+
+@pytest.mark.asyncio
+async def test_evaluate_chunk_relevancy(chunk_relevancy_sorter, mock_chunks, config):
+    with patch(
+        "unique_toolkit._common.chunk_relevancy_sorter.service.ContextRelevancyEvaluator.analyze"
+    ) as mock_analyze:
+        mock_analyze.return_value = EvaluationMetricResult(
+            value="high",
+            name=EvaluationMetricName.CONTEXT_RELEVANCY,
+            reason="Test reason",
+        )
+
+        result = await chunk_relevancy_sorter._evaluate_chunk_relevancy(
+            input_text="test input",
+            chunk=mock_chunks[0],
+            langugage_model=config.language_model,
+            structured_output_config=config.structured_output_config,
+            additional_llm_options=config.additional_llm_options,
+        )
+
+        assert isinstance(result, EvaluationMetricResult)
+        assert result.value == "high"
+        mock_analyze.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_process_relevancy_evaluation_success(
+    chunk_relevancy_sorter, mock_chunks, config
+):
+    with patch.object(
+        chunk_relevancy_sorter, "_evaluate_chunk_relevancy"
+    ) as mock_evaluate:
+        mock_evaluate.return_value = EvaluationMetricResult(
+            value="high",
+            name=EvaluationMetricName.CONTEXT_RELEVANCY,
+            reason="Test reason",
+        )
+
+        result = await chunk_relevancy_sorter._process_relevancy_evaluation(
+            "test input", mock_chunks[0], config
+        )
+
+        assert isinstance(result, ChunkRelevancy)
+        assert result.chunk == mock_chunks[0]
+        assert result.relevancy is not None
+        assert result.relevancy.value == "high"
+
+
+@pytest.mark.asyncio
+async def test_process_relevancy_evaluation_fallback(
+    chunk_relevancy_sorter, mock_chunks, config
+):
+    with patch.object(
+        chunk_relevancy_sorter, "_evaluate_chunk_relevancy"
+    ) as mock_evaluate:
+        # First call raises exception, second call succeeds
+        mock_evaluate.side_effect = [
+            Exception("Test error"),
+            EvaluationMetricResult(
+                value="medium",
+                name=EvaluationMetricName.CONTEXT_RELEVANCY,
+                reason="Test reason",
+            ),
+        ]
+
+        with pytest.raises(ChunkRelevancySorterException):
+            await chunk_relevancy_sorter._process_relevancy_evaluation(
+                "test input", mock_chunks[0], config
+            )
+
+
+@pytest.mark.asyncio
+async def test_validate_and_sort_relevant_chunks(
+    chunk_relevancy_sorter, mock_chunks, config
+):
+    chunk_relevancies = [
+        ChunkRelevancy(
+            chunk=mock_chunks[0],
+            relevancy=EvaluationMetricResult(
+                value="low",
+                name=EvaluationMetricName.CONTEXT_RELEVANCY,
+                reason="Test reason",
+            ),
+        )
+    ]
+    chunk_relevancies.append(
+        ChunkRelevancy(
+            chunk=mock_chunks[1],
+            relevancy=EvaluationMetricResult(
+                value="medium",
+                name=EvaluationMetricName.CONTEXT_RELEVANCY,
+                reason="Test reason",
+            ),
+        )
+    )
+    chunk_relevancies.append(
+        ChunkRelevancy(
+            chunk=mock_chunks[2],
+            relevancy=EvaluationMetricResult(
+                value="high",
+                name=EvaluationMetricName.CONTEXT_RELEVANCY,
+                reason="Test reason",
+            ),
+        )
+    )
+
+    result = await chunk_relevancy_sorter._validate_and_sort_relevant_chunks(
+        config, chunk_relevancies
+    )
+
+    assert isinstance(result, list)
+    assert len(result) == len(mock_chunks)
+    assert all(isinstance(relevancy.chunk, ContentChunk) for relevancy in result)
+    assert result[0].chunk == mock_chunks[2]
+    assert result[1].chunk == mock_chunks[1]
+    assert result[2].chunk == mock_chunks[0]
+
+
+@pytest.mark.asyncio
+async def test_validate_chunk_relevancies_invalid(chunk_relevancy_sorter):
+    invalid_relevancies = [
+        ChunkRelevancy(
+            chunk=ContentChunk(chunk_id="test", text="test", id="test", order=0),
+            relevancy=None,
+        )
+    ]
+
+    with pytest.raises(ChunkRelevancySorterException):
+        await chunk_relevancy_sorter._validate_chunk_relevancies(invalid_relevancies)
+
+
+def test_count_distinct_values(chunk_relevancy_sorter, mock_chunks):
+    chunk_relevancies = [
+        ChunkRelevancy(
+            chunk=chunk,
+            relevancy=EvaluationMetricResult(
+                value="high",
+                name=EvaluationMetricName.CONTEXT_RELEVANCY,
+                reason="Test reason",
+            ),
+        )
+        for chunk in mock_chunks[:2]
+    ]
+    chunk_relevancies.append(
+        ChunkRelevancy(
+            chunk=mock_chunks[2],
+            relevancy=EvaluationMetricResult(
+                value="medium",
+                name=EvaluationMetricName.CONTEXT_RELEVANCY,
+                reason="Test reason",
+            ),
+        )
+    )
+
+    value_counts = chunk_relevancy_sorter._count_distinct_values(chunk_relevancies)
+
+    assert value_counts["high"] == 2
+    assert value_counts["medium"] == 1
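For orientation, a minimal usage sketch (not part of the diff), pieced together from the fixtures above; it assumes event, mock_chunks, and config are built exactly as in those fixtures, and that the run() signature the tests exercise is the public one:

import asyncio

sorter = ChunkRelevancySorter(event)  # event: a ChatEvent (mocked in the fixture)
result = asyncio.run(sorter.run("user query", mock_chunks, config))
print([chunk.id for chunk in result.content_chunks])  # chunks reordered by relevancy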
unique_toolkit/_common/default_language_model.py
@@ -0,0 +1,12 @@
+import warnings
+
+from unique_toolkit.language_model.infos import LanguageModelName
+
+warnings.warn(
+    "unique_toolkit._common.default_language_model is deprecated. "
+    "Import DEFAULT_GPT_4o from unique_toolkit.language_model instead.",
+    DeprecationWarning,
+    stacklevel=2,
+)
+
+DEFAULT_GPT_4o = LanguageModelName.AZURE_GPT_4o_2024_1120
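The shim above fires on first import of the module. A small sketch (not part of the diff) showing the warning surface and the replacement import the message itself recommends:

import warnings

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    import unique_toolkit._common.default_language_model  # noqa: F401  (warns on first import)

assert any(issubclass(w.category, DeprecationWarning) for w in caught)

# Replacement, per the warning text:
from unique_toolkit.language_model import DEFAULT_GPT_4o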
unique_toolkit/_common/docx_generator/config.py
@@ -0,0 +1,12 @@
+from pydantic import BaseModel, Field
+
+from unique_toolkit._common.pydantic_helpers import get_configuration_dict
+
+
+class DocxGeneratorConfig(BaseModel):
+    model_config = get_configuration_dict()
+
+    template_content_id: str = Field(
+        default="",
+        description="The content id of the template file uploaded to the knowledge base.",
+    )
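A one-line configuration sketch (not part of the diff); the content id is hypothetical, and the empty default makes DocxGeneratorService fall back to the bundled template, as its _get_template method further below shows:

from unique_toolkit._common.docx_generator.config import DocxGeneratorConfig

config = DocxGeneratorConfig(template_content_id="cont_abc123")  # hypothetical id
default_config = DocxGeneratorConfig()  # empty id -> bundled "Doc Template.docx"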
unique_toolkit/_common/docx_generator/schemas.py
@@ -0,0 +1,80 @@
+from docx.document import Document as DocumentObject
+from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
+from docxtpl import DocxTemplate
+from pydantic import BaseModel
+
+
+class HeadingField(BaseModel):
+    text: str
+    level: int = 4
+    alignment: WD_PARAGRAPH_ALIGNMENT = WD_PARAGRAPH_ALIGNMENT.LEFT
+
+    def add(self, doc: DocumentObject):
+        p = doc.add_heading(self.text, level=self.level)
+        p.alignment = self.alignment
+        return p
+
+    def __str__(self):
+        return f"HeadingField(text={self.text}, level={self.level}, alignment={self.alignment})"
+
+
+class ParagraphField(BaseModel):
+    text: str
+    style: str | None = None
+    alignment: WD_PARAGRAPH_ALIGNMENT = WD_PARAGRAPH_ALIGNMENT.LEFT
+
+    def add(self, doc: DocumentObject):
+        p = doc.add_paragraph(self.text, style=self.style)
+        p.alignment = self.alignment
+        return p
+
+    def __str__(self):
+        return f"ParagraphField(text={self.text}, style={self.style}, alignment={self.alignment})"
+
+
+class RunField(BaseModel):
+    text: str
+    italic: bool | None = False
+    bold: bool | None = False
+    alignment: WD_PARAGRAPH_ALIGNMENT = WD_PARAGRAPH_ALIGNMENT.LEFT
+
+    def __str__(self):
+        return f"RunField(text={self.text}, italic={self.italic}, alignment={self.alignment})"
+
+
+class RunsField(BaseModel):
+    runs: list[RunField]
+    style: str | None = None
+    alignment: WD_PARAGRAPH_ALIGNMENT = WD_PARAGRAPH_ALIGNMENT.LEFT
+
+    def add(self, doc: DocumentObject):
+        if not self.runs:
+            return None
+        p = doc.add_paragraph(style=self.style)
+        for run in self.runs:
+            r = p.add_run(run.text)
+            if run.italic:
+                r.italic = True
+            if run.bold:
+                r.bold = True
+        return p
+
+    def __str__(self):
+        return f"RunsField(runs={self.runs}, style={self.style}, alignment={self.alignment})"
+
+
+class ContentField(BaseModel):
+    contents: list[HeadingField | ParagraphField | RunsField]
+
+    def add(self, doc: DocxTemplate):
+        sd = doc.new_subdoc()
+        for content in self.contents:
+            # if isinstance(content, ImageField):
+            #     content.download_image(self.download_path)
+            #     content.add(sd)  # type: ignore
+            # else:
+            content.add(sd)  # type: ignore
+        return sd
+
+    def __str__(self):
+        return f"ContentField(contents={self.contents})"
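A short composition sketch (not part of the diff): each field model renders itself through its add method, and ContentField.add builds a docxtpl subdocument that can be passed to render. The template filename and its {{ body }} placeholder are assumptions:

from docxtpl import DocxTemplate

from unique_toolkit._common.docx_generator.schemas import (
    ContentField,
    HeadingField,
    RunField,
    RunsField,
)

doc = DocxTemplate("Doc Template.docx")  # hypothetical template containing {{ body }}
body = ContentField(
    contents=[
        HeadingField(text="Summary", level=2),
        RunsField(
            runs=[
                RunField(text="Key point", bold=True),
                RunField(text=" with plain detail."),
            ]
        ),
    ]
)
doc.render({"body": body.add(doc)})  # add() returns the populated subdoc
doc.save("out.docx")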
unique_toolkit/_common/docx_generator/service.py
@@ -0,0 +1,252 @@
+import io
+import logging
+import re
+from pathlib import Path
+
+from docxtpl import DocxTemplate
+from markdown_it import MarkdownIt
+
+from unique_toolkit._common.docx_generator.config import DocxGeneratorConfig
+from unique_toolkit._common.docx_generator.schemas import (
+    ContentField,
+    HeadingField,
+    # ImageField,
+    ParagraphField,
+    RunField,
+    RunsField,
+)
+from unique_toolkit.chat.service import ChatService
+from unique_toolkit.services import KnowledgeBaseService
+
+generator_dir_path = Path(__file__).resolve().parent
+
+
+_LOGGER = logging.getLogger(__name__)
+
+
+class DocxGeneratorService:
+    def __init__(
+        self,
+        chat_service: ChatService,
+        knowledge_base_service: KnowledgeBaseService,
+        config: DocxGeneratorConfig,
+    ):
+        self._knowledge_base_service = knowledge_base_service
+        self._config = config
+
+    @staticmethod
+    def parse_markdown_to_list_content_fields(
+        markdown: str, offset_header_lvl: int = 0
+    ) -> list[HeadingField | ParagraphField | RunsField]:
+        # Initialize markdown-it parser
+        md = MarkdownIt()
+
+        # Preprocess markdown.
+        # - Replace all headings with the correct heading level
+        # - Remove "Relevant sources" heading
+        # - Replace "# Proposed answer" with "#### Proposed answer"
+        markdown = re.sub(r"(?m)^\s*## ", "#### ", markdown)
+        markdown = re.sub(r"(?m)^\s*### ", "##### ", markdown)
+        markdown = markdown.replace("# Relevant sources", "")
+        markdown = markdown.replace("# Proposed answer", "#### Proposed answer")
+
+        tokens = md.parse(markdown)
+
+        elements = []
+        current_section = {}
+        in_list = False
+        bullet_list_indent_level = 0
+        list_item_open = False
+
+        for token in tokens:
+            if token.type == "bullet_list_open":
+                in_list = True
+                bullet_list_indent_level = int(token.level / 2)
+
+            elif token.type == "bullet_list_close":
+                in_list = False
+                bullet_list_indent_level = 0
+
+            elif token.type == "list_item_open":
+                if list_item_open:
+                    elements.append(current_section)
+                list_item_open = True
+                list_level = token.level - bullet_list_indent_level
+                current_section = {
+                    "type": RunsField,
+                    "runs": [],
+                    "is_list_item": True,
+                    "level": list_level,
+                }
+
+            elif token.type == "list_item_close":
+                if current_section and current_section.get("runs"):
+                    elements.append(current_section)
+                    current_section = {}
+                list_item_open = False
+
+            if token.type == "heading_open":
+                # Heading start, token.tag gives the level (e.g., 'h1', 'h2', etc.)
+                header_lvl = int(token.tag[1])  # Extract the level number from tag
+                current_section = {
+                    "type": HeadingField,
+                    "text": "",
+                    "level": header_lvl + offset_header_lvl,
+                }
+
+            elif token.type == "heading_close":
+                if current_section:
+                    elements.append(current_section)
+                    current_section = {}
+
+            elif token.type == "paragraph_open":
+                if not in_list:  # Only create new paragraph if not in a list
+                    current_section = {"type": RunsField, "runs": []}
+
+            elif token.type == "paragraph_close":
+                if not in_list and current_section:  # Only append if not in a list
+                    elements.append(current_section)
+                    current_section = {}
+
+            elif token.type == "inline":
+                if current_section.get("type") == HeadingField:
+                    content = token.content
+                    if content.startswith("_page"):
+                        # replace "_pageXXXX_" with "PageXXXX", where XXXX can be any characters and numbers
+                        content = re.sub(
+                            r"^_page([a-zA-Z0-9\s-]+)_(.*?)",
+                            r"Page\1",
+                            content,
+                        )
+                        bold = True
+                    current_section["text"] += content
+                elif "runs" in current_section:
+                    bold = False
+                    italic = False
+                    runs = []
+                    if token.children:
+                        for child in token.children:
+                            content = child.content
+                            if child.type == "strong_open":
+                                bold = True
+                            elif child.type == "strong_close":
+                                bold = False
+                            elif child.type == "em_open":
+                                italic = True
+                            elif child.type == "em_close":
+                                italic = False
+                            if child.type == "softbreak":
+                                content += "\n"
+                            if content:  # Only add non-empty content
+                                runs.append(
+                                    RunField(
+                                        text=content,
+                                        bold=bold,
+                                        italic=italic,
+                                    )
+                                )
+                    else:
+                        runs.append(
+                            RunField(
+                                text=token.content,
+                                bold=bold,
+                                italic=italic,
+                            )
+                        )
+
+                    current_section["runs"].extend(runs)
+
+        # Process remaining elements
+        contents = []
+        for element in elements:
+            if not element:
+                continue
+            if element["type"] == HeadingField:
+                contents.append(
+                    HeadingField(
+                        text=element["text"],
+                        level=element["level"],
+                    )
+                )
+            elif element["type"] == RunsField:
+                if element.get("is_list_item", False):
+                    level: int = min(element.get("level", 1), 5)
+                    if level > 1:
+                        style = "List Bullet " + str(level)
+                    else:
+                        style = "List Bullet"
+                    contents.append(RunsField(style=style, runs=element["runs"]))
+                else:
+                    contents.append(RunsField(runs=element["runs"]))
+
+        return contents
+
+    def generate_from_template(
+        self,
+        subdoc_content: list[HeadingField | ParagraphField | RunsField],
+        fields: dict | None = None,
+    ):
+        """
+        Generate a docx file from a template with the given content.
+
+        Args:
+            subdoc_content (list[HeadingField | ParagraphField | RunsField]): The content to be added to the docx file.
+            fields (dict): Other fields to be added to the docx file. Defaults to None.
+        """
+
+        docx_template_object = self._get_template(self._config.template_content_id)
+
+        doc = DocxTemplate(io.BytesIO(docx_template_object))
+
+        try:
+            content = {}
+            content["body"] = ContentField(contents=subdoc_content)
+
+            if fields:
+                content.update(fields)
+
+            for key, value in content.items():
+                if isinstance(value, ContentField):
+                    content[key] = value.add(doc)
+
+            doc.render(content)
+            docx_rendered_object = io.BytesIO()
+
+            doc.save(docx_rendered_object)
+            docx_rendered_object.seek(0)
+
+            return docx_rendered_object.getvalue()
+
+        except Exception as e:
+            _LOGGER.error(f"Error generating docx: {e}")
+            return None
+
+    def _get_template(self, template_content_id: str):
+        try:
+            if template_content_id:
+                _LOGGER.info(
+                    f"Downloading template from content ID: {template_content_id}"
+                )
+                file_content = self._knowledge_base_service.download_content_to_bytes(
+                    content_id=template_content_id
+                )
+            else:
+                _LOGGER.info("No template content ID provided. Using default template.")
+                file_content = self._get_default_template()
+        except Exception as e:
+            _LOGGER.warning(
+                f"An error occurred while downloading the template {e}. Make sure the template content ID is valid. Falling back to default template."
+            )
+            file_content = self._get_default_template()
+
+        return file_content
+
+    def _get_default_template(self):
+        generator_dir_path = Path(__file__).resolve().parent
+        path = generator_dir_path / "template" / "Doc Template.docx"
+
+        file_content = path.read_bytes()
+
+        _LOGGER.info("Template downloaded from default template")
+
+        return file_content
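An end-to-end sketch (not part of the diff) combining the pieces above; the two service instances are assumed to be constructed elsewhere, while the method names and return values come from the class itself (generate_from_template returns None on failure):

from unique_toolkit._common.docx_generator.config import DocxGeneratorConfig
from unique_toolkit._common.docx_generator.service import DocxGeneratorService

# chat_service / knowledge_base_service: pre-built ChatService and
# KnowledgeBaseService instances (assumed available).
generator = DocxGeneratorService(chat_service, knowledge_base_service, DocxGeneratorConfig())

fields = DocxGeneratorService.parse_markdown_to_list_content_fields(
    "# Proposed answer\nSome **bold** text.\n\n- first item\n- second item"
)
docx_bytes = generator.generate_from_template(fields)
if docx_bytes is not None:
    with open("generated.docx", "wb") as f:
        f.write(docx_bytes)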
unique_toolkit/_common/docx_generator/template/Doc Template.docx
Binary file (no text diff shown)