unique_toolkit 0.7.7__py3-none-any.whl → 1.23.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of unique_toolkit has been flagged as potentially problematic.

Files changed (166)
  1. unique_toolkit/__init__.py +28 -1
  2. unique_toolkit/_common/api_calling/human_verification_manager.py +343 -0
  3. unique_toolkit/_common/base_model_type_attribute.py +303 -0
  4. unique_toolkit/_common/chunk_relevancy_sorter/config.py +49 -0
  5. unique_toolkit/_common/chunk_relevancy_sorter/exception.py +5 -0
  6. unique_toolkit/_common/chunk_relevancy_sorter/schemas.py +46 -0
  7. unique_toolkit/_common/chunk_relevancy_sorter/service.py +374 -0
  8. unique_toolkit/_common/chunk_relevancy_sorter/tests/test_service.py +275 -0
  9. unique_toolkit/_common/default_language_model.py +12 -0
  10. unique_toolkit/_common/docx_generator/__init__.py +7 -0
  11. unique_toolkit/_common/docx_generator/config.py +12 -0
  12. unique_toolkit/_common/docx_generator/schemas.py +80 -0
  13. unique_toolkit/_common/docx_generator/service.py +252 -0
  14. unique_toolkit/_common/docx_generator/template/Doc Template.docx +0 -0
  15. unique_toolkit/_common/endpoint_builder.py +305 -0
  16. unique_toolkit/_common/endpoint_requestor.py +430 -0
  17. unique_toolkit/_common/exception.py +24 -0
  18. unique_toolkit/_common/feature_flags/schema.py +9 -0
  19. unique_toolkit/_common/pydantic/rjsf_tags.py +936 -0
  20. unique_toolkit/_common/pydantic_helpers.py +154 -0
  21. unique_toolkit/_common/referencing.py +53 -0
  22. unique_toolkit/_common/string_utilities.py +140 -0
  23. unique_toolkit/_common/tests/test_referencing.py +521 -0
  24. unique_toolkit/_common/tests/test_string_utilities.py +506 -0
  25. unique_toolkit/_common/token/image_token_counting.py +67 -0
  26. unique_toolkit/_common/token/token_counting.py +204 -0
  27. unique_toolkit/_common/utils/__init__.py +1 -0
  28. unique_toolkit/_common/utils/files.py +43 -0
  29. unique_toolkit/_common/utils/structured_output/__init__.py +1 -0
  30. unique_toolkit/_common/utils/structured_output/schema.py +5 -0
  31. unique_toolkit/_common/utils/write_configuration.py +51 -0
  32. unique_toolkit/_common/validators.py +101 -4
  33. unique_toolkit/agentic/__init__.py +1 -0
  34. unique_toolkit/agentic/debug_info_manager/debug_info_manager.py +28 -0
  35. unique_toolkit/agentic/debug_info_manager/test/test_debug_info_manager.py +278 -0
  36. unique_toolkit/agentic/evaluation/config.py +36 -0
  37. unique_toolkit/{evaluators → agentic/evaluation}/context_relevancy/prompts.py +25 -0
  38. unique_toolkit/agentic/evaluation/context_relevancy/schema.py +80 -0
  39. unique_toolkit/agentic/evaluation/context_relevancy/service.py +273 -0
  40. unique_toolkit/agentic/evaluation/evaluation_manager.py +218 -0
  41. unique_toolkit/agentic/evaluation/hallucination/constants.py +61 -0
  42. unique_toolkit/agentic/evaluation/hallucination/hallucination_evaluation.py +111 -0
  43. unique_toolkit/{evaluators → agentic/evaluation}/hallucination/prompts.py +1 -1
  44. unique_toolkit/{evaluators → agentic/evaluation}/hallucination/service.py +16 -15
  45. unique_toolkit/{evaluators → agentic/evaluation}/hallucination/utils.py +30 -20
  46. unique_toolkit/{evaluators → agentic/evaluation}/output_parser.py +20 -2
  47. unique_toolkit/{evaluators → agentic/evaluation}/schemas.py +27 -7
  48. unique_toolkit/agentic/evaluation/tests/test_context_relevancy_service.py +253 -0
  49. unique_toolkit/agentic/evaluation/tests/test_output_parser.py +87 -0
  50. unique_toolkit/agentic/history_manager/history_construction_with_contents.py +297 -0
  51. unique_toolkit/agentic/history_manager/history_manager.py +242 -0
  52. unique_toolkit/agentic/history_manager/loop_token_reducer.py +484 -0
  53. unique_toolkit/agentic/history_manager/utils.py +96 -0
  54. unique_toolkit/agentic/postprocessor/postprocessor_manager.py +212 -0
  55. unique_toolkit/agentic/reference_manager/reference_manager.py +103 -0
  56. unique_toolkit/agentic/responses_api/__init__.py +19 -0
  57. unique_toolkit/agentic/responses_api/postprocessors/code_display.py +63 -0
  58. unique_toolkit/agentic/responses_api/postprocessors/generated_files.py +145 -0
  59. unique_toolkit/agentic/responses_api/stream_handler.py +15 -0
  60. unique_toolkit/agentic/short_term_memory_manager/persistent_short_term_memory_manager.py +141 -0
  61. unique_toolkit/agentic/thinking_manager/thinking_manager.py +103 -0
  62. unique_toolkit/agentic/tools/__init__.py +1 -0
  63. unique_toolkit/agentic/tools/a2a/__init__.py +36 -0
  64. unique_toolkit/agentic/tools/a2a/config.py +17 -0
  65. unique_toolkit/agentic/tools/a2a/evaluation/__init__.py +15 -0
  66. unique_toolkit/agentic/tools/a2a/evaluation/_utils.py +66 -0
  67. unique_toolkit/agentic/tools/a2a/evaluation/config.py +55 -0
  68. unique_toolkit/agentic/tools/a2a/evaluation/evaluator.py +260 -0
  69. unique_toolkit/agentic/tools/a2a/evaluation/summarization_user_message.j2 +9 -0
  70. unique_toolkit/agentic/tools/a2a/manager.py +55 -0
  71. unique_toolkit/agentic/tools/a2a/postprocessing/__init__.py +21 -0
  72. unique_toolkit/agentic/tools/a2a/postprocessing/_display_utils.py +185 -0
  73. unique_toolkit/agentic/tools/a2a/postprocessing/_ref_utils.py +73 -0
  74. unique_toolkit/agentic/tools/a2a/postprocessing/config.py +45 -0
  75. unique_toolkit/agentic/tools/a2a/postprocessing/display.py +180 -0
  76. unique_toolkit/agentic/tools/a2a/postprocessing/references.py +101 -0
  77. unique_toolkit/agentic/tools/a2a/postprocessing/test/test_display_utils.py +1335 -0
  78. unique_toolkit/agentic/tools/a2a/postprocessing/test/test_ref_utils.py +603 -0
  79. unique_toolkit/agentic/tools/a2a/prompts.py +46 -0
  80. unique_toolkit/agentic/tools/a2a/response_watcher/__init__.py +6 -0
  81. unique_toolkit/agentic/tools/a2a/response_watcher/service.py +91 -0
  82. unique_toolkit/agentic/tools/a2a/tool/__init__.py +4 -0
  83. unique_toolkit/agentic/tools/a2a/tool/_memory.py +26 -0
  84. unique_toolkit/agentic/tools/a2a/tool/_schema.py +9 -0
  85. unique_toolkit/agentic/tools/a2a/tool/config.py +73 -0
  86. unique_toolkit/agentic/tools/a2a/tool/service.py +306 -0
  87. unique_toolkit/agentic/tools/agent_chunks_hanlder.py +65 -0
  88. unique_toolkit/agentic/tools/config.py +167 -0
  89. unique_toolkit/agentic/tools/factory.py +44 -0
  90. unique_toolkit/agentic/tools/mcp/__init__.py +4 -0
  91. unique_toolkit/agentic/tools/mcp/manager.py +71 -0
  92. unique_toolkit/agentic/tools/mcp/models.py +28 -0
  93. unique_toolkit/agentic/tools/mcp/tool_wrapper.py +234 -0
  94. unique_toolkit/agentic/tools/openai_builtin/__init__.py +11 -0
  95. unique_toolkit/agentic/tools/openai_builtin/base.py +30 -0
  96. unique_toolkit/agentic/tools/openai_builtin/code_interpreter/__init__.py +8 -0
  97. unique_toolkit/agentic/tools/openai_builtin/code_interpreter/config.py +57 -0
  98. unique_toolkit/agentic/tools/openai_builtin/code_interpreter/service.py +230 -0
  99. unique_toolkit/agentic/tools/openai_builtin/manager.py +62 -0
  100. unique_toolkit/agentic/tools/schemas.py +141 -0
  101. unique_toolkit/agentic/tools/test/test_mcp_manager.py +536 -0
  102. unique_toolkit/agentic/tools/test/test_tool_progress_reporter.py +445 -0
  103. unique_toolkit/agentic/tools/tool.py +183 -0
  104. unique_toolkit/agentic/tools/tool_manager.py +523 -0
  105. unique_toolkit/agentic/tools/tool_progress_reporter.py +285 -0
  106. unique_toolkit/agentic/tools/utils/__init__.py +19 -0
  107. unique_toolkit/agentic/tools/utils/execution/__init__.py +1 -0
  108. unique_toolkit/agentic/tools/utils/execution/execution.py +286 -0
  109. unique_toolkit/agentic/tools/utils/source_handling/__init__.py +0 -0
  110. unique_toolkit/agentic/tools/utils/source_handling/schema.py +21 -0
  111. unique_toolkit/agentic/tools/utils/source_handling/source_formatting.py +207 -0
  112. unique_toolkit/agentic/tools/utils/source_handling/tests/test_source_formatting.py +216 -0
  113. unique_toolkit/app/__init__.py +6 -0
  114. unique_toolkit/app/dev_util.py +180 -0
  115. unique_toolkit/app/init_sdk.py +32 -1
  116. unique_toolkit/app/schemas.py +198 -31
  117. unique_toolkit/app/unique_settings.py +367 -0
  118. unique_toolkit/chat/__init__.py +8 -1
  119. unique_toolkit/chat/deprecated/service.py +232 -0
  120. unique_toolkit/chat/functions.py +642 -77
  121. unique_toolkit/chat/rendering.py +34 -0
  122. unique_toolkit/chat/responses_api.py +461 -0
  123. unique_toolkit/chat/schemas.py +133 -2
  124. unique_toolkit/chat/service.py +115 -767
  125. unique_toolkit/content/functions.py +153 -4
  126. unique_toolkit/content/schemas.py +122 -15
  127. unique_toolkit/content/service.py +278 -44
  128. unique_toolkit/content/smart_rules.py +301 -0
  129. unique_toolkit/content/utils.py +8 -3
  130. unique_toolkit/embedding/service.py +102 -11
  131. unique_toolkit/framework_utilities/__init__.py +1 -0
  132. unique_toolkit/framework_utilities/langchain/client.py +71 -0
  133. unique_toolkit/framework_utilities/langchain/history.py +19 -0
  134. unique_toolkit/framework_utilities/openai/__init__.py +6 -0
  135. unique_toolkit/framework_utilities/openai/client.py +83 -0
  136. unique_toolkit/framework_utilities/openai/message_builder.py +229 -0
  137. unique_toolkit/framework_utilities/utils.py +23 -0
  138. unique_toolkit/language_model/__init__.py +3 -0
  139. unique_toolkit/language_model/builder.py +27 -11
  140. unique_toolkit/language_model/default_language_model.py +3 -0
  141. unique_toolkit/language_model/functions.py +327 -43
  142. unique_toolkit/language_model/infos.py +992 -50
  143. unique_toolkit/language_model/reference.py +242 -0
  144. unique_toolkit/language_model/schemas.py +475 -48
  145. unique_toolkit/language_model/service.py +228 -27
  146. unique_toolkit/protocols/support.py +145 -0
  147. unique_toolkit/services/__init__.py +7 -0
  148. unique_toolkit/services/chat_service.py +1630 -0
  149. unique_toolkit/services/knowledge_base.py +861 -0
  150. unique_toolkit/short_term_memory/service.py +178 -41
  151. unique_toolkit/smart_rules/__init__.py +0 -0
  152. unique_toolkit/smart_rules/compile.py +56 -0
  153. unique_toolkit/test_utilities/events.py +197 -0
  154. {unique_toolkit-0.7.7.dist-info → unique_toolkit-1.23.0.dist-info}/METADATA +606 -7
  155. unique_toolkit-1.23.0.dist-info/RECORD +182 -0
  156. unique_toolkit/evaluators/__init__.py +0 -1
  157. unique_toolkit/evaluators/config.py +0 -35
  158. unique_toolkit/evaluators/constants.py +0 -1
  159. unique_toolkit/evaluators/context_relevancy/constants.py +0 -32
  160. unique_toolkit/evaluators/context_relevancy/service.py +0 -53
  161. unique_toolkit/evaluators/context_relevancy/utils.py +0 -142
  162. unique_toolkit/evaluators/hallucination/constants.py +0 -41
  163. unique_toolkit-0.7.7.dist-info/RECORD +0 -64
  164. /unique_toolkit/{evaluators → agentic/evaluation}/exception.py +0 -0
  165. {unique_toolkit-0.7.7.dist-info → unique_toolkit-1.23.0.dist-info}/LICENSE +0 -0
  166. {unique_toolkit-0.7.7.dist-info → unique_toolkit-1.23.0.dist-info}/WHEEL +0 -0
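
The renames above fold the former evaluators package into agentic/evaluation and add a new unique_toolkit.services package. A rough import-migration sketch follows; the module paths are taken from the rename entries in the list, while the aliases and any re-exported class names are illustrative assumptions, not confirmed API.

# Hedged migration sketch: 1.23.0 paths come from the rename entries above,
# the 0.7.7 equivalents are shown as comments for comparison.

# 0.7.7:
# from unique_toolkit.evaluators import schemas as evaluation_schemas
# from unique_toolkit.evaluators.hallucination import service as hallucination_service

# 1.23.0:
from unique_toolkit.agentic.evaluation import schemas as evaluation_schemas
from unique_toolkit.agentic.evaluation.hallucination import service as hallucination_service
from unique_toolkit.services import KnowledgeBaseService  # new in 1.23.0 (services/knowledge_base.py)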

unique_toolkit/_common/chunk_relevancy_sorter/tests/test_service.py
@@ -0,0 +1,275 @@
+ from unittest.mock import MagicMock, patch
+
+ import pytest
+
+ from unique_toolkit._common.chunk_relevancy_sorter.config import (
+     ChunkRelevancySortConfig,
+ )
+ from unique_toolkit._common.chunk_relevancy_sorter.exception import (
+     ChunkRelevancySorterException,
+ )
+ from unique_toolkit._common.chunk_relevancy_sorter.schemas import (
+     ChunkRelevancy,
+     ChunkRelevancySorterResult,
+ )
+ from unique_toolkit._common.chunk_relevancy_sorter.service import ChunkRelevancySorter
+ from unique_toolkit.agentic.evaluation.context_relevancy.schema import (
+     StructuredOutputConfig,
+ )
+ from unique_toolkit.agentic.evaluation.schemas import (
+     EvaluationMetricName,
+     EvaluationMetricResult,
+ )
+ from unique_toolkit.app.schemas import ChatEvent
+ from unique_toolkit.content.schemas import ContentChunk
+ from unique_toolkit.language_model.default_language_model import DEFAULT_GPT_4o
+ from unique_toolkit.language_model.infos import LanguageModelInfo
+
+
+ @pytest.fixture
+ def event():
+     event = MagicMock(spec=ChatEvent)
+     event.payload = MagicMock()
+     event.payload.user_message = MagicMock()
+     event.payload.user_message.text = "Test query"
+     event.user_id = "user_0"
+     event.company_id = "company_0"
+     return event
+
+
+ @pytest.fixture
+ def mock_chunks():
+     return [
+         ContentChunk(
+             id=f"chunk_{i}",
+             order=i,
+             chunk_id=f"chunk_{i}",
+             text=f"Test content {i}",
+         )
+         for i in range(3)
+     ]
+
+
+ @pytest.fixture
+ def config():
+     return ChunkRelevancySortConfig(
+         enabled=True,
+         relevancy_levels_to_consider=["high", "medium", "low"],
+         relevancy_level_order={"high": 0, "medium": 1, "low": 2},
+         language_model=LanguageModelInfo.from_name(DEFAULT_GPT_4o),
+         fallback_language_model=LanguageModelInfo.from_name(DEFAULT_GPT_4o),
+         structured_output_config=StructuredOutputConfig(
+             enabled=False,
+             extract_fact_list=False,
+         ),
+     )
+
+
+ @pytest.fixture
+ def chunk_relevancy_sorter(event):
+     return ChunkRelevancySorter(event)
+
+
+ @pytest.mark.asyncio
+ async def test_run_disabled_config(chunk_relevancy_sorter, mock_chunks, config):
+     config.enabled = False
+     result = await chunk_relevancy_sorter.run("test input", mock_chunks, config)
+
+     assert isinstance(result, ChunkRelevancySorterResult)
+     assert result.content_chunks == mock_chunks
+     assert len(result.content_chunks) == len(mock_chunks)
+
+
+ @pytest.mark.asyncio
+ async def test_run_enabled_config(chunk_relevancy_sorter, mock_chunks, config):
+     with patch.object(chunk_relevancy_sorter, "_run_chunk_relevancy_sort") as mock_sort:
+         mock_sort.return_value = ChunkRelevancySorterResult.from_chunks(mock_chunks)
+
+         result = await chunk_relevancy_sorter.run("test input", mock_chunks, config)
+
+         assert isinstance(result, ChunkRelevancySorterResult)
+         assert result.content_chunks == mock_chunks
+         mock_sort.assert_called_once_with("test input", mock_chunks, config)
+
+
+ @pytest.mark.asyncio
+ async def test_evaluate_chunks_relevancy(chunk_relevancy_sorter, mock_chunks, config):
+     mock_relevancy = EvaluationMetricResult(
+         value="high",
+         name=EvaluationMetricName.CONTEXT_RELEVANCY,
+         reason="Test reason",
+     )
+
+     with patch.object(
+         chunk_relevancy_sorter, "_process_relevancy_evaluation"
+     ) as mock_process:
+         mock_process.return_value = ChunkRelevancy(
+             chunk=mock_chunks[0], relevancy=mock_relevancy
+         )
+
+         result = await chunk_relevancy_sorter._evaluate_chunks_relevancy(
+             "test input", mock_chunks, config
+         )
+
+         assert len(result) == len(mock_chunks)
+         assert all(isinstance(r, ChunkRelevancy) for r in result)
+         assert mock_process.call_count == len(mock_chunks)
+
+
+ @pytest.mark.asyncio
+ async def test_evaluate_chunk_relevancy(chunk_relevancy_sorter, mock_chunks, config):
+     with patch(
+         "unique_toolkit._common.chunk_relevancy_sorter.service.ContextRelevancyEvaluator.analyze"
+     ) as mock_analyze:
+         mock_analyze.return_value = EvaluationMetricResult(
+             value="high",
+             name=EvaluationMetricName.CONTEXT_RELEVANCY,
+             reason="Test reason",
+         )
+
+         result = await chunk_relevancy_sorter._evaluate_chunk_relevancy(
+             input_text="test input",
+             chunk=mock_chunks[0],
+             langugage_model=config.language_model,
+             structured_output_config=config.structured_output_config,
+             additional_llm_options=config.additional_llm_options,
+         )
+
+         assert isinstance(result, EvaluationMetricResult)
+         assert result.value == "high"
+         mock_analyze.assert_called_once()
+
+
+ @pytest.mark.asyncio
+ async def test_process_relevancy_evaluation_success(
+     chunk_relevancy_sorter, mock_chunks, config
+ ):
+     with patch.object(
+         chunk_relevancy_sorter, "_evaluate_chunk_relevancy"
+     ) as mock_evaluate:
+         mock_evaluate.return_value = EvaluationMetricResult(
+             value="high",
+             name=EvaluationMetricName.CONTEXT_RELEVANCY,
+             reason="Test reason",
+         )
+
+         result = await chunk_relevancy_sorter._process_relevancy_evaluation(
+             "test input", mock_chunks[0], config
+         )
+
+         assert isinstance(result, ChunkRelevancy)
+         assert result.chunk == mock_chunks[0]
+         assert result.relevancy is not None
+         assert result.relevancy.value == "high"
+
+
+ @pytest.mark.asyncio
+ async def test_process_relevancy_evaluation_fallback(
+     chunk_relevancy_sorter, mock_chunks, config
+ ):
+     with patch.object(
+         chunk_relevancy_sorter, "_evaluate_chunk_relevancy"
+     ) as mock_evaluate:
+         # First call raises exception, second call succeeds
+         mock_evaluate.side_effect = [
+             Exception("Test error"),
+             EvaluationMetricResult(
+                 value="medium",
+                 name=EvaluationMetricName.CONTEXT_RELEVANCY,
+                 reason="Test reason",
+             ),
+         ]
+
+         with pytest.raises(ChunkRelevancySorterException):
+             await chunk_relevancy_sorter._process_relevancy_evaluation(
+                 "test input", mock_chunks[0], config
+             )
+
+
+ @pytest.mark.asyncio
+ async def test_validate_and_sort_relevant_chunks(
+     chunk_relevancy_sorter, mock_chunks, config
+ ):
+     chunk_relevancies = [
+         ChunkRelevancy(
+             chunk=mock_chunks[0],
+             relevancy=EvaluationMetricResult(
+                 value="low",
+                 name=EvaluationMetricName.CONTEXT_RELEVANCY,
+                 reason="Test reason",
+             ),
+         )
+     ]
+     chunk_relevancies.append(
+         ChunkRelevancy(
+             chunk=mock_chunks[1],
+             relevancy=EvaluationMetricResult(
+                 value="medium",
+                 name=EvaluationMetricName.CONTEXT_RELEVANCY,
+                 reason="Test reason",
+             ),
+         )
+     )
+     chunk_relevancies.append(
+         ChunkRelevancy(
+             chunk=mock_chunks[2],
+             relevancy=EvaluationMetricResult(
+                 value="high",
+                 name=EvaluationMetricName.CONTEXT_RELEVANCY,
+                 reason="Test reason",
+             ),
+         )
+     )
+
+     result = await chunk_relevancy_sorter._validate_and_sort_relevant_chunks(
+         config, chunk_relevancies
+     )
+
+     assert isinstance(result, list)
+     assert len(result) == len(mock_chunks)
+     assert all(isinstance(relevancy.chunk, ContentChunk) for relevancy in result)
+     assert result[0].chunk == mock_chunks[2]
+     assert result[1].chunk == mock_chunks[1]
+     assert result[2].chunk == mock_chunks[0]
+
+
+ @pytest.mark.asyncio
+ async def test_validate_chunk_relevancies_invalid(chunk_relevancy_sorter):
+     invalid_relevancies = [
+         ChunkRelevancy(
+             chunk=ContentChunk(chunk_id="test", text="test", id="test", order=0),
+             relevancy=None,
+         )
+     ]
+
+     with pytest.raises(ChunkRelevancySorterException):
+         await chunk_relevancy_sorter._validate_chunk_relevancies(invalid_relevancies)
+
+
+ def test_count_distinct_values(chunk_relevancy_sorter, mock_chunks):
+     chunk_relevancies = [
+         ChunkRelevancy(
+             chunk=chunk,
+             relevancy=EvaluationMetricResult(
+                 value="high",
+                 name=EvaluationMetricName.CONTEXT_RELEVANCY,
+                 reason="Test reason",
+             ),
+         )
+         for chunk in mock_chunks[:2]
+     ]
+     chunk_relevancies.append(
+         ChunkRelevancy(
+             chunk=mock_chunks[2],
+             relevancy=EvaluationMetricResult(
+                 value="medium",
+                 name=EvaluationMetricName.CONTEXT_RELEVANCY,
+                 reason="Test reason",
+             ),
+         )
+     )
+
+     value_counts = chunk_relevancy_sorter._count_distinct_values(chunk_relevancies)
+
+     assert value_counts["high"] == 2
+     assert value_counts["medium"] == 1
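
The test module above doubles as a usage reference for ChunkRelevancySorter. Below is a minimal sketch distilled from its fixtures; the query string and chunk list are placeholders, and the config values simply mirror the fixture rather than documented defaults.

# Minimal usage sketch inferred from the fixtures and calls in the test above.
from unique_toolkit._common.chunk_relevancy_sorter.config import ChunkRelevancySortConfig
from unique_toolkit._common.chunk_relevancy_sorter.service import ChunkRelevancySorter
from unique_toolkit.agentic.evaluation.context_relevancy.schema import StructuredOutputConfig
from unique_toolkit.content.schemas import ContentChunk
from unique_toolkit.language_model.default_language_model import DEFAULT_GPT_4o
from unique_toolkit.language_model.infos import LanguageModelInfo


async def sort_by_relevancy(event, chunks: list[ContentChunk]) -> list[ContentChunk]:
    # event: a ChatEvent, as in the fixture above
    config = ChunkRelevancySortConfig(
        enabled=True,
        relevancy_levels_to_consider=["high", "medium", "low"],
        relevancy_level_order={"high": 0, "medium": 1, "low": 2},
        language_model=LanguageModelInfo.from_name(DEFAULT_GPT_4o),
        fallback_language_model=LanguageModelInfo.from_name(DEFAULT_GPT_4o),
        structured_output_config=StructuredOutputConfig(enabled=False, extract_fact_list=False),
    )
    sorter = ChunkRelevancySorter(event)
    result = await sorter.run("What is the refund policy?", chunks, config)
    return result.content_chunks  # chunks reordered by relevancy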

unique_toolkit/_common/default_language_model.py
@@ -0,0 +1,12 @@
+ import warnings
+
+ from unique_toolkit.language_model.infos import LanguageModelName
+
+ warnings.warn(
+     "unique_toolkit._common.default_language_model is deprecated. "
+     "Import DEFAULT_GPT_4o from unique_toolkit.language_model instead.",
+     DeprecationWarning,
+     stacklevel=2,
+ )
+
+ DEFAULT_GPT_4o = LanguageModelName.AZURE_GPT_4o_2024_1120
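
The shim above keeps the old import path working while warning at import time. A small sketch of both paths, assuming DEFAULT_GPT_4o is re-exported from unique_toolkit.language_model as the warning message states:

# Sketch of the deprecation shim: importing the old module emits a
# DeprecationWarning on first import; the second import uses the location the
# warning recommends (the re-export there is assumed, not shown in this diff).
import warnings

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    from unique_toolkit._common.default_language_model import DEFAULT_GPT_4o  # old path

print([str(w.message) for w in caught])  # deprecation notice, if this was the first import

from unique_toolkit.language_model import DEFAULT_GPT_4o  # recommended path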

unique_toolkit/_common/docx_generator/__init__.py
@@ -0,0 +1,7 @@
+ from unique_toolkit._common.docx_generator.config import DocxGeneratorConfig
+ from unique_toolkit._common.docx_generator.service import DocxGeneratorService
+
+ __all__ = [
+     "DocxGeneratorService",
+     "DocxGeneratorConfig",
+ ]

unique_toolkit/_common/docx_generator/config.py
@@ -0,0 +1,12 @@
+ from pydantic import BaseModel, Field
+
+ from unique_toolkit._common.pydantic_helpers import get_configuration_dict
+
+
+ class DocxGeneratorConfig(BaseModel):
+     model_config = get_configuration_dict()
+
+     template_content_id: str = Field(
+         default="",
+         description="The content id of the template file uploaded to the knowledge base.",
+     )

unique_toolkit/_common/docx_generator/schemas.py
@@ -0,0 +1,80 @@
+ from docx.document import Document as DocumentObject
+ from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
+ from docxtpl import DocxTemplate
+ from pydantic import BaseModel
+
+
+ class HeadingField(BaseModel):
+     text: str
+     level: int = 4
+     alignment: WD_PARAGRAPH_ALIGNMENT = WD_PARAGRAPH_ALIGNMENT.LEFT
+
+     def add(self, doc: DocumentObject):
+         p = doc.add_heading(self.text, level=self.level)
+         p.alignment = self.alignment
+         return p
+
+     def __str__(self):
+         return f"HeadingField(text={self.text}, level={self.level}, alignment={self.alignment})"
+
+
+ class ParagraphField(BaseModel):
+     text: str
+     style: str | None = None
+     alignment: WD_PARAGRAPH_ALIGNMENT = WD_PARAGRAPH_ALIGNMENT.LEFT
+
+     def add(self, doc: DocumentObject):
+         p = doc.add_paragraph(self.text, style=self.style)
+         p.alignment = self.alignment
+         return p
+
+     def __str__(self):
+         return f"ParagraphField(text={self.text}, style={self.style}, alignment={self.alignment})"
+
+
+ class RunField(BaseModel):
+     text: str
+     italic: bool | None = False
+     bold: bool | None = False
+     alignment: WD_PARAGRAPH_ALIGNMENT = WD_PARAGRAPH_ALIGNMENT.LEFT
+
+     def __str__(self):
+         return f"RunField(text={self.text}, italic={self.italic}, alignment={self.alignment})"
+
+
+ class RunsField(BaseModel):
+     runs: list[RunField]
+     style: str | None = None
+     alignment: WD_PARAGRAPH_ALIGNMENT = WD_PARAGRAPH_ALIGNMENT.LEFT
+
+     def add(self, doc: DocumentObject):
+         if not self.runs:
+             return None
+         p = doc.add_paragraph(style=self.style)
+         for run in self.runs:
+             r = p.add_run(run.text)
+             if run.italic:
+                 r.italic = True
+             if run.bold:
+                 r.bold = True
+         return p
+
+     def __str__(self):
+         return f"RunsField(runs={self.runs}, style={self.style}, alignment={self.alignment})"
+
+
+ class ContentField(BaseModel):
+     contents: list[HeadingField | ParagraphField | RunsField]
+
+     def add(self, doc: DocxTemplate):
+         sd = doc.new_subdoc()
+         for content in self.contents:
+             # if isinstance(content, ImageField):
+             #     content.download_image(self.download_path)
+             #     content.add(sd)  # type: ignore
+             # else:
+             content.add(sd)  # type: ignore
+         return sd
+
+     def __str__(self):
+         return f"ContentField(contents={self.contents})"

unique_toolkit/_common/docx_generator/service.py
@@ -0,0 +1,252 @@
+ import io
+ import logging
+ import re
+ from pathlib import Path
+
+ from docxtpl import DocxTemplate
+ from markdown_it import MarkdownIt
+
+ from unique_toolkit._common.docx_generator.config import DocxGeneratorConfig
+ from unique_toolkit._common.docx_generator.schemas import (
+     ContentField,
+     HeadingField,
+     # ImageField,
+     ParagraphField,
+     RunField,
+     RunsField,
+ )
+ from unique_toolkit.chat.service import ChatService
+ from unique_toolkit.services import KnowledgeBaseService
+
+ generator_dir_path = Path(__file__).resolve().parent
+
+
+ _LOGGER = logging.getLogger(__name__)
+
+
+ class DocxGeneratorService:
+     def __init__(
+         self,
+         chat_service: ChatService,
+         knowledge_base_service: KnowledgeBaseService,
+         config: DocxGeneratorConfig,
+     ):
+         self._knowledge_base_service = knowledge_base_service
+         self._config = config
+
+     @staticmethod
+     def parse_markdown_to_list_content_fields(
+         markdown: str, offset_header_lvl: int = 0
+     ) -> list[HeadingField | ParagraphField | RunsField]:
+         # Initialize markdown-it parser
+         md = MarkdownIt()
+
+         # Preprocess markdown.
+         # - Replace all headings with the correct heading level
+         # - Remove "Relevant sources" heading
+         # - Replace "# Proposed answer" with "#### Proposed answer"
+         markdown = re.sub(r"(?m)^\s*## ", "#### ", markdown)
+         markdown = re.sub(r"(?m)^\s*### ", "##### ", markdown)
+         markdown = markdown.replace("# Relevant sources", "")
+         markdown = markdown.replace("# Proposed answer", "#### Proposed answer")
+
+         tokens = md.parse(markdown)
+
+         elements = []
+         current_section = {}
+         in_list = False
+         bullet_list_indent_level = 0
+         list_item_open = False
+
+         for token in tokens:
+             if token.type == "bullet_list_open":
+                 in_list = True
+                 bullet_list_indent_level = int(token.level / 2)
+
+             elif token.type == "bullet_list_close":
+                 in_list = False
+                 bullet_list_indent_level = 0
+
+             elif token.type == "list_item_open":
+                 if list_item_open:
+                     elements.append(current_section)
+                 list_item_open = True
+                 list_level = token.level - bullet_list_indent_level
+                 current_section = {
+                     "type": RunsField,
+                     "runs": [],
+                     "is_list_item": True,
+                     "level": list_level,
+                 }
+
+             elif token.type == "list_item_close":
+                 if current_section and current_section.get("runs"):
+                     elements.append(current_section)
+                     current_section = {}
+                 list_item_open = False
+
+             if token.type == "heading_open":
+                 # Heading start, token.tag gives the level (e.g., 'h1', 'h2', etc.)
+                 header_lvl = int(token.tag[1])  # Extract the level number from tag
+                 current_section = {
+                     "type": HeadingField,
+                     "text": "",
+                     "level": header_lvl + offset_header_lvl,
+                 }
+
+             elif token.type == "heading_close":
+                 if current_section:
+                     elements.append(current_section)
+                     current_section = {}
+
+             elif token.type == "paragraph_open":
+                 if not in_list:  # Only create new paragraph if not in a list
+                     current_section = {"type": RunsField, "runs": []}
+
+             elif token.type == "paragraph_close":
+                 if not in_list and current_section:  # Only append if not in a list
+                     elements.append(current_section)
+                     current_section = {}
+
+             elif token.type == "inline":
+                 if current_section.get("type") == HeadingField:
+                     content = token.content
+                     if content.startswith("_page"):
+                         # replace "_pageXXXX_" with "PageXXXX", where XXXX can be any characters and numbers
+                         content = re.sub(
+                             r"^_page([a-zA-Z0-9\s-]+)_(.*?)",
+                             r"Page\1",
+                             content,
+                         )
+                         bold = True
+                     current_section["text"] += content
+                 elif "runs" in current_section:
+                     bold = False
+                     italic = False
+                     runs = []
+                     if token.children:
+                         for child in token.children:
+                             content = child.content
+                             if child.type == "strong_open":
+                                 bold = True
+                             elif child.type == "strong_close":
+                                 bold = False
+                             elif child.type == "em_open":
+                                 italic = True
+                             elif child.type == "em_close":
+                                 italic = False
+                             if child.type == "softbreak":
+                                 content += "\n"
+                             if content:  # Only add non-empty content
+                                 runs.append(
+                                     RunField(
+                                         text=content,
+                                         bold=bold,
+                                         italic=italic,
+                                     )
+                                 )
+                     else:
+                         runs.append(
+                             RunField(
+                                 text=token.content,
+                                 bold=bold,
+                                 italic=italic,
+                             )
+                         )
+
+                     current_section["runs"].extend(runs)
+
+         # Process remaining elements
+         contents = []
+         for element in elements:
+             if not element:
+                 continue
+             if element["type"] == HeadingField:
+                 contents.append(
+                     HeadingField(
+                         text=element["text"],
+                         level=element["level"],
+                     )
+                 )
+             elif element["type"] == RunsField:
+                 if element.get("is_list_item", False):
+                     level: int = min(element.get("level", 1), 5)
+                     if level > 1:
+                         style = "List Bullet " + str(level)
+                     else:
+                         style = "List Bullet"
+                     contents.append(RunsField(style=style, runs=element["runs"]))
+                 else:
+                     contents.append(RunsField(runs=element["runs"]))
+
+         return contents
+
+     def generate_from_template(
+         self,
+         subdoc_content: list[HeadingField | ParagraphField | RunsField],
+         fields: dict | None = None,
+     ):
+         """
+         Generate a docx file from a template with the given content.
+
+         Args:
+             subdoc_content (list[HeadingField | ParagraphField | RunsField]): The content to be added to the docx file.
+             fields (dict): Other fields to be added to the docx file. Defaults to None.
+         """
+
+         docx_template_object = self._get_template(self._config.template_content_id)
+
+         doc = DocxTemplate(io.BytesIO(docx_template_object))
+
+         try:
+             content = {}
+             content["body"] = ContentField(contents=subdoc_content)
+
+             if fields:
+                 content.update(fields)
+
+             for key, value in content.items():
+                 if isinstance(value, ContentField):
+                     content[key] = value.add(doc)
+
+             doc.render(content)
+             docx_rendered_object = io.BytesIO()
+
+             doc.save(docx_rendered_object)
+             docx_rendered_object.seek(0)
+
+             return docx_rendered_object.getvalue()
+
+         except Exception as e:
+             _LOGGER.error(f"Error generating docx: {e}")
+             return None
+
+     def _get_template(self, template_content_id: str):
+         try:
+             if template_content_id:
+                 _LOGGER.info(
+                     f"Downloading template from content ID: {template_content_id}"
+                 )
+                 file_content = self._knowledge_base_service.download_content_to_bytes(
+                     content_id=template_content_id
+                 )
+             else:
+                 _LOGGER.info("No template content ID provided. Using default template.")
+                 file_content = self._get_default_template()
+         except Exception as e:
+             _LOGGER.warning(
+                 f"An error occurred while downloading the template {e}. Make sure the template content ID is valid. Falling back to default template."
+             )
+             file_content = self._get_default_template()
+
+         return file_content
+
+     def _get_default_template(self):
+         generator_dir_path = Path(__file__).resolve().parent
+         path = generator_dir_path / "template" / "Doc Template.docx"
+
+         file_content = path.read_bytes()
+
+         _LOGGER.info("Template downloaded from default template")
+
+         return file_content
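
Putting the pieces together, a hedged end-to-end sketch of DocxGeneratorService based on the methods above; how chat_service and knowledge_base_service are constructed depends on the application's event wiring and is only assumed here, as is the {{ title }} tag in the template.

# End-to-end sketch: markdown in, rendered .docx bytes out.
from unique_toolkit._common.docx_generator import DocxGeneratorConfig, DocxGeneratorService


def render_report(chat_service, knowledge_base_service, markdown_answer: str) -> bytes | None:
    service = DocxGeneratorService(
        chat_service=chat_service,
        knowledge_base_service=knowledge_base_service,
        config=DocxGeneratorConfig(),  # empty template_content_id -> bundled default template
    )
    content = DocxGeneratorService.parse_markdown_to_list_content_fields(
        markdown_answer, offset_header_lvl=1
    )
    # Returns the rendered .docx bytes, or None if rendering failed (see generate_from_template above).
    return service.generate_from_template(
        subdoc_content=content,
        fields={"title": "Generated report"},  # assumes the template defines a {{ title }} tag
    )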