unique_toolkit 0.7.7__py3-none-any.whl → 1.23.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unique_toolkit might be problematic. Click here for more details.
- unique_toolkit/__init__.py +28 -1
- unique_toolkit/_common/api_calling/human_verification_manager.py +343 -0
- unique_toolkit/_common/base_model_type_attribute.py +303 -0
- unique_toolkit/_common/chunk_relevancy_sorter/config.py +49 -0
- unique_toolkit/_common/chunk_relevancy_sorter/exception.py +5 -0
- unique_toolkit/_common/chunk_relevancy_sorter/schemas.py +46 -0
- unique_toolkit/_common/chunk_relevancy_sorter/service.py +374 -0
- unique_toolkit/_common/chunk_relevancy_sorter/tests/test_service.py +275 -0
- unique_toolkit/_common/default_language_model.py +12 -0
- unique_toolkit/_common/docx_generator/__init__.py +7 -0
- unique_toolkit/_common/docx_generator/config.py +12 -0
- unique_toolkit/_common/docx_generator/schemas.py +80 -0
- unique_toolkit/_common/docx_generator/service.py +252 -0
- unique_toolkit/_common/docx_generator/template/Doc Template.docx +0 -0
- unique_toolkit/_common/endpoint_builder.py +305 -0
- unique_toolkit/_common/endpoint_requestor.py +430 -0
- unique_toolkit/_common/exception.py +24 -0
- unique_toolkit/_common/feature_flags/schema.py +9 -0
- unique_toolkit/_common/pydantic/rjsf_tags.py +936 -0
- unique_toolkit/_common/pydantic_helpers.py +154 -0
- unique_toolkit/_common/referencing.py +53 -0
- unique_toolkit/_common/string_utilities.py +140 -0
- unique_toolkit/_common/tests/test_referencing.py +521 -0
- unique_toolkit/_common/tests/test_string_utilities.py +506 -0
- unique_toolkit/_common/token/image_token_counting.py +67 -0
- unique_toolkit/_common/token/token_counting.py +204 -0
- unique_toolkit/_common/utils/__init__.py +1 -0
- unique_toolkit/_common/utils/files.py +43 -0
- unique_toolkit/_common/utils/structured_output/__init__.py +1 -0
- unique_toolkit/_common/utils/structured_output/schema.py +5 -0
- unique_toolkit/_common/utils/write_configuration.py +51 -0
- unique_toolkit/_common/validators.py +101 -4
- unique_toolkit/agentic/__init__.py +1 -0
- unique_toolkit/agentic/debug_info_manager/debug_info_manager.py +28 -0
- unique_toolkit/agentic/debug_info_manager/test/test_debug_info_manager.py +278 -0
- unique_toolkit/agentic/evaluation/config.py +36 -0
- unique_toolkit/{evaluators → agentic/evaluation}/context_relevancy/prompts.py +25 -0
- unique_toolkit/agentic/evaluation/context_relevancy/schema.py +80 -0
- unique_toolkit/agentic/evaluation/context_relevancy/service.py +273 -0
- unique_toolkit/agentic/evaluation/evaluation_manager.py +218 -0
- unique_toolkit/agentic/evaluation/hallucination/constants.py +61 -0
- unique_toolkit/agentic/evaluation/hallucination/hallucination_evaluation.py +111 -0
- unique_toolkit/{evaluators → agentic/evaluation}/hallucination/prompts.py +1 -1
- unique_toolkit/{evaluators → agentic/evaluation}/hallucination/service.py +16 -15
- unique_toolkit/{evaluators → agentic/evaluation}/hallucination/utils.py +30 -20
- unique_toolkit/{evaluators → agentic/evaluation}/output_parser.py +20 -2
- unique_toolkit/{evaluators → agentic/evaluation}/schemas.py +27 -7
- unique_toolkit/agentic/evaluation/tests/test_context_relevancy_service.py +253 -0
- unique_toolkit/agentic/evaluation/tests/test_output_parser.py +87 -0
- unique_toolkit/agentic/history_manager/history_construction_with_contents.py +297 -0
- unique_toolkit/agentic/history_manager/history_manager.py +242 -0
- unique_toolkit/agentic/history_manager/loop_token_reducer.py +484 -0
- unique_toolkit/agentic/history_manager/utils.py +96 -0
- unique_toolkit/agentic/postprocessor/postprocessor_manager.py +212 -0
- unique_toolkit/agentic/reference_manager/reference_manager.py +103 -0
- unique_toolkit/agentic/responses_api/__init__.py +19 -0
- unique_toolkit/agentic/responses_api/postprocessors/code_display.py +63 -0
- unique_toolkit/agentic/responses_api/postprocessors/generated_files.py +145 -0
- unique_toolkit/agentic/responses_api/stream_handler.py +15 -0
- unique_toolkit/agentic/short_term_memory_manager/persistent_short_term_memory_manager.py +141 -0
- unique_toolkit/agentic/thinking_manager/thinking_manager.py +103 -0
- unique_toolkit/agentic/tools/__init__.py +1 -0
- unique_toolkit/agentic/tools/a2a/__init__.py +36 -0
- unique_toolkit/agentic/tools/a2a/config.py +17 -0
- unique_toolkit/agentic/tools/a2a/evaluation/__init__.py +15 -0
- unique_toolkit/agentic/tools/a2a/evaluation/_utils.py +66 -0
- unique_toolkit/agentic/tools/a2a/evaluation/config.py +55 -0
- unique_toolkit/agentic/tools/a2a/evaluation/evaluator.py +260 -0
- unique_toolkit/agentic/tools/a2a/evaluation/summarization_user_message.j2 +9 -0
- unique_toolkit/agentic/tools/a2a/manager.py +55 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/__init__.py +21 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/_display_utils.py +185 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/_ref_utils.py +73 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/config.py +45 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/display.py +180 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/references.py +101 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/test/test_display_utils.py +1335 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/test/test_ref_utils.py +603 -0
- unique_toolkit/agentic/tools/a2a/prompts.py +46 -0
- unique_toolkit/agentic/tools/a2a/response_watcher/__init__.py +6 -0
- unique_toolkit/agentic/tools/a2a/response_watcher/service.py +91 -0
- unique_toolkit/agentic/tools/a2a/tool/__init__.py +4 -0
- unique_toolkit/agentic/tools/a2a/tool/_memory.py +26 -0
- unique_toolkit/agentic/tools/a2a/tool/_schema.py +9 -0
- unique_toolkit/agentic/tools/a2a/tool/config.py +73 -0
- unique_toolkit/agentic/tools/a2a/tool/service.py +306 -0
- unique_toolkit/agentic/tools/agent_chunks_hanlder.py +65 -0
- unique_toolkit/agentic/tools/config.py +167 -0
- unique_toolkit/agentic/tools/factory.py +44 -0
- unique_toolkit/agentic/tools/mcp/__init__.py +4 -0
- unique_toolkit/agentic/tools/mcp/manager.py +71 -0
- unique_toolkit/agentic/tools/mcp/models.py +28 -0
- unique_toolkit/agentic/tools/mcp/tool_wrapper.py +234 -0
- unique_toolkit/agentic/tools/openai_builtin/__init__.py +11 -0
- unique_toolkit/agentic/tools/openai_builtin/base.py +30 -0
- unique_toolkit/agentic/tools/openai_builtin/code_interpreter/__init__.py +8 -0
- unique_toolkit/agentic/tools/openai_builtin/code_interpreter/config.py +57 -0
- unique_toolkit/agentic/tools/openai_builtin/code_interpreter/service.py +230 -0
- unique_toolkit/agentic/tools/openai_builtin/manager.py +62 -0
- unique_toolkit/agentic/tools/schemas.py +141 -0
- unique_toolkit/agentic/tools/test/test_mcp_manager.py +536 -0
- unique_toolkit/agentic/tools/test/test_tool_progress_reporter.py +445 -0
- unique_toolkit/agentic/tools/tool.py +183 -0
- unique_toolkit/agentic/tools/tool_manager.py +523 -0
- unique_toolkit/agentic/tools/tool_progress_reporter.py +285 -0
- unique_toolkit/agentic/tools/utils/__init__.py +19 -0
- unique_toolkit/agentic/tools/utils/execution/__init__.py +1 -0
- unique_toolkit/agentic/tools/utils/execution/execution.py +286 -0
- unique_toolkit/agentic/tools/utils/source_handling/__init__.py +0 -0
- unique_toolkit/agentic/tools/utils/source_handling/schema.py +21 -0
- unique_toolkit/agentic/tools/utils/source_handling/source_formatting.py +207 -0
- unique_toolkit/agentic/tools/utils/source_handling/tests/test_source_formatting.py +216 -0
- unique_toolkit/app/__init__.py +6 -0
- unique_toolkit/app/dev_util.py +180 -0
- unique_toolkit/app/init_sdk.py +32 -1
- unique_toolkit/app/schemas.py +198 -31
- unique_toolkit/app/unique_settings.py +367 -0
- unique_toolkit/chat/__init__.py +8 -1
- unique_toolkit/chat/deprecated/service.py +232 -0
- unique_toolkit/chat/functions.py +642 -77
- unique_toolkit/chat/rendering.py +34 -0
- unique_toolkit/chat/responses_api.py +461 -0
- unique_toolkit/chat/schemas.py +133 -2
- unique_toolkit/chat/service.py +115 -767
- unique_toolkit/content/functions.py +153 -4
- unique_toolkit/content/schemas.py +122 -15
- unique_toolkit/content/service.py +278 -44
- unique_toolkit/content/smart_rules.py +301 -0
- unique_toolkit/content/utils.py +8 -3
- unique_toolkit/embedding/service.py +102 -11
- unique_toolkit/framework_utilities/__init__.py +1 -0
- unique_toolkit/framework_utilities/langchain/client.py +71 -0
- unique_toolkit/framework_utilities/langchain/history.py +19 -0
- unique_toolkit/framework_utilities/openai/__init__.py +6 -0
- unique_toolkit/framework_utilities/openai/client.py +83 -0
- unique_toolkit/framework_utilities/openai/message_builder.py +229 -0
- unique_toolkit/framework_utilities/utils.py +23 -0
- unique_toolkit/language_model/__init__.py +3 -0
- unique_toolkit/language_model/builder.py +27 -11
- unique_toolkit/language_model/default_language_model.py +3 -0
- unique_toolkit/language_model/functions.py +327 -43
- unique_toolkit/language_model/infos.py +992 -50
- unique_toolkit/language_model/reference.py +242 -0
- unique_toolkit/language_model/schemas.py +475 -48
- unique_toolkit/language_model/service.py +228 -27
- unique_toolkit/protocols/support.py +145 -0
- unique_toolkit/services/__init__.py +7 -0
- unique_toolkit/services/chat_service.py +1630 -0
- unique_toolkit/services/knowledge_base.py +861 -0
- unique_toolkit/short_term_memory/service.py +178 -41
- unique_toolkit/smart_rules/__init__.py +0 -0
- unique_toolkit/smart_rules/compile.py +56 -0
- unique_toolkit/test_utilities/events.py +197 -0
- {unique_toolkit-0.7.7.dist-info → unique_toolkit-1.23.0.dist-info}/METADATA +606 -7
- unique_toolkit-1.23.0.dist-info/RECORD +182 -0
- unique_toolkit/evaluators/__init__.py +0 -1
- unique_toolkit/evaluators/config.py +0 -35
- unique_toolkit/evaluators/constants.py +0 -1
- unique_toolkit/evaluators/context_relevancy/constants.py +0 -32
- unique_toolkit/evaluators/context_relevancy/service.py +0 -53
- unique_toolkit/evaluators/context_relevancy/utils.py +0 -142
- unique_toolkit/evaluators/hallucination/constants.py +0 -41
- unique_toolkit-0.7.7.dist-info/RECORD +0 -64
- /unique_toolkit/{evaluators → agentic/evaluation}/exception.py +0 -0
- {unique_toolkit-0.7.7.dist-info → unique_toolkit-1.23.0.dist-info}/LICENSE +0 -0
- {unique_toolkit-0.7.7.dist-info → unique_toolkit-1.23.0.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from string import Template
|
|
3
|
+
|
|
4
|
+
from unique_toolkit.agentic.tools.utils.source_handling.schema import SourceFormatConfig
|
|
5
|
+
from unique_toolkit.content.schemas import ContentChunk
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _format_page_range(chunk: ContentChunk) -> str:
    """Render the page span of ``chunk`` as display text.

    Returns an empty string unless both ``start_page`` and ``end_page`` are
    set and greater than zero. Returns the single page number when both are
    equal, and ``"<start> - <end>"`` otherwise.
    """
    first_page, last_page = chunk.start_page, chunk.end_page

    # Missing (None), zero or negative page numbers all mean "no page info".
    if not first_page or not last_page or not first_page > 0 or not last_page > 0:
        return ""

    if first_page == last_page:
        return str(first_page)
    return f"{first_page} - {last_page}"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _parse_chunk(
    chunk: ContentChunk, section_templates: dict[str, str]
) -> dict[str, str]:
    """Split the chunk text into its embedded sections and the remaining body.

    Only the ``document`` and ``info`` sections are searched for in the text;
    any other configured section (for example ``page``) is skipped here.

    Args:
        chunk: Chunk whose ``text`` is parsed.
        section_templates: Mapping of section name to its format template.

    Returns:
        A dict with one entry per searched section (``""`` when the section is
        not found) plus ``"text"``, the stripped text left after removing every
        matched section.
    """
    embedded_sections = ("document", "info")
    remaining = chunk.text
    extracted = dict()

    for name, template in section_templates.items():
        if name not in embedded_sections:
            continue

        section_pattern = SourceFormatConfig.template_to_pattern(template)
        found = re.search(section_pattern, remaining, re.DOTALL)
        if found is None:
            extracted[name] = ""
            continue

        extracted[name] = found.group(1)
        # Drop the matched section markup so it does not leak into the body.
        remaining = remaining.replace(found.group(0), "")

    extracted["text"] = remaining.strip()
    return extracted
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def format_chunk(index: int, chunk: ContentChunk, config: SourceFormatConfig) -> str:
    r"""
    Format a content chunk as a source string according to ``config``.

    The chunk text may embed a document section and an optional info section
    in front of the main text. These are extracted, re-rendered with the
    section templates from ``config.sections`` and substituted, together with
    the page range and matching metadata values, into ``config.source_template``.
    This allows extra information such as page numbers or metadata to be added
    when chunks are appended as sources.

    Args:
        index (int): The source index number to be used in the template.
        chunk (ContentChunk): A ContentChunk object containing:
            - text (str): The main content text
            - start_page (int, optional): Starting page number
            - end_page (int, optional): Ending page number
            - metadata (optional): Additional metadata key-value pairs
        config (SourceFormatConfig): Configuration object containing:
            - source_template (str): The overall ``string.Template`` for the output
            - sections (dict): Mapping of section names to their format templates

    Returns:
        str: Formatted string according to the template

    Raises:
        KeyError: If ``source_template`` references a placeholder other than
            ``index``, ``text`` or a key of ``config.sections``.

    Examples:
        Using XML-style config without page numbers:
        >>> config = SourceFormatConfig(
        ...     source_template="<source${index}>${document}${info}${text}</source${index}>",
        ...     sections={
        ...         "document": "<|document|>{}<|/document|>\n",
        ...         "info": "<|info|>{}<|/info|>\n",
        ...     },
        ... )
        >>> chunk = ContentChunk(
        ...     id="1",
        ...     order=1,
        ...     text="<|document|>Sample Doc.pdf<|/document|>\n<|info|>Important info<|/info|>\nMain content",
        ... )
        >>> format_chunk(1, chunk, config)
        '<source1><|document|>Sample Doc.pdf<|/document|>\n<|info|>Important info<|/info|>\nMain content</source1>'

        Using XML-style config with page numbers:
        >>> config = SourceFormatConfig(
        ...     source_template="<source${index}>${document}${page}${info}${text}</source${index}>",
        ...     sections={
        ...         "document": "<|document|>{}<|/document|>\n",
        ...         "info": "<|info|>{}<|/info|>\n",
        ...         "page": "<|page|>{}<|/page|>\n",
        ...     },
        ... )
        >>> chunk = ContentChunk(
        ...     id="1",
        ...     order=1,
        ...     text="<|document|>Sample Doc.pdf<|/document|>\n<|info|>Important info<|/info|>\nMain content",
        ...     start_page=1,
        ...     end_page=3,
        ... )
        >>> format_chunk(1, chunk, config)
        '<source1><|document|>Sample Doc.pdf<|/document|>\n<|page|>1 - 3<|/page|>\n<|info|>Important info<|/info|>\nMain content</source1>'

        Using XML-style config with metadata:
        >>> config = SourceFormatConfig(
        ...     source_template="<source${index}>${document}${date}${text}</source${index}>",
        ...     sections={
        ...         "document": "<|document|>{}<|/document|>\n",
        ...         "date": "<|DateFromMetaData|>{}<|/DateFromMetaData|>\n",
        ...     },
        ... )
        >>> chunk = ContentChunk(
        ...     id="1",
        ...     order=1,
        ...     text="<|document|>Sample Doc.pdf<|/document|>\nMain content",
        ...     metadata={
        ...         "key": "metadata-key",
        ...         "mimeType": "text/plain",
        ...         "date": "12.03.2025",
        ...     },
        ... )
        >>> format_chunk(1, chunk, config)
        '<source1><|document|>Sample Doc.pdf<|/document|>\n<|DateFromMetaData|>12.03.2025<|/DateFromMetaData|>\nMain content</source1>'

        Using JSON-style config:
        >>> config = SourceFormatConfig(
        ...     source_template="{'source_number': ${index}, 'content': '${document}${page}${info}${text}'}",
        ...     sections={
        ...         "document": "<|document|>{}<|/document|>\n",
        ...         "info": "<|info|>{}<|/info|>\n",
        ...         "page": "<|page|>{}<|/page|>\n",
        ...     },
        ... )
        >>> chunk = ContentChunk(
        ...     id="1",
        ...     order=1,
        ...     text="<|document|>Sample Doc.pdf<|/document|>\n<|info|>Important info<|/info|>\nMain content",
        ...     start_page=5,
        ...     end_page=5,
        ... )
        >>> format_chunk(1, chunk, config)
        "{'source_number': 1, 'content': '<|document|>Sample Doc.pdf<|/document|>\n<|page|>5<|/page|>\n<|info|>Important info<|/info|>\nMain content'}"

    Notes:
        - The function extracts document and info sections from the chunk text using regex patterns
        - Page numbers are formatted as single numbers when start_page equals end_page
        - Page numbers are formatted as ranges (e.g., "1 - 3") when start_page differs from end_page
        - If page numbers are not available (None or 0), the page section will be empty
        - A section is only rendered when it is a key of ``config.sections`` and has a
          non-empty value; otherwise its placeholder is replaced by an empty string
        - Metadata keys that match section names are included in the output; the
          'document' and 'info' sections are never taken from metadata
        - Metadata is processed by the _process_metadata function to update the parsed dictionary
        - When using custom metadata tags like '<|DateFromMetaData|>', the key in chunk.metadata must match
          the key in the sections dictionary (e.g., 'date' in the example above), not the tag name
    """
    sections = config.sections

    parsed = _parse_chunk(chunk, sections)
    parsed["page"] = _format_page_range(chunk)

    # Let metadata values fill in (or override) sections of the same name.
    parsed = _process_metadata(chunk, parsed, sections)

    # Render every configured section; empty or missing values collapse to ""
    # so that no empty tags show up in the output.
    formatted_sections = {
        section: template.format(parsed[section]) if parsed.get(section) else ""
        for section, template in sections.items()
    }

    # The main text is inserted verbatim, without a section template.
    formatted_sections["text"] = parsed["text"]

    return Template(config.source_template).substitute(
        index=index, **formatted_sections
    )
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _process_metadata(
    chunk: ContentChunk, parsed: dict[str, str], sections: dict[str, str]
) -> dict[str, str]:
    """
    Process metadata from chunk and update the parsed dictionary.

    Metadata entries whose keys match a section name in ``sections`` are
    copied into ``parsed``, so they can be rendered with that section's
    template.

    Args:
        chunk (ContentChunk): The content chunk containing metadata
        parsed (dict): The dictionary of already parsed sections to update
            (modified in place)
        sections (dict): Mapping of section names to their format templates;
            only its keys are used here

    Returns:
        dict: The same ``parsed`` dictionary with metadata values added

    Notes:
        - Keys 'document', 'info' and 'text' are excluded from metadata
          processing: they come from the chunk text itself and must not be
          overwritten by metadata
        - Only metadata keys that match section names will be processed
        - If chunk.metadata is missing, None or not iterable, the parsed dict
          is returned unchanged
        - Metadata values are added directly to the parsed dictionary using
          their original keys
    """
    # Return unchanged parsed dict if there is no metadata at all
    if not hasattr(chunk, "metadata") or chunk.metadata is None:
        return parsed

    # dict() accepts mappings as well as iterables of (key, value) pairs;
    # NOTE(review): presumably this is what allows model-like metadata objects
    # to be used here - confirm against ContentChunk.metadata's type.
    metadata_dict = dict(chunk.metadata) if hasattr(chunk.metadata, "__iter__") else {}

    # Sections that are derived from the chunk text and never from metadata
    excluded_keys = {"document", "info", "text"}

    valid_section_keys = set(sections) - excluded_keys

    # Update parsed with valid metadata entries
    for key, value in metadata_dict.items():
        if key in valid_section_keys:
            parsed[key] = value

    return parsed
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from unique_toolkit.agentic.tools.utils.source_handling.schema import SourceFormatConfig
|
|
4
|
+
from unique_toolkit.agentic.tools.utils.source_handling.source_formatting import (
|
|
5
|
+
_format_page_range,
|
|
6
|
+
format_chunk,
|
|
7
|
+
)
|
|
8
|
+
from unique_toolkit.content.schemas import ContentChunk
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@pytest.fixture
def default_config():
    """Provide a ``SourceFormatConfig`` built purely from its defaults."""
    config = SourceFormatConfig()
    return config
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@pytest.fixture
def xml_style_config_without_page_number():
    """Provide an XML-style config with document and info sections only."""
    section_templates = {
        "document": "<|document|>{}<|/document|>\n",
        "info": "<|info|>{}<|/info|>\n",
    }
    template = "<source${index}>${document}${info}${text}</source${index}>"
    return SourceFormatConfig(source_template=template, sections=section_templates)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@pytest.fixture
def xml_style_config_with_page_number():
    """Provide an XML-style config that also renders a page section."""
    section_templates = {
        "document": "<|document|>{}<|/document|>\n",
        "info": "<|info|>{}<|/info|>\n",
        "page": "<|page|>{}<|/page|>\n",
    }
    template = "<source${index}>${document}${page}${info}${text}</source${index}>"
    return SourceFormatConfig(source_template=template, sections=section_templates)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@pytest.fixture
def xml_style_config_with_metadata():
    """Provide an XML-style config with a ``date`` section filled from metadata."""
    section_templates = {
        "document": "<|document|>{}<|/document|>\n",
        "date": "<|DateFromMetaData|>{}<|/DateFromMetaData|>\n",
    }
    template = "<source${index}>${document}${date}${text}</source${index}>"
    return SourceFormatConfig(source_template=template, sections=section_templates)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@pytest.fixture
def json_style_config():
    """Provide a JSON-like source template with document, info and page sections."""
    section_templates = {
        "document": "<|document|>{}<|/document|>\n",
        "info": "<|info|>{}<|/info|>\n",
        "page": "<|page|>{}<|/page|>\n",
    }
    template = "{'source_number': ${index}, 'content': '${document}${page}${info}${text}'}"
    return SourceFormatConfig(source_template=template, sections=section_templates)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def test_format_page_range():
    """Check single-page, multi-page and invalid page ranges."""
    cases = [
        (1, 1, "1"),  # same start and end page
        (1, 3, "1 - 3"),  # page range
        (0, 0, ""),  # invalid pages
    ]
    for start, end, expected in cases:
        chunk = ContentChunk(
            id="1", order=1, text="test", start_page=start, end_page=end
        )
        assert _format_page_range(chunk) == expected
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def test_json_style_formatting(json_style_config):
    """A chunk with document, info and a page range renders into the JSON-style template."""
    source_text = "<|document|>Doc1<|/document|>\n<|info|>Important<|/info|>\nContent text"
    chunk = ContentChunk(id="1", order=1, text=source_text, start_page=1, end_page=2)

    expected = "{'source_number': 1, 'content': '<|document|>Doc1<|/document|>\n<|page|>1 - 2<|/page|>\n<|info|>Important<|/info|>\nContent text'}"

    assert format_chunk(1, chunk, json_style_config) == expected
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def test_metadata_handling(xml_style_config_with_metadata):
    """Metadata is rendered only when one of its keys matches a configured section."""
    with_date = "<source1><|document|>Doc1<|/document|>\n<|DateFromMetaData|>12.03.2025<|/DateFromMetaData|>\nContent text</source1>"
    without_date = "<source1><|document|>Doc1<|/document|>\nContent text</source1>"

    scenarios = [
        # Metadata that matches a section name
        (
            {
                "key": "metadata-key",
                "mimeType": "text/plain",
                "date": "12.03.2025",
            },
            with_date,
        ),
        # Metadata that doesn't match a section name
        (
            {
                "key": "metadata-key",
                "mimeType": "text/plain",
                "unrelated_key": "Some value",
            },
            without_date,
        ),
        # Minimal metadata
        ({"key": "metadata-key", "mimeType": "text/plain"}, without_date),
    ]

    for metadata, expected in scenarios:
        chunk = ContentChunk(
            id="1",
            order=1,
            text="<|document|>Doc1<|/document|>\nContent text",
            metadata=metadata,  # type: ignore
        )
        assert format_chunk(1, chunk, xml_style_config_with_metadata) == expected
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def test_default_style(default_config):
    """The default config renders document and info but no page section."""
    source_text = "<|document|>Doc1<|/document|>\n<|info|>Important<|/info|>\nContent text"
    chunk = ContentChunk(id="1", order=1, text=source_text, start_page=1, end_page=2)

    expected = "<source1><|document|>Doc1<|/document|>\n<|info|>Important<|/info|>\nContent text</source1>"

    assert format_chunk(1, chunk, default_config) == expected
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def test_xml_style_without_page_number_formatting(xml_style_config_without_page_number):
    """Page information on the chunk is ignored when no page section is configured."""
    source_text = "<|document|>Doc1<|/document|>\n<|info|>Important<|/info|>\nContent text"
    chunk = ContentChunk(id="1", order=1, text=source_text, start_page=1, end_page=2)

    expected = "<source1><|document|>Doc1<|/document|>\n<|info|>Important<|/info|>\nContent text</source1>"

    assert format_chunk(1, chunk, xml_style_config_without_page_number) == expected
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def test_xml_style_with_page_number_formatting(xml_style_config_with_page_number):
    """The page range is rendered between the document and info sections."""
    source_text = "<|document|>Doc1<|/document|>\n<|info|>Important<|/info|>\nContent text"
    chunk = ContentChunk(id="1", order=1, text=source_text, start_page=1, end_page=2)

    expected = "<source1><|document|>Doc1<|/document|>\n<|page|>1 - 2<|/page|>\n<|info|>Important<|/info|>\nContent text</source1>"

    assert format_chunk(1, chunk, xml_style_config_with_page_number) == expected
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def test_special_characters_handling(json_style_config):
    """Quotes and braces inside section values pass through unchanged."""
    source_text = "<|document|>Doc's \"title\"<|/document|>\n<|info|>Info with {brackets}<|/info|>\nContent: with 'quotes'"
    chunk = ContentChunk(id="1", order=1, text=source_text, start_page=1, end_page=1)

    expected = "{'source_number': 1, 'content': '<|document|>Doc's \"title\"<|/document|>\n<|page|>1<|/page|>\n<|info|>Info with {brackets}<|/info|>\nContent: with 'quotes''}"

    assert format_chunk(1, chunk, json_style_config) == expected
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def test_empty_sections(xml_style_config_without_page_number, json_style_config):
    """Plain text without sections or pages renders as just the text in both styles."""
    plain_text = "Just plain text without any sections"
    chunk = ContentChunk(
        id="1", order=1, text=plain_text, start_page=None, end_page=None
    )

    # XML style
    xml_result = format_chunk(1, chunk, xml_style_config_without_page_number)
    assert xml_result == "<source1>Just plain text without any sections</source1>"

    # JSON style
    json_result = format_chunk(1, chunk, json_style_config)
    expected_json = "{'source_number': 1, 'content': 'Just plain text without any sections'}"
    assert json_result == expected_json
|
unique_toolkit/app/__init__.py
CHANGED
|
@@ -38,6 +38,12 @@ from .schemas import (
|
|
|
38
38
|
from .schemas import (
|
|
39
39
|
EventUserMessage as EventUserMessage,
|
|
40
40
|
)
|
|
41
|
+
from .schemas import (
|
|
42
|
+
McpServer as McpServer,
|
|
43
|
+
)
|
|
44
|
+
from .schemas import (
|
|
45
|
+
McpTool as McpTool,
|
|
46
|
+
)
|
|
41
47
|
from .verification import (
|
|
42
48
|
verify_signature_and_construct_event as verify_signature_and_construct_event,
|
|
43
49
|
)
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import json
|
|
3
|
+
from logging import getLogger
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import (
|
|
6
|
+
Awaitable,
|
|
7
|
+
Callable,
|
|
8
|
+
Generator,
|
|
9
|
+
TypeVar,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
from sseclient import SSEClient
|
|
13
|
+
|
|
14
|
+
from unique_toolkit._common.exception import ConfigurationException
|
|
15
|
+
from unique_toolkit.app import BaseEvent, ChatEvent, EventName
|
|
16
|
+
from unique_toolkit.app.init_sdk import init_unique_sdk
|
|
17
|
+
from unique_toolkit.app.unique_settings import UniqueSettings
|
|
18
|
+
|
|
19
|
+
T = TypeVar("T", bound=BaseEvent)
|
|
20
|
+
|
|
21
|
+
LOGGER = getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def get_event_name_from_event_class(event_class: type[T]) -> EventName | None:
    """Return the event name associated with ``event_class``.

    Only ``ChatEvent`` itself is mapped (identity check), to
    ``EventName.EXTERNAL_MODULE_CHOSEN``. Any other class yields ``None``.
    """
    return EventName.EXTERNAL_MODULE_CHOSEN if event_class is ChatEvent else None
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def get_sse_client(
    unique_settings: UniqueSettings,
    subscriptions: list[str],
) -> SSEClient:
    """Create an ``SSEClient`` for the given subscriptions.

    The request carries the app key as a bearer token plus the app id,
    company id, user id and API version from ``unique_settings``. The URL is
    obtained from ``unique_settings.api.sse_url(subscriptions)``.
    """
    bearer_token = unique_settings.app.key.get_secret_value()
    request_headers = {
        "Authorization": f"Bearer {bearer_token}",
        "x-app-id": unique_settings.app.id.get_secret_value(),
        "x-company-id": unique_settings.auth.company_id.get_secret_value(),
        "x-user-id": unique_settings.auth.user_id.get_secret_value(),
        "x-api-version": unique_settings.api.version,
    }
    return SSEClient(
        url=unique_settings.api.sse_url(subscriptions),
        headers=request_headers,
    )
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def get_event_generator(
    unique_settings: UniqueSettings,
    event_type: type[T],
) -> Generator[T, None, None]:
    """Yield events of ``event_type`` received from the SSE stream.

    For every accepted event the settings object is updated via
    ``unique_settings.update_from_event`` before the event is yielded.

    Args:
        unique_settings: Settings used to open the SSE connection, to supply
            ``chat_event_filter_options`` and to be updated from each event.
        event_type: The event model class to parse incoming payloads into.
            Must be a subclass of ``BaseEvent`` (not ``BaseEvent`` itself)
            that ``get_event_name_from_event_class`` knows a name for.

    Yields:
        Parsed events for which ``filter_event`` returned a falsy value.

    Raises:
        ValueError: If ``event_type`` is not a supported event model. Because
            this is a generator, the error surfaces on first iteration.
        ConfigurationException: Propagated unchanged when raised while
            processing an event (e.g. from ``filter_event``).
    """
    event_name = get_event_name_from_event_class(event_type)
    if (
        event_name is None
        or not issubclass(event_type, BaseEvent)
        or event_type is BaseEvent
    ):
        raise ValueError(f"Event model {event_type} is not a valid event model")

    subscription = event_name.value

    for sse_event in get_sse_client(unique_settings, [subscription]):
        try:
            payload = json.loads(sse_event.data)
            parsed_event = event_type.model_validate(payload)
            if parsed_event is None or parsed_event.filter_event(
                filter_options=unique_settings.chat_event_filter_options
            ):
                continue

            unique_settings.update_from_event(event=parsed_event)
        except ConfigurationException:
            # Configuration errors are not per-event problems; let them
            # propagate with their original traceback.
            raise
        except json.JSONDecodeError as e:
            LOGGER.error(f"Could not parse SSE event data as JSON: {e}")
            continue
        except Exception as e:
            # Best effort: a single bad event must not terminate the stream,
            # but report it as what it is rather than as a JSON error.
            LOGGER.error(f"Could not process SSE event: {e}")
            continue

        # Yield outside the try block so exceptions thrown into the generator
        # by the consumer are not swallowed as event-processing errors.
        yield parsed_event
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def get_event_stream(
    event_type: type[T] = BaseEvent,
    settings_config: UniqueSettings | str | None = None,
) -> Generator[T, None, None]:
    """Resolve the settings and return an SSE event generator.

    Args:
        event_type: The event model class to stream. Note that
            ``get_event_generator`` rejects ``BaseEvent`` itself with a
            ``ValueError`` once iteration starts, so callers need to pass a
            concrete event class.
        settings_config: Either a ready ``UniqueSettings`` instance, a string
            passed as ``filename`` to
            ``UniqueSettings.from_env_auto_with_sdk_init``, or ``None`` to
            call ``UniqueSettings.from_env_auto_with_sdk_init()`` without
            arguments.

    Returns:
        The generator produced by ``get_event_generator`` for the resolved
        settings and ``event_type``.
    """
    if isinstance(settings_config, UniqueSettings):
        # Caller supplied a fully built settings object; use it untouched.
        resolved_settings = settings_config
    elif isinstance(settings_config, str):
        resolved_settings = UniqueSettings.from_env_auto_with_sdk_init(
            filename=settings_config
        )
    else:
        resolved_settings = UniqueSettings.from_env_auto_with_sdk_init()

    return get_event_generator(resolved_settings, event_type)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def run_demo_with_sse_client(
    unique_settings: UniqueSettings,
    handler: Callable[[BaseEvent], Awaitable[None] | None],
    event_type: type[BaseEvent],
) -> None:
    """Run a demo that feeds events from the SSE stream to ``handler``.

    Both plain functions and coroutine functions are supported as handler.
    Coroutine handlers are executed one event at a time on a single event
    loop owned by this function.

    Args:
        unique_settings: Settings used to initialise the SDK and to open the
            SSE connection.
        handler: Callable invoked with every received event. If it is a
            coroutine function, each call is run to completion before the
            next event is read.
        event_type: The event model class to stream. If
            ``get_event_name_from_event_class`` has no name for it, the
            function logs a warning and returns without doing anything.
    """
    event_name = get_event_name_from_event_class(event_type)
    if event_name is None:
        LOGGER.warning(f"No event name known for {event_type}; nothing to run")
        return

    init_unique_sdk(unique_settings=unique_settings)

    if not asyncio.iscoroutinefunction(handler):
        for event in get_event_generator(unique_settings, event_type):
            handler(event)
        return

    # asyncio.get_event_loop() without a running loop is deprecated, so create
    # one loop explicitly and reuse it for every event instead of looking it
    # up on each iteration.
    loop = asyncio.new_event_loop()
    try:
        for event in get_event_generator(unique_settings, event_type):
            loop.run_until_complete(handler(event))
    finally:
        loop.close()
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def load_event(file_path: Path, event_type: type[BaseEvent]) -> BaseEvent:
    """Load a saved event from a JSON file and validate it as ``event_type``.

    Args:
        file_path: Path of the JSON file containing the event payload.
        event_type: Event model class whose ``model_validate`` is applied to
            the decoded JSON.

    Returns:
        The result of ``event_type.model_validate`` on the file content.
    """
    # JSON interchange is UTF-8; do not depend on the platform default
    # encoding, which would corrupt non-ASCII payloads on some systems.
    with file_path.open("r", encoding="utf-8") as file:
        event = json.load(file)

    return event_type.model_validate(event)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def run_demo_with_with_saved_event(
    unique_settings: UniqueSettings,
    handler: Callable[[BaseEvent], Awaitable[None] | None],
    event_type: type[BaseEvent],
    file_path: Path,
) -> None:
    """Run a demo by replaying one event stored in a JSON file.

    Note: event_type is the type of event that the handler expects.

    Args:
        unique_settings: The settings used to initialise the SDK.
        handler: Callable invoked once with the loaded event. Coroutine
            functions are executed with ``asyncio.run``.
        event_type: The event model class the file content is validated as.
            If ``get_event_name_from_event_class`` has no name for it, the
            function returns after SDK initialisation without loading the
            file.
        file_path: Path of the JSON file holding the saved event.

    Raises:
        ValueError: If loading the file yields ``None``.
    """
    init_unique_sdk(unique_settings=unique_settings)

    if get_event_name_from_event_class(event_type) is None:
        return

    saved_event = load_event(file_path, event_type)
    if saved_event is None:
        raise ValueError(f"Event not found in {file_path}")

    if not asyncio.iscoroutinefunction(handler):
        handler(saved_event)
        return

    asyncio.run(handler(saved_event))
|
unique_toolkit/app/init_sdk.py
CHANGED
|
@@ -1,6 +1,11 @@
|
|
|
1
1
|
import os
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import overload
|
|
2
4
|
|
|
3
5
|
import unique_sdk
|
|
6
|
+
from typing_extensions import deprecated
|
|
7
|
+
|
|
8
|
+
from unique_toolkit.app.unique_settings import UniqueSettings
|
|
4
9
|
|
|
5
10
|
|
|
6
11
|
def get_env(var_name, default=None, strict=False):
|
|
@@ -24,12 +29,38 @@ def get_env(var_name, default=None, strict=False):
|
|
|
24
29
|
return val or default
|
|
25
30
|
|
|
26
31
|
|
|
27
|
-
|
|
32
|
+
@overload
def init_unique_sdk(*, env_file: Path | None = None): ...


@overload
def init_unique_sdk(*, unique_settings: UniqueSettings): ...


def init_unique_sdk(
    *, unique_settings: UniqueSettings | None = None, env_file: Path | None = None
):
    """Configure the ``unique_sdk`` module from settings or an env file.

    Args:
        unique_settings: Settings to take the app key, app id and SDK URL
            from. Takes precedence over ``env_file`` when truthy.
        env_file: Env file handed to ``UniqueSettings.from_env`` when no
            settings object is given.

    If neither argument is truthy the SDK module is left untouched.
    """
    active_settings = unique_settings
    if not active_settings:
        if not env_file:
            # Nothing to configure from; keep the current SDK values.
            return
        active_settings = UniqueSettings.from_env(env_file=env_file)

    unique_sdk.api_key = active_settings.app.key.get_secret_value()
    unique_sdk.app_id = active_settings.app.id.get_secret_value()
    unique_sdk.api_base = active_settings.api.sdk_url()
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@deprecated("Use init_unique_sdk instead")
|
|
55
|
+
def init_sdk(
|
|
56
|
+
strict_all_vars: bool = False,
|
|
57
|
+
):
|
|
28
58
|
"""Initialize the SDK.
|
|
29
59
|
|
|
30
60
|
Args:
|
|
31
61
|
strict_all_vars (bool, optional): This method raises a ValueError if strict and no value is found in the environment. Defaults to False.
|
|
32
62
|
"""
|
|
63
|
+
|
|
33
64
|
unique_sdk.api_key = get_env("API_KEY", default="dummy", strict=strict_all_vars)
|
|
34
65
|
unique_sdk.app_id = get_env("APP_ID", default="dummy", strict=strict_all_vars)
|
|
35
66
|
unique_sdk.api_base = get_env("API_BASE", default=None, strict=strict_all_vars)
|