unique_toolkit 0.7.9__py3-none-any.whl → 1.33.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unique_toolkit/__init__.py +36 -3
- unique_toolkit/_common/api_calling/human_verification_manager.py +357 -0
- unique_toolkit/_common/base_model_type_attribute.py +303 -0
- unique_toolkit/_common/chunk_relevancy_sorter/config.py +49 -0
- unique_toolkit/_common/chunk_relevancy_sorter/exception.py +5 -0
- unique_toolkit/_common/chunk_relevancy_sorter/schemas.py +46 -0
- unique_toolkit/_common/chunk_relevancy_sorter/service.py +374 -0
- unique_toolkit/_common/chunk_relevancy_sorter/tests/test_service.py +275 -0
- unique_toolkit/_common/default_language_model.py +12 -0
- unique_toolkit/_common/docx_generator/__init__.py +7 -0
- unique_toolkit/_common/docx_generator/config.py +12 -0
- unique_toolkit/_common/docx_generator/schemas.py +80 -0
- unique_toolkit/_common/docx_generator/service.py +225 -0
- unique_toolkit/_common/docx_generator/template/Doc Template.docx +0 -0
- unique_toolkit/_common/endpoint_builder.py +368 -0
- unique_toolkit/_common/endpoint_requestor.py +480 -0
- unique_toolkit/_common/exception.py +24 -0
- unique_toolkit/_common/experimental/endpoint_builder.py +368 -0
- unique_toolkit/_common/experimental/endpoint_requestor.py +488 -0
- unique_toolkit/_common/feature_flags/schema.py +9 -0
- unique_toolkit/_common/pydantic/rjsf_tags.py +936 -0
- unique_toolkit/_common/pydantic_helpers.py +174 -0
- unique_toolkit/_common/referencing.py +53 -0
- unique_toolkit/_common/string_utilities.py +140 -0
- unique_toolkit/_common/tests/test_referencing.py +521 -0
- unique_toolkit/_common/tests/test_string_utilities.py +506 -0
- unique_toolkit/_common/token/image_token_counting.py +67 -0
- unique_toolkit/_common/token/token_counting.py +204 -0
- unique_toolkit/_common/utils/__init__.py +1 -0
- unique_toolkit/_common/utils/files.py +43 -0
- unique_toolkit/_common/utils/image/encode.py +25 -0
- unique_toolkit/_common/utils/jinja/helpers.py +10 -0
- unique_toolkit/_common/utils/jinja/render.py +18 -0
- unique_toolkit/_common/utils/jinja/schema.py +65 -0
- unique_toolkit/_common/utils/jinja/utils.py +80 -0
- unique_toolkit/_common/utils/structured_output/__init__.py +1 -0
- unique_toolkit/_common/utils/structured_output/schema.py +5 -0
- unique_toolkit/_common/utils/write_configuration.py +51 -0
- unique_toolkit/_common/validators.py +101 -4
- unique_toolkit/agentic/__init__.py +1 -0
- unique_toolkit/agentic/debug_info_manager/debug_info_manager.py +28 -0
- unique_toolkit/agentic/debug_info_manager/test/test_debug_info_manager.py +278 -0
- unique_toolkit/agentic/evaluation/config.py +36 -0
- unique_toolkit/{evaluators → agentic/evaluation}/context_relevancy/prompts.py +25 -0
- unique_toolkit/agentic/evaluation/context_relevancy/schema.py +80 -0
- unique_toolkit/agentic/evaluation/context_relevancy/service.py +273 -0
- unique_toolkit/agentic/evaluation/evaluation_manager.py +218 -0
- unique_toolkit/agentic/evaluation/hallucination/constants.py +61 -0
- unique_toolkit/agentic/evaluation/hallucination/hallucination_evaluation.py +112 -0
- unique_toolkit/{evaluators → agentic/evaluation}/hallucination/prompts.py +1 -1
- unique_toolkit/{evaluators → agentic/evaluation}/hallucination/service.py +20 -16
- unique_toolkit/{evaluators → agentic/evaluation}/hallucination/utils.py +32 -21
- unique_toolkit/{evaluators → agentic/evaluation}/output_parser.py +20 -2
- unique_toolkit/{evaluators → agentic/evaluation}/schemas.py +27 -7
- unique_toolkit/agentic/evaluation/tests/test_context_relevancy_service.py +253 -0
- unique_toolkit/agentic/evaluation/tests/test_output_parser.py +87 -0
- unique_toolkit/agentic/history_manager/history_construction_with_contents.py +298 -0
- unique_toolkit/agentic/history_manager/history_manager.py +241 -0
- unique_toolkit/agentic/history_manager/loop_token_reducer.py +484 -0
- unique_toolkit/agentic/history_manager/utils.py +96 -0
- unique_toolkit/agentic/message_log_manager/__init__.py +5 -0
- unique_toolkit/agentic/message_log_manager/service.py +93 -0
- unique_toolkit/agentic/postprocessor/postprocessor_manager.py +212 -0
- unique_toolkit/agentic/reference_manager/reference_manager.py +103 -0
- unique_toolkit/agentic/responses_api/__init__.py +19 -0
- unique_toolkit/agentic/responses_api/postprocessors/code_display.py +71 -0
- unique_toolkit/agentic/responses_api/postprocessors/generated_files.py +297 -0
- unique_toolkit/agentic/responses_api/stream_handler.py +15 -0
- unique_toolkit/agentic/short_term_memory_manager/persistent_short_term_memory_manager.py +141 -0
- unique_toolkit/agentic/thinking_manager/thinking_manager.py +103 -0
- unique_toolkit/agentic/tools/__init__.py +1 -0
- unique_toolkit/agentic/tools/a2a/__init__.py +36 -0
- unique_toolkit/agentic/tools/a2a/config.py +17 -0
- unique_toolkit/agentic/tools/a2a/evaluation/__init__.py +15 -0
- unique_toolkit/agentic/tools/a2a/evaluation/_utils.py +66 -0
- unique_toolkit/agentic/tools/a2a/evaluation/config.py +55 -0
- unique_toolkit/agentic/tools/a2a/evaluation/evaluator.py +260 -0
- unique_toolkit/agentic/tools/a2a/evaluation/summarization_user_message.j2 +9 -0
- unique_toolkit/agentic/tools/a2a/manager.py +55 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/__init__.py +21 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/_display_utils.py +240 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/_ref_utils.py +84 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/config.py +78 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/display.py +264 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/references.py +101 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/test/test_display.py +421 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/test/test_display_utils.py +2103 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/test/test_ref_utils.py +603 -0
- unique_toolkit/agentic/tools/a2a/prompts.py +46 -0
- unique_toolkit/agentic/tools/a2a/response_watcher/__init__.py +6 -0
- unique_toolkit/agentic/tools/a2a/response_watcher/service.py +91 -0
- unique_toolkit/agentic/tools/a2a/tool/__init__.py +4 -0
- unique_toolkit/agentic/tools/a2a/tool/_memory.py +26 -0
- unique_toolkit/agentic/tools/a2a/tool/_schema.py +9 -0
- unique_toolkit/agentic/tools/a2a/tool/config.py +158 -0
- unique_toolkit/agentic/tools/a2a/tool/service.py +393 -0
- unique_toolkit/agentic/tools/agent_chunks_hanlder.py +65 -0
- unique_toolkit/agentic/tools/config.py +128 -0
- unique_toolkit/agentic/tools/factory.py +44 -0
- unique_toolkit/agentic/tools/mcp/__init__.py +4 -0
- unique_toolkit/agentic/tools/mcp/manager.py +71 -0
- unique_toolkit/agentic/tools/mcp/models.py +28 -0
- unique_toolkit/agentic/tools/mcp/tool_wrapper.py +234 -0
- unique_toolkit/agentic/tools/openai_builtin/__init__.py +11 -0
- unique_toolkit/agentic/tools/openai_builtin/base.py +46 -0
- unique_toolkit/agentic/tools/openai_builtin/code_interpreter/__init__.py +8 -0
- unique_toolkit/agentic/tools/openai_builtin/code_interpreter/config.py +88 -0
- unique_toolkit/agentic/tools/openai_builtin/code_interpreter/service.py +250 -0
- unique_toolkit/agentic/tools/openai_builtin/manager.py +79 -0
- unique_toolkit/agentic/tools/schemas.py +145 -0
- unique_toolkit/agentic/tools/test/test_mcp_manager.py +536 -0
- unique_toolkit/agentic/tools/test/test_tool_progress_reporter.py +445 -0
- unique_toolkit/agentic/tools/tool.py +187 -0
- unique_toolkit/agentic/tools/tool_manager.py +492 -0
- unique_toolkit/agentic/tools/tool_progress_reporter.py +285 -0
- unique_toolkit/agentic/tools/utils/__init__.py +19 -0
- unique_toolkit/agentic/tools/utils/execution/__init__.py +1 -0
- unique_toolkit/agentic/tools/utils/execution/execution.py +286 -0
- unique_toolkit/agentic/tools/utils/source_handling/__init__.py +0 -0
- unique_toolkit/agentic/tools/utils/source_handling/schema.py +21 -0
- unique_toolkit/agentic/tools/utils/source_handling/source_formatting.py +207 -0
- unique_toolkit/agentic/tools/utils/source_handling/tests/test_source_formatting.py +216 -0
- unique_toolkit/app/__init__.py +9 -0
- unique_toolkit/app/dev_util.py +180 -0
- unique_toolkit/app/fast_api_factory.py +131 -0
- unique_toolkit/app/init_sdk.py +32 -1
- unique_toolkit/app/schemas.py +206 -31
- unique_toolkit/app/unique_settings.py +367 -0
- unique_toolkit/app/webhook.py +77 -0
- unique_toolkit/chat/__init__.py +8 -1
- unique_toolkit/chat/deprecated/service.py +232 -0
- unique_toolkit/chat/functions.py +648 -78
- unique_toolkit/chat/rendering.py +34 -0
- unique_toolkit/chat/responses_api.py +461 -0
- unique_toolkit/chat/schemas.py +134 -2
- unique_toolkit/chat/service.py +115 -767
- unique_toolkit/content/functions.py +353 -8
- unique_toolkit/content/schemas.py +128 -15
- unique_toolkit/content/service.py +321 -45
- unique_toolkit/content/smart_rules.py +301 -0
- unique_toolkit/content/utils.py +10 -3
- unique_toolkit/data_extraction/README.md +96 -0
- unique_toolkit/data_extraction/__init__.py +11 -0
- unique_toolkit/data_extraction/augmented/__init__.py +5 -0
- unique_toolkit/data_extraction/augmented/service.py +93 -0
- unique_toolkit/data_extraction/base.py +25 -0
- unique_toolkit/data_extraction/basic/__init__.py +11 -0
- unique_toolkit/data_extraction/basic/config.py +18 -0
- unique_toolkit/data_extraction/basic/prompt.py +13 -0
- unique_toolkit/data_extraction/basic/service.py +55 -0
- unique_toolkit/embedding/service.py +103 -12
- unique_toolkit/framework_utilities/__init__.py +1 -0
- unique_toolkit/framework_utilities/langchain/__init__.py +10 -0
- unique_toolkit/framework_utilities/langchain/client.py +71 -0
- unique_toolkit/framework_utilities/langchain/history.py +19 -0
- unique_toolkit/framework_utilities/openai/__init__.py +6 -0
- unique_toolkit/framework_utilities/openai/client.py +84 -0
- unique_toolkit/framework_utilities/openai/message_builder.py +229 -0
- unique_toolkit/framework_utilities/utils.py +23 -0
- unique_toolkit/language_model/__init__.py +3 -0
- unique_toolkit/language_model/_responses_api_utils.py +93 -0
- unique_toolkit/language_model/builder.py +27 -11
- unique_toolkit/language_model/default_language_model.py +3 -0
- unique_toolkit/language_model/functions.py +345 -43
- unique_toolkit/language_model/infos.py +1288 -46
- unique_toolkit/language_model/reference.py +242 -0
- unique_toolkit/language_model/schemas.py +481 -49
- unique_toolkit/language_model/service.py +229 -28
- unique_toolkit/protocols/support.py +145 -0
- unique_toolkit/services/__init__.py +7 -0
- unique_toolkit/services/chat_service.py +1631 -0
- unique_toolkit/services/knowledge_base.py +1094 -0
- unique_toolkit/short_term_memory/service.py +178 -41
- unique_toolkit/smart_rules/__init__.py +0 -0
- unique_toolkit/smart_rules/compile.py +56 -0
- unique_toolkit/test_utilities/events.py +197 -0
- unique_toolkit-1.33.3.dist-info/METADATA +1145 -0
- unique_toolkit-1.33.3.dist-info/RECORD +205 -0
- unique_toolkit/evaluators/__init__.py +0 -1
- unique_toolkit/evaluators/config.py +0 -35
- unique_toolkit/evaluators/constants.py +0 -1
- unique_toolkit/evaluators/context_relevancy/constants.py +0 -32
- unique_toolkit/evaluators/context_relevancy/service.py +0 -53
- unique_toolkit/evaluators/context_relevancy/utils.py +0 -142
- unique_toolkit/evaluators/hallucination/constants.py +0 -41
- unique_toolkit-0.7.9.dist-info/METADATA +0 -413
- unique_toolkit-0.7.9.dist-info/RECORD +0 -64
- /unique_toolkit/{evaluators → agentic/evaluation}/exception.py +0 -0
- {unique_toolkit-0.7.9.dist-info → unique_toolkit-1.33.3.dist-info}/LICENSE +0 -0
- {unique_toolkit-0.7.9.dist-info → unique_toolkit-1.33.3.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
# Original source
|
|
2
|
+
# https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
from typing import Any, Callable
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel
|
|
8
|
+
|
|
9
|
+
from unique_toolkit._common.token.image_token_counting import (
|
|
10
|
+
calculate_image_tokens_from_base64,
|
|
11
|
+
)
|
|
12
|
+
from unique_toolkit.language_model import (
|
|
13
|
+
LanguageModelMessage,
|
|
14
|
+
LanguageModelMessages,
|
|
15
|
+
LanguageModelName,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class SpecialToolCallingTokens(BaseModel):
    # Per-model fixed token overheads used when estimating the token cost of
    # tool/function definitions. The concrete values are model-specific
    # constants assigned in get_special_token().
    func_init: int = 0  # cost for starting one function definition
    prop_init: int = 0  # cost when a function has a non-empty properties section
    prop_key: int = 0  # cost per declared property
    enum_init: int = 0  # adjustment when a property declares an enum (may be negative)
    enum_item: int = 0  # cost per enum option, on top of the encoded option text
    func_end: int = 0  # cost for closing the whole function-definitions section
|
27
|
+
|
|
28
|
+
def get_special_token(model: LanguageModelName) -> SpecialToolCallingTokens:
    """Return the model-specific token overheads for tool definitions.

    Raises:
        NotImplementedError: if no constants are known for *model*.
    """
    gpt_4o_family = (
        LanguageModelName.AZURE_GPT_4o_2024_0513,
        LanguageModelName.AZURE_GPT_4o_2024_0806,
        LanguageModelName.AZURE_GPT_4o_MINI_2024_0718,
        LanguageModelName.AZURE_GPT_4o_2024_1120,
    )
    legacy_family = (
        LanguageModelName.AZURE_GPT_35_TURBO_0125,
        LanguageModelName.AZURE_GPT_4_0613,
        LanguageModelName.AZURE_GPT_4_32K_0613,
        LanguageModelName.AZURE_GPT_4_TURBO_2024_0409,
    )

    if model in gpt_4o_family:
        return SpecialToolCallingTokens(
            func_init=7,
            prop_init=3,
            prop_key=3,
            enum_init=-3,
            enum_item=3,
            func_end=12,
        )

    if model in legacy_family:
        return SpecialToolCallingTokens(
            func_init=10,
            prop_init=3,
            prop_key=3,
            enum_init=-3,
            enum_item=3,
            func_end=12,
        )

    raise NotImplementedError(
        f"""num_tokens_for_tools() is not implemented for model {model}."""
    )
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def num_tokens_per_messages(
    messages: list[dict[str, str]], encode: Callable[[str], list[int]]
) -> list[int]:
    """Return the token count of each message in *messages*."""

    counts: list[int] = []
    for message in messages:
        # Fixed per-message overhead of 3 tokens.
        total = 3
        for key, value in message.items():
            if key == "content":
                if message.get("role") == "tool":
                    # We have observed a general difference in the way tool
                    # response messages are handled: transforming tool
                    # responses into user messages (same content) drops the
                    # token consumption drastically, scaling with the size of
                    # the tool responses. Counting the json.dumps() form was
                    # found by trial and error; it gives a conservative
                    # estimate close to the count reported by the OpenAI call.
                    total += len(encode(json.dumps(value)))
                elif isinstance(value, list):
                    # Multimodal content; the helper's result is approximate.
                    total += handle_message_with_images(value, encode)
                else:
                    total += len(encode(value))
            elif isinstance(value, str):
                total += len(encode(value))

            if key == "name":
                total += 1  # extra token charged per explicit name

        counts.append(total)

    return counts
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def num_tokens_from_messages(
    messages: list[dict[str, str]], encode: Callable[[str], list[int]]
) -> int:
    """Return the total token count of *messages* plus the 3-token reply primer."""
    return sum(num_tokens_per_messages(messages, encode)) + 3
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def num_tokens_for_tools(
    functions: list[dict[str, Any]],
    special_token: SpecialToolCallingTokens,
    encode: Callable[[str], list[int]],
):
    """Estimate the token cost of a list of tool/function definitions.

    Combines the model-specific fixed overheads in *special_token* with the
    encoded length of each function's name, description and properties.

    Args:
        functions: Tool definitions in OpenAI shape ({"function": {...}}).
        special_token: Per-model overhead constants (see get_special_token).
        encode: Tokenizer returning the token ids of a string.

    Returns:
        The estimated total token count for all function definitions.
    """

    def _enum_tokens(enum_values: list[Any]) -> int:
        # Enum options cost an init adjustment (possibly negative) plus a
        # per-item overhead on top of the encoded option text.
        # FIX: previously this helper read the loop variable ``key`` through
        # its closure; the values are now passed in explicitly.
        count = special_token.enum_init
        for item in enum_values:
            count += special_token.enum_item
            count += len(encode(item))
        return count

    func_token_count = 0
    for func in functions:
        func_token_count += special_token.func_init
        function = func.get("function", {})
        func_token_count += len(
            encode(
                function.get("name", "")
                + ":"
                + function.get("description", "").rstrip(".").rstrip()
            )
        )
        properties = function.get("parameters", {}).get("properties", "")
        if len(properties) > 0:
            func_token_count += special_token.prop_init
            for key in properties:
                func_token_count += special_token.prop_key
                if "enum" in properties[key]:
                    func_token_count += _enum_tokens(properties[key]["enum"])
                func_token_count += len(
                    encode(
                        f"{key}:{properties[key]['type']}:{properties[key]['description'].rstrip('.').rstrip()}"
                    )
                )
    if functions:
        # Closing overhead is charged once, only when any tool is defined.
        func_token_count += special_token.func_end

    return func_token_count
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def handle_message_with_images(message: list[dict], encode: Callable[[str], list[int]]):
    """Sum the token estimate for a multimodal content list (text and images)."""
    total = 0
    for part in message:
        part_type = part.get("type")
        if part_type == "image_url":
            # NOTE(review): the payload key is camelCase ("imageUrl") while the
            # type tag is snake_case — presumably this matches the toolkit's
            # JSON serialization; confirm against the message schema.
            url = part.get("imageUrl", {}).get("url")
            if url:
                total += calculate_image_tokens_from_base64(url)
        elif part_type == "text":
            total += len(encode(part.get("text", "")))
    return total
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def messages_to_openai_messages(
    messages: LanguageModelMessages | list[LanguageModelMessage],
):
    """Project toolkit messages onto plain dicts keeping only content/role/name."""
    if isinstance(messages, list):
        messages = LanguageModelMessages(messages)

    kept_keys = ("content", "role", "name")
    result = []
    for dumped in messages.model_dump(mode="json"):
        # Ignore tool_calls for now
        result.append(
            {k: v for k, v in dumped.items() if k in kept_keys and v is not None}
        )
    return result
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def num_tokens_per_language_model_message(
    messages: LanguageModelMessages | list[LanguageModelMessage],
    encode: Callable[[str], list[int]],
) -> list[int]:
    """Per-message token counts for toolkit message objects."""
    openai_messages = messages_to_openai_messages(messages)
    return num_tokens_per_messages(messages=openai_messages, encode=encode)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def num_token_for_language_model_messages(
    messages: LanguageModelMessages | list[LanguageModelMessage],
    encode: Callable[[str], list[int]],
) -> int:
    """Total token count for toolkit message objects."""
    openai_messages = messages_to_openai_messages(messages)
    return num_tokens_from_messages(openai_messages, encode)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Utils package for common utilities
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import mimetypes
|
|
2
|
+
from enum import StrEnum
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class FileMimeType(StrEnum):
    """MIME types the toolkit treats as document ("file") content."""

    PDF = "application/pdf"
    DOCX = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    DOC = "application/msword"
    XLSX = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    XLS = "application/vnd.ms-excel"
    PPTX = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
    CSV = "text/csv"
    HTML = "text/html"
    MD = "text/markdown"
    TXT = "text/plain"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ImageMimeType(StrEnum):
    """MIME types the toolkit treats as image content."""

    JPEG = "image/jpeg"
    PNG = "image/png"
    GIF = "image/gif"
    BMP = "image/bmp"
    WEBP = "image/webp"
    TIFF = "image/tiff"
    SVG = "image/svg+xml"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def is_file_content(filename: str) -> bool:
    """Return True if *filename*'s guessed MIME type is a known document type."""
    guessed, _ = mimetypes.guess_type(filename)
    if not guessed:
        # Unknown extension: treat as not-a-document.
        return False
    return guessed in FileMimeType.__members__.values()
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def is_image_content(filename: str) -> bool:
    """Return True if *filename*'s guessed MIME type is a known image type."""
    guessed, _ = mimetypes.guess_type(filename)
    if not guessed:
        # Unknown extension: treat as not-an-image.
        return False
    return guessed in ImageMimeType.__members__.values()
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
import io
|
|
3
|
+
|
|
4
|
+
from PIL.ImageFile import ImageFile
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def image_to_base64(image: ImageFile) -> str:
    """JPEG-compress *image* and return it as a ``data:image/jpeg`` base64 URI."""
    # JPEG cannot carry non-RGB modes (e.g. RGBA/P), so normalize first.
    rgb = image.convert("RGB") if image.mode != "RGB" else image

    # Compress into an in-memory buffer.
    buffer = io.BytesIO()
    rgb.save(buffer, format="JPEG", quality=85, optimize=True)
    buffer.seek(0)

    # Base64-encode the compressed bytes and wrap them as a data URI.
    payload = base64.b64encode(buffer.getvalue()).decode("utf-8")
    return "data:image/jpeg;base64," + payload
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
from jinja2 import Template
|
|
4
|
+
|
|
5
|
+
from unique_toolkit._common.utils.jinja.schema import Jinja2PromptParams
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def render_template(
    template: str, params: Jinja2PromptParams | dict[str, Any] | None = None, **kwargs
) -> str:
    """Render *template* with *params* merged with any extra keyword arguments.

    Args:
        template: The Jinja2 template source.
        params: A Jinja2PromptParams model or a plain dict of variables.
        **kwargs: Extra variables; they take precedence over *params*.

    Returns:
        The rendered template string.
    """
    if params is None:
        context: dict[str, Any] = {}
    elif isinstance(params, Jinja2PromptParams):
        context = params.model_dump(exclude_none=True, mode="json")
    else:
        # FIX: copy so the caller's dict is never mutated (previously
        # kwargs were written into the caller-supplied dict in place).
        context = dict(params)

    context.update(kwargs)

    return Template(template, lstrip_blocks=True).render(**context)
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
from datetime import date, datetime
|
|
2
|
+
from typing import Annotated, Any
|
|
3
|
+
|
|
4
|
+
from jinja2 import Template
|
|
5
|
+
from pydantic import (
|
|
6
|
+
BaseModel,
|
|
7
|
+
ConfigDict,
|
|
8
|
+
Field,
|
|
9
|
+
SerializerFunctionWrapHandler,
|
|
10
|
+
WrapSerializer,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
from unique_toolkit.agentic.tools.tool import Tool
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Jinja2PromptParams(BaseModel):
    """Base class for parameter models that can render Jinja2 templates."""

    model_config = ConfigDict(str_strip_whitespace=True)

    def render_template(self, template: str) -> str:
        """Render *template* using this model's non-None fields as context."""
        context = self.model_dump(exclude_none=True, mode="json")
        return Template(template, lstrip_blocks=True).render(**context)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class ToolPromptParams(Jinja2PromptParams):
    """Prompt-facing description of a single tool."""

    name: str
    tool_description_for_system_prompt: str = ""
    tool_format_information_for_system_prompt: str = ""
    tool_format_reminder_for_user_prompt: str = ""

    @classmethod
    def from_tool(cls, tool: Tool) -> "ToolPromptParams":
        """Build the prompt parameters from a Tool instance."""
        return cls(
            name=tool.name,
            tool_description_for_system_prompt=tool.tool_description_for_system_prompt(),
            tool_format_information_for_system_prompt=tool.tool_format_information_for_system_prompt(),
            tool_format_reminder_for_user_prompt=tool.tool_format_reminder_for_user_prompt(),
        )
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def serialize_iso8601_date(v: Any, handler: SerializerFunctionWrapHandler) -> str:
    """Serialize date values to ISO-8601 strings; delegate everything else."""
    return v.isoformat() if isinstance(v, date) else handler(v)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# Date alias that serializes to an ISO-8601 string (see serialize_iso8601_date).
ISO8601Date = Annotated[
    date,
    WrapSerializer(serialize_iso8601_date, return_type=str),
]
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class AgentSystemPromptParams(Jinja2PromptParams):
    """Variables available to the agent system-prompt template."""

    info_cutoff_at: ISO8601Date | None  # model knowledge cutoff, if known
    # NOTE(review): datetime.now() is naive local time — confirm whether UTC
    # is expected here.
    current_date: ISO8601Date = Field(default_factory=lambda: datetime.now().date())
    tools: list[ToolPromptParams]  # tools available in this run
    used_tools: list[ToolPromptParams]  # tools already invoked
    add_citation_appendix: bool = True
    max_tools_per_iteration: int
    max_loop_iterations: int
    current_iteration: int
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class AgentUserPromptParams(Jinja2PromptParams):
    """Variables available to the agent user-prompt template."""

    user_prompt: str
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
from jinja2 import Environment
|
|
2
|
+
from jinja2.nodes import Const, Getattr, Getitem, Name
|
|
3
|
+
from pydantic import BaseModel
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class TemplateValidationResult(BaseModel):
    """Outcome of validating a Jinja template's placeholders."""

    is_valid: bool  # True when no required placeholder is missing
    missing_placeholders: list[str]  # required but absent from the template
    optional_placeholders: list[str]  # optional placeholders actually present
    unexpected_placeholders: list[str]  # present but neither required nor optional
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _get_nested_variables(node):
    """Recursively extract all variable references from a Jinja2 AST node."""
    found = set()

    if isinstance(node, Name):
        found.add(node.name)
    elif isinstance(node, (Getattr, Getitem)):
        base = node.node
        if isinstance(base, Name):
            # Dotted access such as example.category.
            if isinstance(node, Getattr):
                found.add(f"{base.name}.{node.attr}")
            elif isinstance(node.arg, Const):
                found.add(f"{base.name}.{node.arg.value}")
            else:
                # Dynamic subscripts fall back to the base variable name.
                found.add(base.name)
        # Descend into the object being accessed.
        found |= _get_nested_variables(base)

    # Visit every child node.
    for child in node.iter_child_nodes():
        found |= _get_nested_variables(child)

    return found
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def validate_template_placeholders(
    template_content: str,
    required_placeholders: set[str],
    optional_placeholders: set[str],
) -> TemplateValidationResult:
    """
    Validates that all required placeholders in the template are present.

    Handles both top-level and nested variables (e.g. example.category).

    Args:
        template_content (str): The content of the Jinja template
        required_placeholders (set[str]): Set of required placeholder names
        optional_placeholders (set[str]): Set of optional placeholder names

    Returns:
        TemplateValidationResult: A result object containing validation information
    """
    # Parse the template and collect every referenced variable (nested included).
    ast = Environment().parse(template_content)
    template_vars = _get_nested_variables(ast)

    missing = required_placeholders - template_vars
    present_optional = optional_placeholders & template_vars
    unexpected = template_vars - (required_placeholders | optional_placeholders)

    return TemplateValidationResult(
        is_valid=not missing,
        missing_placeholders=sorted(missing),
        optional_placeholders=sorted(present_optional),
        unexpected_placeholders=sorted(unexpected),
    )
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Structured output utilities package
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from humps import kebabize, pascalize
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def write_module_configuration_schema(
    service_folderpath: Path,
    write_folderpath: Path,
    config: BaseModel,
    sub_name: str = "",
):
    """Write *config*'s JSON schema for a module.

    The file name is the PascalCase service folder name, optionally suffixed
    with ``-<sub_name>``, ending in ``Schema.json``.
    """
    prefix = pascalize(service_folderpath.name)
    suffix = f"-{sub_name}" if sub_name else ""
    filepath = write_folderpath / f"{prefix}{suffix}Schema.json"

    with open(filepath, "w") as f:
        json.dump(config.model_json_schema(by_alias=True), f, indent=4)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def write_service_configuration(
    service_folderpath: Path,
    write_folderpath: Path,
    config: BaseModel,
    sub_name: str = "",
):
    """Write *config*'s JSON schema and its default configuration to disk.

    Both file names start with the kebab-case service folder name, optionally
    suffixed with ``-<sub_name>``.
    """
    prefix = kebabize(service_folderpath.name)
    suffix = f"-{sub_name}" if sub_name else ""

    schema_path = write_folderpath / f"{prefix}{suffix}-configuration-schema.json"
    with open(schema_path, "w") as f:
        json.dump(config.model_json_schema(by_alias=True), f, indent=4)

    default_path = write_folderpath / f"{prefix}{suffix}-default-configuration.json"
    # We exclude language_model_info as it is inferred from language_model_name.
    with open(default_path, "w") as f:
        f.write(
            config.model_dump_json(
                by_alias=True, indent=4, exclude={"language_model_info"}
            )
        )
|
|
@@ -1,8 +1,105 @@
|
|
|
1
|
-
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Annotated, Any
|
|
2
3
|
|
|
4
|
+
from pydantic import BeforeValidator, Field, PlainSerializer, ValidationInfo
|
|
5
|
+
from pydantic.fields import FieldInfo
|
|
6
|
+
|
|
7
|
+
from unique_toolkit.language_model import LanguageModelName
|
|
8
|
+
from unique_toolkit.language_model.infos import (
|
|
9
|
+
LanguageModelInfo,
|
|
10
|
+
LanguageModelProvider,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
# TODO @klcd: Inform on deprecation of str as input
# LMI accepts a LanguageModelName, a plain model-name string, or a full
# LanguageModelInfo, and normalizes the value to LanguageModelInfo on
# validation (see validate_and_init_language_model_info). On JSON
# serialization it emits the model name, except CUSTOM models which keep
# the full info object (see serialize_lmi).
LMI = Annotated[
    LanguageModelInfo,
    BeforeValidator(
        lambda v: validate_and_init_language_model_info(v),
        json_schema_input_type=LanguageModelName
        | Annotated[
            str,
            Field(
                title="Language Model String",
            ),
        ]
        | LanguageModelInfo,
    ),
    PlainSerializer(
        lambda v: serialize_lmi(v),
        when_used="json",
        return_type=str | LanguageModelInfo,
    ),
]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def get_LMI_default_field(llm_name: LanguageModelName, **kwargs) -> Any:
    """Build a Field default for an LMI-typed attribute.

    The runtime default is the resolved LanguageModelInfo, while the JSON
    schema advertises the plain model name.
    """
    info = LanguageModelInfo.from_name(llm_name)
    return Field(
        default=info,
        json_schema_extra={"default": llm_name},
        **kwargs,
    )
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def serialize_lmi(model: LanguageModelInfo) -> str | LanguageModelInfo:
    """Serialize to the model name; CUSTOM models keep the full info object."""
    if model.provider == LanguageModelProvider.CUSTOM:
        # Custom models have no canonical name; keep everything.
        return model
    return model.name
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def validate_and_init_language_model_info(
    v: str | LanguageModelName | LanguageModelInfo,
) -> LanguageModelInfo:
    """Validate and initialize a LanguageModelInfo object.

    Args:
        v: The input value to validate and initialize.

    Returns:
        LanguageModelInfo: The validated and initialized LanguageModelInfo object.

    """
    if isinstance(v, LanguageModelName):
        return LanguageModelInfo.from_name(v)

    if isinstance(v, str):
        known_names = {name.value for name in LanguageModelName}
        if v in known_names:
            return LanguageModelInfo.from_name(LanguageModelName(v))

        # Unknown name strings are treated as custom, externally-defined models.
        return LanguageModelInfo(
            name=v,
            version="custom",
            provider=LanguageModelProvider.CUSTOM,
        )

    return v
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def ClipInt(*, min_value: int, max_value: int) -> tuple[BeforeValidator, FieldInfo]:
    """Annotation pieces that clip integer inputs into [min_value, max_value].

    Returns a BeforeValidator that coerces to int and clips out-of-range
    values (logging a warning), together with a Field carrying the matching
    ge/le constraints for the JSON schema.
    """

    def _clip(value: Any, info: ValidationInfo) -> Any:
        if not isinstance(value, int):
            value = int(value)

        if value < min_value:
            logger.warning(
                "Field %s is below the allowed minimum of %s. It will be set to %s.",
                info.field_name,
                min_value,
                min_value,
            )
            return min_value

        if value > max_value:
            logger.warning(
                "Field %s is above the allowed maximum of %s. It will be set to %s.",
                info.field_name,
                max_value,
                max_value,
            )
            return max_value

        return value

    return (BeforeValidator(_clip), Field(ge=min_value, le=max_value))
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
from unique_toolkit.agentic.tools.schemas import ToolCallResponse
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class DebugInfoManager:
    """Accumulates debug information: per-tool entries plus ad-hoc key/values."""

    def __init__(self):
        # "tools" collects one entry per observed tool call response.
        self.debug_info: dict[str, Any] = {"tools": []}

    def extract_tool_debug_info(
        self,
        tool_call_responses: list[ToolCallResponse],
        loop_iteration_index: int | None = None,
    ):
        """Record the debug info of each tool call response.

        Args:
            tool_call_responses: Responses whose name/debug_info are recorded.
            loop_iteration_index: If given, annotated onto each entry.
        """
        for tool_call_response in tool_call_responses:
            info = tool_call_response.debug_info
            if loop_iteration_index is not None:
                # FIX: copy before annotating so the caller's debug_info dict
                # is not mutated in place (the previous implementation wrote
                # the loop index straight into the shared dict).
                info = {**(info or {}), "loop_iteration": loop_iteration_index}
            self.debug_info["tools"].append(
                {"name": tool_call_response.name, "info": info}
            )

    def add(self, key: str, value: Any) -> None:
        """Add (or overwrite) a top-level debug entry."""
        self.debug_info = self.debug_info | {key: value}

    def get(self) -> dict[str, Any]:
        """Return the accumulated debug info."""
        return self.debug_info
|