unique_toolkit 0.7.7__py3-none-any.whl → 1.23.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unique_toolkit might be problematic. Click here for more details.
- unique_toolkit/__init__.py +28 -1
- unique_toolkit/_common/api_calling/human_verification_manager.py +343 -0
- unique_toolkit/_common/base_model_type_attribute.py +303 -0
- unique_toolkit/_common/chunk_relevancy_sorter/config.py +49 -0
- unique_toolkit/_common/chunk_relevancy_sorter/exception.py +5 -0
- unique_toolkit/_common/chunk_relevancy_sorter/schemas.py +46 -0
- unique_toolkit/_common/chunk_relevancy_sorter/service.py +374 -0
- unique_toolkit/_common/chunk_relevancy_sorter/tests/test_service.py +275 -0
- unique_toolkit/_common/default_language_model.py +12 -0
- unique_toolkit/_common/docx_generator/__init__.py +7 -0
- unique_toolkit/_common/docx_generator/config.py +12 -0
- unique_toolkit/_common/docx_generator/schemas.py +80 -0
- unique_toolkit/_common/docx_generator/service.py +252 -0
- unique_toolkit/_common/docx_generator/template/Doc Template.docx +0 -0
- unique_toolkit/_common/endpoint_builder.py +305 -0
- unique_toolkit/_common/endpoint_requestor.py +430 -0
- unique_toolkit/_common/exception.py +24 -0
- unique_toolkit/_common/feature_flags/schema.py +9 -0
- unique_toolkit/_common/pydantic/rjsf_tags.py +936 -0
- unique_toolkit/_common/pydantic_helpers.py +154 -0
- unique_toolkit/_common/referencing.py +53 -0
- unique_toolkit/_common/string_utilities.py +140 -0
- unique_toolkit/_common/tests/test_referencing.py +521 -0
- unique_toolkit/_common/tests/test_string_utilities.py +506 -0
- unique_toolkit/_common/token/image_token_counting.py +67 -0
- unique_toolkit/_common/token/token_counting.py +204 -0
- unique_toolkit/_common/utils/__init__.py +1 -0
- unique_toolkit/_common/utils/files.py +43 -0
- unique_toolkit/_common/utils/structured_output/__init__.py +1 -0
- unique_toolkit/_common/utils/structured_output/schema.py +5 -0
- unique_toolkit/_common/utils/write_configuration.py +51 -0
- unique_toolkit/_common/validators.py +101 -4
- unique_toolkit/agentic/__init__.py +1 -0
- unique_toolkit/agentic/debug_info_manager/debug_info_manager.py +28 -0
- unique_toolkit/agentic/debug_info_manager/test/test_debug_info_manager.py +278 -0
- unique_toolkit/agentic/evaluation/config.py +36 -0
- unique_toolkit/{evaluators → agentic/evaluation}/context_relevancy/prompts.py +25 -0
- unique_toolkit/agentic/evaluation/context_relevancy/schema.py +80 -0
- unique_toolkit/agentic/evaluation/context_relevancy/service.py +273 -0
- unique_toolkit/agentic/evaluation/evaluation_manager.py +218 -0
- unique_toolkit/agentic/evaluation/hallucination/constants.py +61 -0
- unique_toolkit/agentic/evaluation/hallucination/hallucination_evaluation.py +111 -0
- unique_toolkit/{evaluators → agentic/evaluation}/hallucination/prompts.py +1 -1
- unique_toolkit/{evaluators → agentic/evaluation}/hallucination/service.py +16 -15
- unique_toolkit/{evaluators → agentic/evaluation}/hallucination/utils.py +30 -20
- unique_toolkit/{evaluators → agentic/evaluation}/output_parser.py +20 -2
- unique_toolkit/{evaluators → agentic/evaluation}/schemas.py +27 -7
- unique_toolkit/agentic/evaluation/tests/test_context_relevancy_service.py +253 -0
- unique_toolkit/agentic/evaluation/tests/test_output_parser.py +87 -0
- unique_toolkit/agentic/history_manager/history_construction_with_contents.py +297 -0
- unique_toolkit/agentic/history_manager/history_manager.py +242 -0
- unique_toolkit/agentic/history_manager/loop_token_reducer.py +484 -0
- unique_toolkit/agentic/history_manager/utils.py +96 -0
- unique_toolkit/agentic/postprocessor/postprocessor_manager.py +212 -0
- unique_toolkit/agentic/reference_manager/reference_manager.py +103 -0
- unique_toolkit/agentic/responses_api/__init__.py +19 -0
- unique_toolkit/agentic/responses_api/postprocessors/code_display.py +63 -0
- unique_toolkit/agentic/responses_api/postprocessors/generated_files.py +145 -0
- unique_toolkit/agentic/responses_api/stream_handler.py +15 -0
- unique_toolkit/agentic/short_term_memory_manager/persistent_short_term_memory_manager.py +141 -0
- unique_toolkit/agentic/thinking_manager/thinking_manager.py +103 -0
- unique_toolkit/agentic/tools/__init__.py +1 -0
- unique_toolkit/agentic/tools/a2a/__init__.py +36 -0
- unique_toolkit/agentic/tools/a2a/config.py +17 -0
- unique_toolkit/agentic/tools/a2a/evaluation/__init__.py +15 -0
- unique_toolkit/agentic/tools/a2a/evaluation/_utils.py +66 -0
- unique_toolkit/agentic/tools/a2a/evaluation/config.py +55 -0
- unique_toolkit/agentic/tools/a2a/evaluation/evaluator.py +260 -0
- unique_toolkit/agentic/tools/a2a/evaluation/summarization_user_message.j2 +9 -0
- unique_toolkit/agentic/tools/a2a/manager.py +55 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/__init__.py +21 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/_display_utils.py +185 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/_ref_utils.py +73 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/config.py +45 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/display.py +180 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/references.py +101 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/test/test_display_utils.py +1335 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/test/test_ref_utils.py +603 -0
- unique_toolkit/agentic/tools/a2a/prompts.py +46 -0
- unique_toolkit/agentic/tools/a2a/response_watcher/__init__.py +6 -0
- unique_toolkit/agentic/tools/a2a/response_watcher/service.py +91 -0
- unique_toolkit/agentic/tools/a2a/tool/__init__.py +4 -0
- unique_toolkit/agentic/tools/a2a/tool/_memory.py +26 -0
- unique_toolkit/agentic/tools/a2a/tool/_schema.py +9 -0
- unique_toolkit/agentic/tools/a2a/tool/config.py +73 -0
- unique_toolkit/agentic/tools/a2a/tool/service.py +306 -0
- unique_toolkit/agentic/tools/agent_chunks_hanlder.py +65 -0
- unique_toolkit/agentic/tools/config.py +167 -0
- unique_toolkit/agentic/tools/factory.py +44 -0
- unique_toolkit/agentic/tools/mcp/__init__.py +4 -0
- unique_toolkit/agentic/tools/mcp/manager.py +71 -0
- unique_toolkit/agentic/tools/mcp/models.py +28 -0
- unique_toolkit/agentic/tools/mcp/tool_wrapper.py +234 -0
- unique_toolkit/agentic/tools/openai_builtin/__init__.py +11 -0
- unique_toolkit/agentic/tools/openai_builtin/base.py +30 -0
- unique_toolkit/agentic/tools/openai_builtin/code_interpreter/__init__.py +8 -0
- unique_toolkit/agentic/tools/openai_builtin/code_interpreter/config.py +57 -0
- unique_toolkit/agentic/tools/openai_builtin/code_interpreter/service.py +230 -0
- unique_toolkit/agentic/tools/openai_builtin/manager.py +62 -0
- unique_toolkit/agentic/tools/schemas.py +141 -0
- unique_toolkit/agentic/tools/test/test_mcp_manager.py +536 -0
- unique_toolkit/agentic/tools/test/test_tool_progress_reporter.py +445 -0
- unique_toolkit/agentic/tools/tool.py +183 -0
- unique_toolkit/agentic/tools/tool_manager.py +523 -0
- unique_toolkit/agentic/tools/tool_progress_reporter.py +285 -0
- unique_toolkit/agentic/tools/utils/__init__.py +19 -0
- unique_toolkit/agentic/tools/utils/execution/__init__.py +1 -0
- unique_toolkit/agentic/tools/utils/execution/execution.py +286 -0
- unique_toolkit/agentic/tools/utils/source_handling/__init__.py +0 -0
- unique_toolkit/agentic/tools/utils/source_handling/schema.py +21 -0
- unique_toolkit/agentic/tools/utils/source_handling/source_formatting.py +207 -0
- unique_toolkit/agentic/tools/utils/source_handling/tests/test_source_formatting.py +216 -0
- unique_toolkit/app/__init__.py +6 -0
- unique_toolkit/app/dev_util.py +180 -0
- unique_toolkit/app/init_sdk.py +32 -1
- unique_toolkit/app/schemas.py +198 -31
- unique_toolkit/app/unique_settings.py +367 -0
- unique_toolkit/chat/__init__.py +8 -1
- unique_toolkit/chat/deprecated/service.py +232 -0
- unique_toolkit/chat/functions.py +642 -77
- unique_toolkit/chat/rendering.py +34 -0
- unique_toolkit/chat/responses_api.py +461 -0
- unique_toolkit/chat/schemas.py +133 -2
- unique_toolkit/chat/service.py +115 -767
- unique_toolkit/content/functions.py +153 -4
- unique_toolkit/content/schemas.py +122 -15
- unique_toolkit/content/service.py +278 -44
- unique_toolkit/content/smart_rules.py +301 -0
- unique_toolkit/content/utils.py +8 -3
- unique_toolkit/embedding/service.py +102 -11
- unique_toolkit/framework_utilities/__init__.py +1 -0
- unique_toolkit/framework_utilities/langchain/client.py +71 -0
- unique_toolkit/framework_utilities/langchain/history.py +19 -0
- unique_toolkit/framework_utilities/openai/__init__.py +6 -0
- unique_toolkit/framework_utilities/openai/client.py +83 -0
- unique_toolkit/framework_utilities/openai/message_builder.py +229 -0
- unique_toolkit/framework_utilities/utils.py +23 -0
- unique_toolkit/language_model/__init__.py +3 -0
- unique_toolkit/language_model/builder.py +27 -11
- unique_toolkit/language_model/default_language_model.py +3 -0
- unique_toolkit/language_model/functions.py +327 -43
- unique_toolkit/language_model/infos.py +992 -50
- unique_toolkit/language_model/reference.py +242 -0
- unique_toolkit/language_model/schemas.py +475 -48
- unique_toolkit/language_model/service.py +228 -27
- unique_toolkit/protocols/support.py +145 -0
- unique_toolkit/services/__init__.py +7 -0
- unique_toolkit/services/chat_service.py +1630 -0
- unique_toolkit/services/knowledge_base.py +861 -0
- unique_toolkit/short_term_memory/service.py +178 -41
- unique_toolkit/smart_rules/__init__.py +0 -0
- unique_toolkit/smart_rules/compile.py +56 -0
- unique_toolkit/test_utilities/events.py +197 -0
- {unique_toolkit-0.7.7.dist-info → unique_toolkit-1.23.0.dist-info}/METADATA +606 -7
- unique_toolkit-1.23.0.dist-info/RECORD +182 -0
- unique_toolkit/evaluators/__init__.py +0 -1
- unique_toolkit/evaluators/config.py +0 -35
- unique_toolkit/evaluators/constants.py +0 -1
- unique_toolkit/evaluators/context_relevancy/constants.py +0 -32
- unique_toolkit/evaluators/context_relevancy/service.py +0 -53
- unique_toolkit/evaluators/context_relevancy/utils.py +0 -142
- unique_toolkit/evaluators/hallucination/constants.py +0 -41
- unique_toolkit-0.7.7.dist-info/RECORD +0 -64
- /unique_toolkit/{evaluators → agentic/evaluation}/exception.py +0 -0
- {unique_toolkit-0.7.7.dist-info → unique_toolkit-1.23.0.dist-info}/LICENSE +0 -0
- {unique_toolkit-0.7.7.dist-info → unique_toolkit-1.23.0.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,506 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Unit tests for string utilities.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@pytest.mark.ai
def test_replace_in_text__replaces_single_pattern__with_string_pattern() -> None:
    """
    Purpose: Check that replace_in_text substitutes a lone string pattern.
    Why this matters: Single-pattern replacement is the baseline text transformation.
    Setup summary: One (pattern, replacement) pair applied to a short sentence.
    """
    from unique_toolkit._common.string_utilities import replace_in_text

    sentence = "Hello world, hello universe"
    replacements = [("hello", "goodbye")]

    actual = replace_in_text(sentence, replacements)

    # The capitalised "Hello" is expected to stay; only the lowercase one changes.
    assert actual == "Hello world, goodbye universe"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@pytest.mark.ai
def test_replace_in_text__replaces_multiple_patterns__with_string_patterns() -> None:
    """
    Purpose: Check that replace_in_text applies several independent replacements.
    Why this matters: Callers rely on it for batch text transformations.
    Setup summary: Three non-overlapping word patterns; every one must be swapped.
    """
    from unique_toolkit._common.string_utilities import replace_in_text

    sentence = "The quick brown fox jumps over the lazy dog"
    replacements = [
        ("quick", "slow"),
        ("brown", "red"),
        ("lazy", "energetic"),
    ]

    actual = replace_in_text(sentence, replacements)

    assert actual == "The slow red fox jumps over the energetic dog"
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@pytest.mark.ai
def test_replace_in_text__handles_overlapping_replacements__correctly() -> None:
    """
    Purpose: Check that replace_in_text keeps replacements from conflicting.
    Why this matters: A cascading replacement would silently corrupt the output.
    Setup summary: The replacement for one pattern ("B") is itself another pattern.
    """
    from unique_toolkit._common.string_utilities import replace_in_text

    sentence = "A becomes B and B becomes C"
    replacements = [("A", "B"), ("B", "C")]

    actual = replace_in_text(sentence, replacements)

    # Original "A" -> "B" and original "B" -> "C"; the "B" that was produced
    # from "A" must not be rewritten again into "C".
    assert actual == "B becomes C and C becomes C"
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@pytest.mark.ai
def test_replace_in_text__replaces_with_regex_pattern__compiled_pattern() -> None:
    """
    Purpose: Verify replace_in_text works with compiled regex patterns.
    Why this matters: Enables advanced pattern matching beyond literal strings.
    Setup summary: Use compiled regex pattern, assert proper replacement.
    """
    # Arrange
    from unique_toolkit._common.string_utilities import replace_in_text

    text = "Contact: alice@example.com or bob@test.com"
    # Inside a character class "|" is a literal pipe, not alternation, so the
    # top-level-domain part is spelled [A-Za-z] rather than [A-Z|a-z].
    pattern = re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b")
    repls = [(pattern, "[EMAIL]")]

    # Act
    result = replace_in_text(text, repls)

    # Assert
    assert result == "Contact: [EMAIL] or [EMAIL]"
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
@pytest.mark.ai
def test_replace_in_text__returns_unchanged__with_empty_replacements() -> None:
    """
    Purpose: Check that replace_in_text copes with an empty replacement list.
    Why this matters: Callers with nothing to replace must not hit an error.
    Setup summary: Pass no replacements and expect the input text back.
    """
    from unique_toolkit._common.string_utilities import replace_in_text

    original = "Original text remains unchanged"
    no_replacements: list[tuple[str, str]] = []

    actual = replace_in_text(original, no_replacements)

    assert actual == original
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
@pytest.mark.ai
def test_replace_in_text__returns_unchanged__with_no_matches() -> None:
    """
    Purpose: Check that replace_in_text tolerates patterns that never match.
    Why this matters: Non-matching patterns must be handled gracefully.
    Setup summary: Neither pattern occurs in the text, so it must come back as-is.
    """
    from unique_toolkit._common.string_utilities import replace_in_text

    sentence = "Hello world"
    unmatched = [("goodbye", "farewell"), ("universe", "cosmos")]

    actual = replace_in_text(sentence, unmatched)

    assert actual == "Hello world"
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
@pytest.mark.ai
def test_replace_in_text__handles_empty_text__correctly() -> None:
    """
    Purpose: Check that replace_in_text accepts an empty input text.
    Why this matters: Edge-case inputs must not raise.
    Setup summary: Empty text plus a valid replacement; the result stays empty.
    """
    from unique_toolkit._common.string_utilities import replace_in_text

    actual = replace_in_text("", [("pattern", "replacement")])

    assert actual == ""
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
@pytest.mark.ai
def test_replace_in_text__replaces_with_empty_string__removes_pattern() -> None:
    """
    Purpose: Verify replace_in_text can remove patterns by replacing with empty string.
    Why this matters: Enables pattern deletion use case.
    Setup summary: Replace patterns with empty strings, assert removal.
    """
    # Arrange
    from unique_toolkit._common.string_utilities import replace_in_text

    text = "Remove [TAG1] these [TAG2] tags"
    repls = [(r"\[TAG\d+\]", "")]

    # Act
    result = replace_in_text(text, repls)

    # Assert
    # Deleting a tag keeps the space on each side of it, so two spaces remain
    # where every tag used to be.
    # NOTE(review): assumes replace_in_text does plain substitution without
    # whitespace normalisation - confirm against the implementation.
    assert result == "Remove  these  tags"
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
@pytest.mark.ai
def test_replace_in_text__preserves_case__in_non_matching_text() -> None:
    """
    Purpose: Check that replace_in_text touches matched patterns only.
    Why this matters: Text that does not match must be left exactly as it was.
    Setup summary: Lowercase pattern against four differently-cased variants.
    """
    from unique_toolkit._common.string_utilities import replace_in_text

    variants = "Test test TEST TeSt"

    actual = replace_in_text(variants, [("test", "exam")])

    # Only the all-lowercase occurrence is expected to change.
    assert actual == "Test exam TEST TeSt"
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
@pytest.mark.ai
def test_replace_in_text__interprets_strings_as_regex__patterns() -> None:
    """
    Purpose: Check that string patterns are treated as regular expressions.
    Why this matters: Documents that special characters in patterns need escaping.
    Setup summary: Escaped "$" and parentheses in the patterns, plain text replacements.
    """
    from unique_toolkit._common.string_utilities import replace_in_text

    price_line = "Price: $100 (sale)"
    # The patterns escape "$", "(" and ")" because they are regex metacharacters.
    escaped_replacements = [
        (r"\$100", "$50"),
        (r"\(sale\)", "(clearance)"),
    ]

    actual = replace_in_text(price_line, escaped_replacements)

    assert actual == "Price: $50 (clearance)"
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
@pytest.mark.ai
def test_replace_in_text__handles_multiple_occurrences__of_same_pattern() -> None:
    """
    Purpose: Check that every occurrence of a pattern is replaced.
    Why this matters: Replacement is expected to be global, not first-match only.
    Setup summary: One pattern occurring three times in the text.
    """
    from unique_toolkit._common.string_utilities import replace_in_text

    repeated = "foo bar foo baz foo"

    actual = replace_in_text(repeated, [("foo", "qux")])

    assert actual == "qux bar qux baz qux"
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
@pytest.mark.ai
def test_replace_in_text__handles_chain_replacement__without_cascading() -> None:
    """
    Purpose: Check that replace_in_text does not cascade replacements.
    Why this matters: Output must stay predictable when many patterns are supplied.
    Setup summary: Three single-letter patterns, each mapped to an unrelated letter.
    """
    from unique_toolkit._common.string_utilities import replace_in_text

    sentence = "Replace A with B, B with C, C with D"
    replacements = [("A", "X"), ("B", "Y"), ("C", "Z")]

    actual = replace_in_text(sentence, replacements)

    # Patterns are matched against the original text, never against
    # intermediate results; "D" has no pattern and stays untouched.
    assert actual == "Replace X with Y, Y with Z, Z with D"
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
@pytest.mark.ai
def test_replace_in_text__handles_overlapping_patterns__first_to_last() -> None:
    """
    Purpose: Check that patterns which could interfere are replaced independently.
    Why this matters: Exercises the mechanism that keeps replacements isolated.
    Setup summary: Three patterns listed in a different order than they appear in the text.
    """
    from unique_toolkit._common.string_utilities import replace_in_text

    sentence = "The value is 123"
    replacements = [
        ("123", "456"),
        ("value", "result"),
        ("is", "equals"),
    ]

    actual = replace_in_text(sentence, replacements)

    assert actual == "The result equals 456"
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
@pytest.mark.ai
def test_replace_in_text__mixed_string_and_regex__patterns() -> None:
    """
    Purpose: Check that string and compiled-regex patterns can be combined.
    Why this matters: One call can then mix both matching strategies.
    Setup summary: A compiled e-mail regex alongside two plain string patterns.
    """
    from unique_toolkit._common.string_utilities import replace_in_text

    record = "User: alice_smith, Email: alice@example.com, Age: 25"
    email_regex = re.compile(r"\b[A-Za-z]+@[A-Za-z]+\.[a-z]{2,}\b")
    replacements = [
        (email_regex, "[REDACTED]"),
        ("alice_smith", "[USERNAME]"),
        ("Age: 25", "Age: XX"),
    ]

    actual = replace_in_text(record, replacements)

    assert actual == "User: [USERNAME], Email: [REDACTED], Age: XX"
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
@pytest.mark.ai
def test_replace_in_text__handles_whitespace__in_patterns() -> None:
    """
    Purpose: Check that whitespace characters work as patterns.
    Why this matters: Whitespace-sensitive text needs exact matching.
    Setup summary: Tab and newline patterns, each replaced by a single space.
    """
    from unique_toolkit._common.string_utilities import replace_in_text

    mixed_whitespace = "Hello world\twith\nvarious spaces"
    to_single_space = [("\t", " "), ("\n", " ")]

    actual = replace_in_text(mixed_whitespace, to_single_space)

    assert actual == "Hello world with various spaces"
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
@pytest.mark.ai
def test_replace_in_text__handles_unicode__characters() -> None:
    """
    Purpose: Check that replace_in_text handles non-ASCII characters.
    Why this matters: Internationalised text must be supported.
    Setup summary: Accented words as patterns; one accented word has no pattern.
    """
    from unique_toolkit._common.string_utilities import replace_in_text

    accented = "Café résumé naïve Ñoño"
    replacements = [
        ("Café", "Coffee"),
        ("résumé", "resume"),
        ("Ñoño", "Nono"),
    ]

    actual = replace_in_text(accented, replacements)

    # "naïve" is not among the patterns and must be preserved verbatim.
    assert actual == "Coffee resume naïve Nono"
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
@pytest.mark.ai
def test_replace_in_text__handles_multiple_word_replacements__efficiently() -> None:
    """
    Purpose: Check that replace_in_text copes with many replacements at once.
    Why this matters: Larger text transformations depend on it.
    Setup summary: Five independent words, each mapped to its upper-case form.
    """
    from unique_toolkit._common.string_utilities import replace_in_text

    words = ("alpha", "beta", "gamma", "delta", "epsilon")
    sentence = " ".join(words)
    # Pair every word with its upper-cased spelling.
    replacements = [(word, word.upper()) for word in words]

    actual = replace_in_text(sentence, replacements)

    assert actual == "ALPHA BETA GAMMA DELTA EPSILON"
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
@pytest.mark.ai
def test_replace_in_text__preserves_replacement_order__in_output() -> None:
    """
    Purpose: Check that replacements are applied deterministically.
    Why this matters: Predictable output simplifies debugging and testing.
    Setup summary: Three ordinal words replaced in the order they are listed.
    """
    from unique_toolkit._common.string_utilities import replace_in_text

    ordinals = "First Second Third"
    replacements = [
        ("First", "1st"),
        ("Second", "2nd"),
        ("Third", "3rd"),
    ]

    actual = replace_in_text(ordinals, replacements)

    assert actual == "1st 2nd 3rd"
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
@pytest.mark.ai
def test_replace_in_text__handles_regex_groups__in_patterns() -> None:
    """
    Purpose: Check that a regex containing capture groups can be used as a pattern.
    Why this matters: Allows more advanced pattern transformations.
    Setup summary: Date regex with three groups, replaced by a fixed string.
    """
    from unique_toolkit._common.string_utilities import replace_in_text

    event_line = "Date: 2023-12-25, Event: Christmas"
    iso_date = re.compile(r"(\d{4})-(\d{2})-(\d{2})")
    # Note: the placeholder replacement breaks capture group references, so
    # the replacement is a literal string rather than a back-reference.
    replacements = [(iso_date, "12/25/2023")]

    actual = replace_in_text(event_line, replacements)

    assert actual == "Date: 12/25/2023, Event: Christmas"
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
@pytest.mark.ai
def test_replace_in_text__handles_same_pattern_replacement__idempotent() -> None:
    """
    Purpose: Check the case where a pattern is replaced by itself.
    Why this matters: This edge case must not loop forever or raise.
    Setup summary: Pattern and replacement are identical; text must be unchanged.
    """
    from unique_toolkit._common.string_utilities import replace_in_text

    sentence = "Same text Same"

    actual = replace_in_text(sentence, [("Same", "Same")])

    assert actual == "Same text Same"
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
@pytest.mark.ai
@pytest.mark.parametrize(
    "text, repls, expected",
    [
        pytest.param("", [], "", id="empty-text-empty-repls"),
        pytest.param("test", [], "test", id="no-repls"),
        pytest.param("", [("a", "b")], "", id="empty-text-with-repls"),
        pytest.param("The cat sat", [("cat", "dog")], "The dog sat", id="word-replaced"),
        pytest.param("aaa", [("a", "b")], "bbb", id="repeated-char"),
    ],
)
def test_replace_in_text__edge_cases(
    text: str, repls: list[tuple[str, str]], expected: str
) -> None:
    """
    Purpose: Table-driven coverage of boundary inputs.
    Why this matters: Behaviour must stay robust across corner cases.
    Setup summary: Each parameter set carries the text, the replacements and the expected output.
    """
    from unique_toolkit._common.string_utilities import replace_in_text

    actual = replace_in_text(text, repls)

    assert actual == expected
|
|
462
|
+
|
|
463
|
+
|
|
464
|
+
@pytest.mark.ai
def test_replace_in_text__handles_newlines_and_multiline__text() -> None:
    """
    Purpose: Check that replace_in_text works on text spanning several lines.
    Why this matters: Documents and structured text are multi-line.
    Setup summary: Three lines, one word replaced on each of them.
    """
    from unique_toolkit._common.string_utilities import replace_in_text

    document = "Line 1: Hello\nLine 2: World\nLine 3: Test"
    replacements = [
        ("Hello", "Hi"),
        ("World", "Universe"),
        ("Test", "Example"),
    ]

    actual = replace_in_text(document, replacements)

    # Line breaks must be preserved; only the trailing word of each line changes.
    assert actual == "Line 1: Hi\nLine 2: Universe\nLine 3: Example"
|
|
487
|
+
|
|
488
|
+
|
|
489
|
+
@pytest.mark.ai
def test_replace_in_text__handles_reference_pattern__use_case() -> None:
    """
    Purpose: Check replace_in_text on the reference-renumbering use case.
    Why this matters: Citation management in the codebase uses this pattern.
    Setup summary: Two <sup> reference markers renumbered to new values.
    """
    from unique_toolkit._common.string_utilities import replace_in_text

    cited = "Text with <sup>1</sup> and <sup>2</sup> references."
    renumbering = [
        ("<sup>1</sup>", "<sup>5</sup>"),
        ("<sup>2</sup>", "<sup>6</sup>"),
    ]

    actual = replace_in_text(cited, renumbering)

    assert actual == "Text with <sup>5</sup> and <sup>6</sup> references."
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
import math
|
|
3
|
+
import re
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from io import BytesIO
|
|
6
|
+
|
|
7
|
+
from PIL import Image
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class DetailLevel(Enum):
    """Detail setting that selects how an image's token cost is computed."""

    LOW = "low"
    HIGH = "high"


# https://platform.openai.com/docs/guides/vision/calculating-costs#calculating-costs
def calculate_image_tokens(width: int, height: int, detail: DetailLevel) -> int:
    """
    Calculate the token cost of an image based on its dimensions and detail level.
    NOTE: While we followed the documentation provided by openai to calculate image token cost, in practice,
    we notice that this function overestimates the number of tokens consumed by the model.

    Parameters:
    - width (int): The width of the image in pixels.
    - height (int): The height of the image in pixels.
    - detail (DetailLevel): The detail level. The raw string "low" is accepted
      as an alias for DetailLevel.LOW; any other value is costed as high detail.

    Returns:
    - int: The token cost of the image.
    """
    # Low detail has a flat cost regardless of size. The plain string value is
    # accepted too, so a caller passing "low" is not silently billed as high.
    if detail == DetailLevel.LOW or detail == DetailLevel.LOW.value:
        return 85

    # Step 1: Scale down so the longest side fits within a 2048x2048 square
    max_long_dim = 2048
    long_dim = max(width, height)
    if long_dim > max_long_dim:
        scale_factor = long_dim / max_long_dim
        width = int(width / scale_factor)
        height = int(height / scale_factor)

    # Step 2: Scale down so the shortest side is at most 768
    max_short_dim = 768
    short_dim = min(width, height)
    if short_dim > max_short_dim:
        scale_factor = short_dim / max_short_dim
        width = int(width / scale_factor)
        height = int(height / scale_factor)

    # Step 3: Calculate the number of 512x512 tiles needed to cover the image
    tiles = math.ceil(width / 512) * math.ceil(height / 512)
    # Step 4: Compute token cost: 170 per tile plus a fixed 85
    token_cost = (tiles * 170) + 85
    return token_cost
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def calculate_image_tokens_from_base64(base64_string: str):
    """
    Calculate the token cost of a base64-encoded image.

    Parameters:
    - base64_string (str): The encoded image; a leading data-URL header is
      stripped with remove_base64_header before decoding.

    Returns:
    - The value returned by calculate_image_tokens for the decoded image's
      width and height at DetailLevel.HIGH.
    """
    base64_string = remove_base64_header(base64_string)
    # Only the dimensions are needed, so the image is closed as soon as they are read.
    with Image.open(BytesIO(base64.b64decode(base64_string))) as image:
        width, height = image.width, image.height
    # DETAIL LEVEL HIGH IS THE DEFAULT TO BE ON THE SAFE SIDE
    return calculate_image_tokens(width, height, DetailLevel.HIGH)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def remove_base64_header(base64_string: str) -> str:
    """
    Strip a leading "data:image/<subtype>;base64," header from a string.

    Parameters:
    - base64_string (str): A base64 payload, optionally prefixed by a data-URL header.

    Returns:
    - str: The input without the header; unchanged when no header is at the start.
    """
    # Subtypes may contain ".", "+" and "-" (e.g. "svg+xml", "x-icon"),
    # which a bare \w+ would fail to match.
    header_pattern = r"^data:image/[\w.+-]+;base64,"
    return re.sub(header_pattern, "", base64_string)
|