unique_toolkit 0.7.7__py3-none-any.whl → 1.23.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unique_toolkit might be problematic. Click here for more details.
- unique_toolkit/__init__.py +28 -1
- unique_toolkit/_common/api_calling/human_verification_manager.py +343 -0
- unique_toolkit/_common/base_model_type_attribute.py +303 -0
- unique_toolkit/_common/chunk_relevancy_sorter/config.py +49 -0
- unique_toolkit/_common/chunk_relevancy_sorter/exception.py +5 -0
- unique_toolkit/_common/chunk_relevancy_sorter/schemas.py +46 -0
- unique_toolkit/_common/chunk_relevancy_sorter/service.py +374 -0
- unique_toolkit/_common/chunk_relevancy_sorter/tests/test_service.py +275 -0
- unique_toolkit/_common/default_language_model.py +12 -0
- unique_toolkit/_common/docx_generator/__init__.py +7 -0
- unique_toolkit/_common/docx_generator/config.py +12 -0
- unique_toolkit/_common/docx_generator/schemas.py +80 -0
- unique_toolkit/_common/docx_generator/service.py +252 -0
- unique_toolkit/_common/docx_generator/template/Doc Template.docx +0 -0
- unique_toolkit/_common/endpoint_builder.py +305 -0
- unique_toolkit/_common/endpoint_requestor.py +430 -0
- unique_toolkit/_common/exception.py +24 -0
- unique_toolkit/_common/feature_flags/schema.py +9 -0
- unique_toolkit/_common/pydantic/rjsf_tags.py +936 -0
- unique_toolkit/_common/pydantic_helpers.py +154 -0
- unique_toolkit/_common/referencing.py +53 -0
- unique_toolkit/_common/string_utilities.py +140 -0
- unique_toolkit/_common/tests/test_referencing.py +521 -0
- unique_toolkit/_common/tests/test_string_utilities.py +506 -0
- unique_toolkit/_common/token/image_token_counting.py +67 -0
- unique_toolkit/_common/token/token_counting.py +204 -0
- unique_toolkit/_common/utils/__init__.py +1 -0
- unique_toolkit/_common/utils/files.py +43 -0
- unique_toolkit/_common/utils/structured_output/__init__.py +1 -0
- unique_toolkit/_common/utils/structured_output/schema.py +5 -0
- unique_toolkit/_common/utils/write_configuration.py +51 -0
- unique_toolkit/_common/validators.py +101 -4
- unique_toolkit/agentic/__init__.py +1 -0
- unique_toolkit/agentic/debug_info_manager/debug_info_manager.py +28 -0
- unique_toolkit/agentic/debug_info_manager/test/test_debug_info_manager.py +278 -0
- unique_toolkit/agentic/evaluation/config.py +36 -0
- unique_toolkit/{evaluators → agentic/evaluation}/context_relevancy/prompts.py +25 -0
- unique_toolkit/agentic/evaluation/context_relevancy/schema.py +80 -0
- unique_toolkit/agentic/evaluation/context_relevancy/service.py +273 -0
- unique_toolkit/agentic/evaluation/evaluation_manager.py +218 -0
- unique_toolkit/agentic/evaluation/hallucination/constants.py +61 -0
- unique_toolkit/agentic/evaluation/hallucination/hallucination_evaluation.py +111 -0
- unique_toolkit/{evaluators → agentic/evaluation}/hallucination/prompts.py +1 -1
- unique_toolkit/{evaluators → agentic/evaluation}/hallucination/service.py +16 -15
- unique_toolkit/{evaluators → agentic/evaluation}/hallucination/utils.py +30 -20
- unique_toolkit/{evaluators → agentic/evaluation}/output_parser.py +20 -2
- unique_toolkit/{evaluators → agentic/evaluation}/schemas.py +27 -7
- unique_toolkit/agentic/evaluation/tests/test_context_relevancy_service.py +253 -0
- unique_toolkit/agentic/evaluation/tests/test_output_parser.py +87 -0
- unique_toolkit/agentic/history_manager/history_construction_with_contents.py +297 -0
- unique_toolkit/agentic/history_manager/history_manager.py +242 -0
- unique_toolkit/agentic/history_manager/loop_token_reducer.py +484 -0
- unique_toolkit/agentic/history_manager/utils.py +96 -0
- unique_toolkit/agentic/postprocessor/postprocessor_manager.py +212 -0
- unique_toolkit/agentic/reference_manager/reference_manager.py +103 -0
- unique_toolkit/agentic/responses_api/__init__.py +19 -0
- unique_toolkit/agentic/responses_api/postprocessors/code_display.py +63 -0
- unique_toolkit/agentic/responses_api/postprocessors/generated_files.py +145 -0
- unique_toolkit/agentic/responses_api/stream_handler.py +15 -0
- unique_toolkit/agentic/short_term_memory_manager/persistent_short_term_memory_manager.py +141 -0
- unique_toolkit/agentic/thinking_manager/thinking_manager.py +103 -0
- unique_toolkit/agentic/tools/__init__.py +1 -0
- unique_toolkit/agentic/tools/a2a/__init__.py +36 -0
- unique_toolkit/agentic/tools/a2a/config.py +17 -0
- unique_toolkit/agentic/tools/a2a/evaluation/__init__.py +15 -0
- unique_toolkit/agentic/tools/a2a/evaluation/_utils.py +66 -0
- unique_toolkit/agentic/tools/a2a/evaluation/config.py +55 -0
- unique_toolkit/agentic/tools/a2a/evaluation/evaluator.py +260 -0
- unique_toolkit/agentic/tools/a2a/evaluation/summarization_user_message.j2 +9 -0
- unique_toolkit/agentic/tools/a2a/manager.py +55 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/__init__.py +21 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/_display_utils.py +185 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/_ref_utils.py +73 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/config.py +45 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/display.py +180 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/references.py +101 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/test/test_display_utils.py +1335 -0
- unique_toolkit/agentic/tools/a2a/postprocessing/test/test_ref_utils.py +603 -0
- unique_toolkit/agentic/tools/a2a/prompts.py +46 -0
- unique_toolkit/agentic/tools/a2a/response_watcher/__init__.py +6 -0
- unique_toolkit/agentic/tools/a2a/response_watcher/service.py +91 -0
- unique_toolkit/agentic/tools/a2a/tool/__init__.py +4 -0
- unique_toolkit/agentic/tools/a2a/tool/_memory.py +26 -0
- unique_toolkit/agentic/tools/a2a/tool/_schema.py +9 -0
- unique_toolkit/agentic/tools/a2a/tool/config.py +73 -0
- unique_toolkit/agentic/tools/a2a/tool/service.py +306 -0
- unique_toolkit/agentic/tools/agent_chunks_hanlder.py +65 -0
- unique_toolkit/agentic/tools/config.py +167 -0
- unique_toolkit/agentic/tools/factory.py +44 -0
- unique_toolkit/agentic/tools/mcp/__init__.py +4 -0
- unique_toolkit/agentic/tools/mcp/manager.py +71 -0
- unique_toolkit/agentic/tools/mcp/models.py +28 -0
- unique_toolkit/agentic/tools/mcp/tool_wrapper.py +234 -0
- unique_toolkit/agentic/tools/openai_builtin/__init__.py +11 -0
- unique_toolkit/agentic/tools/openai_builtin/base.py +30 -0
- unique_toolkit/agentic/tools/openai_builtin/code_interpreter/__init__.py +8 -0
- unique_toolkit/agentic/tools/openai_builtin/code_interpreter/config.py +57 -0
- unique_toolkit/agentic/tools/openai_builtin/code_interpreter/service.py +230 -0
- unique_toolkit/agentic/tools/openai_builtin/manager.py +62 -0
- unique_toolkit/agentic/tools/schemas.py +141 -0
- unique_toolkit/agentic/tools/test/test_mcp_manager.py +536 -0
- unique_toolkit/agentic/tools/test/test_tool_progress_reporter.py +445 -0
- unique_toolkit/agentic/tools/tool.py +183 -0
- unique_toolkit/agentic/tools/tool_manager.py +523 -0
- unique_toolkit/agentic/tools/tool_progress_reporter.py +285 -0
- unique_toolkit/agentic/tools/utils/__init__.py +19 -0
- unique_toolkit/agentic/tools/utils/execution/__init__.py +1 -0
- unique_toolkit/agentic/tools/utils/execution/execution.py +286 -0
- unique_toolkit/agentic/tools/utils/source_handling/__init__.py +0 -0
- unique_toolkit/agentic/tools/utils/source_handling/schema.py +21 -0
- unique_toolkit/agentic/tools/utils/source_handling/source_formatting.py +207 -0
- unique_toolkit/agentic/tools/utils/source_handling/tests/test_source_formatting.py +216 -0
- unique_toolkit/app/__init__.py +6 -0
- unique_toolkit/app/dev_util.py +180 -0
- unique_toolkit/app/init_sdk.py +32 -1
- unique_toolkit/app/schemas.py +198 -31
- unique_toolkit/app/unique_settings.py +367 -0
- unique_toolkit/chat/__init__.py +8 -1
- unique_toolkit/chat/deprecated/service.py +232 -0
- unique_toolkit/chat/functions.py +642 -77
- unique_toolkit/chat/rendering.py +34 -0
- unique_toolkit/chat/responses_api.py +461 -0
- unique_toolkit/chat/schemas.py +133 -2
- unique_toolkit/chat/service.py +115 -767
- unique_toolkit/content/functions.py +153 -4
- unique_toolkit/content/schemas.py +122 -15
- unique_toolkit/content/service.py +278 -44
- unique_toolkit/content/smart_rules.py +301 -0
- unique_toolkit/content/utils.py +8 -3
- unique_toolkit/embedding/service.py +102 -11
- unique_toolkit/framework_utilities/__init__.py +1 -0
- unique_toolkit/framework_utilities/langchain/client.py +71 -0
- unique_toolkit/framework_utilities/langchain/history.py +19 -0
- unique_toolkit/framework_utilities/openai/__init__.py +6 -0
- unique_toolkit/framework_utilities/openai/client.py +83 -0
- unique_toolkit/framework_utilities/openai/message_builder.py +229 -0
- unique_toolkit/framework_utilities/utils.py +23 -0
- unique_toolkit/language_model/__init__.py +3 -0
- unique_toolkit/language_model/builder.py +27 -11
- unique_toolkit/language_model/default_language_model.py +3 -0
- unique_toolkit/language_model/functions.py +327 -43
- unique_toolkit/language_model/infos.py +992 -50
- unique_toolkit/language_model/reference.py +242 -0
- unique_toolkit/language_model/schemas.py +475 -48
- unique_toolkit/language_model/service.py +228 -27
- unique_toolkit/protocols/support.py +145 -0
- unique_toolkit/services/__init__.py +7 -0
- unique_toolkit/services/chat_service.py +1630 -0
- unique_toolkit/services/knowledge_base.py +861 -0
- unique_toolkit/short_term_memory/service.py +178 -41
- unique_toolkit/smart_rules/__init__.py +0 -0
- unique_toolkit/smart_rules/compile.py +56 -0
- unique_toolkit/test_utilities/events.py +197 -0
- {unique_toolkit-0.7.7.dist-info → unique_toolkit-1.23.0.dist-info}/METADATA +606 -7
- unique_toolkit-1.23.0.dist-info/RECORD +182 -0
- unique_toolkit/evaluators/__init__.py +0 -1
- unique_toolkit/evaluators/config.py +0 -35
- unique_toolkit/evaluators/constants.py +0 -1
- unique_toolkit/evaluators/context_relevancy/constants.py +0 -32
- unique_toolkit/evaluators/context_relevancy/service.py +0 -53
- unique_toolkit/evaluators/context_relevancy/utils.py +0 -142
- unique_toolkit/evaluators/hallucination/constants.py +0 -41
- unique_toolkit-0.7.7.dist-info/RECORD +0 -64
- /unique_toolkit/{evaluators → agentic/evaluation}/exception.py +0 -0
- {unique_toolkit-0.7.7.dist-info → unique_toolkit-1.23.0.dist-info}/LICENSE +0 -0
- {unique_toolkit-0.7.7.dist-info → unique_toolkit-1.23.0.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
from unique_toolkit.chat.schemas import ChatMessage
|
|
4
|
+
from unique_toolkit.content.schemas import ContentChunk, ContentReference
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def add_references_to_message(
    message: ChatMessage,
    search_context: list[ContentChunk],
    model: str | None = None,
) -> tuple[ChatMessage, bool]:
    """Rewrite the bracket citations of ``message`` as footnotes, in place.

    The message content is normalized, its ``[N]`` markers are resolved
    against ``search_context`` and replaced by ``<sup>`` footnotes, and the
    resolved references are stored on ``message.references``.

    Args:
        message: Message whose ``content`` and ``references`` are updated.
        search_context: Chunks that the 1-based ``[N]`` markers point into.
        model: Optional model name, forwarded to the reference resolution.

    Returns:
        ``(message, references_changed)`` where ``references_changed`` is
        ``True`` when at least one reference was resolved. A message without
        content is returned untouched together with ``False``.

    Raises:
        ValueError: If the message has content but its ``id`` is ``None``.
    """
    original_content = message.content
    if not original_content:
        return message, False

    message_id = message.id
    if message_id is None:
        raise ValueError("Message ID is required")

    # The normalized text is written back before resolution, and the
    # resolution then reads it from the message again.
    message.content = _preprocess_message(original_content)
    resolved_text, resolved_references = _add_references(
        message.content, search_context, message_id, model
    )

    message.content = _postprocess_message(resolved_text)
    message.references = resolved_references
    return message, len(resolved_references) > 0
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _add_references(
    text: str,
    search_context: list[ContentChunk],
    message_id: str,
    model: str | None = None,
) -> tuple[str, list[ContentReference]]:
    """Resolve ``[N]`` markers in ``text`` and turn them into footnotes.

    Args:
        text: Text containing ``[N]`` markers.
        search_context: Chunks that the 1-based markers point into.
        message_id: Id stored on every created reference.
        model: Optional model name; names starting with
            ``"litellm:gemini-2-5"`` get long footnote runs shortened.

    Returns:
        ``(processed_text, references)``: the text with resolved markers
        replaced by ``<sup>`` footnotes and unresolved ``[N]`` markers
        removed, plus the list of ``ContentReference`` objects found.
    """
    references = _find_references(
        text=text,
        search_context=search_context,
        message_id=message_id,
    )

    # Only reference a source once, even if it is mentioned multiple times in the text.
    footnoted = _add_footnotes_to_text(text=text, references=references)

    # Gemini 2.5 models have tendency to add multiple references for the same fact
    # This is a workaround to limit the number of references to 5
    if model and model.startswith("litellm:gemini-2-5"):
        footnoted = _limit_consecutive_source_references(footnoted)

        # Keep only the references whose footnote survived the reduction.
        remaining_numbers = {
            int(number) for number in re.findall(r"<sup>(\d+)</sup>", footnoted)
        }
        references = [
            ref for ref in references if ref.sequence_number in remaining_numbers
        ]

    return _remove_hallucinated_references(footnoted), references
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _preprocess_message(text: str) -> str:
|
|
75
|
+
"""Preprocess message text to normalize reference formats."""
|
|
76
|
+
# Remove user & assistant references: XML format '[<user>]', '[\<user>]', etc.
|
|
77
|
+
patterns = [
|
|
78
|
+
(r"\[(\\)?(<)?user(>)?\]", ""),
|
|
79
|
+
(r"\[(\\)?(<)?assistant(>)?\]", ""),
|
|
80
|
+
(r"source[\s]?\[(\\)?(<)?conversation(>)?\]", "the previous conversation"),
|
|
81
|
+
(r"\[(\\)?(<)?previous[_,\s]conversation(>)?\]", ""),
|
|
82
|
+
(r"\[(\\)?(<)?past[_,\s]conversation(>)?\]", ""),
|
|
83
|
+
(r"\[(\\)?(<)?previous[_,\s]?answer(>)?\]", ""),
|
|
84
|
+
(r"\[(\\)?(<)?previous[_,\s]question(>)?\]", ""),
|
|
85
|
+
(r"\[(\\)?(<)?conversation(>)?\]", ""),
|
|
86
|
+
(r"\[(\\)?(<)?none(>)?\]", ""),
|
|
87
|
+
]
|
|
88
|
+
|
|
89
|
+
for pattern, replacement in patterns:
|
|
90
|
+
text = re.sub(pattern, replacement, text, flags=re.IGNORECASE)
|
|
91
|
+
|
|
92
|
+
# Replace XML format '[<source XX>]', '[<sourceXX>]' and '[\<sourceXX>]' with [XX]
|
|
93
|
+
text = re.sub(r"\[(\\)?<source[\s]?(\d+)>\]", r"[\2]", text)
|
|
94
|
+
|
|
95
|
+
# Replace format 'source XX', 'source_X' and 'sourceXX' references with XX, where XX is a number
|
|
96
|
+
text = re.sub(r"source[\s_]?(\d+)", r"[\1]", text)
|
|
97
|
+
|
|
98
|
+
# Replace 'source_number="X"' with X, where X is a number
|
|
99
|
+
text = re.sub(r"source_number=\"(\d+)\"", r"[\1]", text)
|
|
100
|
+
|
|
101
|
+
# Make all references non-bold
|
|
102
|
+
text = re.sub(r"\[\*\*(\d+)\*\*\]", r"[\1]", text)
|
|
103
|
+
|
|
104
|
+
# Replace 'SOURCEXX' and 'SOURCE XX' with [XX]
|
|
105
|
+
text = re.sub(r"source[\s]?(\d+)", r"[\1]", text, flags=re.IGNORECASE)
|
|
106
|
+
|
|
107
|
+
# Replace 'SOURCE n°X' with [XX]
|
|
108
|
+
text = re.sub(r"source[\s]?n°(\d+)", r"[\1]", text, flags=re.IGNORECASE)
|
|
109
|
+
|
|
110
|
+
# Replace '[<[XX]>]' and '[\<[XX]>]' with [XX]
|
|
111
|
+
text = re.sub(r"\[(\\)?\[?<\[(\d+)\]?\]>\]", r"[\2]", text)
|
|
112
|
+
|
|
113
|
+
# Replace '[source: X, Y, Z]' with [X][Y][Z], where X,Y,Z are numbers
|
|
114
|
+
def replace_source_colon(match):
|
|
115
|
+
numbers = re.findall(r"\d+", match.group(0))
|
|
116
|
+
return "".join(f"[{n}]" for n in numbers)
|
|
117
|
+
|
|
118
|
+
text = re.sub(r"\[source:\s*([\d,\s]+)\]", replace_source_colon, text)
|
|
119
|
+
|
|
120
|
+
# Replace '[[A], [B], ...]', '[[A], B, C, ...]', and '[X, Y, Z]' with [A][B][C]... where A,B,C are numbers
|
|
121
|
+
def replace_combined_brackets(match):
|
|
122
|
+
numbers = re.findall(r"\d+", match.group(0))
|
|
123
|
+
return "".join(f"[{n}]" for n in numbers)
|
|
124
|
+
|
|
125
|
+
text = re.sub(
|
|
126
|
+
r"(?:\[\[(\d+)\](?:,\s*(?:\[)?\d+(?:\])?)*\]|\[([\d,\s]+)\])",
|
|
127
|
+
replace_combined_brackets,
|
|
128
|
+
text,
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
return text
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _limit_consecutive_source_references(text: str) -> str:
|
|
135
|
+
"""Limit consecutive source references to maximum 5 unique sources."""
|
|
136
|
+
|
|
137
|
+
def replace_consecutive(match):
|
|
138
|
+
# Extract all numbers from the match and get unique values
|
|
139
|
+
numbers = list(set(re.findall(r"\d+", match.group(0))))
|
|
140
|
+
# Take only the first five unique numbers
|
|
141
|
+
return "".join(f"<sup>{n}</sup>" for n in numbers[:5])
|
|
142
|
+
|
|
143
|
+
# Find sequences of 5+ consecutive sources
|
|
144
|
+
pattern = r"(?:<sup>\d+</sup>){5,}"
|
|
145
|
+
return re.sub(pattern, replace_consecutive, text)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _postprocess_message(text: str) -> str:
|
|
149
|
+
"""Format superscript references to remove duplicates."""
|
|
150
|
+
|
|
151
|
+
def replace_sup_sequence(match):
|
|
152
|
+
# Extract unique numbers from the entire match
|
|
153
|
+
sup_numbers = set(re.findall(r"\d+", match.group(0)))
|
|
154
|
+
return "".join(f"<sup>{n}</sup>" for n in sup_numbers)
|
|
155
|
+
|
|
156
|
+
# Find sequences of 2+ superscripts including internal spaces
|
|
157
|
+
pattern = r"(<sup>\d+</sup>[ ]*)+<sup>\d+</sup>"
|
|
158
|
+
return re.sub(pattern, replace_sup_sequence, text)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _get_max_sub_count_in_text(text: str) -> int:
|
|
162
|
+
"""Get the maximum superscript number in the text."""
|
|
163
|
+
matches = re.findall(r"<sup>(\d+)</sup>", text)
|
|
164
|
+
return max((int(match) for match in matches), default=0)
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _find_references(
    text: str,
    search_context: list[ContentChunk],
    message_id: str,
) -> list[ContentReference]:
    """Build one ``ContentReference`` per distinct source cited in ``text``.

    Every ``[N]`` marker is read as a 1-based position in ``search_context``.
    Markers outside that range, or pointing at a falsy entry, are skipped.
    Chunks that resolve to the same reference name share one reference; each
    further marker is only appended to that reference's ``original_index``.
    Sequence numbers continue after the highest ``<sup>N</sup>`` already
    present in the text.

    Args:
        text: Text containing ``[N]`` markers.
        search_context: Chunks the markers point into.
        message_id: Id stored on every created reference.

    Returns:
        The references in order of first appearance in the text.
    """
    collected: list[ContentReference] = []
    next_sequence_number = _get_max_sub_count_in_text(text) + 1

    # Markers are visited in order of occurrence so numbering follows the text.
    for number in _extract_numbers_in_brackets(text):
        # Markers are 1-based; anything outside the context is ignored here.
        if number < 1 or number > len(search_context):
            continue

        chunk = search_context[number - 1]
        if not chunk:
            continue

        name = chunk.title or chunk.key or f"Content {chunk.id}"

        # Don't put the reference twice: reuse the one with the same name.
        existing = None
        for candidate in collected:
            if candidate.name == name:
                existing = candidate
                break
        if existing:
            existing.original_index.append(number)
            continue

        if chunk.url and not chunk.internally_stored_at:
            url = chunk.url
        else:
            url = f"unique://content/{chunk.id}"

        if chunk.chunk_id:
            source_id = f"{chunk.id}_{chunk.chunk_id}"
        else:
            source_id = chunk.id

        collected.append(
            ContentReference(
                name=name,
                url=url,
                sequence_number=next_sequence_number,
                original_index=[number],
                source_id=source_id,
                source="node-ingestion-chunks",
                message_id=message_id,
                id=chunk.id,
            )
        )
        next_sequence_number += 1

    return collected
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def _extract_numbers_in_brackets(text: str) -> list[int]:
|
|
225
|
+
"""Extract numbers from [X] format in text."""
|
|
226
|
+
matches = re.findall(r"\[(\d+)\]", text)
|
|
227
|
+
return [int(match) for match in matches]
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def _add_footnotes_to_text(text: str, references: list[ContentReference]) -> str:
|
|
231
|
+
"""Replace bracket references with superscript footnotes."""
|
|
232
|
+
for reference in references:
|
|
233
|
+
for original_index in reference.original_index:
|
|
234
|
+
text = text.replace(
|
|
235
|
+
f"[{original_index}]", f"<sup>{reference.sequence_number}</sup>"
|
|
236
|
+
)
|
|
237
|
+
return text
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def _remove_hallucinated_references(text: str) -> str:
|
|
241
|
+
"""Remove any remaining bracket references that weren't converted."""
|
|
242
|
+
return re.sub(r"\[\d+\]", "", text).strip()
|