unique_toolkit 0.7.32__py3-none-any.whl → 0.7.34__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unique_toolkit/content/utils.py +8 -3
- unique_toolkit/language_model/reference.py +15 -6
- {unique_toolkit-0.7.32.dist-info → unique_toolkit-0.7.34.dist-info}/METADATA +7 -1
- {unique_toolkit-0.7.32.dist-info → unique_toolkit-0.7.34.dist-info}/RECORD +6 -6
- {unique_toolkit-0.7.32.dist-info → unique_toolkit-0.7.34.dist-info}/LICENSE +0 -0
- {unique_toolkit-0.7.32.dist-info → unique_toolkit-0.7.34.dist-info}/WHEEL +0 -0
unique_toolkit/content/utils.py
CHANGED
@@ -190,9 +190,11 @@ def count_tokens(text: str, encoding_model="cl100k_base") -> int:
|
|
190
190
|
return len(encoding.encode(text))
|
191
191
|
|
192
192
|
|
193
|
-
def map_content_chunk(content_chunk: dict):
|
193
|
+
def map_content_chunk(content_id: str, content_key: str, content_chunk: dict):
|
194
194
|
return ContentChunk(
|
195
|
-
id=
|
195
|
+
id=content_id,
|
196
|
+
key=content_key,
|
197
|
+
chunk_id=content_chunk["id"],
|
196
198
|
text=content_chunk["text"],
|
197
199
|
start_page=content_chunk["startPage"],
|
198
200
|
end_page=content_chunk["endPage"],
|
@@ -206,7 +208,10 @@ def map_content(content: dict):
|
|
206
208
|
key=content["key"],
|
207
209
|
title=content["title"],
|
208
210
|
url=content["url"],
|
209
|
-
chunks=[
|
211
|
+
chunks=[
|
212
|
+
map_content_chunk(content["id"], content["key"], chunk)
|
213
|
+
for chunk in content["chunks"]
|
214
|
+
],
|
210
215
|
created_at=content["createdAt"],
|
211
216
|
updated_at=content["updatedAt"],
|
212
217
|
)
|
@@ -51,9 +51,9 @@ def _add_references(
|
|
51
51
|
# Only reference a source once, even if it is mentioned multiple times in the text.
|
52
52
|
with_footnotes = _add_footnotes_to_text(text=text, references=references)
|
53
53
|
|
54
|
-
# Gemini 2.5
|
54
|
+
# Gemini 2.5 models have a tendency to add multiple references for the same fact
|
55
55
|
# This is a workaround to limit the number of references to 5
|
56
|
-
if model and model.startswith("litellm:gemini-2-5
|
56
|
+
if model and model.startswith("litellm:gemini-2-5"):
|
57
57
|
reduced_text = _limit_consecutive_source_references(with_footnotes)
|
58
58
|
|
59
59
|
# Get the references that remain after reduction
|
@@ -92,8 +92,11 @@ def _preprocess_message(text: str) -> str:
|
|
92
92
|
# Replace XML format '[<source XX>]', '[<sourceXX>]' and '[\<sourceXX>]' with [XX]
|
93
93
|
text = re.sub(r"\[(\\)?<source[\s]?(\d+)>\]", r"[\2]", text)
|
94
94
|
|
95
|
-
# Replace format '
|
96
|
-
text = re.sub(r"
|
95
|
+
# Replace format 'source XX', 'source_X' and 'sourceXX' references with [XX], where XX is a number
|
96
|
+
text = re.sub(r"source[\s_]?(\d+)", r"[\1]", text)
|
97
|
+
|
98
|
+
# Replace 'source_number="X"' with [X], where X is a number
|
99
|
+
text = re.sub(r"source_number=\"(\d+)\"", r"[\1]", text)
|
97
100
|
|
98
101
|
# Make all references non-bold
|
99
102
|
text = re.sub(r"\[\*\*(\d+)\*\*\]", r"[\1]", text)
|
@@ -107,13 +110,19 @@ def _preprocess_message(text: str) -> str:
|
|
107
110
|
# Replace '[<[XX]>]' and '[\<[XX]>]' with [XX]
|
108
111
|
text = re.sub(r"\[(\\)?\[?<\[(\d+)\]?\]>\]", r"[\2]", text)
|
109
112
|
|
110
|
-
# Replace '[
|
113
|
+
# Replace '[source: X, Y, Z]' with [X][Y][Z], where X,Y,Z are numbers
|
114
|
+
def replace_source_colon(match):
|
115
|
+
numbers = re.findall(r"\d+", match.group(0))
|
116
|
+
return "".join(f"[{n}]" for n in numbers)
|
117
|
+
text = re.sub(r"\[source:\s*([\d,\s]+)\]", replace_source_colon, text)
|
118
|
+
|
119
|
+
# Replace '[[A], [B], ...]', '[[A], B, C, ...]', and '[X, Y, Z]' with [A][B][C]... where A,B,C are numbers
|
111
120
|
def replace_combined_brackets(match):
|
112
121
|
numbers = re.findall(r"\d+", match.group(0))
|
113
122
|
return "".join(f"[{n}]" for n in numbers)
|
114
123
|
|
115
124
|
text = re.sub(
|
116
|
-
r"
|
125
|
+
r"(?:\[\[(\d+)\](?:,\s*(?:\[)?\d+(?:\])?)*\]|\[([\d,\s]+)\])", replace_combined_brackets, text
|
117
126
|
)
|
118
127
|
|
119
128
|
return text
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: unique_toolkit
|
3
|
-
Version: 0.7.32
|
3
|
+
Version: 0.7.34
|
4
4
|
Summary:
|
5
5
|
License: Proprietary
|
6
6
|
Author: Martin Fadler
|
@@ -111,6 +111,12 @@ All notable changes to this project will be documented in this file.
|
|
111
111
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
112
112
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
113
113
|
|
114
|
+
## [0.7.34] - 2025-05-30
|
115
|
+
- Fix incorrect mapping in `ContentService` for the `search_content` function when mapping into a `ContentChunk` object
|
116
|
+
|
117
|
+
## [0.7.33] - 2025-06-25
|
118
|
+
- Update reference post-processing
|
119
|
+
|
114
120
|
## [0.7.32] - 2025-06-24
|
115
121
|
- Create `classmethod` for `LanguageModelMessages` to load raw messages to root
|
116
122
|
|
@@ -23,7 +23,7 @@ unique_toolkit/content/constants.py,sha256=1iy4Y67xobl5VTnJB6SxSyuoBWbdLl9244xfV
|
|
23
23
|
unique_toolkit/content/functions.py,sha256=0ELepm3_sl0SD_SYzvQVQ-jTdrcUqK5mVJZv0nQBuAw,18367
|
24
24
|
unique_toolkit/content/schemas.py,sha256=KJ604BOx0vBh2AwlTCZkOo55aHsI6yj8vxDAARKKqEo,2995
|
25
25
|
unique_toolkit/content/service.py,sha256=jGRTIt0JQKL6qRyZ9-_njWJejKKvIvDOzEQrf8CdMaU,19491
|
26
|
-
unique_toolkit/content/utils.py,sha256=
|
26
|
+
unique_toolkit/content/utils.py,sha256=qNVmHTuETaPNGqheg7TbgPr1_1jbNHDc09N5RrmUIyo,7901
|
27
27
|
unique_toolkit/embedding/__init__.py,sha256=uUyzjonPvuDCYsvXCIt7ErQXopLggpzX-MEQd3_e2kE,250
|
28
28
|
unique_toolkit/embedding/constants.py,sha256=Lj8-Lcy1FvuC31PM9Exq7vaFuxQV4pEI1huUMFX-J2M,52
|
29
29
|
unique_toolkit/embedding/functions.py,sha256=3qp-BfuMAbnp8YB04rh3xH8vsJuCBPizoy-JeaBFtoQ,1944
|
@@ -50,7 +50,7 @@ unique_toolkit/language_model/constants.py,sha256=B-topqW0r83dkC_25DeQfnPk3n53qz
|
|
50
50
|
unique_toolkit/language_model/functions.py,sha256=WhgHbJgz4Z2aZt9TLdOpI0PGyYWA5R90tdwkwdDeT8c,11987
|
51
51
|
unique_toolkit/language_model/infos.py,sha256=w5__BVG-IiiEYKG1FwM838wzqNbYI3eCCEDocKezc0I,34801
|
52
52
|
unique_toolkit/language_model/prompt.py,sha256=JSawaLjQg3VR-E2fK8engFyJnNdk21zaO8pPIodzN4Q,3991
|
53
|
-
unique_toolkit/language_model/reference.py,sha256=
|
53
|
+
unique_toolkit/language_model/reference.py,sha256=jd-JPuqLwEJDs56im56o-AdiT73pdwNtSzcW38LZ53o,8565
|
54
54
|
unique_toolkit/language_model/schemas.py,sha256=AeuDRJFblGzEYcEMyrlxpOPk12Di3J45I9rT2xZrhEU,14332
|
55
55
|
unique_toolkit/language_model/service.py,sha256=VRkUk2XbijqGlnTTvqU7uCue6qtT7lpLd_Y8f3bWv1I,10486
|
56
56
|
unique_toolkit/language_model/utils.py,sha256=bPQ4l6_YO71w-zaIPanUUmtbXC1_hCvLK0tAFc3VCRc,1902
|
@@ -62,7 +62,7 @@ unique_toolkit/short_term_memory/schemas.py,sha256=OhfcXyF6ACdwIXW45sKzjtZX_gkcJ
|
|
62
62
|
unique_toolkit/short_term_memory/service.py,sha256=cqpXA0nMbi4PhFweg-Cql3u0RvaTi5c8Xjv0uHMiSGc,8112
|
63
63
|
unique_toolkit/smart_rules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
64
64
|
unique_toolkit/smart_rules/compile.py,sha256=cxWjb2dxEI2HGsakKdVCkSNi7VK9mr08w5sDcFCQyWI,9553
|
65
|
-
unique_toolkit-0.7.
|
66
|
-
unique_toolkit-0.7.
|
67
|
-
unique_toolkit-0.7.
|
68
|
-
unique_toolkit-0.7.
|
65
|
+
unique_toolkit-0.7.34.dist-info/LICENSE,sha256=GlN8wHNdh53xwOPg44URnwag6TEolCjoq3YD_KrWgss,193
|
66
|
+
unique_toolkit-0.7.34.dist-info/METADATA,sha256=1OxeaVaHJorHRvzuJ8TXg6cbe-4PqT9iUMF-FZ5rG1Q,24918
|
67
|
+
unique_toolkit-0.7.34.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
68
|
+
unique_toolkit-0.7.34.dist-info/RECORD,,
|
File without changes
|
File without changes
|