unique_toolkit 0.7.32__py3-none-any.whl → 0.7.34__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -190,9 +190,11 @@ def count_tokens(text: str, encoding_model="cl100k_base") -> int:
190
190
  return len(encoding.encode(text))
191
191
 
192
192
 
193
- def map_content_chunk(content_chunk: dict):
193
+ def map_content_chunk(content_id: str, content_key: str, content_chunk: dict):
194
194
  return ContentChunk(
195
- id=content_chunk["id"],
195
+ id=content_id,
196
+ key=content_key,
197
+ chunk_id=content_chunk["id"],
196
198
  text=content_chunk["text"],
197
199
  start_page=content_chunk["startPage"],
198
200
  end_page=content_chunk["endPage"],
@@ -206,7 +208,10 @@ def map_content(content: dict):
206
208
  key=content["key"],
207
209
  title=content["title"],
208
210
  url=content["url"],
209
- chunks=[map_content_chunk(chunk) for chunk in content["chunks"]],
211
+ chunks=[
212
+ map_content_chunk(content["id"], content["key"], chunk)
213
+ for chunk in content["chunks"]
214
+ ],
210
215
  created_at=content["createdAt"],
211
216
  updated_at=content["updatedAt"],
212
217
  )
@@ -51,9 +51,9 @@ def _add_references(
51
51
  # Only reference a source once, even if it is mentioned multiple times in the text.
52
52
  with_footnotes = _add_footnotes_to_text(text=text, references=references)
53
53
 
54
- # Gemini 2.5 flash model has tendency to add multiple references for the same fact
54
+ # Gemini 2.5 models have a tendency to add multiple references for the same fact
55
55
  # This is a workaround to limit the number of references to 5
56
- if model and model.startswith("litellm:gemini-2-5-flash"):
56
+ if model and model.startswith("litellm:gemini-2-5"):
57
57
  reduced_text = _limit_consecutive_source_references(with_footnotes)
58
58
 
59
59
  # Get the references that remain after reduction
@@ -92,8 +92,11 @@ def _preprocess_message(text: str) -> str:
92
92
  # Replace XML format '[<source XX>]', '[<sourceXX>]' and '[\<sourceXX>]' with [XX]
93
93
  text = re.sub(r"\[(\\)?<source[\s]?(\d+)>\]", r"[\2]", text)
94
94
 
95
- # Replace format '[source XX]' and '[sourceXX]' with [XX]
96
- text = re.sub(r"\[source[\s]?(\d+)\]", r"[\1]", text)
95
+ # Replace format 'source XX', 'source_X' and 'sourceXX' references with [XX], where XX is a number
96
+ text = re.sub(r"source[\s_]?(\d+)", r"[\1]", text)
97
+
98
+ # Replace 'source_number="X"' with [X], where X is a number
99
+ text = re.sub(r"source_number=\"(\d+)\"", r"[\1]", text)
97
100
 
98
101
  # Make all references non-bold
99
102
  text = re.sub(r"\[\*\*(\d+)\*\*\]", r"[\1]", text)
@@ -107,13 +110,19 @@ def _preprocess_message(text: str) -> str:
107
110
  # Replace '[<[XX]>]' and '[\<[XX]>]' with [XX]
108
111
  text = re.sub(r"\[(\\)?\[?<\[(\d+)\]?\]>\]", r"[\2]", text)
109
112
 
110
- # Replace '[[A], [B], ...]' or '[[A], B, C, ...]' with [A][B][C]...
113
+ # Replace '[source: X, Y, Z]' with [X][Y][Z], where X,Y,Z are numbers
114
+ def replace_source_colon(match):
115
+ numbers = re.findall(r"\d+", match.group(0))
116
+ return "".join(f"[{n}]" for n in numbers)
117
+ text = re.sub(r"\[source:\s*([\d,\s]+)\]", replace_source_colon, text)
118
+
119
+ # Replace '[[A], [B], ...]', '[[A], B, C, ...]', and '[X, Y, Z]' with [A][B][C]... where A,B,C are numbers
111
120
  def replace_combined_brackets(match):
112
121
  numbers = re.findall(r"\d+", match.group(0))
113
122
  return "".join(f"[{n}]" for n in numbers)
114
123
 
115
124
  text = re.sub(
116
- r"\[\[(\d+)\](?:,\s*(?:\[)?\d+(?:\])?)*\]", replace_combined_brackets, text
125
+ r"(?:\[\[(\d+)\](?:,\s*(?:\[)?\d+(?:\])?)*\]|\[([\d,\s]+)\])", replace_combined_brackets, text
117
126
  )
118
127
 
119
128
  return text
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: unique_toolkit
3
- Version: 0.7.32
3
+ Version: 0.7.34
4
4
  Summary:
5
5
  License: Proprietary
6
6
  Author: Martin Fadler
@@ -111,6 +111,12 @@ All notable changes to this project will be documented in this file.
111
111
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
112
112
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
113
113
 
114
+ ## [0.7.34] - 2025-05-30
115
+ - Fix incorrect mapping in `ContentService` for the `search_content` function when mapping into `ContentChunk` object
116
+
117
+ ## [0.7.33] - 2025-06-25
118
+ - Update reference post-processing
119
+
114
120
  ## [0.7.32] - 2025-06-24
115
121
  - Create `classmethod` for `LanguageModelMessages` to load raw messages to root
116
122
 
@@ -23,7 +23,7 @@ unique_toolkit/content/constants.py,sha256=1iy4Y67xobl5VTnJB6SxSyuoBWbdLl9244xfV
23
23
  unique_toolkit/content/functions.py,sha256=0ELepm3_sl0SD_SYzvQVQ-jTdrcUqK5mVJZv0nQBuAw,18367
24
24
  unique_toolkit/content/schemas.py,sha256=KJ604BOx0vBh2AwlTCZkOo55aHsI6yj8vxDAARKKqEo,2995
25
25
  unique_toolkit/content/service.py,sha256=jGRTIt0JQKL6qRyZ9-_njWJejKKvIvDOzEQrf8CdMaU,19491
26
- unique_toolkit/content/utils.py,sha256=GUVPrkZfMoAj4MRoBs5BD_7vSuLZTZx69hyWzYFrI50,7747
26
+ unique_toolkit/content/utils.py,sha256=qNVmHTuETaPNGqheg7TbgPr1_1jbNHDc09N5RrmUIyo,7901
27
27
  unique_toolkit/embedding/__init__.py,sha256=uUyzjonPvuDCYsvXCIt7ErQXopLggpzX-MEQd3_e2kE,250
28
28
  unique_toolkit/embedding/constants.py,sha256=Lj8-Lcy1FvuC31PM9Exq7vaFuxQV4pEI1huUMFX-J2M,52
29
29
  unique_toolkit/embedding/functions.py,sha256=3qp-BfuMAbnp8YB04rh3xH8vsJuCBPizoy-JeaBFtoQ,1944
@@ -50,7 +50,7 @@ unique_toolkit/language_model/constants.py,sha256=B-topqW0r83dkC_25DeQfnPk3n53qz
50
50
  unique_toolkit/language_model/functions.py,sha256=WhgHbJgz4Z2aZt9TLdOpI0PGyYWA5R90tdwkwdDeT8c,11987
51
51
  unique_toolkit/language_model/infos.py,sha256=w5__BVG-IiiEYKG1FwM838wzqNbYI3eCCEDocKezc0I,34801
52
52
  unique_toolkit/language_model/prompt.py,sha256=JSawaLjQg3VR-E2fK8engFyJnNdk21zaO8pPIodzN4Q,3991
53
- unique_toolkit/language_model/reference.py,sha256=ntoWdJ_hly8FntTgg1qpj0ta55SOCCVsC5L49VqOiAQ,8067
53
+ unique_toolkit/language_model/reference.py,sha256=jd-JPuqLwEJDs56im56o-AdiT73pdwNtSzcW38LZ53o,8565
54
54
  unique_toolkit/language_model/schemas.py,sha256=AeuDRJFblGzEYcEMyrlxpOPk12Di3J45I9rT2xZrhEU,14332
55
55
  unique_toolkit/language_model/service.py,sha256=VRkUk2XbijqGlnTTvqU7uCue6qtT7lpLd_Y8f3bWv1I,10486
56
56
  unique_toolkit/language_model/utils.py,sha256=bPQ4l6_YO71w-zaIPanUUmtbXC1_hCvLK0tAFc3VCRc,1902
@@ -62,7 +62,7 @@ unique_toolkit/short_term_memory/schemas.py,sha256=OhfcXyF6ACdwIXW45sKzjtZX_gkcJ
62
62
  unique_toolkit/short_term_memory/service.py,sha256=cqpXA0nMbi4PhFweg-Cql3u0RvaTi5c8Xjv0uHMiSGc,8112
63
63
  unique_toolkit/smart_rules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
64
64
  unique_toolkit/smart_rules/compile.py,sha256=cxWjb2dxEI2HGsakKdVCkSNi7VK9mr08w5sDcFCQyWI,9553
65
- unique_toolkit-0.7.32.dist-info/LICENSE,sha256=GlN8wHNdh53xwOPg44URnwag6TEolCjoq3YD_KrWgss,193
66
- unique_toolkit-0.7.32.dist-info/METADATA,sha256=qmPxpLockST_2FR2k05nfwgNDtI7C1pBK9fbNwCoKGE,24713
67
- unique_toolkit-0.7.32.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
68
- unique_toolkit-0.7.32.dist-info/RECORD,,
65
+ unique_toolkit-0.7.34.dist-info/LICENSE,sha256=GlN8wHNdh53xwOPg44URnwag6TEolCjoq3YD_KrWgss,193
66
+ unique_toolkit-0.7.34.dist-info/METADATA,sha256=1OxeaVaHJorHRvzuJ8TXg6cbe-4PqT9iUMF-FZ5rG1Q,24918
67
+ unique_toolkit-0.7.34.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
68
+ unique_toolkit-0.7.34.dist-info/RECORD,,