synthelion 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. synthelion-1.0.0/LICENSE +21 -0
  2. synthelion-1.0.0/PKG-INFO +225 -0
  3. synthelion-1.0.0/README.md +194 -0
  4. synthelion-1.0.0/pyproject.toml +54 -0
  5. synthelion-1.0.0/setup.cfg +4 -0
  6. synthelion-1.0.0/synthelion/__init__.py +38 -0
  7. synthelion-1.0.0/synthelion/agent/__init__.py +5 -0
  8. synthelion-1.0.0/synthelion/agent/context_window.py +87 -0
  9. synthelion-1.0.0/synthelion/agent/memory_extractor.py +44 -0
  10. synthelion-1.0.0/synthelion/agent/memory_store.py +64 -0
  11. synthelion-1.0.0/synthelion/cli.py +130 -0
  12. synthelion-1.0.0/synthelion/compressors/__init__.py +11 -0
  13. synthelion-1.0.0/synthelion/compressors/code_compressor.py +84 -0
  14. synthelion-1.0.0/synthelion/compressors/diff_compressor.py +140 -0
  15. synthelion-1.0.0/synthelion/compressors/html_extractor.py +81 -0
  16. synthelion-1.0.0/synthelion/compressors/json_crusher.py +151 -0
  17. synthelion-1.0.0/synthelion/compressors/log_compressor.py +64 -0
  18. synthelion-1.0.0/synthelion/compressors/tabular.py +70 -0
  19. synthelion-1.0.0/synthelion/content_detector.py +80 -0
  20. synthelion-1.0.0/synthelion/content_router.py +170 -0
  21. synthelion-1.0.0/synthelion/core.py +396 -0
  22. synthelion-1.0.0/synthelion/detector.py +76 -0
  23. synthelion-1.0.0/synthelion/models.py +113 -0
  24. synthelion-1.0.0/synthelion/nlp/__init__.py +6 -0
  25. synthelion-1.0.0/synthelion/nlp/sentence_detector.py +74 -0
  26. synthelion-1.0.0/synthelion/nlp/summarizer.py +134 -0
  27. synthelion-1.0.0/synthelion/nlp/text_rank.py +138 -0
  28. synthelion-1.0.0/synthelion/nlp/text_splitter.py +91 -0
  29. synthelion-1.0.0/synthelion/plugins/__init__.py +0 -0
  30. synthelion-1.0.0/synthelion/plugins/mcp_server.py +78 -0
  31. synthelion-1.0.0/synthelion/plugins/openai_tools.py +233 -0
  32. synthelion-1.0.0/synthelion/word_provider.py +432 -0
  33. synthelion-1.0.0/synthelion/worddata/__init__.py +0 -0
  34. synthelion-1.0.0/synthelion/worddata/_index.br +0 -0
  35. synthelion-1.0.0/synthelion/worddata/afr.yaml.br +0 -0
  36. synthelion-1.0.0/synthelion/worddata/ara.yaml.br +0 -0
  37. synthelion-1.0.0/synthelion/worddata/bel.yaml.br +0 -0
  38. synthelion-1.0.0/synthelion/worddata/ben.yaml.br +0 -0
  39. synthelion-1.0.0/synthelion/worddata/bul.yaml.br +0 -0
  40. synthelion-1.0.0/synthelion/worddata/cat.yaml.br +0 -0
  41. synthelion-1.0.0/synthelion/worddata/ces.yaml.br +0 -0
  42. synthelion-1.0.0/synthelion/worddata/dan.yaml.br +0 -0
  43. synthelion-1.0.0/synthelion/worddata/deu.yaml.br +0 -0
  44. synthelion-1.0.0/synthelion/worddata/ell.yaml.br +0 -0
  45. synthelion-1.0.0/synthelion/worddata/eng.yaml.br +0 -0
  46. synthelion-1.0.0/synthelion/worddata/est.yaml.br +0 -0
  47. synthelion-1.0.0/synthelion/worddata/eus.yaml.br +0 -0
  48. synthelion-1.0.0/synthelion/worddata/fas.yaml.br +0 -0
  49. synthelion-1.0.0/synthelion/worddata/fin.yaml.br +0 -0
  50. synthelion-1.0.0/synthelion/worddata/fra.yaml.br +0 -0
  51. synthelion-1.0.0/synthelion/worddata/gle.yaml.br +0 -0
  52. synthelion-1.0.0/synthelion/worddata/glg.yaml.br +0 -0
  53. synthelion-1.0.0/synthelion/worddata/heb.yaml.br +0 -0
  54. synthelion-1.0.0/synthelion/worddata/hin.yaml.br +0 -0
  55. synthelion-1.0.0/synthelion/worddata/hrv.yaml.br +0 -0
  56. synthelion-1.0.0/synthelion/worddata/hun.yaml.br +0 -0
  57. synthelion-1.0.0/synthelion/worddata/hye.yaml.br +0 -0
  58. synthelion-1.0.0/synthelion/worddata/ind.yaml.br +0 -0
  59. synthelion-1.0.0/synthelion/worddata/isl.yaml.br +0 -0
  60. synthelion-1.0.0/synthelion/worddata/ita.yaml.br +0 -0
  61. synthelion-1.0.0/synthelion/worddata/jpn.yaml.br +0 -0
  62. synthelion-1.0.0/synthelion/worddata/kan.yaml.br +0 -0
  63. synthelion-1.0.0/synthelion/worddata/kaz.yaml.br +0 -0
  64. synthelion-1.0.0/synthelion/worddata/kor.yaml.br +0 -0
  65. synthelion-1.0.0/synthelion/worddata/lat.yaml.br +0 -0
  66. synthelion-1.0.0/synthelion/worddata/lav.yaml.br +0 -0
  67. synthelion-1.0.0/synthelion/worddata/lit.yaml.br +0 -0
  68. synthelion-1.0.0/synthelion/worddata/mar.yaml.br +0 -0
  69. synthelion-1.0.0/synthelion/worddata/mkd.yaml.br +0 -0
  70. synthelion-1.0.0/synthelion/worddata/msa.yaml.br +0 -0
  71. synthelion-1.0.0/synthelion/worddata/nld.yaml.br +0 -0
  72. synthelion-1.0.0/synthelion/worddata/nor.yaml.br +0 -0
  73. synthelion-1.0.0/synthelion/worddata/pol.yaml.br +0 -0
  74. synthelion-1.0.0/synthelion/worddata/por.yaml.br +0 -0
  75. synthelion-1.0.0/synthelion/worddata/ron.yaml.br +0 -0
  76. synthelion-1.0.0/synthelion/worddata/rus.yaml.br +0 -0
  77. synthelion-1.0.0/synthelion/worddata/slk.yaml.br +0 -0
  78. synthelion-1.0.0/synthelion/worddata/slv.yaml.br +0 -0
  79. synthelion-1.0.0/synthelion/worddata/spa.yaml.br +0 -0
  80. synthelion-1.0.0/synthelion/worddata/sqi.yaml.br +0 -0
  81. synthelion-1.0.0/synthelion/worddata/srp.yaml.br +0 -0
  82. synthelion-1.0.0/synthelion/worddata/swe.yaml.br +0 -0
  83. synthelion-1.0.0/synthelion/worddata/tam.yaml.br +0 -0
  84. synthelion-1.0.0/synthelion/worddata/tel.yaml.br +0 -0
  85. synthelion-1.0.0/synthelion/worddata/tha.yaml.br +0 -0
  86. synthelion-1.0.0/synthelion/worddata/tur.yaml.br +0 -0
  87. synthelion-1.0.0/synthelion/worddata/ukr.yaml.br +0 -0
  88. synthelion-1.0.0/synthelion/worddata/urd.yaml.br +0 -0
  89. synthelion-1.0.0/synthelion/worddata/vie.yaml.br +0 -0
  90. synthelion-1.0.0/synthelion/worddata/zho.yaml.br +0 -0
  91. synthelion-1.0.0/synthelion.egg-info/PKG-INFO +225 -0
  92. synthelion-1.0.0/synthelion.egg-info/SOURCES.txt +95 -0
  93. synthelion-1.0.0/synthelion.egg-info/dependency_links.txt +1 -0
  94. synthelion-1.0.0/synthelion.egg-info/entry_points.txt +3 -0
  95. synthelion-1.0.0/synthelion.egg-info/requires.txt +10 -0
  96. synthelion-1.0.0/synthelion.egg-info/top_level.txt +1 -0
  97. synthelion-1.0.0/tests/test_synthelion.py +393 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Francesco Paolo Passaro
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,225 @@
1
+ Metadata-Version: 2.4
2
+ Name: synthelion
3
+ Version: 1.0.0
4
+ Summary: MCP plugin + Python library for LLM token compression. 50+ languages, zero ML models. Port of Caveman (C#).
5
+ Author-email: Passaro Francesco Paolo <passaroweb@gmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/francescopaolopassaro/synthelion
8
+ Project-URL: Repository, https://github.com/francescopaolopassaro/synthelion
9
+ Project-URL: Bug Tracker, https://github.com/francescopaolopassaro/synthelion/issues
10
+ Project-URL: Original C# project, https://github.com/francescopaolopassaro/caveman
11
+ Keywords: llm,token,compression,prompt,nlp,mcp,claude,claude-code
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
19
+ Requires-Python: >=3.11
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: brotli>=1.1
23
+ Requires-Dist: regex>=2024.1
24
+ Requires-Dist: mcp>=1.0
25
+ Provides-Extra: openai
26
+ Requires-Dist: openai>=1.0; extra == "openai"
27
+ Provides-Extra: dev
28
+ Requires-Dist: pytest>=8.0; extra == "dev"
29
+ Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
30
+ Dynamic: license-file
31
+
32
+ # Synthelion — Claude Code Plugin + Python Library
33
+
34
+ **Synthelion** is a [Claude Code](https://claude.ai/code) MCP plugin and Python library that reduces LLM token usage by stripping grammatical noise and lemmatizing words — across 50+ languages, with zero ML model dependencies.
35
+
36
+ > "Why use many tokens when few tokens do trick?" — A caveman (and your wallet).
37
+
38
+ Python port of [Caveman](https://github.com/francescopaolopassaro/caveman) by Passaro Francesco Paolo (Digitalsolutions.it).
39
+
40
+ ---
41
+
42
+ ## Use as Claude Code plugin (30 seconds)
43
+
44
+ **1. Install:**
45
+ ```bash
46
+ pip install synthelion
47
+ ```
48
+
49
+ **2. Add to Claude Code** (`~/.claude/settings.json` on macOS/Linux, `%APPDATA%\Claude\claude_desktop_config.json` on Windows):
50
+ ```json
51
+ {
52
+ "mcpServers": {
53
+ "synthelion": {
54
+ "command": "synthelion-mcp"
55
+ }
56
+ }
57
+ }
58
+ ```
59
+
60
+ **3. Restart Claude Code.** Done — the tools `compress`, `detect_language`, `route_content`, `summarize`, `compress_batch` are now available.
61
+
62
+ **Zero-install with uvx:**
63
+ ```json
64
+ {
65
+ "mcpServers": {
66
+ "synthelion": {
67
+ "command": "uvx",
68
+ "args": ["--from", "synthelion", "synthelion-mcp"]
69
+ }
70
+ }
71
+ }
72
+ ```
73
+
74
+ → Full plugin guide: [`docs/claude-code-plugin.md`](docs/claude-code-plugin.md)
75
+
76
+ ---
77
+
78
+ Powered by Synthelion — © Passaro Francesco Paolo, Digitalsolutions.it (https://digitalsolutions.it)
79
+
80
+ ---
81
+
82
+ ## Installation
83
+
84
+ ```bash
85
+ pip install synthelion
86
+ # MCP server support (for Claude Code, OpenCode, …) is already part of the base install above:
87
+ # `mcp` is a core dependency, so there is no separate "synthelion[mcp]" extra to install.
88
+ # With OpenAI function tools:
89
+ pip install "synthelion[openai]"
90
+ ```
91
+
92
+ ---
93
+
94
+ ## Quick start
95
+
96
+ ```python
97
+ from synthelion import CompressionService, CompressionLevel
98
+
99
+ svc = CompressionService()
100
+ result = svc.compress(
101
+ "I would like to know if it is possible to receive information about cheap restaurants in Rome.",
102
+ CompressionLevel.SEMANTIC,
103
+ )
104
+ print(result.compressed_text) # "know possible receive information cheap restaurant Rome"
105
+ print(f"{result.efficiency_pct:.1f}% saved")
106
+ ```
107
+
108
+ ---
109
+
110
+ ## Compression levels
111
+
112
+ | Level | What it does | Typical savings |
113
+ |---|---|---|
114
+ | `LIGHT` | Remove stop words | ~25–35% |
115
+ | `SEMANTIC` | Stop words + lemmatization | ~30–69% |
116
+ | `AGGRESSIVE` | Lemmatization + generic-term pruning | ~35–70% |
117
+
118
+ ---
119
+
120
+ ## Language detection
121
+
122
+ ```python
123
+ from synthelion import LanguageDetector
124
+
125
+ det = LanguageDetector()
126
+ print(det.detect("Vorrei un tavolo per due persone, per favore.")) # ita
127
+ scores = det.detect_with_scores("Where is the nearest train station?")
128
+ # {"eng": 0.42, ...}
129
+ ```
130
+
131
+ ---
132
+
133
+ ## Content-aware routing
134
+
135
+ ```python
136
+ from synthelion import ContentRouter, CompressionProfile
137
+
138
+ router = ContentRouter.from_profile(CompressionProfile.BALANCED)
139
+ result = router.route(content) # auto-detects JSON/HTML/diff/log/code/prose
140
+ print(result.strategy_used, result.savings_pct)
141
+ ```
142
+
143
+ ---
144
+
145
+ ## MCP server (Claude Code / OpenCode)
146
+
147
+ ```bash
148
+ # Run the MCP server on stdio:
149
+ synthelion-mcp
150
+
151
+ # Or add to your Claude Code MCP config:
152
+ # {
153
+ # "mcpServers": {
154
+ # "synthelion": { "command": "synthelion-mcp" }
155
+ # }
156
+ # }
157
+ ```
158
+
159
+ Tools exposed: `compress`, `detect_language`, `route_content`, `summarize`, `compress_batch`.
160
+
161
+ ---
162
+
163
+ ## OpenAI function tools
164
+
165
+ ```python
166
+ from synthelion.plugins.openai_tools import get_tool_definitions, execute_tool
167
+
168
+ tools = get_tool_definitions()
169
+ # Pass to: client.chat.completions.create(tools=tools, ...)
170
+
171
+ # Execute a tool call returned by the model:
172
+ result = execute_tool("compress", {"text": "...", "level": "semantic"})
173
+ ```
174
+
175
+ ---
176
+
177
+ ## CLI
178
+
179
+ ```bash
180
+ synthelion compress --text "Hello world, I would like to know..." --level semantic
181
+ synthelion detect --text "Guten Morgen, wie geht es Ihnen?"
182
+ synthelion route --file myfile.json
183
+ synthelion serve-mcp # same as synthelion-mcp
184
+ ```
185
+
186
+ ---
187
+
188
+ ## Summarization
189
+
190
+ ```python
191
+ from synthelion.nlp import TfIdfSummarizer, TextRankSummarizer
192
+
193
+ summarizer = TfIdfSummarizer()
194
+ summary = summarizer.summarize(long_text, sentence_count=3)
195
+
196
+ tr = TextRankSummarizer()
197
+ summary = tr.summarize(long_text, ratio=0.3)
198
+ ```
199
+
200
+ ---
201
+
202
+ ## Agent toolkit
203
+
204
+ ```python
205
+ from synthelion.agent import ContextWindow, MemoryStore
206
+
207
+ window = ContextWindow(max_tokens=4000)
208
+ window.append("user", "Tell me about Rome.")
209
+ window.append("assistant", "Rome is the capital of Italy...")
210
+ # Auto-compacts when over budget:
211
+ print(window.to_messages_json())
212
+
213
+ memory = MemoryStore()
214
+ memory.remember({"summary": "User prefers Italian cuisine", "keywords": ["pizza", "pasta"]})
215
+ relevant = memory.recall("What food does the user like?", top_k=3)
216
+ ```
217
+
218
+ ---
219
+
220
+ ## Attribution
221
+
222
+ Synthelion is a Python port of **Caveman** — © 2026 Passaro Francesco Paolo, Digitalsolutions.it.
223
+ Original C# source: https://github.com/francescopaolopassaro/caveman
224
+
225
+ Language data derived from [Universal Dependencies](https://universaldependencies.org/) treebanks (CC BY-SA / CC BY).
@@ -0,0 +1,194 @@
1
+ # Synthelion — Claude Code Plugin + Python Library
2
+
3
+ **Synthelion** is a [Claude Code](https://claude.ai/code) MCP plugin and Python library that reduces LLM token usage by stripping grammatical noise and lemmatizing words — across 50+ languages, with zero ML model dependencies.
4
+
5
+ > "Why use many tokens when few tokens do trick?" — A caveman (and your wallet).
6
+
7
+ Python port of [Caveman](https://github.com/francescopaolopassaro/caveman) by Passaro Francesco Paolo (Digitalsolutions.it).
8
+
9
+ ---
10
+
11
+ ## Use as Claude Code plugin (30 seconds)
12
+
13
+ **1. Install:**
14
+ ```bash
15
+ pip install synthelion
16
+ ```
17
+
18
+ **2. Add to Claude Code** (`~/.claude/settings.json` on macOS/Linux, `%APPDATA%\Claude\claude_desktop_config.json` on Windows):
19
+ ```json
20
+ {
21
+ "mcpServers": {
22
+ "synthelion": {
23
+ "command": "synthelion-mcp"
24
+ }
25
+ }
26
+ }
27
+ ```
28
+
29
+ **3. Restart Claude Code.** Done — the tools `compress`, `detect_language`, `route_content`, `summarize`, `compress_batch` are now available.
30
+
31
+ **Zero-install with uvx:**
32
+ ```json
33
+ {
34
+ "mcpServers": {
35
+ "synthelion": {
36
+ "command": "uvx",
37
+ "args": ["--from", "synthelion", "synthelion-mcp"]
38
+ }
39
+ }
40
+ }
41
+ ```
42
+
43
+ → Full plugin guide: [`docs/claude-code-plugin.md`](docs/claude-code-plugin.md)
44
+
45
+ ---
46
+
47
+ Powered by Synthelion — © Passaro Francesco Paolo, Digitalsolutions.it (https://digitalsolutions.it)
48
+
49
+ ---
50
+
51
+ ## Installation
52
+
53
+ ```bash
54
+ pip install synthelion
55
+ # MCP server support (for Claude Code, OpenCode, …) is already part of the base install above:
56
+ # `mcp` is a core dependency, so there is no separate "synthelion[mcp]" extra to install.
57
+ # With OpenAI function tools:
58
+ pip install "synthelion[openai]"
59
+ ```
60
+
61
+ ---
62
+
63
+ ## Quick start
64
+
65
+ ```python
66
+ from synthelion import CompressionService, CompressionLevel
67
+
68
+ svc = CompressionService()
69
+ result = svc.compress(
70
+ "I would like to know if it is possible to receive information about cheap restaurants in Rome.",
71
+ CompressionLevel.SEMANTIC,
72
+ )
73
+ print(result.compressed_text) # "know possible receive information cheap restaurant Rome"
74
+ print(f"{result.efficiency_pct:.1f}% saved")
75
+ ```
76
+
77
+ ---
78
+
79
+ ## Compression levels
80
+
81
+ | Level | What it does | Typical savings |
82
+ |---|---|---|
83
+ | `LIGHT` | Remove stop words | ~25–35% |
84
+ | `SEMANTIC` | Stop words + lemmatization | ~30–69% |
85
+ | `AGGRESSIVE` | Lemmatization + generic-term pruning | ~35–70% |
86
+
87
+ ---
88
+
89
+ ## Language detection
90
+
91
+ ```python
92
+ from synthelion import LanguageDetector
93
+
94
+ det = LanguageDetector()
95
+ print(det.detect("Vorrei un tavolo per due persone, per favore.")) # ita
96
+ scores = det.detect_with_scores("Where is the nearest train station?")
97
+ # {"eng": 0.42, ...}
98
+ ```
99
+
100
+ ---
101
+
102
+ ## Content-aware routing
103
+
104
+ ```python
105
+ from synthelion import ContentRouter, CompressionProfile
106
+
107
+ router = ContentRouter.from_profile(CompressionProfile.BALANCED)
108
+ result = router.route(content) # auto-detects JSON/HTML/diff/log/code/prose
109
+ print(result.strategy_used, result.savings_pct)
110
+ ```
111
+
112
+ ---
113
+
114
+ ## MCP server (Claude Code / OpenCode)
115
+
116
+ ```bash
117
+ # Run the MCP server on stdio:
118
+ synthelion-mcp
119
+
120
+ # Or add to your Claude Code MCP config:
121
+ # {
122
+ # "mcpServers": {
123
+ # "synthelion": { "command": "synthelion-mcp" }
124
+ # }
125
+ # }
126
+ ```
127
+
128
+ Tools exposed: `compress`, `detect_language`, `route_content`, `summarize`, `compress_batch`.
129
+
130
+ ---
131
+
132
+ ## OpenAI function tools
133
+
134
+ ```python
135
+ from synthelion.plugins.openai_tools import get_tool_definitions, execute_tool
136
+
137
+ tools = get_tool_definitions()
138
+ # Pass to: client.chat.completions.create(tools=tools, ...)
139
+
140
+ # Execute a tool call returned by the model:
141
+ result = execute_tool("compress", {"text": "...", "level": "semantic"})
142
+ ```
143
+
144
+ ---
145
+
146
+ ## CLI
147
+
148
+ ```bash
149
+ synthelion compress --text "Hello world, I would like to know..." --level semantic
150
+ synthelion detect --text "Guten Morgen, wie geht es Ihnen?"
151
+ synthelion route --file myfile.json
152
+ synthelion serve-mcp # same as synthelion-mcp
153
+ ```
154
+
155
+ ---
156
+
157
+ ## Summarization
158
+
159
+ ```python
160
+ from synthelion.nlp import TfIdfSummarizer, TextRankSummarizer
161
+
162
+ summarizer = TfIdfSummarizer()
163
+ summary = summarizer.summarize(long_text, sentence_count=3)
164
+
165
+ tr = TextRankSummarizer()
166
+ summary = tr.summarize(long_text, ratio=0.3)
167
+ ```
168
+
169
+ ---
170
+
171
+ ## Agent toolkit
172
+
173
+ ```python
174
+ from synthelion.agent import ContextWindow, MemoryStore
175
+
176
+ window = ContextWindow(max_tokens=4000)
177
+ window.append("user", "Tell me about Rome.")
178
+ window.append("assistant", "Rome is the capital of Italy...")
179
+ # Auto-compacts when over budget:
180
+ print(window.to_messages_json())
181
+
182
+ memory = MemoryStore()
183
+ memory.remember({"summary": "User prefers Italian cuisine", "keywords": ["pizza", "pasta"]})
184
+ relevant = memory.recall("What food does the user like?", top_k=3)
185
+ ```
186
+
187
+ ---
188
+
189
+ ## Attribution
190
+
191
+ Synthelion is a Python port of **Caveman** — © 2026 Passaro Francesco Paolo, Digitalsolutions.it.
192
+ Original C# source: https://github.com/francescopaolopassaro/caveman
193
+
194
+ Language data derived from [Universal Dependencies](https://universaldependencies.org/) treebanks (CC BY-SA / CC BY).
@@ -0,0 +1,54 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "synthelion"
7
+ version = "1.0.0"
8
+ description = "MCP plugin + Python library for LLM token compression. 50+ languages, zero ML models. Port of Caveman (C#)."
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ license-files = ["LICENSE"]
12
+ authors = [{ name = "Passaro Francesco Paolo", email = "passaroweb@gmail.com" }]
13
+ requires-python = ">=3.11"
14
+ keywords = ["llm", "token", "compression", "prompt", "nlp", "mcp", "claude", "claude-code"]
15
+ classifiers = [
16
+ "Development Status :: 4 - Beta",
17
+ "Intended Audience :: Developers",
18
+ "Programming Language :: Python :: 3",
19
+ "Programming Language :: Python :: 3.11",
20
+ "Programming Language :: Python :: 3.12",
21
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
22
+ "Topic :: Software Development :: Libraries :: Python Modules",
23
+ ]
24
+ # mcp is a core dependency — this package is primarily a Claude Code / MCP plugin
25
+ dependencies = [
26
+ "brotli>=1.1",
27
+ "regex>=2024.1",
28
+ "mcp>=1.0",
29
+ ]
30
+
31
+ [project.urls]
32
+ Homepage = "https://github.com/francescopaolopassaro/synthelion"
33
+ Repository = "https://github.com/francescopaolopassaro/synthelion"
34
+ "Bug Tracker" = "https://github.com/francescopaolopassaro/synthelion/issues"
35
+ "Original C# project" = "https://github.com/francescopaolopassaro/caveman"
36
+
37
+ [project.optional-dependencies]
38
+ openai = ["openai>=1.0"]
39
+ dev = ["pytest>=8.0", "pytest-asyncio>=0.23"]
40
+
41
+ [project.scripts]
42
+ synthelion = "synthelion.cli:main"
43
+ synthelion-mcp = "synthelion.plugins.mcp_server:main"
44
+
45
+ [tool.setuptools.packages.find]
46
+ where = ["."]
47
+ include = ["synthelion*"]
48
+
49
+ [tool.setuptools.package-data]
50
+ synthelion = ["worddata/*.br"]
51
+
52
+ [tool.pytest.ini_options]
53
+ testpaths = ["tests"]
54
+ asyncio_mode = "auto"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,38 @@
1
+ # Synthelion — Python port of Caveman (https://github.com/francescopaolopassaro/caveman)
2
+ # © 2026 Passaro Francesco Paolo — Digitalsolutions.it
3
+ """Synthelion: token compressor for LLMs. 50+ languages, zero ML models.
4
+
5
+ Python port of Caveman (C#) by Passaro Francesco Paolo (Digitalsolutions.it).
6
+ Original: https://github.com/francescopaolopassaro/caveman
7
+ """
8
+ from synthelion.models import (
9
+ CompressionLevel,
10
+ CompressionProfile,
11
+ CompressionResult,
12
+ ContentType,
13
+ RoutedCompressionResult,
14
+ VerbosityLevel,
15
+ )
16
+ from synthelion.word_provider import FunctionWordProvider
17
+ from synthelion.detector import LanguageDetector
18
+ from synthelion.core import CompressionFilter, CompressionService
19
+ from synthelion.content_detector import ContentDetector
20
+ from synthelion.content_router import ContentRouter
21
+
22
+ __version__ = "1.0.0"
23
+ __author__ = "Passaro Francesco Paolo"
24
+
25
+ __all__ = [
26
+ "CompressionLevel",
27
+ "CompressionProfile",
28
+ "CompressionResult",
29
+ "CompressionFilter",
30
+ "ContentType",
31
+ "RoutedCompressionResult",
32
+ "VerbosityLevel",
33
+ "FunctionWordProvider",
34
+ "LanguageDetector",
35
+ "CompressionService",
36
+ "ContentDetector",
37
+ "ContentRouter",
38
+ ]
@@ -0,0 +1,5 @@
1
+ from synthelion.agent.context_window import ContextWindow
2
+ from synthelion.agent.memory_extractor import MemoryExtractor
3
+ from synthelion.agent.memory_store import MemoryStore
4
+
5
+ __all__ = ["ContextWindow", "MemoryExtractor", "MemoryStore"]
@@ -0,0 +1,87 @@
1
+ # Synthelion — Python port of Caveman (https://github.com/francescopaolopassaro/caveman)
2
+ # © 2026 Passaro Francesco Paolo — Digitalsolutions.it
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ import json
7
+
8
+ from synthelion.nlp.text_rank import TextRankSummarizer
9
+ from synthelion.word_provider import FunctionWordProvider
10
+
11
+
12
def _count_tokens(text: str) -> int:
    """Return a rough token estimate for *text*: one token per four characters."""
    return len(text) // 4


class ContextWindow:
    """Rolling token-budget conversation buffer for AI agents.

    Ported from C# CavemanContextWindow. Whenever an appended message pushes
    the estimated token count above ``max_tokens``, the turns older than the
    last ``keep_last_turns`` messages are replaced by a single summary message
    produced by the configured summarizer.
    """

    def __init__(
        self,
        max_tokens: int = 4000,
        keep_last_turns: int = 4,
        deduplicate: bool = False,
        summarizer: "TextRankSummarizer | None" = None,
    ) -> None:
        """Create an empty window.

        Args:
            max_tokens: Estimated-token budget that triggers compaction.
            keep_last_turns: Number of most recent messages that compaction
                leaves untouched. Values below zero are treated as zero.
            deduplicate: When true, a message whose content was already
                appended is silently ignored.
            summarizer: Object exposing ``summarize(text, ratio=...)``; a
                default ``TextRankSummarizer`` is created when omitted.

        Raises:
            ValueError: If ``max_tokens`` is not positive.
        """
        if max_tokens <= 0:
            raise ValueError("max_tokens must be positive")
        self.max_tokens = max_tokens
        self.keep_last_turns = keep_last_turns
        self.deduplicate = deduplicate
        self._messages: list[dict] = []
        self._seen_hashes: set[str] = set()
        # Explicit None check: a caller-supplied summarizer must be honoured
        # even if the object happens to be falsy.
        self._summarizer = summarizer if summarizer is not None else TextRankSummarizer()

    @property
    def message_count(self) -> int:
        """Number of messages currently held in the window."""
        return len(self._messages)

    @property
    def token_count(self) -> int:
        """Estimated token count of the rendered conversation."""
        return _count_tokens(self.render())

    def append(self, role: str, content: str) -> None:
        """Add a message, then compact if the token budget is exceeded.

        Empty or whitespace-only content is ignored. With ``deduplicate``
        enabled, content that was appended before is ignored as well.
        """
        if not content or not content.strip():
            return
        # MD5 is only a cheap duplicate-detection key here, not a security
        # primitive; flagging that keeps it usable on FIPS-restricted builds.
        digest = hashlib.md5(content.encode(), usedforsecurity=False).hexdigest()
        if self.deduplicate and digest in self._seen_hashes:
            return
        self._seen_hashes.add(digest)
        self._messages.append({"role": role, "content": content})
        if self.token_count > self.max_tokens:
            self._compact()

    def render(self) -> str:
        """Return the conversation as ``role: content`` lines joined by newlines."""
        return "\n".join(f"{m['role']}: {m['content']}" for m in self._messages)

    def to_messages_json(self, indent: int | None = None) -> str:
        """Serialize the message list to JSON without escaping non-ASCII text."""
        return json.dumps(self._messages, ensure_ascii=False, indent=indent)

    def to_messages(self) -> list[dict]:
        """Return a shallow copy of the message list."""
        return list(self._messages)

    def clear(self) -> None:
        """Drop all messages and forget every content hash seen so far."""
        self._messages.clear()
        self._seen_hashes.clear()

    def _compact(self) -> None:
        """Summarize older turns to fit within the token budget.

        The last ``keep_last_turns`` messages are kept verbatim. Among the
        earlier messages, system messages are preserved as-is and all others
        are replaced by one assistant message carrying their summary.
        """
        keep = max(self.keep_last_turns, 0)
        if len(self._messages) <= keep:
            return

        # Split by position rather than with a negative slice: ``[-0:]`` would
        # select the whole list and make the window grow instead of shrink.
        split = len(self._messages) - keep
        head = self._messages[:split]
        recent = self._messages[split:]

        # Partition only the older part, so a system message is neither
        # duplicated (when it sits in ``recent``) nor allowed to push regular
        # turns out of the summary (when it is not at the very front).
        system_msgs = [m for m in head if m["role"] == "system"]
        old = [m for m in head if m["role"] != "system"]

        if not old:
            return

        old_text = "\n".join(f"{m['role']}: {m['content']}" for m in old)
        summary = self._summarizer.summarize(old_text, ratio=0.3)

        compacted = {"role": "assistant", "content": f"[Summary of earlier context: {summary}]"}
        self._messages = system_msgs + [compacted] + recent
@@ -0,0 +1,44 @@
1
+ # Synthelion — Python port of Caveman (https://github.com/francescopaolopassaro/caveman)
2
+ # © 2026 Passaro Francesco Paolo — Digitalsolutions.it
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+ from synthelion.nlp.text_rank import TextRankSummarizer
8
+ from synthelion.word_provider import FunctionWordProvider
9
+
10
+
11
+ class MemoryExtractor:
12
+ """Distils salient sentences and key terms from a conversation.
13
+
14
+ Ported from C# CavemanMemoryExtractor. Returns {summary, keywords}.
15
+ No embeddings — pure lexical extraction.
16
+ """
17
+
18
+ def __init__(self, word_provider: FunctionWordProvider | None = None) -> None:
19
+ self._provider = word_provider or FunctionWordProvider()
20
+ self._summarizer = TextRankSummarizer(self._provider)
21
+
22
+ def extract(self, text: str, max_sentences: int = 5) -> dict:
23
+ if not text or not text.strip():
24
+ return {"summary": "", "keywords": []}
25
+
26
+ summary = self._summarizer.summarize(text, sentence_count=max_sentences)
27
+ keywords = self._extract_keywords(text)
28
+ return {"summary": summary, "keywords": keywords[:20]}
29
+
30
+ def _extract_keywords(self, text: str) -> list[str]:
31
+ # Extract capitalized words (likely proper nouns / entities) and frequent nouns
32
+ from collections import Counter
33
+ words = re.findall(r"\b[A-Z][a-z]{2,}\b", text)
34
+ # Add most frequent content words
35
+ all_words = re.findall(r"\b[a-zA-Z]{4,}\b", text.lower())
36
+ common = [w for w, _ in Counter(all_words).most_common(30)]
37
+ seen: set[str] = set()
38
+ result: list[str] = []
39
+ for w in words + common:
40
+ low = w.lower()
41
+ if low not in seen:
42
+ seen.add(low)
43
+ result.append(w)
44
+ return result