synthelion 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synthelion-1.0.0/LICENSE +21 -0
- synthelion-1.0.0/PKG-INFO +225 -0
- synthelion-1.0.0/README.md +194 -0
- synthelion-1.0.0/pyproject.toml +54 -0
- synthelion-1.0.0/setup.cfg +4 -0
- synthelion-1.0.0/synthelion/__init__.py +38 -0
- synthelion-1.0.0/synthelion/agent/__init__.py +5 -0
- synthelion-1.0.0/synthelion/agent/context_window.py +87 -0
- synthelion-1.0.0/synthelion/agent/memory_extractor.py +44 -0
- synthelion-1.0.0/synthelion/agent/memory_store.py +64 -0
- synthelion-1.0.0/synthelion/cli.py +130 -0
- synthelion-1.0.0/synthelion/compressors/__init__.py +11 -0
- synthelion-1.0.0/synthelion/compressors/code_compressor.py +84 -0
- synthelion-1.0.0/synthelion/compressors/diff_compressor.py +140 -0
- synthelion-1.0.0/synthelion/compressors/html_extractor.py +81 -0
- synthelion-1.0.0/synthelion/compressors/json_crusher.py +151 -0
- synthelion-1.0.0/synthelion/compressors/log_compressor.py +64 -0
- synthelion-1.0.0/synthelion/compressors/tabular.py +70 -0
- synthelion-1.0.0/synthelion/content_detector.py +80 -0
- synthelion-1.0.0/synthelion/content_router.py +170 -0
- synthelion-1.0.0/synthelion/core.py +396 -0
- synthelion-1.0.0/synthelion/detector.py +76 -0
- synthelion-1.0.0/synthelion/models.py +113 -0
- synthelion-1.0.0/synthelion/nlp/__init__.py +6 -0
- synthelion-1.0.0/synthelion/nlp/sentence_detector.py +74 -0
- synthelion-1.0.0/synthelion/nlp/summarizer.py +134 -0
- synthelion-1.0.0/synthelion/nlp/text_rank.py +138 -0
- synthelion-1.0.0/synthelion/nlp/text_splitter.py +91 -0
- synthelion-1.0.0/synthelion/plugins/__init__.py +0 -0
- synthelion-1.0.0/synthelion/plugins/mcp_server.py +78 -0
- synthelion-1.0.0/synthelion/plugins/openai_tools.py +233 -0
- synthelion-1.0.0/synthelion/word_provider.py +432 -0
- synthelion-1.0.0/synthelion/worddata/__init__.py +0 -0
- synthelion-1.0.0/synthelion/worddata/_index.br +0 -0
- synthelion-1.0.0/synthelion/worddata/afr.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/ara.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/bel.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/ben.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/bul.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/cat.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/ces.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/dan.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/deu.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/ell.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/eng.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/est.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/eus.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/fas.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/fin.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/fra.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/gle.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/glg.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/heb.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/hin.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/hrv.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/hun.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/hye.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/ind.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/isl.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/ita.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/jpn.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/kan.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/kaz.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/kor.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/lat.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/lav.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/lit.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/mar.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/mkd.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/msa.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/nld.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/nor.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/pol.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/por.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/ron.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/rus.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/slk.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/slv.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/spa.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/sqi.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/srp.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/swe.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/tam.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/tel.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/tha.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/tur.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/ukr.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/urd.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/vie.yaml.br +0 -0
- synthelion-1.0.0/synthelion/worddata/zho.yaml.br +0 -0
- synthelion-1.0.0/synthelion.egg-info/PKG-INFO +225 -0
- synthelion-1.0.0/synthelion.egg-info/SOURCES.txt +95 -0
- synthelion-1.0.0/synthelion.egg-info/dependency_links.txt +1 -0
- synthelion-1.0.0/synthelion.egg-info/entry_points.txt +3 -0
- synthelion-1.0.0/synthelion.egg-info/requires.txt +10 -0
- synthelion-1.0.0/synthelion.egg-info/top_level.txt +1 -0
- synthelion-1.0.0/tests/test_synthelion.py +393 -0
synthelion-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Francesco Paolo Passaro
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: synthelion
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: MCP plugin + Python library for LLM token compression. 50+ languages, zero ML models. Port of Caveman (C#).
|
|
5
|
+
Author-email: Passaro Francesco Paolo <passaroweb@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/francescopaolopassaro/synthelion
|
|
8
|
+
Project-URL: Repository, https://github.com/francescopaolopassaro/synthelion
|
|
9
|
+
Project-URL: Bug Tracker, https://github.com/francescopaolopassaro/synthelion/issues
|
|
10
|
+
Project-URL: Original C# project, https://github.com/francescopaolopassaro/caveman
|
|
11
|
+
Keywords: llm,token,compression,prompt,nlp,mcp,claude,claude-code
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
|
+
Requires-Python: >=3.11
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Requires-Dist: brotli>=1.1
|
|
23
|
+
Requires-Dist: regex>=2024.1
|
|
24
|
+
Requires-Dist: mcp>=1.0
|
|
25
|
+
Provides-Extra: openai
|
|
26
|
+
Requires-Dist: openai>=1.0; extra == "openai"
|
|
27
|
+
Provides-Extra: dev
|
|
28
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
29
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
|
|
30
|
+
Dynamic: license-file
|
|
31
|
+
|
|
32
|
+
# Synthelion — Claude Code Plugin + Python Library
|
|
33
|
+
|
|
34
|
+
**Synthelion** is a [Claude Code](https://claude.ai/code) MCP plugin and Python library that reduces LLM token usage by stripping grammatical noise and lemmatizing words — across 50+ languages, with zero ML model dependencies.
|
|
35
|
+
|
|
36
|
+
> "Why use many tokens when few tokens do trick?" — A caveman (and your wallet).
|
|
37
|
+
|
|
38
|
+
Python port of [Caveman](https://github.com/francescopaolopassaro/caveman) by Passaro Francesco Paolo (Digitalsolutions.it).
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
## Use as Claude Code plugin (30 seconds)
|
|
43
|
+
|
|
44
|
+
**1. Install:**
|
|
45
|
+
```bash
|
|
46
|
+
pip install synthelion
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
**2. Add to Claude Code** (`~/.claude/settings.json` on macOS/Linux, `%APPDATA%\Claude\claude_desktop_config.json` on Windows):
|
|
50
|
+
```json
|
|
51
|
+
{
|
|
52
|
+
"mcpServers": {
|
|
53
|
+
"synthelion": {
|
|
54
|
+
"command": "synthelion-mcp"
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
**3. Restart Claude Code.** Done — the tools `compress`, `detect_language`, `route_content`, `summarize`, `compress_batch` are now available.
|
|
61
|
+
|
|
62
|
+
**Zero-install with uvx:**
|
|
63
|
+
```json
|
|
64
|
+
{
|
|
65
|
+
"mcpServers": {
|
|
66
|
+
"synthelion": {
|
|
67
|
+
"command": "uvx",
|
|
68
|
+
"args": ["synthelion-mcp"]
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
→ Full plugin guide: [`docs/claude-code-plugin.md`](docs/claude-code-plugin.md)
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
Powered by Synthelion — © Passaro Francesco Paolo, Digitalsolutions.it (https://digitalsolutions.it)
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
## Installation
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
pip install synthelion
|
|
86
|
+
# With MCP server support (for Claude Code, OpenCode, …):
|
|
87
|
+
pip install synthelion  # mcp is already a core dependency; the package defines no separate "mcp" extra
|
|
88
|
+
# With OpenAI function tools:
|
|
89
|
+
pip install "synthelion[openai]"
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
---
|
|
93
|
+
|
|
94
|
+
## Quick start
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
from synthelion import CompressionService, CompressionLevel
|
|
98
|
+
|
|
99
|
+
svc = CompressionService()
|
|
100
|
+
result = svc.compress(
|
|
101
|
+
"I would like to know if it is possible to receive information about cheap restaurants in Rome.",
|
|
102
|
+
CompressionLevel.SEMANTIC,
|
|
103
|
+
)
|
|
104
|
+
print(result.compressed_text) # "know possible receive information cheap restaurant Rome"
|
|
105
|
+
print(f"{result.efficiency_pct:.1f}% saved")
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
---
|
|
109
|
+
|
|
110
|
+
## Compression levels
|
|
111
|
+
|
|
112
|
+
| Level | What it does | Typical savings |
|
|
113
|
+
|---|---|---|
|
|
114
|
+
| `LIGHT` | Remove stop words | ~25–35% |
|
|
115
|
+
| `SEMANTIC` | Stop words + lemmatization | ~30–69% |
|
|
116
|
+
| `AGGRESSIVE` | Lemmatization + generic-term pruning | ~35–70% |
|
|
117
|
+
|
|
118
|
+
---
|
|
119
|
+
|
|
120
|
+
## Language detection
|
|
121
|
+
|
|
122
|
+
```python
|
|
123
|
+
from synthelion import LanguageDetector
|
|
124
|
+
|
|
125
|
+
det = LanguageDetector()
|
|
126
|
+
print(det.detect("Vorrei un tavolo per due persone, per favore.")) # ita
|
|
127
|
+
scores = det.detect_with_scores("Where is the nearest train station?")
|
|
128
|
+
# {"eng": 0.42, ...}
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
---
|
|
132
|
+
|
|
133
|
+
## Content-aware routing
|
|
134
|
+
|
|
135
|
+
```python
|
|
136
|
+
from synthelion import ContentRouter, CompressionProfile
|
|
137
|
+
|
|
138
|
+
router = ContentRouter.from_profile(CompressionProfile.BALANCED)
|
|
139
|
+
result = router.route(content) # auto-detects JSON/HTML/diff/log/code/prose
|
|
140
|
+
print(result.strategy_used, result.savings_pct)
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
---
|
|
144
|
+
|
|
145
|
+
## MCP server (Claude Code / OpenCode)
|
|
146
|
+
|
|
147
|
+
```bash
|
|
148
|
+
# Run the MCP server on stdio:
|
|
149
|
+
synthelion-mcp
|
|
150
|
+
|
|
151
|
+
# Or add to your Claude Code MCP config:
|
|
152
|
+
# {
|
|
153
|
+
# "mcpServers": {
|
|
154
|
+
# "synthelion": { "command": "synthelion-mcp" }
|
|
155
|
+
# }
|
|
156
|
+
# }
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
Tools exposed: `compress`, `detect_language`, `route_content`, `summarize`, `compress_batch`.
|
|
160
|
+
|
|
161
|
+
---
|
|
162
|
+
|
|
163
|
+
## OpenAI function tools
|
|
164
|
+
|
|
165
|
+
```python
|
|
166
|
+
from synthelion.plugins.openai_tools import get_tool_definitions, execute_tool
|
|
167
|
+
|
|
168
|
+
tools = get_tool_definitions()
|
|
169
|
+
# Pass to: client.chat.completions.create(tools=tools, ...)
|
|
170
|
+
|
|
171
|
+
# Execute a tool call returned by the model:
|
|
172
|
+
result = execute_tool("compress", {"text": "...", "level": "semantic"})
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
---
|
|
176
|
+
|
|
177
|
+
## CLI
|
|
178
|
+
|
|
179
|
+
```bash
|
|
180
|
+
synthelion compress --text "Hello world, I would like to know..." --level semantic
|
|
181
|
+
synthelion detect --text "Guten Morgen, wie geht es Ihnen?"
|
|
182
|
+
synthelion route --file myfile.json
|
|
183
|
+
synthelion serve-mcp # same as synthelion-mcp
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
---
|
|
187
|
+
|
|
188
|
+
## Summarization
|
|
189
|
+
|
|
190
|
+
```python
|
|
191
|
+
from synthelion.nlp import TfIdfSummarizer, TextRankSummarizer
|
|
192
|
+
|
|
193
|
+
summarizer = TfIdfSummarizer()
|
|
194
|
+
summary = summarizer.summarize(long_text, sentence_count=3)
|
|
195
|
+
|
|
196
|
+
tr = TextRankSummarizer()
|
|
197
|
+
summary = tr.summarize(long_text, ratio=0.3)
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
---
|
|
201
|
+
|
|
202
|
+
## Agent toolkit
|
|
203
|
+
|
|
204
|
+
```python
|
|
205
|
+
from synthelion.agent import ContextWindow, MemoryStore
|
|
206
|
+
|
|
207
|
+
window = ContextWindow(max_tokens=4000)
|
|
208
|
+
window.append("user", "Tell me about Rome.")
|
|
209
|
+
window.append("assistant", "Rome is the capital of Italy...")
|
|
210
|
+
# Auto-compacts when over budget:
|
|
211
|
+
print(window.to_messages_json())
|
|
212
|
+
|
|
213
|
+
memory = MemoryStore()
|
|
214
|
+
memory.remember({"summary": "User prefers Italian cuisine", "keywords": ["pizza", "pasta"]})
|
|
215
|
+
relevant = memory.recall("What food does the user like?", top_k=3)
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
---
|
|
219
|
+
|
|
220
|
+
## Attribution
|
|
221
|
+
|
|
222
|
+
Synthelion is a Python port of **Caveman** — © 2026 Passaro Francesco Paolo, Digitalsolutions.it.
|
|
223
|
+
Original C# source: https://github.com/francescopaolopassaro/caveman
|
|
224
|
+
|
|
225
|
+
Language data derived from [Universal Dependencies](https://universaldependencies.org/) treebanks (CC BY-SA / CC BY).
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
# Synthelion — Claude Code Plugin + Python Library
|
|
2
|
+
|
|
3
|
+
**Synthelion** is a [Claude Code](https://claude.ai/code) MCP plugin and Python library that reduces LLM token usage by stripping grammatical noise and lemmatizing words — across 50+ languages, with zero ML model dependencies.
|
|
4
|
+
|
|
5
|
+
> "Why use many tokens when few tokens do trick?" — A caveman (and your wallet).
|
|
6
|
+
|
|
7
|
+
Python port of [Caveman](https://github.com/francescopaolopassaro/caveman) by Passaro Francesco Paolo (Digitalsolutions.it).
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## Use as Claude Code plugin (30 seconds)
|
|
12
|
+
|
|
13
|
+
**1. Install:**
|
|
14
|
+
```bash
|
|
15
|
+
pip install synthelion
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
**2. Add to Claude Code** (`~/.claude/settings.json` on macOS/Linux, `%APPDATA%\Claude\claude_desktop_config.json` on Windows):
|
|
19
|
+
```json
|
|
20
|
+
{
|
|
21
|
+
"mcpServers": {
|
|
22
|
+
"synthelion": {
|
|
23
|
+
"command": "synthelion-mcp"
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
**3. Restart Claude Code.** Done — the tools `compress`, `detect_language`, `route_content`, `summarize`, `compress_batch` are now available.
|
|
30
|
+
|
|
31
|
+
**Zero-install with uvx:**
|
|
32
|
+
```json
|
|
33
|
+
{
|
|
34
|
+
"mcpServers": {
|
|
35
|
+
"synthelion": {
|
|
36
|
+
"command": "uvx",
|
|
37
|
+
"args": ["synthelion-mcp"]
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
→ Full plugin guide: [`docs/claude-code-plugin.md`](docs/claude-code-plugin.md)
|
|
44
|
+
|
|
45
|
+
---
|
|
46
|
+
|
|
47
|
+
Powered by Synthelion — © Passaro Francesco Paolo, Digitalsolutions.it (https://digitalsolutions.it)
|
|
48
|
+
|
|
49
|
+
---
|
|
50
|
+
|
|
51
|
+
## Installation
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
pip install synthelion
|
|
55
|
+
# With MCP server support (for Claude Code, OpenCode, …):
|
|
56
|
+
pip install synthelion  # mcp is already a core dependency; the package defines no separate "mcp" extra
|
|
57
|
+
# With OpenAI function tools:
|
|
58
|
+
pip install "synthelion[openai]"
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
63
|
+
## Quick start
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
from synthelion import CompressionService, CompressionLevel
|
|
67
|
+
|
|
68
|
+
svc = CompressionService()
|
|
69
|
+
result = svc.compress(
|
|
70
|
+
"I would like to know if it is possible to receive information about cheap restaurants in Rome.",
|
|
71
|
+
CompressionLevel.SEMANTIC,
|
|
72
|
+
)
|
|
73
|
+
print(result.compressed_text) # "know possible receive information cheap restaurant Rome"
|
|
74
|
+
print(f"{result.efficiency_pct:.1f}% saved")
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
## Compression levels
|
|
80
|
+
|
|
81
|
+
| Level | What it does | Typical savings |
|
|
82
|
+
|---|---|---|
|
|
83
|
+
| `LIGHT` | Remove stop words | ~25–35% |
|
|
84
|
+
| `SEMANTIC` | Stop words + lemmatization | ~30–69% |
|
|
85
|
+
| `AGGRESSIVE` | Lemmatization + generic-term pruning | ~35–70% |
|
|
86
|
+
|
|
87
|
+
---
|
|
88
|
+
|
|
89
|
+
## Language detection
|
|
90
|
+
|
|
91
|
+
```python
|
|
92
|
+
from synthelion import LanguageDetector
|
|
93
|
+
|
|
94
|
+
det = LanguageDetector()
|
|
95
|
+
print(det.detect("Vorrei un tavolo per due persone, per favore.")) # ita
|
|
96
|
+
scores = det.detect_with_scores("Where is the nearest train station?")
|
|
97
|
+
# {"eng": 0.42, ...}
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
---
|
|
101
|
+
|
|
102
|
+
## Content-aware routing
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
from synthelion import ContentRouter, CompressionProfile
|
|
106
|
+
|
|
107
|
+
router = ContentRouter.from_profile(CompressionProfile.BALANCED)
|
|
108
|
+
result = router.route(content) # auto-detects JSON/HTML/diff/log/code/prose
|
|
109
|
+
print(result.strategy_used, result.savings_pct)
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
---
|
|
113
|
+
|
|
114
|
+
## MCP server (Claude Code / OpenCode)
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
# Run the MCP server on stdio:
|
|
118
|
+
synthelion-mcp
|
|
119
|
+
|
|
120
|
+
# Or add to your Claude Code MCP config:
|
|
121
|
+
# {
|
|
122
|
+
# "mcpServers": {
|
|
123
|
+
# "synthelion": { "command": "synthelion-mcp" }
|
|
124
|
+
# }
|
|
125
|
+
# }
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
Tools exposed: `compress`, `detect_language`, `route_content`, `summarize`, `compress_batch`.
|
|
129
|
+
|
|
130
|
+
---
|
|
131
|
+
|
|
132
|
+
## OpenAI function tools
|
|
133
|
+
|
|
134
|
+
```python
|
|
135
|
+
from synthelion.plugins.openai_tools import get_tool_definitions, execute_tool
|
|
136
|
+
|
|
137
|
+
tools = get_tool_definitions()
|
|
138
|
+
# Pass to: client.chat.completions.create(tools=tools, ...)
|
|
139
|
+
|
|
140
|
+
# Execute a tool call returned by the model:
|
|
141
|
+
result = execute_tool("compress", {"text": "...", "level": "semantic"})
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
---
|
|
145
|
+
|
|
146
|
+
## CLI
|
|
147
|
+
|
|
148
|
+
```bash
|
|
149
|
+
synthelion compress --text "Hello world, I would like to know..." --level semantic
|
|
150
|
+
synthelion detect --text "Guten Morgen, wie geht es Ihnen?"
|
|
151
|
+
synthelion route --file myfile.json
|
|
152
|
+
synthelion serve-mcp # same as synthelion-mcp
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
---
|
|
156
|
+
|
|
157
|
+
## Summarization
|
|
158
|
+
|
|
159
|
+
```python
|
|
160
|
+
from synthelion.nlp import TfIdfSummarizer, TextRankSummarizer
|
|
161
|
+
|
|
162
|
+
summarizer = TfIdfSummarizer()
|
|
163
|
+
summary = summarizer.summarize(long_text, sentence_count=3)
|
|
164
|
+
|
|
165
|
+
tr = TextRankSummarizer()
|
|
166
|
+
summary = tr.summarize(long_text, ratio=0.3)
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
---
|
|
170
|
+
|
|
171
|
+
## Agent toolkit
|
|
172
|
+
|
|
173
|
+
```python
|
|
174
|
+
from synthelion.agent import ContextWindow, MemoryStore
|
|
175
|
+
|
|
176
|
+
window = ContextWindow(max_tokens=4000)
|
|
177
|
+
window.append("user", "Tell me about Rome.")
|
|
178
|
+
window.append("assistant", "Rome is the capital of Italy...")
|
|
179
|
+
# Auto-compacts when over budget:
|
|
180
|
+
print(window.to_messages_json())
|
|
181
|
+
|
|
182
|
+
memory = MemoryStore()
|
|
183
|
+
memory.remember({"summary": "User prefers Italian cuisine", "keywords": ["pizza", "pasta"]})
|
|
184
|
+
relevant = memory.recall("What food does the user like?", top_k=3)
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
---
|
|
188
|
+
|
|
189
|
+
## Attribution
|
|
190
|
+
|
|
191
|
+
Synthelion is a Python port of **Caveman** — © 2026 Passaro Francesco Paolo, Digitalsolutions.it.
|
|
192
|
+
Original C# source: https://github.com/francescopaolopassaro/caveman
|
|
193
|
+
|
|
194
|
+
Language data derived from [Universal Dependencies](https://universaldependencies.org/) treebanks (CC BY-SA / CC BY).
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "synthelion"
|
|
7
|
+
version = "1.0.0"
|
|
8
|
+
description = "MCP plugin + Python library for LLM token compression. 50+ languages, zero ML models. Port of Caveman (C#)."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
license-files = ["LICENSE"]
|
|
12
|
+
authors = [{ name = "Passaro Francesco Paolo", email = "passaroweb@gmail.com" }]
|
|
13
|
+
requires-python = ">=3.11"
|
|
14
|
+
keywords = ["llm", "token", "compression", "prompt", "nlp", "mcp", "claude", "claude-code"]
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Development Status :: 4 - Beta",
|
|
17
|
+
"Intended Audience :: Developers",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Programming Language :: Python :: 3.11",
|
|
20
|
+
"Programming Language :: Python :: 3.12",
|
|
21
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
22
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
23
|
+
]
|
|
24
|
+
# mcp is a core dependency — this package is primarily a Claude Code / MCP plugin
|
|
25
|
+
dependencies = [
|
|
26
|
+
"brotli>=1.1",
|
|
27
|
+
"regex>=2024.1",
|
|
28
|
+
"mcp>=1.0",
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
[project.urls]
|
|
32
|
+
Homepage = "https://github.com/francescopaolopassaro/synthelion"
|
|
33
|
+
Repository = "https://github.com/francescopaolopassaro/synthelion"
|
|
34
|
+
"Bug Tracker" = "https://github.com/francescopaolopassaro/synthelion/issues"
|
|
35
|
+
"Original C# project" = "https://github.com/francescopaolopassaro/caveman"
|
|
36
|
+
|
|
37
|
+
[project.optional-dependencies]
|
|
38
|
+
openai = ["openai>=1.0"]
|
|
39
|
+
dev = ["pytest>=8.0", "pytest-asyncio>=0.23"]
|
|
40
|
+
|
|
41
|
+
[project.scripts]
|
|
42
|
+
synthelion = "synthelion.cli:main"
|
|
43
|
+
synthelion-mcp = "synthelion.plugins.mcp_server:main"
|
|
44
|
+
|
|
45
|
+
[tool.setuptools.packages.find]
|
|
46
|
+
where = ["."]
|
|
47
|
+
include = ["synthelion*"]
|
|
48
|
+
|
|
49
|
+
[tool.setuptools.package-data]
|
|
50
|
+
synthelion = ["worddata/*.br"]
|
|
51
|
+
|
|
52
|
+
[tool.pytest.ini_options]
|
|
53
|
+
testpaths = ["tests"]
|
|
54
|
+
asyncio_mode = "auto"
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Synthelion — Python port of Caveman (https://github.com/francescopaolopassaro/caveman)
|
|
2
|
+
# © 2026 Passaro Francesco Paolo — Digitalsolutions.it
|
|
3
|
+
"""Synthelion: token compressor for LLMs. 50+ languages, zero ML models.
|
|
4
|
+
|
|
5
|
+
Python port of Caveman (C#) by Passaro Francesco Paolo (Digitalsolutions.it).
|
|
6
|
+
Original: https://github.com/francescopaolopassaro/caveman
|
|
7
|
+
"""
|
|
8
|
+
from synthelion.models import (
|
|
9
|
+
CompressionLevel,
|
|
10
|
+
CompressionProfile,
|
|
11
|
+
CompressionResult,
|
|
12
|
+
ContentType,
|
|
13
|
+
RoutedCompressionResult,
|
|
14
|
+
VerbosityLevel,
|
|
15
|
+
)
|
|
16
|
+
from synthelion.word_provider import FunctionWordProvider
|
|
17
|
+
from synthelion.detector import LanguageDetector
|
|
18
|
+
from synthelion.core import CompressionFilter, CompressionService
|
|
19
|
+
from synthelion.content_detector import ContentDetector
|
|
20
|
+
from synthelion.content_router import ContentRouter
|
|
21
|
+
|
|
22
|
+
# Package metadata.
__version__ = "1.0.0"
__author__ = "Passaro Francesco Paolo"

# Names re-exported at package level (what ``from synthelion import *`` exposes).
__all__ = [
    # Models, enums and result types, plus the compression filter.
    "CompressionLevel",
    "CompressionProfile",
    "CompressionResult",
    "CompressionFilter",
    "ContentType",
    "RoutedCompressionResult",
    "VerbosityLevel",
    # Services and detectors.
    "FunctionWordProvider",
    "LanguageDetector",
    "CompressionService",
    "ContentDetector",
    "ContentRouter",
]
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# Synthelion — Python port of Caveman (https://github.com/francescopaolopassaro/caveman)
|
|
2
|
+
# © 2026 Passaro Francesco Paolo — Digitalsolutions.it
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
import json
|
|
7
|
+
|
|
8
|
+
from synthelion.nlp.text_rank import TextRankSummarizer
|
|
9
|
+
from synthelion.word_provider import FunctionWordProvider
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _count_tokens(text: str) -> int:
    """Estimate the token count of *text* as ``len(text) // 4``."""
    return len(text) // 4


class ContextWindow:
    """Rolling token-budget conversation buffer for AI agents.

    Ported from C# CavemanContextWindow. Messages are kept as
    ``{"role": ..., "content": ...}`` dicts. After every append the rendered
    transcript is measured with ``_count_tokens``; when the estimate exceeds
    ``max_tokens`` the older turns are replaced by one summary message
    produced by the configured summarizer.

    Compaction runs at most once per append and does not re-check the budget,
    so the window can still be over ``max_tokens`` afterwards.
    """

    def __init__(
        self,
        max_tokens: int = 4000,
        keep_last_turns: int = 4,
        deduplicate: bool = False,
        summarizer: TextRankSummarizer | None = None,
    ) -> None:
        """Create an empty window.

        Args:
            max_tokens: Estimated-token budget that triggers compaction.
            keep_last_turns: Number of most recent non-system messages that
                compaction leaves untouched. Values below zero are treated
                as zero when compacting.
            deduplicate: When true, ``append`` ignores content whose digest
                has already been seen.
            summarizer: Object exposing ``summarize(text, ratio=...)``;
                a ``TextRankSummarizer`` is created when omitted.

        Raises:
            ValueError: If ``max_tokens`` is not positive.
        """
        if max_tokens <= 0:
            raise ValueError("max_tokens must be positive")
        self.max_tokens = max_tokens
        self.keep_last_turns = keep_last_turns
        self.deduplicate = deduplicate
        self._messages: list[dict] = []
        self._seen_hashes: set[str] = set()
        # Compare against None so that a caller-supplied summarizer is never
        # discarded just because it happens to be falsy.
        self._summarizer = summarizer if summarizer is not None else TextRankSummarizer()

    @property
    def message_count(self) -> int:
        """Number of messages currently stored."""
        return len(self._messages)

    @property
    def token_count(self) -> int:
        """Estimated token count of the rendered transcript."""
        return _count_tokens(self.render())

    def append(self, role: str, content: str) -> None:
        """Add a message, then compact if the token budget is exceeded.

        Empty or whitespace-only content is ignored. With ``deduplicate``
        enabled, content whose digest was already recorded is ignored too.
        """
        if not content or not content.strip():
            return
        # MD5 is only a cheap fingerprint here, not a security measure;
        # usedforsecurity=False keeps it usable on FIPS-restricted builds.
        digest = hashlib.md5(content.encode(), usedforsecurity=False).hexdigest()
        if self.deduplicate and digest in self._seen_hashes:
            return
        self._seen_hashes.add(digest)
        self._messages.append({"role": role, "content": content})
        if self.token_count > self.max_tokens:
            self._compact()

    def render(self) -> str:
        """Return the transcript as newline-joined ``role: content`` lines."""
        return "\n".join(f"{m['role']}: {m['content']}" for m in self._messages)

    def to_messages_json(self, indent: int | None = None) -> str:
        """Serialize the stored messages to a JSON array (non-ASCII kept as-is)."""
        return json.dumps(self._messages, ensure_ascii=False, indent=indent)

    def to_messages(self) -> list[dict]:
        """Return a shallow copy of the message list (the dicts are shared)."""
        return list(self._messages)

    def clear(self) -> None:
        """Drop all messages and all recorded content digests."""
        self._messages.clear()
        self._seen_hashes.clear()

    def _compact(self) -> None:
        """Summarize older turns to fit within the token budget.

        System messages are always preserved and placed first. The remaining
        messages are split, in order, into the last ``keep_last_turns`` turns
        (kept verbatim) and everything before them (replaced by one
        ``assistant`` summary message). Nothing happens when there is no
        older turn to summarize.
        """
        # A negative value would otherwise flip the meaning of the slices.
        keep = max(self.keep_last_turns, 0)

        # Partition by role instead of assuming system messages sit at the
        # front: this keeps a mid-conversation system message from being
        # summarized or duplicated, and keeps real turns from being dropped.
        system_msgs = [m for m in self._messages if m["role"] == "system"]
        turns = [m for m in self._messages if m["role"] != "system"]
        if len(turns) <= keep:
            return

        # Explicit split index: ``turns[-0:]`` would be the whole list, which
        # is why keep == 0 must not be expressed as a negative slice.
        split = len(turns) - keep
        old = turns[:split]
        recent = turns[split:]

        old_text = "\n".join(f"{m['role']}: {m['content']}" for m in old)
        summary = self._summarizer.summarize(old_text, ratio=0.3)

        compacted = {"role": "assistant", "content": f"[Summary of earlier context: {summary}]"}
        self._messages = system_msgs + [compacted] + recent
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# Synthelion — Python port of Caveman (https://github.com/francescopaolopassaro/caveman)
|
|
2
|
+
# © 2026 Passaro Francesco Paolo — Digitalsolutions.it
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
from synthelion.nlp.text_rank import TextRankSummarizer
|
|
8
|
+
from synthelion.word_provider import FunctionWordProvider
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class MemoryExtractor:
    """Distils salient sentences and key terms from a conversation.

    Ported from C# CavemanMemoryExtractor. Returns {summary, keywords}.
    No embeddings — pure lexical extraction.
    """

    # ``[^\W\d_]`` is "a word character that is neither a digit nor an
    # underscore", i.e. a letter of any script, so accented and non-Latin
    # words are matched as whole words instead of being dropped.
    _WORD_RE = re.compile(r"\b[^\W\d_]{3,}\b")
    _LONG_WORD_RE = re.compile(r"\b[^\W\d_]{4,}\b")

    def __init__(self, word_provider: FunctionWordProvider | None = None) -> None:
        """Set up the extractor.

        Args:
            word_provider: Provider handed to the ``TextRankSummarizer``;
                a default ``FunctionWordProvider`` is created when omitted.
        """
        # Compare against None so a supplied provider is never replaced just
        # because it evaluates as falsy.
        self._provider = word_provider if word_provider is not None else FunctionWordProvider()
        self._summarizer = TextRankSummarizer(self._provider)

    def extract(self, text: str, max_sentences: int = 5) -> dict:
        """Return ``{"summary": str, "keywords": list[str]}`` for *text*.

        Empty or whitespace-only input yields an empty summary and no
        keywords. At most 20 keywords are returned.
        """
        if not text or not text.strip():
            return {"summary": "", "keywords": []}

        # NOTE(review): other visible call sites pass ``ratio=`` to
        # TextRankSummarizer.summarize — confirm it accepts ``sentence_count``.
        summary = self._summarizer.summarize(text, sentence_count=max_sentences)
        keywords = self._extract_keywords(text)
        return {"summary": summary, "keywords": keywords[:20]}

    def _extract_keywords(self, text: str) -> list[str]:
        """Collect candidate keywords from *text*, most salient first.

        Capitalized words (an uppercase letter followed by two or more
        lowercase letters) come first in order of appearance, followed by up
        to 30 of the most frequent lowercased words of four or more letters.
        Duplicates are removed case-insensitively, keeping the first spelling.
        No stop-word filtering is applied here.
        """
        from collections import Counter

        # Capitalized words are treated as likely proper nouns / entities.
        capitalized = [
            word
            for word in self._WORD_RE.findall(text)
            if word[0].isupper() and word[1:].islower()
        ]
        frequency = Counter(self._LONG_WORD_RE.findall(text.lower()))
        frequent = [word for word, _ in frequency.most_common(30)]

        # A dict keeps insertion order, so this is an order-preserving,
        # case-insensitive de-duplication.
        unique: dict[str, str] = {}
        for word in capitalized + frequent:
            unique.setdefault(word.lower(), word)
        return list(unique.values())
|