tokenshrink 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tokenshrink/__init__.py +8 -2
- tokenshrink/__main__.py +4 -0
- tokenshrink/cli.py +23 -1
- tokenshrink/pipeline.py +1 -1
- {tokenshrink-0.2.0.dist-info → tokenshrink-0.2.2.dist-info}/METADATA +1 -1
- tokenshrink-0.2.2.dist-info/RECORD +9 -0
- tokenshrink-0.2.0.dist-info/RECORD +0 -8
- {tokenshrink-0.2.0.dist-info → tokenshrink-0.2.2.dist-info}/WHEEL +0 -0
- {tokenshrink-0.2.0.dist-info → tokenshrink-0.2.2.dist-info}/entry_points.txt +0 -0
- {tokenshrink-0.2.0.dist-info → tokenshrink-0.2.2.dist-info}/licenses/LICENSE +0 -0
tokenshrink/__init__.py
CHANGED
|
@@ -23,7 +23,13 @@ CLI:
|
|
|
23
23
|
tokenshrink stats
|
|
24
24
|
"""
|
|
25
25
|
|
|
26
|
-
|
|
26
|
+
__version__ = "0.2.2"
|
|
27
|
+
|
|
28
|
+
def __getattr__(name):
|
|
29
|
+
if name in ("TokenShrink", "ShrinkResult", "ChunkScore"):
|
|
30
|
+
from tokenshrink.pipeline import TokenShrink, ShrinkResult, ChunkScore
|
|
31
|
+
globals().update({"TokenShrink": TokenShrink, "ShrinkResult": ShrinkResult, "ChunkScore": ChunkScore})
|
|
32
|
+
return globals()[name]
|
|
33
|
+
raise AttributeError(f"module 'tokenshrink' has no attribute {name!r}")
|
|
27
34
|
|
|
28
|
-
__version__ = "0.2.0"
|
|
29
35
|
__all__ = ["TokenShrink", "ShrinkResult", "ChunkScore"]
|
tokenshrink/__main__.py
ADDED
tokenshrink/cli.py
CHANGED
|
@@ -13,7 +13,7 @@ import sys
|
|
|
13
13
|
import json
|
|
14
14
|
from pathlib import Path
|
|
15
15
|
|
|
16
|
-
from tokenshrink import
|
|
16
|
+
from tokenshrink import __version__
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
def main():
|
|
@@ -32,6 +32,11 @@ def main():
|
|
|
32
32
|
action="store_true",
|
|
33
33
|
help="Output as JSON",
|
|
34
34
|
)
|
|
35
|
+
parser.add_argument(
|
|
36
|
+
"--quiet",
|
|
37
|
+
action="store_true",
|
|
38
|
+
help="Suppress model loading messages",
|
|
39
|
+
)
|
|
35
40
|
|
|
36
41
|
subparsers = parser.add_subparsers(dest="command", help="Commands")
|
|
37
42
|
|
|
@@ -118,6 +123,20 @@ def main():
|
|
|
118
123
|
parser.print_help()
|
|
119
124
|
sys.exit(0)
|
|
120
125
|
|
|
126
|
+
# Suppress noisy output when --quiet or --json
|
|
127
|
+
if args.quiet or args.json:
|
|
128
|
+
import os, logging, warnings
|
|
129
|
+
os.environ["TRANSFORMERS_VERBOSITY"] = "error"
|
|
130
|
+
os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"
|
|
131
|
+
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
|
132
|
+
logging.getLogger("sentence_transformers").setLevel(logging.ERROR)
|
|
133
|
+
logging.getLogger("transformers").setLevel(logging.ERROR)
|
|
134
|
+
logging.getLogger("huggingface_hub").setLevel(logging.ERROR)
|
|
135
|
+
warnings.filterwarnings("ignore", message=".*unauthenticated.*")
|
|
136
|
+
|
|
137
|
+
# Lazy import to avoid loading ML models for --help/--version
|
|
138
|
+
from tokenshrink import TokenShrink
|
|
139
|
+
|
|
121
140
|
# Determine compression setting
|
|
122
141
|
compression = True
|
|
123
142
|
if hasattr(args, 'no_compress') and args.no_compress:
|
|
@@ -195,6 +214,9 @@ def main():
|
|
|
195
214
|
print(f"Sources: {', '.join(Path(s).name for s in result.sources)}")
|
|
196
215
|
print(f"Stats: {result.savings}")
|
|
197
216
|
|
|
217
|
+
if result.savings_pct == 0.0:
|
|
218
|
+
print(" Tip: Install llmlingua for compression: pip install llmlingua")
|
|
219
|
+
|
|
198
220
|
if getattr(args, 'scores', False) and result.chunk_scores:
|
|
199
221
|
print("\nChunk Importance Scores:")
|
|
200
222
|
for cs in result.chunk_scores:
|
tokenshrink/pipeline.py
CHANGED
|
@@ -613,7 +613,7 @@ class TokenShrink:
|
|
|
613
613
|
"ratio": total_compressed / total_original if total_original else 1.0,
|
|
614
614
|
}
|
|
615
615
|
|
|
616
|
-
def search(self, question: str, k: int = 5, min_score: float = 0.
|
|
616
|
+
def search(self, question: str, k: int = 5, min_score: float = 0.15) -> list[dict]:
|
|
617
617
|
"""Search without compression. Returns raw chunks with scores."""
|
|
618
618
|
if self._index.ntotal == 0:
|
|
619
619
|
return []
|
|
{tokenshrink-0.2.0.dist-info → tokenshrink-0.2.2.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tokenshrink
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: Cut your AI costs 50-80%. FAISS retrieval + LLMLingua compression + REFRAG-inspired adaptive optimization.
|
|
5
5
|
Project-URL: Homepage, https://tokenshrink.dev
|
|
6
6
|
Project-URL: Repository, https://github.com/MusashiMiyamoto1-cloud/tokenshrink
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
tokenshrink/__init__.py,sha256=ve3m9BZp1ti-U76YWmI63vPU8_C2wx8ZckJTr2Quh70,1195
|
|
2
|
+
tokenshrink/__main__.py,sha256=yEx25Vj4itzYPdLxHsJP3MVjKM75CGdP97WIETxGljQ,91
|
|
3
|
+
tokenshrink/cli.py,sha256=5F6YB_ZLPyf77AsDKKz0IPLsmgbSt0ivAsElzB6yKqU,9058
|
|
4
|
+
tokenshrink/pipeline.py,sha256=ayFJVr_esO-L0Z0JWerLcYuf83te5umAEJ6e6l-TDns,24056
|
|
5
|
+
tokenshrink-0.2.2.dist-info/METADATA,sha256=cbm_6-wOyuCd7D42wNO1NWDtJpZJka4ARSw2lMHwrlo,10615
|
|
6
|
+
tokenshrink-0.2.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
7
|
+
tokenshrink-0.2.2.dist-info/entry_points.txt,sha256=vwr3PMC25J8f-ppDVngO3MmXuY_cdR2rNM_syUmT7lc,53
|
|
8
|
+
tokenshrink-0.2.2.dist-info/licenses/LICENSE,sha256=LsUNAvKJnhwbhmOWCjLq-Zf0HllrifthQ9TZkv1UUig,1064
|
|
9
|
+
tokenshrink-0.2.2.dist-info/RECORD,,
|
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
tokenshrink/__init__.py,sha256=ybxGRkBPQTLIBckBYSZxebHKlAilMOoWVJxOyhv1Hgw,883
|
|
2
|
-
tokenshrink/cli.py,sha256=dmP1BPbMow_NBm8fFXo05vJlU4vgyhDuzxL5q1a6n20,8102
|
|
3
|
-
tokenshrink/pipeline.py,sha256=H3T3UlvHOIc1VOVyNFL-HEP0Cf_v7fVlAY-BFVT4V4w,24055
|
|
4
|
-
tokenshrink-0.2.0.dist-info/METADATA,sha256=quql1c1tRTp7lF4t1YOVEFGx1dT8tf3bXD04KvK0TW4,10615
|
|
5
|
-
tokenshrink-0.2.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
6
|
-
tokenshrink-0.2.0.dist-info/entry_points.txt,sha256=vwr3PMC25J8f-ppDVngO3MmXuY_cdR2rNM_syUmT7lc,53
|
|
7
|
-
tokenshrink-0.2.0.dist-info/licenses/LICENSE,sha256=LsUNAvKJnhwbhmOWCjLq-Zf0HllrifthQ9TZkv1UUig,1064
|
|
8
|
-
tokenshrink-0.2.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|