tokenshrink 0.2.0.tar.gz → 0.2.1.tar.gz
This diff shows the changes between publicly released versions of this package, as they appear in their public registries. It is provided for informational purposes only.
- tokenshrink-0.2.1/Dockerfile +21 -0
- {tokenshrink-0.2.0 → tokenshrink-0.2.1}/PKG-INFO +1 -1
- tokenshrink-0.2.1/docker-compose.test.yml +47 -0
- {tokenshrink-0.2.0 → tokenshrink-0.2.1}/pyproject.toml +1 -1
- {tokenshrink-0.2.0 → tokenshrink-0.2.1}/src/tokenshrink/__init__.py +1 -1
- tokenshrink-0.2.1/src/tokenshrink/__main__.py +4 -0
- {tokenshrink-0.2.0 → tokenshrink-0.2.1}/src/tokenshrink/cli.py +19 -0
- {tokenshrink-0.2.0 → tokenshrink-0.2.1}/src/tokenshrink/pipeline.py +1 -1
- tokenshrink-0.2.1/tests/conftest.py +211 -0
- tokenshrink-0.2.1/tests/test_cli.py +248 -0
- tokenshrink-0.2.1/tests/test_integration.py +263 -0
- tokenshrink-0.2.1/tests/test_pipeline.py +411 -0
- tokenshrink-0.2.1/tests/test_stress.py +264 -0
- tokenshrink-0.2.1/tests/test_utils.py +255 -0
- {tokenshrink-0.2.0 → tokenshrink-0.2.1}/.github/ISSUE_TEMPLATE/feedback.md +0 -0
- {tokenshrink-0.2.0 → tokenshrink-0.2.1}/.gitignore +0 -0
- {tokenshrink-0.2.0 → tokenshrink-0.2.1}/LICENSE +0 -0
- {tokenshrink-0.2.0 → tokenshrink-0.2.1}/README.md +0 -0
- {tokenshrink-0.2.0 → tokenshrink-0.2.1}/docs/ASSETS.md +0 -0
- {tokenshrink-0.2.0 → tokenshrink-0.2.1}/docs/index.html +0 -0
- {tokenshrink-0.2.0 → tokenshrink-0.2.1}/docs/marketing/origin-story-post.md +0 -0
- {tokenshrink-0.2.0 → tokenshrink-0.2.1}/docs/marketing/reddit-log.md +0 -0
- {tokenshrink-0.2.0 → tokenshrink-0.2.1}/docs/marketing/reddit-posts.md +0 -0
- {tokenshrink-0.2.0 → tokenshrink-0.2.1}/docs/marketing/reddit-routine.md +0 -0
- {tokenshrink-0.2.0 → tokenshrink-0.2.1}/docs/monitoring-log.md +0 -0
- {tokenshrink-0.2.0 → tokenshrink-0.2.1}/site/index.html +0 -0
tokenshrink-0.2.1/Dockerfile
@@ -0,0 +1,21 @@
+FROM python:3.12-slim
+
+WORKDIR /app
+
+# Install system deps for FAISS
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy project
+COPY pyproject.toml README.md LICENSE ./
+COPY src/ ./src/
+
+# Install package with dev deps (no compression — too heavy for test image)
+RUN pip install --no-cache-dir -e ".[dev]"
+
+# Copy tests
+COPY tests/ ./tests/
+
+# Default: run all tests
+CMD ["pytest", "tests/", "-v", "--tb=short", "-x"]
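Note: this image exists only to run the test suite. A local smoke test, using nothing beyond what the Dockerfile itself declares, would be docker build -t tokenshrink-test . followed by docker run --rm tokenshrink-test, which executes the default pytest CMD above.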
{tokenshrink-0.2.0 → tokenshrink-0.2.1}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tokenshrink
-Version: 0.2.0
+Version: 0.2.1
 Summary: Cut your AI costs 50-80%. FAISS retrieval + LLMLingua compression + REFRAG-inspired adaptive optimization.
 Project-URL: Homepage, https://tokenshrink.dev
 Project-URL: Repository, https://github.com/MusashiMiyamoto1-cloud/tokenshrink
tokenshrink-0.2.1/docker-compose.test.yml
@@ -0,0 +1,47 @@
+version: "3.8"
+
+services:
+  # Full test suite
+  test-all:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    command: pytest tests/ -v --tb=short -x
+    environment:
+      - TOKENIZERS_PARALLELISM=false
+
+  # Unit tests only (fast)
+  test-unit:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    command: pytest tests/test_utils.py tests/test_pipeline.py -v --tb=short
+    environment:
+      - TOKENIZERS_PARALLELISM=false
+
+  # CLI tests
+  test-cli:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    command: pytest tests/test_cli.py -v --tb=short
+    environment:
+      - TOKENIZERS_PARALLELISM=false
+
+  # Integration tests
+  test-integration:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    command: pytest tests/test_integration.py -v --tb=short
+    environment:
+      - TOKENIZERS_PARALLELISM=false
+
+  # Stress tests
+  test-stress:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    command: pytest tests/test_stress.py -v --tb=short -s
+    environment:
+      - TOKENIZERS_PARALLELISM=false
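All five services build the same image and differ only in which pytest subset they run, so a single slice can be exercised in isolation, e.g. docker compose -f docker-compose.test.yml run --rm test-unit (assuming Docker Compose v2). Only test-stress passes -s, keeping its print output visible.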
{tokenshrink-0.2.0 → tokenshrink-0.2.1}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "tokenshrink"
-version = "0.2.0"
+version = "0.2.1"
 description = "Cut your AI costs 50-80%. FAISS retrieval + LLMLingua compression + REFRAG-inspired adaptive optimization."
 readme = "README.md"
 license = "MIT"
{tokenshrink-0.2.0 → tokenshrink-0.2.1}/src/tokenshrink/cli.py
@@ -32,6 +32,11 @@ def main():
         action="store_true",
         help="Output as JSON",
     )
+    parser.add_argument(
+        "--quiet",
+        action="store_true",
+        help="Suppress model loading messages",
+    )
 
     subparsers = parser.add_subparsers(dest="command", help="Commands")
 
@@ -118,6 +123,17 @@ def main():
         parser.print_help()
         sys.exit(0)
 
+    # Suppress noisy output when --quiet or --json
+    if args.quiet or args.json:
+        import os, logging, warnings
+        os.environ["TRANSFORMERS_VERBOSITY"] = "error"
+        os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"
+        os.environ["TOKENIZERS_PARALLELISM"] = "false"
+        logging.getLogger("sentence_transformers").setLevel(logging.ERROR)
+        logging.getLogger("transformers").setLevel(logging.ERROR)
+        logging.getLogger("huggingface_hub").setLevel(logging.ERROR)
+        warnings.filterwarnings("ignore", message=".*unauthenticated.*")
+
     # Determine compression setting
     compression = True
     if hasattr(args, 'no_compress') and args.no_compress:
@@ -195,6 +211,9 @@ def main():
         print(f"Sources: {', '.join(Path(s).name for s in result.sources)}")
         print(f"Stats: {result.savings}")
 
+        if result.savings_pct == 0.0:
+            print(" Tip: Install llmlingua for compression: pip install llmlingua")
+
         if getattr(args, 'scores', False) and result.chunk_scores:
             print("\nChunk Importance Scores:")
             for cs in result.chunk_scores:
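Because --quiet is defined on the top-level parser, it goes before the subcommand, e.g. python -m tokenshrink --quiet query "how do tokens refresh" (the four-line src/tokenshrink/__main__.py added in this release is presumably what makes the -m form work). --json now triggers the same log suppression, so its output stays parseable JSON.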
{tokenshrink-0.2.0 → tokenshrink-0.2.1}/src/tokenshrink/pipeline.py
@@ -613,7 +613,7 @@ class TokenShrink:
             "ratio": total_compressed / total_original if total_original else 1.0,
         }
 
-    def search(self, question: str, k: int = 5, min_score: float = 0.
+    def search(self, question: str, k: int = 5, min_score: float = 0.15) -> list[dict]:
         """Search without compression. Returns raw chunks with scores."""
         if self._index.ntotal == 0:
             return []
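A minimal sketch of the changed default in use; the search() signature comes from this hunk, while the constructor kwargs and index() call mirror the test fixtures below:

    from tokenshrink import TokenShrink

    ts = TokenShrink(compression=False)   # kwargs as used in conftest.py
    ts.index("docs/")                     # index a directory of files
    hits = ts.search("rate limits")       # min_score now defaults to 0.15
    for h in hits:                        # raw chunks with scores, no compression
        print(h)

Callers who relied on the previous default can still pass min_score explicitly to keep their old filtering behavior.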
tokenshrink-0.2.1/tests/conftest.py
@@ -0,0 +1,211 @@
+"""Shared fixtures for TokenShrink test suite."""
+
+import os
+import json
+import shutil
+import tempfile
+from pathlib import Path
+
+import pytest
+from sentence_transformers import SentenceTransformer
+
+
+# Session-scoped: load the embedding model ONCE for all tests
+@pytest.fixture(scope="session")
+def shared_model():
+    """Load embedding model once per test session."""
+    return SentenceTransformer("all-MiniLM-L6-v2")
+
+
+@pytest.fixture
+def tmp_dir():
+    """Create a temporary directory, clean up after."""
+    d = tempfile.mkdtemp(prefix="tokenshrink_test_")
+    yield Path(d)
+    shutil.rmtree(d, ignore_errors=True)
+
+
+@pytest.fixture
+def sample_docs(tmp_dir):
+    """Create sample documents for indexing."""
+    docs_dir = tmp_dir / "docs"
+    docs_dir.mkdir()
+
+    # Auth documentation
+    (docs_dir / "auth.md").write_text(
+        "# Authentication\n\n"
+        "All API requests require a Bearer token in the Authorization header. "
+        "Tokens expire after 24 hours and must be refreshed using the /auth/refresh endpoint. "
+        "Rate limiting is enforced at 100 requests per minute per token. "
+        "If you exceed the rate limit, you'll receive a 429 status code. "
+        "OAuth2 flows are supported for third-party integrations. "
+        "The client_id and client_secret must be stored securely. "
+        "Never expose credentials in client-side code or version control. "
+        "Use environment variables or a secrets manager for production deployments. "
+        "Multi-factor authentication is required for admin endpoints. "
+        "Session tokens are tied to IP address for security. "
+        * 3
+    )
+
+    # Rate limiting documentation
+    (docs_dir / "rate-limits.md").write_text(
+        "# Rate Limits\n\n"
+        "The API enforces the following rate limits:\n"
+        "- Free tier: 10 requests per minute\n"
+        "- Pro tier: 100 requests per minute\n"
+        "- Enterprise: 1000 requests per minute\n\n"
+        "Rate limit headers are included in every response:\n"
+        "- X-RateLimit-Limit: Maximum requests allowed\n"
+        "- X-RateLimit-Remaining: Requests remaining\n"
+        "- X-RateLimit-Reset: Unix timestamp when limit resets\n\n"
+        "When rate limited, the response includes a Retry-After header. "
+        "Implement exponential backoff in your client. "
+        "Batch endpoints have separate, higher limits. "
+        "WebSocket connections have a message rate limit of 60 messages per minute. "
+        "Exceeding limits temporarily blocks the API key for 5 minutes. "
+        * 3
+    )
+
+    # Deployment guide
+    (docs_dir / "deployment.md").write_text(
+        "# Deployment Guide\n\n"
+        "## Docker\n"
+        "Build the image: `docker build -t myapp .`\n"
+        "Run with: `docker run -p 8080:8080 myapp`\n\n"
+        "## Kubernetes\n"
+        "Apply manifests: `kubectl apply -f k8s/`\n"
+        "The service uses a HorizontalPodAutoscaler with CPU target of 70%. "
+        "Minimum 2 replicas, maximum 10 replicas. "
+        "Persistent volumes are required for the database. "
+        "Use ConfigMaps for environment-specific settings. "
+        "Secrets should be managed via external-secrets-operator. "
+        "Health checks are configured on /health and /ready endpoints. "
+        "The readiness probe has an initial delay of 10 seconds. "
+        "Rolling updates with maxUnavailable=1 and maxSurge=1. "
+        * 3
+    )
+
+    # Near-duplicate of auth (for dedup testing)
+    (docs_dir / "auth2.md").write_text(
+        "# Authentication Guide\n\n"
+        "All API requests require a Bearer token in the Authorization header. "
+        "Tokens expire after 24 hours and must be refreshed using the /auth/refresh endpoint. "
+        "Rate limiting is enforced at 100 requests per minute per token. "
+        "If you exceed the rate limit, you'll receive a 429 status code. "
+        "OAuth2 flows are supported for third-party integrations. "
+        "The client_id and client_secret must be stored securely. "
+        "Never expose credentials in client-side code or version control. "
+        "Use environment variables or a secrets manager for production deployments. "
+        "Multi-factor authentication is required for admin endpoints. "
+        "Session tokens are tied to IP address for extra security measures. "
+        * 3
+    )
+
+    # Python code file
+    (docs_dir / "client.py").write_text(
+        '"""\nAPI Client for the service.\n"""\n\n'
+        "import requests\n"
+        "import time\n\n"
+        "class APIClient:\n"
+        '    """HTTP client with retry and rate limit handling."""\n\n'
+        "    def __init__(self, base_url: str, token: str):\n"
+        "        self.base_url = base_url\n"
+        "        self.token = token\n"
+        "        self.session = requests.Session()\n"
+        '        self.session.headers["Authorization"] = f"Bearer {token}"\n\n'
+        "    def get(self, path: str, **kwargs):\n"
+        '        """GET request with retry."""\n'
+        "        for attempt in range(3):\n"
+        "            resp = self.session.get(f'{self.base_url}{path}', **kwargs)\n"
+        "            if resp.status_code == 429:\n"
+        "                wait = int(resp.headers.get('Retry-After', 5))\n"
+        "                time.sleep(wait)\n"
+        "                continue\n"
+        "            return resp\n"
+        "        raise Exception('Rate limited after 3 retries')\n\n"
+        "    def post(self, path: str, data=None, **kwargs):\n"
+        '        """POST request with retry."""\n'
+        "        for attempt in range(3):\n"
+        "            resp = self.session.post(f'{self.base_url}{path}', json=data, **kwargs)\n"
+        "            if resp.status_code == 429:\n"
+        "                wait = int(resp.headers.get('Retry-After', 5))\n"
+        "                time.sleep(wait)\n"
+        "                continue\n"
+        "            return resp\n"
+        "        raise Exception('Rate limited after 3 retries')\n"
+    )
+
+    return docs_dir
+
+
+@pytest.fixture
+def large_docs(tmp_dir):
+    """Create a large document set for stress testing."""
+    docs_dir = tmp_dir / "large_docs"
+    docs_dir.mkdir()
+
+    topics = [
+        ("machine-learning", "Machine learning models use gradient descent to optimize loss functions. "
+         "Neural networks consist of layers of interconnected nodes. "),
+        ("databases", "PostgreSQL supports JSONB for semi-structured data storage. "
+         "Indexes improve query performance on frequently accessed columns. "),
+        ("networking", "TCP provides reliable ordered delivery of data between applications. "
+         "DNS resolves domain names to IP addresses using a hierarchical system. "),
+        ("security", "TLS encrypts data in transit between client and server. "
+         "CORS headers control which origins can access API resources. "),
+        ("devops", "CI/CD pipelines automate building, testing, and deploying code. "
+         "Infrastructure as code tools like Terraform manage cloud resources. "),
+        ("frontend", "React components re-render when state or props change. "
+         "CSS Grid and Flexbox provide powerful layout capabilities. "),
+        ("api-design", "REST APIs use HTTP methods to perform CRUD operations. "
+         "GraphQL allows clients to request exactly the data they need. "),
+        ("testing", "Unit tests verify individual functions in isolation. "
+         "Integration tests check that components work together correctly. "),
+        ("monitoring", "Prometheus collects metrics from instrumented applications. "
+         "Grafana dashboards visualize time-series data for observability. "),
+        ("caching", "Redis provides in-memory key-value storage with persistence options. "
+         "CDN edge caching reduces latency for static assets. "),
+    ]
+
+    for i in range(50):
+        topic_name, content = topics[i % len(topics)]
+        filename = f"{topic_name}-{i:03d}.md"
+        # Each file ~2000 words
+        (docs_dir / filename).write_text(
+            f"# {topic_name.replace('-', ' ').title()} - Part {i}\n\n"
+            + (content * 80)
+        )
+
+    return docs_dir
+
+
+@pytest.fixture
+def indexed_ts(tmp_dir, sample_docs, shared_model):
+    """Return a TokenShrink instance with sample docs already indexed."""
+    from tokenshrink import TokenShrink
+
+    ts = TokenShrink(
+        index_dir=str(tmp_dir / ".tokenshrink"),
+        compression=False,
+        adaptive=True,
+        dedup=True,
+    )
+    # Inject the shared model to avoid reloading
+    ts._model = shared_model
+    ts.index(str(sample_docs))
+    return ts
+
+
+@pytest.fixture
+def make_ts(tmp_dir, shared_model):
+    """Factory fixture: creates a TokenShrink with the shared model."""
+    from tokenshrink import TokenShrink
+
+    def _make(**kwargs):
+        kwargs.setdefault("index_dir", str(tmp_dir / ".ts"))
+        kwargs.setdefault("compression", False)
+        ts = TokenShrink(**kwargs)
+        ts._model = shared_model
+        return ts
+
+    return _make
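A hedged sketch of how a test would combine the make_ts factory with sample_docs, using only names defined in this file (the assertion assumes search() finds at least one match, which the duplicated auth fixtures make likely):

    def test_dedup_smoke(make_ts, sample_docs):
        ts = make_ts(dedup=True)      # index_dir/compression defaults come from the factory
        ts.index(str(sample_docs))    # sample_docs includes auth.md plus near-duplicate auth2.md
        assert ts.search("bearer token")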
tokenshrink-0.2.1/tests/test_cli.py
@@ -0,0 +1,248 @@
+"""Tests for the CLI interface."""
+
+import json
+import subprocess
+import sys
+from pathlib import Path
+
+import pytest
+
+
+@pytest.fixture
+def cli_env(tmp_dir, sample_docs):
+    """Set up CLI test environment."""
+    return {
+        "docs_dir": str(sample_docs),
+        "index_dir": str(tmp_dir / ".ts"),
+        "tmp_dir": tmp_dir,
+    }
+
+
+def run_cli(*args, cwd=None):
+    """Run tokenshrink CLI and return result."""
+    cmd = [sys.executable, "-m", "tokenshrink.cli"] + list(args)
+    result = subprocess.run(
+        cmd,
+        capture_output=True,
+        text=True,
+        cwd=cwd,
+        timeout=120,
+    )
+    return result
+
+
+class TestCLIIndex:
+    """Test CLI index command."""
+
+    def test_index_basic(self, cli_env):
+        r = run_cli("--index-dir", cli_env["index_dir"], "index", cli_env["docs_dir"])
+        assert r.returncode == 0
+        assert "Indexed" in r.stdout
+        assert "Chunks" in r.stdout
+
+    def test_index_json_output(self, cli_env):
+        r = run_cli("--index-dir", cli_env["index_dir"], "--json", "index", cli_env["docs_dir"])
+        assert r.returncode == 0
+        data = json.loads(r.stdout)
+        assert "files_indexed" in data
+        assert "chunks_added" in data
+        assert data["files_indexed"] > 0
+
+    def test_index_with_extensions(self, cli_env):
+        r = run_cli(
+            "--index-dir", cli_env["index_dir"],
+            "--json",
+            "index", cli_env["docs_dir"],
+            "-e", ".md",
+        )
+        assert r.returncode == 0
+        data = json.loads(r.stdout)
+        assert data["files_indexed"] == 4  # Only .md files
+
+    def test_index_force(self, cli_env):
+        run_cli("--index-dir", cli_env["index_dir"], "index", cli_env["docs_dir"])
+        r = run_cli(
+            "--index-dir", cli_env["index_dir"],
+            "--json",
+            "index", cli_env["docs_dir"],
+            "-f",
+        )
+        assert r.returncode == 0
+        data = json.loads(r.stdout)
+        assert data["files_indexed"] > 0  # Re-indexed with force
+
+
+class TestCLIQuery:
+    """Test CLI query command."""
+
+    def test_query_basic(self, cli_env):
+        run_cli("--index-dir", cli_env["index_dir"], "index", cli_env["docs_dir"])
+        r = run_cli(
+            "--index-dir", cli_env["index_dir"],
+            "query", "authentication tokens",
+            "--no-compress",
+        )
+        assert r.returncode == 0
+        assert "Sources:" in r.stdout or "No relevant" in r.stdout
+
+    def test_query_json(self, cli_env):
+        run_cli("--index-dir", cli_env["index_dir"], "index", cli_env["docs_dir"])
+        r = run_cli(
+            "--index-dir", cli_env["index_dir"],
+            "--json",
+            "query", "authentication",
+            "--no-compress",
+        )
+        assert r.returncode == 0
+        data = json.loads(r.stdout)
+        assert "context" in data
+        assert "sources" in data
+        assert "original_tokens" in data
+
+    def test_query_with_scores(self, cli_env):
+        run_cli("--index-dir", cli_env["index_dir"], "index", cli_env["docs_dir"])
+        r = run_cli(
+            "--index-dir", cli_env["index_dir"],
+            "query", "rate limits",
+            "--no-compress",
+            "--scores",
+        )
+        assert r.returncode == 0
+        assert "Chunk Importance Scores" in r.stdout
+        assert "sim=" in r.stdout
+        assert "density=" in r.stdout
+
+    def test_query_json_with_scores(self, cli_env):
+        run_cli("--index-dir", cli_env["index_dir"], "index", cli_env["docs_dir"])
+        r = run_cli(
+            "--index-dir", cli_env["index_dir"],
+            "--json",
+            "query", "deployment kubernetes",
+            "--no-compress",
+            "--scores",
+        )
+        assert r.returncode == 0
+        data = json.loads(r.stdout)
+        assert "chunk_scores" in data
+        for cs in data["chunk_scores"]:
+            assert "similarity" in cs
+            assert "density" in cs
+            assert "importance" in cs
+
+    def test_query_no_dedup(self, cli_env):
+        run_cli("--index-dir", cli_env["index_dir"], "index", cli_env["docs_dir"])
+        r = run_cli(
+            "--index-dir", cli_env["index_dir"],
+            "--json",
+            "query", "authentication",
+            "--no-compress",
+            "--no-dedup",
+        )
+        assert r.returncode == 0
+        data = json.loads(r.stdout)
+        assert data["dedup_removed"] == 0
+
+    def test_query_k_param(self, cli_env):
+        run_cli("--index-dir", cli_env["index_dir"], "index", cli_env["docs_dir"])
+        r = run_cli(
+            "--index-dir", cli_env["index_dir"],
+            "--json",
+            "query", "authentication",
+            "--no-compress",
+            "-k", "2",
+        )
+        assert r.returncode == 0
+        data = json.loads(r.stdout)
+        assert len(data.get("sources", [])) <= 2
+
+
+class TestCLISearch:
+    """Test CLI search command."""
+
+    def test_search_basic(self, cli_env):
+        run_cli("--index-dir", cli_env["index_dir"], "index", cli_env["docs_dir"])
+        r = run_cli(
+            "--index-dir", cli_env["index_dir"],
+            "search", "rate limits",
+        )
+        assert r.returncode == 0
+        assert "score:" in r.stdout
+
+    def test_search_json(self, cli_env):
+        run_cli("--index-dir", cli_env["index_dir"], "index", cli_env["docs_dir"])
+        r = run_cli(
+            "--index-dir", cli_env["index_dir"],
+            "--json",
+            "search", "rate limits",
+        )
+        assert r.returncode == 0
+        data = json.loads(r.stdout)
+        assert isinstance(data, list)
+        assert len(data) > 0
+
+    def test_search_empty_index(self, cli_env):
+        r = run_cli(
+            "--index-dir", cli_env["index_dir"],
+            "search", "anything",
+        )
+        assert r.returncode == 0
+        assert "No results" in r.stdout
+
+
+class TestCLIStats:
+    """Test CLI stats command."""
+
+    def test_stats_empty(self, cli_env):
+        r = run_cli("--index-dir", cli_env["index_dir"], "stats")
+        assert r.returncode == 0
+        assert "Chunks: 0" in r.stdout
+
+    def test_stats_after_index(self, cli_env):
+        run_cli("--index-dir", cli_env["index_dir"], "index", cli_env["docs_dir"])
+        r = run_cli("--index-dir", cli_env["index_dir"], "stats")
+        assert r.returncode == 0
+        assert "Chunks:" in r.stdout
+        assert "Files:" in r.stdout
+
+    def test_stats_json(self, cli_env):
+        run_cli("--index-dir", cli_env["index_dir"], "index", cli_env["docs_dir"])
+        r = run_cli("--index-dir", cli_env["index_dir"], "--json", "stats")
+        assert r.returncode == 0
+        data = json.loads(r.stdout)
+        assert "total_chunks" in data
+        assert data["total_chunks"] > 0
+
+
+class TestCLIClear:
+    """Test CLI clear command."""
+
+    def test_clear(self, cli_env):
+        run_cli("--index-dir", cli_env["index_dir"], "index", cli_env["docs_dir"])
+        r = run_cli("--index-dir", cli_env["index_dir"], "clear")
+        assert r.returncode == 0
+        assert "cleared" in r.stdout.lower()
+
+    def test_clear_json(self, cli_env):
+        run_cli("--index-dir", cli_env["index_dir"], "index", cli_env["docs_dir"])
+        r = run_cli("--index-dir", cli_env["index_dir"], "--json", "clear")
+        assert r.returncode == 0
+        data = json.loads(r.stdout)
+        assert data["status"] == "cleared"
+
+
+class TestCLIMisc:
+    """Test miscellaneous CLI behavior."""
+
+    def test_version(self):
+        r = run_cli("--version")
+        assert r.returncode == 0
+        assert "0.2.0" in r.stdout
+
+    def test_no_command(self):
+        r = run_cli()
+        assert r.returncode == 0  # Just prints help
+
+    def test_help(self):
+        r = run_cli("--help")
+        assert r.returncode == 0
+        assert "tokenshrink" in r.stdout.lower()