PyPI - thealgorithms-mcp - Versions diffs - 0.1.0__py3-none-any.whl - Mend

thealgorithms-mcp 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

thealgorithms_mcp/__init__.py +3 -0
thealgorithms_mcp/fetch.py +56 -0
thealgorithms_mcp/index.py +141 -0
thealgorithms_mcp/parse.py +40 -0
thealgorithms_mcp/search.py +49 -0
thealgorithms_mcp/server.py +75 -0
thealgorithms_mcp-0.1.0.dist-info/METADATA +72 -0
thealgorithms_mcp-0.1.0.dist-info/RECORD +10 -0
thealgorithms_mcp-0.1.0.dist-info/WHEEL +4 -0
thealgorithms_mcp-0.1.0.dist-info/entry_points.txt +2 -0

thealgorithms_mcp/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""TheAlgorithms MCP — query TheAlgorithms/Python for implementations + doctests."""
+__version__ = "0.1.0"

thealgorithms_mcp/fetch.py ADDED Viewed

@@ -0,0 +1,56 @@
+"""On-demand fetch of a single algorithm file's source, cached by path + ETag."""
+from __future__ import annotations
+import json
+from pathlib import Path
+import httpx
+from .index import CACHE_DIR, RAW_BASE
+# Fetched file bodies are cached in a "files" subdirectory of the shared cache dir.
+FILE_CACHE_DIR = CACHE_DIR / "files"
+def _cache_paths(path: str) -> tuple[Path, Path]:
+    """Return (body_file, meta_file) cache locations for a repo-relative path.
+    Slashes are flattened to "__" so every cached file sits directly in FILE_CACHE_DIR;
+    the metadata sidecar uses the same name with ".meta" appended.
+    """
+    flat_name = "__".join(path.split("/"))
+    body_file = FILE_CACHE_DIR / flat_name
+    meta_file = FILE_CACHE_DIR / f"{flat_name}.meta"
+    return body_file, meta_file
+def get_file(path: str) -> str:
+    """Return raw source for a repo-relative path such as 'sorts/merge_sort.py'.
+    The path is validated first: it must be a forward-slash relative path with no empty,
+    '.' or '..' segments and no backslash or colon. Anything else raises FileNotFoundError,
+    so a crafted path can neither leave the repository prefix in the request URL nor
+    escape FILE_CACHE_DIR on disk.
+    Conditional GET via ETag; 304 reuses the cached body. Network failures fall back to
+    cache when present, else raise. Raises FileNotFoundError on a 404 (bad path); other
+    non-success statuses raise through resp.raise_for_status().
+    Cache files are read and written as UTF-8 regardless of the platform default, and an
+    unreadable, corrupt or unwritable cache never fails the fetch itself.
+    """
+    # Reject anything that is not a plain relative repo path before it reaches URL or disk.
+    segments = path.split("/")
+    if "\\" in path or ":" in path or any(seg in ("", ".", "..") for seg in segments):
+        raise FileNotFoundError(path)
+    body_file, meta_file = _cache_paths(path)
+    etag = None
+    try:
+        cached_body = body_file.read_text(encoding="utf-8")
+    except (OSError, UnicodeDecodeError):
+        # Missing or unreadable cache entry: behave as if nothing was cached.
+        cached_body = None
+    if cached_body is not None:
+        try:
+            etag = json.loads(meta_file.read_text(encoding="utf-8")).get("etag")
+        except (OSError, ValueError, AttributeError):
+            # No sidecar, corrupt JSON, or JSON that is not an object: skip the ETag.
+            etag = None
+    headers = {"User-Agent": "thealgorithms-mcp"}
+    if etag:
+        headers["If-None-Match"] = etag
+    try:
+        resp = httpx.get(RAW_BASE + path, headers=headers, timeout=30, follow_redirects=True)
+    except httpx.HTTPError:
+        if cached_body is not None:
+            return cached_body
+        raise
+    if resp.status_code == 304 and cached_body is not None:
+        return cached_body
+    if resp.status_code == 404:
+        raise FileNotFoundError(path)
+    resp.raise_for_status()
+    try:
+        FILE_CACHE_DIR.mkdir(parents=True, exist_ok=True)
+        body_file.write_text(resp.text, encoding="utf-8")
+        meta_file.write_text(json.dumps({"etag": resp.headers.get("ETag")}), encoding="utf-8")
+    except OSError:
+        # The cache is only an optimization; an unwritable cache must not fail the fetch.
+        pass
+    return resp.text

thealgorithms_mcp/index.py ADDED Viewed

@@ -0,0 +1,141 @@
+"""Fetch, parse, and cache TheAlgorithms/Python DIRECTORY.md.
+Hybrid model: the index is small (~1,160 entries) so we cache it whole, validated by
+ETag with a 24h TTL fallback. File *contents* are fetched on demand (see fetch.py).
+"""
+from __future__ import annotations
+import json
+import re
+import time
+from pathlib import Path
+import httpx
+from platformdirs import user_cache_dir
+# Upstream repository and branch that every URL in this module is built from.
+REPO = "TheAlgorithms/Python"
+BRANCH = "master"
+# Prefix for raw file downloads; repo-relative paths are appended directly to it.
+RAW_BASE = f"https://raw.githubusercontent.com/{REPO}/{BRANCH}/"
+DIRECTORY_URL = RAW_BASE + "DIRECTORY.md"
+# Freshness window for the cached index: 24 hours, in seconds.
+TTL_SECONDS = 24 * 3600
+DRIFT_THRESHOLD = 0.95  # keep prior cache if a refresh matches fewer than this fraction of links
+# Per-user cache directory and the JSON file holding the parsed index.
+CACHE_DIR = Path(user_cache_dir("thealgorithms-mcp"))
+INDEX_FILE = CACHE_DIR / "directory.json"
+# ENTRY_RE: a "* [name](path)" bullet whose target ends in ".py"; captures name and path.
+ENTRY_RE = re.compile(r"^\s*\* \[(?P<name>.+?)\]\((?P<path>.+?\.py)\)\s*$")
+# LINK_RE: any "* [text](target)" bullet; used as the denominator of the match rate.
+LINK_RE = re.compile(r"^\s*\* \[.+?\]\(.+?\)\s*$")
+# Process-lifetime memo so repeated tool calls don't re-read disk.
+_memo: dict | None = None
+def github_url(path: str) -> str:
+    """Build the github.com "blob" page URL for a repo-relative file path."""
+    return "/".join(("https://github.com", REPO, "blob", BRANCH, path))
+def _parse(text: str) -> tuple[list[dict], float]:
+    """Turn DIRECTORY.md text into index entries plus a format-health ratio.
+    Returns (entries, match_rate). Each entry is {"name", "path", "category"}, where
+    category is the first path segment. match_rate is len(entries) divided by the number
+    of bullet-link lines, or 1.0 when the text contains no bullet-link lines at all.
+    """
+    rows = text.splitlines()
+    total_links = sum(1 for row in rows if LINK_RE.match(row))
+    entries: list[dict] = []
+    for found in filter(None, map(ENTRY_RE.match, rows)):
+        name, path = found.group("name", "path")
+        category, _, _ = path.partition("/")
+        entries.append({"name": name, "path": path, "category": category})
+    if not total_links:
+        return entries, 1.0
+    return entries, len(entries) / total_links
+def _read_cache() -> dict | None:
+    """Load the on-disk index cache, or return None when it is missing or unusable.
+    "Unusable" covers an unreadable file, invalid JSON, and JSON that is not an object
+    holding a list under "entries". Callers use cache.get(...) and cache["entries"]
+    without further checks, so a malformed file must read as "no cache" rather than
+    surface later as an AttributeError or KeyError.
+    """
+    try:
+        data = json.loads(INDEX_FILE.read_text(encoding="utf-8"))
+    except (OSError, ValueError):
+        # OSError: missing or unreadable file. ValueError: bad JSON or undecodable bytes.
+        return None
+    if not isinstance(data, dict) or not isinstance(data.get("entries"), list):
+        return None
+    return data
+def _write_cache(data: dict) -> None:
+    """Persist the index cache as JSON in INDEX_FILE, creating CACHE_DIR when absent."""
+    CACHE_DIR.mkdir(exist_ok=True, parents=True)
+    serialized = json.dumps(data)
+    INDEX_FILE.write_text(serialized)
+def load_index(force: bool = False) -> list[dict]:
+    """Return the parsed index, refreshing from GitHub when stale.
+    Args:
+      force: skip both freshness checks and go straight to the conditional GET.
+    Order of operations:
+      1. Serve the process memo if present and fresh (and not forced) -- no disk read.
+      2. Serve the disk cache if fresh.
+      3. Conditional GET (If-None-Match). 304 -> reuse cached entries, bump timestamp.
+         200 -> parse, apply drift guard, persist.
+      4. Any network failure or error status -> fall back to cached entries (offline
+         degradation). With no cache to fall back on, the httpx error propagates.
+    """
+    global _memo
+    now = time.time()
+    # Consult the memo before touching disk; reading the cache file first would defeat it.
+    if not force and _memo and (now - _memo.get("fetched_at", 0) < TTL_SECONDS):
+        return _memo["entries"]
+    cache = _read_cache()
+    if not force and cache and (now - cache.get("fetched_at", 0) < TTL_SECONDS):
+        _memo = cache
+        return cache["entries"]
+    headers = {"User-Agent": "thealgorithms-mcp"}
+    if cache and cache.get("etag"):
+        headers["If-None-Match"] = cache["etag"]
+    try:
+        resp = httpx.get(DIRECTORY_URL, headers=headers, timeout=30, follow_redirects=True)
+        if not (resp.status_code == 304 and cache):
+            # Raised inside the try so 4xx/5xx responses share the transport-error fallback.
+            resp.raise_for_status()
+    except httpx.HTTPError:
+        if cache:
+            _memo = cache
+            return cache["entries"]  # offline: stale is better than dead
+        raise
+    if resp.status_code == 304 and cache:
+        cache["fetched_at"] = now
+        _write_cache(cache)
+        _memo = cache
+        return cache["entries"]
+    entries, match_rate = _parse(resp.text)
+    # Drift guard: a sudden drop in match rate means the format changed under us.
+    if match_rate < DRIFT_THRESHOLD and cache and cache.get("entries"):
+        # Keep the known-good index rather than silently shipping a broken one.
+        cache["fetched_at"] = now
+        _write_cache(cache)
+        _memo = cache
+        return cache["entries"]
+    data = {
+        "entries": entries,
+        "etag": resp.headers.get("ETag"),
+        "fetched_at": now,
+        "match_rate": match_rate,
+    }
+    _write_cache(data)
+    _memo = data
+    return entries
+def list_categories(entries: list[dict]) -> list[dict]:
+    """Count entries per category; return [{"category", "count"}] sorted by category."""
+    tally: dict[str, int] = {}
+    for entry in entries:
+        label = entry["category"]
+        tally.setdefault(label, 0)
+        tally[label] += 1
+    return [{"category": label, "count": tally[label]} for label in sorted(tally)]
+def category_entries(entries: list[dict], category: str) -> list[dict]:
+    """Return {"name", "path"} for each entry whose category equals ``category``, in index order."""
+    selected: list[dict] = []
+    for entry in entries:
+        if entry["category"] != category:
+            continue
+        selected.append({"name": entry["name"], "path": entry["path"]})
+    return selected

thealgorithms_mcp/parse.py ADDED Viewed

@@ -0,0 +1,40 @@
+"""Extract the module description and all doctest examples from a source file.
+Doctests in this repo live in module, class, AND function docstrings — often several
+per file. We walk the full AST and run stdlib ``doctest.DocTestParser`` over every
+docstring, so nothing is missed regardless of where the example sits.
+"""
+from __future__ import annotations
+import ast
+import doctest
+# AST node types that ast.get_docstring accepts, i.e. the ones that can own a docstring.
+_DOC_NODES = (ast.Module, ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)
+def parse_source(source: str) -> dict:
+    """Return {description, doctests, line_count} for one Python source string.
+    description: module-level docstring (the human-readable summary), or "".
+    doctests: list of {"code", "expected"} drawn from every docstring in the file.
+    line_count: number of lines in ``source``.
+    Tolerates bad input: source that cannot be parsed yields an empty description and no
+    doctests, and a docstring whose examples are malformed (DocTestParser raises
+    ValueError, e.g. for ">>>x" with no space after the prompt) is skipped on its own so
+    the remaining docstrings still contribute their examples.
+    """
+    line_count = len(source.splitlines())
+    try:
+        tree = ast.parse(source)
+    except (SyntaxError, ValueError):
+        # Non-parseable (rare); still useful to return raw line count.
+        # ValueError covers source containing null bytes on interpreters that report it so.
+        return {"description": "", "doctests": [], "line_count": line_count}
+    description = ast.get_docstring(tree) or ""
+    parser = doctest.DocTestParser()
+    doctests: list[dict] = []
+    for node in ast.walk(tree):
+        if not isinstance(node, _DOC_NODES):
+            continue
+        doc = ast.get_docstring(node)
+        if not doc:
+            continue
+        try:
+            examples = parser.get_examples(doc)
+        except ValueError:
+            # One malformed docstring must not discard the examples from all the others.
+            continue
+        doctests.extend(
+            {"code": ex.source.rstrip("\n"), "expected": ex.want.rstrip("\n")}
+            for ex in examples
+        )
+    return {"description": description, "doctests": doctests, "line_count": line_count}

thealgorithms_mcp/search.py ADDED Viewed

@@ -0,0 +1,49 @@
+"""Fuzzy lexical ranking over the cached index. No network — index only."""
+from __future__ import annotations
+import re
+from rapidfuzz import fuzz
+# Runs of whitespace, underscores and slashes all count as a single separator.
+_NORM_RE = re.compile(r"[\s_/]+")
+def _norm(s: str) -> str:
+    """Lowercase ``s`` and turn each separator run into one space, trimming both ends.
+    Makes spellings such as 'merge_sort' and 'Merge Sort' compare equal.
+    """
+    lowered = s.lower()
+    spaced = _NORM_RE.sub(" ", lowered)
+    return spaced.strip()
+def search(entries: list[dict], query: str, category: str | None = None, limit: int = 10) -> list[dict]:
+    """Rank entries by fuzzy match against name (primary) and path (fallback).
+    Args:
+      entries: index entries, each with "name", "path" and "category".
+      query: free-text search string; normalized with _norm before comparison.
+      category: when given, only entries whose "category" equals it exactly are ranked.
+      limit: maximum number of results; zero or a negative value yields an empty list.
+    Scoring rewards *tight* matches so exact intent wins over a superset:
+      - exact normalized name match dominates ("merge sort" -> "Merge Sort", not "Iterative Merge Sort")
+      - a substring hit is scaled by how much of the name it covers (tighter = higher)
+      - path matches count at half weight, so a name match always outranks an incidental path hit
+    Returns [{name, category, path, score}] sorted desc; equal scores keep index order.
+    """
+    if limit <= 0:
+        # A negative limit would otherwise slice results off the *end* of the ranking.
+        return []
+    qn = _norm(query)
+    pool = entries if category is None else [e for e in entries if e["category"] == category]
+    scored: list[tuple[float, dict]] = []
+    for e in pool:
+        name_n = _norm(e["name"])
+        path_n = _norm(e["path"].removesuffix(".py"))
+        base = fuzz.WRatio(qn, name_n)
+        path_score = fuzz.partial_ratio(qn, path_n) * 0.5
+        bonus = 0.0
+        if qn == name_n:
+            bonus = 100.0
+        elif qn and qn in name_n:
+            # A non-empty qn inside name_n guarantees len(name_n) > 0, so no division by zero.
+            bonus = 30.0 * len(qn) / len(name_n)  # covers more of the name -> bigger bonus
+        score = max(base, path_score) + bonus
+        scored.append((score, e))
+    # Stable sort: reverse=True still keeps the original order among equal scores.
+    scored.sort(key=lambda pair: pair[0], reverse=True)
+    return [
+        {"name": e["name"], "category": e["category"], "path": e["path"], "score": round(s, 1)}
+        for s, e in scored[:limit]
+    ]

thealgorithms_mcp/server.py ADDED Viewed

@@ -0,0 +1,75 @@
+"""TheAlgorithms MCP server.
+Four tools over stdio:
+  list_categories()                      -> categories + counts (index only)
+  search_algorithms(query, category?, limit) -> ranked file paths (index only)
+  get_category(category)                 -> all entries in a category (index only)
+  get_algorithm(path, include_source)    -> source + extracted doctests (on-demand fetch)
+"""
+from __future__ import annotations
+from mcp.server.fastmcp import FastMCP
+from . import index as idx
+from . import fetch, parse, search
+# Shared server instance named "thealgorithms"; the @mcp.tool() decorators below are
+# applied to each tool function, and main() starts it.
+mcp = FastMCP("thealgorithms")
+@mcp.tool()
+def list_categories() -> list[dict]:
+    """List the algorithm categories (e.g. sorts, graphs, dynamic_programming) with entry counts."""
+    # Docstring left verbatim: it is presumably surfaced to MCP clients as the tool description.
+    entries = idx.load_index()
+    return idx.list_categories(entries)
+@mcp.tool()
+def search_algorithms(query: str, category: str | None = None, limit: int = 10) -> list[dict]:
+    """Search TheAlgorithms/Python by name/topic. Returns ranked {name, category, path, score}.
+    Feed a returned `path` to get_algorithm to read the implementation. Optionally constrain
+    to a `category` (see list_categories).
+    """
+    # Docstring left verbatim: it is presumably surfaced to MCP clients as the tool description.
+    entries = idx.load_index()
+    ranked = search.search(entries, query, category=category, limit=limit)
+    return ranked
+@mcp.tool()
+def get_category(category: str) -> list[dict]:
+    """List every algorithm in one category as {name, path}. Use for 'show me every sort'."""
+    # Docstring left verbatim: it is presumably surfaced to MCP clients as the tool description.
+    entries = idx.load_index()
+    return idx.category_entries(entries, category)
+@mcp.tool()
+def get_algorithm(path: str, include_source: bool = True) -> dict:
+    """Fetch one algorithm by repo-relative path (e.g. 'sorts/merge_sort.py').
+    Always returns the module description and extracted doctests (the usage examples).
+    Set include_source=False for a cheap peek (description + examples, no body).
+    Returns {path, github_url, description, doctests, line_count, source?}.
+    """
+    try:
+        source = fetch.get_file(path)
+    except FileNotFoundError:
+        # A bad path is reported as data rather than raised, pointing the caller at search.
+        message = f"No file at '{path}'. Call search_algorithms first to find the right path."
+        return {"error": message, "path": path}
+    parsed = parse.parse_source(source)
+    result = {"path": path, "github_url": idx.github_url(path)}
+    # Copy the parsed fields in the documented order.
+    for field in ("description", "doctests", "line_count"):
+        result[field] = parsed[field]
+    if include_source:
+        result["source"] = source
+    return result
+def main() -> None:
+    """Console-script entry point. Runs over stdio."""
+    # Starts the module-level FastMCP instance with its default arguments.
+    mcp.run()
+# Also allow running this module directly as a script.
+if __name__ == "__main__":
+    main()

thealgorithms_mcp-0.1.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,72 @@
+Metadata-Version: 2.4
+Name: thealgorithms-mcp
+Version: 0.1.0
+Summary: MCP server for querying TheAlgorithms/Python — search algorithms and fetch implementations with their doctests as examples.
+Requires-Python: >=3.11
+Requires-Dist: httpx>=0.27
+Requires-Dist: mcp>=1.2.0
+Requires-Dist: platformdirs>=4.0
+Requires-Dist: rapidfuzz>=3.9
+Description-Content-Type: text/markdown
+# thealgorithms-mcp
+mcp-name: io.github.mcande21/thealgorithms-mcp
+An [MCP](https://modelcontextprotocol.io) server for querying
+[TheAlgorithms/Python](https://github.com/TheAlgorithms/Python) — search ~1,160 algorithm
+implementations and fetch any one with its **doctests as usage examples**.
+Hybrid design: the small `DIRECTORY.md` index is cached locally (ETag + 24h TTL) for instant
+fuzzy search; file contents are fetched on demand from `raw.githubusercontent.com`. No API token,
+no rate limits, tiny footprint. See [`DESIGN.md`](DESIGN.md).
+## Tools
+| Tool | Purpose |
+|------|---------|
+| `list_categories()` | Categories (sorts, graphs, dynamic_programming, …) with counts |
+| `search_algorithms(query, category?, limit=10)` | Ranked `{name, category, path, score}` |
+| `get_category(category)` | Every algorithm in a category |
+| `get_algorithm(path, include_source=True)` | Source + extracted doctests for one file |
+Typical flow: `search_algorithms("dijkstra")` → `get_algorithm("graphs/dijkstra.py")`.
+## Install
+**From PyPI (recommended):**
+```json
+{ "thealgorithms": { "command": "uvx", "args": ["thealgorithms-mcp"] } }
+```
+**From GitHub (no PyPI needed):**
+```json
+{ "thealgorithms": {
+    "command": "uvx",
+    "args": ["--from", "git+https://github.com/mcande21/thealgorithms-mcp", "thealgorithms-mcp"] } }
+```
+**From a local checkout (development):**
+```bash
+uv sync
+uv run thealgorithms-mcp          # serves over stdio
+```
+```json
+{ "thealgorithms": {
+    "command": "uv",
+    "args": ["run", "--directory", "/path/to/thealgorithms-mcp", "thealgorithms-mcp"] } }
+```
+Add any of the above to `~/.normandy-generic/mcp.json` (or your MCP client config).
+## Verify
+```bash
+uv run python scripts/verify_stdio.py
+```
+Spawns the server over stdio and asserts every tool against the live repo.

thealgorithms_mcp-0.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,10 @@
+thealgorithms_mcp/__init__.py,sha256=LtD_JNwH2r8e6_bkvFpsEfrl_KUErPiFypu9yynjC4M,110
+thealgorithms_mcp/fetch.py,sha256=kLj8dDypwKlB87_d61sQiCf18jKZtOE1AKyd4I1le_g,1707
+thealgorithms_mcp/index.py,sha256=wJmP7FbnRmwX_61Ii3Kyz8VdyCmDWDO__gOCdhKEKg4,4518
+thealgorithms_mcp/parse.py,sha256=aPN1OvFtyzfZ43wVs7r3AENhvPA1cJb7Bow4e93f-0Y,1537
+thealgorithms_mcp/search.py,sha256=BjcFDYkuoz58r_IOEnj8PGla4zM5kOyBVl_E2YB6ENQ,1827
+thealgorithms_mcp/server.py,sha256=xpe3CHNL5r2bYEc6TJvV8bpOzsbiirvW4s-Dkc6EGMo,2509
+thealgorithms_mcp-0.1.0.dist-info/METADATA,sha256=JxMHqUKNJV6R9tfqCKhgiUQhSf1Uf-n69ln7ZRqbs4Y,2208
+thealgorithms_mcp-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
+thealgorithms_mcp-0.1.0.dist-info/entry_points.txt,sha256=JCNpNa-Jziw3_YXWxRzSnO8sINFzNyo6vXyU7hBMgf4,68
+thealgorithms_mcp-0.1.0.dist-info/RECORD,,

thealgorithms_mcp-0.1.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.29.0
+Root-Is-Purelib: true
+Tag: py3-none-any

thealgorithms_mcp-0.1.0.dist-info/entry_points.txt ADDED Viewed

@@ -0,0 +1,2 @@
+[console_scripts]
+thealgorithms-mcp = thealgorithms_mcp.server:main