thealgorithms-mcp 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ """TheAlgorithms MCP — query TheAlgorithms/Python for implementations + doctests."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,56 @@
1
+ """On-demand fetch of a single algorithm file's source, cached by path + ETag."""
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ from pathlib import Path
6
+
7
+ import httpx
8
+
9
+ from .index import CACHE_DIR, RAW_BASE
10
+
11
# Fetched file bodies live in their own subdirectory of the shared cache root.
FILE_CACHE_DIR = CACHE_DIR / "files"


def _cache_paths(path: str) -> tuple[Path, Path]:
    """Map a repo-relative path to its (body file, metadata file) cache locations.

    Every "/" is flattened to "__" so each cached file sits directly inside
    FILE_CACHE_DIR; the metadata file is the same name with ".meta" appended.
    """
    flat_name = "__".join(path.split("/"))
    body_path = FILE_CACHE_DIR / flat_name
    meta_path = FILE_CACHE_DIR / f"{flat_name}.meta"
    return body_path, meta_path
17
+
18
+
19
def get_file(path: str) -> str:
    """Return raw source for a repo-relative path.

    Conditional GET via ETag; 304 reuses the cached body. Network failures fall back to
    cache when present, else raise.

    Args:
        path: repo-relative file path using "/" separators, e.g. "sorts/merge_sort.py".

    Returns:
        The file's text, either freshly downloaded or taken from the local cache.

    Raises:
        FileNotFoundError: on a 404, or when ``path`` is not a plain relative path
            (empty, contains a backslash, or has an empty, "." or ".." segment).
        httpx.HTTPError: on a network failure with no cached copy, or on any other
            non-success HTTP status.
    """
    # `path` comes straight from tool callers. A ".." segment could be collapsed by URL
    # normalisation and resolve outside the RAW_BASE prefix, and a backslash would act
    # as a directory separator in the cache filename on Windows. Reject such paths
    # before touching the disk or the network; the empty-segment check also covers
    # "", a leading "/" and "//".
    if "\\" in path or any(seg in ("", ".", "..") for seg in path.split("/")):
        raise FileNotFoundError(path)

    body_file, meta_file = _cache_paths(path)

    # A missing, unreadable, or undecodable cache entry is simply a cache miss.
    try:
        cached_body = body_file.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        cached_body = None

    # Only revalidate when there is a body to reuse: a 304 is useless without one.
    etag = None
    if cached_body is not None:
        try:
            etag = json.loads(meta_file.read_text(encoding="utf-8")).get("etag")
        except (OSError, ValueError, AttributeError):
            # Missing file, bad JSON/encoding, or JSON that is not an object.
            etag = None
        if not isinstance(etag, str):
            etag = None

    headers = {"User-Agent": "thealgorithms-mcp"}
    if etag:
        headers["If-None-Match"] = etag

    try:
        resp = httpx.get(RAW_BASE + path, headers=headers, timeout=30, follow_redirects=True)
    except httpx.HTTPError:
        if cached_body is not None:
            return cached_body
        raise

    if resp.status_code == 304 and cached_body is not None:
        return cached_body
    if resp.status_code == 404:
        raise FileNotFoundError(path)
    resp.raise_for_status()

    # Pin UTF-8 so non-ASCII source round-trips regardless of the platform's locale.
    text = resp.text
    FILE_CACHE_DIR.mkdir(parents=True, exist_ok=True)
    body_file.write_text(text, encoding="utf-8")
    meta_file.write_text(json.dumps({"etag": resp.headers.get("ETag")}), encoding="utf-8")
    return text
@@ -0,0 +1,141 @@
1
+ """Fetch, parse, and cache TheAlgorithms/Python DIRECTORY.md.
2
+
3
+ Hybrid model: the index is small (~1,160 entries) so we cache it whole, validated by
4
+ ETag with a 24h TTL fallback. File *contents* are fetched on demand (see fetch.py).
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import json
9
+ import re
10
+ import time
11
+ from pathlib import Path
12
+
13
+ import httpx
14
+ from platformdirs import user_cache_dir
15
+
16
# Upstream repository coordinates and the URLs derived from them.
REPO = "TheAlgorithms/Python"
BRANCH = "master"
RAW_BASE = f"https://raw.githubusercontent.com/{REPO}/{BRANCH}/"
DIRECTORY_URL = RAW_BASE + "DIRECTORY.md"

# Freshness policy for the cached index.
TTL_SECONDS = 24 * 60 * 60
DRIFT_THRESHOLD = 0.95  # keep prior cache if a refresh matches fewer than this fraction of links

# On-disk cache locations.
CACHE_DIR = Path(user_cache_dir("thealgorithms-mcp"))
INDEX_FILE = CACHE_DIR / "directory.json"

# ENTRY_RE captures bullet links whose target ends in ".py"; LINK_RE matches any
# bullet link and serves as the denominator for the parser's match rate.
ENTRY_RE = re.compile(r"^\s*\* \[(?P<name>.+?)\]\((?P<path>.+?\.py)\)\s*$")
LINK_RE = re.compile(r"^\s*\* \[.+?\]\(.+?\)\s*$")

# Process-lifetime memo so repeated tool calls don't re-read disk.
_memo: dict | None = None
31
+
32
+
33
def github_url(path: str) -> str:
    """Build the github.com "blob" page URL for a repo-relative path."""
    url = f"https://github.com/{REPO}/blob/{BRANCH}/{path}"
    return url
36
+
37
+
38
def _parse(text: str) -> tuple[list[dict], float]:
    """Turn DIRECTORY.md text into index entries plus a parser health metric.

    Returns (entries, match_rate). Each entry is {"name", "path", "category"}, where
    category is the first "/"-separated segment of the path. match_rate is the number
    of entries divided by the number of bullet-link lines, or 1.0 when the text has
    no bullet-link lines at all.
    """
    lines = text.splitlines()
    link_lines = sum(1 for line in lines if LINK_RE.match(line))

    entries: list[dict] = []
    for hit in map(ENTRY_RE.match, lines):
        if hit is None:
            continue
        name, path = hit.group("name", "path")
        entries.append({"name": name, "path": path, "category": path.split("/")[0]})

    if not link_lines:
        return entries, 1.0
    return entries, len(entries) / link_lines
53
+
54
+
55
def _read_cache() -> dict | None:
    """Load the persisted index from INDEX_FILE, or return None if it is unusable.

    The cache counts as usable only when the file exists, decodes as JSON, is a JSON
    object, holds a list under "entries", and (when present) a numeric "fetched_at".
    Anything else is reported as "no cache", so a damaged or foreign file triggers a
    clean refetch instead of an exception further down the call chain.
    """
    try:
        data = json.loads(INDEX_FILE.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: missing or unreadable file. ValueError: bad JSON or bad encoding.
        return None
    if not isinstance(data, dict) or not isinstance(data.get("entries"), list):
        return None
    if not isinstance(data.get("fetched_at", 0), (int, float)):
        return None
    return data
62
+
63
+
64
def _write_cache(data: dict) -> None:
    """Serialize the index payload to JSON at INDEX_FILE, creating CACHE_DIR if needed."""
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(data)
    INDEX_FILE.write_text(serialized)
67
+
68
+
69
def load_index(force: bool = False) -> list[dict]:
    """Return the parsed index, refreshing from GitHub when stale.

    Order of operations:
      1. Serve the process memo if present and fresh (and not forced) without
         touching the disk.
      2. Serve the disk cache if fresh.
      3. Conditional GET (If-None-Match). 304 -> reuse cached entries, bump timestamp.
         200 -> parse, apply drift guard, persist.
      4. Any network failure or HTTP error status -> fall back to cached entries
         (offline degradation).

    Args:
        force: skip the freshness checks in steps 1-2 and go to the network.

    Returns:
        The list of entry dicts ({"name", "path", "category"}).

    Raises:
        httpx.HTTPError: the request failed (or returned an error status) and there
            is no cached index to fall back on.
    """
    global _memo
    now = time.time()

    # Check the memo before reading the cache file: avoiding that read is its purpose.
    if not force and _memo and (now - _memo.get("fetched_at", 0) < TTL_SECONDS):
        return _memo["entries"]

    cache = _read_cache()
    if not force and cache and (now - cache.get("fetched_at", 0) < TTL_SECONDS):
        _memo = cache
        return cache["entries"]

    headers = {"User-Agent": "thealgorithms-mcp"}
    if cache and cache.get("etag"):
        headers["If-None-Match"] = cache["etag"]

    try:
        resp = httpx.get(DIRECTORY_URL, headers=headers, timeout=30, follow_redirects=True)
        # A 304 is only meaningful when there is a cache to reuse; every other
        # non-success status is treated like a network failure below.
        if not (resp.status_code == 304 and cache):
            resp.raise_for_status()
    except httpx.HTTPError:
        if cache:
            _memo = cache
            return cache["entries"]  # offline: stale is better than dead
        raise

    if resp.status_code == 304:
        cache["fetched_at"] = now
        _write_cache(cache)
        _memo = cache
        return cache["entries"]

    entries, match_rate = _parse(resp.text)

    # Drift guard: a sudden drop in match rate means the format changed under us.
    # An empty result is just as suspect: with no link lines the match rate is 1.0,
    # so it has to be checked explicitly.
    suspicious = match_rate < DRIFT_THRESHOLD or not entries
    if suspicious and cache and cache.get("entries"):
        # Keep the known-good index rather than silently shipping a broken one.
        cache["fetched_at"] = now
        _write_cache(cache)
        _memo = cache
        return cache["entries"]

    data = {
        "entries": entries,
        "etag": resp.headers.get("ETag"),
        "fetched_at": now,
        "match_rate": match_rate,
    }
    _write_cache(data)
    _memo = data
    return entries
127
+
128
+
129
def list_categories(entries: list[dict]) -> list[dict]:
    """Count entries per category.

    Returns one {"category", "count"} dict per distinct category, ordered by
    category name.
    """
    tally: dict[str, int] = {}
    for entry in entries:
        key = entry["category"]
        if key in tally:
            tally[key] += 1
        else:
            tally[key] = 1

    summary = []
    for name in sorted(tally):
        summary.append({"category": name, "count": tally[name]})
    return summary
134
+
135
+
136
def category_entries(entries: list[dict], category: str) -> list[dict]:
    """Return {"name", "path"} for each entry whose category equals `category` exactly.

    Input order is preserved; an unknown category yields an empty list.
    """
    selected: list[dict] = []
    for entry in entries:
        if entry["category"] != category:
            continue
        selected.append({"name": entry["name"], "path": entry["path"]})
    return selected
@@ -0,0 +1,40 @@
1
+ """Extract the module description and all doctest examples from a source file.
2
+
3
+ Doctests in this repo live in module, class, AND function docstrings — often several
4
+ per file. We walk the full AST and run stdlib ``doctest.DocTestParser`` over every
5
+ docstring, so nothing is missed regardless of where the example sits.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import ast
10
+ import doctest
11
+
12
# AST node types that can carry a docstring.
_DOC_NODES = (ast.Module, ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)


def parse_source(source: str) -> dict:
    """Return {description, doctests, line_count}.

    description: module-level docstring (the human-readable summary), or "".
    doctests: list of {"code", "expected"} drawn from every docstring in the file.
    line_count: number of lines in ``source``.

    Parsing is best-effort. Source that cannot be parsed yields an empty description
    and no doctests, and a docstring whose examples are malformed is skipped without
    discarding the examples found in the file's other docstrings.
    """
    line_count = len(source.splitlines())
    try:
        tree = ast.parse(source)
    except (SyntaxError, ValueError):
        # Non-parseable (rare); still useful to return raw line count.
        # ValueError covers source containing null bytes on Python 3.11.
        return {"description": "", "doctests": [], "line_count": line_count}

    description = ast.get_docstring(tree) or ""
    parser = doctest.DocTestParser()
    doctests: list[dict] = []
    for node in ast.walk(tree):
        if not isinstance(node, _DOC_NODES):
            continue
        doc = ast.get_docstring(node)
        if not doc:
            continue
        try:
            examples = parser.get_examples(doc)
        except ValueError:
            # DocTestParser rejects malformed examples (e.g. ">>>foo()" with no space
            # after the prompt, or inconsistent indentation). Skip this docstring only.
            continue
        doctests.extend(
            {"code": ex.source.rstrip("\n"), "expected": ex.want.rstrip("\n")}
            for ex in examples
        )
    return {"description": description, "doctests": doctests, "line_count": line_count}
@@ -0,0 +1,49 @@
1
+ """Fuzzy lexical ranking over the cached index. No network — index only."""
2
+ from __future__ import annotations
3
+
4
+ import re
5
+
6
+ from rapidfuzz import fuzz
7
+
8
+ _NORM_RE = re.compile(r"[\s_/]+")
9
+
10
+
11
+ def _norm(s: str) -> str:
12
+ """Lowercase and collapse separators so 'merge_sort'/'Merge Sort' compare equal."""
13
+ return _NORM_RE.sub(" ", s.lower()).strip()
14
+
15
+
16
+ def search(entries: list[dict], query: str, category: str | None = None, limit: int = 10) -> list[dict]:
17
+ """Rank entries by fuzzy match against name (primary) and path (fallback).
18
+
19
+ Scoring rewards *tight* matches so exact intent wins over a superset:
20
+ - exact normalized name match dominates ("merge sort" -> "Merge Sort", not "Iterative Merge Sort")
21
+ - a substring hit is scaled by how much of the name it covers (tighter = higher)
22
+ - path matches count at half weight, so a name match always outranks an incidental path hit
23
+ Returns [{name, category, path, score}] sorted desc.
24
+ """
25
+ qn = _norm(query)
26
+ pool = entries if category is None else [e for e in entries if e["category"] == category]
27
+
28
+ scored: list[tuple[float, dict]] = []
29
+ for e in pool:
30
+ name_n = _norm(e["name"])
31
+ path_n = _norm(e["path"][:-3] if e["path"].endswith(".py") else e["path"])
32
+
33
+ base = fuzz.WRatio(qn, name_n)
34
+ path_score = fuzz.partial_ratio(qn, path_n) * 0.5
35
+
36
+ bonus = 0.0
37
+ if qn == name_n:
38
+ bonus = 100.0
39
+ elif qn and qn in name_n:
40
+ bonus = 30.0 * len(qn) / len(name_n) # covers more of the name -> bigger bonus
41
+
42
+ score = max(base, path_score) + bonus
43
+ scored.append((score, e))
44
+
45
+ scored.sort(key=lambda x: -x[0])
46
+ return [
47
+ {"name": e["name"], "category": e["category"], "path": e["path"], "score": round(s, 1)}
48
+ for s, e in scored[:limit]
49
+ ]
@@ -0,0 +1,75 @@
1
+ """TheAlgorithms MCP server.
2
+
3
+ Four tools over stdio:
4
+ list_categories() -> categories + counts (index only)
5
+ search_algorithms(query, category?, limit) -> ranked file paths (index only)
6
+ get_category(category) -> all entries in a category (index only)
7
+ get_algorithm(path, include_source) -> source + extracted doctests (on-demand fetch)
8
+ """
9
+ from __future__ import annotations
10
+
11
+ from mcp.server.fastmcp import FastMCP
12
+
13
+ from . import index as idx
14
+ from . import fetch, parse, search
15
+
16
# Single server instance; the @mcp.tool() decorators below register tools on it.
mcp = FastMCP("thealgorithms")


@mcp.tool()
def list_categories() -> list[dict]:
    """List the algorithm categories (e.g. sorts, graphs, dynamic_programming) with entry counts."""
    # NOTE(review): FastMCP is understood to publish tool docstrings to clients as the
    # tool description, so the docstring above is kept verbatim. Confirm against the SDK.
    entries = idx.load_index()
    return idx.list_categories(entries)
23
+
24
+
25
@mcp.tool()
def search_algorithms(query: str, category: str | None = None, limit: int = 10) -> list[dict]:
    """Search TheAlgorithms/Python by name/topic. Returns ranked {name, category, path, score}.

    Feed a returned `path` to get_algorithm to read the implementation. Optionally constrain
    to a `category` (see list_categories).
    """
    # Ranking runs over the cached index only; file contents are not fetched here.
    entries = idx.load_index()
    ranked = search.search(entries, query, category=category, limit=limit)
    return ranked
33
+
34
+
35
@mcp.tool()
def get_category(category: str) -> list[dict]:
    """List every algorithm in one category as {name, path}. Use for 'show me every sort'."""
    # The category name is matched exactly by idx.category_entries.
    entries = idx.load_index()
    return idx.category_entries(entries, category)
39
+
40
+
41
@mcp.tool()
def get_algorithm(path: str, include_source: bool = True) -> dict:
    """Fetch one algorithm by repo-relative path (e.g. 'sorts/merge_sort.py').

    Always returns the module description and extracted doctests (the usage examples).
    Set include_source=False for a cheap peek (description + examples, no body).
    Returns {path, github_url, description, doctests, line_count, source?}.
    """
    try:
        source = fetch.get_file(path)
    except FileNotFoundError:
        # A bad path becomes a structured error payload so the caller can recover.
        message = f"No file at '{path}'. Call search_algorithms first to find the right path."
        return {"error": message, "path": path}

    parsed = parse.parse_source(source)
    result = {"path": path, "github_url": idx.github_url(path)}
    for field in ("description", "doctests", "line_count"):
        result[field] = parsed[field]
    if include_source:
        result["source"] = source
    return result
67
+
68
+
69
def main() -> None:
    """Entry point for the console script: start the MCP server (stdio transport)."""
    mcp.run()


# Also allow running the module directly as a script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,72 @@
1
+ Metadata-Version: 2.4
2
+ Name: thealgorithms-mcp
3
+ Version: 0.1.0
4
+ Summary: MCP server for querying TheAlgorithms/Python — search algorithms and fetch implementations with their doctests as examples.
5
+ Requires-Python: >=3.11
6
+ Requires-Dist: httpx>=0.27
7
+ Requires-Dist: mcp>=1.2.0
8
+ Requires-Dist: platformdirs>=4.0
9
+ Requires-Dist: rapidfuzz>=3.9
10
+ Description-Content-Type: text/markdown
11
+
12
+ # thealgorithms-mcp
13
+
14
+ mcp-name: io.github.mcande21/thealgorithms-mcp
15
+
16
+ An [MCP](https://modelcontextprotocol.io) server for querying
17
+ [TheAlgorithms/Python](https://github.com/TheAlgorithms/Python) — search ~1,160 algorithm
18
+ implementations and fetch any one with its **doctests as usage examples**.
19
+
20
+ Hybrid design: the small `DIRECTORY.md` index is cached locally (ETag + 24h TTL) for instant
21
+ fuzzy search; file contents are fetched on demand from `raw.githubusercontent.com`. No API token,
22
+ no rate limits, tiny footprint. See [`DESIGN.md`](DESIGN.md).
23
+
24
+ ## Tools
25
+
26
+ | Tool | Purpose |
27
+ |------|---------|
28
+ | `list_categories()` | Categories (sorts, graphs, dynamic_programming, …) with counts |
29
+ | `search_algorithms(query, category?, limit=10)` | Ranked `{name, category, path, score}` |
30
+ | `get_category(category)` | Every algorithm in a category |
31
+ | `get_algorithm(path, include_source=True)` | Source + extracted doctests for one file |
32
+
33
+ Typical flow: `search_algorithms("dijkstra")` → `get_algorithm("graphs/dijkstra.py")`.
34
+
35
+ ## Install
36
+
37
+ **From PyPI (recommended):**
38
+
39
+ ```json
40
+ { "thealgorithms": { "command": "uvx", "args": ["thealgorithms-mcp"] } }
41
+ ```
42
+
43
+ **From GitHub (no PyPI needed):**
44
+
45
+ ```json
46
+ { "thealgorithms": {
47
+ "command": "uvx",
48
+ "args": ["--from", "git+https://github.com/mcande21/thealgorithms-mcp", "thealgorithms-mcp"] } }
49
+ ```
50
+
51
+ **From a local checkout (development):**
52
+
53
+ ```bash
54
+ uv sync
55
+ uv run thealgorithms-mcp # serves over stdio
56
+ ```
57
+
58
+ ```json
59
+ { "thealgorithms": {
60
+ "command": "uv",
61
+ "args": ["run", "--directory", "/path/to/thealgorithms-mcp", "thealgorithms-mcp"] } }
62
+ ```
63
+
64
+ Add any of the above to `~/.normandy-generic/mcp.json` (or your MCP client config).
65
+
66
+ ## Verify
67
+
68
+ ```bash
69
+ uv run python scripts/verify_stdio.py
70
+ ```
71
+
72
+ Spawns the server over stdio and asserts every tool against the live repo.
@@ -0,0 +1,10 @@
1
+ thealgorithms_mcp/__init__.py,sha256=LtD_JNwH2r8e6_bkvFpsEfrl_KUErPiFypu9yynjC4M,110
2
+ thealgorithms_mcp/fetch.py,sha256=kLj8dDypwKlB87_d61sQiCf18jKZtOE1AKyd4I1le_g,1707
3
+ thealgorithms_mcp/index.py,sha256=wJmP7FbnRmwX_61Ii3Kyz8VdyCmDWDO__gOCdhKEKg4,4518
4
+ thealgorithms_mcp/parse.py,sha256=aPN1OvFtyzfZ43wVs7r3AENhvPA1cJb7Bow4e93f-0Y,1537
5
+ thealgorithms_mcp/search.py,sha256=BjcFDYkuoz58r_IOEnj8PGla4zM5kOyBVl_E2YB6ENQ,1827
6
+ thealgorithms_mcp/server.py,sha256=xpe3CHNL5r2bYEc6TJvV8bpOzsbiirvW4s-Dkc6EGMo,2509
7
+ thealgorithms_mcp-0.1.0.dist-info/METADATA,sha256=JxMHqUKNJV6R9tfqCKhgiUQhSf1Uf-n69ln7ZRqbs4Y,2208
8
+ thealgorithms_mcp-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
9
+ thealgorithms_mcp-0.1.0.dist-info/entry_points.txt,sha256=JCNpNa-Jziw3_YXWxRzSnO8sINFzNyo6vXyU7hBMgf4,68
10
+ thealgorithms_mcp-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ thealgorithms-mcp = thealgorithms_mcp.server:main