zoo_mcp 0.9.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zoo_mcp/kcl_samples.py ADDED
@@ -0,0 +1,313 @@
1
+ """KCL Samples fetching and search.
2
+
3
+ This module fetches the KCL samples manifest from the modeling-app GitHub repository
4
+ at server startup, loads sample files on demand, and provides search functionality for LLMs.
5
+ """
6
+
7
+ import asyncio
8
+ from dataclasses import dataclass, field
9
+ from typing import ClassVar, TypedDict
10
+
11
+ import httpx
12
+
13
+ from zoo_mcp import logger
14
+
15
# Base URL under which sample files are addressed as "<sample>/<filename>".
RAW_CONTENT_BASE = "https://raw.githubusercontent.com/KittyCAD/modeling-app/main/public/kcl-samples/"
# The manifest file sits directly under the samples base URL.
MANIFEST_URL = f"{RAW_CONTENT_BASE}manifest.json"
19
+
20
+
21
class SampleMetadata(TypedDict):
    """One sample's entry as read from manifest.json."""

    file: str
    # The leading directory component of this path is used as the sample name.
    pathFromProjectDirectoryToFirstFile: str
    multipleFiles: bool
    # Title and description are the human-readable text that search runs over.
    title: str
    description: str
    # Filenames that are fetched when the sample's content is requested.
    files: list[str]
30
+
31
+
32
class SampleFile(TypedDict):
    """Name and text of one file belonging to a KCL sample."""

    filename: str
    content: str
37
+
38
+
39
class SampleData(TypedDict):
    """A KCL sample's descriptive fields together with all of its files."""

    # Sample directory name; this is the key used to look the sample up.
    name: str
    title: str
    description: str
    multipleFiles: bool
    # One entry per file, each carrying 'filename' and 'content'.
    files: list[SampleFile]
47
+
48
+
49
@dataclass
class KCLSamples:
    """Holds the samples manifest together with already-fetched file contents.

    Once ``initialize`` has run, one shared instance is kept on the class and
    handed out by ``get``.
    """

    # sample directory name -> that sample's manifest entry
    manifest: dict[str, SampleMetadata] = field(default_factory=dict)
    # sample directory name -> {filename: file text} for files fetched so far
    file_cache: dict[str, dict[str, str]] = field(default_factory=dict)

    # Shared instance; remains None until initialize() stores one.
    _instance: ClassVar["KCLSamples | None"] = None

    @classmethod
    def get(cls) -> "KCLSamples":
        """Return the shared instance, or a new empty one if none is stored."""
        shared = cls._instance
        if shared is None:
            # Not initialized: return a fresh empty container. It is not
            # stored on the class, so each such call yields a new object.
            return cls()
        return shared

    @classmethod
    async def initialize(cls) -> None:
        """Load the manifest from GitHub unless an instance is already stored."""
        if cls._instance is not None:
            return
        cls._instance = await _fetch_manifest_from_github()
70
+
71
+
72
+ def _extract_sample_name(path: str) -> str:
73
+ """Extract the sample directory name from a path.
74
+
75
+ Example: "axial-fan/main.kcl" -> "axial-fan"
76
+ """
77
+ return path.split("/")[0] if "/" in path else path
78
+
79
+
80
+ def _extract_excerpt(content: str, query: str, context_chars: int = 200) -> str:
81
+ """Extract an excerpt around the first match of query in content."""
82
+ query_lower = query.lower()
83
+ content_lower = content.lower()
84
+
85
+ pos = content_lower.find(query_lower)
86
+ if pos == -1:
87
+ # Return first context_chars of content as fallback
88
+ return content[:context_chars].strip() + "..."
89
+
90
+ # Find start and end positions for excerpt
91
+ start = max(0, pos - context_chars // 2)
92
+ end = min(len(content), pos + len(query) + context_chars // 2)
93
+
94
+ # Adjust to word boundaries
95
+ if start > 0:
96
+ while start > 0 and content[start - 1] not in " \n\t":
97
+ start -= 1
98
+
99
+ if end < len(content):
100
+ while end < len(content) and content[end] not in " \n\t":
101
+ end += 1
102
+
103
+ excerpt = content[start:end].strip()
104
+
105
+ prefix = "..." if start > 0 else ""
106
+ suffix = "..." if end < len(content) else ""
107
+
108
+ return f"{prefix}{excerpt}{suffix}"
109
+
110
+
111
async def _fetch_file_content(
    client: httpx.AsyncClient, sample_name: str, filename: str
) -> tuple[str, str | None]:
    """Download one file of a sample.

    Args:
        client (httpx.AsyncClient): Client used to issue the request.
        sample_name (str): The sample directory name.
        filename (str): The file to fetch inside that directory.

    Returns:
        tuple[str, str | None]: ``filename`` paired with the response text,
            or with None when the request raised an ``httpx.HTTPError``
            (the failure is logged as a warning).
    """
    target = f"{RAW_CONTENT_BASE}{sample_name}/{filename}"
    try:
        reply = await client.get(target)
        reply.raise_for_status()
        body = reply.text
    except httpx.HTTPError as exc:
        logger.warning(f"Failed to fetch {sample_name}/{filename}: {exc}")
        return filename, None
    return filename, body
123
+
124
+
125
async def _fetch_sample_files(
    client: httpx.AsyncClient, sample_name: str, filenames: list[str]
) -> dict[str, str]:
    """Download every listed file of a sample concurrently.

    Args:
        client (httpx.AsyncClient): Client shared by all requests.
        sample_name (str): The sample directory name.
        filenames (list[str]): Files to fetch inside that directory.

    Returns:
        dict[str, str]: Mapping of filename to content. Files whose fetch
            yielded None are left out.
    """
    fetched = await asyncio.gather(
        *(_fetch_file_content(client, sample_name, name) for name in filenames)
    )

    contents: dict[str, str] = {}
    for name, text in fetched:
        if text is not None:
            contents[name] = text
    return contents
132
+
133
+
134
async def _fetch_manifest_from_github() -> KCLSamples:
    """Fetch the manifest from GitHub and return a KCLSamples instance.

    Loading is best-effort: any failure to download or interpret the manifest
    is logged as a warning and results in an empty (or partially filled)
    container rather than an exception.

    Returns:
        KCLSamples: Container whose ``manifest`` maps each sample directory
            name to its manifest entry. It is empty when the request fails,
            the body is not valid JSON, or the payload is not a list.
    """
    samples = KCLSamples()

    logger.info("Fetching KCL samples manifest from GitHub...")

    async with httpx.AsyncClient(timeout=30.0) as client:
        try:
            response = await client.get(MANIFEST_URL)
            response.raise_for_status()
            manifest_data = response.json()
        except (httpx.HTTPError, ValueError) as e:
            # ValueError covers a body that is not valid JSON, which is not
            # an httpx.HTTPError and would otherwise propagate to the caller.
            logger.warning(f"Failed to fetch samples manifest: {e}")
            return samples

    if not isinstance(manifest_data, list):
        logger.warning(
            "Failed to load samples manifest: expected a list of entries, "
            f"got {type(manifest_data).__name__}"
        )
        return samples

    # Index manifest by sample name, skipping entries that cannot be indexed.
    for entry in manifest_data:
        if not isinstance(entry, dict):
            continue
        first_file_path = entry.get("pathFromProjectDirectoryToFirstFile", "")
        if not isinstance(first_file_path, str):
            continue
        sample_name = _extract_sample_name(first_file_path)
        if sample_name:
            samples.manifest[sample_name] = entry

    logger.info(f"KCL samples manifest loaded with {len(samples.manifest)} samples")
    return samples
159
+
160
+
161
async def initialize_samples_cache() -> None:
    """Populate the shared samples cache by delegating to KCLSamples.initialize."""
    await KCLSamples.initialize()
164
+
165
+
166
def list_available_samples() -> list[dict]:
    """List every sample in the manifest with its basic information.

    Entries are ordered by sample name. Each one contains:
    - name: The sample directory name (used to retrieve the sample)
    - title: Human-readable title (the name is used when no title is present)
    - description: Brief description of the sample (empty when absent)
    - multipleFiles: Whether the sample contains multiple KCL files

    Pass a name to get_sample_content() to retrieve the full sample content.

    Returns:
        list[dict]: List of sample information dictionaries.
    """
    manifest = KCLSamples.get().manifest
    return [
        {
            "name": sample_name,
            "title": entry.get("title", sample_name),
            "description": entry.get("description", ""),
            "multipleFiles": entry.get("multipleFiles", False),
        }
        for sample_name, entry in sorted(manifest.items())
    ]
194
+
195
+
196
def search_samples(query: str, max_results: int = 5) -> list[dict]:
    """Find samples whose title, description or name contains the query.

    Matching is a case-insensitive substring search over the text
    "<title> <description> <name>". Results are ordered by a relevance score
    in which occurrences inside the title are weighted more heavily.

    Args:
        query (str): The search query (case-insensitive).
        max_results (int): Maximum number of results to return (default: 5).

    Returns:
        list[dict]: List of search results, each containing:
            - name: The sample directory name (used to retrieve the sample)
            - title: Human-readable title
            - description: Brief description of the sample
            - multipleFiles: Whether the sample contains multiple KCL files
            - match_count: Number of times the query appears in the
              searched text
            - excerpt: A relevant excerpt with the match in context
        For an empty or whitespace-only query, a single-element list holding
        an "error" entry is returned instead.
    """
    if not (query and query.strip()):
        return [{"error": "Empty search query"}]

    query = query.strip()
    needle = query.lower()
    manifest = KCLSamples.get().manifest

    # (score, record) pairs; the score is used for ordering only and never
    # appears in the returned records.
    scored: list[tuple[int, dict]] = []
    for name, metadata in manifest.items():
        title = metadata.get("title", name)
        description = metadata.get("description", "")
        haystack = f"{title} {description} {name}"

        match_count = haystack.lower().count(needle)
        if match_count == 0:
            continue

        # Each title occurrence earns three extra points on top of its
        # contribution to match_count.
        score = match_count + 3 * title.lower().count(needle)
        scored.append(
            (
                score,
                {
                    "name": name,
                    "title": title,
                    "description": description,
                    "multipleFiles": metadata.get("multipleFiles", False),
                    "match_count": match_count,
                    "excerpt": _extract_excerpt(haystack, query, context_chars=150),
                },
            )
        )

    # Highest score first; the sort is stable, so ties keep manifest order.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    ranked = [record for _points, record in scored]
    return ranked[:max_results]
258
+
259
+
260
async def get_sample_content(sample_name: str) -> SampleData | None:
    """Get the full content of a specific KCL sample including all files.

    File contents are fetched from GitHub on first request and cached on the
    samples container. A fetch is cached only when every listed file was
    retrieved, so a failed or partial download is retried on the next call
    instead of being served from the cache indefinitely.

    Args:
        sample_name (str): The sample directory name
            (e.g., "ball-bearing", "axial-fan")

    Returns:
        SampleData | None: A dictionary containing:
            - name: The sample directory name
            - title: Human-readable title
            - description: Brief description
            - multipleFiles: Whether the sample contains multiple files
            - files: List of file dictionaries, each with 'filename' and
              'content', ordered by filename. Files that could not be
              fetched are omitted.
        Returns None if the name contains ".." or "/", or if the sample is
        not in the manifest.
    """
    samples = KCLSamples.get()

    # Basic validation: the name is interpolated into a URL path.
    if ".." in sample_name or "/" in sample_name:
        return None

    metadata = samples.manifest.get(sample_name)
    if metadata is None:
        return None

    file_contents = samples.file_cache.get(sample_name)
    if file_contents is None:
        # Fetch all files for this sample
        filenames = metadata.get("files", ["main.kcl"])

        async with httpx.AsyncClient(timeout=30.0) as client:
            file_contents = await _fetch_sample_files(client, sample_name, filenames)

        # Cache only complete results. Failed files are dropped from the
        # mapping, so a shorter mapping means at least one fetch failed.
        if len(file_contents) == len(set(filenames)):
            samples.file_cache[sample_name] = file_contents

    # Build response
    files_list: list[SampleFile] = [
        SampleFile(filename=filename, content=content)
        for filename, content in sorted(file_contents.items())
    ]

    return SampleData(
        name=sample_name,
        title=metadata.get("title", sample_name),
        description=metadata.get("description", ""),
        multipleFiles=metadata.get("multipleFiles", False),
        files=files_list,
    )