sol-mcp 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sol_mcp-0.2.0.dist-info/METADATA +218 -0
- sol_mcp-0.2.0.dist-info/RECORD +20 -0
- sol_mcp-0.2.0.dist-info/WHEEL +4 -0
- sol_mcp-0.2.0.dist-info/entry_points.txt +3 -0
- solana_mcp/__init__.py +3 -0
- solana_mcp/cli.py +527 -0
- solana_mcp/config.py +324 -0
- solana_mcp/expert/__init__.py +5 -0
- solana_mcp/expert/guidance.py +452 -0
- solana_mcp/indexer/__init__.py +8 -0
- solana_mcp/indexer/chunker.py +457 -0
- solana_mcp/indexer/compiler.py +1101 -0
- solana_mcp/indexer/downloader.py +304 -0
- solana_mcp/indexer/embedder.py +755 -0
- solana_mcp/indexer/manifest.py +411 -0
- solana_mcp/logging.py +85 -0
- solana_mcp/models.py +62 -0
- solana_mcp/server.py +746 -0
- solana_mcp/tools/__init__.py +1 -0
- solana_mcp/versions.py +391 -0
|
@@ -0,0 +1,1101 @@
|
|
|
1
|
+
"""Compile/extract source code into structured JSON for indexing.

Parses source files to extract:
- Rust: pub fn, pub struct, pub enum, const, impl blocks
- C: functions, structs, typedefs, #define macros (for Firedancer)

Uses tree-sitter for robust parsing when available.
"""

import json
import re
from dataclasses import asdict, dataclass
from pathlib import Path

# Probe for the optional tree-sitter grammars.  Each grammar is tracked with
# its own flag because either one may be installed without the other.
TREE_SITTER_RUST = False
TREE_SITTER_C = False

try:
    import tree_sitter_rust as ts_rust
    from tree_sitter import Language, Parser
    TREE_SITTER_RUST = True
except ImportError:
    # tree-sitter Rust grammar not installed; RustParser falls back to regex.
    pass

try:
    import tree_sitter_c as ts_c
    from tree_sitter import Language, Parser
    TREE_SITTER_C = True
except ImportError:
    # tree-sitter C grammar not installed; CParser falls back to regex.
    pass

# True when at least one grammar is available.  NOTE(review): this does not
# imply a *specific* grammar is present — callers must still check the
# per-language flag before touching ts_rust / ts_c.
TREE_SITTER_AVAILABLE = TREE_SITTER_RUST or TREE_SITTER_C
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
class ExtractedItem:
    """An extracted code item (function, type, or impl block)."""

    kind: str  # one of: function, struct, enum, const, impl, type
    name: str  # identifier of the item (e.g. function or type name)
    signature: str  # For functions: full signature; for types: definition line
    body: str  # Full source code of the item
    doc_comment: str | None  # preceding doc comment text, if any
    file_path: str  # path of the source file the item came from
    line_number: int  # 1-based line where the item starts
    visibility: str  # pub, pub(crate), private ("pub" for all C items)
    attributes: list[str]  # #[derive(...)] etc.
50
|
+
|
|
51
|
+
@dataclass
class ExtractedConstant:
    """An extracted constant (Rust `const` item or C `#define` macro)."""

    name: str  # constant/macro identifier
    value: str  # raw right-hand-side text (empty string if not found)
    type_annotation: str | None  # Rust type annotation; None for C macros
    doc_comment: str | None  # preceding doc comment text, if any
    file_path: str  # path of the source file the constant came from
    line_number: int  # 1-based line where the constant is defined
|
+
|
|
63
|
+
class RustParser:
    """Parse Rust source code to extract public definitions.

    Uses tree-sitter for an accurate AST when the ``tree_sitter_rust``
    grammar is installed; otherwise falls back to a best-effort regex scan.
    Both paths return the same ``(items, constants)`` shape.
    """

    def __init__(self):
        # Gate on TREE_SITTER_RUST specifically, not TREE_SITTER_AVAILABLE:
        # if only the C grammar is installed, TREE_SITTER_AVAILABLE is True
        # but `ts_rust` was never imported, and constructing the parser
        # would raise NameError.
        if TREE_SITTER_RUST:
            self.parser = Parser(Language(ts_rust.language()))
        else:
            self.parser = None

    def parse_file(self, file_path: Path) -> tuple[list[ExtractedItem], list[ExtractedConstant]]:
        """Parse a Rust file and extract items.

        Returns ``([], [])`` for unreadable or non-UTF-8 files instead of
        raising, so callers can blindly iterate over a source tree.
        """
        try:
            content = file_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            return [], []

        if self.parser:
            return self._parse_with_tree_sitter(content, str(file_path))
        else:
            return self._parse_with_regex(content, str(file_path))

    def _parse_with_tree_sitter(
        self, content: str, file_path: str
    ) -> tuple[list[ExtractedItem], list[ExtractedConstant]]:
        """Parse using tree-sitter for an accurate AST.

        Extracts pub functions/structs/enums, all consts, and impl blocks.
        """
        tree = self.parser.parse(bytes(content, "utf-8"))
        items: list[ExtractedItem] = []
        constants: list[ExtractedConstant] = []

        lines = content.split("\n")

        def get_text(node) -> str:
            # tree-sitter byte offsets index the original source string.
            return content[node.start_byte : node.end_byte]

        def get_doc_comment(node) -> str | None:
            """Collect `///` / `//!` doc lines immediately preceding a node.

            Blank lines and `#[...]` attribute lines between the docs and the
            node are skipped; any other line terminates the scan.
            """
            doc_lines: list[str] = []
            line = node.start_point[0] - 1

            while line >= 0:
                line_text = lines[line].strip()
                if line_text.startswith("///"):
                    doc_lines.insert(0, line_text[3:].strip())
                    line -= 1
                elif line_text.startswith("//!"):
                    doc_lines.insert(0, line_text[3:].strip())
                    line -= 1
                elif line_text == "" or line_text.startswith("#["):
                    line -= 1
                else:
                    break

            return "\n".join(doc_lines) if doc_lines else None

        def get_attributes(node) -> list[str]:
            """Collect `#[...]` attribute lines preceding a node."""
            attrs: list[str] = []
            line = node.start_point[0] - 1

            while line >= 0:
                line_text = lines[line].strip()
                if line_text.startswith("#["):
                    attrs.insert(0, line_text)
                    line -= 1
                elif line_text.startswith("///") or line_text.startswith("//!"):
                    line -= 1
                elif line_text == "":
                    line -= 1
                else:
                    break

            return attrs

        def get_visibility(node) -> str:
            """Return "pub", "pub(crate)", the raw modifier, or "private"."""
            for child in node.children:
                if child.type == "visibility_modifier":
                    vis_text = get_text(child)
                    if vis_text == "pub":
                        return "pub"
                    elif "crate" in vis_text:
                        return "pub(crate)"
                    else:
                        return vis_text
            return "private"

        def process_node(node):
            if node.type == "function_item":
                visibility = get_visibility(node)
                # Only index the public API surface.
                if visibility.startswith("pub"):
                    name = None
                    signature_parts = []

                    for child in node.children:
                        if child.type == "identifier":
                            name = get_text(child)
                        elif child.type == "parameters":
                            signature_parts.append(get_text(child))
                        elif child.type == "return_type":
                            signature_parts.append(f"-> {get_text(child)}")

                    if name:
                        # Rebuild a compact `fn name(params) -> ret` signature.
                        params = signature_parts[0] if signature_parts else "()"
                        ret = signature_parts[1] if len(signature_parts) > 1 else ""
                        signature = f"fn {name}{params} {ret}".strip()

                        items.append(
                            ExtractedItem(
                                kind="function",
                                name=name,
                                signature=signature,
                                body=get_text(node),
                                doc_comment=get_doc_comment(node),
                                file_path=file_path,
                                line_number=node.start_point[0] + 1,
                                visibility=visibility,
                                attributes=get_attributes(node),
                            )
                        )

            elif node.type == "struct_item":
                visibility = get_visibility(node)
                if visibility.startswith("pub"):
                    name = None
                    for child in node.children:
                        if child.type == "type_identifier":
                            name = get_text(child)
                            break

                    if name:
                        items.append(
                            ExtractedItem(
                                kind="struct",
                                name=name,
                                signature=f"struct {name}",
                                body=get_text(node),
                                doc_comment=get_doc_comment(node),
                                file_path=file_path,
                                line_number=node.start_point[0] + 1,
                                visibility=visibility,
                                attributes=get_attributes(node),
                            )
                        )

            elif node.type == "enum_item":
                visibility = get_visibility(node)
                if visibility.startswith("pub"):
                    name = None
                    for child in node.children:
                        if child.type == "type_identifier":
                            name = get_text(child)
                            break

                    if name:
                        items.append(
                            ExtractedItem(
                                kind="enum",
                                name=name,
                                signature=f"enum {name}",
                                body=get_text(node),
                                doc_comment=get_doc_comment(node),
                                file_path=file_path,
                                line_number=node.start_point[0] + 1,
                                visibility=visibility,
                                attributes=get_attributes(node),
                            )
                        )

            elif node.type == "const_item":
                # Constants are indexed regardless of visibility.
                visibility = get_visibility(node)
                name = None
                type_ann = None
                value = None

                for child in node.children:
                    if child.type == "identifier":
                        name = get_text(child)
                    elif child.type == "type_identifier" or child.type.endswith("_type"):
                        type_ann = get_text(child)

                # The initializer is everything after the first '=' up to ';'.
                full_text = get_text(node)
                if "=" in full_text:
                    value = full_text.split("=", 1)[1].strip().rstrip(";")

                if name:
                    constants.append(
                        ExtractedConstant(
                            name=name,
                            value=value or "",
                            type_annotation=type_ann,
                            doc_comment=get_doc_comment(node),
                            file_path=file_path,
                            line_number=node.start_point[0] + 1,
                        )
                    )

            elif node.type == "impl_item":
                # Record the impl block for the type (first type-ish child).
                type_name = None
                for child in node.children:
                    if child.type == "type_identifier":
                        type_name = get_text(child)
                        break
                    elif child.type == "generic_type":
                        type_name = get_text(child)
                        break

                if type_name:
                    items.append(
                        ExtractedItem(
                            kind="impl",
                            name=f"impl {type_name}",
                            signature=f"impl {type_name}",
                            body=get_text(node),
                            doc_comment=get_doc_comment(node),
                            file_path=file_path,
                            line_number=node.start_point[0] + 1,
                            visibility="pub",  # impl blocks are effectively pub if type is
                            attributes=get_attributes(node),
                        )
                    )

            # Recurse into children (also finds items nested in modules).
            for child in node.children:
                process_node(child)

        process_node(tree.root_node)
        return items, constants

    def _parse_with_regex(
        self, content: str, file_path: str
    ) -> tuple[list[ExtractedItem], list[ExtractedConstant]]:
        """Fallback regex-based parsing when tree-sitter isn't available.

        Best-effort only: the patterns do not handle every Rust construct
        (e.g. parameter lists containing nested parentheses).
        """
        items: list[ExtractedItem] = []
        constants: list[ExtractedConstant] = []
        lines = content.split("\n")

        # Patterns for extraction
        fn_pattern = re.compile(
            r"^(\s*)(pub(?:\([^)]+\))?\s+)?fn\s+(\w+)\s*(<[^>]+>)?\s*\(([^)]*)\)(\s*->\s*[^{]+)?\s*\{",
            re.MULTILINE,
        )
        struct_pattern = re.compile(
            r"^(\s*)(pub(?:\([^)]+\))?\s+)?struct\s+(\w+)", re.MULTILINE
        )
        enum_pattern = re.compile(
            r"^(\s*)(pub(?:\([^)]+\))?\s+)?enum\s+(\w+)", re.MULTILINE
        )
        const_pattern = re.compile(
            r"^(\s*)(pub(?:\([^)]+\))?\s+)?const\s+(\w+)\s*:\s*([^=]+)\s*=\s*([^;]+);",
            re.MULTILINE,
        )

        def match_braces_from(idx: int) -> int:
            """Advance past the body opened just before *idx*; return end index.

            Assumes one '{' has already been consumed (depth starts at 1).
            """
            depth = 1
            while idx < len(content) and depth > 0:
                ch = content[idx]
                if ch == "{":
                    depth += 1
                elif ch == "}":
                    depth -= 1
                idx += 1
            return idx

        def span_to_body_end(idx: int) -> int:
            """Scan forward to the end of a struct/enum-like definition.

            The definition ends at the matching close of its first '{' block,
            or at a top-level ';' for unit/tuple structs. Returns the index
            one past the terminator.
            """
            depth = 0
            while idx < len(content):
                ch = content[idx]
                if ch == "{":
                    depth += 1
                elif ch == "}":
                    depth -= 1
                    # Depth returning to zero means the definition's brace
                    # block just closed — include the brace and stop (the
                    # old code kept scanning here and swallowed trailing
                    # code up to the next stray ';').
                    if depth <= 0:
                        idx += 1
                        break
                elif ch == ";" and depth == 0:
                    idx += 1
                    break
                idx += 1
            return idx

        # Extract functions
        for match in fn_pattern.finditer(content):
            visibility = (match.group(2) or "").strip()
            if not visibility.startswith("pub"):
                continue

            name = match.group(3)
            params = match.group(5)
            ret = match.group(6) or ""

            start = match.start()
            line_num = content[:start].count("\n") + 1

            # The pattern consumed the opening '{'; brace-match to the end.
            end = match_braces_from(match.end())
            body = content[start:end]

            doc_comment = self._get_doc_comment_at_line(lines, line_num - 1)

            items.append(
                ExtractedItem(
                    kind="function",
                    name=name,
                    signature=f"fn {name}({params}){ret}".strip(),
                    body=body,
                    doc_comment=doc_comment,
                    file_path=file_path,
                    line_number=line_num,
                    visibility=visibility if visibility else "pub",
                    attributes=[],
                )
            )

        # Extract structs (braced, tuple, and unit forms)
        for match in struct_pattern.finditer(content):
            visibility = (match.group(2) or "").strip()
            if not visibility.startswith("pub"):
                continue

            name = match.group(3)
            start = match.start()
            line_num = content[:start].count("\n") + 1

            body = content[start : span_to_body_end(match.end())]
            doc_comment = self._get_doc_comment_at_line(lines, line_num - 1)

            items.append(
                ExtractedItem(
                    kind="struct",
                    name=name,
                    signature=f"struct {name}",
                    body=body,
                    doc_comment=doc_comment,
                    file_path=file_path,
                    line_number=line_num,
                    visibility=visibility if visibility else "pub",
                    attributes=[],
                )
            )

        # Extract enums
        for match in enum_pattern.finditer(content):
            visibility = (match.group(2) or "").strip()
            if not visibility.startswith("pub"):
                continue

            name = match.group(3)
            start = match.start()
            line_num = content[:start].count("\n") + 1

            body = content[start : span_to_body_end(match.end())]
            doc_comment = self._get_doc_comment_at_line(lines, line_num - 1)

            items.append(
                ExtractedItem(
                    kind="enum",
                    name=name,
                    signature=f"enum {name}",
                    body=body,
                    doc_comment=doc_comment,
                    file_path=file_path,
                    line_number=line_num,
                    visibility=visibility if visibility else "pub",
                    attributes=[],
                )
            )

        # Extract constants
        for match in const_pattern.finditer(content):
            visibility = match.group(2) or ""
            if not visibility.strip().startswith("pub"):
                continue

            name = match.group(3)
            type_ann = match.group(4).strip()
            value = match.group(5).strip()

            start = match.start()
            line_num = content[:start].count("\n") + 1
            doc_comment = self._get_doc_comment_at_line(lines, line_num - 1)

            constants.append(
                ExtractedConstant(
                    name=name,
                    value=value,
                    type_annotation=type_ann,
                    doc_comment=doc_comment,
                    file_path=file_path,
                    line_number=line_num,
                )
            )

        return items, constants

    def _get_doc_comment_at_line(self, lines: list[str], line_idx: int) -> str | None:
        """Collect `///` / `//!` doc lines ending just above *line_idx*.

        Skips blank lines and `#[...]` attributes between the docs and the
        item, mirroring the tree-sitter path's behavior.
        """
        doc_lines: list[str] = []
        idx = line_idx - 1

        while idx >= 0:
            line = lines[idx].strip()
            if line.startswith("///"):
                doc_lines.insert(0, line[3:].strip())
                idx -= 1
            elif line.startswith("//!"):
                doc_lines.insert(0, line[3:].strip())
                idx -= 1
            elif line == "" or line.startswith("#["):
                idx -= 1
            else:
                break

        return "\n".join(doc_lines) if doc_lines else None
|
494
|
+
|
|
495
|
+
|
|
496
|
+
class CParser:
    """Parse C source code to extract definitions (for Firedancer).

    Uses tree-sitter when the ``tree_sitter_c`` grammar is installed;
    otherwise falls back to a best-effort regex scan.  Both paths return
    a ``(items, constants)`` tuple.
    """

    def __init__(self):
        if TREE_SITTER_C:
            self.parser = Parser(Language(ts_c.language()))
        else:
            # No C grammar available — parse_file() will use the regex path.
            self.parser = None

    def parse_file(self, file_path: Path) -> tuple[list[ExtractedItem], list[ExtractedConstant]]:
        """Parse a C file and extract items.

        Returns ``([], [])`` for unreadable or non-UTF-8 files instead of
        raising, so callers can blindly iterate over a source tree.
        """
        try:
            content = file_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            return [], []

        if self.parser:
            return self._parse_with_tree_sitter(content, str(file_path))
        else:
            return self._parse_with_regex(content, str(file_path))

    def _parse_with_tree_sitter(
        self, content: str, file_path: str
    ) -> tuple[list[ExtractedItem], list[ExtractedConstant]]:
        """Parse using tree-sitter for accurate AST."""
        tree = self.parser.parse(bytes(content, "utf-8"))
        items = []
        constants = []

        lines = content.split("\n")

        def get_text(node) -> str:
            # tree-sitter byte offsets index the original source string.
            return content[node.start_byte : node.end_byte]

        def get_doc_comment(node) -> str | None:
            """Get doc comment preceding a node (C-style /* */ or //)."""
            doc_lines = []
            line = node.start_point[0] - 1

            while line >= 0:
                line_text = lines[line].strip()
                if line_text.startswith("//"):
                    doc_lines.insert(0, line_text[2:].strip())
                    line -= 1
                elif line_text.endswith("*/"):
                    # Multi-line comment, find start
                    # NOTE(review): rstrip("*/") strips a character *set*,
                    # not the literal suffix — a comment line whose text ends
                    # in '*' or '/' loses those characters too.
                    comment_lines = [line_text.rstrip("*/").strip()]
                    line -= 1
                    while line >= 0 and "/*" not in lines[line]:
                        comment_lines.insert(0, lines[line].strip().lstrip("*").strip())
                        line -= 1
                    if line >= 0:
                        # Same caveat: lstrip("/*") is a character set strip.
                        start_line = lines[line].strip().lstrip("/*").strip()
                        if start_line:
                            comment_lines.insert(0, start_line)
                    doc_lines = comment_lines + doc_lines
                    break
                elif line_text == "":
                    line -= 1
                else:
                    break

            return "\n".join(doc_lines) if doc_lines else None

        def process_node(node):
            # Function definitions
            if node.type == "function_definition":
                name = None
                return_type = None
                params = None

                for child in node.children:
                    if child.type == "function_declarator":
                        for sub in child.children:
                            if sub.type == "identifier":
                                name = get_text(sub)
                            elif sub.type == "parameter_list":
                                params = get_text(sub)
                    elif child.type in ("primitive_type", "type_identifier", "sized_type_specifier"):
                        return_type = get_text(child)

                if name:
                    # Fall back to 'void' / '()' when the declarator didn't
                    # expose a return type or parameter list.
                    signature = f"{return_type or 'void'} {name}{params or '()'}"
                    items.append(
                        ExtractedItem(
                            kind="function",
                            name=name,
                            signature=signature,
                            body=get_text(node),
                            doc_comment=get_doc_comment(node),
                            file_path=file_path,
                            line_number=node.start_point[0] + 1,
                            visibility="pub",  # C doesn't have visibility modifiers in same way
                            attributes=[],
                        )
                    )

            # Struct definitions
            elif node.type == "struct_specifier":
                name = None
                for child in node.children:
                    if child.type == "type_identifier":
                        name = get_text(child)
                        break

                # Require a field list so bare `struct foo` *references*
                # (as opposed to definitions) are not indexed.
                if name and any(c.type == "field_declaration_list" for c in node.children):
                    items.append(
                        ExtractedItem(
                            kind="struct",
                            name=name,
                            signature=f"struct {name}",
                            body=get_text(node),
                            doc_comment=get_doc_comment(node),
                            file_path=file_path,
                            line_number=node.start_point[0] + 1,
                            visibility="pub",
                            attributes=[],
                        )
                    )

            # Enum definitions
            elif node.type == "enum_specifier":
                name = None
                for child in node.children:
                    if child.type == "type_identifier":
                        name = get_text(child)
                        break

                if name:
                    items.append(
                        ExtractedItem(
                            kind="enum",
                            name=name,
                            signature=f"enum {name}",
                            body=get_text(node),
                            doc_comment=get_doc_comment(node),
                            file_path=file_path,
                            line_number=node.start_point[0] + 1,
                            visibility="pub",
                            attributes=[],
                        )
                    )

            # Typedef
            elif node.type == "type_definition":
                name = None
                for child in node.children:
                    if child.type == "type_identifier":
                        # No break: the *last* type_identifier is the new
                        # typedef name (earlier ones belong to the aliased type).
                        name = get_text(child)

                if name:
                    items.append(
                        ExtractedItem(
                            kind="type",
                            name=name,
                            signature=f"typedef {name}",
                            body=get_text(node),
                            doc_comment=get_doc_comment(node),
                            file_path=file_path,
                            line_number=node.start_point[0] + 1,
                            visibility="pub",
                            attributes=[],
                        )
                    )

            # Recurse into children
            for child in node.children:
                process_node(child)

        process_node(tree.root_node)

        # Also extract #define constants with regex (tree-sitter doesn't handle preprocessor well)
        constants.extend(self._extract_defines(content, file_path))

        return items, constants

    def _extract_defines(self, content: str, file_path: str) -> list[ExtractedConstant]:
        """Extract #define macros."""
        constants = []
        lines = content.split("\n")

        # Object-like macros only: `\s+` after the name means function-like
        # macros (`#define FOO(x) ...`, '(' directly after the name) never match.
        define_pattern = re.compile(r"^#define\s+(\w+)\s+(.+)$")

        for i, line in enumerate(lines):
            match = define_pattern.match(line.strip())
            if match:
                name = match.group(1)
                value = match.group(2).strip()

                # Skip function-like macros (they have parentheses right after name)
                # NOTE(review): this check is unreachable — `\w+` can never
                # capture '(' — kept for defensiveness.
                if "(" in name:
                    continue

                # Get preceding comment
                doc_comment = None
                if i > 0:
                    prev_line = lines[i - 1].strip()
                    if prev_line.startswith("//"):
                        doc_comment = prev_line[2:].strip()
                    elif prev_line.endswith("*/"):
                        # Try to get multi-line comment
                        comment_lines = []
                        j = i - 1
                        while j >= 0 and "/*" not in lines[j]:
                            comment_lines.insert(0, lines[j].strip().lstrip("*").strip())
                            j -= 1
                        if comment_lines:
                            doc_comment = "\n".join(comment_lines)

                constants.append(
                    ExtractedConstant(
                        name=name,
                        value=value,
                        type_annotation=None,  # C macros don't have types
                        doc_comment=doc_comment,
                        file_path=file_path,
                        line_number=i + 1,
                    )
                )

        return constants

    def _parse_with_regex(
        self, content: str, file_path: str
    ) -> tuple[list[ExtractedItem], list[ExtractedConstant]]:
        """Fallback regex-based parsing when tree-sitter isn't available.

        Best-effort only: the simplified patterns miss multi-line
        declarations and pointer-heavy signatures.
        """
        items = []
        constants = []
        lines = content.split("\n")

        # Function pattern (simplified)
        fn_pattern = re.compile(
            r"^(\w+(?:\s*\*)?)\s+(\w+)\s*\(([^)]*)\)\s*\{",
            re.MULTILINE,
        )

        # Struct pattern
        struct_pattern = re.compile(
            r"^(?:typedef\s+)?struct\s+(\w+)\s*\{",
            re.MULTILINE,
        )

        # Extract functions
        for match in fn_pattern.finditer(content):
            return_type = match.group(1).strip()
            name = match.group(2)
            params = match.group(3)

            # Skip if it's a control statement
            if name in ("if", "while", "for", "switch"):
                continue

            start = match.start()
            line_num = content[:start].count("\n") + 1

            # Find the full function body (brace matching)
            brace_count = 1
            idx = match.end()
            while idx < len(content) and brace_count > 0:
                if content[idx] == "{":
                    brace_count += 1
                elif content[idx] == "}":
                    brace_count -= 1
                idx += 1

            body = content[match.start() : idx]

            # Get doc comment
            doc_comment = self._get_doc_comment_at_line(lines, line_num - 1)

            items.append(
                ExtractedItem(
                    kind="function",
                    name=name,
                    signature=f"{return_type} {name}({params})",
                    body=body,
                    doc_comment=doc_comment,
                    file_path=file_path,
                    line_number=line_num,
                    visibility="pub",
                    attributes=[],
                )
            )

        # Extract structs
        for match in struct_pattern.finditer(content):
            name = match.group(1)
            start = match.start()
            line_num = content[:start].count("\n") + 1

            # Find end of struct
            brace_count = 1
            idx = match.end()
            while idx < len(content) and brace_count > 0:
                if content[idx] == "{":
                    brace_count += 1
                elif content[idx] == "}":
                    brace_count -= 1
                idx += 1

            # Skip past the semicolon
            # NOTE(review): this scans to the *next* ';' anywhere in the
            # file; for a struct not immediately followed by ';' the body
            # can include unrelated trailing code — confirm against callers.
            while idx < len(content) and content[idx] != ";":
                idx += 1
            idx += 1

            body = content[match.start() : idx]
            doc_comment = self._get_doc_comment_at_line(lines, line_num - 1)

            items.append(
                ExtractedItem(
                    kind="struct",
                    name=name,
                    signature=f"struct {name}",
                    body=body,
                    doc_comment=doc_comment,
                    file_path=file_path,
                    line_number=line_num,
                    visibility="pub",
                    attributes=[],
                )
            )

        # Extract #define constants
        constants.extend(self._extract_defines(content, file_path))

        return items, constants

    def _get_doc_comment_at_line(self, lines: list[str], line_idx: int) -> str | None:
        """Get doc comment ending at the given line index.

        Collects consecutive `//` lines above *line_idx*, skipping blanks;
        /* ... */ blocks are NOT handled here (only in the tree-sitter path).
        """
        doc_lines = []
        idx = line_idx - 1

        while idx >= 0:
            line = lines[idx].strip()
            if line.startswith("//"):
                doc_lines.insert(0, line[2:].strip())
                idx -= 1
            elif line == "":
                idx -= 1
            else:
                break

        return "\n".join(doc_lines) if doc_lines else None
|
839
|
+
|
|
840
|
+
|
|
841
|
+
def compile_c(
    source_dir: Path,
    output_dir: Path,
    file_patterns: list[str] | None = None,
) -> dict:
    """
    Compile C source files into JSON extracts.

    Writes three files into *output_dir*: ``items.json`` (all extracted
    items), ``constants.json`` (all #define constants), and ``index.json``
    (name -> file/line lookup).

    Args:
        source_dir: Directory containing C source files
        output_dir: Directory to write JSON output
        file_patterns: Glob patterns for files to include (default: ["**/*.c", "**/*.h"])

    Returns:
        Statistics about extraction
    """
    if file_patterns is None:
        file_patterns = ["**/*.c", "**/*.h"]

    parser = CParser()
    all_items: list = []
    all_constants: list = []

    # Collect matching files. Deduplicate (overlapping patterns would yield
    # the same path twice) and sort so the JSON output is deterministic
    # across runs and platforms — glob order is not guaranteed.
    c_files = sorted({p for pattern in file_patterns for p in source_dir.glob(pattern)})

    # Parse each file
    for file_path in c_files:
        items, constants = parser.parse_file(file_path)

        # Make paths relative to source_dir
        for item in items:
            item.file_path = str(Path(item.file_path).relative_to(source_dir))
        for const in constants:
            const.file_path = str(Path(const.file_path).relative_to(source_dir))

        all_items.extend(items)
        all_constants.extend(constants)

    # Write output (explicit UTF-8 so results don't depend on the
    # platform's default locale encoding).
    output_dir.mkdir(parents=True, exist_ok=True)

    items_file = output_dir / "items.json"
    with open(items_file, "w", encoding="utf-8") as f:
        json.dump([asdict(item) for item in all_items], f, indent=2)

    constants_file = output_dir / "constants.json"
    with open(constants_file, "w", encoding="utf-8") as f:
        json.dump([asdict(const) for const in all_constants], f, indent=2)

    # Build index by name for fast lookup
    index = {
        "functions": {},
        "structs": {},
        "enums": {},
        "constants": {},
        "types": {},
    }

    for item in all_items:
        # Map kind -> bucket by pluralizing; unknown kinds (e.g. "impl")
        # simply don't get indexed.
        category = f"{item.kind}s"
        if category in index:
            index[category][item.name] = {
                "file": item.file_path,
                "line": item.line_number,
            }

    for const in all_constants:
        index["constants"][const.name] = {
            "file": const.file_path,
            "line": const.line_number,
            "value": const.value,
        }

    index_file = output_dir / "index.json"
    with open(index_file, "w", encoding="utf-8") as f:
        json.dump(index, f, indent=2)

    return {
        "files_processed": len(c_files),
        "items_extracted": len(all_items),
        "constants_extracted": len(all_constants),
        "functions": len([i for i in all_items if i.kind == "function"]),
        "structs": len([i for i in all_items if i.kind == "struct"]),
        "enums": len([i for i in all_items if i.kind == "enum"]),
        "types": len([i for i in all_items if i.kind == "type"]),
    }
|
930
|
+
|
|
931
|
+
|
|
932
|
+
def compile_rust(
    source_dir: Path,
    output_dir: Path,
    file_patterns: list[str] | None = None,
) -> dict:
    """
    Compile Rust source files into JSON extracts.

    Writes three artifacts into *output_dir*:
    - items.json: all extracted items (functions, structs, enums, impls)
    - constants.json: all extracted constants
    - index.json: per-category name -> {file, line} lookup tables

    Args:
        source_dir: Directory containing Rust source files
        output_dir: Directory to write JSON output
        file_patterns: Glob patterns for files to include (default: ["**/*.rs"])

    Returns:
        Statistics about extraction
    """
    if file_patterns is None:
        file_patterns = ["**/*.rs"]

    parser = RustParser()
    all_items = []
    all_constants = []

    # Collect matching files. Deduplicate (overlapping patterns can match the
    # same file twice) and sort so the emitted JSON is reproducible —
    # Path.glob yields entries in an unspecified, filesystem-dependent order.
    rust_files = sorted(
        {path for pattern in file_patterns for path in source_dir.glob(pattern)}
    )

    # Parse each file
    for file_path in rust_files:
        items, constants = parser.parse_file(file_path)

        # Store paths relative to source_dir so extracts are location-independent
        for item in items:
            item.file_path = str(Path(item.file_path).relative_to(source_dir))
        for const in constants:
            const.file_path = str(Path(const.file_path).relative_to(source_dir))

        all_items.extend(items)
        all_constants.extend(constants)

    # Write output (explicit UTF-8: don't depend on the locale's default encoding)
    output_dir.mkdir(parents=True, exist_ok=True)

    items_file = output_dir / "items.json"
    with open(items_file, "w", encoding="utf-8") as f:
        json.dump([asdict(item) for item in all_items], f, indent=2)

    constants_file = output_dir / "constants.json"
    with open(constants_file, "w", encoding="utf-8") as f:
        json.dump([asdict(const) for const in all_constants], f, indent=2)

    # Build index by name for fast lookup
    index = {
        "functions": {},
        "structs": {},
        "enums": {},
        "constants": {},
        "impls": {},
    }

    for item in all_items:
        # item.kind is singular ("function"); index buckets are plural
        category = f"{item.kind}s"
        if category in index:
            index[category][item.name] = {
                "file": item.file_path,
                "line": item.line_number,
            }

    for const in all_constants:
        index["constants"][const.name] = {
            "file": const.file_path,
            "line": const.line_number,
            "value": const.value,
        }

    index_file = output_dir / "index.json"
    with open(index_file, "w", encoding="utf-8") as f:
        json.dump(index, f, indent=2)

    return {
        "files_processed": len(rust_files),
        "items_extracted": len(all_items),
        "constants_extracted": len(all_constants),
        "functions": sum(1 for i in all_items if i.kind == "function"),
        "structs": sum(1 for i in all_items if i.kind == "struct"),
        "enums": sum(1 for i in all_items if i.kind == "enum"),
        "impls": sum(1 for i in all_items if i.kind == "impl"),
    }
def load_compiled_items(compiled_dir: Path) -> list[ExtractedItem]:
    """Load compiled items from JSON.

    Returns an empty list when items.json is absent.
    """
    items_file = compiled_dir / "items.json"
    if not items_file.exists():
        return []

    raw = json.loads(items_file.read_text())
    return [ExtractedItem(**entry) for entry in raw]
def load_compiled_constants(compiled_dir: Path) -> list[ExtractedConstant]:
    """Load compiled constants from JSON.

    Returns an empty list when constants.json is absent.
    """
    constants_file = compiled_dir / "constants.json"
    if not constants_file.exists():
        return []

    raw = json.loads(constants_file.read_text())
    return [ExtractedConstant(**entry) for entry in raw]
def lookup_constant(name: str, compiled_dir: Path) -> ExtractedConstant | None:
    """Fast lookup of a constant by name.

    Consults index.json first as a cheap existence check; only when the name
    is present does it scan the full constants file for the complete record.
    Returns None when the index is missing or the name is unknown.
    """
    index_file = compiled_dir / "index.json"
    if not index_file.exists():
        return None

    index = json.loads(index_file.read_text())
    if name not in index.get("constants", {}):
        return None

    # The index only stores a summary (file/line/value); fetch the full object.
    return next(
        (const for const in load_compiled_constants(compiled_dir) if const.name == name),
        None,
    )
def lookup_function(name: str, compiled_dir: Path) -> ExtractedItem | None:
    """Fast lookup of a function by name.

    Uses index.json as a cheap existence check before scanning the full
    items file. Returns None when the index is missing or the name is
    not indexed as a function.
    """
    index_file = compiled_dir / "index.json"
    if not index_file.exists():
        return None

    index = json.loads(index_file.read_text())
    if name not in index.get("functions", {}):
        return None

    # The index only stores file/line; fetch the full extracted item.
    return next(
        (
            entry
            for entry in load_compiled_items(compiled_dir)
            if entry.kind == "function" and entry.name == name
        ),
        None,
    )
if __name__ == "__main__":
    # Ad-hoc smoke test: compile a Rust source tree from the command line.
    import sys

    if len(sys.argv) < 3:
        print("Usage: compiler.py <source_dir> <output_dir>")
        sys.exit(1)

    src_path = Path(sys.argv[1])
    out_path = Path(sys.argv[2])

    print(f"Compiling Rust from {src_path} to {out_path}...")
    result = compile_rust(src_path, out_path)
    print(f"Results: {result}")