sol-mcp 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1101 @@
1
+ """Compile/extract source code into structured JSON for indexing.
2
+
3
+ Parses source files to extract:
4
+ - Rust: pub fn, pub struct, pub enum, const, impl blocks
5
+ - C: functions, structs, typedefs, #define macros (for Firedancer)
6
+
7
+ Uses tree-sitter for robust parsing when available.
8
+ """
9
+
10
+ import json
11
+ import re
12
+ from dataclasses import asdict, dataclass
13
+ from pathlib import Path
14
+
15
+ # Try to import tree-sitter parsers
16
+ TREE_SITTER_RUST = False
17
+ TREE_SITTER_C = False
18
+
19
+ try:
20
+ import tree_sitter_rust as ts_rust
21
+ from tree_sitter import Language, Parser
22
+ TREE_SITTER_RUST = True
23
+ except ImportError:
24
+ pass
25
+
26
+ try:
27
+ import tree_sitter_c as ts_c
28
+ from tree_sitter import Language, Parser
29
+ TREE_SITTER_C = True
30
+ except ImportError:
31
+ pass
32
+
33
+ TREE_SITTER_AVAILABLE = TREE_SITTER_RUST or TREE_SITTER_C
34
+
35
+
36
@dataclass
class ExtractedItem:
    """An extracted code item (function, type definition, or impl block).

    Produced by RustParser/CParser and serialized to ``items.json`` via
    ``dataclasses.asdict``, so the field names below are also the on-disk
    JSON schema.
    """

    kind: str  # function, struct, enum, const, impl, type
    name: str  # identifier of the item (impl blocks use "impl <Type>")
    signature: str  # For functions: full signature; for types: definition line
    body: str  # Full source code
    doc_comment: str | None  # doc comment found immediately above the item
    file_path: str  # path of the defining file (made source-relative by compile_*)
    line_number: int  # 1-based line where the item starts
    visibility: str  # pub, pub(crate), private
    attributes: list[str]  # #[derive(...)] etc.
49
+
50
+
51
@dataclass
class ExtractedConstant:
    """An extracted constant (Rust ``const`` item or C ``#define`` macro).

    Serialized to ``constants.json`` via ``dataclasses.asdict``; field names
    form the on-disk JSON schema.
    """

    name: str  # constant/macro identifier
    value: str  # right-hand-side text (may be "" if no value was found)
    type_annotation: str | None  # Rust type annotation; always None for C macros
    doc_comment: str | None  # comment found immediately above the definition
    file_path: str  # path of the defining file (made source-relative by compile_*)
    line_number: int  # 1-based line of the definition
61
+
62
+
63
class RustParser:
    """Parse Rust source code and extract public definitions.

    Uses a tree-sitter AST when the ``tree_sitter_rust`` grammar is
    installed; otherwise falls back to a best-effort regex scan.
    """

    def __init__(self):
        # BUG FIX: this previously checked TREE_SITTER_AVAILABLE, which is
        # true when *either* grammar imported -- so with only tree_sitter_c
        # installed, `ts_rust` was unbound and this raised NameError.
        # Gate on the Rust-specific flag instead.
        if TREE_SITTER_RUST:
            self.parser = Parser(Language(ts_rust.language()))
        else:
            self.parser = None

    def parse_file(self, file_path: Path) -> tuple[list[ExtractedItem], list[ExtractedConstant]]:
        """Parse a Rust file and return ``(items, constants)``.

        Unreadable or non-UTF-8 files yield ``([], [])``.
        """
        try:
            content = file_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            return [], []

        if self.parser:
            return self._parse_with_tree_sitter(content, str(file_path))
        else:
            return self._parse_with_regex(content, str(file_path))

    def _parse_with_tree_sitter(
        self, content: str, file_path: str
    ) -> tuple[list[ExtractedItem], list[ExtractedConstant]]:
        """Parse using tree-sitter for accurate AST."""
        tree = self.parser.parse(bytes(content, "utf-8"))
        items = []
        constants = []

        lines = content.split("\n")

        def get_text(node) -> str:
            return content[node.start_byte : node.end_byte]

        def get_doc_comment(node) -> str | None:
            """Collect /// and //! doc lines immediately above *node*.

            Blank lines and #[...] attribute lines are skipped over.
            """
            doc_lines = []
            line = node.start_point[0] - 1

            while line >= 0:
                line_text = lines[line].strip()
                if line_text.startswith("///"):
                    doc_lines.insert(0, line_text[3:].strip())
                    line -= 1
                elif line_text.startswith("//!"):
                    doc_lines.insert(0, line_text[3:].strip())
                    line -= 1
                elif line_text == "" or line_text.startswith("#["):
                    line -= 1
                else:
                    break

            return "\n".join(doc_lines) if doc_lines else None

        def get_attributes(node) -> list[str]:
            """Collect #[...] attribute lines immediately above *node*."""
            attrs = []
            line = node.start_point[0] - 1

            while line >= 0:
                line_text = lines[line].strip()
                if line_text.startswith("#["):
                    attrs.insert(0, line_text)
                    line -= 1
                elif line_text.startswith("///") or line_text.startswith("//!"):
                    line -= 1
                elif line_text == "":
                    line -= 1
                else:
                    break

            return attrs

        def get_visibility(node) -> str:
            """Return 'pub', 'pub(crate)', the raw modifier text, or 'private'."""
            for child in node.children:
                if child.type == "visibility_modifier":
                    vis_text = get_text(child)
                    if vis_text == "pub":
                        return "pub"
                    elif "crate" in vis_text:
                        return "pub(crate)"
                    else:
                        return vis_text
            return "private"

        def process_node(node):
            if node.type == "function_item":
                visibility = get_visibility(node)
                if visibility.startswith("pub"):
                    name = None
                    signature_parts = []

                    for child in node.children:
                        if child.type == "identifier":
                            name = get_text(child)
                        elif child.type == "parameters":
                            signature_parts.append(get_text(child))
                        elif child.type == "return_type":
                            signature_parts.append(f"-> {get_text(child)}")

                    if name:
                        # Rebuild a compact "fn name(params) -> ret" signature.
                        params = signature_parts[0] if signature_parts else "()"
                        ret = signature_parts[1] if len(signature_parts) > 1 else ""
                        signature = f"fn {name}{params} {ret}".strip()

                        items.append(
                            ExtractedItem(
                                kind="function",
                                name=name,
                                signature=signature,
                                body=get_text(node),
                                doc_comment=get_doc_comment(node),
                                file_path=file_path,
                                line_number=node.start_point[0] + 1,
                                visibility=visibility,
                                attributes=get_attributes(node),
                            )
                        )

            elif node.type == "struct_item":
                visibility = get_visibility(node)
                if visibility.startswith("pub"):
                    name = None
                    for child in node.children:
                        if child.type == "type_identifier":
                            name = get_text(child)
                            break

                    if name:
                        items.append(
                            ExtractedItem(
                                kind="struct",
                                name=name,
                                signature=f"struct {name}",
                                body=get_text(node),
                                doc_comment=get_doc_comment(node),
                                file_path=file_path,
                                line_number=node.start_point[0] + 1,
                                visibility=visibility,
                                attributes=get_attributes(node),
                            )
                        )

            elif node.type == "enum_item":
                visibility = get_visibility(node)
                if visibility.startswith("pub"):
                    name = None
                    for child in node.children:
                        if child.type == "type_identifier":
                            name = get_text(child)
                            break

                    if name:
                        items.append(
                            ExtractedItem(
                                kind="enum",
                                name=name,
                                signature=f"enum {name}",
                                body=get_text(node),
                                doc_comment=get_doc_comment(node),
                                file_path=file_path,
                                line_number=node.start_point[0] + 1,
                                visibility=visibility,
                                attributes=get_attributes(node),
                            )
                        )

            elif node.type == "const_item":
                # NOTE(review): unlike the regex fallback, this path records
                # constants regardless of visibility -- confirm intent.
                # (An unused `visibility` local was removed here.)
                name = None
                type_ann = None
                value = None

                for child in node.children:
                    if child.type == "identifier":
                        name = get_text(child)
                    elif child.type == "type_identifier" or child.type.endswith("_type"):
                        type_ann = get_text(child)

                # Extract the initializer from the raw text.
                full_text = get_text(node)
                if "=" in full_text:
                    value = full_text.split("=", 1)[1].strip().rstrip(";")

                if name:
                    constants.append(
                        ExtractedConstant(
                            name=name,
                            value=value or "",
                            type_annotation=type_ann,
                            doc_comment=get_doc_comment(node),
                            file_path=file_path,
                            line_number=node.start_point[0] + 1,
                        )
                    )

            elif node.type == "impl_item":
                # Extract impl blocks for types.
                type_name = None
                for child in node.children:
                    if child.type == "type_identifier":
                        type_name = get_text(child)
                        break
                    elif child.type == "generic_type":
                        type_name = get_text(child)
                        break

                if type_name:
                    items.append(
                        ExtractedItem(
                            kind="impl",
                            name=f"impl {type_name}",
                            signature=f"impl {type_name}",
                            body=get_text(node),
                            doc_comment=get_doc_comment(node),
                            file_path=file_path,
                            line_number=node.start_point[0] + 1,
                            visibility="pub",  # impl blocks are effectively pub if type is
                            attributes=get_attributes(node),
                        )
                    )

            # Recurse into children
            for child in node.children:
                process_node(child)

        process_node(tree.root_node)
        return items, constants

    def _parse_with_regex(
        self, content: str, file_path: str
    ) -> tuple[list[ExtractedItem], list[ExtractedConstant]]:
        """Fallback regex-based parsing when tree-sitter isn't available.

        Only ``pub`` items are extracted on this path.
        """
        items = []
        constants = []
        lines = content.split("\n")

        # Patterns for extraction
        fn_pattern = re.compile(
            r"^(\s*)(pub(?:\([^)]+\))?\s+)?fn\s+(\w+)\s*(<[^>]+>)?\s*\(([^)]*)\)(\s*->\s*[^{]+)?\s*\{",
            re.MULTILINE,
        )
        struct_pattern = re.compile(
            r"^(\s*)(pub(?:\([^)]+\))?\s+)?struct\s+(\w+)", re.MULTILINE
        )
        enum_pattern = re.compile(
            r"^(\s*)(pub(?:\([^)]+\))?\s+)?enum\s+(\w+)", re.MULTILINE
        )
        const_pattern = re.compile(
            r"^(\s*)(pub(?:\([^)]+\))?\s+)?const\s+(\w+)\s*:\s*([^=]+)\s*=\s*([^;]+);",
            re.MULTILINE,
        )

        # Extract functions
        for match in fn_pattern.finditer(content):
            visibility = match.group(2) or ""
            visibility = visibility.strip()
            if not visibility.startswith("pub"):
                continue

            name = match.group(3)
            params = match.group(5)
            ret = match.group(6) or ""

            start = match.start()
            line_num = content[:start].count("\n") + 1

            # Simple brace matching to find the end of the body; the
            # opening '{' is part of the match, so depth starts at 1.
            brace_count = 1
            idx = match.end()
            while idx < len(content) and brace_count > 0:
                if content[idx] == "{":
                    brace_count += 1
                elif content[idx] == "}":
                    brace_count -= 1
                idx += 1

            body = content[match.start() : idx]

            # Get doc comment
            doc_comment = self._get_doc_comment_at_line(lines, line_num - 1)

            items.append(
                ExtractedItem(
                    kind="function",
                    name=name,
                    signature=f"fn {name}({params}){ret}".strip(),
                    body=body,
                    doc_comment=doc_comment,
                    file_path=file_path,
                    line_number=line_num,
                    visibility=visibility if visibility else "pub",
                    attributes=[],
                )
            )

        # Extract structs
        for match in struct_pattern.finditer(content):
            visibility = match.group(2) or ""
            visibility = visibility.strip()
            if not visibility.startswith("pub"):
                continue

            name = match.group(3)
            start = match.start()
            line_num = content[:start].count("\n") + 1

            # Find the end of the struct: a terminating ';' (unit/tuple
            # structs) or the matching close brace of the field block.
            # BUG FIX: the previous scan did not stop at the field block's
            # closing brace -- it kept going until the next stray ';' or
            # '}' anywhere later in the file, so `body` swallowed unrelated
            # trailing code for every brace-bodied struct.
            idx = match.end()
            depth = 0
            saw_brace = False
            while idx < len(content):
                ch = content[idx]
                if ch == "{":
                    depth += 1
                    saw_brace = True
                elif ch == "}":
                    depth -= 1
                    if saw_brace and depth == 0:
                        idx += 1
                        break
                elif ch == ";" and depth == 0:
                    idx += 1
                    break
                idx += 1

            body = content[match.start() : idx]
            doc_comment = self._get_doc_comment_at_line(lines, line_num - 1)

            items.append(
                ExtractedItem(
                    kind="struct",
                    name=name,
                    signature=f"struct {name}",
                    body=body,
                    doc_comment=doc_comment,
                    file_path=file_path,
                    line_number=line_num,
                    visibility=visibility if visibility else "pub",
                    attributes=[],
                )
            )

        # Extract enums
        for match in enum_pattern.finditer(content):
            visibility = match.group(2) or ""
            visibility = visibility.strip()
            if not visibility.startswith("pub"):
                continue

            name = match.group(3)
            start = match.start()
            line_num = content[:start].count("\n") + 1

            # Find end of enum (the close brace matching the first open).
            idx = match.end()
            brace_count = 0
            while idx < len(content):
                if content[idx] == "{":
                    brace_count += 1
                elif content[idx] == "}":
                    if brace_count == 1:
                        idx += 1
                        break
                    brace_count -= 1
                idx += 1

            body = content[match.start() : idx]
            doc_comment = self._get_doc_comment_at_line(lines, line_num - 1)

            items.append(
                ExtractedItem(
                    kind="enum",
                    name=name,
                    signature=f"enum {name}",
                    body=body,
                    doc_comment=doc_comment,
                    file_path=file_path,
                    line_number=line_num,
                    visibility=visibility if visibility else "pub",
                    attributes=[],
                )
            )

        # Extract constants
        for match in const_pattern.finditer(content):
            visibility = match.group(2) or ""
            if not visibility.strip().startswith("pub"):
                continue

            name = match.group(3)
            type_ann = match.group(4).strip()
            value = match.group(5).strip()

            start = match.start()
            line_num = content[:start].count("\n") + 1
            doc_comment = self._get_doc_comment_at_line(lines, line_num - 1)

            constants.append(
                ExtractedConstant(
                    name=name,
                    value=value,
                    type_annotation=type_ann,
                    doc_comment=doc_comment,
                    file_path=file_path,
                    line_number=line_num,
                )
            )

        return items, constants

    def _get_doc_comment_at_line(self, lines: list[str], line_idx: int) -> str | None:
        """Collect /// and //! doc lines ending just above ``line_idx``.

        Blank lines and #[...] attribute lines are skipped over.
        """
        doc_lines = []
        idx = line_idx - 1

        while idx >= 0:
            line = lines[idx].strip()
            if line.startswith("///"):
                doc_lines.insert(0, line[3:].strip())
                idx -= 1
            elif line.startswith("//!"):
                doc_lines.insert(0, line[3:].strip())
                idx -= 1
            elif line == "" or line.startswith("#["):
                idx -= 1
            else:
                break

        return "\n".join(doc_lines) if doc_lines else None
494
+
495
+
496
class CParser:
    """Parse C source code to extract definitions (for Firedancer).

    Uses a tree-sitter AST when the ``tree_sitter_c`` grammar is installed;
    otherwise falls back to a best-effort regex scan.  ``#define`` macros
    are always scraped with a regex since the preprocessor is not expanded.
    """

    def __init__(self):
        if TREE_SITTER_C:
            self.parser = Parser(Language(ts_c.language()))
        else:
            self.parser = None

    def parse_file(self, file_path: Path) -> tuple[list[ExtractedItem], list[ExtractedConstant]]:
        """Parse a C file and return ``(items, constants)``.

        Unreadable or non-UTF-8 files yield ``([], [])``.
        """
        try:
            content = file_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            return [], []

        if self.parser:
            return self._parse_with_tree_sitter(content, str(file_path))
        else:
            return self._parse_with_regex(content, str(file_path))

    def _parse_with_tree_sitter(
        self, content: str, file_path: str
    ) -> tuple[list[ExtractedItem], list[ExtractedConstant]]:
        """Parse using tree-sitter for accurate AST."""
        tree = self.parser.parse(bytes(content, "utf-8"))
        items = []
        constants = []

        lines = content.split("\n")

        def get_text(node) -> str:
            return content[node.start_byte : node.end_byte]

        def get_doc_comment(node) -> str | None:
            """Get doc comment preceding a node (C-style /* */ or //)."""
            doc_lines = []
            line = node.start_point[0] - 1

            while line >= 0:
                line_text = lines[line].strip()
                if line_text.startswith("//"):
                    doc_lines.insert(0, line_text[2:].strip())
                    line -= 1
                elif line_text.endswith("*/"):
                    # Multi-line comment: walk up to the opening line.
                    # NOTE(review): rstrip("*/")/lstrip("/*") strip those
                    # CHARACTERS, not the literal delimiters -- fine for
                    # "*/" itself but they also eat adjacent '*' runs;
                    # acceptable for doc text.
                    comment_lines = [line_text.rstrip("*/").strip()]
                    line -= 1
                    while line >= 0 and "/*" not in lines[line]:
                        comment_lines.insert(0, lines[line].strip().lstrip("*").strip())
                        line -= 1
                    if line >= 0:
                        start_line = lines[line].strip().lstrip("/*").strip()
                        if start_line:
                            comment_lines.insert(0, start_line)
                        doc_lines = comment_lines + doc_lines
                    break
                elif line_text == "":
                    line -= 1
                else:
                    break

            return "\n".join(doc_lines) if doc_lines else None

        def process_node(node):
            # Function definitions
            if node.type == "function_definition":
                name = None
                return_type = None
                params = None

                for child in node.children:
                    if child.type == "function_declarator":
                        for sub in child.children:
                            if sub.type == "identifier":
                                name = get_text(sub)
                            elif sub.type == "parameter_list":
                                params = get_text(sub)
                    elif child.type in ("primitive_type", "type_identifier", "sized_type_specifier"):
                        return_type = get_text(child)

                if name:
                    signature = f"{return_type or 'void'} {name}{params or '()'}"
                    items.append(
                        ExtractedItem(
                            kind="function",
                            name=name,
                            signature=signature,
                            body=get_text(node),
                            doc_comment=get_doc_comment(node),
                            file_path=file_path,
                            line_number=node.start_point[0] + 1,
                            visibility="pub",  # C doesn't have visibility modifiers in same way
                            attributes=[],
                        )
                    )

            # Struct definitions (require a field list: skip forward decls)
            elif node.type == "struct_specifier":
                name = None
                for child in node.children:
                    if child.type == "type_identifier":
                        name = get_text(child)
                        break

                if name and any(c.type == "field_declaration_list" for c in node.children):
                    items.append(
                        ExtractedItem(
                            kind="struct",
                            name=name,
                            signature=f"struct {name}",
                            body=get_text(node),
                            doc_comment=get_doc_comment(node),
                            file_path=file_path,
                            line_number=node.start_point[0] + 1,
                            visibility="pub",
                            attributes=[],
                        )
                    )

            # Enum definitions
            elif node.type == "enum_specifier":
                name = None
                for child in node.children:
                    if child.type == "type_identifier":
                        name = get_text(child)
                        break

                if name:
                    items.append(
                        ExtractedItem(
                            kind="enum",
                            name=name,
                            signature=f"enum {name}",
                            body=get_text(node),
                            doc_comment=get_doc_comment(node),
                            file_path=file_path,
                            line_number=node.start_point[0] + 1,
                            visibility="pub",
                            attributes=[],
                        )
                    )

            # Typedef -- no break: the typedef'd alias is the LAST
            # type_identifier child.
            elif node.type == "type_definition":
                name = None
                for child in node.children:
                    if child.type == "type_identifier":
                        name = get_text(child)

                if name:
                    items.append(
                        ExtractedItem(
                            kind="type",
                            name=name,
                            signature=f"typedef {name}",
                            body=get_text(node),
                            doc_comment=get_doc_comment(node),
                            file_path=file_path,
                            line_number=node.start_point[0] + 1,
                            visibility="pub",
                            attributes=[],
                        )
                    )

            # Recurse into children
            for child in node.children:
                process_node(child)

        process_node(tree.root_node)

        # Also extract #define constants with regex (tree-sitter doesn't
        # expand the preprocessor).
        constants.extend(self._extract_defines(content, file_path))

        return items, constants

    def _extract_defines(self, content: str, file_path: str) -> list[ExtractedConstant]:
        """Extract object-like ``#define`` macros as constants.

        Function-like macros (``#define MAX(a,b) ...``) never match the
        pattern, because ``\\w+`` must be followed by whitespace and a
        macro's parameter list starts with ``(`` directly after the name.
        (The old ``if "(" in name`` guard was dead code and is removed.)
        """
        constants = []
        lines = content.split("\n")

        define_pattern = re.compile(r"^#define\s+(\w+)\s+(.+)$")

        for i, line in enumerate(lines):
            match = define_pattern.match(line.strip())
            if not match:
                continue

            name = match.group(1)
            value = match.group(2).strip()

            # Pick up an immediately preceding comment as documentation.
            doc_comment = None
            if i > 0:
                prev_line = lines[i - 1].strip()
                if prev_line.startswith("//"):
                    doc_comment = prev_line[2:].strip()
                elif prev_line.endswith("*/"):
                    if "/*" in prev_line:
                        # BUG FIX: single-line /* ... */ comments were
                        # previously dropped (the walk-up loop bailed
                        # immediately on the '/*' it was standing on).
                        text = prev_line[prev_line.index("/*") + 2 : prev_line.rindex("*/")]
                        doc_comment = text.strip() or None
                    else:
                        # Multi-line block comment: walk up to the opener.
                        comment_lines = []
                        j = i - 1
                        while j >= 0 and "/*" not in lines[j]:
                            text = lines[j].strip()
                            if text.endswith("*/"):
                                # BUG FIX: the closing "*/" used to leak
                                # into the extracted doc text.
                                text = text[:-2]
                            comment_lines.insert(0, text.strip("* \t"))
                            j -= 1
                        if comment_lines:
                            doc_comment = "\n".join(comment_lines)

            constants.append(
                ExtractedConstant(
                    name=name,
                    value=value,
                    type_annotation=None,  # C macros don't have types
                    doc_comment=doc_comment,
                    file_path=file_path,
                    line_number=i + 1,
                )
            )

        return constants

    def _parse_with_regex(
        self, content: str, file_path: str
    ) -> tuple[list[ExtractedItem], list[ExtractedConstant]]:
        """Fallback regex-based parsing when tree-sitter isn't available."""
        items = []
        constants = []
        lines = content.split("\n")

        # Function pattern (simplified: return type and '{' on one line)
        fn_pattern = re.compile(
            r"^(\w+(?:\s*\*)?)\s+(\w+)\s*\(([^)]*)\)\s*\{",
            re.MULTILINE,
        )

        # Struct pattern
        struct_pattern = re.compile(
            r"^(?:typedef\s+)?struct\s+(\w+)\s*\{",
            re.MULTILINE,
        )

        # Extract functions
        for match in fn_pattern.finditer(content):
            return_type = match.group(1).strip()
            name = match.group(2)
            params = match.group(3)

            # Skip if it's a control statement
            if name in ("if", "while", "for", "switch"):
                continue

            start = match.start()
            line_num = content[:start].count("\n") + 1

            # Find the full function body (brace matching; the opening
            # '{' is part of the match, so depth starts at 1).
            brace_count = 1
            idx = match.end()
            while idx < len(content) and brace_count > 0:
                if content[idx] == "{":
                    brace_count += 1
                elif content[idx] == "}":
                    brace_count -= 1
                idx += 1

            body = content[match.start() : idx]

            # Get doc comment
            doc_comment = self._get_doc_comment_at_line(lines, line_num - 1)

            items.append(
                ExtractedItem(
                    kind="function",
                    name=name,
                    signature=f"{return_type} {name}({params})",
                    body=body,
                    doc_comment=doc_comment,
                    file_path=file_path,
                    line_number=line_num,
                    visibility="pub",
                    attributes=[],
                )
            )

        # Extract structs
        for match in struct_pattern.finditer(content):
            name = match.group(1)
            start = match.start()
            line_num = content[:start].count("\n") + 1

            # Find end of struct body
            brace_count = 1
            idx = match.end()
            while idx < len(content) and brace_count > 0:
                if content[idx] == "{":
                    brace_count += 1
                elif content[idx] == "}":
                    brace_count -= 1
                idx += 1

            # Skip past the trailing semicolon (covers the typedef alias
            # in `typedef struct foo { ... } foo_t;`).
            while idx < len(content) and content[idx] != ";":
                idx += 1
            idx += 1

            body = content[match.start() : idx]
            doc_comment = self._get_doc_comment_at_line(lines, line_num - 1)

            items.append(
                ExtractedItem(
                    kind="struct",
                    name=name,
                    signature=f"struct {name}",
                    body=body,
                    doc_comment=doc_comment,
                    file_path=file_path,
                    line_number=line_num,
                    visibility="pub",
                    attributes=[],
                )
            )

        # Extract #define constants
        constants.extend(self._extract_defines(content, file_path))

        return items, constants

    def _get_doc_comment_at_line(self, lines: list[str], line_idx: int) -> str | None:
        """Collect // comment lines ending just above ``line_idx``."""
        doc_lines = []
        idx = line_idx - 1

        while idx >= 0:
            line = lines[idx].strip()
            if line.startswith("//"):
                doc_lines.insert(0, line[2:].strip())
                idx -= 1
            elif line == "":
                idx -= 1
            else:
                break

        return "\n".join(doc_lines) if doc_lines else None
839
+
840
+
841
def compile_c(
    source_dir: Path,
    output_dir: Path,
    file_patterns: list[str] | None = None,
) -> dict:
    """
    Compile C source files into JSON extracts.

    Writes ``items.json``, ``constants.json``, and a name -> location
    ``index.json`` into *output_dir*.

    Args:
        source_dir: Directory containing C source files
        output_dir: Directory to write JSON output
        file_patterns: Glob patterns for files to include (default: ["**/*.c", "**/*.h"])

    Returns:
        Statistics about extraction
    """
    if file_patterns is None:
        file_patterns = ["**/*.c", "**/*.h"]

    parser = CParser()
    all_items = []
    all_constants = []

    # Collect matching files.  Deduplicate (overlapping user-supplied
    # patterns can match the same file twice) and sort so the JSON output
    # is deterministic across runs.
    matched = set()
    for pattern in file_patterns:
        matched.update(source_dir.glob(pattern))
    c_files = sorted(matched)

    # Parse each file
    for file_path in c_files:
        items, constants = parser.parse_file(file_path)

        # Make paths relative to source_dir so the output is relocatable.
        for item in items:
            item.file_path = str(Path(item.file_path).relative_to(source_dir))
        for const in constants:
            const.file_path = str(Path(const.file_path).relative_to(source_dir))

        all_items.extend(items)
        all_constants.extend(constants)

    # Write output
    output_dir.mkdir(parents=True, exist_ok=True)

    items_file = output_dir / "items.json"
    with open(items_file, "w", encoding="utf-8") as f:
        json.dump([asdict(item) for item in all_items], f, indent=2)

    constants_file = output_dir / "constants.json"
    with open(constants_file, "w", encoding="utf-8") as f:
        json.dump([asdict(const) for const in all_constants], f, indent=2)

    # Build index by name for fast lookup.  Duplicate names overwrite
    # earlier entries (last occurrence wins).
    index = {
        "functions": {},
        "structs": {},
        "enums": {},
        "constants": {},
        "types": {},
    }

    for item in all_items:
        category = f"{item.kind}s"
        if category in index:
            index[category][item.name] = {
                "file": item.file_path,
                "line": item.line_number,
            }

    for const in all_constants:
        index["constants"][const.name] = {
            "file": const.file_path,
            "line": const.line_number,
            "value": const.value,
        }

    index_file = output_dir / "index.json"
    with open(index_file, "w", encoding="utf-8") as f:
        json.dump(index, f, indent=2)

    # Single pass over the items for the per-kind stats.
    kind_counts: dict[str, int] = {}
    for item in all_items:
        kind_counts[item.kind] = kind_counts.get(item.kind, 0) + 1

    return {
        "files_processed": len(c_files),
        "items_extracted": len(all_items),
        "constants_extracted": len(all_constants),
        "functions": kind_counts.get("function", 0),
        "structs": kind_counts.get("struct", 0),
        "enums": kind_counts.get("enum", 0),
        "types": kind_counts.get("type", 0),
    }
930
+
931
+
932
def compile_rust(
    source_dir: Path,
    output_dir: Path,
    file_patterns: list[str] | None = None,
) -> dict:
    """
    Compile Rust source files into JSON extracts.

    Writes ``items.json``, ``constants.json``, and a name -> location
    ``index.json`` into *output_dir*.

    Args:
        source_dir: Directory containing Rust source files
        output_dir: Directory to write JSON output
        file_patterns: Glob patterns for files to include (default: ["**/*.rs"])

    Returns:
        Statistics about extraction
    """
    if file_patterns is None:
        file_patterns = ["**/*.rs"]

    parser = RustParser()
    all_items = []
    all_constants = []

    # Collect matching files.  Deduplicate (overlapping user-supplied
    # patterns can match the same file twice) and sort so the JSON output
    # is deterministic across runs.
    matched = set()
    for pattern in file_patterns:
        matched.update(source_dir.glob(pattern))
    rust_files = sorted(matched)

    # Parse each file
    for file_path in rust_files:
        items, constants = parser.parse_file(file_path)

        # Make paths relative to source_dir so the output is relocatable.
        for item in items:
            item.file_path = str(Path(item.file_path).relative_to(source_dir))
        for const in constants:
            const.file_path = str(Path(const.file_path).relative_to(source_dir))

        all_items.extend(items)
        all_constants.extend(constants)

    # Write output
    output_dir.mkdir(parents=True, exist_ok=True)

    items_file = output_dir / "items.json"
    with open(items_file, "w", encoding="utf-8") as f:
        json.dump([asdict(item) for item in all_items], f, indent=2)

    constants_file = output_dir / "constants.json"
    with open(constants_file, "w", encoding="utf-8") as f:
        json.dump([asdict(const) for const in all_constants], f, indent=2)

    # Build index by name for fast lookup.  Duplicate names overwrite
    # earlier entries (last occurrence wins).
    index = {
        "functions": {},
        "structs": {},
        "enums": {},
        "constants": {},
        "impls": {},
    }

    for item in all_items:
        category = f"{item.kind}s"
        if category in index:
            index[category][item.name] = {
                "file": item.file_path,
                "line": item.line_number,
            }

    for const in all_constants:
        index["constants"][const.name] = {
            "file": const.file_path,
            "line": const.line_number,
            "value": const.value,
        }

    index_file = output_dir / "index.json"
    with open(index_file, "w", encoding="utf-8") as f:
        json.dump(index, f, indent=2)

    # Single pass over the items for the per-kind stats.
    kind_counts: dict[str, int] = {}
    for item in all_items:
        kind_counts[item.kind] = kind_counts.get(item.kind, 0) + 1

    return {
        "files_processed": len(rust_files),
        "items_extracted": len(all_items),
        "constants_extracted": len(all_constants),
        "functions": kind_counts.get("function", 0),
        "structs": kind_counts.get("struct", 0),
        "enums": kind_counts.get("enum", 0),
        "impls": kind_counts.get("impl", 0),
    }
1021
+
1022
+
1023
def load_compiled_items(compiled_dir: Path) -> list[ExtractedItem]:
    """Load previously compiled items from ``items.json`` in *compiled_dir*.

    Returns an empty list when no compiled output exists.
    """
    path = compiled_dir / "items.json"
    if not path.exists():
        return []

    raw = json.loads(path.read_text())
    return [ExtractedItem(**entry) for entry in raw]
1033
+
1034
+
1035
def load_compiled_constants(compiled_dir: Path) -> list[ExtractedConstant]:
    """Load previously compiled constants from ``constants.json`` in *compiled_dir*.

    Returns an empty list when no compiled output exists.
    """
    path = compiled_dir / "constants.json"
    if not path.exists():
        return []

    raw = json.loads(path.read_text())
    return [ExtractedConstant(**entry) for entry in raw]
1045
+
1046
+
1047
def lookup_constant(name: str, compiled_dir: Path) -> ExtractedConstant | None:
    """Fast lookup of a constant by name.

    Consults ``index.json`` first to avoid loading the full constant list
    for names that are definitely absent.
    """
    index_file = compiled_dir / "index.json"
    if not index_file.exists():
        return None

    index = json.loads(index_file.read_text())
    if name not in index.get("constants", {}):
        return None

    # The index only holds location metadata; fetch the full record.
    return next(
        (c for c in load_compiled_constants(compiled_dir) if c.name == name),
        None,
    )
1066
+
1067
+
1068
def lookup_function(name: str, compiled_dir: Path) -> ExtractedItem | None:
    """Fast lookup of a function by name.

    Consults ``index.json`` first to avoid loading the full item list for
    names that are definitely absent.
    """
    index_file = compiled_dir / "index.json"
    if not index_file.exists():
        return None

    index = json.loads(index_file.read_text())
    if name not in index.get("functions", {}):
        return None

    # The index only holds location metadata; fetch the full record.
    return next(
        (
            item
            for item in load_compiled_items(compiled_dir)
            if item.kind == "function" and item.name == name
        ),
        None,
    )
1086
+
1087
+
1088
if __name__ == "__main__":
    # Ad-hoc CLI for testing compilation: compiler.py <source_dir> <output_dir>
    import sys

    if len(sys.argv) < 3:
        print("Usage: compiler.py <source_dir> <output_dir>")
        sys.exit(1)

    source, output = Path(sys.argv[1]), Path(sys.argv[2])

    print(f"Compiling Rust from {source} to {output}...")
    stats = compile_rust(source, output)
    print(f"Results: {stats}")