tree_sitter_analyzer-0.9.1-py3-none-any.whl → tree_sitter_analyzer-0.9.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tree-sitter-analyzer might be problematic. Click here for more details.

Files changed (64)
  1. tree_sitter_analyzer/__init__.py +132 -132
  2. tree_sitter_analyzer/__main__.py +11 -11
  3. tree_sitter_analyzer/api.py +533 -533
  4. tree_sitter_analyzer/cli/__init__.py +39 -39
  5. tree_sitter_analyzer/cli/__main__.py +12 -12
  6. tree_sitter_analyzer/cli/commands/__init__.py +26 -26
  7. tree_sitter_analyzer/cli/commands/advanced_command.py +88 -88
  8. tree_sitter_analyzer/cli/commands/base_command.py +181 -178
  9. tree_sitter_analyzer/cli/commands/structure_command.py +138 -138
  10. tree_sitter_analyzer/cli/commands/summary_command.py +101 -101
  11. tree_sitter_analyzer/cli_main.py +7 -3
  12. tree_sitter_analyzer/core/__init__.py +15 -15
  13. tree_sitter_analyzer/core/analysis_engine.py +91 -87
  14. tree_sitter_analyzer/core/cache_service.py +320 -320
  15. tree_sitter_analyzer/core/engine.py +566 -566
  16. tree_sitter_analyzer/core/parser.py +293 -293
  17. tree_sitter_analyzer/encoding_utils.py +459 -459
  18. tree_sitter_analyzer/file_handler.py +210 -210
  19. tree_sitter_analyzer/formatters/__init__.py +1 -1
  20. tree_sitter_analyzer/formatters/base_formatter.py +167 -167
  21. tree_sitter_analyzer/formatters/formatter_factory.py +78 -78
  22. tree_sitter_analyzer/formatters/java_formatter.py +18 -18
  23. tree_sitter_analyzer/formatters/python_formatter.py +19 -19
  24. tree_sitter_analyzer/interfaces/__init__.py +9 -9
  25. tree_sitter_analyzer/interfaces/cli.py +528 -528
  26. tree_sitter_analyzer/interfaces/cli_adapter.py +344 -343
  27. tree_sitter_analyzer/interfaces/mcp_adapter.py +206 -206
  28. tree_sitter_analyzer/language_detector.py +53 -53
  29. tree_sitter_analyzer/languages/__init__.py +10 -10
  30. tree_sitter_analyzer/languages/java_plugin.py +1 -1
  31. tree_sitter_analyzer/languages/javascript_plugin.py +446 -446
  32. tree_sitter_analyzer/languages/python_plugin.py +755 -755
  33. tree_sitter_analyzer/mcp/__init__.py +34 -45
  34. tree_sitter_analyzer/mcp/resources/__init__.py +44 -44
  35. tree_sitter_analyzer/mcp/resources/code_file_resource.py +209 -209
  36. tree_sitter_analyzer/mcp/server.py +623 -568
  37. tree_sitter_analyzer/mcp/tools/__init__.py +30 -30
  38. tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +681 -673
  39. tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +247 -247
  40. tree_sitter_analyzer/mcp/tools/base_tool.py +54 -54
  41. tree_sitter_analyzer/mcp/tools/read_partial_tool.py +310 -308
  42. tree_sitter_analyzer/mcp/tools/table_format_tool.py +386 -379
  43. tree_sitter_analyzer/mcp/tools/universal_analyze_tool.py +563 -559
  44. tree_sitter_analyzer/mcp/utils/__init__.py +107 -107
  45. tree_sitter_analyzer/models.py +10 -10
  46. tree_sitter_analyzer/output_manager.py +253 -253
  47. tree_sitter_analyzer/plugins/__init__.py +280 -280
  48. tree_sitter_analyzer/plugins/base.py +529 -529
  49. tree_sitter_analyzer/plugins/manager.py +379 -379
  50. tree_sitter_analyzer/project_detector.py +330 -317
  51. tree_sitter_analyzer/queries/__init__.py +26 -26
  52. tree_sitter_analyzer/queries/java.py +391 -391
  53. tree_sitter_analyzer/queries/javascript.py +148 -148
  54. tree_sitter_analyzer/queries/python.py +285 -285
  55. tree_sitter_analyzer/queries/typescript.py +229 -229
  56. tree_sitter_analyzer/query_loader.py +257 -257
  57. tree_sitter_analyzer/security/boundary_manager.py +57 -51
  58. tree_sitter_analyzer/security/validator.py +246 -241
  59. tree_sitter_analyzer/utils.py +294 -277
  60. {tree_sitter_analyzer-0.9.1.dist-info → tree_sitter_analyzer-0.9.3.dist-info}/METADATA +13 -13
  61. tree_sitter_analyzer-0.9.3.dist-info/RECORD +77 -0
  62. {tree_sitter_analyzer-0.9.1.dist-info → tree_sitter_analyzer-0.9.3.dist-info}/entry_points.txt +1 -0
  63. tree_sitter_analyzer-0.9.1.dist-info/RECORD +0 -77
  64. {tree_sitter_analyzer-0.9.1.dist-info → tree_sitter_analyzer-0.9.3.dist-info}/WHEEL +0 -0
@@ -1,529 +1,529 @@
1
- #!/usr/bin/env python3
2
- """
3
- Plugin Base Classes
4
-
5
- Defines the base interfaces for language plugins and element extractors.
6
- All language plugins must inherit from these base classes.
7
- """
8
-
9
- import logging
10
- from abc import ABC, abstractmethod
11
- from typing import TYPE_CHECKING, Any
12
-
13
- if TYPE_CHECKING:
14
- import tree_sitter
15
-
16
- from ..core.analysis_engine import AnalysisRequest
17
- from ..models import AnalysisResult
18
-
19
- from ..models import Class as ModelClass
20
- from ..models import CodeElement
21
- from ..models import Function as ModelFunction
22
- from ..models import Import as ModelImport
23
- from ..models import Variable as ModelVariable
24
- from ..utils import log_debug, log_error
25
-
26
- logger = logging.getLogger(__name__)
27
-
28
-
29
- class ElementExtractor(ABC):
30
- """
31
- Abstract base class for language-specific element extractors.
32
-
33
- Element extractors are responsible for parsing ASTs and extracting
34
- meaningful code elements like functions, classes, variables, etc.
35
- """
36
-
37
- @abstractmethod
38
- def extract_functions(
39
- self, tree: "tree_sitter.Tree", source_code: str
40
- ) -> list[ModelFunction]:
41
- """
42
- Extract function definitions from the syntax tree.
43
-
44
- Args:
45
- tree: Tree-sitter AST
46
- source_code: Original source code
47
-
48
- Returns:
49
- List of extracted function objects
50
- """
51
- pass
52
-
53
- @abstractmethod
54
- def extract_classes(
55
- self, tree: "tree_sitter.Tree", source_code: str
56
- ) -> list[ModelClass]:
57
- """
58
- Extract class definitions from the syntax tree.
59
-
60
- Args:
61
- tree: Tree-sitter AST
62
- source_code: Original source code
63
-
64
- Returns:
65
- List of extracted class objects
66
- """
67
- pass
68
-
69
- @abstractmethod
70
- def extract_variables(
71
- self, tree: "tree_sitter.Tree", source_code: str
72
- ) -> list[ModelVariable]:
73
- """
74
- Extract variable declarations from the syntax tree.
75
-
76
- Args:
77
- tree: Tree-sitter AST
78
- source_code: Original source code
79
-
80
- Returns:
81
- List of extracted variable objects
82
- """
83
- pass
84
-
85
- @abstractmethod
86
- def extract_imports(
87
- self, tree: "tree_sitter.Tree", source_code: str
88
- ) -> list[ModelImport]:
89
- """
90
- Extract import statements from the syntax tree.
91
-
92
- Args:
93
- tree: Tree-sitter AST
94
- source_code: Original source code
95
-
96
- Returns:
97
- List of extracted import objects
98
- """
99
- pass
100
-
101
- def extract_all_elements(
102
- self, tree: "tree_sitter.Tree", source_code: str
103
- ) -> list[CodeElement]:
104
- """
105
- Extract all code elements from the syntax tree.
106
-
107
- Args:
108
- tree: Tree-sitter AST
109
- source_code: Original source code
110
-
111
- Returns:
112
- List of all extracted code elements
113
- """
114
- elements: list[CodeElement] = []
115
-
116
- try:
117
- elements.extend(self.extract_functions(tree, source_code))
118
- elements.extend(self.extract_classes(tree, source_code))
119
- elements.extend(self.extract_variables(tree, source_code))
120
- elements.extend(self.extract_imports(tree, source_code))
121
- except Exception as e:
122
- log_error(f"Failed to extract all elements: {e}")
123
-
124
- return elements
125
-
126
-
127
- class LanguagePlugin(ABC):
128
- """
129
- Abstract base class for language-specific plugins.
130
-
131
- Language plugins provide language-specific functionality including
132
- element extraction, file extension mapping, and language identification.
133
- """
134
-
135
- @abstractmethod
136
- def get_language_name(self) -> str:
137
- """
138
- Return the name of the programming language this plugin supports.
139
-
140
- Returns:
141
- Language name (e.g., "java", "python", "javascript")
142
- """
143
- pass
144
-
145
- @abstractmethod
146
- def get_file_extensions(self) -> list[str]:
147
- """
148
- Return list of file extensions this plugin supports.
149
-
150
- Returns:
151
- List of file extensions (e.g., [".java", ".class"])
152
- """
153
- pass
154
-
155
- @abstractmethod
156
- def create_extractor(self) -> ElementExtractor:
157
- """
158
- Create and return an element extractor for this language.
159
-
160
- Returns:
161
- ElementExtractor instance for this language
162
- """
163
- pass
164
-
165
- @abstractmethod
166
- async def analyze_file(
167
- self, file_path: str, request: "AnalysisRequest"
168
- ) -> "AnalysisResult":
169
- """
170
- Analyze a file and return analysis results.
171
-
172
- Args:
173
- file_path: Path to the file to analyze
174
- request: Analysis request with configuration
175
-
176
- Returns:
177
- AnalysisResult containing extracted information
178
- """
179
- pass
180
-
181
- def is_applicable(self, file_path: str) -> bool:
182
- """
183
- Check if this plugin is applicable for the given file.
184
-
185
- Args:
186
- file_path: Path to the file to check
187
-
188
- Returns:
189
- True if this plugin can handle the file
190
- """
191
- extensions = self.get_file_extensions()
192
- return any(file_path.lower().endswith(ext.lower()) for ext in extensions)
193
-
194
- def get_plugin_info(self) -> dict[str, Any]:
195
- """
196
- Get information about this plugin.
197
-
198
- Returns:
199
- Dictionary containing plugin information
200
- """
201
- return {
202
- "language": self.get_language_name(),
203
- "extensions": self.get_file_extensions(),
204
- "class_name": self.__class__.__name__,
205
- "module": self.__class__.__module__,
206
- }
207
-
208
-
209
- class DefaultExtractor(ElementExtractor):
210
- """
211
- Default implementation of ElementExtractor with basic functionality.
212
-
213
- This extractor provides generic extraction logic that works across
214
- multiple languages by looking for common node types.
215
- """
216
-
217
- def extract_functions(
218
- self, tree: "tree_sitter.Tree", source_code: str
219
- ) -> list[ModelFunction]:
220
- """Basic function extraction implementation."""
221
- functions: list[ModelFunction] = []
222
-
223
- try:
224
- if hasattr(tree, "root_node"):
225
- lines = source_code.splitlines()
226
- self._traverse_for_functions(
227
- tree.root_node, functions, lines, source_code
228
- )
229
- except Exception as e:
230
- log_error(f"Error in function extraction: {e}")
231
-
232
- return functions
233
-
234
- def extract_classes(
235
- self, tree: "tree_sitter.Tree", source_code: str
236
- ) -> list[ModelClass]:
237
- """Basic class extraction implementation."""
238
- classes: list[ModelClass] = []
239
-
240
- try:
241
- if hasattr(tree, "root_node"):
242
- lines = source_code.splitlines()
243
- self._traverse_for_classes(tree.root_node, classes, lines, source_code)
244
- except Exception as e:
245
- log_error(f"Error in class extraction: {e}")
246
-
247
- return classes
248
-
249
- def extract_variables(
250
- self, tree: "tree_sitter.Tree", source_code: str
251
- ) -> list[ModelVariable]:
252
- """Basic variable extraction implementation."""
253
- variables: list[ModelVariable] = []
254
-
255
- try:
256
- if hasattr(tree, "root_node"):
257
- lines = source_code.splitlines()
258
- self._traverse_for_variables(
259
- tree.root_node, variables, lines, source_code
260
- )
261
- except Exception as e:
262
- log_error(f"Error in variable extraction: {e}")
263
-
264
- return variables
265
-
266
- def extract_imports(
267
- self, tree: "tree_sitter.Tree", source_code: str
268
- ) -> list[ModelImport]:
269
- """Basic import extraction implementation."""
270
- imports: list[ModelImport] = []
271
-
272
- try:
273
- if hasattr(tree, "root_node"):
274
- lines = source_code.splitlines()
275
- self._traverse_for_imports(tree.root_node, imports, lines, source_code)
276
- except Exception as e:
277
- log_error(f"Error in import extraction: {e}")
278
-
279
- return imports
280
-
281
- def _traverse_for_functions(
282
- self,
283
- node: "tree_sitter.Node",
284
- functions: list[ModelFunction],
285
- lines: list[str],
286
- source_code: str,
287
- ) -> None:
288
- """Traverse tree to find function-like nodes."""
289
- if hasattr(node, "type") and self._is_function_node(node.type):
290
- try:
291
- name = self._extract_node_name(node, source_code) or "unknown"
292
- raw_text = self._extract_node_text(node, source_code)
293
-
294
- func = ModelFunction(
295
- name=name,
296
- start_line=(
297
- node.start_point[0] + 1 if hasattr(node, "start_point") else 0
298
- ),
299
- end_line=node.end_point[0] + 1 if hasattr(node, "end_point") else 0,
300
- raw_text=raw_text,
301
- language=self._get_language_hint(),
302
- )
303
- functions.append(func)
304
- except Exception as e:
305
- log_debug(f"Failed to extract function: {e}")
306
-
307
- if hasattr(node, "children"):
308
- for child in node.children:
309
- self._traverse_for_functions(child, functions, lines, source_code)
310
-
311
- def _traverse_for_classes(
312
- self,
313
- node: "tree_sitter.Node",
314
- classes: list[ModelClass],
315
- lines: list[str],
316
- source_code: str,
317
- ) -> None:
318
- """Traverse tree to find class-like nodes."""
319
- if hasattr(node, "type") and self._is_class_node(node.type):
320
- try:
321
- name = self._extract_node_name(node, source_code) or "unknown"
322
- raw_text = self._extract_node_text(node, source_code)
323
-
324
- cls = ModelClass(
325
- name=name,
326
- start_line=(
327
- node.start_point[0] + 1 if hasattr(node, "start_point") else 0
328
- ),
329
- end_line=node.end_point[0] + 1 if hasattr(node, "end_point") else 0,
330
- raw_text=raw_text,
331
- language=self._get_language_hint(),
332
- )
333
- classes.append(cls)
334
- except Exception as e:
335
- log_debug(f"Failed to extract class: {e}")
336
-
337
- if hasattr(node, "children"):
338
- for child in node.children:
339
- self._traverse_for_classes(child, classes, lines, source_code)
340
-
341
- def _traverse_for_variables(
342
- self,
343
- node: "tree_sitter.Node",
344
- variables: list[ModelVariable],
345
- lines: list[str],
346
- source_code: str,
347
- ) -> None:
348
- """Traverse tree to find variable declarations."""
349
- if hasattr(node, "type") and self._is_variable_node(node.type):
350
- try:
351
- name = self._extract_node_name(node, source_code) or "unknown"
352
- raw_text = self._extract_node_text(node, source_code)
353
-
354
- var = ModelVariable(
355
- name=name,
356
- start_line=(
357
- node.start_point[0] + 1 if hasattr(node, "start_point") else 0
358
- ),
359
- end_line=node.end_point[0] + 1 if hasattr(node, "end_point") else 0,
360
- raw_text=raw_text,
361
- language=self._get_language_hint(),
362
- )
363
- variables.append(var)
364
- except Exception as e:
365
- log_debug(f"Failed to extract variable: {e}")
366
-
367
- if hasattr(node, "children"):
368
- for child in node.children:
369
- self._traverse_for_variables(child, variables, lines, source_code)
370
-
371
- def _traverse_for_imports(
372
- self,
373
- node: "tree_sitter.Node",
374
- imports: list[ModelImport],
375
- lines: list[str],
376
- source_code: str,
377
- ) -> None:
378
- """Traverse tree to find import statements."""
379
- if hasattr(node, "type") and self._is_import_node(node.type):
380
- try:
381
- name = self._extract_node_name(node, source_code) or "unknown"
382
- raw_text = self._extract_node_text(node, source_code)
383
-
384
- imp = ModelImport(
385
- name=name,
386
- start_line=(
387
- node.start_point[0] + 1 if hasattr(node, "start_point") else 0
388
- ),
389
- end_line=node.end_point[0] + 1 if hasattr(node, "end_point") else 0,
390
- raw_text=raw_text,
391
- language=self._get_language_hint(),
392
- )
393
- imports.append(imp)
394
- except Exception as e:
395
- log_debug(f"Failed to extract import: {e}")
396
-
397
- if hasattr(node, "children"):
398
- for child in node.children:
399
- self._traverse_for_imports(child, imports, lines, source_code)
400
-
401
- def _is_function_node(self, node_type: str) -> bool:
402
- """Check if a node type represents a function."""
403
- function_types = [
404
- "function_definition",
405
- "function_declaration",
406
- "method_definition",
407
- "function",
408
- "method",
409
- "procedure",
410
- "subroutine",
411
- ]
412
- return any(ftype in node_type.lower() for ftype in function_types)
413
-
414
- def _is_class_node(self, node_type: str) -> bool:
415
- """Check if a node type represents a class."""
416
- class_types = [
417
- "class_definition",
418
- "class_declaration",
419
- "interface_definition",
420
- "class",
421
- "interface",
422
- "struct",
423
- "enum",
424
- ]
425
- return any(ctype in node_type.lower() for ctype in class_types)
426
-
427
- def _is_variable_node(self, node_type: str) -> bool:
428
- """Check if a node type represents a variable."""
429
- variable_types = [
430
- "variable_declaration",
431
- "variable_definition",
432
- "field_declaration",
433
- "assignment",
434
- "declaration",
435
- "variable",
436
- "field",
437
- ]
438
- return any(vtype in node_type.lower() for vtype in variable_types)
439
-
440
- def _is_import_node(self, node_type: str) -> bool:
441
- """Check if a node type represents an import."""
442
- import_types = [
443
- "import_statement",
444
- "import_declaration",
445
- "include_statement",
446
- "import",
447
- "include",
448
- "require",
449
- "use",
450
- ]
451
- return any(itype in node_type.lower() for itype in import_types)
452
-
453
- def _extract_node_name(
454
- self, node: "tree_sitter.Node", source_code: str
455
- ) -> str | None:
456
- """Extract name from a tree-sitter node."""
457
- try:
458
- # Look for identifier children
459
- if hasattr(node, "children"):
460
- for child in node.children:
461
- if hasattr(child, "type") and child.type == "identifier":
462
- return self._extract_node_text(child, source_code)
463
-
464
- # Fallback: use position-based name
465
- return f"element_{node.start_point[0]}_{node.start_point[1]}"
466
- except Exception:
467
- return None
468
-
469
- def _extract_node_text(self, node: "tree_sitter.Node", source_code: str) -> str:
470
- """Extract text content from a tree-sitter node."""
471
- try:
472
- if hasattr(node, "start_byte") and hasattr(node, "end_byte"):
473
- source_bytes = source_code.encode("utf-8")
474
- node_bytes = source_bytes[node.start_byte : node.end_byte]
475
- return node_bytes.decode("utf-8", errors="replace")
476
- return ""
477
- except Exception as e:
478
- log_debug(f"Failed to extract node text: {e}")
479
- return ""
480
-
481
- def _get_language_hint(self) -> str:
482
- """Get a hint about the language being processed."""
483
- return "unknown"
484
-
485
-
486
- class DefaultLanguagePlugin(LanguagePlugin):
487
- """Default plugin that provides basic functionality for any language."""
488
-
489
- def get_language_name(self) -> str:
490
- return "generic"
491
-
492
- def get_file_extensions(self) -> list[str]:
493
- return [".txt", ".md"] # Fallback extensions
494
-
495
- def create_extractor(self) -> ElementExtractor:
496
- return DefaultExtractor()
497
-
498
- async def analyze_file(
499
- self, file_path: str, request: "AnalysisRequest"
500
- ) -> "AnalysisResult":
501
- """
502
- Analyze a file using the default extractor.
503
-
504
- Args:
505
- file_path: Path to the file to analyze
506
- request: Analysis request with configuration
507
-
508
- Returns:
509
- AnalysisResult containing extracted information
510
- """
511
- from ..core.analysis_engine import UnifiedAnalysisEngine
512
- from ..models import AnalysisResult
513
-
514
- try:
515
- engine = UnifiedAnalysisEngine()
516
- return await engine.analyze_file(file_path)
517
- except Exception as e:
518
- log_error(f"Failed to analyze file {file_path}: {e}")
519
- return AnalysisResult(
520
- file_path=file_path,
521
- language=self.get_language_name(),
522
- line_count=0,
523
- elements=[],
524
- node_count=0,
525
- query_results={},
526
- source_code="",
527
- success=False,
528
- error_message=str(e),
529
- )
1
+ #!/usr/bin/env python3
2
+ """
3
+ Plugin Base Classes
4
+
5
+ Defines the base interfaces for language plugins and element extractors.
6
+ All language plugins must inherit from these base classes.
7
+ """
8
+
9
+ import logging
10
+ from abc import ABC, abstractmethod
11
+ from typing import TYPE_CHECKING, Any
12
+
13
+ if TYPE_CHECKING:
14
+ import tree_sitter
15
+
16
+ from ..core.analysis_engine import AnalysisRequest
17
+ from ..models import AnalysisResult
18
+
19
+ from ..models import Class as ModelClass
20
+ from ..models import CodeElement
21
+ from ..models import Function as ModelFunction
22
+ from ..models import Import as ModelImport
23
+ from ..models import Variable as ModelVariable
24
+ from ..utils import log_debug, log_error
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
+ class ElementExtractor(ABC):
30
+ """
31
+ Abstract base class for language-specific element extractors.
32
+
33
+ Element extractors are responsible for parsing ASTs and extracting
34
+ meaningful code elements like functions, classes, variables, etc.
35
+ """
36
+
37
+ @abstractmethod
38
+ def extract_functions(
39
+ self, tree: "tree_sitter.Tree", source_code: str
40
+ ) -> list[ModelFunction]:
41
+ """
42
+ Extract function definitions from the syntax tree.
43
+
44
+ Args:
45
+ tree: Tree-sitter AST
46
+ source_code: Original source code
47
+
48
+ Returns:
49
+ List of extracted function objects
50
+ """
51
+ pass
52
+
53
+ @abstractmethod
54
+ def extract_classes(
55
+ self, tree: "tree_sitter.Tree", source_code: str
56
+ ) -> list[ModelClass]:
57
+ """
58
+ Extract class definitions from the syntax tree.
59
+
60
+ Args:
61
+ tree: Tree-sitter AST
62
+ source_code: Original source code
63
+
64
+ Returns:
65
+ List of extracted class objects
66
+ """
67
+ pass
68
+
69
+ @abstractmethod
70
+ def extract_variables(
71
+ self, tree: "tree_sitter.Tree", source_code: str
72
+ ) -> list[ModelVariable]:
73
+ """
74
+ Extract variable declarations from the syntax tree.
75
+
76
+ Args:
77
+ tree: Tree-sitter AST
78
+ source_code: Original source code
79
+
80
+ Returns:
81
+ List of extracted variable objects
82
+ """
83
+ pass
84
+
85
+ @abstractmethod
86
+ def extract_imports(
87
+ self, tree: "tree_sitter.Tree", source_code: str
88
+ ) -> list[ModelImport]:
89
+ """
90
+ Extract import statements from the syntax tree.
91
+
92
+ Args:
93
+ tree: Tree-sitter AST
94
+ source_code: Original source code
95
+
96
+ Returns:
97
+ List of extracted import objects
98
+ """
99
+ pass
100
+
101
+ def extract_all_elements(
102
+ self, tree: "tree_sitter.Tree", source_code: str
103
+ ) -> list[CodeElement]:
104
+ """
105
+ Extract all code elements from the syntax tree.
106
+
107
+ Args:
108
+ tree: Tree-sitter AST
109
+ source_code: Original source code
110
+
111
+ Returns:
112
+ List of all extracted code elements
113
+ """
114
+ elements: list[CodeElement] = []
115
+
116
+ try:
117
+ elements.extend(self.extract_functions(tree, source_code))
118
+ elements.extend(self.extract_classes(tree, source_code))
119
+ elements.extend(self.extract_variables(tree, source_code))
120
+ elements.extend(self.extract_imports(tree, source_code))
121
+ except Exception as e:
122
+ log_error(f"Failed to extract all elements: {e}")
123
+
124
+ return elements
125
+
126
+
127
+ class LanguagePlugin(ABC):
128
+ """
129
+ Abstract base class for language-specific plugins.
130
+
131
+ Language plugins provide language-specific functionality including
132
+ element extraction, file extension mapping, and language identification.
133
+ """
134
+
135
+ @abstractmethod
136
+ def get_language_name(self) -> str:
137
+ """
138
+ Return the name of the programming language this plugin supports.
139
+
140
+ Returns:
141
+ Language name (e.g., "java", "python", "javascript")
142
+ """
143
+ pass
144
+
145
+ @abstractmethod
146
+ def get_file_extensions(self) -> list[str]:
147
+ """
148
+ Return list of file extensions this plugin supports.
149
+
150
+ Returns:
151
+ List of file extensions (e.g., [".java", ".class"])
152
+ """
153
+ pass
154
+
155
+ @abstractmethod
156
+ def create_extractor(self) -> ElementExtractor:
157
+ """
158
+ Create and return an element extractor for this language.
159
+
160
+ Returns:
161
+ ElementExtractor instance for this language
162
+ """
163
+ pass
164
+
165
+ @abstractmethod
166
+ async def analyze_file(
167
+ self, file_path: str, request: "AnalysisRequest"
168
+ ) -> "AnalysisResult":
169
+ """
170
+ Analyze a file and return analysis results.
171
+
172
+ Args:
173
+ file_path: Path to the file to analyze
174
+ request: Analysis request with configuration
175
+
176
+ Returns:
177
+ AnalysisResult containing extracted information
178
+ """
179
+ pass
180
+
181
+ def is_applicable(self, file_path: str) -> bool:
182
+ """
183
+ Check if this plugin is applicable for the given file.
184
+
185
+ Args:
186
+ file_path: Path to the file to check
187
+
188
+ Returns:
189
+ True if this plugin can handle the file
190
+ """
191
+ extensions = self.get_file_extensions()
192
+ return any(file_path.lower().endswith(ext.lower()) for ext in extensions)
193
+
194
+ def get_plugin_info(self) -> dict[str, Any]:
195
+ """
196
+ Get information about this plugin.
197
+
198
+ Returns:
199
+ Dictionary containing plugin information
200
+ """
201
+ return {
202
+ "language": self.get_language_name(),
203
+ "extensions": self.get_file_extensions(),
204
+ "class_name": self.__class__.__name__,
205
+ "module": self.__class__.__module__,
206
+ }
207
+
208
+
209
+ class DefaultExtractor(ElementExtractor):
210
+ """
211
+ Default implementation of ElementExtractor with basic functionality.
212
+
213
+ This extractor provides generic extraction logic that works across
214
+ multiple languages by looking for common node types.
215
+ """
216
+
217
+ def extract_functions(
218
+ self, tree: "tree_sitter.Tree", source_code: str
219
+ ) -> list[ModelFunction]:
220
+ """Basic function extraction implementation."""
221
+ functions: list[ModelFunction] = []
222
+
223
+ try:
224
+ if hasattr(tree, "root_node"):
225
+ lines = source_code.splitlines()
226
+ self._traverse_for_functions(
227
+ tree.root_node, functions, lines, source_code
228
+ )
229
+ except Exception as e:
230
+ log_error(f"Error in function extraction: {e}")
231
+
232
+ return functions
233
+
234
+ def extract_classes(
235
+ self, tree: "tree_sitter.Tree", source_code: str
236
+ ) -> list[ModelClass]:
237
+ """Basic class extraction implementation."""
238
+ classes: list[ModelClass] = []
239
+
240
+ try:
241
+ if hasattr(tree, "root_node"):
242
+ lines = source_code.splitlines()
243
+ self._traverse_for_classes(tree.root_node, classes, lines, source_code)
244
+ except Exception as e:
245
+ log_error(f"Error in class extraction: {e}")
246
+
247
+ return classes
248
+
249
+ def extract_variables(
250
+ self, tree: "tree_sitter.Tree", source_code: str
251
+ ) -> list[ModelVariable]:
252
+ """Basic variable extraction implementation."""
253
+ variables: list[ModelVariable] = []
254
+
255
+ try:
256
+ if hasattr(tree, "root_node"):
257
+ lines = source_code.splitlines()
258
+ self._traverse_for_variables(
259
+ tree.root_node, variables, lines, source_code
260
+ )
261
+ except Exception as e:
262
+ log_error(f"Error in variable extraction: {e}")
263
+
264
+ return variables
265
+
266
+ def extract_imports(
267
+ self, tree: "tree_sitter.Tree", source_code: str
268
+ ) -> list[ModelImport]:
269
+ """Basic import extraction implementation."""
270
+ imports: list[ModelImport] = []
271
+
272
+ try:
273
+ if hasattr(tree, "root_node"):
274
+ lines = source_code.splitlines()
275
+ self._traverse_for_imports(tree.root_node, imports, lines, source_code)
276
+ except Exception as e:
277
+ log_error(f"Error in import extraction: {e}")
278
+
279
+ return imports
280
+
281
+ def _traverse_for_functions(
282
+ self,
283
+ node: "tree_sitter.Node",
284
+ functions: list[ModelFunction],
285
+ lines: list[str],
286
+ source_code: str,
287
+ ) -> None:
288
+ """Traverse tree to find function-like nodes."""
289
+ if hasattr(node, "type") and self._is_function_node(node.type):
290
+ try:
291
+ name = self._extract_node_name(node, source_code) or "unknown"
292
+ raw_text = self._extract_node_text(node, source_code)
293
+
294
+ func = ModelFunction(
295
+ name=name,
296
+ start_line=(
297
+ node.start_point[0] + 1 if hasattr(node, "start_point") else 0
298
+ ),
299
+ end_line=node.end_point[0] + 1 if hasattr(node, "end_point") else 0,
300
+ raw_text=raw_text,
301
+ language=self._get_language_hint(),
302
+ )
303
+ functions.append(func)
304
+ except Exception as e:
305
+ log_debug(f"Failed to extract function: {e}")
306
+
307
+ if hasattr(node, "children"):
308
+ for child in node.children:
309
+ self._traverse_for_functions(child, functions, lines, source_code)
310
+
311
+ def _traverse_for_classes(
312
+ self,
313
+ node: "tree_sitter.Node",
314
+ classes: list[ModelClass],
315
+ lines: list[str],
316
+ source_code: str,
317
+ ) -> None:
318
+ """Traverse tree to find class-like nodes."""
319
+ if hasattr(node, "type") and self._is_class_node(node.type):
320
+ try:
321
+ name = self._extract_node_name(node, source_code) or "unknown"
322
+ raw_text = self._extract_node_text(node, source_code)
323
+
324
+ cls = ModelClass(
325
+ name=name,
326
+ start_line=(
327
+ node.start_point[0] + 1 if hasattr(node, "start_point") else 0
328
+ ),
329
+ end_line=node.end_point[0] + 1 if hasattr(node, "end_point") else 0,
330
+ raw_text=raw_text,
331
+ language=self._get_language_hint(),
332
+ )
333
+ classes.append(cls)
334
+ except Exception as e:
335
+ log_debug(f"Failed to extract class: {e}")
336
+
337
+ if hasattr(node, "children"):
338
+ for child in node.children:
339
+ self._traverse_for_classes(child, classes, lines, source_code)
340
+
341
+ def _traverse_for_variables(
342
+ self,
343
+ node: "tree_sitter.Node",
344
+ variables: list[ModelVariable],
345
+ lines: list[str],
346
+ source_code: str,
347
+ ) -> None:
348
+ """Traverse tree to find variable declarations."""
349
+ if hasattr(node, "type") and self._is_variable_node(node.type):
350
+ try:
351
+ name = self._extract_node_name(node, source_code) or "unknown"
352
+ raw_text = self._extract_node_text(node, source_code)
353
+
354
+ var = ModelVariable(
355
+ name=name,
356
+ start_line=(
357
+ node.start_point[0] + 1 if hasattr(node, "start_point") else 0
358
+ ),
359
+ end_line=node.end_point[0] + 1 if hasattr(node, "end_point") else 0,
360
+ raw_text=raw_text,
361
+ language=self._get_language_hint(),
362
+ )
363
+ variables.append(var)
364
+ except Exception as e:
365
+ log_debug(f"Failed to extract variable: {e}")
366
+
367
+ if hasattr(node, "children"):
368
+ for child in node.children:
369
+ self._traverse_for_variables(child, variables, lines, source_code)
370
+
371
+ def _traverse_for_imports(
372
+ self,
373
+ node: "tree_sitter.Node",
374
+ imports: list[ModelImport],
375
+ lines: list[str],
376
+ source_code: str,
377
+ ) -> None:
378
+ """Traverse tree to find import statements."""
379
+ if hasattr(node, "type") and self._is_import_node(node.type):
380
+ try:
381
+ name = self._extract_node_name(node, source_code) or "unknown"
382
+ raw_text = self._extract_node_text(node, source_code)
383
+
384
+ imp = ModelImport(
385
+ name=name,
386
+ start_line=(
387
+ node.start_point[0] + 1 if hasattr(node, "start_point") else 0
388
+ ),
389
+ end_line=node.end_point[0] + 1 if hasattr(node, "end_point") else 0,
390
+ raw_text=raw_text,
391
+ language=self._get_language_hint(),
392
+ )
393
+ imports.append(imp)
394
+ except Exception as e:
395
+ log_debug(f"Failed to extract import: {e}")
396
+
397
+ if hasattr(node, "children"):
398
+ for child in node.children:
399
+ self._traverse_for_imports(child, imports, lines, source_code)
400
+
401
+ def _is_function_node(self, node_type: str) -> bool:
402
+ """Check if a node type represents a function."""
403
+ function_types = [
404
+ "function_definition",
405
+ "function_declaration",
406
+ "method_definition",
407
+ "function",
408
+ "method",
409
+ "procedure",
410
+ "subroutine",
411
+ ]
412
+ return any(ftype in node_type.lower() for ftype in function_types)
413
+
414
+ def _is_class_node(self, node_type: str) -> bool:
415
+ """Check if a node type represents a class."""
416
+ class_types = [
417
+ "class_definition",
418
+ "class_declaration",
419
+ "interface_definition",
420
+ "class",
421
+ "interface",
422
+ "struct",
423
+ "enum",
424
+ ]
425
+ return any(ctype in node_type.lower() for ctype in class_types)
426
+
427
+ def _is_variable_node(self, node_type: str) -> bool:
428
+ """Check if a node type represents a variable."""
429
+ variable_types = [
430
+ "variable_declaration",
431
+ "variable_definition",
432
+ "field_declaration",
433
+ "assignment",
434
+ "declaration",
435
+ "variable",
436
+ "field",
437
+ ]
438
+ return any(vtype in node_type.lower() for vtype in variable_types)
439
+
440
+ def _is_import_node(self, node_type: str) -> bool:
441
+ """Check if a node type represents an import."""
442
+ import_types = [
443
+ "import_statement",
444
+ "import_declaration",
445
+ "include_statement",
446
+ "import",
447
+ "include",
448
+ "require",
449
+ "use",
450
+ ]
451
+ return any(itype in node_type.lower() for itype in import_types)
452
+
453
+ def _extract_node_name(
454
+ self, node: "tree_sitter.Node", source_code: str
455
+ ) -> str | None:
456
+ """Extract name from a tree-sitter node."""
457
+ try:
458
+ # Look for identifier children
459
+ if hasattr(node, "children"):
460
+ for child in node.children:
461
+ if hasattr(child, "type") and child.type == "identifier":
462
+ return self._extract_node_text(child, source_code)
463
+
464
+ # Fallback: use position-based name
465
+ return f"element_{node.start_point[0]}_{node.start_point[1]}"
466
+ except Exception:
467
+ return None
468
+
469
+ def _extract_node_text(self, node: "tree_sitter.Node", source_code: str) -> str:
470
+ """Extract text content from a tree-sitter node."""
471
+ try:
472
+ if hasattr(node, "start_byte") and hasattr(node, "end_byte"):
473
+ source_bytes = source_code.encode("utf-8")
474
+ node_bytes = source_bytes[node.start_byte : node.end_byte]
475
+ return node_bytes.decode("utf-8", errors="replace")
476
+ return ""
477
+ except Exception as e:
478
+ log_debug(f"Failed to extract node text: {e}")
479
+ return ""
480
+
481
+ def _get_language_hint(self) -> str:
482
+ """Get a hint about the language being processed."""
483
+ return "unknown"
484
+
485
+
486
class DefaultLanguagePlugin(LanguagePlugin):
    """Fallback plugin offering basic functionality for any language."""

    def get_language_name(self) -> str:
        """Return the language name this plugin reports: ``"generic"``."""
        return "generic"

    def get_file_extensions(self) -> list[str]:
        """Return the fallback file extensions this plugin declares."""
        return [".txt", ".md"]  # Fallback extensions

    def create_extractor(self) -> ElementExtractor:
        """Return a new ``DefaultExtractor`` instance."""
        return DefaultExtractor()

    async def analyze_file(
        self, file_path: str, request: "AnalysisRequest"
    ) -> "AnalysisResult":
        """
        Analyze a file through a fresh ``UnifiedAnalysisEngine``.

        Args:
            file_path: Path to the file to analyze
            request: Analysis request with configuration (not read by this
                implementation)

        Returns:
            The engine's result on success. If the engine raises, the error
            is logged and an ``AnalysisResult`` with ``success=False``,
            empty contents and the error text is returned instead.
        """
        # Imported inside the method, as in the original implementation.
        from ..core.analysis_engine import UnifiedAnalysisEngine
        from ..models import AnalysisResult

        try:
            return await UnifiedAnalysisEngine().analyze_file(file_path)
        except Exception as exc:
            log_error(f"Failed to analyze file {file_path}: {exc}")
            return AnalysisResult(
                file_path=file_path,
                language=self.get_language_name(),
                line_count=0,
                elements=[],
                node_count=0,
                query_results={},
                source_code="",
                success=False,
                error_message=str(exc),
            )