tree-sitter-analyzer 1.9.2__py3-none-any.whl → 1.9.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tree-sitter-analyzer might be problematic. Click here for more details.
- tree_sitter_analyzer/__init__.py +1 -1
- tree_sitter_analyzer/api.py +216 -8
- tree_sitter_analyzer/cli/argument_validator.py +1 -1
- tree_sitter_analyzer/cli/commands/advanced_command.py +3 -6
- tree_sitter_analyzer/cli/commands/query_command.py +3 -1
- tree_sitter_analyzer/cli/commands/table_command.py +3 -3
- tree_sitter_analyzer/constants.py +5 -3
- tree_sitter_analyzer/core/analysis_engine.py +1 -1
- tree_sitter_analyzer/core/cache_service.py +1 -1
- tree_sitter_analyzer/core/engine.py +34 -10
- tree_sitter_analyzer/core/query.py +82 -2
- tree_sitter_analyzer/encoding_utils.py +64 -0
- tree_sitter_analyzer/exceptions.py +1 -1
- tree_sitter_analyzer/file_handler.py +49 -33
- tree_sitter_analyzer/formatters/base_formatter.py +1 -1
- tree_sitter_analyzer/formatters/html_formatter.py +24 -14
- tree_sitter_analyzer/formatters/javascript_formatter.py +28 -21
- tree_sitter_analyzer/formatters/language_formatter_factory.py +7 -4
- tree_sitter_analyzer/formatters/markdown_formatter.py +4 -4
- tree_sitter_analyzer/formatters/python_formatter.py +4 -4
- tree_sitter_analyzer/formatters/typescript_formatter.py +1 -1
- tree_sitter_analyzer/interfaces/mcp_adapter.py +4 -2
- tree_sitter_analyzer/interfaces/mcp_server.py +10 -10
- tree_sitter_analyzer/language_detector.py +30 -5
- tree_sitter_analyzer/language_loader.py +46 -26
- tree_sitter_analyzer/languages/css_plugin.py +6 -6
- tree_sitter_analyzer/languages/html_plugin.py +12 -8
- tree_sitter_analyzer/languages/java_plugin.py +330 -520
- tree_sitter_analyzer/languages/javascript_plugin.py +22 -78
- tree_sitter_analyzer/languages/markdown_plugin.py +277 -297
- tree_sitter_analyzer/languages/python_plugin.py +47 -85
- tree_sitter_analyzer/languages/typescript_plugin.py +48 -123
- tree_sitter_analyzer/mcp/resources/project_stats_resource.py +14 -8
- tree_sitter_analyzer/mcp/server.py +38 -23
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +10 -7
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +51 -7
- tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +11 -7
- tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +8 -6
- tree_sitter_analyzer/mcp/tools/list_files_tool.py +6 -6
- tree_sitter_analyzer/mcp/tools/output_format_validator.py +148 -0
- tree_sitter_analyzer/mcp/tools/search_content_tool.py +48 -15
- tree_sitter_analyzer/mcp/tools/table_format_tool.py +13 -8
- tree_sitter_analyzer/mcp/utils/file_output_manager.py +8 -3
- tree_sitter_analyzer/mcp/utils/gitignore_detector.py +24 -12
- tree_sitter_analyzer/mcp/utils/path_resolver.py +2 -2
- tree_sitter_analyzer/models.py +16 -0
- tree_sitter_analyzer/mypy_current_errors.txt +2 -0
- tree_sitter_analyzer/plugins/base.py +66 -0
- tree_sitter_analyzer/queries/java.py +9 -3
- tree_sitter_analyzer/queries/javascript.py +3 -8
- tree_sitter_analyzer/queries/markdown.py +1 -1
- tree_sitter_analyzer/queries/python.py +2 -2
- tree_sitter_analyzer/security/boundary_manager.py +2 -5
- tree_sitter_analyzer/security/regex_checker.py +2 -2
- tree_sitter_analyzer/security/validator.py +5 -1
- tree_sitter_analyzer/table_formatter.py +4 -4
- tree_sitter_analyzer/utils/__init__.py +27 -116
- tree_sitter_analyzer/{utils.py → utils/logging.py} +2 -2
- tree_sitter_analyzer/utils/tree_sitter_compat.py +2 -2
- {tree_sitter_analyzer-1.9.2.dist-info → tree_sitter_analyzer-1.9.4.dist-info}/METADATA +87 -45
- tree_sitter_analyzer-1.9.4.dist-info/RECORD +111 -0
- tree_sitter_analyzer-1.9.2.dist-info/RECORD +0 -109
- {tree_sitter_analyzer-1.9.2.dist-info → tree_sitter_analyzer-1.9.4.dist-info}/WHEEL +0 -0
- {tree_sitter_analyzer-1.9.2.dist-info → tree_sitter_analyzer-1.9.4.dist-info}/entry_points.txt +0 -0
tree_sitter_analyzer/__init__.py
CHANGED
tree_sitter_analyzer/api.py
CHANGED
|
@@ -64,7 +64,7 @@ def analyze_file(
|
|
|
64
64
|
engine = get_engine()
|
|
65
65
|
|
|
66
66
|
# Perform the analysis
|
|
67
|
-
analysis_result = engine.analyze_file(file_path, language)
|
|
67
|
+
analysis_result = engine.analyze_file(file_path, language, queries=queries)
|
|
68
68
|
|
|
69
69
|
# Convert AnalysisResult to expected API format (same as analyze_code)
|
|
70
70
|
result = {
|
|
@@ -85,7 +85,8 @@ def analyze_file(
|
|
|
85
85
|
|
|
86
86
|
# Add elements if requested and available
|
|
87
87
|
if include_elements and hasattr(analysis_result, "elements"):
|
|
88
|
-
|
|
88
|
+
elements_list: list[dict[str, Any]] = []
|
|
89
|
+
result["elements"] = elements_list
|
|
89
90
|
for elem in analysis_result.elements:
|
|
90
91
|
elem_dict = {
|
|
91
92
|
"name": elem.name,
|
|
@@ -145,7 +146,7 @@ def analyze_file(
|
|
|
145
146
|
else:
|
|
146
147
|
elem_dict["class_name"] = None
|
|
147
148
|
|
|
148
|
-
|
|
149
|
+
elements_list.append(elem_dict)
|
|
149
150
|
|
|
150
151
|
# Add query results if requested and available
|
|
151
152
|
if include_queries and hasattr(analysis_result, "query_results"):
|
|
@@ -219,7 +220,8 @@ def analyze_code(
|
|
|
219
220
|
|
|
220
221
|
# Add elements if requested and available
|
|
221
222
|
if include_elements and hasattr(analysis_result, "elements"):
|
|
222
|
-
|
|
223
|
+
elements_list: list[dict[str, Any]] = []
|
|
224
|
+
result["elements"] = elements_list
|
|
223
225
|
for elem in analysis_result.elements:
|
|
224
226
|
elem_dict = {
|
|
225
227
|
"name": elem.name,
|
|
@@ -279,7 +281,7 @@ def analyze_code(
|
|
|
279
281
|
else:
|
|
280
282
|
elem_dict["class_name"] = None
|
|
281
283
|
|
|
282
|
-
|
|
284
|
+
elements_list.append(elem_dict)
|
|
283
285
|
|
|
284
286
|
# Add query results if requested and available
|
|
285
287
|
if include_queries and hasattr(analysis_result, "query_results"):
|
|
@@ -454,8 +456,10 @@ def validate_file(file_path: str | Path) -> dict[str, Any]:
|
|
|
454
456
|
|
|
455
457
|
# Check if file is readable
|
|
456
458
|
try:
|
|
457
|
-
|
|
458
|
-
|
|
459
|
+
from .encoding_utils import read_file_safe
|
|
460
|
+
|
|
461
|
+
# Test file readability by reading it
|
|
462
|
+
read_file_safe(file_path)
|
|
459
463
|
result["readable"] = True
|
|
460
464
|
result["size"] = file_path.stat().st_size
|
|
461
465
|
except Exception as e:
|
|
@@ -518,6 +522,197 @@ def get_framework_info() -> dict[str, Any]:
|
|
|
518
522
|
return {"name": "tree-sitter-analyzer", "version": __version__, "error": str(e)}
|
|
519
523
|
|
|
520
524
|
|
|
525
|
+
def _group_captures_by_main_node(captures: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
526
|
+
"""
|
|
527
|
+
Group query captures by their main nodes (e.g., @method, @class, @function).
|
|
528
|
+
|
|
529
|
+
Each group represents one match of the query pattern, with all its sub-captures.
|
|
530
|
+
For example, a method_with_annotations query returns:
|
|
531
|
+
- One @method capture (the main node)
|
|
532
|
+
- One or more @annotation captures
|
|
533
|
+
- One @name capture
|
|
534
|
+
These all get grouped together as one "result".
|
|
535
|
+
|
|
536
|
+
Args:
|
|
537
|
+
captures: Flat list of all captures from the query
|
|
538
|
+
|
|
539
|
+
Returns:
|
|
540
|
+
List of grouped results, where each result has a 'captures' dict mapping
|
|
541
|
+
capture names to their data.
|
|
542
|
+
"""
|
|
543
|
+
if not captures:
|
|
544
|
+
return []
|
|
545
|
+
|
|
546
|
+
# Identify the main capture type (method, class, function, etc.)
|
|
547
|
+
# Usually it's the one with the longest text span or appears first
|
|
548
|
+
main_capture_types = {"method", "class", "function", "interface", "field"}
|
|
549
|
+
|
|
550
|
+
# Group by start position - captures that share the same main node position
|
|
551
|
+
position_groups: dict[tuple[int, int], list[dict[str, Any]]] = {}
|
|
552
|
+
|
|
553
|
+
for capture in captures:
|
|
554
|
+
capture_name = capture.get("capture_name", "")
|
|
555
|
+
|
|
556
|
+
# Find the main node position for this capture
|
|
557
|
+
if capture_name in main_capture_types:
|
|
558
|
+
# This is a main node, use its position as the key
|
|
559
|
+
pos_key = (capture.get("start_byte", 0), capture.get("end_byte", 0))
|
|
560
|
+
else:
|
|
561
|
+
# This is a sub-capture, we'll need to find its parent later
|
|
562
|
+
# For now, use its own position
|
|
563
|
+
pos_key = (capture.get("start_byte", 0), capture.get("end_byte", 0))
|
|
564
|
+
|
|
565
|
+
if pos_key not in position_groups:
|
|
566
|
+
position_groups[pos_key] = []
|
|
567
|
+
position_groups[pos_key].append(capture)
|
|
568
|
+
|
|
569
|
+
# Now group captures that belong together
|
|
570
|
+
# A capture belongs to a main node if it's within the main node's byte range
|
|
571
|
+
results = []
|
|
572
|
+
main_nodes = []
|
|
573
|
+
|
|
574
|
+
# First, identify all main nodes
|
|
575
|
+
for captures_list in position_groups.values():
|
|
576
|
+
for capture in captures_list:
|
|
577
|
+
if capture.get("capture_name") in main_capture_types:
|
|
578
|
+
main_nodes.append(capture)
|
|
579
|
+
|
|
580
|
+
# For each main node, find all sub-captures within its range
|
|
581
|
+
for main_node in main_nodes:
|
|
582
|
+
main_start = main_node.get("start_byte", 0)
|
|
583
|
+
main_end = main_node.get("end_byte", 0)
|
|
584
|
+
main_name = main_node.get("capture_name", "")
|
|
585
|
+
|
|
586
|
+
# Collect all captures within this main node's range
|
|
587
|
+
grouped_captures = {main_name: main_node}
|
|
588
|
+
|
|
589
|
+
for captures_list in position_groups.values():
|
|
590
|
+
for capture in captures_list:
|
|
591
|
+
capture_start = capture.get("start_byte", 0)
|
|
592
|
+
capture_end = capture.get("end_byte", 0)
|
|
593
|
+
capture_name = capture.get("capture_name", "")
|
|
594
|
+
|
|
595
|
+
# Skip the main node itself
|
|
596
|
+
if capture is main_node:
|
|
597
|
+
continue
|
|
598
|
+
|
|
599
|
+
# Check if this capture is within the main node's range
|
|
600
|
+
if capture_start >= main_start and capture_end <= main_end:
|
|
601
|
+
# Group multiple captures of the same name in a list
|
|
602
|
+
if capture_name in grouped_captures:
|
|
603
|
+
# Convert to list if not already
|
|
604
|
+
if not isinstance(grouped_captures[capture_name], list):
|
|
605
|
+
grouped_captures[capture_name] = [grouped_captures[capture_name]]
|
|
606
|
+
grouped_captures[capture_name].append(capture)
|
|
607
|
+
else:
|
|
608
|
+
grouped_captures[capture_name] = capture
|
|
609
|
+
|
|
610
|
+
results.append({"captures": grouped_captures})
|
|
611
|
+
|
|
612
|
+
return results
|
|
613
|
+
|
|
614
|
+
|
|
615
|
+
def _group_captures_by_main_node(captures: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
616
|
+
"""
|
|
617
|
+
Group query captures by their main nodes (e.g., @method, @class, @function).
|
|
618
|
+
|
|
619
|
+
Each group represents one match of the query pattern, with all its sub-captures.
|
|
620
|
+
For example, a method_with_annotations query returns:
|
|
621
|
+
- One @method capture (the main node)
|
|
622
|
+
- One or more @annotation captures
|
|
623
|
+
- One @name capture
|
|
624
|
+
These all get grouped together as one "result".
|
|
625
|
+
|
|
626
|
+
Args:
|
|
627
|
+
captures: Flat list of all captures from the query
|
|
628
|
+
|
|
629
|
+
Returns:
|
|
630
|
+
List of grouped results, where each result has:
|
|
631
|
+
- 'captures' dict mapping capture names to their data
|
|
632
|
+
- Top-level fields from the main node (text, start_line, end_line, etc.)
|
|
633
|
+
"""
|
|
634
|
+
if not captures:
|
|
635
|
+
return []
|
|
636
|
+
|
|
637
|
+
# Identify the main capture type (method, class, function, etc.)
|
|
638
|
+
# Usually it's the one with the longest text span or appears first
|
|
639
|
+
main_capture_types = {"method", "class", "function", "interface", "field"}
|
|
640
|
+
|
|
641
|
+
# Group by start position - captures that share the same main node position
|
|
642
|
+
position_groups: dict[tuple[int, int], list[dict[str, Any]]] = {}
|
|
643
|
+
|
|
644
|
+
for capture in captures:
|
|
645
|
+
capture_name = capture.get("capture_name", "")
|
|
646
|
+
|
|
647
|
+
# Find the main node position for this capture
|
|
648
|
+
if capture_name in main_capture_types:
|
|
649
|
+
# This is a main node, use its position as the key
|
|
650
|
+
pos_key = (capture.get("start_byte", 0), capture.get("end_byte", 0))
|
|
651
|
+
else:
|
|
652
|
+
# This is a sub-capture, we'll need to find its parent later
|
|
653
|
+
# For now, use its own position
|
|
654
|
+
pos_key = (capture.get("start_byte", 0), capture.get("end_byte", 0))
|
|
655
|
+
|
|
656
|
+
if pos_key not in position_groups:
|
|
657
|
+
position_groups[pos_key] = []
|
|
658
|
+
position_groups[pos_key].append(capture)
|
|
659
|
+
|
|
660
|
+
# Now group captures that belong together
|
|
661
|
+
# A capture belongs to a main node if it's within the main node's byte range
|
|
662
|
+
results = []
|
|
663
|
+
main_nodes = []
|
|
664
|
+
|
|
665
|
+
# First, identify all main nodes
|
|
666
|
+
for captures_list in position_groups.values():
|
|
667
|
+
for capture in captures_list:
|
|
668
|
+
if capture.get("capture_name") in main_capture_types:
|
|
669
|
+
main_nodes.append(capture)
|
|
670
|
+
|
|
671
|
+
# For each main node, find all sub-captures within its range
|
|
672
|
+
for main_node in main_nodes:
|
|
673
|
+
main_start = main_node.get("start_byte", 0)
|
|
674
|
+
main_end = main_node.get("end_byte", 0)
|
|
675
|
+
main_name = main_node.get("capture_name", "")
|
|
676
|
+
|
|
677
|
+
# Collect all captures within this main node's range
|
|
678
|
+
grouped_captures = {main_name: main_node}
|
|
679
|
+
|
|
680
|
+
for captures_list in position_groups.values():
|
|
681
|
+
for capture in captures_list:
|
|
682
|
+
capture_start = capture.get("start_byte", 0)
|
|
683
|
+
capture_end = capture.get("end_byte", 0)
|
|
684
|
+
capture_name = capture.get("capture_name", "")
|
|
685
|
+
|
|
686
|
+
# Skip the main node itself
|
|
687
|
+
if capture is main_node:
|
|
688
|
+
continue
|
|
689
|
+
|
|
690
|
+
# Check if this capture is within the main node's range
|
|
691
|
+
if capture_start >= main_start and capture_end <= main_end:
|
|
692
|
+
# Group multiple captures of the same name in a list
|
|
693
|
+
if capture_name in grouped_captures:
|
|
694
|
+
# Convert to list if not already
|
|
695
|
+
if not isinstance(grouped_captures[capture_name], list):
|
|
696
|
+
grouped_captures[capture_name] = [grouped_captures[capture_name]]
|
|
697
|
+
grouped_captures[capture_name].append(capture)
|
|
698
|
+
else:
|
|
699
|
+
grouped_captures[capture_name] = capture
|
|
700
|
+
|
|
701
|
+
# Create result with top-level fields from main node
|
|
702
|
+
result = {
|
|
703
|
+
"captures": grouped_captures,
|
|
704
|
+
"text": main_node.get("text", ""),
|
|
705
|
+
"start_line": main_node.get("line_number", 0),
|
|
706
|
+
"end_line": main_node.get("line_number", 0) + main_node.get("text", "").count("\n"),
|
|
707
|
+
"start_byte": main_start,
|
|
708
|
+
"end_byte": main_end,
|
|
709
|
+
"node_type": main_node.get("node_type", ""),
|
|
710
|
+
}
|
|
711
|
+
results.append(result)
|
|
712
|
+
|
|
713
|
+
return results
|
|
714
|
+
|
|
715
|
+
|
|
521
716
|
def execute_query(
|
|
522
717
|
file_path: str | Path, query_name: str, language: str | None = None
|
|
523
718
|
) -> dict[str, Any]:
|
|
@@ -543,7 +738,20 @@ def execute_query(
|
|
|
543
738
|
)
|
|
544
739
|
|
|
545
740
|
if result["success"] and "query_results" in result:
|
|
546
|
-
|
|
741
|
+
query_result_dict = result["query_results"].get(query_name, {})
|
|
742
|
+
|
|
743
|
+
# Extract the captures list from the query result dictionary
|
|
744
|
+
if isinstance(query_result_dict, dict) and "captures" in query_result_dict:
|
|
745
|
+
raw_captures = query_result_dict["captures"]
|
|
746
|
+
elif isinstance(query_result_dict, list):
|
|
747
|
+
raw_captures = query_result_dict
|
|
748
|
+
else:
|
|
749
|
+
raw_captures = []
|
|
750
|
+
|
|
751
|
+
# Group captures by their main capture (e.g., @method, @class)
|
|
752
|
+
# This groups related captures together (e.g., method + its annotations + name)
|
|
753
|
+
query_results = _group_captures_by_main_node(raw_captures)
|
|
754
|
+
|
|
547
755
|
return {
|
|
548
756
|
"success": True,
|
|
549
757
|
"query_name": query_name,
|
|
@@ -49,8 +49,9 @@ class AdvancedCommand(BaseCommand):
|
|
|
49
49
|
Dictionary containing file metrics
|
|
50
50
|
"""
|
|
51
51
|
try:
|
|
52
|
-
|
|
53
|
-
|
|
52
|
+
from ...encoding_utils import read_file_safe
|
|
53
|
+
|
|
54
|
+
content, _ = read_file_safe(file_path)
|
|
54
55
|
|
|
55
56
|
lines = content.split("\n")
|
|
56
57
|
total_lines = len(lines)
|
|
@@ -111,10 +112,6 @@ class AdvancedCommand(BaseCommand):
|
|
|
111
112
|
if "-->" not in stripped:
|
|
112
113
|
in_multiline_comment = True
|
|
113
114
|
continue
|
|
114
|
-
elif in_multiline_comment and "-->" in stripped:
|
|
115
|
-
comment_lines += 1
|
|
116
|
-
in_multiline_comment = False
|
|
117
|
-
continue
|
|
118
115
|
|
|
119
116
|
# If not a comment, it's code
|
|
120
117
|
code_lines += 1
|
|
@@ -5,6 +5,8 @@ Query Command
|
|
|
5
5
|
Handles query execution functionality.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
8
10
|
from ...core.query_service import QueryService
|
|
9
11
|
from ...output_manager import output_data, output_error, output_info, output_json
|
|
10
12
|
from .base_command import BaseCommand
|
|
@@ -13,7 +15,7 @@ from .base_command import BaseCommand
|
|
|
13
15
|
class QueryCommand(BaseCommand):
|
|
14
16
|
"""Command for executing queries."""
|
|
15
17
|
|
|
16
|
-
def __init__(self, args):
|
|
18
|
+
def __init__(self, args: Any) -> None:
|
|
17
19
|
"""Initialize the query command with QueryService."""
|
|
18
20
|
super().__init__(args)
|
|
19
21
|
self.query_service = QueryService()
|
|
@@ -25,7 +25,7 @@ from .base_command import BaseCommand
|
|
|
25
25
|
class TableCommand(BaseCommand):
|
|
26
26
|
"""Command for generating table format output."""
|
|
27
27
|
|
|
28
|
-
def __init__(self, args):
|
|
28
|
+
def __init__(self, args: Any) -> None:
|
|
29
29
|
"""Initialize the table command."""
|
|
30
30
|
super().__init__(args)
|
|
31
31
|
|
|
@@ -56,10 +56,10 @@ class TableCommand(BaseCommand):
|
|
|
56
56
|
|
|
57
57
|
# Create table formatter
|
|
58
58
|
include_javadoc = getattr(self.args, "include_javadoc", False)
|
|
59
|
-
|
|
59
|
+
table_formatter: Any = create_table_formatter(
|
|
60
60
|
self.args.table, language, include_javadoc
|
|
61
61
|
)
|
|
62
|
-
table_output =
|
|
62
|
+
table_output = table_formatter.format_structure(structure_result)
|
|
63
63
|
|
|
64
64
|
# Output table
|
|
65
65
|
self._output_table(table_output)
|
|
@@ -5,6 +5,8 @@ Constants for tree-sitter-analyzer
|
|
|
5
5
|
This module defines constants used throughout the project to ensure consistency.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
+
from typing import Any, cast
|
|
9
|
+
|
|
8
10
|
# Element types for unified element management system
|
|
9
11
|
ELEMENT_TYPE_CLASS = "class"
|
|
10
12
|
ELEMENT_TYPE_FUNCTION = "function"
|
|
@@ -34,7 +36,7 @@ LEGACY_CLASS_MAPPING = {
|
|
|
34
36
|
}
|
|
35
37
|
|
|
36
38
|
|
|
37
|
-
def get_element_type(element) -> str:
|
|
39
|
+
def get_element_type(element: Any) -> str:
|
|
38
40
|
"""
|
|
39
41
|
Get the element type from an element object.
|
|
40
42
|
|
|
@@ -45,7 +47,7 @@ def get_element_type(element) -> str:
|
|
|
45
47
|
Standardized element type string
|
|
46
48
|
"""
|
|
47
49
|
if hasattr(element, "element_type"):
|
|
48
|
-
return element.element_type
|
|
50
|
+
return cast(str, element.element_type)
|
|
49
51
|
|
|
50
52
|
if hasattr(element, "__class__") and hasattr(element.__class__, "__name__"):
|
|
51
53
|
class_name = element.__class__.__name__
|
|
@@ -54,7 +56,7 @@ def get_element_type(element) -> str:
|
|
|
54
56
|
return "unknown"
|
|
55
57
|
|
|
56
58
|
|
|
57
|
-
def is_element_of_type(element, element_type: str) -> bool:
|
|
59
|
+
def is_element_of_type(element: Any, element_type: str) -> bool:
|
|
58
60
|
"""
|
|
59
61
|
Check if an element is of a specific type.
|
|
60
62
|
|
|
@@ -218,7 +218,7 @@ class UnifiedAnalysisEngine:
|
|
|
218
218
|
instance = super().__new__(cls)
|
|
219
219
|
cls._instances[instance_key] = instance
|
|
220
220
|
# Mark as not initialized for this instance
|
|
221
|
-
instance._initialized
|
|
221
|
+
instance._initialized = False
|
|
222
222
|
|
|
223
223
|
return cls._instances[instance_key]
|
|
224
224
|
|
|
@@ -18,7 +18,7 @@ from dataclasses import dataclass
|
|
|
18
18
|
from datetime import datetime, timedelta
|
|
19
19
|
from typing import Any
|
|
20
20
|
|
|
21
|
-
from cachetools import LRUCache, TTLCache
|
|
21
|
+
from cachetools import LRUCache, TTLCache # type: ignore[import-untyped]
|
|
22
22
|
|
|
23
23
|
from ..utils import log_debug, log_info
|
|
24
24
|
|
|
@@ -48,7 +48,7 @@ class AnalysisEngine:
|
|
|
48
48
|
raise
|
|
49
49
|
|
|
50
50
|
def analyze_file(
|
|
51
|
-
self, file_path: str | Path, language: str | None = None
|
|
51
|
+
self, file_path: str | Path, language: str | None = None, queries: list[str] | None = None
|
|
52
52
|
) -> AnalysisResult:
|
|
53
53
|
"""
|
|
54
54
|
Analyze a source code file.
|
|
@@ -56,6 +56,7 @@ class AnalysisEngine:
|
|
|
56
56
|
Args:
|
|
57
57
|
file_path: Path to the file to analyze
|
|
58
58
|
language: Optional language override
|
|
59
|
+
queries: List of query names to execute (all available if not specified)
|
|
59
60
|
|
|
60
61
|
Returns:
|
|
61
62
|
AnalysisResult containing analysis results
|
|
@@ -83,7 +84,7 @@ class AnalysisEngine:
|
|
|
83
84
|
)
|
|
84
85
|
|
|
85
86
|
# Perform analysis
|
|
86
|
-
return self._perform_analysis(parse_result)
|
|
87
|
+
return self._perform_analysis(parse_result, queries=queries)
|
|
87
88
|
|
|
88
89
|
except FileNotFoundError:
|
|
89
90
|
raise
|
|
@@ -161,12 +162,15 @@ class AnalysisEngine:
|
|
|
161
162
|
logger.warning(f"Language detection failed for {file_path}: {e}")
|
|
162
163
|
return "unknown"
|
|
163
164
|
|
|
164
|
-
def _perform_analysis(
|
|
165
|
+
def _perform_analysis(
|
|
166
|
+
self, parse_result: ParseResult, queries: list[str] | None = None
|
|
167
|
+
) -> AnalysisResult:
|
|
165
168
|
"""
|
|
166
169
|
Perform comprehensive analysis on parsed code.
|
|
167
170
|
|
|
168
171
|
Args:
|
|
169
172
|
parse_result: Result from parsing operation
|
|
173
|
+
queries: Optional list of query names to execute (default: all queries)
|
|
170
174
|
|
|
171
175
|
Returns:
|
|
172
176
|
AnalysisResult containing analysis results
|
|
@@ -176,7 +180,13 @@ class AnalysisEngine:
|
|
|
176
180
|
plugin = self._get_language_plugin(parse_result.language)
|
|
177
181
|
|
|
178
182
|
# Execute queries
|
|
179
|
-
query_results = self._execute_queries(
|
|
183
|
+
query_results = self._execute_queries(
|
|
184
|
+
parse_result.tree,
|
|
185
|
+
plugin,
|
|
186
|
+
queries=queries,
|
|
187
|
+
source_code=parse_result.source_code or "",
|
|
188
|
+
language_name=parse_result.language,
|
|
189
|
+
)
|
|
180
190
|
|
|
181
191
|
# Extract elements
|
|
182
192
|
elements = self._extract_elements(parse_result, plugin)
|
|
@@ -227,13 +237,23 @@ class AnalysisEngine:
|
|
|
227
237
|
|
|
228
238
|
return None
|
|
229
239
|
|
|
230
|
-
def _execute_queries(
|
|
240
|
+
def _execute_queries(
|
|
241
|
+
self,
|
|
242
|
+
tree: Tree | None,
|
|
243
|
+
plugin: Any | None,
|
|
244
|
+
queries: list[str] | None = None,
|
|
245
|
+
source_code: str = "",
|
|
246
|
+
language_name: str = "unknown",
|
|
247
|
+
) -> dict[str, Any]:
|
|
231
248
|
"""
|
|
232
249
|
Execute queries on the parsed tree.
|
|
233
250
|
|
|
234
251
|
Args:
|
|
235
252
|
tree: Parsed Tree-sitter tree
|
|
236
253
|
plugin: Language plugin
|
|
254
|
+
queries: Optional list of query names to execute (default: uses plugin queries or ["class", "method", "field"])
|
|
255
|
+
source_code: Source code for context
|
|
256
|
+
language_name: Name of the programming language
|
|
237
257
|
|
|
238
258
|
Returns:
|
|
239
259
|
Dictionary of query results
|
|
@@ -242,8 +262,11 @@ class AnalysisEngine:
|
|
|
242
262
|
return {}
|
|
243
263
|
|
|
244
264
|
try:
|
|
245
|
-
#
|
|
246
|
-
if
|
|
265
|
+
# Use provided queries or determine from plugin/fallback
|
|
266
|
+
if queries is not None:
|
|
267
|
+
query_names = queries
|
|
268
|
+
elif plugin and hasattr(plugin, "get_supported_queries"):
|
|
269
|
+
# If plugin is available, use its supported queries
|
|
247
270
|
query_names = plugin.get_supported_queries()
|
|
248
271
|
else:
|
|
249
272
|
# Fallback to common queries that exist in the system
|
|
@@ -258,11 +281,12 @@ class AnalysisEngine:
|
|
|
258
281
|
results = {}
|
|
259
282
|
for query_name in query_names:
|
|
260
283
|
try:
|
|
261
|
-
result = self.query_executor.
|
|
284
|
+
result = self.query_executor.execute_query_with_language_name(
|
|
262
285
|
tree,
|
|
263
286
|
language_obj,
|
|
264
287
|
query_name,
|
|
265
|
-
|
|
288
|
+
source_code,
|
|
289
|
+
language_name,
|
|
266
290
|
)
|
|
267
291
|
results[query_name] = result
|
|
268
292
|
except Exception as e:
|
|
@@ -535,7 +559,7 @@ class AnalysisEngine:
|
|
|
535
559
|
logger.error(f"Error getting extensions for {language}: {e}")
|
|
536
560
|
return []
|
|
537
561
|
|
|
538
|
-
def get_registry_info(self) -> dict:
|
|
562
|
+
def get_registry_info(self) -> dict[str, Any]:
|
|
539
563
|
"""
|
|
540
564
|
Get registry information (compatibility method)
|
|
541
565
|
|
|
@@ -64,7 +64,6 @@ class QueryExecutor:
|
|
|
64
64
|
# Validate inputs
|
|
65
65
|
if tree is None:
|
|
66
66
|
return self._create_error_result("Tree is None", query_name=query_name)
|
|
67
|
-
|
|
68
67
|
if language is None:
|
|
69
68
|
return self._create_error_result( # type: ignore[unreachable]
|
|
70
69
|
"Language is None", query_name=query_name
|
|
@@ -140,6 +139,88 @@ class QueryExecutor:
|
|
|
140
139
|
f"Unexpected error: {str(e)}", query_name=query_name
|
|
141
140
|
)
|
|
142
141
|
|
|
142
|
+
def execute_query_with_language_name(
    self,
    tree: Tree | None,
    language: Language,
    query_name: str,
    source_code: str,
    language_name: str,
) -> dict[str, Any]:
    """
    Execute a predefined query by name with explicit language name.

    The query text is looked up through ``self._query_loader`` using the
    caller-supplied ``language_name`` (stripped and lower-cased; an empty
    value becomes "unknown").

    Statistics: every call increments ``total_queries``. A successful run
    increments ``successful_queries`` and adds the elapsed wall-clock time to
    ``total_execution_time``. ``failed_queries`` is incremented only by the
    outermost handler; the handled failures below (missing tree/language,
    unknown query, execution or capture-processing errors) return an error
    result without touching it.
    NOTE(review): confirm that leaving ``failed_queries`` untouched on handled
    failures is intended.

    Args:
        tree: Tree-sitter tree to query
        language: Tree-sitter language object
        query_name: Name of the predefined query
        source_code: Source code for context
        language_name: Name of the programming language

    Returns:
        On success, a dictionary with "captures", "query_name",
        "query_string", "execution_time" and "success" (True). On failure,
        whatever ``self._create_error_result`` builds for the error message.
    """
    start_time = time.time()
    self._execution_stats["total_queries"] += 1

    try:
        # Validate inputs
        if tree is None:
            return self._create_error_result("Tree is None", query_name=query_name)
        if language is None:
            return self._create_error_result(  # type: ignore[unreachable]
                "Language is None", query_name=query_name
            )

        # Use the provided language name (normalized for the loader lookup)
        language_name = language_name.strip().lower() if language_name else "unknown"

        query_string = self._query_loader.get_query(language_name, query_name)
        if query_string is None:
            return self._create_error_result(
                f"Query '{query_name}' not found", query_name=query_name
            )

        # Create and execute the query using modern API
        try:
            captures = TreeSitterQueryCompat.safe_execute_query(
                language, query_string, tree.root_node, fallback_result=[]
            )

            # Process captures; a failure here is reported separately from a
            # failure of the query execution itself.
            try:
                processed_captures = self._process_captures(captures, source_code)
            except Exception as e:
                logger.error(f"Error processing captures for {query_name}: {e}")
                return self._create_error_result(
                    f"Capture processing failed: {str(e)}", query_name=query_name
                )

            self._execution_stats["successful_queries"] += 1
            execution_time = time.time() - start_time
            self._execution_stats["total_execution_time"] += execution_time

            return {
                "captures": processed_captures,
                "query_name": query_name,
                "query_string": query_string,
                "execution_time": execution_time,
                "success": True,
            }

        except Exception as e:
            logger.error(f"Error executing query '{query_name}': {e}")
            return self._create_error_result(
                f"Query execution failed: {str(e)}", query_name=query_name
            )

    except Exception as e:
        # Name this method in the log so the failure is not attributed to the
        # sibling execute_query.
        logger.error(f"Unexpected error in execute_query_with_language_name: {e}")
        self._execution_stats["failed_queries"] += 1
        return self._create_error_result(
            f"Unexpected error: {str(e)}", query_name=query_name
        )
|
|
223
|
+
|
|
143
224
|
def execute_query_string(
|
|
144
225
|
self,
|
|
145
226
|
tree: Tree | None,
|
|
@@ -166,7 +247,6 @@ class QueryExecutor:
|
|
|
166
247
|
# Validate inputs
|
|
167
248
|
if tree is None:
|
|
168
249
|
return self._create_error_result("Tree is None")
|
|
169
|
-
|
|
170
250
|
if language is None:
|
|
171
251
|
return self._create_error_result("Language is None") # type: ignore[unreachable]
|
|
172
252
|
|