tree-sitter-analyzer 1.9.17.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149)
  1. tree_sitter_analyzer/__init__.py +132 -0
  2. tree_sitter_analyzer/__main__.py +11 -0
  3. tree_sitter_analyzer/api.py +853 -0
  4. tree_sitter_analyzer/cli/__init__.py +39 -0
  5. tree_sitter_analyzer/cli/__main__.py +12 -0
  6. tree_sitter_analyzer/cli/argument_validator.py +89 -0
  7. tree_sitter_analyzer/cli/commands/__init__.py +26 -0
  8. tree_sitter_analyzer/cli/commands/advanced_command.py +226 -0
  9. tree_sitter_analyzer/cli/commands/base_command.py +181 -0
  10. tree_sitter_analyzer/cli/commands/default_command.py +18 -0
  11. tree_sitter_analyzer/cli/commands/find_and_grep_cli.py +188 -0
  12. tree_sitter_analyzer/cli/commands/list_files_cli.py +133 -0
  13. tree_sitter_analyzer/cli/commands/partial_read_command.py +139 -0
  14. tree_sitter_analyzer/cli/commands/query_command.py +109 -0
  15. tree_sitter_analyzer/cli/commands/search_content_cli.py +161 -0
  16. tree_sitter_analyzer/cli/commands/structure_command.py +156 -0
  17. tree_sitter_analyzer/cli/commands/summary_command.py +116 -0
  18. tree_sitter_analyzer/cli/commands/table_command.py +414 -0
  19. tree_sitter_analyzer/cli/info_commands.py +124 -0
  20. tree_sitter_analyzer/cli_main.py +472 -0
  21. tree_sitter_analyzer/constants.py +85 -0
  22. tree_sitter_analyzer/core/__init__.py +15 -0
  23. tree_sitter_analyzer/core/analysis_engine.py +580 -0
  24. tree_sitter_analyzer/core/cache_service.py +333 -0
  25. tree_sitter_analyzer/core/engine.py +585 -0
  26. tree_sitter_analyzer/core/parser.py +293 -0
  27. tree_sitter_analyzer/core/query.py +605 -0
  28. tree_sitter_analyzer/core/query_filter.py +200 -0
  29. tree_sitter_analyzer/core/query_service.py +340 -0
  30. tree_sitter_analyzer/encoding_utils.py +530 -0
  31. tree_sitter_analyzer/exceptions.py +747 -0
  32. tree_sitter_analyzer/file_handler.py +246 -0
  33. tree_sitter_analyzer/formatters/__init__.py +1 -0
  34. tree_sitter_analyzer/formatters/base_formatter.py +201 -0
  35. tree_sitter_analyzer/formatters/csharp_formatter.py +367 -0
  36. tree_sitter_analyzer/formatters/formatter_config.py +197 -0
  37. tree_sitter_analyzer/formatters/formatter_factory.py +84 -0
  38. tree_sitter_analyzer/formatters/formatter_registry.py +377 -0
  39. tree_sitter_analyzer/formatters/formatter_selector.py +96 -0
  40. tree_sitter_analyzer/formatters/go_formatter.py +368 -0
  41. tree_sitter_analyzer/formatters/html_formatter.py +498 -0
  42. tree_sitter_analyzer/formatters/java_formatter.py +423 -0
  43. tree_sitter_analyzer/formatters/javascript_formatter.py +611 -0
  44. tree_sitter_analyzer/formatters/kotlin_formatter.py +268 -0
  45. tree_sitter_analyzer/formatters/language_formatter_factory.py +123 -0
  46. tree_sitter_analyzer/formatters/legacy_formatter_adapters.py +228 -0
  47. tree_sitter_analyzer/formatters/markdown_formatter.py +725 -0
  48. tree_sitter_analyzer/formatters/php_formatter.py +301 -0
  49. tree_sitter_analyzer/formatters/python_formatter.py +830 -0
  50. tree_sitter_analyzer/formatters/ruby_formatter.py +278 -0
  51. tree_sitter_analyzer/formatters/rust_formatter.py +233 -0
  52. tree_sitter_analyzer/formatters/sql_formatter_wrapper.py +689 -0
  53. tree_sitter_analyzer/formatters/sql_formatters.py +536 -0
  54. tree_sitter_analyzer/formatters/typescript_formatter.py +543 -0
  55. tree_sitter_analyzer/formatters/yaml_formatter.py +462 -0
  56. tree_sitter_analyzer/interfaces/__init__.py +9 -0
  57. tree_sitter_analyzer/interfaces/cli.py +535 -0
  58. tree_sitter_analyzer/interfaces/cli_adapter.py +359 -0
  59. tree_sitter_analyzer/interfaces/mcp_adapter.py +224 -0
  60. tree_sitter_analyzer/interfaces/mcp_server.py +428 -0
  61. tree_sitter_analyzer/language_detector.py +553 -0
  62. tree_sitter_analyzer/language_loader.py +271 -0
  63. tree_sitter_analyzer/languages/__init__.py +10 -0
  64. tree_sitter_analyzer/languages/csharp_plugin.py +1076 -0
  65. tree_sitter_analyzer/languages/css_plugin.py +449 -0
  66. tree_sitter_analyzer/languages/go_plugin.py +836 -0
  67. tree_sitter_analyzer/languages/html_plugin.py +496 -0
  68. tree_sitter_analyzer/languages/java_plugin.py +1299 -0
  69. tree_sitter_analyzer/languages/javascript_plugin.py +1622 -0
  70. tree_sitter_analyzer/languages/kotlin_plugin.py +656 -0
  71. tree_sitter_analyzer/languages/markdown_plugin.py +1928 -0
  72. tree_sitter_analyzer/languages/php_plugin.py +862 -0
  73. tree_sitter_analyzer/languages/python_plugin.py +1636 -0
  74. tree_sitter_analyzer/languages/ruby_plugin.py +757 -0
  75. tree_sitter_analyzer/languages/rust_plugin.py +673 -0
  76. tree_sitter_analyzer/languages/sql_plugin.py +2444 -0
  77. tree_sitter_analyzer/languages/typescript_plugin.py +1892 -0
  78. tree_sitter_analyzer/languages/yaml_plugin.py +695 -0
  79. tree_sitter_analyzer/legacy_table_formatter.py +860 -0
  80. tree_sitter_analyzer/mcp/__init__.py +34 -0
  81. tree_sitter_analyzer/mcp/resources/__init__.py +43 -0
  82. tree_sitter_analyzer/mcp/resources/code_file_resource.py +208 -0
  83. tree_sitter_analyzer/mcp/resources/project_stats_resource.py +586 -0
  84. tree_sitter_analyzer/mcp/server.py +869 -0
  85. tree_sitter_analyzer/mcp/tools/__init__.py +28 -0
  86. tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +779 -0
  87. tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +291 -0
  88. tree_sitter_analyzer/mcp/tools/base_tool.py +139 -0
  89. tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +816 -0
  90. tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +686 -0
  91. tree_sitter_analyzer/mcp/tools/list_files_tool.py +413 -0
  92. tree_sitter_analyzer/mcp/tools/output_format_validator.py +148 -0
  93. tree_sitter_analyzer/mcp/tools/query_tool.py +443 -0
  94. tree_sitter_analyzer/mcp/tools/read_partial_tool.py +464 -0
  95. tree_sitter_analyzer/mcp/tools/search_content_tool.py +836 -0
  96. tree_sitter_analyzer/mcp/tools/table_format_tool.py +572 -0
  97. tree_sitter_analyzer/mcp/tools/universal_analyze_tool.py +653 -0
  98. tree_sitter_analyzer/mcp/utils/__init__.py +113 -0
  99. tree_sitter_analyzer/mcp/utils/error_handler.py +569 -0
  100. tree_sitter_analyzer/mcp/utils/file_output_factory.py +217 -0
  101. tree_sitter_analyzer/mcp/utils/file_output_manager.py +322 -0
  102. tree_sitter_analyzer/mcp/utils/gitignore_detector.py +358 -0
  103. tree_sitter_analyzer/mcp/utils/path_resolver.py +414 -0
  104. tree_sitter_analyzer/mcp/utils/search_cache.py +343 -0
  105. tree_sitter_analyzer/models.py +840 -0
  106. tree_sitter_analyzer/mypy_current_errors.txt +2 -0
  107. tree_sitter_analyzer/output_manager.py +255 -0
  108. tree_sitter_analyzer/platform_compat/__init__.py +3 -0
  109. tree_sitter_analyzer/platform_compat/adapter.py +324 -0
  110. tree_sitter_analyzer/platform_compat/compare.py +224 -0
  111. tree_sitter_analyzer/platform_compat/detector.py +67 -0
  112. tree_sitter_analyzer/platform_compat/fixtures.py +228 -0
  113. tree_sitter_analyzer/platform_compat/profiles.py +217 -0
  114. tree_sitter_analyzer/platform_compat/record.py +55 -0
  115. tree_sitter_analyzer/platform_compat/recorder.py +155 -0
  116. tree_sitter_analyzer/platform_compat/report.py +92 -0
  117. tree_sitter_analyzer/plugins/__init__.py +280 -0
  118. tree_sitter_analyzer/plugins/base.py +647 -0
  119. tree_sitter_analyzer/plugins/manager.py +384 -0
  120. tree_sitter_analyzer/project_detector.py +328 -0
  121. tree_sitter_analyzer/queries/__init__.py +27 -0
  122. tree_sitter_analyzer/queries/csharp.py +216 -0
  123. tree_sitter_analyzer/queries/css.py +615 -0
  124. tree_sitter_analyzer/queries/go.py +275 -0
  125. tree_sitter_analyzer/queries/html.py +543 -0
  126. tree_sitter_analyzer/queries/java.py +402 -0
  127. tree_sitter_analyzer/queries/javascript.py +724 -0
  128. tree_sitter_analyzer/queries/kotlin.py +192 -0
  129. tree_sitter_analyzer/queries/markdown.py +258 -0
  130. tree_sitter_analyzer/queries/php.py +95 -0
  131. tree_sitter_analyzer/queries/python.py +859 -0
  132. tree_sitter_analyzer/queries/ruby.py +92 -0
  133. tree_sitter_analyzer/queries/rust.py +223 -0
  134. tree_sitter_analyzer/queries/sql.py +555 -0
  135. tree_sitter_analyzer/queries/typescript.py +871 -0
  136. tree_sitter_analyzer/queries/yaml.py +236 -0
  137. tree_sitter_analyzer/query_loader.py +272 -0
  138. tree_sitter_analyzer/security/__init__.py +22 -0
  139. tree_sitter_analyzer/security/boundary_manager.py +277 -0
  140. tree_sitter_analyzer/security/regex_checker.py +297 -0
  141. tree_sitter_analyzer/security/validator.py +599 -0
  142. tree_sitter_analyzer/table_formatter.py +782 -0
  143. tree_sitter_analyzer/utils/__init__.py +53 -0
  144. tree_sitter_analyzer/utils/logging.py +433 -0
  145. tree_sitter_analyzer/utils/tree_sitter_compat.py +289 -0
  146. tree_sitter_analyzer-1.9.17.1.dist-info/METADATA +485 -0
  147. tree_sitter_analyzer-1.9.17.1.dist-info/RECORD +149 -0
  148. tree_sitter_analyzer-1.9.17.1.dist-info/WHEEL +4 -0
  149. tree_sitter_analyzer-1.9.17.1.dist-info/entry_points.txt +25 -0
@@ -0,0 +1,853 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Tree-sitter Analyzer API
4
+
5
+ Public API facade that provides a stable, high-level interface to the
6
+ tree-sitter analyzer framework. This is the main entry point for both
7
+ CLI and MCP interfaces.
8
+ """
9
+
10
+ import logging
11
+ from pathlib import Path
12
+ from typing import Any
13
+
14
+ from . import __version__
15
+ from .core.engine import AnalysisEngine
16
+ from .utils import log_error
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+ # Global engine instance (singleton pattern)
21
+ _engine: AnalysisEngine | None = None
22
+
23
+
24
+ def get_engine() -> AnalysisEngine:
25
+ """
26
+ Get the global analysis engine instance.
27
+
28
+ Returns:
29
+ AnalysisEngine instance
30
+ """
31
+ global _engine
32
+ if _engine is None:
33
+ _engine = AnalysisEngine()
34
+ return _engine
35
+
36
+
37
def analyze_file(
    file_path: str | Path,
    language: str | None = None,
    queries: list[str] | None = None,
    include_elements: bool = True,
    include_details: bool = False,  # Accepted for backward compatibility; not used
    include_queries: bool = True,
    include_complexity: bool = False,  # Accepted for backward compatibility; not used
) -> dict[str, Any]:
    """
    Analyze a source code file.

    This is the main high-level function for file analysis. It handles
    language detection, parsing, query execution, and element extraction.

    Args:
        file_path: Path to the source file to analyze
        language: Programming language (auto-detected if not specified)
        queries: List of query names to execute (all available if not specified)
        include_elements: Whether to extract code elements
        include_details: Unused; kept for backward compatibility
        include_queries: Whether to execute queries
        include_complexity: Unused; kept for backward compatibility

    Returns:
        Analysis results dictionary

    Raises:
        FileNotFoundError: Re-raised unchanged for callers that expect it;
            every other error is reported inside the result dictionary.
    """

    # Optional, type-specific element attributes copied verbatim when present.
    _OPTIONAL_ATTRS = (
        "module_path",
        "module_name",
        "imported_names",
        "variable_type",
        "initializer",
        "is_constant",
        "parameters",
        "return_type",
        "is_async",
        "is_static",
        "is_constructor",
        "is_method",
        "complexity_score",
        "superclass",
        "class_type",
    )

    def _element_to_dict(elem: Any, all_elements: list[Any]) -> dict[str, Any]:
        """Convert one extracted element into its API dict representation."""
        elem_dict: dict[str, Any] = {
            "name": elem.name,
            "type": type(elem).__name__.lower(),
            "start_line": elem.start_line,
            "end_line": elem.end_line,
            "raw_text": elem.raw_text,
            "language": elem.language,
        }

        for attr in _OPTIONAL_ATTRS:
            if hasattr(elem, attr):
                elem_dict[attr] = getattr(elem, attr)

        # For methods, resolve the enclosing class by line containment.
        if elem_dict.get("is_method") and elem_dict["type"] == "function":
            for other in all_elements:
                if (
                    hasattr(other, "start_line")
                    and hasattr(other, "end_line")
                    and type(other).__name__.lower() == "class"
                    and other.start_line <= elem.start_line <= other.end_line
                ):
                    elem_dict["class_name"] = other.name
                    break
            else:
                # No enclosing class found in the element list.
                elem_dict["class_name"] = None

        return elem_dict

    try:
        engine = get_engine()

        # Perform the analysis
        analysis_result = engine.analyze_file(file_path, language, queries=queries)

        # Convert AnalysisResult to the expected API format (same as analyze_code)
        result: dict[str, Any] = {
            "success": analysis_result.success,
            "file_info": {
                "path": str(file_path),
                "exists": True,
            },
            "language_info": {
                "language": analysis_result.language,
                # True only when the caller did not supply the language.
                "detected": language is None,
            },
            "ast_info": {
                "node_count": analysis_result.node_count,
                "line_count": analysis_result.line_count,
            },
        }

        # Build the optional sections only when requested; this removes the
        # previous "always add, then conditionally delete" filtering.
        if include_elements and hasattr(analysis_result, "elements"):
            result["elements"] = [
                _element_to_dict(elem, analysis_result.elements)
                for elem in analysis_result.elements
            ]

        if include_queries and hasattr(analysis_result, "query_results"):
            result["query_results"] = analysis_result.query_results

        # Surface the engine's error message when analysis failed.
        if not analysis_result.success and analysis_result.error_message:
            result["error"] = analysis_result.error_message

        return result

    except FileNotFoundError:
        # Bare re-raise preserves the original traceback for callers/tests
        # that expect this exception.
        raise
    except Exception as e:
        log_error(f"API analyze_file failed: {e}")
        return {
            "success": False,
            "error": str(e),
            "file_info": {"path": str(file_path), "exists": False},
            "language_info": {"language": language or "unknown", "detected": False},
            "ast_info": {"node_count": 0, "line_count": 0},
        }
180
+
181
+
182
def analyze_code(
    source_code: str,
    language: str,
    queries: list[str] | None = None,
    include_elements: bool = True,
    include_queries: bool = True,
) -> dict[str, Any]:
    """
    Analyze source code directly (without a file).

    Args:
        source_code: Source code string to analyze
        language: Programming language
        queries: List of query names to execute (all available if not specified).
            NOTE(review): this argument is currently not forwarded to the
            engine — ``engine.analyze_code`` is called without it; confirm
            whether the engine supports per-call queries here.
        include_elements: Whether to extract code elements
        include_queries: Whether to execute queries

    Returns:
        Analysis results dictionary
    """

    # Optional, type-specific element attributes copied verbatim when present.
    _OPTIONAL_ATTRS = (
        "module_path",
        "module_name",
        "imported_names",
        "variable_type",
        "initializer",
        "is_constant",
        "parameters",
        "return_type",
        "is_async",
        "is_static",
        "is_constructor",
        "is_method",
        "complexity_score",
        "superclass",
        "class_type",
    )

    def _element_to_dict(elem: Any, all_elements: list[Any]) -> dict[str, Any]:
        """Convert one extracted element into its API dict representation."""
        elem_dict: dict[str, Any] = {
            "name": elem.name,
            "type": type(elem).__name__.lower(),
            "start_line": elem.start_line,
            "end_line": elem.end_line,
            "raw_text": elem.raw_text,
            "language": elem.language,
        }

        for attr in _OPTIONAL_ATTRS:
            if hasattr(elem, attr):
                elem_dict[attr] = getattr(elem, attr)

        # For methods, resolve the enclosing class by line containment.
        if elem_dict.get("is_method") and elem_dict["type"] == "function":
            for other in all_elements:
                if (
                    hasattr(other, "start_line")
                    and hasattr(other, "end_line")
                    and type(other).__name__.lower() == "class"
                    and other.start_line <= elem.start_line <= other.end_line
                ):
                    elem_dict["class_name"] = other.name
                    break
            else:
                # No enclosing class found in the element list.
                elem_dict["class_name"] = None

        return elem_dict

    try:
        engine = get_engine()

        # Perform the analysis
        analysis_result = engine.analyze_code(source_code, language)

        # Convert AnalysisResult to the expected API format
        result: dict[str, Any] = {
            "success": analysis_result.success,
            "language_info": {
                "language": analysis_result.language,
                "detected": False,  # Language was explicitly provided
            },
            "ast_info": {
                "node_count": analysis_result.node_count,
                "line_count": analysis_result.line_count,
            },
        }

        # Build the optional sections only when requested; this removes the
        # previous "always add, then conditionally delete" filtering.
        if include_elements and hasattr(analysis_result, "elements"):
            result["elements"] = [
                _element_to_dict(elem, analysis_result.elements)
                for elem in analysis_result.elements
            ]

        if include_queries and hasattr(analysis_result, "query_results"):
            result["query_results"] = analysis_result.query_results

        # Surface the engine's error message when analysis failed.
        if not analysis_result.success and analysis_result.error_message:
            result["error"] = analysis_result.error_message

        return result

    except Exception as e:
        log_error(f"API analyze_code failed: {e}")
        return {"success": False, "error": str(e)}
306
+
307
+
308
def get_supported_languages() -> list[str]:
    """
    Return the names of every supported programming language.

    Returns:
        List of supported language names (empty on failure)
    """
    try:
        return get_engine().get_supported_languages()
    except Exception as e:
        log_error(f"Failed to get supported languages: {e}")
        return []
321
+
322
+
323
def get_available_queries(language: str) -> list[str]:
    """
    Return the query names available for a specific language.

    Args:
        language: Programming language name

    Returns:
        List of available query names (empty on failure)
    """
    try:
        # Ask the language plugin for its supported queries, if it exposes them.
        plugin = get_engine()._get_language_plugin(language)
        if plugin and hasattr(plugin, "get_supported_queries"):
            supported = plugin.get_supported_queries()
            return list(supported) if supported else []
        # No plugin-specific queries available; fall back to the default set.
        return ["class", "method", "field"]
    except Exception as e:
        log_error(f"Failed to get available queries for {language}: {e}")
        return []
346
+
347
+
348
def is_language_supported(language: str) -> bool:
    """
    Check whether a programming language is supported (case-insensitive).

    Args:
        language: Programming language name

    Returns:
        True if the language is supported
    """
    try:
        wanted = language.lower()
        return any(name.lower() == wanted for name in get_supported_languages())
    except Exception as e:
        log_error(f"Failed to check language support for {language}: {e}")
        return False
364
+
365
+
366
+ def detect_language(file_path: str | Path) -> str:
367
+ """
368
+ Detect programming language from file path.
369
+
370
+ Args:
371
+ file_path: Path to the file
372
+
373
+ Returns:
374
+ Detected language name - 常に有効な文字列を返す
375
+ """
376
+ try:
377
+ # Handle invalid input
378
+ if not file_path:
379
+ return "unknown"
380
+
381
+ engine = get_engine()
382
+ # Use language_detector instead of language_registry
383
+ result = engine.language_detector.detect_from_extension(str(file_path))
384
+
385
+ # Ensure result is valid
386
+ if not result or result.strip() == "":
387
+ return "unknown"
388
+
389
+ return result
390
+ except Exception as e:
391
+ log_error(f"Failed to detect language for {file_path}: {e}")
392
+ return "unknown"
393
+
394
+
395
def get_file_extensions(language: str) -> list[str]:
    """
    Return the file extensions associated with a specific language.

    Args:
        language: Programming language name

    Returns:
        List of file extensions (empty on failure)
    """
    try:
        detector = get_engine().language_detector
        if hasattr(detector, "get_extensions_for_language"):
            extensions = detector.get_extensions_for_language(language)
            return list(extensions) if extensions else []
        # The detector lacks the lookup API; use a small built-in table.
        fallback: dict[str, list[str]] = {
            "java": [".java"],
            "python": [".py"],
            "javascript": [".js"],
            "typescript": [".ts"],
            "c": [".c"],
            "cpp": [".cpp", ".cxx", ".cc"],
            "go": [".go"],
            "rust": [".rs"],
        }
        return fallback.get(language.lower(), [])
    except Exception as e:
        log_error(f"Failed to get extensions for {language}: {e}")
        return []
427
+
428
+
429
+ def validate_file(file_path: str | Path) -> dict[str, Any]:
430
+ """
431
+ Validate a source code file without full analysis.
432
+
433
+ Args:
434
+ file_path: Path to the file to validate
435
+
436
+ Returns:
437
+ Validation results dictionary
438
+ """
439
+ file_path = Path(file_path)
440
+
441
+ result: dict[str, Any] = {
442
+ "valid": False,
443
+ "exists": file_path.exists(),
444
+ "readable": False,
445
+ "language": None,
446
+ "supported": False,
447
+ "size": 0,
448
+ "errors": [],
449
+ }
450
+
451
+ try:
452
+ # Check if file exists
453
+ if not file_path.exists():
454
+ result["errors"].append("File does not exist")
455
+ return result
456
+
457
+ # Check if file is readable
458
+ try:
459
+ from .encoding_utils import read_file_safe
460
+
461
+ # Test file readability by reading it
462
+ read_file_safe(file_path)
463
+ result["readable"] = True
464
+ result["size"] = file_path.stat().st_size
465
+ except Exception as e:
466
+ result["errors"].append(f"File is not readable: {e}")
467
+ return result
468
+
469
+ # Detect language
470
+ language = detect_language(file_path)
471
+ result["language"] = language
472
+
473
+ if language:
474
+ result["supported"] = is_language_supported(language)
475
+ if not result["supported"]:
476
+ result["errors"].append(f"Language '{language}' is not supported")
477
+ else:
478
+ result["errors"].append("Could not detect programming language")
479
+
480
+ # If we got this far with no errors, the file is valid
481
+ result["valid"] = len(result["errors"]) == 0
482
+
483
+ except Exception as e:
484
+ result["errors"].append(f"Validation failed: {e}")
485
+
486
+ return result
487
+
488
+
489
def get_framework_info() -> dict[str, Any]:
    """
    Return information about the framework and its capabilities.

    Returns:
        Framework information dictionary (includes an "error" key on failure)
    """
    try:
        engine = get_engine()
        languages = engine.get_supported_languages()
        manager = engine.plugin_manager

        return {
            "name": "tree-sitter-analyzer",
            "version": __version__,
            "supported_languages": languages,
            "total_languages": len(languages),
            "plugin_info": {
                "manager_available": manager is not None,
                "loaded_plugins": (
                    len(manager.get_supported_languages()) if manager else 0
                ),
            },
            "core_components": [
                "AnalysisEngine",
                "Parser",
                "QueryExecutor",
                "PluginManager",
                "LanguageDetector",
            ],
        }
    except Exception as e:
        log_error(f"Failed to get framework info: {e}")
        return {"name": "tree-sitter-analyzer", "version": __version__, "error": str(e)}
523
+
524
+
525
def _group_captures_by_main_node(
    captures: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """
    Group query captures by their main nodes (e.g., @method, @class, @function).

    NOTE(review): This definition is dead code — it is shadowed by a second
    ``_group_captures_by_main_node`` defined later in this module, which
    additionally copies top-level fields (text, start_line, end_line, ...)
    from the main node into each result. Consider deleting this copy.

    Each group represents one match of the query pattern, with all its sub-captures.
    For example, a method_with_annotations query returns:
    - One @method capture (the main node)
    - One or more @annotation captures
    - One @name capture
    These all get grouped together as one "result".

    Args:
        captures: Flat list of all captures from the query

    Returns:
        List of grouped results, where each result has a 'captures' dict mapping
        capture names to their data.
    """
    if not captures:
        return []

    # Identify the main capture type (method, class, function, etc.)
    # Usually it's the one with the longest text span or appears first
    main_capture_types = {"method", "class", "function", "interface", "field"}

    # Group by start position - captures that share the same main node position
    position_groups: dict[tuple[int, int], list[dict[str, Any]]] = {}

    for capture in captures:
        capture_name = capture.get("capture_name", "")

        # Find the main node position for this capture
        if capture_name in main_capture_types:
            # This is a main node, use its position as the key
            pos_key = (capture.get("start_byte", 0), capture.get("end_byte", 0))
        else:
            # This is a sub-capture, we'll need to find its parent later
            # For now, use its own position
            pos_key = (capture.get("start_byte", 0), capture.get("end_byte", 0))

        if pos_key not in position_groups:
            position_groups[pos_key] = []
        position_groups[pos_key].append(capture)

    # Now group captures that belong together
    # A capture belongs to a main node if it's within the main node's byte range
    results = []
    main_nodes = []

    # First, identify all main nodes
    for captures_list in position_groups.values():
        for capture in captures_list:
            if capture.get("capture_name") in main_capture_types:
                main_nodes.append(capture)

    # For each main node, find all sub-captures within its range
    for main_node in main_nodes:
        main_start = main_node.get("start_byte", 0)
        main_end = main_node.get("end_byte", 0)
        main_name = main_node.get("capture_name", "")

        # Collect all captures within this main node's range
        grouped_captures = {main_name: main_node}

        for captures_list in position_groups.values():
            for capture in captures_list:
                capture_start = capture.get("start_byte", 0)
                capture_end = capture.get("end_byte", 0)
                capture_name = capture.get("capture_name", "")

                # Skip the main node itself
                if capture is main_node:
                    continue

                # Check if this capture is within the main node's range
                if capture_start >= main_start and capture_end <= main_end:
                    # Group multiple captures of the same name in a list
                    if capture_name in grouped_captures:
                        # Convert to list if not already
                        if not isinstance(grouped_captures[capture_name], list):
                            grouped_captures[capture_name] = [
                                grouped_captures[capture_name]
                            ]
                        grouped_captures[capture_name].append(capture)
                    else:
                        grouped_captures[capture_name] = capture

        results.append({"captures": grouped_captures})

    return results
617
+
618
+
619
+ def _group_captures_by_main_node(
620
+ captures: list[dict[str, Any]],
621
+ ) -> list[dict[str, Any]]:
622
+ """
623
+ Group query captures by their main nodes (e.g., @method, @class, @function).
624
+
625
+ Each group represents one match of the query pattern, with all its sub-captures.
626
+ For example, a method_with_annotations query returns:
627
+ - One @method capture (the main node)
628
+ - One or more @annotation captures
629
+ - One @name capture
630
+ These all get grouped together as one "result".
631
+
632
+ Args:
633
+ captures: Flat list of all captures from the query
634
+
635
+ Returns:
636
+ List of grouped results, where each result has:
637
+ - 'captures' dict mapping capture names to their data
638
+ - Top-level fields from the main node (text, start_line, end_line, etc.)
639
+ """
640
+ if not captures:
641
+ return []
642
+
643
+ # Identify the main capture type (method, class, function, etc.)
644
+ # Usually it's the one with the longest text span or appears first
645
+ main_capture_types = {"method", "class", "function", "interface", "field"}
646
+
647
+ # Group by start position - captures that share the same main node position
648
+ position_groups: dict[tuple[int, int], list[dict[str, Any]]] = {}
649
+
650
+ for capture in captures:
651
+ capture_name = capture.get("capture_name", "")
652
+
653
+ # Find the main node position for this capture
654
+ if capture_name in main_capture_types:
655
+ # This is a main node, use its position as the key
656
+ pos_key = (capture.get("start_byte", 0), capture.get("end_byte", 0))
657
+ else:
658
+ # This is a sub-capture, we'll need to find its parent later
659
+ # For now, use its own position
660
+ pos_key = (capture.get("start_byte", 0), capture.get("end_byte", 0))
661
+
662
+ if pos_key not in position_groups:
663
+ position_groups[pos_key] = []
664
+ position_groups[pos_key].append(capture)
665
+
666
+ # Now group captures that belong together
667
+ # A capture belongs to a main node if it's within the main node's byte range
668
+ results = []
669
+ main_nodes = []
670
+
671
+ # First, identify all main nodes
672
+ for captures_list in position_groups.values():
673
+ for capture in captures_list:
674
+ if capture.get("capture_name") in main_capture_types:
675
+ main_nodes.append(capture)
676
+
677
+ # For each main node, find all sub-captures within its range
678
+ for main_node in main_nodes:
679
+ main_start = main_node.get("start_byte", 0)
680
+ main_end = main_node.get("end_byte", 0)
681
+ main_name = main_node.get("capture_name", "")
682
+
683
+ # Collect all captures within this main node's range
684
+ grouped_captures = {main_name: main_node}
685
+
686
+ for captures_list in position_groups.values():
687
+ for capture in captures_list:
688
+ capture_start = capture.get("start_byte", 0)
689
+ capture_end = capture.get("end_byte", 0)
690
+ capture_name = capture.get("capture_name", "")
691
+
692
+ # Skip the main node itself
693
+ if capture is main_node:
694
+ continue
695
+
696
+ # Check if this capture is within the main node's range
697
+ if capture_start >= main_start and capture_end <= main_end:
698
+ # Group multiple captures of the same name in a list
699
+ if capture_name in grouped_captures:
700
+ # Convert to list if not already
701
+ if not isinstance(grouped_captures[capture_name], list):
702
+ grouped_captures[capture_name] = [
703
+ grouped_captures[capture_name]
704
+ ]
705
+ grouped_captures[capture_name].append(capture)
706
+ else:
707
+ grouped_captures[capture_name] = capture
708
+
709
+ # Create result with top-level fields from main node
710
+ result = {
711
+ "captures": grouped_captures,
712
+ "text": main_node.get("text", ""),
713
+ "start_line": main_node.get("line_number", 0),
714
+ "end_line": main_node.get("line_number", 0)
715
+ + main_node.get("text", "").count("\n"),
716
+ "start_byte": main_start,
717
+ "end_byte": main_end,
718
+ "node_type": main_node.get("node_type", ""),
719
+ }
720
+ results.append(result)
721
+
722
+ return results
723
+
724
+
725
def execute_query(
    file_path: str | Path, query_name: str, language: str | None = None
) -> dict[str, Any]:
    """
    Run a single named query against a source file.

    Args:
        file_path: Path to the source file
        query_name: Name of the query to execute
        language: Programming language (auto-detected if not specified)

    Returns:
        Query execution results
    """
    try:
        # Restrict the analysis to just the requested query.
        analysis = analyze_file(
            file_path,
            language=language,
            queries=[query_name],
            include_elements=False,
            include_queries=True,
        )

        # Guard clause: bail out early when the analysis did not produce
        # query results at all.
        if not (analysis["success"] and "query_results" in analysis):
            return {
                "success": False,
                "query_name": query_name,
                "error": analysis.get("error", "Unknown error"),
                "file_path": str(file_path),
            }

        raw = analysis["query_results"].get(query_name, {})

        # Normalize the raw query result into a flat list of captures,
        # tolerating both the dict-with-"captures" and bare-list shapes.
        if isinstance(raw, dict) and "captures" in raw:
            capture_list = raw["captures"]
        elif isinstance(raw, list):
            capture_list = raw
        else:
            capture_list = []

        # Bundle related captures together (e.g. a method with its
        # annotations and name) under their main node.
        grouped = _group_captures_by_main_node(capture_list)

        return {
            "success": True,
            "query_name": query_name,
            "results": grouped,
            "count": len(grouped),
            "language": analysis.get("language_info", {}).get("language"),
            "file_path": str(file_path),
        }

    except Exception as e:
        log_error(f"Query execution failed: {e}")
        return {
            "success": False,
            "query_name": query_name,
            "error": str(e),
            "file_path": str(file_path),
        }
788
+
789
+
790
def extract_elements(
    file_path: str | Path,
    language: str | None = None,
    element_types: list[str] | None = None,
) -> dict[str, Any]:
    """
    Extract code elements from a file.

    Args:
        file_path: Path to the source file
        language: Programming language (auto-detected if not specified)
        element_types: Types of elements to extract (all if not specified)

    Returns:
        Element extraction results
    """
    try:
        # Run the analysis with element extraction only; queries are skipped.
        analysis = analyze_file(
            file_path, language=language, include_elements=True, include_queries=False
        )

        # Guard clause: no elements means nothing further to do.
        if not (analysis["success"] and "elements" in analysis):
            return {
                "success": False,
                "error": analysis.get("error", "Unknown error"),
                "file_path": str(file_path),
            }

        elements = analysis["elements"]

        # When specific kinds were requested, keep only elements whose type
        # contains one of the requested names (case-insensitive substring).
        if element_types:
            wanted = [etype.lower() for etype in element_types]
            elements = [
                element
                for element in elements
                if any(w in element.get("type", "").lower() for w in wanted)
            ]

        return {
            "success": True,
            "elements": elements,
            "count": len(elements),
            "language": analysis.get("language_info", {}).get("language"),
            "file_path": str(file_path),
        }

    except Exception as e:
        log_error(f"Element extraction failed: {e}")
        return {"success": False, "error": str(e), "file_path": str(file_path)}
843
+
844
+
845
+ # Convenience functions for backward compatibility
846
+ def analyze(file_path: str | Path, **kwargs: Any) -> dict[str, Any]:
847
+ """Convenience function that aliases to analyze_file."""
848
+ return analyze_file(file_path, **kwargs)
849
+
850
+
851
def get_languages() -> list[str]:
    """Backward-compatible alias: delegate directly to ``get_supported_languages``."""
    languages = get_supported_languages()
    return languages