tree-sitter-analyzer: tree_sitter_analyzer-0.8.3-py3-none-any.whl → tree_sitter_analyzer-0.9.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tree-sitter-analyzer might be problematic. Click here for more details.
- tree_sitter_analyzer/__init__.py +132 -132
- tree_sitter_analyzer/__main__.py +11 -11
- tree_sitter_analyzer/api.py +533 -533
- tree_sitter_analyzer/cli/__init__.py +39 -39
- tree_sitter_analyzer/cli/__main__.py +12 -12
- tree_sitter_analyzer/cli/commands/__init__.py +26 -26
- tree_sitter_analyzer/cli/commands/advanced_command.py +88 -88
- tree_sitter_analyzer/cli/commands/base_command.py +182 -180
- tree_sitter_analyzer/cli/commands/structure_command.py +138 -138
- tree_sitter_analyzer/cli/commands/summary_command.py +101 -101
- tree_sitter_analyzer/core/__init__.py +15 -15
- tree_sitter_analyzer/core/analysis_engine.py +74 -78
- tree_sitter_analyzer/core/cache_service.py +320 -320
- tree_sitter_analyzer/core/engine.py +566 -566
- tree_sitter_analyzer/core/parser.py +293 -293
- tree_sitter_analyzer/encoding_utils.py +459 -459
- tree_sitter_analyzer/file_handler.py +210 -210
- tree_sitter_analyzer/formatters/__init__.py +1 -1
- tree_sitter_analyzer/formatters/base_formatter.py +167 -167
- tree_sitter_analyzer/formatters/formatter_factory.py +78 -78
- tree_sitter_analyzer/formatters/java_formatter.py +18 -18
- tree_sitter_analyzer/formatters/python_formatter.py +19 -19
- tree_sitter_analyzer/interfaces/__init__.py +9 -9
- tree_sitter_analyzer/interfaces/cli.py +528 -528
- tree_sitter_analyzer/interfaces/cli_adapter.py +344 -343
- tree_sitter_analyzer/interfaces/mcp_adapter.py +206 -206
- tree_sitter_analyzer/language_detector.py +53 -53
- tree_sitter_analyzer/languages/__init__.py +10 -10
- tree_sitter_analyzer/languages/java_plugin.py +1 -1
- tree_sitter_analyzer/languages/javascript_plugin.py +446 -446
- tree_sitter_analyzer/languages/python_plugin.py +755 -755
- tree_sitter_analyzer/mcp/__init__.py +34 -31
- tree_sitter_analyzer/mcp/resources/__init__.py +44 -44
- tree_sitter_analyzer/mcp/resources/code_file_resource.py +209 -209
- tree_sitter_analyzer/mcp/server.py +623 -436
- tree_sitter_analyzer/mcp/tools/__init__.py +30 -30
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +10 -6
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +247 -242
- tree_sitter_analyzer/mcp/tools/base_tool.py +54 -54
- tree_sitter_analyzer/mcp/tools/read_partial_tool.py +310 -308
- tree_sitter_analyzer/mcp/tools/table_format_tool.py +386 -379
- tree_sitter_analyzer/mcp/tools/universal_analyze_tool.py +563 -559
- tree_sitter_analyzer/mcp/utils/__init__.py +107 -107
- tree_sitter_analyzer/models.py +10 -10
- tree_sitter_analyzer/output_manager.py +253 -253
- tree_sitter_analyzer/plugins/__init__.py +280 -280
- tree_sitter_analyzer/plugins/base.py +529 -529
- tree_sitter_analyzer/plugins/manager.py +379 -379
- tree_sitter_analyzer/queries/__init__.py +26 -26
- tree_sitter_analyzer/queries/java.py +391 -391
- tree_sitter_analyzer/queries/javascript.py +148 -148
- tree_sitter_analyzer/queries/python.py +285 -285
- tree_sitter_analyzer/queries/typescript.py +229 -229
- tree_sitter_analyzer/query_loader.py +257 -257
- tree_sitter_analyzer/security/boundary_manager.py +237 -279
- tree_sitter_analyzer/security/validator.py +60 -58
- tree_sitter_analyzer/utils.py +294 -277
- {tree_sitter_analyzer-0.8.3.dist-info → tree_sitter_analyzer-0.9.2.dist-info}/METADATA +28 -19
- tree_sitter_analyzer-0.9.2.dist-info/RECORD +77 -0
- {tree_sitter_analyzer-0.8.3.dist-info → tree_sitter_analyzer-0.9.2.dist-info}/entry_points.txt +1 -0
- tree_sitter_analyzer-0.8.3.dist-info/RECORD +0 -77
- {tree_sitter_analyzer-0.8.3.dist-info → tree_sitter_analyzer-0.9.2.dist-info}/WHEEL +0 -0
|
@@ -11,9 +11,9 @@ from typing import Any
|
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
class LanguageDetector:
|
|
14
|
-
"""
|
|
14
|
+
"""Automatic programming language detector"""
|
|
15
15
|
|
|
16
|
-
#
|
|
16
|
+
# Basic extension mapping
|
|
17
17
|
EXTENSION_MAPPING: dict[str, str] = {
|
|
18
18
|
# Java系
|
|
19
19
|
".java": "java",
|
|
@@ -36,7 +36,7 @@ class LanguageDetector:
|
|
|
36
36
|
".cpp": "cpp",
|
|
37
37
|
".cxx": "cpp",
|
|
38
38
|
".cc": "cpp",
|
|
39
|
-
".h": "c", #
|
|
39
|
+
".h": "c", # Ambiguous
|
|
40
40
|
".hpp": "cpp",
|
|
41
41
|
".hxx": "cpp",
|
|
42
42
|
# その他の言語
|
|
@@ -56,12 +56,12 @@ class LanguageDetector:
|
|
|
56
56
|
".lua": "lua",
|
|
57
57
|
".pl": "perl",
|
|
58
58
|
".r": "r",
|
|
59
|
-
".m": "objc", #
|
|
59
|
+
".m": "objc", # Ambiguous (MATLAB as well)
|
|
60
60
|
".dart": "dart",
|
|
61
61
|
".elm": "elm",
|
|
62
62
|
}
|
|
63
63
|
|
|
64
|
-
#
|
|
64
|
+
# Ambiguous extensions (map to multiple languages)
|
|
65
65
|
AMBIGUOUS_EXTENSIONS: dict[str, list[str]] = {
|
|
66
66
|
".h": ["c", "cpp", "objc"],
|
|
67
67
|
".m": ["objc", "matlab"],
|
|
@@ -70,7 +70,7 @@ class LanguageDetector:
|
|
|
70
70
|
".json": ["json", "jsonc"],
|
|
71
71
|
}
|
|
72
72
|
|
|
73
|
-
#
|
|
73
|
+
# Content-based detection patterns
|
|
74
74
|
CONTENT_PATTERNS: dict[str, dict[str, list[str]]] = {
|
|
75
75
|
"c_vs_cpp": {
|
|
76
76
|
"cpp": ["#include <iostream>", "std::", "namespace", "class ", "template<"],
|
|
@@ -82,7 +82,7 @@ class LanguageDetector:
|
|
|
82
82
|
},
|
|
83
83
|
}
|
|
84
84
|
|
|
85
|
-
# Tree-sitter
|
|
85
|
+
# Tree-sitter supported languages
|
|
86
86
|
SUPPORTED_LANGUAGES = {
|
|
87
87
|
"java",
|
|
88
88
|
"javascript",
|
|
@@ -95,7 +95,7 @@ class LanguageDetector:
|
|
|
95
95
|
}
|
|
96
96
|
|
|
97
97
|
def __init__(self) -> None:
|
|
98
|
-
"""
|
|
98
|
+
"""Initialize detector"""
|
|
99
99
|
self.extension_map = {
|
|
100
100
|
".java": ("java", 0.9),
|
|
101
101
|
".js": ("javascript", 0.9),
|
|
@@ -192,139 +192,139 @@ class LanguageDetector:
|
|
|
192
192
|
path = Path(file_path)
|
|
193
193
|
extension = path.suffix.lower()
|
|
194
194
|
|
|
195
|
-
#
|
|
195
|
+
# Direct mapping by extension
|
|
196
196
|
if extension in self.EXTENSION_MAPPING:
|
|
197
197
|
language = self.EXTENSION_MAPPING[extension]
|
|
198
198
|
|
|
199
|
-
#
|
|
199
|
+
# No ambiguity -> high confidence
|
|
200
200
|
if extension not in self.AMBIGUOUS_EXTENSIONS:
|
|
201
201
|
return language, 1.0
|
|
202
202
|
|
|
203
|
-
#
|
|
203
|
+
# Resolve ambiguity using content
|
|
204
204
|
if content:
|
|
205
205
|
refined_language = self._resolve_ambiguity(extension, content)
|
|
206
206
|
return refined_language, 0.9 if refined_language != language else 0.7
|
|
207
207
|
else:
|
|
208
|
-
return language, 0.7 #
|
|
208
|
+
return language, 0.7 # Lower confidence without content
|
|
209
209
|
|
|
210
|
-
#
|
|
210
|
+
# Unknown extension
|
|
211
211
|
return "unknown", 0.0
|
|
212
212
|
|
|
213
213
|
def detect_from_extension(self, file_path: str) -> str:
|
|
214
214
|
"""
|
|
215
|
-
|
|
215
|
+
Quick detection using extension only
|
|
216
216
|
|
|
217
217
|
Args:
|
|
218
|
-
file_path:
|
|
218
|
+
file_path: File path
|
|
219
219
|
|
|
220
220
|
Returns:
|
|
221
|
-
|
|
221
|
+
Detected language name
|
|
222
222
|
"""
|
|
223
223
|
language, _ = self.detect_language(file_path)
|
|
224
224
|
return language
|
|
225
225
|
|
|
226
226
|
def is_supported(self, language: str) -> bool:
|
|
227
227
|
"""
|
|
228
|
-
|
|
228
|
+
Check if language is supported by Tree-sitter
|
|
229
229
|
|
|
230
230
|
Args:
|
|
231
|
-
language:
|
|
231
|
+
language: Language name
|
|
232
232
|
|
|
233
233
|
Returns:
|
|
234
|
-
|
|
234
|
+
Support status
|
|
235
235
|
"""
|
|
236
236
|
return language in self.SUPPORTED_LANGUAGES
|
|
237
237
|
|
|
238
238
|
def get_supported_extensions(self) -> list[str]:
|
|
239
239
|
"""
|
|
240
|
-
|
|
240
|
+
Get list of supported extensions
|
|
241
241
|
|
|
242
242
|
Returns:
|
|
243
|
-
|
|
243
|
+
List of extensions
|
|
244
244
|
"""
|
|
245
245
|
return sorted(self.EXTENSION_MAPPING.keys())
|
|
246
246
|
|
|
247
247
|
def get_supported_languages(self) -> list[str]:
|
|
248
248
|
"""
|
|
249
|
-
|
|
249
|
+
Get list of supported languages
|
|
250
250
|
|
|
251
251
|
Returns:
|
|
252
|
-
|
|
252
|
+
List of languages
|
|
253
253
|
"""
|
|
254
254
|
return sorted(self.SUPPORTED_LANGUAGES)
|
|
255
255
|
|
|
256
256
|
def _resolve_ambiguity(self, extension: str, content: str) -> str:
|
|
257
257
|
"""
|
|
258
|
-
|
|
258
|
+
Resolve ambiguous extension using content
|
|
259
259
|
|
|
260
260
|
Args:
|
|
261
|
-
extension:
|
|
262
|
-
content:
|
|
261
|
+
extension: File extension
|
|
262
|
+
content: File content
|
|
263
263
|
|
|
264
264
|
Returns:
|
|
265
|
-
|
|
265
|
+
Resolved language name
|
|
266
266
|
"""
|
|
267
267
|
if extension not in self.AMBIGUOUS_EXTENSIONS:
|
|
268
268
|
return self.EXTENSION_MAPPING.get(extension, "unknown")
|
|
269
269
|
|
|
270
270
|
candidates = self.AMBIGUOUS_EXTENSIONS[extension]
|
|
271
271
|
|
|
272
|
-
# .h
|
|
272
|
+
# .h: C vs C++ vs Objective-C
|
|
273
273
|
if extension == ".h":
|
|
274
274
|
return self._detect_c_family(content, candidates)
|
|
275
275
|
|
|
276
|
-
# .m
|
|
276
|
+
# .m: Objective-C vs MATLAB
|
|
277
277
|
elif extension == ".m":
|
|
278
278
|
return self._detect_objc_vs_matlab(content, candidates)
|
|
279
279
|
|
|
280
|
-
#
|
|
280
|
+
# Fallback to first candidate
|
|
281
281
|
return candidates[0]
|
|
282
282
|
|
|
283
283
|
def _detect_c_family(self, content: str, candidates: list[str]) -> str:
|
|
284
|
-
"""C
|
|
284
|
+
"""Detect among C-family languages"""
|
|
285
285
|
cpp_score = 0
|
|
286
286
|
c_score = 0
|
|
287
287
|
objc_score = 0
|
|
288
288
|
|
|
289
|
-
# C
|
|
289
|
+
# C++ features
|
|
290
290
|
cpp_patterns = self.CONTENT_PATTERNS["c_vs_cpp"]["cpp"]
|
|
291
291
|
for pattern in cpp_patterns:
|
|
292
292
|
if pattern in content:
|
|
293
293
|
cpp_score += 1
|
|
294
294
|
|
|
295
|
-
# C
|
|
295
|
+
# C features
|
|
296
296
|
c_patterns = self.CONTENT_PATTERNS["c_vs_cpp"]["c"]
|
|
297
297
|
for pattern in c_patterns:
|
|
298
298
|
if pattern in content:
|
|
299
299
|
c_score += 1
|
|
300
300
|
|
|
301
|
-
# Objective-C
|
|
301
|
+
# Objective-C features
|
|
302
302
|
objc_patterns = self.CONTENT_PATTERNS["objc_vs_matlab"]["objc"]
|
|
303
303
|
for pattern in objc_patterns:
|
|
304
304
|
if pattern in content:
|
|
305
305
|
objc_score += 3 # 強い指標なので重み大
|
|
306
306
|
|
|
307
|
-
#
|
|
307
|
+
# Select best-scoring language
|
|
308
308
|
scores = {"cpp": cpp_score, "c": c_score, "objc": objc_score}
|
|
309
309
|
best_language = max(scores, key=lambda x: scores[x])
|
|
310
310
|
|
|
311
|
-
# objc
|
|
311
|
+
# If objc not a candidate, fallback to C/C++
|
|
312
312
|
if best_language == "objc" and "objc" not in candidates:
|
|
313
313
|
best_language = "cpp" if cpp_score > c_score else "c"
|
|
314
314
|
|
|
315
315
|
return best_language if scores[best_language] > 0 else candidates[0]
|
|
316
316
|
|
|
317
317
|
def _detect_objc_vs_matlab(self, content: str, candidates: list[str]) -> str:
|
|
318
|
-
"""Objective-C
|
|
318
|
+
"""Detect between Objective-C and MATLAB"""
|
|
319
319
|
objc_score = 0
|
|
320
320
|
matlab_score = 0
|
|
321
321
|
|
|
322
|
-
# Objective-C
|
|
322
|
+
# Objective-C patterns
|
|
323
323
|
for pattern in self.CONTENT_PATTERNS["objc_vs_matlab"]["objc"]:
|
|
324
324
|
if pattern in content:
|
|
325
325
|
objc_score += 1
|
|
326
326
|
|
|
327
|
-
# MATLAB
|
|
327
|
+
# MATLAB patterns
|
|
328
328
|
for pattern in self.CONTENT_PATTERNS["objc_vs_matlab"]["matlab"]:
|
|
329
329
|
if pattern in content:
|
|
330
330
|
matlab_score += 1
|
|
@@ -334,27 +334,27 @@ class LanguageDetector:
|
|
|
334
334
|
elif matlab_score > objc_score:
|
|
335
335
|
return "matlab"
|
|
336
336
|
else:
|
|
337
|
-
return candidates[0] #
|
|
337
|
+
return candidates[0] # default
|
|
338
338
|
|
|
339
339
|
def add_extension_mapping(self, extension: str, language: str) -> None:
|
|
340
340
|
"""
|
|
341
|
-
|
|
341
|
+
Add custom extension mapping
|
|
342
342
|
|
|
343
343
|
Args:
|
|
344
|
-
extension:
|
|
345
|
-
language:
|
|
344
|
+
extension: File extension (with dot)
|
|
345
|
+
language: Language name
|
|
346
346
|
"""
|
|
347
347
|
self.EXTENSION_MAPPING[extension.lower()] = language
|
|
348
348
|
|
|
349
349
|
def get_language_info(self, language: str) -> dict[str, Any]:
|
|
350
350
|
"""
|
|
351
|
-
|
|
351
|
+
Get language information
|
|
352
352
|
|
|
353
353
|
Args:
|
|
354
|
-
language:
|
|
354
|
+
language: Language name
|
|
355
355
|
|
|
356
356
|
Returns:
|
|
357
|
-
|
|
357
|
+
Language info dictionary
|
|
358
358
|
"""
|
|
359
359
|
extensions = [
|
|
360
360
|
ext for ext, lang in self.EXTENSION_MAPPING.items() if lang == language
|
|
@@ -368,31 +368,31 @@ class LanguageDetector:
|
|
|
368
368
|
}
|
|
369
369
|
|
|
370
370
|
|
|
371
|
-
#
|
|
371
|
+
# Global instance
|
|
372
372
|
detector = LanguageDetector()
|
|
373
373
|
|
|
374
374
|
|
|
375
375
|
def detect_language_from_file(file_path: str) -> str:
|
|
376
376
|
"""
|
|
377
|
-
|
|
377
|
+
Detect language from path (simple API)
|
|
378
378
|
|
|
379
379
|
Args:
|
|
380
|
-
file_path:
|
|
380
|
+
file_path: File path
|
|
381
381
|
|
|
382
382
|
Returns:
|
|
383
|
-
|
|
383
|
+
Detected language name
|
|
384
384
|
"""
|
|
385
385
|
return detector.detect_from_extension(file_path)
|
|
386
386
|
|
|
387
387
|
|
|
388
388
|
def is_language_supported(language: str) -> bool:
|
|
389
389
|
"""
|
|
390
|
-
|
|
390
|
+
Check if language is supported (simple API)
|
|
391
391
|
|
|
392
392
|
Args:
|
|
393
|
-
language:
|
|
393
|
+
language: Language name
|
|
394
394
|
|
|
395
395
|
Returns:
|
|
396
|
-
|
|
396
|
+
Support status
|
|
397
397
|
"""
|
|
398
398
|
return detector.is_supported(language)
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""
|
|
3
|
-
Language Plugins
|
|
4
|
-
|
|
5
|
-
This package contains the core language plugins for the tree-sitter analyzer.
|
|
6
|
-
Each plugin provides language-specific parsing and element extraction capabilities.
|
|
7
|
-
"""
|
|
8
|
-
|
|
9
|
-
# This file makes the languages directory a Python package
|
|
10
|
-
# Language plugins are automatically discovered by the PluginManager
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Language Plugins
|
|
4
|
+
|
|
5
|
+
This package contains the core language plugins for the tree-sitter analyzer.
|
|
6
|
+
Each plugin provides language-specific parsing and element extraction capabilities.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
# This file makes the languages directory a Python package
|
|
10
|
+
# Language plugins are automatically discovered by the PluginManager
|
|
@@ -233,7 +233,7 @@ class JavaElementExtractor(ElementExtractor):
|
|
|
233
233
|
"method_declaration",
|
|
234
234
|
"constructor_declaration",
|
|
235
235
|
"block",
|
|
236
|
-
"modifiers", #
|
|
236
|
+
"modifiers", # Annotation nodes can appear inside modifiers
|
|
237
237
|
}
|
|
238
238
|
|
|
239
239
|
# Iterative DFS stack: (node, depth)
|