tree-sitter-analyzer 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tree-sitter-analyzer might be problematic. Click here for more details.
- tree_sitter_analyzer/__init__.py +121 -0
- tree_sitter_analyzer/__main__.py +12 -0
- tree_sitter_analyzer/api.py +539 -0
- tree_sitter_analyzer/cli/__init__.py +39 -0
- tree_sitter_analyzer/cli/__main__.py +13 -0
- tree_sitter_analyzer/cli/commands/__init__.py +27 -0
- tree_sitter_analyzer/cli/commands/advanced_command.py +88 -0
- tree_sitter_analyzer/cli/commands/base_command.py +155 -0
- tree_sitter_analyzer/cli/commands/default_command.py +19 -0
- tree_sitter_analyzer/cli/commands/partial_read_command.py +133 -0
- tree_sitter_analyzer/cli/commands/query_command.py +82 -0
- tree_sitter_analyzer/cli/commands/structure_command.py +121 -0
- tree_sitter_analyzer/cli/commands/summary_command.py +93 -0
- tree_sitter_analyzer/cli/commands/table_command.py +233 -0
- tree_sitter_analyzer/cli/info_commands.py +121 -0
- tree_sitter_analyzer/cli_main.py +276 -0
- tree_sitter_analyzer/core/__init__.py +20 -0
- tree_sitter_analyzer/core/analysis_engine.py +574 -0
- tree_sitter_analyzer/core/cache_service.py +330 -0
- tree_sitter_analyzer/core/engine.py +560 -0
- tree_sitter_analyzer/core/parser.py +288 -0
- tree_sitter_analyzer/core/query.py +502 -0
- tree_sitter_analyzer/encoding_utils.py +460 -0
- tree_sitter_analyzer/exceptions.py +340 -0
- tree_sitter_analyzer/file_handler.py +222 -0
- tree_sitter_analyzer/formatters/__init__.py +1 -0
- tree_sitter_analyzer/formatters/base_formatter.py +168 -0
- tree_sitter_analyzer/formatters/formatter_factory.py +74 -0
- tree_sitter_analyzer/formatters/java_formatter.py +270 -0
- tree_sitter_analyzer/formatters/python_formatter.py +235 -0
- tree_sitter_analyzer/interfaces/__init__.py +10 -0
- tree_sitter_analyzer/interfaces/cli.py +557 -0
- tree_sitter_analyzer/interfaces/cli_adapter.py +319 -0
- tree_sitter_analyzer/interfaces/mcp_adapter.py +170 -0
- tree_sitter_analyzer/interfaces/mcp_server.py +416 -0
- tree_sitter_analyzer/java_analyzer.py +219 -0
- tree_sitter_analyzer/language_detector.py +400 -0
- tree_sitter_analyzer/language_loader.py +228 -0
- tree_sitter_analyzer/languages/__init__.py +11 -0
- tree_sitter_analyzer/languages/java_plugin.py +1113 -0
- tree_sitter_analyzer/languages/python_plugin.py +712 -0
- tree_sitter_analyzer/mcp/__init__.py +32 -0
- tree_sitter_analyzer/mcp/resources/__init__.py +47 -0
- tree_sitter_analyzer/mcp/resources/code_file_resource.py +213 -0
- tree_sitter_analyzer/mcp/resources/project_stats_resource.py +550 -0
- tree_sitter_analyzer/mcp/server.py +319 -0
- tree_sitter_analyzer/mcp/tools/__init__.py +36 -0
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +558 -0
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +245 -0
- tree_sitter_analyzer/mcp/tools/base_tool.py +55 -0
- tree_sitter_analyzer/mcp/tools/get_positions_tool.py +448 -0
- tree_sitter_analyzer/mcp/tools/read_partial_tool.py +302 -0
- tree_sitter_analyzer/mcp/tools/table_format_tool.py +359 -0
- tree_sitter_analyzer/mcp/tools/universal_analyze_tool.py +476 -0
- tree_sitter_analyzer/mcp/utils/__init__.py +106 -0
- tree_sitter_analyzer/mcp/utils/error_handler.py +549 -0
- tree_sitter_analyzer/models.py +481 -0
- tree_sitter_analyzer/output_manager.py +264 -0
- tree_sitter_analyzer/plugins/__init__.py +334 -0
- tree_sitter_analyzer/plugins/base.py +446 -0
- tree_sitter_analyzer/plugins/java_plugin.py +625 -0
- tree_sitter_analyzer/plugins/javascript_plugin.py +439 -0
- tree_sitter_analyzer/plugins/manager.py +355 -0
- tree_sitter_analyzer/plugins/plugin_loader.py +83 -0
- tree_sitter_analyzer/plugins/python_plugin.py +598 -0
- tree_sitter_analyzer/plugins/registry.py +366 -0
- tree_sitter_analyzer/queries/__init__.py +27 -0
- tree_sitter_analyzer/queries/java.py +394 -0
- tree_sitter_analyzer/queries/javascript.py +149 -0
- tree_sitter_analyzer/queries/python.py +286 -0
- tree_sitter_analyzer/queries/typescript.py +230 -0
- tree_sitter_analyzer/query_loader.py +260 -0
- tree_sitter_analyzer/table_formatter.py +448 -0
- tree_sitter_analyzer/utils.py +201 -0
- tree_sitter_analyzer-0.1.0.dist-info/METADATA +581 -0
- tree_sitter_analyzer-0.1.0.dist-info/RECORD +78 -0
- tree_sitter_analyzer-0.1.0.dist-info/WHEEL +4 -0
- tree_sitter_analyzer-0.1.0.dist-info/entry_points.txt +8 -0
|
@@ -0,0 +1,400 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Language Detection System
|
|
5
|
+
|
|
6
|
+
Automatically detects programming language from file extensions and content.
|
|
7
|
+
Supports multiple languages with extensible configuration.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import re
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class LanguageDetector:
    """Detect a file's programming language from its extension and content.

    Detection is driven by the class-level ``EXTENSION_MAPPING``. Extensions
    that are also listed in ``AMBIGUOUS_EXTENSIONS`` can be refined with the
    literal substring patterns in ``CONTENT_PATTERNS`` when file content is
    supplied.
    """

    # Direct extension -> language mapping (keys are lower-case, with the dot).
    EXTENSION_MAPPING: Dict[str, str] = {
        # Java family
        ".java": "java",
        ".jsp": "jsp",
        ".jspx": "jsp",
        # JavaScript / TypeScript family
        ".js": "javascript",
        ".jsx": "jsx",
        ".ts": "typescript",
        ".tsx": "tsx",
        ".mjs": "javascript",
        ".cjs": "javascript",
        # Python family
        ".py": "python",
        ".pyx": "python",
        ".pyi": "python",
        ".pyw": "python",
        # C / C++ family
        ".c": "c",
        ".cpp": "cpp",
        ".cxx": "cpp",
        ".cc": "cpp",
        ".h": "c",  # ambiguous (see AMBIGUOUS_EXTENSIONS)
        ".hpp": "cpp",
        ".hxx": "cpp",
        # Other languages
        ".rs": "rust",
        ".go": "go",
        ".rb": "ruby",
        ".php": "php",
        ".kt": "kotlin",
        ".swift": "swift",
        ".cs": "csharp",
        ".vb": "vbnet",
        ".fs": "fsharp",
        ".scala": "scala",
        ".clj": "clojure",
        ".hs": "haskell",
        ".ml": "ocaml",
        ".lua": "lua",
        ".pl": "perl",
        ".r": "r",
        ".m": "objc",  # ambiguous (could also be MATLAB)
        ".dart": "dart",
        ".elm": "elm",
    }

    # Extensions that may belong to several languages; the first entry is the
    # fallback candidate used by _resolve_ambiguity.
    AMBIGUOUS_EXTENSIONS: Dict[str, List[str]] = {
        ".h": ["c", "cpp", "objc"],
        ".m": ["objc", "matlab"],
        ".sql": ["sql", "plsql", "mysql"],
        ".xml": ["xml", "html", "jsp"],
        ".json": ["json", "jsonc"],
    }

    # Literal substrings (not regexes) used for content-based disambiguation.
    CONTENT_PATTERNS: Dict[str, Dict[str, List[str]]] = {
        "c_vs_cpp": {
            "cpp": ["#include <iostream>", "std::", "namespace", "class ", "template<"],
            "c": ["#include <stdio.h>", "printf(", "malloc(", "typedef struct"],
        },
        "objc_vs_matlab": {
            "objc": ["#import", "@interface", "@implementation", "NSString", "alloc]"],
            "matlab": ["function ", "end;", "disp(", "clc;", "clear all"],
        },
    }

    # Languages reported as supported by is_supported().
    SUPPORTED_LANGUAGES = {
        "java",
        "javascript",
        "typescript",
        "python",
        "c",
        "cpp",
        "rust",
        "go",
    }

    def __init__(self) -> None:
        """Initialise the per-instance tables and logging helpers.

        ``extension_map`` and ``content_patterns`` are instance attributes
        that no other method in this class reads; they are kept unchanged for
        external users.
        """
        # Extension -> (language, confidence).
        # NOTE(review): ".m" maps to "objectivec" here but to "objc" in
        # EXTENSION_MAPPING - confirm which spelling consumers expect.
        self.extension_map = {
            ".java": ("java", 0.9),
            ".js": ("javascript", 0.9),
            ".jsx": ("javascript", 0.8),
            ".ts": ("typescript", 0.9),
            ".tsx": ("typescript", 0.8),
            ".py": ("python", 0.9),
            ".pyw": ("python", 0.8),
            ".c": ("c", 0.9),
            ".h": ("c", 0.7),
            ".cpp": ("cpp", 0.9),
            ".cxx": ("cpp", 0.9),
            ".cc": ("cpp", 0.9),
            ".hpp": ("cpp", 0.8),
            ".rs": ("rust", 0.9),
            ".go": ("go", 0.9),
            ".cs": ("csharp", 0.9),
            ".php": ("php", 0.9),
            ".rb": ("ruby", 0.9),
            ".swift": ("swift", 0.9),
            ".kt": ("kotlin", 0.9),
            ".scala": ("scala", 0.9),
            ".clj": ("clojure", 0.9),
            ".hs": ("haskell", 0.9),
            ".ml": ("ocaml", 0.9),
            ".fs": ("fsharp", 0.9),
            ".elm": ("elm", 0.9),
            ".dart": ("dart", 0.9),
            ".lua": ("lua", 0.9),
            ".r": ("r", 0.9),
            ".m": ("objectivec", 0.7),
            ".mm": ("objectivec", 0.8),
        }

        # Content-based detection patterns: language -> [(regex, weight)].
        self.content_patterns = {
            "java": [
                (r"package\s+[\w\.]+\s*;", 0.3),
                (r"public\s+class\s+\w+", 0.3),
                (r"import\s+[\w\.]+\s*;", 0.2),
                (r"@\w+\s*\(", 0.2),  # Annotations
            ],
            "python": [
                (r"def\s+\w+\s*\(", 0.3),
                (r"import\s+\w+", 0.2),
                (r"from\s+\w+\s+import", 0.2),
                (r'if\s+__name__\s*==\s*["\']__main__["\']', 0.3),
            ],
            "javascript": [
                (r"function\s+\w+\s*\(", 0.3),
                (r"var\s+\w+\s*=", 0.2),
                (r"let\s+\w+\s*=", 0.2),
                (r"const\s+\w+\s*=", 0.2),
                (r"console\.log\s*\(", 0.1),
            ],
            "typescript": [
                (r"interface\s+\w+", 0.3),
                (r"type\s+\w+\s*=", 0.2),
                (r":\s*\w+\s*=", 0.2),  # Type annotations
                (r"export\s+(interface|type|class)", 0.2),
            ],
            "c": [
                (r"#include\s*<[\w\.]+>", 0.3),
                (r"int\s+main\s*\(", 0.3),
                (r"printf\s*\(", 0.2),
                (r"#define\s+\w+", 0.2),
            ],
            "cpp": [
                (r"#include\s*<[\w\.]+>", 0.2),
                (r"using\s+namespace\s+\w+", 0.3),
                (r"std::\w+", 0.2),
                (r"class\s+\w+\s*{", 0.3),
            ],
        }

        # Imported inside __init__ exactly as the original does.
        from .utils import log_debug, log_warning

        self._log_debug = log_debug
        self._log_warning = log_warning

    def detect_language(
        self, file_path: str, content: Optional[str] = None
    ) -> Tuple[str, float]:
        """Detect the language of ``file_path``, optionally refined by content.

        Args:
            file_path: Path of the file; only its (lower-cased) suffix is used.
            content: Optional file content, used only for ambiguous extensions.

        Returns:
            A ``(language, confidence)`` tuple:
            ``("unknown", 0.0)`` when the extension is not in
            ``EXTENSION_MAPPING``; confidence ``1.0`` for an unambiguous
            extension; ``0.7`` for an ambiguous extension without content or
            when content agrees with the default mapping; ``0.9`` when content
            resolves to a language different from the default.
        """
        extension = Path(file_path).suffix.lower()

        if extension not in self.EXTENSION_MAPPING:
            return "unknown", 0.0

        language = self.EXTENSION_MAPPING[extension]

        if extension not in self.AMBIGUOUS_EXTENSIONS:
            return language, 1.0

        if not content:
            # No content to inspect, so fall back with reduced confidence.
            return language, 0.7

        refined_language = self._resolve_ambiguity(extension, content)
        confidence = 0.9 if refined_language != language else 0.7
        return refined_language, confidence

    def detect_from_extension(self, file_path: str) -> str:
        """Return the language detected from the file extension alone.

        Args:
            file_path: Path of the file.

        Returns:
            The detected language name (``"unknown"`` if unmapped).
        """
        language, _ = self.detect_language(file_path)
        return language

    def is_supported(self, language: str) -> bool:
        """Return whether ``language`` is listed in ``SUPPORTED_LANGUAGES``.

        Args:
            language: Language name.

        Returns:
            ``True`` if the language is in ``SUPPORTED_LANGUAGES``.
        """
        return language in self.SUPPORTED_LANGUAGES

    def get_supported_extensions(self) -> List[str]:
        """Return the sorted list of extensions in ``EXTENSION_MAPPING``."""
        return sorted(self.EXTENSION_MAPPING.keys())

    def get_supported_languages(self) -> List[str]:
        """Return the sorted list of names in ``SUPPORTED_LANGUAGES``."""
        return sorted(self.SUPPORTED_LANGUAGES)

    @staticmethod
    def _count_matches(content: str, patterns: List[str]) -> int:
        """Return how many of the literal ``patterns`` occur in ``content``."""
        return sum(1 for pattern in patterns if pattern in content)

    def _resolve_ambiguity(self, extension: str, content: str) -> str:
        """Resolve an ambiguous extension using the file content.

        Args:
            extension: File extension (lower-case, with the dot).
            content: File content.

        Returns:
            The resolved language name. Non-ambiguous extensions return their
            direct mapping (or ``"unknown"``); ambiguous extensions other than
            ``.h`` and ``.m`` return their first candidate.
        """
        if extension not in self.AMBIGUOUS_EXTENSIONS:
            return self.EXTENSION_MAPPING.get(extension, "unknown")

        candidates = self.AMBIGUOUS_EXTENSIONS[extension]

        # .h files: C vs C++ vs Objective-C
        if extension == ".h":
            return self._detect_c_family(content, candidates)

        # .m files: Objective-C vs MATLAB
        if extension == ".m":
            return self._detect_objc_vs_matlab(content, candidates)

        # Default to the first candidate.
        return candidates[0]

    def _detect_c_family(self, content: str, candidates: List[str]) -> str:
        """Choose between C, C++ and Objective-C by counting literal patterns.

        Args:
            content: File content to scan.
            candidates: Allowed languages; the first is the fallback.

        Returns:
            The highest-scoring language, or ``candidates[0]`` when nothing
            matched. Ties go to the earlier of cpp, c, objc.
        """
        c_family = self.CONTENT_PATTERNS["c_vs_cpp"]
        objc_patterns = self.CONTENT_PATTERNS["objc_vs_matlab"]["objc"]

        scores = {
            "cpp": self._count_matches(content, c_family["cpp"]),
            "c": self._count_matches(content, c_family["c"]),
            # Objective-C markers are strong indicators, so each counts 3.
            "objc": 3 * self._count_matches(content, objc_patterns),
        }
        best_language = max(scores, key=scores.get)

        # Exclude objc when it is not an allowed candidate.
        if best_language == "objc" and "objc" not in candidates:
            best_language = "cpp" if scores["cpp"] > scores["c"] else "c"

        return best_language if scores[best_language] > 0 else candidates[0]

    def _detect_objc_vs_matlab(self, content: str, candidates: List[str]) -> str:
        """Choose between Objective-C and MATLAB by counting literal patterns.

        Args:
            content: File content to scan.
            candidates: Allowed languages; the first is returned on a tie.

        Returns:
            ``"objc"``, ``"matlab"``, or ``candidates[0]`` when scores are equal.
        """
        patterns = self.CONTENT_PATTERNS["objc_vs_matlab"]
        objc_score = self._count_matches(content, patterns["objc"])
        matlab_score = self._count_matches(content, patterns["matlab"])

        if objc_score > matlab_score:
            return "objc"
        if matlab_score > objc_score:
            return "matlab"
        return candidates[0]  # default

    def add_extension_mapping(self, extension: str, language: str) -> None:
        """Register a custom extension -> language mapping.

        The mapping is stored in the class-level ``EXTENSION_MAPPING`` dict.

        Args:
            extension: File extension. It is lower-cased, and a missing
                leading dot is added, because lookups use ``Path.suffix``,
                which always starts with a dot. The empty string is stored
                as-is (it is the suffix of extension-less files).
            language: Language name.
        """
        normalized = extension.lower()
        if normalized and not normalized.startswith("."):
            normalized = "." + normalized
        self.EXTENSION_MAPPING[normalized] = language

    def get_language_info(self, language: str) -> Dict[str, Any]:
        """Return details about ``language``.

        Args:
            language: Language name.

        Returns:
            A dict with keys ``name``, ``extensions`` (extensions mapped to
            the language), ``supported`` and ``tree_sitter_available`` (both
            derived from ``SUPPORTED_LANGUAGES``).
        """
        extensions = [
            ext for ext, lang in self.EXTENSION_MAPPING.items() if lang == language
        ]

        return {
            "name": language,
            "extensions": extensions,
            "supported": self.is_supported(language),
            "tree_sitter_available": language in self.SUPPORTED_LANGUAGES,
        }
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
# Module-level LanguageDetector instance shared by the helper functions below.
|
|
374
|
+
detector = LanguageDetector()
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
def detect_language_from_file(file_path: str) -> str:
    """Detect a file's language from its path (simple API).

    Delegates to the module-level ``detector`` instance.

    Args:
        file_path: Path of the file.

    Returns:
        The detected language name.
    """
    detected = detector.detect_from_extension(file_path)
    return detected
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
def is_language_supported(language: str) -> bool:
    """Report whether a language is supported (simple API).

    Delegates to the module-level ``detector`` instance.

    Args:
        language: Language name.

    Returns:
        The result of ``detector.is_supported(language)``.
    """
    supported = detector.is_supported(language)
    return supported
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Dynamic Language Loader
|
|
5
|
+
|
|
6
|
+
Handles loading of Tree-sitter language parsers with efficient caching
|
|
7
|
+
and lazy loading for optimal performance.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import importlib
|
|
11
|
+
import sys
|
|
12
|
+
from functools import lru_cache
|
|
13
|
+
from typing import TYPE_CHECKING, Any, Dict, Optional, Set
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from tree_sitter import Language, Parser
|
|
17
|
+
|
|
18
|
+
try:
|
|
19
|
+
import tree_sitter
|
|
20
|
+
|
|
21
|
+
TREE_SITTER_AVAILABLE = True
|
|
22
|
+
except ImportError:
|
|
23
|
+
TREE_SITTER_AVAILABLE = False
|
|
24
|
+
|
|
25
|
+
from .utils import log_error, log_warning
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class LanguageLoader:
    """Load tree-sitter languages and parsers on demand, with caching.

    All caches are per-instance dictionaries/sets, so results are never shared
    between loader instances and ``clear_cache`` fully resets the loader.
    """

    # Supported language name -> name of the module that provides its grammar.
    LANGUAGE_MODULES = {
        "java": "tree_sitter_java",
        "javascript": "tree_sitter_javascript",
        "typescript": "tree_sitter_typescript",
        "tsx": "tree_sitter_typescript",
        "python": "tree_sitter_python",
        "c": "tree_sitter_c",
        "cpp": "tree_sitter_cpp",
        "rust": "tree_sitter_rust",
        "go": "tree_sitter_go",
    }

    # TypeScript special case: language name -> dialect ("typescript" / "tsx").
    TYPESCRIPT_DIALECTS = {"typescript": "typescript", "tsx": "tsx"}

    @property
    def SUPPORTED_LANGUAGES(self) -> list:
        """List of the language names in ``LANGUAGE_MODULES``."""
        return list(self.LANGUAGE_MODULES.keys())

    def __init__(self) -> None:
        """Create a loader with empty caches."""
        self._loaded_languages: Dict[str, "Language"] = {}
        self._loaded_modules: Dict[str, Any] = {}
        self._availability_cache: Dict[str, bool] = {}
        self._parser_cache: Dict[str, "Parser"] = {}  # language -> Parser
        self._unavailable_languages: Set[str] = set()  # known-bad languages

    def _mark_unavailable(self, language: str) -> bool:
        """Record ``language`` as unavailable in both caches; return False."""
        self._availability_cache[language] = False
        self._unavailable_languages.add(language)
        return False

    def is_language_available(self, language: str) -> bool:
        """Check whether the grammar module for ``language`` can be imported.

        Results are memoised in the per-instance ``_availability_cache``. No
        ``functools.lru_cache`` is used: on a method it would keep instances
        alive and return stale ``True`` values after ``load_language`` has
        recorded the language in ``_unavailable_languages``.

        Args:
            language: Language name.

        Returns:
            ``True`` if the language is usable, ``False`` otherwise.
        """
        # Languages already known to be unusable are answered immediately.
        if language in self._unavailable_languages:
            return False

        if language in self._availability_cache:
            return self._availability_cache[language]

        if not TREE_SITTER_AVAILABLE:
            return self._mark_unavailable(language)

        module_name = self.LANGUAGE_MODULES.get(language)
        if not module_name:
            return self._mark_unavailable(language)

        try:
            importlib.import_module(module_name)
        except ImportError:
            return self._mark_unavailable(language)

        self._availability_cache[language] = True
        return True

    def _resolve_language_factory(self, module: Any, language: str) -> Any:
        """Return the module attribute that yields the grammar, or None.

        TypeScript dialects prefer ``language_typescript`` / ``language_tsx``
        and fall back to ``language``; other languages use ``language``.
        """
        if language in self.TYPESCRIPT_DIALECTS:
            dialect_attr = "language_" + self.TYPESCRIPT_DIALECTS[language]
            if hasattr(module, dialect_attr):
                return getattr(module, dialect_attr)
        if hasattr(module, "language"):
            return module.language
        return None

    def load_language(self, language: str) -> Optional["Language"]:
        """Load and return the tree-sitter ``Language`` for ``language``.

        Args:
            language: Language name.

        Returns:
            The cached or newly created ``Language`` object, or ``None`` when
            tree-sitter or the grammar is unavailable or loading fails.
        """
        if not TREE_SITTER_AVAILABLE:
            log_warning("Tree-sitter is not available")
            return None

        if language in self._loaded_languages:
            return self._loaded_languages[language]

        if not self.is_language_available(language):
            return None

        try:
            module_name = self.LANGUAGE_MODULES[language]

            # Reuse the imported module when two languages share one
            # (typescript / tsx).
            if module_name not in self._loaded_modules:
                self._loaded_modules[module_name] = importlib.import_module(module_name)
            module = self._loaded_modules[module_name]

            language_func = self._resolve_language_factory(module, language)
            if language_func is None:
                return None

            # Create the Language object once and cache it.
            tree_sitter_language = tree_sitter.Language(language_func())
            self._loaded_languages[language] = tree_sitter_language
            return tree_sitter_language

        except Exception as e:
            # Best-effort API: log, remember the failure, and report None.
            log_warning(f"Failed to load language '{language}': {e}")
            self._unavailable_languages.add(language)
            return None

    def create_parser_safely(self, language: str) -> Optional["Parser"]:
        """Create (or fetch from cache) a parser for ``language``.

        Args:
            language: Language name.

        Returns:
            A ``Parser`` bound to the language, or ``None`` on any failure.
        """
        if not TREE_SITTER_AVAILABLE:
            log_warning("Tree-sitter is not available")
            return None

        if language in self._parser_cache:
            return self._parser_cache[language]

        tree_sitter_language = self.load_language(language)
        if tree_sitter_language is None:
            return None

        try:
            parser = tree_sitter.Parser(tree_sitter_language)
            self._parser_cache[language] = parser
            return parser
        except Exception as e:
            log_warning(f"Failed to create parser for '{language}': {e}")
            return None

    def create_parser(self, language: str) -> Optional["Parser"]:
        """Create a parser for ``language`` (alias for create_parser_safely)."""
        return self.create_parser_safely(language)

    def get_supported_languages(self) -> list:
        """Return the languages from ``LANGUAGE_MODULES`` that are available.

        Returns:
            Language names for which ``is_language_available`` is true.
        """
        return [
            lang
            for lang in self.LANGUAGE_MODULES
            if self.is_language_available(lang)
        ]

    def clear_cache(self) -> None:
        """Clear every per-instance cache (languages, modules, parsers, flags)."""
        self._loaded_languages.clear()
        self._loaded_modules.clear()
        self._availability_cache.clear()
        self._parser_cache.clear()
        self._unavailable_languages.clear()
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
# Module-level holder for the shared LanguageLoader; None until get_loader() creates it.
|
|
201
|
+
_loader_instance = None
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def get_loader() -> "LanguageLoader":
    """Return the shared loader, creating it on the first call."""
    global _loader_instance
    if _loader_instance is not None:
        return _loader_instance
    _loader_instance = LanguageLoader()
    return _loader_instance
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
# Kept for backward compatibility: module-level name bound to the shared loader.
|
|
213
|
+
loader = get_loader()
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def check_language_availability(language: str) -> bool:
    """Check whether ``language`` is available via the shared loader."""
    shared_loader = get_loader()
    return shared_loader.is_language_available(language)
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def create_parser_safely(language: str) -> Optional["Parser"]:
    """Create a parser for ``language`` via the shared loader."""
    shared_loader = get_loader()
    return shared_loader.create_parser_safely(language)
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def load_language(language: str) -> Optional["Language"]:
    """Load the tree-sitter language ``language`` via the shared loader."""
    shared_loader = get_loader()
    return shared_loader.load_language(language)
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Language Plugins
|
|
5
|
+
|
|
6
|
+
This package contains the core language plugins for the tree-sitter analyzer.
|
|
7
|
+
Each plugin provides language-specific parsing and element extraction capabilities.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
# This file makes the languages directory a Python package
|
|
11
|
+
# Language plugins are automatically discovered by the PluginManager
|