tree-sitter-analyzer 0.3.0__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tree-sitter-analyzer might be problematic. Click here for more details.
- tree_sitter_analyzer/__init__.py +5 -6
- tree_sitter_analyzer/__main__.py +2 -2
- tree_sitter_analyzer/api.py +4 -2
- tree_sitter_analyzer/cli/__init__.py +3 -3
- tree_sitter_analyzer/cli/commands/advanced_command.py +1 -1
- tree_sitter_analyzer/cli/commands/base_command.py +1 -1
- tree_sitter_analyzer/cli/commands/default_command.py +1 -1
- tree_sitter_analyzer/cli/commands/partial_read_command.py +2 -2
- tree_sitter_analyzer/cli/commands/query_command.py +5 -5
- tree_sitter_analyzer/cli/commands/summary_command.py +2 -2
- tree_sitter_analyzer/cli/commands/table_command.py +14 -11
- tree_sitter_analyzer/cli/info_commands.py +14 -13
- tree_sitter_analyzer/cli_main.py +51 -31
- tree_sitter_analyzer/core/analysis_engine.py +54 -90
- tree_sitter_analyzer/core/cache_service.py +31 -31
- tree_sitter_analyzer/core/engine.py +6 -4
- tree_sitter_analyzer/core/parser.py +1 -1
- tree_sitter_analyzer/core/query.py +502 -494
- tree_sitter_analyzer/encoding_utils.py +3 -2
- tree_sitter_analyzer/exceptions.py +23 -23
- tree_sitter_analyzer/file_handler.py +7 -14
- tree_sitter_analyzer/formatters/base_formatter.py +18 -18
- tree_sitter_analyzer/formatters/formatter_factory.py +15 -15
- tree_sitter_analyzer/formatters/java_formatter.py +291 -287
- tree_sitter_analyzer/formatters/python_formatter.py +259 -255
- tree_sitter_analyzer/interfaces/cli.py +1 -1
- tree_sitter_analyzer/interfaces/cli_adapter.py +62 -41
- tree_sitter_analyzer/interfaces/mcp_adapter.py +43 -17
- tree_sitter_analyzer/interfaces/mcp_server.py +9 -9
- tree_sitter_analyzer/language_detector.py +398 -398
- tree_sitter_analyzer/language_loader.py +224 -224
- tree_sitter_analyzer/languages/java_plugin.py +1174 -1129
- tree_sitter_analyzer/{plugins → languages}/javascript_plugin.py +3 -3
- tree_sitter_analyzer/languages/python_plugin.py +26 -8
- tree_sitter_analyzer/mcp/resources/code_file_resource.py +0 -3
- tree_sitter_analyzer/mcp/resources/project_stats_resource.py +555 -560
- tree_sitter_analyzer/mcp/server.py +4 -4
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +63 -30
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +9 -4
- tree_sitter_analyzer/mcp/tools/table_format_tool.py +2 -2
- tree_sitter_analyzer/mcp/utils/__init__.py +10 -8
- tree_sitter_analyzer/models.py +470 -470
- tree_sitter_analyzer/output_manager.py +12 -20
- tree_sitter_analyzer/plugins/__init__.py +9 -62
- tree_sitter_analyzer/plugins/base.py +53 -1
- tree_sitter_analyzer/plugins/manager.py +29 -12
- tree_sitter_analyzer/queries/java.py +78 -78
- tree_sitter_analyzer/queries/javascript.py +7 -7
- tree_sitter_analyzer/queries/python.py +18 -18
- tree_sitter_analyzer/queries/typescript.py +12 -12
- tree_sitter_analyzer/query_loader.py +17 -14
- tree_sitter_analyzer/table_formatter.py +24 -19
- tree_sitter_analyzer/utils.py +7 -7
- {tree_sitter_analyzer-0.3.0.dist-info → tree_sitter_analyzer-0.6.0.dist-info}/METADATA +11 -11
- tree_sitter_analyzer-0.6.0.dist-info/RECORD +72 -0
- {tree_sitter_analyzer-0.3.0.dist-info → tree_sitter_analyzer-0.6.0.dist-info}/entry_points.txt +2 -1
- tree_sitter_analyzer/java_analyzer.py +0 -218
- tree_sitter_analyzer/plugins/java_plugin.py +0 -608
- tree_sitter_analyzer/plugins/plugin_loader.py +0 -85
- tree_sitter_analyzer/plugins/python_plugin.py +0 -606
- tree_sitter_analyzer/plugins/registry.py +0 -374
- tree_sitter_analyzer-0.3.0.dist-info/RECORD +0 -77
- {tree_sitter_analyzer-0.3.0.dist-info → tree_sitter_analyzer-0.6.0.dist-info}/WHEEL +0 -0
|
@@ -1,398 +1,398 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""
|
|
3
|
-
Language Detection System
|
|
4
|
-
|
|
5
|
-
Automatically detects programming language from file extensions and content.
|
|
6
|
-
Supports multiple languages with extensible configuration.
|
|
7
|
-
"""
|
|
8
|
-
|
|
9
|
-
from pathlib import Path
|
|
10
|
-
from typing import Any
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class LanguageDetector:
|
|
14
|
-
"""プログラミング言語の自動判定システム"""
|
|
15
|
-
|
|
16
|
-
# 基本的な拡張子マッピング
|
|
17
|
-
EXTENSION_MAPPING: dict[str, str] = {
|
|
18
|
-
# Java系
|
|
19
|
-
".java": "java",
|
|
20
|
-
".jsp": "jsp",
|
|
21
|
-
".jspx": "jsp",
|
|
22
|
-
# JavaScript/TypeScript系
|
|
23
|
-
".js": "javascript",
|
|
24
|
-
".jsx": "jsx",
|
|
25
|
-
".ts": "typescript",
|
|
26
|
-
".tsx": "tsx",
|
|
27
|
-
".mjs": "javascript",
|
|
28
|
-
".cjs": "javascript",
|
|
29
|
-
# Python系
|
|
30
|
-
".py": "python",
|
|
31
|
-
".pyx": "python",
|
|
32
|
-
".pyi": "python",
|
|
33
|
-
".pyw": "python",
|
|
34
|
-
# C/C++系
|
|
35
|
-
".c": "c",
|
|
36
|
-
".cpp": "cpp",
|
|
37
|
-
".cxx": "cpp",
|
|
38
|
-
".cc": "cpp",
|
|
39
|
-
".h": "c", # 曖昧性あり
|
|
40
|
-
".hpp": "cpp",
|
|
41
|
-
".hxx": "cpp",
|
|
42
|
-
# その他の言語
|
|
43
|
-
".rs": "rust",
|
|
44
|
-
".go": "go",
|
|
45
|
-
".rb": "ruby",
|
|
46
|
-
".php": "php",
|
|
47
|
-
".kt": "kotlin",
|
|
48
|
-
".swift": "swift",
|
|
49
|
-
".cs": "csharp",
|
|
50
|
-
".vb": "vbnet",
|
|
51
|
-
".fs": "fsharp",
|
|
52
|
-
".scala": "scala",
|
|
53
|
-
".clj": "clojure",
|
|
54
|
-
".hs": "haskell",
|
|
55
|
-
".ml": "ocaml",
|
|
56
|
-
".lua": "lua",
|
|
57
|
-
".pl": "perl",
|
|
58
|
-
".r": "r",
|
|
59
|
-
".m": "objc", # 曖昧性あり(MATLABとも)
|
|
60
|
-
".dart": "dart",
|
|
61
|
-
".elm": "elm",
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
# 曖昧な拡張子(複数言語に対応)
|
|
65
|
-
AMBIGUOUS_EXTENSIONS: dict[str, list[str]] = {
|
|
66
|
-
".h": ["c", "cpp", "objc"],
|
|
67
|
-
".m": ["objc", "matlab"],
|
|
68
|
-
".sql": ["sql", "plsql", "mysql"],
|
|
69
|
-
".xml": ["xml", "html", "jsp"],
|
|
70
|
-
".json": ["json", "jsonc"],
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
# コンテンツベース判定のキーワード
|
|
74
|
-
CONTENT_PATTERNS: dict[str, dict[str, list[str]]] = {
|
|
75
|
-
"c_vs_cpp": {
|
|
76
|
-
"cpp": ["#include <iostream>", "std::", "namespace", "class ", "template<"],
|
|
77
|
-
"c": ["#include <stdio.h>", "printf(", "malloc(", "typedef struct"],
|
|
78
|
-
},
|
|
79
|
-
"objc_vs_matlab": {
|
|
80
|
-
"objc": ["#import", "@interface", "@implementation", "NSString", "alloc]"],
|
|
81
|
-
"matlab": ["function ", "end;", "disp(", "clc;", "clear all"],
|
|
82
|
-
},
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
# Tree-sitter対応言語(現在サポート済み)
|
|
86
|
-
SUPPORTED_LANGUAGES = {
|
|
87
|
-
"java",
|
|
88
|
-
"javascript",
|
|
89
|
-
"typescript",
|
|
90
|
-
"python",
|
|
91
|
-
"c",
|
|
92
|
-
"cpp",
|
|
93
|
-
"rust",
|
|
94
|
-
"go",
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
def __init__(self) -> None:
|
|
98
|
-
"""言語検出器を初期化"""
|
|
99
|
-
self.extension_map = {
|
|
100
|
-
".java": ("java", 0.9),
|
|
101
|
-
".js": ("javascript", 0.9),
|
|
102
|
-
".jsx": ("javascript", 0.8),
|
|
103
|
-
".ts": ("typescript", 0.9),
|
|
104
|
-
".tsx": ("typescript", 0.8),
|
|
105
|
-
".py": ("python", 0.9),
|
|
106
|
-
".pyw": ("python", 0.8),
|
|
107
|
-
".c": ("c", 0.9),
|
|
108
|
-
".h": ("c", 0.7),
|
|
109
|
-
".cpp": ("cpp", 0.9),
|
|
110
|
-
".cxx": ("cpp", 0.9),
|
|
111
|
-
".cc": ("cpp", 0.9),
|
|
112
|
-
".hpp": ("cpp", 0.8),
|
|
113
|
-
".rs": ("rust", 0.9),
|
|
114
|
-
".go": ("go", 0.9),
|
|
115
|
-
".cs": ("csharp", 0.9),
|
|
116
|
-
".php": ("php", 0.9),
|
|
117
|
-
".rb": ("ruby", 0.9),
|
|
118
|
-
".swift": ("swift", 0.9),
|
|
119
|
-
".kt": ("kotlin", 0.9),
|
|
120
|
-
".scala": ("scala", 0.9),
|
|
121
|
-
".clj": ("clojure", 0.9),
|
|
122
|
-
".hs": ("haskell", 0.9),
|
|
123
|
-
".ml": ("ocaml", 0.9),
|
|
124
|
-
".fs": ("fsharp", 0.9),
|
|
125
|
-
".elm": ("elm", 0.9),
|
|
126
|
-
".dart": ("dart", 0.9),
|
|
127
|
-
".lua": ("lua", 0.9),
|
|
128
|
-
".r": ("r", 0.9),
|
|
129
|
-
".m": ("objectivec", 0.7),
|
|
130
|
-
".mm": ("objectivec", 0.8),
|
|
131
|
-
}
|
|
132
|
-
|
|
133
|
-
# Content-based detection patterns
|
|
134
|
-
self.content_patterns = {
|
|
135
|
-
"java": [
|
|
136
|
-
(r"package\s+[\w\.]+\s*;", 0.3),
|
|
137
|
-
(r"public\s+class\s+\w+", 0.3),
|
|
138
|
-
(r"import\s+[\w\.]+\s*;", 0.2),
|
|
139
|
-
(r"@\w+\s*\(", 0.2), # Annotations
|
|
140
|
-
],
|
|
141
|
-
"python": [
|
|
142
|
-
(r"def\s+\w+\s*\(", 0.3),
|
|
143
|
-
(r"import\s+\w+", 0.2),
|
|
144
|
-
(r"from\s+\w+\s+import", 0.2),
|
|
145
|
-
(r'if\s+__name__\s*==\s*["\']__main__["\']', 0.3),
|
|
146
|
-
],
|
|
147
|
-
"javascript": [
|
|
148
|
-
(r"function\s+\w+\s*\(", 0.3),
|
|
149
|
-
(r"var\s+\w+\s*=", 0.2),
|
|
150
|
-
(r"let\s+\w+\s*=", 0.2),
|
|
151
|
-
(r"const\s+\w+\s*=", 0.2),
|
|
152
|
-
(r"console\.log\s*\(", 0.1),
|
|
153
|
-
],
|
|
154
|
-
"typescript": [
|
|
155
|
-
(r"interface\s+\w+", 0.3),
|
|
156
|
-
(r"type\s+\w+\s*=", 0.2),
|
|
157
|
-
(r":\s*\w+\s*=", 0.2), # Type annotations
|
|
158
|
-
(r"export\s+(interface|type|class)", 0.2),
|
|
159
|
-
],
|
|
160
|
-
"c": [
|
|
161
|
-
(r"#include\s*<[\w\.]+>", 0.3),
|
|
162
|
-
(r"int\s+main\s*\(", 0.3),
|
|
163
|
-
(r"printf\s*\(", 0.2),
|
|
164
|
-
(r"#define\s+\w+", 0.2),
|
|
165
|
-
],
|
|
166
|
-
"cpp": [
|
|
167
|
-
(r"#include\s*<[\w\.]+>", 0.2),
|
|
168
|
-
(r"using\s+namespace\s+\w+", 0.3),
|
|
169
|
-
(r"std::\w+", 0.2),
|
|
170
|
-
(r"class\s+\w+\s*{", 0.3),
|
|
171
|
-
],
|
|
172
|
-
}
|
|
173
|
-
|
|
174
|
-
from .utils import log_debug, log_warning
|
|
175
|
-
|
|
176
|
-
self._log_debug = log_debug
|
|
177
|
-
self._log_warning = log_warning
|
|
178
|
-
|
|
179
|
-
def detect_language(
|
|
180
|
-
self, file_path: str, content: str | None = None
|
|
181
|
-
) -> tuple[str, float]:
|
|
182
|
-
"""
|
|
183
|
-
ファイルパスとコンテンツから言語を判定
|
|
184
|
-
|
|
185
|
-
Args:
|
|
186
|
-
file_path: ファイルパス
|
|
187
|
-
content: ファイルコンテンツ(任意、曖昧性解決用)
|
|
188
|
-
|
|
189
|
-
Returns:
|
|
190
|
-
(言語名, 信頼度) のタプル
|
|
191
|
-
"""
|
|
192
|
-
path = Path(file_path)
|
|
193
|
-
extension = path.suffix.lower()
|
|
194
|
-
|
|
195
|
-
# 直接マッピングで判定できる場合
|
|
196
|
-
if extension in self.EXTENSION_MAPPING:
|
|
197
|
-
language = self.EXTENSION_MAPPING[extension]
|
|
198
|
-
|
|
199
|
-
# 曖昧性がない場合は高信頼度
|
|
200
|
-
if extension not in self.AMBIGUOUS_EXTENSIONS:
|
|
201
|
-
return language, 1.0
|
|
202
|
-
|
|
203
|
-
# 曖昧性がある場合はコンテンツベース判定
|
|
204
|
-
if content:
|
|
205
|
-
refined_language = self._resolve_ambiguity(extension, content)
|
|
206
|
-
return refined_language, 0.9 if refined_language != language else 0.7
|
|
207
|
-
else:
|
|
208
|
-
return language, 0.7 # コンテンツなしなので信頼度低下
|
|
209
|
-
|
|
210
|
-
# 拡張子が不明な場合
|
|
211
|
-
return "unknown", 0.0
|
|
212
|
-
|
|
213
|
-
def detect_from_extension(self, file_path: str) -> str:
|
|
214
|
-
"""
|
|
215
|
-
ファイル拡張子のみから言語を簡易判定
|
|
216
|
-
|
|
217
|
-
Args:
|
|
218
|
-
file_path: ファイルパス
|
|
219
|
-
|
|
220
|
-
Returns:
|
|
221
|
-
判定された言語名
|
|
222
|
-
"""
|
|
223
|
-
language, _ = self.detect_language(file_path)
|
|
224
|
-
return language
|
|
225
|
-
|
|
226
|
-
def is_supported(self, language: str) -> bool:
|
|
227
|
-
"""
|
|
228
|
-
指定された言語がTree-sitterでサポートされているか確認
|
|
229
|
-
|
|
230
|
-
Args:
|
|
231
|
-
language: 言語名
|
|
232
|
-
|
|
233
|
-
Returns:
|
|
234
|
-
サポート状況
|
|
235
|
-
"""
|
|
236
|
-
return language in self.SUPPORTED_LANGUAGES
|
|
237
|
-
|
|
238
|
-
def get_supported_extensions(self) -> list[str]:
|
|
239
|
-
"""
|
|
240
|
-
サポートされている拡張子一覧を取得
|
|
241
|
-
|
|
242
|
-
Returns:
|
|
243
|
-
拡張子のリスト
|
|
244
|
-
"""
|
|
245
|
-
return sorted(self.EXTENSION_MAPPING.keys())
|
|
246
|
-
|
|
247
|
-
def get_supported_languages(self) -> list[str]:
|
|
248
|
-
"""
|
|
249
|
-
サポートされている言語一覧を取得
|
|
250
|
-
|
|
251
|
-
Returns:
|
|
252
|
-
言語のリスト
|
|
253
|
-
"""
|
|
254
|
-
return sorted(self.SUPPORTED_LANGUAGES)
|
|
255
|
-
|
|
256
|
-
def _resolve_ambiguity(self, extension: str, content: str) -> str:
|
|
257
|
-
"""
|
|
258
|
-
曖昧な拡張子をコンテンツベースで解決
|
|
259
|
-
|
|
260
|
-
Args:
|
|
261
|
-
extension: ファイル拡張子
|
|
262
|
-
content: ファイルコンテンツ
|
|
263
|
-
|
|
264
|
-
Returns:
|
|
265
|
-
解決された言語名
|
|
266
|
-
"""
|
|
267
|
-
if extension not in self.AMBIGUOUS_EXTENSIONS:
|
|
268
|
-
return self.EXTENSION_MAPPING.get(extension, "unknown")
|
|
269
|
-
|
|
270
|
-
candidates = self.AMBIGUOUS_EXTENSIONS[extension]
|
|
271
|
-
|
|
272
|
-
# .h ファイルの場合(C vs C++ vs Objective-C)
|
|
273
|
-
if extension == ".h":
|
|
274
|
-
return self._detect_c_family(content, candidates)
|
|
275
|
-
|
|
276
|
-
# .m ファイルの場合(Objective-C vs MATLAB)
|
|
277
|
-
elif extension == ".m":
|
|
278
|
-
return self._detect_objc_vs_matlab(content, candidates)
|
|
279
|
-
|
|
280
|
-
# デフォルトは最初の候補
|
|
281
|
-
return candidates[0]
|
|
282
|
-
|
|
283
|
-
def _detect_c_family(self, content: str, candidates: list[str]) -> str:
|
|
284
|
-
"""C系言語の判定"""
|
|
285
|
-
cpp_score = 0
|
|
286
|
-
c_score = 0
|
|
287
|
-
objc_score = 0
|
|
288
|
-
|
|
289
|
-
# C++の特徴
|
|
290
|
-
cpp_patterns = self.CONTENT_PATTERNS["c_vs_cpp"]["cpp"]
|
|
291
|
-
for pattern in cpp_patterns:
|
|
292
|
-
if pattern in content:
|
|
293
|
-
cpp_score += 1
|
|
294
|
-
|
|
295
|
-
# Cの特徴
|
|
296
|
-
c_patterns = self.CONTENT_PATTERNS["c_vs_cpp"]["c"]
|
|
297
|
-
for pattern in c_patterns:
|
|
298
|
-
if pattern in content:
|
|
299
|
-
c_score += 1
|
|
300
|
-
|
|
301
|
-
# Objective-Cの特徴
|
|
302
|
-
objc_patterns = self.CONTENT_PATTERNS["objc_vs_matlab"]["objc"]
|
|
303
|
-
for pattern in objc_patterns:
|
|
304
|
-
if pattern in content:
|
|
305
|
-
objc_score += 3 # 強い指標なので重み大
|
|
306
|
-
|
|
307
|
-
# 最高スコアの言語を選択
|
|
308
|
-
scores = {"cpp": cpp_score, "c": c_score, "objc": objc_score}
|
|
309
|
-
best_language = max(scores, key=lambda x: scores[x])
|
|
310
|
-
|
|
311
|
-
# objcが候補にない場合は除外
|
|
312
|
-
if best_language == "objc" and "objc" not in candidates:
|
|
313
|
-
best_language = "cpp" if cpp_score > c_score else "c"
|
|
314
|
-
|
|
315
|
-
return best_language if scores[best_language] > 0 else candidates[0]
|
|
316
|
-
|
|
317
|
-
def _detect_objc_vs_matlab(self, content: str, candidates: list[str]) -> str:
|
|
318
|
-
"""Objective-C vs MATLAB の判定"""
|
|
319
|
-
objc_score = 0
|
|
320
|
-
matlab_score = 0
|
|
321
|
-
|
|
322
|
-
# Objective-Cパターン
|
|
323
|
-
for pattern in self.CONTENT_PATTERNS["objc_vs_matlab"]["objc"]:
|
|
324
|
-
if pattern in content:
|
|
325
|
-
objc_score += 1
|
|
326
|
-
|
|
327
|
-
# MATLABパターン
|
|
328
|
-
for pattern in self.CONTENT_PATTERNS["objc_vs_matlab"]["matlab"]:
|
|
329
|
-
if pattern in content:
|
|
330
|
-
matlab_score += 1
|
|
331
|
-
|
|
332
|
-
if objc_score > matlab_score:
|
|
333
|
-
return "objc"
|
|
334
|
-
elif matlab_score > objc_score:
|
|
335
|
-
return "matlab"
|
|
336
|
-
else:
|
|
337
|
-
return candidates[0] # デフォルト
|
|
338
|
-
|
|
339
|
-
def add_extension_mapping(self, extension: str, language: str) -> None:
|
|
340
|
-
"""
|
|
341
|
-
カスタム拡張子マッピングを追加
|
|
342
|
-
|
|
343
|
-
Args:
|
|
344
|
-
extension: ファイル拡張子(.付き)
|
|
345
|
-
language: 言語名
|
|
346
|
-
"""
|
|
347
|
-
self.EXTENSION_MAPPING[extension.lower()] = language
|
|
348
|
-
|
|
349
|
-
def get_language_info(self, language: str) -> dict[str, Any]:
|
|
350
|
-
"""
|
|
351
|
-
言語の詳細情報を取得
|
|
352
|
-
|
|
353
|
-
Args:
|
|
354
|
-
language: 言語名
|
|
355
|
-
|
|
356
|
-
Returns:
|
|
357
|
-
言語情報の辞書
|
|
358
|
-
"""
|
|
359
|
-
extensions = [
|
|
360
|
-
ext for ext, lang in self.EXTENSION_MAPPING.items() if lang == language
|
|
361
|
-
]
|
|
362
|
-
|
|
363
|
-
return {
|
|
364
|
-
"name": language,
|
|
365
|
-
"extensions": extensions,
|
|
366
|
-
"supported": self.is_supported(language),
|
|
367
|
-
"tree_sitter_available": language in self.SUPPORTED_LANGUAGES,
|
|
368
|
-
}
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
# グローバルインスタンス
|
|
372
|
-
detector = LanguageDetector()
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
def detect_language_from_file(file_path: str) -> str:
|
|
376
|
-
"""
|
|
377
|
-
ファイルパスから言語を自動判定(シンプルAPI)
|
|
378
|
-
|
|
379
|
-
Args:
|
|
380
|
-
file_path: ファイルパス
|
|
381
|
-
|
|
382
|
-
Returns:
|
|
383
|
-
判定された言語名
|
|
384
|
-
"""
|
|
385
|
-
return detector.detect_from_extension(file_path)
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
def is_language_supported(language: str) -> bool:
|
|
389
|
-
"""
|
|
390
|
-
言語がサポートされているか確認(シンプルAPI)
|
|
391
|
-
|
|
392
|
-
Args:
|
|
393
|
-
language: 言語名
|
|
394
|
-
|
|
395
|
-
Returns:
|
|
396
|
-
サポート状況
|
|
397
|
-
"""
|
|
398
|
-
return detector.is_supported(language)
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Language Detection System
|
|
4
|
+
|
|
5
|
+
Automatically detects programming language from file extensions and content.
|
|
6
|
+
Supports multiple languages with extensible configuration.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class LanguageDetector:
|
|
14
|
+
"""プログラミング言語の自動判定システム"""
|
|
15
|
+
|
|
16
|
+
# 基本的な拡張子マッピング
|
|
17
|
+
EXTENSION_MAPPING: dict[str, str] = {
|
|
18
|
+
# Java系
|
|
19
|
+
".java": "java",
|
|
20
|
+
".jsp": "jsp",
|
|
21
|
+
".jspx": "jsp",
|
|
22
|
+
# JavaScript/TypeScript系
|
|
23
|
+
".js": "javascript",
|
|
24
|
+
".jsx": "jsx",
|
|
25
|
+
".ts": "typescript",
|
|
26
|
+
".tsx": "tsx",
|
|
27
|
+
".mjs": "javascript",
|
|
28
|
+
".cjs": "javascript",
|
|
29
|
+
# Python系
|
|
30
|
+
".py": "python",
|
|
31
|
+
".pyx": "python",
|
|
32
|
+
".pyi": "python",
|
|
33
|
+
".pyw": "python",
|
|
34
|
+
# C/C++系
|
|
35
|
+
".c": "c",
|
|
36
|
+
".cpp": "cpp",
|
|
37
|
+
".cxx": "cpp",
|
|
38
|
+
".cc": "cpp",
|
|
39
|
+
".h": "c", # 曖昧性あり
|
|
40
|
+
".hpp": "cpp",
|
|
41
|
+
".hxx": "cpp",
|
|
42
|
+
# その他の言語
|
|
43
|
+
".rs": "rust",
|
|
44
|
+
".go": "go",
|
|
45
|
+
".rb": "ruby",
|
|
46
|
+
".php": "php",
|
|
47
|
+
".kt": "kotlin",
|
|
48
|
+
".swift": "swift",
|
|
49
|
+
".cs": "csharp",
|
|
50
|
+
".vb": "vbnet",
|
|
51
|
+
".fs": "fsharp",
|
|
52
|
+
".scala": "scala",
|
|
53
|
+
".clj": "clojure",
|
|
54
|
+
".hs": "haskell",
|
|
55
|
+
".ml": "ocaml",
|
|
56
|
+
".lua": "lua",
|
|
57
|
+
".pl": "perl",
|
|
58
|
+
".r": "r",
|
|
59
|
+
".m": "objc", # 曖昧性あり(MATLABとも)
|
|
60
|
+
".dart": "dart",
|
|
61
|
+
".elm": "elm",
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
# 曖昧な拡張子(複数言語に対応)
|
|
65
|
+
AMBIGUOUS_EXTENSIONS: dict[str, list[str]] = {
|
|
66
|
+
".h": ["c", "cpp", "objc"],
|
|
67
|
+
".m": ["objc", "matlab"],
|
|
68
|
+
".sql": ["sql", "plsql", "mysql"],
|
|
69
|
+
".xml": ["xml", "html", "jsp"],
|
|
70
|
+
".json": ["json", "jsonc"],
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
# コンテンツベース判定のキーワード
|
|
74
|
+
CONTENT_PATTERNS: dict[str, dict[str, list[str]]] = {
|
|
75
|
+
"c_vs_cpp": {
|
|
76
|
+
"cpp": ["#include <iostream>", "std::", "namespace", "class ", "template<"],
|
|
77
|
+
"c": ["#include <stdio.h>", "printf(", "malloc(", "typedef struct"],
|
|
78
|
+
},
|
|
79
|
+
"objc_vs_matlab": {
|
|
80
|
+
"objc": ["#import", "@interface", "@implementation", "NSString", "alloc]"],
|
|
81
|
+
"matlab": ["function ", "end;", "disp(", "clc;", "clear all"],
|
|
82
|
+
},
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
# Tree-sitter対応言語(現在サポート済み)
|
|
86
|
+
SUPPORTED_LANGUAGES = {
|
|
87
|
+
"java",
|
|
88
|
+
"javascript",
|
|
89
|
+
"typescript",
|
|
90
|
+
"python",
|
|
91
|
+
"c",
|
|
92
|
+
"cpp",
|
|
93
|
+
"rust",
|
|
94
|
+
"go",
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
def __init__(self) -> None:
|
|
98
|
+
"""言語検出器を初期化"""
|
|
99
|
+
self.extension_map = {
|
|
100
|
+
".java": ("java", 0.9),
|
|
101
|
+
".js": ("javascript", 0.9),
|
|
102
|
+
".jsx": ("javascript", 0.8),
|
|
103
|
+
".ts": ("typescript", 0.9),
|
|
104
|
+
".tsx": ("typescript", 0.8),
|
|
105
|
+
".py": ("python", 0.9),
|
|
106
|
+
".pyw": ("python", 0.8),
|
|
107
|
+
".c": ("c", 0.9),
|
|
108
|
+
".h": ("c", 0.7),
|
|
109
|
+
".cpp": ("cpp", 0.9),
|
|
110
|
+
".cxx": ("cpp", 0.9),
|
|
111
|
+
".cc": ("cpp", 0.9),
|
|
112
|
+
".hpp": ("cpp", 0.8),
|
|
113
|
+
".rs": ("rust", 0.9),
|
|
114
|
+
".go": ("go", 0.9),
|
|
115
|
+
".cs": ("csharp", 0.9),
|
|
116
|
+
".php": ("php", 0.9),
|
|
117
|
+
".rb": ("ruby", 0.9),
|
|
118
|
+
".swift": ("swift", 0.9),
|
|
119
|
+
".kt": ("kotlin", 0.9),
|
|
120
|
+
".scala": ("scala", 0.9),
|
|
121
|
+
".clj": ("clojure", 0.9),
|
|
122
|
+
".hs": ("haskell", 0.9),
|
|
123
|
+
".ml": ("ocaml", 0.9),
|
|
124
|
+
".fs": ("fsharp", 0.9),
|
|
125
|
+
".elm": ("elm", 0.9),
|
|
126
|
+
".dart": ("dart", 0.9),
|
|
127
|
+
".lua": ("lua", 0.9),
|
|
128
|
+
".r": ("r", 0.9),
|
|
129
|
+
".m": ("objectivec", 0.7),
|
|
130
|
+
".mm": ("objectivec", 0.8),
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
# Content-based detection patterns
|
|
134
|
+
self.content_patterns = {
|
|
135
|
+
"java": [
|
|
136
|
+
(r"package\s+[\w\.]+\s*;", 0.3),
|
|
137
|
+
(r"public\s+class\s+\w+", 0.3),
|
|
138
|
+
(r"import\s+[\w\.]+\s*;", 0.2),
|
|
139
|
+
(r"@\w+\s*\(", 0.2), # Annotations
|
|
140
|
+
],
|
|
141
|
+
"python": [
|
|
142
|
+
(r"def\s+\w+\s*\(", 0.3),
|
|
143
|
+
(r"import\s+\w+", 0.2),
|
|
144
|
+
(r"from\s+\w+\s+import", 0.2),
|
|
145
|
+
(r'if\s+__name__\s*==\s*["\']__main__["\']', 0.3),
|
|
146
|
+
],
|
|
147
|
+
"javascript": [
|
|
148
|
+
(r"function\s+\w+\s*\(", 0.3),
|
|
149
|
+
(r"var\s+\w+\s*=", 0.2),
|
|
150
|
+
(r"let\s+\w+\s*=", 0.2),
|
|
151
|
+
(r"const\s+\w+\s*=", 0.2),
|
|
152
|
+
(r"console\.log\s*\(", 0.1),
|
|
153
|
+
],
|
|
154
|
+
"typescript": [
|
|
155
|
+
(r"interface\s+\w+", 0.3),
|
|
156
|
+
(r"type\s+\w+\s*=", 0.2),
|
|
157
|
+
(r":\s*\w+\s*=", 0.2), # Type annotations
|
|
158
|
+
(r"export\s+(interface|type|class)", 0.2),
|
|
159
|
+
],
|
|
160
|
+
"c": [
|
|
161
|
+
(r"#include\s*<[\w\.]+>", 0.3),
|
|
162
|
+
(r"int\s+main\s*\(", 0.3),
|
|
163
|
+
(r"printf\s*\(", 0.2),
|
|
164
|
+
(r"#define\s+\w+", 0.2),
|
|
165
|
+
],
|
|
166
|
+
"cpp": [
|
|
167
|
+
(r"#include\s*<[\w\.]+>", 0.2),
|
|
168
|
+
(r"using\s+namespace\s+\w+", 0.3),
|
|
169
|
+
(r"std::\w+", 0.2),
|
|
170
|
+
(r"class\s+\w+\s*{", 0.3),
|
|
171
|
+
],
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
from .utils import log_debug, log_warning
|
|
175
|
+
|
|
176
|
+
self._log_debug = log_debug
|
|
177
|
+
self._log_warning = log_warning
|
|
178
|
+
|
|
179
|
+
def detect_language(
|
|
180
|
+
self, file_path: str, content: str | None = None
|
|
181
|
+
) -> tuple[str, float]:
|
|
182
|
+
"""
|
|
183
|
+
ファイルパスとコンテンツから言語を判定
|
|
184
|
+
|
|
185
|
+
Args:
|
|
186
|
+
file_path: ファイルパス
|
|
187
|
+
content: ファイルコンテンツ(任意、曖昧性解決用)
|
|
188
|
+
|
|
189
|
+
Returns:
|
|
190
|
+
(言語名, 信頼度) のタプル
|
|
191
|
+
"""
|
|
192
|
+
path = Path(file_path)
|
|
193
|
+
extension = path.suffix.lower()
|
|
194
|
+
|
|
195
|
+
# 直接マッピングで判定できる場合
|
|
196
|
+
if extension in self.EXTENSION_MAPPING:
|
|
197
|
+
language = self.EXTENSION_MAPPING[extension]
|
|
198
|
+
|
|
199
|
+
# 曖昧性がない場合は高信頼度
|
|
200
|
+
if extension not in self.AMBIGUOUS_EXTENSIONS:
|
|
201
|
+
return language, 1.0
|
|
202
|
+
|
|
203
|
+
# 曖昧性がある場合はコンテンツベース判定
|
|
204
|
+
if content:
|
|
205
|
+
refined_language = self._resolve_ambiguity(extension, content)
|
|
206
|
+
return refined_language, 0.9 if refined_language != language else 0.7
|
|
207
|
+
else:
|
|
208
|
+
return language, 0.7 # コンテンツなしなので信頼度低下
|
|
209
|
+
|
|
210
|
+
# 拡張子が不明な場合
|
|
211
|
+
return "unknown", 0.0
|
|
212
|
+
|
|
213
|
+
def detect_from_extension(self, file_path: str) -> str:
|
|
214
|
+
"""
|
|
215
|
+
ファイル拡張子のみから言語を簡易判定
|
|
216
|
+
|
|
217
|
+
Args:
|
|
218
|
+
file_path: ファイルパス
|
|
219
|
+
|
|
220
|
+
Returns:
|
|
221
|
+
判定された言語名
|
|
222
|
+
"""
|
|
223
|
+
language, _ = self.detect_language(file_path)
|
|
224
|
+
return language
|
|
225
|
+
|
|
226
|
+
def is_supported(self, language: str) -> bool:
|
|
227
|
+
"""
|
|
228
|
+
指定された言語がTree-sitterでサポートされているか確認
|
|
229
|
+
|
|
230
|
+
Args:
|
|
231
|
+
language: 言語名
|
|
232
|
+
|
|
233
|
+
Returns:
|
|
234
|
+
サポート状況
|
|
235
|
+
"""
|
|
236
|
+
return language in self.SUPPORTED_LANGUAGES
|
|
237
|
+
|
|
238
|
+
def get_supported_extensions(self) -> list[str]:
|
|
239
|
+
"""
|
|
240
|
+
サポートされている拡張子一覧を取得
|
|
241
|
+
|
|
242
|
+
Returns:
|
|
243
|
+
拡張子のリスト
|
|
244
|
+
"""
|
|
245
|
+
return sorted(self.EXTENSION_MAPPING.keys())
|
|
246
|
+
|
|
247
|
+
def get_supported_languages(self) -> list[str]:
|
|
248
|
+
"""
|
|
249
|
+
サポートされている言語一覧を取得
|
|
250
|
+
|
|
251
|
+
Returns:
|
|
252
|
+
言語のリスト
|
|
253
|
+
"""
|
|
254
|
+
return sorted(self.SUPPORTED_LANGUAGES)
|
|
255
|
+
|
|
256
|
+
def _resolve_ambiguity(self, extension: str, content: str) -> str:
|
|
257
|
+
"""
|
|
258
|
+
曖昧な拡張子をコンテンツベースで解決
|
|
259
|
+
|
|
260
|
+
Args:
|
|
261
|
+
extension: ファイル拡張子
|
|
262
|
+
content: ファイルコンテンツ
|
|
263
|
+
|
|
264
|
+
Returns:
|
|
265
|
+
解決された言語名
|
|
266
|
+
"""
|
|
267
|
+
if extension not in self.AMBIGUOUS_EXTENSIONS:
|
|
268
|
+
return self.EXTENSION_MAPPING.get(extension, "unknown")
|
|
269
|
+
|
|
270
|
+
candidates = self.AMBIGUOUS_EXTENSIONS[extension]
|
|
271
|
+
|
|
272
|
+
# .h ファイルの場合(C vs C++ vs Objective-C)
|
|
273
|
+
if extension == ".h":
|
|
274
|
+
return self._detect_c_family(content, candidates)
|
|
275
|
+
|
|
276
|
+
# .m ファイルの場合(Objective-C vs MATLAB)
|
|
277
|
+
elif extension == ".m":
|
|
278
|
+
return self._detect_objc_vs_matlab(content, candidates)
|
|
279
|
+
|
|
280
|
+
# デフォルトは最初の候補
|
|
281
|
+
return candidates[0]
|
|
282
|
+
|
|
283
|
+
def _detect_c_family(self, content: str, candidates: list[str]) -> str:
|
|
284
|
+
"""C系言語の判定"""
|
|
285
|
+
cpp_score = 0
|
|
286
|
+
c_score = 0
|
|
287
|
+
objc_score = 0
|
|
288
|
+
|
|
289
|
+
# C++の特徴
|
|
290
|
+
cpp_patterns = self.CONTENT_PATTERNS["c_vs_cpp"]["cpp"]
|
|
291
|
+
for pattern in cpp_patterns:
|
|
292
|
+
if pattern in content:
|
|
293
|
+
cpp_score += 1
|
|
294
|
+
|
|
295
|
+
# Cの特徴
|
|
296
|
+
c_patterns = self.CONTENT_PATTERNS["c_vs_cpp"]["c"]
|
|
297
|
+
for pattern in c_patterns:
|
|
298
|
+
if pattern in content:
|
|
299
|
+
c_score += 1
|
|
300
|
+
|
|
301
|
+
# Objective-Cの特徴
|
|
302
|
+
objc_patterns = self.CONTENT_PATTERNS["objc_vs_matlab"]["objc"]
|
|
303
|
+
for pattern in objc_patterns:
|
|
304
|
+
if pattern in content:
|
|
305
|
+
objc_score += 3 # 強い指標なので重み大
|
|
306
|
+
|
|
307
|
+
# 最高スコアの言語を選択
|
|
308
|
+
scores = {"cpp": cpp_score, "c": c_score, "objc": objc_score}
|
|
309
|
+
best_language = max(scores, key=lambda x: scores[x])
|
|
310
|
+
|
|
311
|
+
# objcが候補にない場合は除外
|
|
312
|
+
if best_language == "objc" and "objc" not in candidates:
|
|
313
|
+
best_language = "cpp" if cpp_score > c_score else "c"
|
|
314
|
+
|
|
315
|
+
return best_language if scores[best_language] > 0 else candidates[0]
|
|
316
|
+
|
|
317
|
+
def _detect_objc_vs_matlab(self, content: str, candidates: list[str]) -> str:
|
|
318
|
+
"""Objective-C vs MATLAB の判定"""
|
|
319
|
+
objc_score = 0
|
|
320
|
+
matlab_score = 0
|
|
321
|
+
|
|
322
|
+
# Objective-Cパターン
|
|
323
|
+
for pattern in self.CONTENT_PATTERNS["objc_vs_matlab"]["objc"]:
|
|
324
|
+
if pattern in content:
|
|
325
|
+
objc_score += 1
|
|
326
|
+
|
|
327
|
+
# MATLABパターン
|
|
328
|
+
for pattern in self.CONTENT_PATTERNS["objc_vs_matlab"]["matlab"]:
|
|
329
|
+
if pattern in content:
|
|
330
|
+
matlab_score += 1
|
|
331
|
+
|
|
332
|
+
if objc_score > matlab_score:
|
|
333
|
+
return "objc"
|
|
334
|
+
elif matlab_score > objc_score:
|
|
335
|
+
return "matlab"
|
|
336
|
+
else:
|
|
337
|
+
return candidates[0] # デフォルト
|
|
338
|
+
|
|
339
|
+
def add_extension_mapping(self, extension: str, language: str) -> None:
|
|
340
|
+
"""
|
|
341
|
+
カスタム拡張子マッピングを追加
|
|
342
|
+
|
|
343
|
+
Args:
|
|
344
|
+
extension: ファイル拡張子(.付き)
|
|
345
|
+
language: 言語名
|
|
346
|
+
"""
|
|
347
|
+
self.EXTENSION_MAPPING[extension.lower()] = language
|
|
348
|
+
|
|
349
|
+
def get_language_info(self, language: str) -> dict[str, Any]:
|
|
350
|
+
"""
|
|
351
|
+
言語の詳細情報を取得
|
|
352
|
+
|
|
353
|
+
Args:
|
|
354
|
+
language: 言語名
|
|
355
|
+
|
|
356
|
+
Returns:
|
|
357
|
+
言語情報の辞書
|
|
358
|
+
"""
|
|
359
|
+
extensions = [
|
|
360
|
+
ext for ext, lang in self.EXTENSION_MAPPING.items() if lang == language
|
|
361
|
+
]
|
|
362
|
+
|
|
363
|
+
return {
|
|
364
|
+
"name": language,
|
|
365
|
+
"extensions": extensions,
|
|
366
|
+
"supported": self.is_supported(language),
|
|
367
|
+
"tree_sitter_available": language in self.SUPPORTED_LANGUAGES,
|
|
368
|
+
}
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
# グローバルインスタンス
|
|
372
|
+
detector = LanguageDetector()
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def detect_language_from_file(file_path: str) -> str:
|
|
376
|
+
"""
|
|
377
|
+
ファイルパスから言語を自動判定(シンプルAPI)
|
|
378
|
+
|
|
379
|
+
Args:
|
|
380
|
+
file_path: ファイルパス
|
|
381
|
+
|
|
382
|
+
Returns:
|
|
383
|
+
判定された言語名
|
|
384
|
+
"""
|
|
385
|
+
return detector.detect_from_extension(file_path)
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
def is_language_supported(language: str) -> bool:
|
|
389
|
+
"""
|
|
390
|
+
言語がサポートされているか確認(シンプルAPI)
|
|
391
|
+
|
|
392
|
+
Args:
|
|
393
|
+
language: 言語名
|
|
394
|
+
|
|
395
|
+
Returns:
|
|
396
|
+
サポート状況
|
|
397
|
+
"""
|
|
398
|
+
return detector.is_supported(language)
|