tree-sitter-analyzer 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tree-sitter-analyzer might be problematic. Click here for more details.

Files changed (78)
  1. tree_sitter_analyzer/__init__.py +133 -121
  2. tree_sitter_analyzer/__main__.py +11 -12
  3. tree_sitter_analyzer/api.py +531 -539
  4. tree_sitter_analyzer/cli/__init__.py +39 -39
  5. tree_sitter_analyzer/cli/__main__.py +12 -13
  6. tree_sitter_analyzer/cli/commands/__init__.py +26 -27
  7. tree_sitter_analyzer/cli/commands/advanced_command.py +88 -88
  8. tree_sitter_analyzer/cli/commands/base_command.py +160 -155
  9. tree_sitter_analyzer/cli/commands/default_command.py +18 -19
  10. tree_sitter_analyzer/cli/commands/partial_read_command.py +141 -133
  11. tree_sitter_analyzer/cli/commands/query_command.py +81 -82
  12. tree_sitter_analyzer/cli/commands/structure_command.py +138 -121
  13. tree_sitter_analyzer/cli/commands/summary_command.py +101 -93
  14. tree_sitter_analyzer/cli/commands/table_command.py +232 -233
  15. tree_sitter_analyzer/cli/info_commands.py +120 -121
  16. tree_sitter_analyzer/cli_main.py +277 -276
  17. tree_sitter_analyzer/core/__init__.py +15 -20
  18. tree_sitter_analyzer/core/analysis_engine.py +591 -574
  19. tree_sitter_analyzer/core/cache_service.py +320 -330
  20. tree_sitter_analyzer/core/engine.py +557 -560
  21. tree_sitter_analyzer/core/parser.py +293 -288
  22. tree_sitter_analyzer/core/query.py +494 -502
  23. tree_sitter_analyzer/encoding_utils.py +458 -460
  24. tree_sitter_analyzer/exceptions.py +337 -340
  25. tree_sitter_analyzer/file_handler.py +217 -222
  26. tree_sitter_analyzer/formatters/__init__.py +1 -1
  27. tree_sitter_analyzer/formatters/base_formatter.py +167 -168
  28. tree_sitter_analyzer/formatters/formatter_factory.py +78 -74
  29. tree_sitter_analyzer/formatters/java_formatter.py +287 -270
  30. tree_sitter_analyzer/formatters/python_formatter.py +255 -235
  31. tree_sitter_analyzer/interfaces/__init__.py +9 -10
  32. tree_sitter_analyzer/interfaces/cli.py +528 -557
  33. tree_sitter_analyzer/interfaces/cli_adapter.py +322 -319
  34. tree_sitter_analyzer/interfaces/mcp_adapter.py +180 -170
  35. tree_sitter_analyzer/interfaces/mcp_server.py +405 -416
  36. tree_sitter_analyzer/java_analyzer.py +218 -219
  37. tree_sitter_analyzer/language_detector.py +398 -400
  38. tree_sitter_analyzer/language_loader.py +224 -228
  39. tree_sitter_analyzer/languages/__init__.py +10 -11
  40. tree_sitter_analyzer/languages/java_plugin.py +1129 -1113
  41. tree_sitter_analyzer/languages/python_plugin.py +737 -712
  42. tree_sitter_analyzer/mcp/__init__.py +31 -32
  43. tree_sitter_analyzer/mcp/resources/__init__.py +44 -47
  44. tree_sitter_analyzer/mcp/resources/code_file_resource.py +212 -213
  45. tree_sitter_analyzer/mcp/resources/project_stats_resource.py +560 -550
  46. tree_sitter_analyzer/mcp/server.py +333 -345
  47. tree_sitter_analyzer/mcp/tools/__init__.py +30 -31
  48. tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +621 -557
  49. tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +242 -245
  50. tree_sitter_analyzer/mcp/tools/base_tool.py +54 -55
  51. tree_sitter_analyzer/mcp/tools/read_partial_tool.py +300 -302
  52. tree_sitter_analyzer/mcp/tools/table_format_tool.py +362 -359
  53. tree_sitter_analyzer/mcp/tools/universal_analyze_tool.py +543 -476
  54. tree_sitter_analyzer/mcp/utils/__init__.py +105 -106
  55. tree_sitter_analyzer/mcp/utils/error_handler.py +549 -549
  56. tree_sitter_analyzer/models.py +470 -481
  57. tree_sitter_analyzer/output_manager.py +261 -264
  58. tree_sitter_analyzer/plugins/__init__.py +333 -334
  59. tree_sitter_analyzer/plugins/base.py +477 -446
  60. tree_sitter_analyzer/plugins/java_plugin.py +608 -625
  61. tree_sitter_analyzer/plugins/javascript_plugin.py +446 -439
  62. tree_sitter_analyzer/plugins/manager.py +362 -355
  63. tree_sitter_analyzer/plugins/plugin_loader.py +85 -83
  64. tree_sitter_analyzer/plugins/python_plugin.py +606 -598
  65. tree_sitter_analyzer/plugins/registry.py +374 -366
  66. tree_sitter_analyzer/queries/__init__.py +26 -27
  67. tree_sitter_analyzer/queries/java.py +391 -394
  68. tree_sitter_analyzer/queries/javascript.py +148 -149
  69. tree_sitter_analyzer/queries/python.py +285 -286
  70. tree_sitter_analyzer/queries/typescript.py +229 -230
  71. tree_sitter_analyzer/query_loader.py +254 -260
  72. tree_sitter_analyzer/table_formatter.py +468 -448
  73. tree_sitter_analyzer/utils.py +277 -277
  74. {tree_sitter_analyzer-0.2.0.dist-info → tree_sitter_analyzer-0.3.0.dist-info}/METADATA +21 -6
  75. tree_sitter_analyzer-0.3.0.dist-info/RECORD +77 -0
  76. tree_sitter_analyzer-0.2.0.dist-info/RECORD +0 -77
  77. {tree_sitter_analyzer-0.2.0.dist-info → tree_sitter_analyzer-0.3.0.dist-info}/WHEEL +0 -0
  78. {tree_sitter_analyzer-0.2.0.dist-info → tree_sitter_analyzer-0.3.0.dist-info}/entry_points.txt +0 -0
@@ -1,400 +1,398 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- """
4
- Language Detection System
5
-
6
- Automatically detects programming language from file extensions and content.
7
- Supports multiple languages with extensible configuration.
8
- """
9
-
10
- import re
11
- from pathlib import Path
12
- from typing import Any, Dict, List, Optional, Tuple
13
-
14
-
15
- class LanguageDetector:
16
- """プログラミング言語の自動判定システム"""
17
-
18
- # 基本的な拡張子マッピング
19
- EXTENSION_MAPPING: Dict[str, str] = {
20
- # Java系
21
- ".java": "java",
22
- ".jsp": "jsp",
23
- ".jspx": "jsp",
24
- # JavaScript/TypeScript系
25
- ".js": "javascript",
26
- ".jsx": "jsx",
27
- ".ts": "typescript",
28
- ".tsx": "tsx",
29
- ".mjs": "javascript",
30
- ".cjs": "javascript",
31
- # Python系
32
- ".py": "python",
33
- ".pyx": "python",
34
- ".pyi": "python",
35
- ".pyw": "python",
36
- # C/C++系
37
- ".c": "c",
38
- ".cpp": "cpp",
39
- ".cxx": "cpp",
40
- ".cc": "cpp",
41
- ".h": "c", # 曖昧性あり
42
- ".hpp": "cpp",
43
- ".hxx": "cpp",
44
- # その他の言語
45
- ".rs": "rust",
46
- ".go": "go",
47
- ".rb": "ruby",
48
- ".php": "php",
49
- ".kt": "kotlin",
50
- ".swift": "swift",
51
- ".cs": "csharp",
52
- ".vb": "vbnet",
53
- ".fs": "fsharp",
54
- ".scala": "scala",
55
- ".clj": "clojure",
56
- ".hs": "haskell",
57
- ".ml": "ocaml",
58
- ".lua": "lua",
59
- ".pl": "perl",
60
- ".r": "r",
61
- ".m": "objc", # 曖昧性あり(MATLABとも)
62
- ".dart": "dart",
63
- ".elm": "elm",
64
- }
65
-
66
- # 曖昧な拡張子(複数言語に対応)
67
- AMBIGUOUS_EXTENSIONS: Dict[str, List[str]] = {
68
- ".h": ["c", "cpp", "objc"],
69
- ".m": ["objc", "matlab"],
70
- ".sql": ["sql", "plsql", "mysql"],
71
- ".xml": ["xml", "html", "jsp"],
72
- ".json": ["json", "jsonc"],
73
- }
74
-
75
- # コンテンツベース判定のキーワード
76
- CONTENT_PATTERNS: Dict[str, Dict[str, List[str]]] = {
77
- "c_vs_cpp": {
78
- "cpp": ["#include <iostream>", "std::", "namespace", "class ", "template<"],
79
- "c": ["#include <stdio.h>", "printf(", "malloc(", "typedef struct"],
80
- },
81
- "objc_vs_matlab": {
82
- "objc": ["#import", "@interface", "@implementation", "NSString", "alloc]"],
83
- "matlab": ["function ", "end;", "disp(", "clc;", "clear all"],
84
- },
85
- }
86
-
87
- # Tree-sitter対応言語(現在サポート済み)
88
- SUPPORTED_LANGUAGES = {
89
- "java",
90
- "javascript",
91
- "typescript",
92
- "python",
93
- "c",
94
- "cpp",
95
- "rust",
96
- "go",
97
- }
98
-
99
- def __init__(self) -> None:
100
- """言語検出器を初期化"""
101
- self.extension_map = {
102
- ".java": ("java", 0.9),
103
- ".js": ("javascript", 0.9),
104
- ".jsx": ("javascript", 0.8),
105
- ".ts": ("typescript", 0.9),
106
- ".tsx": ("typescript", 0.8),
107
- ".py": ("python", 0.9),
108
- ".pyw": ("python", 0.8),
109
- ".c": ("c", 0.9),
110
- ".h": ("c", 0.7),
111
- ".cpp": ("cpp", 0.9),
112
- ".cxx": ("cpp", 0.9),
113
- ".cc": ("cpp", 0.9),
114
- ".hpp": ("cpp", 0.8),
115
- ".rs": ("rust", 0.9),
116
- ".go": ("go", 0.9),
117
- ".cs": ("csharp", 0.9),
118
- ".php": ("php", 0.9),
119
- ".rb": ("ruby", 0.9),
120
- ".swift": ("swift", 0.9),
121
- ".kt": ("kotlin", 0.9),
122
- ".scala": ("scala", 0.9),
123
- ".clj": ("clojure", 0.9),
124
- ".hs": ("haskell", 0.9),
125
- ".ml": ("ocaml", 0.9),
126
- ".fs": ("fsharp", 0.9),
127
- ".elm": ("elm", 0.9),
128
- ".dart": ("dart", 0.9),
129
- ".lua": ("lua", 0.9),
130
- ".r": ("r", 0.9),
131
- ".m": ("objectivec", 0.7),
132
- ".mm": ("objectivec", 0.8),
133
- }
134
-
135
- # Content-based detection patterns
136
- self.content_patterns = {
137
- "java": [
138
- (r"package\s+[\w\.]+\s*;", 0.3),
139
- (r"public\s+class\s+\w+", 0.3),
140
- (r"import\s+[\w\.]+\s*;", 0.2),
141
- (r"@\w+\s*\(", 0.2), # Annotations
142
- ],
143
- "python": [
144
- (r"def\s+\w+\s*\(", 0.3),
145
- (r"import\s+\w+", 0.2),
146
- (r"from\s+\w+\s+import", 0.2),
147
- (r'if\s+__name__\s*==\s*["\']__main__["\']', 0.3),
148
- ],
149
- "javascript": [
150
- (r"function\s+\w+\s*\(", 0.3),
151
- (r"var\s+\w+\s*=", 0.2),
152
- (r"let\s+\w+\s*=", 0.2),
153
- (r"const\s+\w+\s*=", 0.2),
154
- (r"console\.log\s*\(", 0.1),
155
- ],
156
- "typescript": [
157
- (r"interface\s+\w+", 0.3),
158
- (r"type\s+\w+\s*=", 0.2),
159
- (r":\s*\w+\s*=", 0.2), # Type annotations
160
- (r"export\s+(interface|type|class)", 0.2),
161
- ],
162
- "c": [
163
- (r"#include\s*<[\w\.]+>", 0.3),
164
- (r"int\s+main\s*\(", 0.3),
165
- (r"printf\s*\(", 0.2),
166
- (r"#define\s+\w+", 0.2),
167
- ],
168
- "cpp": [
169
- (r"#include\s*<[\w\.]+>", 0.2),
170
- (r"using\s+namespace\s+\w+", 0.3),
171
- (r"std::\w+", 0.2),
172
- (r"class\s+\w+\s*{", 0.3),
173
- ],
174
- }
175
-
176
- from .utils import log_debug, log_warning
177
-
178
- self._log_debug = log_debug
179
- self._log_warning = log_warning
180
-
181
- def detect_language(
182
- self, file_path: str, content: Optional[str] = None
183
- ) -> Tuple[str, float]:
184
- """
185
- ファイルパスとコンテンツから言語を判定
186
-
187
- Args:
188
- file_path: ファイルパス
189
- content: ファイルコンテンツ(任意、曖昧性解決用)
190
-
191
- Returns:
192
- (言語名, 信頼度) のタプル
193
- """
194
- path = Path(file_path)
195
- extension = path.suffix.lower()
196
-
197
- # 直接マッピングで判定できる場合
198
- if extension in self.EXTENSION_MAPPING:
199
- language = self.EXTENSION_MAPPING[extension]
200
-
201
- # 曖昧性がない場合は高信頼度
202
- if extension not in self.AMBIGUOUS_EXTENSIONS:
203
- return language, 1.0
204
-
205
- # 曖昧性がある場合はコンテンツベース判定
206
- if content:
207
- refined_language = self._resolve_ambiguity(extension, content)
208
- return refined_language, 0.9 if refined_language != language else 0.7
209
- else:
210
- return language, 0.7 # コンテンツなしなので信頼度低下
211
-
212
- # 拡張子が不明な場合
213
- return "unknown", 0.0
214
-
215
- def detect_from_extension(self, file_path: str) -> str:
216
- """
217
- ファイル拡張子のみから言語を簡易判定
218
-
219
- Args:
220
- file_path: ファイルパス
221
-
222
- Returns:
223
- 判定された言語名
224
- """
225
- language, _ = self.detect_language(file_path)
226
- return language
227
-
228
- def is_supported(self, language: str) -> bool:
229
- """
230
- 指定された言語がTree-sitterでサポートされているか確認
231
-
232
- Args:
233
- language: 言語名
234
-
235
- Returns:
236
- サポート状況
237
- """
238
- return language in self.SUPPORTED_LANGUAGES
239
-
240
- def get_supported_extensions(self) -> List[str]:
241
- """
242
- サポートされている拡張子一覧を取得
243
-
244
- Returns:
245
- 拡張子のリスト
246
- """
247
- return sorted(self.EXTENSION_MAPPING.keys())
248
-
249
- def get_supported_languages(self) -> List[str]:
250
- """
251
- サポートされている言語一覧を取得
252
-
253
- Returns:
254
- 言語のリスト
255
- """
256
- return sorted(self.SUPPORTED_LANGUAGES)
257
-
258
- def _resolve_ambiguity(self, extension: str, content: str) -> str:
259
- """
260
- 曖昧な拡張子をコンテンツベースで解決
261
-
262
- Args:
263
- extension: ファイル拡張子
264
- content: ファイルコンテンツ
265
-
266
- Returns:
267
- 解決された言語名
268
- """
269
- if extension not in self.AMBIGUOUS_EXTENSIONS:
270
- return self.EXTENSION_MAPPING.get(extension, "unknown")
271
-
272
- candidates = self.AMBIGUOUS_EXTENSIONS[extension]
273
-
274
- # .h ファイルの場合(C vs C++ vs Objective-C)
275
- if extension == ".h":
276
- return self._detect_c_family(content, candidates)
277
-
278
- # .m ファイルの場合(Objective-C vs MATLAB)
279
- elif extension == ".m":
280
- return self._detect_objc_vs_matlab(content, candidates)
281
-
282
- # デフォルトは最初の候補
283
- return candidates[0]
284
-
285
- def _detect_c_family(self, content: str, candidates: List[str]) -> str:
286
- """C系言語の判定"""
287
- cpp_score = 0
288
- c_score = 0
289
- objc_score = 0
290
-
291
- # C++の特徴
292
- cpp_patterns = self.CONTENT_PATTERNS["c_vs_cpp"]["cpp"]
293
- for pattern in cpp_patterns:
294
- if pattern in content:
295
- cpp_score += 1
296
-
297
- # Cの特徴
298
- c_patterns = self.CONTENT_PATTERNS["c_vs_cpp"]["c"]
299
- for pattern in c_patterns:
300
- if pattern in content:
301
- c_score += 1
302
-
303
- # Objective-Cの特徴
304
- objc_patterns = self.CONTENT_PATTERNS["objc_vs_matlab"]["objc"]
305
- for pattern in objc_patterns:
306
- if pattern in content:
307
- objc_score += 3 # 強い指標なので重み大
308
-
309
- # 最高スコアの言語を選択
310
- scores = {"cpp": cpp_score, "c": c_score, "objc": objc_score}
311
- best_language = max(scores, key=lambda x: scores[x])
312
-
313
- # objcが候補にない場合は除外
314
- if best_language == "objc" and "objc" not in candidates:
315
- best_language = "cpp" if cpp_score > c_score else "c"
316
-
317
- return best_language if scores[best_language] > 0 else candidates[0]
318
-
319
- def _detect_objc_vs_matlab(self, content: str, candidates: List[str]) -> str:
320
- """Objective-C vs MATLAB の判定"""
321
- objc_score = 0
322
- matlab_score = 0
323
-
324
- # Objective-Cパターン
325
- for pattern in self.CONTENT_PATTERNS["objc_vs_matlab"]["objc"]:
326
- if pattern in content:
327
- objc_score += 1
328
-
329
- # MATLABパターン
330
- for pattern in self.CONTENT_PATTERNS["objc_vs_matlab"]["matlab"]:
331
- if pattern in content:
332
- matlab_score += 1
333
-
334
- if objc_score > matlab_score:
335
- return "objc"
336
- elif matlab_score > objc_score:
337
- return "matlab"
338
- else:
339
- return candidates[0] # デフォルト
340
-
341
- def add_extension_mapping(self, extension: str, language: str) -> None:
342
- """
343
- カスタム拡張子マッピングを追加
344
-
345
- Args:
346
- extension: ファイル拡張子(.付き)
347
- language: 言語名
348
- """
349
- self.EXTENSION_MAPPING[extension.lower()] = language
350
-
351
- def get_language_info(self, language: str) -> Dict[str, Any]:
352
- """
353
- 言語の詳細情報を取得
354
-
355
- Args:
356
- language: 言語名
357
-
358
- Returns:
359
- 言語情報の辞書
360
- """
361
- extensions = [
362
- ext for ext, lang in self.EXTENSION_MAPPING.items() if lang == language
363
- ]
364
-
365
- return {
366
- "name": language,
367
- "extensions": extensions,
368
- "supported": self.is_supported(language),
369
- "tree_sitter_available": language in self.SUPPORTED_LANGUAGES,
370
- }
371
-
372
-
373
- # グローバルインスタンス
374
- detector = LanguageDetector()
375
-
376
-
377
- def detect_language_from_file(file_path: str) -> str:
378
- """
379
- ファイルパスから言語を自動判定(シンプルAPI)
380
-
381
- Args:
382
- file_path: ファイルパス
383
-
384
- Returns:
385
- 判定された言語名
386
- """
387
- return detector.detect_from_extension(file_path)
388
-
389
-
390
- def is_language_supported(language: str) -> bool:
391
- """
392
- 言語がサポートされているか確認(シンプルAPI)
393
-
394
- Args:
395
- language: 言語名
396
-
397
- Returns:
398
- サポート状況
399
- """
400
- return detector.is_supported(language)
1
+ #!/usr/bin/env python3
2
+ """
3
+ Language Detection System
4
+
5
+ Automatically detects programming language from file extensions and content.
6
+ Supports multiple languages with extensible configuration.
7
+ """
8
+
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+
13
+ class LanguageDetector:
14
+ """プログラミング言語の自動判定システム"""
15
+
16
+ # 基本的な拡張子マッピング
17
+ EXTENSION_MAPPING: dict[str, str] = {
18
+ # Java系
19
+ ".java": "java",
20
+ ".jsp": "jsp",
21
+ ".jspx": "jsp",
22
+ # JavaScript/TypeScript系
23
+ ".js": "javascript",
24
+ ".jsx": "jsx",
25
+ ".ts": "typescript",
26
+ ".tsx": "tsx",
27
+ ".mjs": "javascript",
28
+ ".cjs": "javascript",
29
+ # Python系
30
+ ".py": "python",
31
+ ".pyx": "python",
32
+ ".pyi": "python",
33
+ ".pyw": "python",
34
+ # C/C++系
35
+ ".c": "c",
36
+ ".cpp": "cpp",
37
+ ".cxx": "cpp",
38
+ ".cc": "cpp",
39
+ ".h": "c", # 曖昧性あり
40
+ ".hpp": "cpp",
41
+ ".hxx": "cpp",
42
+ # その他の言語
43
+ ".rs": "rust",
44
+ ".go": "go",
45
+ ".rb": "ruby",
46
+ ".php": "php",
47
+ ".kt": "kotlin",
48
+ ".swift": "swift",
49
+ ".cs": "csharp",
50
+ ".vb": "vbnet",
51
+ ".fs": "fsharp",
52
+ ".scala": "scala",
53
+ ".clj": "clojure",
54
+ ".hs": "haskell",
55
+ ".ml": "ocaml",
56
+ ".lua": "lua",
57
+ ".pl": "perl",
58
+ ".r": "r",
59
+ ".m": "objc", # 曖昧性あり(MATLABとも)
60
+ ".dart": "dart",
61
+ ".elm": "elm",
62
+ }
63
+
64
+ # 曖昧な拡張子(複数言語に対応)
65
+ AMBIGUOUS_EXTENSIONS: dict[str, list[str]] = {
66
+ ".h": ["c", "cpp", "objc"],
67
+ ".m": ["objc", "matlab"],
68
+ ".sql": ["sql", "plsql", "mysql"],
69
+ ".xml": ["xml", "html", "jsp"],
70
+ ".json": ["json", "jsonc"],
71
+ }
72
+
73
+ # コンテンツベース判定のキーワード
74
+ CONTENT_PATTERNS: dict[str, dict[str, list[str]]] = {
75
+ "c_vs_cpp": {
76
+ "cpp": ["#include <iostream>", "std::", "namespace", "class ", "template<"],
77
+ "c": ["#include <stdio.h>", "printf(", "malloc(", "typedef struct"],
78
+ },
79
+ "objc_vs_matlab": {
80
+ "objc": ["#import", "@interface", "@implementation", "NSString", "alloc]"],
81
+ "matlab": ["function ", "end;", "disp(", "clc;", "clear all"],
82
+ },
83
+ }
84
+
85
+ # Tree-sitter対応言語(現在サポート済み)
86
+ SUPPORTED_LANGUAGES = {
87
+ "java",
88
+ "javascript",
89
+ "typescript",
90
+ "python",
91
+ "c",
92
+ "cpp",
93
+ "rust",
94
+ "go",
95
+ }
96
+
97
+ def __init__(self) -> None:
98
+ """言語検出器を初期化"""
99
+ self.extension_map = {
100
+ ".java": ("java", 0.9),
101
+ ".js": ("javascript", 0.9),
102
+ ".jsx": ("javascript", 0.8),
103
+ ".ts": ("typescript", 0.9),
104
+ ".tsx": ("typescript", 0.8),
105
+ ".py": ("python", 0.9),
106
+ ".pyw": ("python", 0.8),
107
+ ".c": ("c", 0.9),
108
+ ".h": ("c", 0.7),
109
+ ".cpp": ("cpp", 0.9),
110
+ ".cxx": ("cpp", 0.9),
111
+ ".cc": ("cpp", 0.9),
112
+ ".hpp": ("cpp", 0.8),
113
+ ".rs": ("rust", 0.9),
114
+ ".go": ("go", 0.9),
115
+ ".cs": ("csharp", 0.9),
116
+ ".php": ("php", 0.9),
117
+ ".rb": ("ruby", 0.9),
118
+ ".swift": ("swift", 0.9),
119
+ ".kt": ("kotlin", 0.9),
120
+ ".scala": ("scala", 0.9),
121
+ ".clj": ("clojure", 0.9),
122
+ ".hs": ("haskell", 0.9),
123
+ ".ml": ("ocaml", 0.9),
124
+ ".fs": ("fsharp", 0.9),
125
+ ".elm": ("elm", 0.9),
126
+ ".dart": ("dart", 0.9),
127
+ ".lua": ("lua", 0.9),
128
+ ".r": ("r", 0.9),
129
+ ".m": ("objectivec", 0.7),
130
+ ".mm": ("objectivec", 0.8),
131
+ }
132
+
133
+ # Content-based detection patterns
134
+ self.content_patterns = {
135
+ "java": [
136
+ (r"package\s+[\w\.]+\s*;", 0.3),
137
+ (r"public\s+class\s+\w+", 0.3),
138
+ (r"import\s+[\w\.]+\s*;", 0.2),
139
+ (r"@\w+\s*\(", 0.2), # Annotations
140
+ ],
141
+ "python": [
142
+ (r"def\s+\w+\s*\(", 0.3),
143
+ (r"import\s+\w+", 0.2),
144
+ (r"from\s+\w+\s+import", 0.2),
145
+ (r'if\s+__name__\s*==\s*["\']__main__["\']', 0.3),
146
+ ],
147
+ "javascript": [
148
+ (r"function\s+\w+\s*\(", 0.3),
149
+ (r"var\s+\w+\s*=", 0.2),
150
+ (r"let\s+\w+\s*=", 0.2),
151
+ (r"const\s+\w+\s*=", 0.2),
152
+ (r"console\.log\s*\(", 0.1),
153
+ ],
154
+ "typescript": [
155
+ (r"interface\s+\w+", 0.3),
156
+ (r"type\s+\w+\s*=", 0.2),
157
+ (r":\s*\w+\s*=", 0.2), # Type annotations
158
+ (r"export\s+(interface|type|class)", 0.2),
159
+ ],
160
+ "c": [
161
+ (r"#include\s*<[\w\.]+>", 0.3),
162
+ (r"int\s+main\s*\(", 0.3),
163
+ (r"printf\s*\(", 0.2),
164
+ (r"#define\s+\w+", 0.2),
165
+ ],
166
+ "cpp": [
167
+ (r"#include\s*<[\w\.]+>", 0.2),
168
+ (r"using\s+namespace\s+\w+", 0.3),
169
+ (r"std::\w+", 0.2),
170
+ (r"class\s+\w+\s*{", 0.3),
171
+ ],
172
+ }
173
+
174
+ from .utils import log_debug, log_warning
175
+
176
+ self._log_debug = log_debug
177
+ self._log_warning = log_warning
178
+
179
+ def detect_language(
180
+ self, file_path: str, content: str | None = None
181
+ ) -> tuple[str, float]:
182
+ """
183
+ ファイルパスとコンテンツから言語を判定
184
+
185
+ Args:
186
+ file_path: ファイルパス
187
+ content: ファイルコンテンツ(任意、曖昧性解決用)
188
+
189
+ Returns:
190
+ (言語名, 信頼度) のタプル
191
+ """
192
+ path = Path(file_path)
193
+ extension = path.suffix.lower()
194
+
195
+ # 直接マッピングで判定できる場合
196
+ if extension in self.EXTENSION_MAPPING:
197
+ language = self.EXTENSION_MAPPING[extension]
198
+
199
+ # 曖昧性がない場合は高信頼度
200
+ if extension not in self.AMBIGUOUS_EXTENSIONS:
201
+ return language, 1.0
202
+
203
+ # 曖昧性がある場合はコンテンツベース判定
204
+ if content:
205
+ refined_language = self._resolve_ambiguity(extension, content)
206
+ return refined_language, 0.9 if refined_language != language else 0.7
207
+ else:
208
+ return language, 0.7 # コンテンツなしなので信頼度低下
209
+
210
+ # 拡張子が不明な場合
211
+ return "unknown", 0.0
212
+
213
+ def detect_from_extension(self, file_path: str) -> str:
214
+ """
215
+ ファイル拡張子のみから言語を簡易判定
216
+
217
+ Args:
218
+ file_path: ファイルパス
219
+
220
+ Returns:
221
+ 判定された言語名
222
+ """
223
+ language, _ = self.detect_language(file_path)
224
+ return language
225
+
226
+ def is_supported(self, language: str) -> bool:
227
+ """
228
+ 指定された言語がTree-sitterでサポートされているか確認
229
+
230
+ Args:
231
+ language: 言語名
232
+
233
+ Returns:
234
+ サポート状況
235
+ """
236
+ return language in self.SUPPORTED_LANGUAGES
237
+
238
+ def get_supported_extensions(self) -> list[str]:
239
+ """
240
+ サポートされている拡張子一覧を取得
241
+
242
+ Returns:
243
+ 拡張子のリスト
244
+ """
245
+ return sorted(self.EXTENSION_MAPPING.keys())
246
+
247
+ def get_supported_languages(self) -> list[str]:
248
+ """
249
+ サポートされている言語一覧を取得
250
+
251
+ Returns:
252
+ 言語のリスト
253
+ """
254
+ return sorted(self.SUPPORTED_LANGUAGES)
255
+
256
+ def _resolve_ambiguity(self, extension: str, content: str) -> str:
257
+ """
258
+ 曖昧な拡張子をコンテンツベースで解決
259
+
260
+ Args:
261
+ extension: ファイル拡張子
262
+ content: ファイルコンテンツ
263
+
264
+ Returns:
265
+ 解決された言語名
266
+ """
267
+ if extension not in self.AMBIGUOUS_EXTENSIONS:
268
+ return self.EXTENSION_MAPPING.get(extension, "unknown")
269
+
270
+ candidates = self.AMBIGUOUS_EXTENSIONS[extension]
271
+
272
+ # .h ファイルの場合(C vs C++ vs Objective-C)
273
+ if extension == ".h":
274
+ return self._detect_c_family(content, candidates)
275
+
276
+ # .m ファイルの場合(Objective-C vs MATLAB)
277
+ elif extension == ".m":
278
+ return self._detect_objc_vs_matlab(content, candidates)
279
+
280
+ # デフォルトは最初の候補
281
+ return candidates[0]
282
+
283
+ def _detect_c_family(self, content: str, candidates: list[str]) -> str:
284
+ """C系言語の判定"""
285
+ cpp_score = 0
286
+ c_score = 0
287
+ objc_score = 0
288
+
289
+ # C++の特徴
290
+ cpp_patterns = self.CONTENT_PATTERNS["c_vs_cpp"]["cpp"]
291
+ for pattern in cpp_patterns:
292
+ if pattern in content:
293
+ cpp_score += 1
294
+
295
+ # Cの特徴
296
+ c_patterns = self.CONTENT_PATTERNS["c_vs_cpp"]["c"]
297
+ for pattern in c_patterns:
298
+ if pattern in content:
299
+ c_score += 1
300
+
301
+ # Objective-Cの特徴
302
+ objc_patterns = self.CONTENT_PATTERNS["objc_vs_matlab"]["objc"]
303
+ for pattern in objc_patterns:
304
+ if pattern in content:
305
+ objc_score += 3 # 強い指標なので重み大
306
+
307
+ # 最高スコアの言語を選択
308
+ scores = {"cpp": cpp_score, "c": c_score, "objc": objc_score}
309
+ best_language = max(scores, key=lambda x: scores[x])
310
+
311
+ # objcが候補にない場合は除外
312
+ if best_language == "objc" and "objc" not in candidates:
313
+ best_language = "cpp" if cpp_score > c_score else "c"
314
+
315
+ return best_language if scores[best_language] > 0 else candidates[0]
316
+
317
+ def _detect_objc_vs_matlab(self, content: str, candidates: list[str]) -> str:
318
+ """Objective-C vs MATLAB の判定"""
319
+ objc_score = 0
320
+ matlab_score = 0
321
+
322
+ # Objective-Cパターン
323
+ for pattern in self.CONTENT_PATTERNS["objc_vs_matlab"]["objc"]:
324
+ if pattern in content:
325
+ objc_score += 1
326
+
327
+ # MATLABパターン
328
+ for pattern in self.CONTENT_PATTERNS["objc_vs_matlab"]["matlab"]:
329
+ if pattern in content:
330
+ matlab_score += 1
331
+
332
+ if objc_score > matlab_score:
333
+ return "objc"
334
+ elif matlab_score > objc_score:
335
+ return "matlab"
336
+ else:
337
+ return candidates[0] # デフォルト
338
+
339
+ def add_extension_mapping(self, extension: str, language: str) -> None:
340
+ """
341
+ カスタム拡張子マッピングを追加
342
+
343
+ Args:
344
+ extension: ファイル拡張子(.付き)
345
+ language: 言語名
346
+ """
347
+ self.EXTENSION_MAPPING[extension.lower()] = language
348
+
349
+ def get_language_info(self, language: str) -> dict[str, Any]:
350
+ """
351
+ 言語の詳細情報を取得
352
+
353
+ Args:
354
+ language: 言語名
355
+
356
+ Returns:
357
+ 言語情報の辞書
358
+ """
359
+ extensions = [
360
+ ext for ext, lang in self.EXTENSION_MAPPING.items() if lang == language
361
+ ]
362
+
363
+ return {
364
+ "name": language,
365
+ "extensions": extensions,
366
+ "supported": self.is_supported(language),
367
+ "tree_sitter_available": language in self.SUPPORTED_LANGUAGES,
368
+ }
369
+
370
+
371
+ # Module-level shared LanguageDetector instance used by the helper functions below
372
+ detector = LanguageDetector()
373
+
374
+
375
def detect_language_from_file(file_path: str) -> str:
    """Detect a file's language from its path (simple API).

    Delegates to the module-level ``detector`` instance.

    Args:
        file_path: Path of the file.

    Returns:
        The detected language name.
    """
    language = detector.detect_from_extension(file_path)
    return language
386
+
387
+
388
def is_language_supported(language: str) -> bool:
    """Check whether a language is supported (simple API).

    Delegates to the module-level ``detector`` instance.

    Args:
        language: Language name.

    Returns:
        The support status reported by ``detector.is_supported``.
    """
    supported = detector.is_supported(language)
    return supported