tree-sitter-analyzer 0.9.3__py3-none-any.whl → 0.9.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tree-sitter-analyzer might be problematic. Click here for more details.

Files changed (32)
  1. tree_sitter_analyzer/cli/commands/default_command.py +18 -18
  2. tree_sitter_analyzer/cli/commands/partial_read_command.py +139 -141
  3. tree_sitter_analyzer/cli/commands/query_command.py +92 -88
  4. tree_sitter_analyzer/cli/commands/table_command.py +235 -235
  5. tree_sitter_analyzer/cli/info_commands.py +121 -121
  6. tree_sitter_analyzer/cli_main.py +307 -307
  7. tree_sitter_analyzer/core/analysis_engine.py +584 -584
  8. tree_sitter_analyzer/core/cache_service.py +5 -4
  9. tree_sitter_analyzer/core/query.py +502 -502
  10. tree_sitter_analyzer/encoding_utils.py +6 -2
  11. tree_sitter_analyzer/exceptions.py +400 -406
  12. tree_sitter_analyzer/formatters/java_formatter.py +291 -291
  13. tree_sitter_analyzer/formatters/python_formatter.py +259 -259
  14. tree_sitter_analyzer/interfaces/mcp_server.py +426 -425
  15. tree_sitter_analyzer/language_detector.py +398 -398
  16. tree_sitter_analyzer/language_loader.py +224 -224
  17. tree_sitter_analyzer/languages/java_plugin.py +1202 -1202
  18. tree_sitter_analyzer/mcp/resources/project_stats_resource.py +559 -555
  19. tree_sitter_analyzer/mcp/server.py +30 -9
  20. tree_sitter_analyzer/mcp/tools/read_partial_tool.py +21 -4
  21. tree_sitter_analyzer/mcp/tools/table_format_tool.py +22 -4
  22. tree_sitter_analyzer/mcp/utils/error_handler.py +569 -567
  23. tree_sitter_analyzer/models.py +470 -470
  24. tree_sitter_analyzer/security/__init__.py +22 -22
  25. tree_sitter_analyzer/security/boundary_manager.py +243 -243
  26. tree_sitter_analyzer/security/regex_checker.py +297 -292
  27. tree_sitter_analyzer/table_formatter.py +703 -652
  28. tree_sitter_analyzer/utils.py +50 -19
  29. {tree_sitter_analyzer-0.9.3.dist-info → tree_sitter_analyzer-0.9.4.dist-info}/METADATA +1 -1
  30. {tree_sitter_analyzer-0.9.3.dist-info → tree_sitter_analyzer-0.9.4.dist-info}/RECORD +32 -32
  31. {tree_sitter_analyzer-0.9.3.dist-info → tree_sitter_analyzer-0.9.4.dist-info}/WHEEL +0 -0
  32. {tree_sitter_analyzer-0.9.3.dist-info → tree_sitter_analyzer-0.9.4.dist-info}/entry_points.txt +0 -0
tree_sitter_analyzer/language_detector.py
@@ -1,398 +1,398 @@
1
- #!/usr/bin/env python3
2
- """
3
- Language Detection System
4
-
5
- Automatically detects programming language from file extensions and content.
6
- Supports multiple languages with extensible configuration.
7
- """
8
-
9
- from pathlib import Path
10
- from typing import Any
11
-
12
-
13
- class LanguageDetector:
14
- """Automatic programming language detector"""
15
-
16
- # Basic extension mapping
17
- EXTENSION_MAPPING: dict[str, str] = {
18
- # Java系
19
- ".java": "java",
20
- ".jsp": "jsp",
21
- ".jspx": "jsp",
22
- # JavaScript/TypeScript系
23
- ".js": "javascript",
24
- ".jsx": "jsx",
25
- ".ts": "typescript",
26
- ".tsx": "tsx",
27
- ".mjs": "javascript",
28
- ".cjs": "javascript",
29
- # Python系
30
- ".py": "python",
31
- ".pyx": "python",
32
- ".pyi": "python",
33
- ".pyw": "python",
34
- # C/C++系
35
- ".c": "c",
36
- ".cpp": "cpp",
37
- ".cxx": "cpp",
38
- ".cc": "cpp",
39
- ".h": "c", # Ambiguous
40
- ".hpp": "cpp",
41
- ".hxx": "cpp",
42
- # その他の言語
43
- ".rs": "rust",
44
- ".go": "go",
45
- ".rb": "ruby",
46
- ".php": "php",
47
- ".kt": "kotlin",
48
- ".swift": "swift",
49
- ".cs": "csharp",
50
- ".vb": "vbnet",
51
- ".fs": "fsharp",
52
- ".scala": "scala",
53
- ".clj": "clojure",
54
- ".hs": "haskell",
55
- ".ml": "ocaml",
56
- ".lua": "lua",
57
- ".pl": "perl",
58
- ".r": "r",
59
- ".m": "objc", # Ambiguous (MATLAB as well)
60
- ".dart": "dart",
61
- ".elm": "elm",
62
- }
63
-
64
- # Ambiguous extensions (map to multiple languages)
65
- AMBIGUOUS_EXTENSIONS: dict[str, list[str]] = {
66
- ".h": ["c", "cpp", "objc"],
67
- ".m": ["objc", "matlab"],
68
- ".sql": ["sql", "plsql", "mysql"],
69
- ".xml": ["xml", "html", "jsp"],
70
- ".json": ["json", "jsonc"],
71
- }
72
-
73
- # Content-based detection patterns
74
- CONTENT_PATTERNS: dict[str, dict[str, list[str]]] = {
75
- "c_vs_cpp": {
76
- "cpp": ["#include <iostream>", "std::", "namespace", "class ", "template<"],
77
- "c": ["#include <stdio.h>", "printf(", "malloc(", "typedef struct"],
78
- },
79
- "objc_vs_matlab": {
80
- "objc": ["#import", "@interface", "@implementation", "NSString", "alloc]"],
81
- "matlab": ["function ", "end;", "disp(", "clc;", "clear all"],
82
- },
83
- }
84
-
85
- # Tree-sitter supported languages
86
- SUPPORTED_LANGUAGES = {
87
- "java",
88
- "javascript",
89
- "typescript",
90
- "python",
91
- "c",
92
- "cpp",
93
- "rust",
94
- "go",
95
- }
96
-
97
- def __init__(self) -> None:
98
- """Initialize detector"""
99
- self.extension_map = {
100
- ".java": ("java", 0.9),
101
- ".js": ("javascript", 0.9),
102
- ".jsx": ("javascript", 0.8),
103
- ".ts": ("typescript", 0.9),
104
- ".tsx": ("typescript", 0.8),
105
- ".py": ("python", 0.9),
106
- ".pyw": ("python", 0.8),
107
- ".c": ("c", 0.9),
108
- ".h": ("c", 0.7),
109
- ".cpp": ("cpp", 0.9),
110
- ".cxx": ("cpp", 0.9),
111
- ".cc": ("cpp", 0.9),
112
- ".hpp": ("cpp", 0.8),
113
- ".rs": ("rust", 0.9),
114
- ".go": ("go", 0.9),
115
- ".cs": ("csharp", 0.9),
116
- ".php": ("php", 0.9),
117
- ".rb": ("ruby", 0.9),
118
- ".swift": ("swift", 0.9),
119
- ".kt": ("kotlin", 0.9),
120
- ".scala": ("scala", 0.9),
121
- ".clj": ("clojure", 0.9),
122
- ".hs": ("haskell", 0.9),
123
- ".ml": ("ocaml", 0.9),
124
- ".fs": ("fsharp", 0.9),
125
- ".elm": ("elm", 0.9),
126
- ".dart": ("dart", 0.9),
127
- ".lua": ("lua", 0.9),
128
- ".r": ("r", 0.9),
129
- ".m": ("objectivec", 0.7),
130
- ".mm": ("objectivec", 0.8),
131
- }
132
-
133
- # Content-based detection patterns
134
- self.content_patterns = {
135
- "java": [
136
- (r"package\s+[\w\.]+\s*;", 0.3),
137
- (r"public\s+class\s+\w+", 0.3),
138
- (r"import\s+[\w\.]+\s*;", 0.2),
139
- (r"@\w+\s*\(", 0.2), # Annotations
140
- ],
141
- "python": [
142
- (r"def\s+\w+\s*\(", 0.3),
143
- (r"import\s+\w+", 0.2),
144
- (r"from\s+\w+\s+import", 0.2),
145
- (r'if\s+__name__\s*==\s*["\']__main__["\']', 0.3),
146
- ],
147
- "javascript": [
148
- (r"function\s+\w+\s*\(", 0.3),
149
- (r"var\s+\w+\s*=", 0.2),
150
- (r"let\s+\w+\s*=", 0.2),
151
- (r"const\s+\w+\s*=", 0.2),
152
- (r"console\.log\s*\(", 0.1),
153
- ],
154
- "typescript": [
155
- (r"interface\s+\w+", 0.3),
156
- (r"type\s+\w+\s*=", 0.2),
157
- (r":\s*\w+\s*=", 0.2), # Type annotations
158
- (r"export\s+(interface|type|class)", 0.2),
159
- ],
160
- "c": [
161
- (r"#include\s*<[\w\.]+>", 0.3),
162
- (r"int\s+main\s*\(", 0.3),
163
- (r"printf\s*\(", 0.2),
164
- (r"#define\s+\w+", 0.2),
165
- ],
166
- "cpp": [
167
- (r"#include\s*<[\w\.]+>", 0.2),
168
- (r"using\s+namespace\s+\w+", 0.3),
169
- (r"std::\w+", 0.2),
170
- (r"class\s+\w+\s*{", 0.3),
171
- ],
172
- }
173
-
174
- from .utils import log_debug, log_warning
175
-
176
- self._log_debug = log_debug
177
- self._log_warning = log_warning
178
-
179
- def detect_language(
180
- self, file_path: str, content: str | None = None
181
- ) -> tuple[str, float]:
182
- """
183
- ファイルパスとコンテンツから言語を判定
184
-
185
- Args:
186
- file_path: ファイルパス
187
- content: ファイルコンテンツ(任意、曖昧性解決用)
188
-
189
- Returns:
190
- (言語名, 信頼度) のタプル
191
- """
192
- path = Path(file_path)
193
- extension = path.suffix.lower()
194
-
195
- # Direct mapping by extension
196
- if extension in self.EXTENSION_MAPPING:
197
- language = self.EXTENSION_MAPPING[extension]
198
-
199
- # No ambiguity -> high confidence
200
- if extension not in self.AMBIGUOUS_EXTENSIONS:
201
- return language, 1.0
202
-
203
- # Resolve ambiguity using content
204
- if content:
205
- refined_language = self._resolve_ambiguity(extension, content)
206
- return refined_language, 0.9 if refined_language != language else 0.7
207
- else:
208
- return language, 0.7 # Lower confidence without content
209
-
210
- # Unknown extension
211
- return "unknown", 0.0
212
-
213
- def detect_from_extension(self, file_path: str) -> str:
214
- """
215
- Quick detection using extension only
216
-
217
- Args:
218
- file_path: File path
219
-
220
- Returns:
221
- Detected language name
222
- """
223
- language, _ = self.detect_language(file_path)
224
- return language
225
-
226
- def is_supported(self, language: str) -> bool:
227
- """
228
- Check if language is supported by Tree-sitter
229
-
230
- Args:
231
- language: Language name
232
-
233
- Returns:
234
- Support status
235
- """
236
- return language in self.SUPPORTED_LANGUAGES
237
-
238
- def get_supported_extensions(self) -> list[str]:
239
- """
240
- Get list of supported extensions
241
-
242
- Returns:
243
- List of extensions
244
- """
245
- return sorted(self.EXTENSION_MAPPING.keys())
246
-
247
- def get_supported_languages(self) -> list[str]:
248
- """
249
- Get list of supported languages
250
-
251
- Returns:
252
- List of languages
253
- """
254
- return sorted(self.SUPPORTED_LANGUAGES)
255
-
256
- def _resolve_ambiguity(self, extension: str, content: str) -> str:
257
- """
258
- Resolve ambiguous extension using content
259
-
260
- Args:
261
- extension: File extension
262
- content: File content
263
-
264
- Returns:
265
- Resolved language name
266
- """
267
- if extension not in self.AMBIGUOUS_EXTENSIONS:
268
- return self.EXTENSION_MAPPING.get(extension, "unknown")
269
-
270
- candidates = self.AMBIGUOUS_EXTENSIONS[extension]
271
-
272
- # .h: C vs C++ vs Objective-C
273
- if extension == ".h":
274
- return self._detect_c_family(content, candidates)
275
-
276
- # .m: Objective-C vs MATLAB
277
- elif extension == ".m":
278
- return self._detect_objc_vs_matlab(content, candidates)
279
-
280
- # Fallback to first candidate
281
- return candidates[0]
282
-
283
- def _detect_c_family(self, content: str, candidates: list[str]) -> str:
284
- """Detect among C-family languages"""
285
- cpp_score = 0
286
- c_score = 0
287
- objc_score = 0
288
-
289
- # C++ features
290
- cpp_patterns = self.CONTENT_PATTERNS["c_vs_cpp"]["cpp"]
291
- for pattern in cpp_patterns:
292
- if pattern in content:
293
- cpp_score += 1
294
-
295
- # C features
296
- c_patterns = self.CONTENT_PATTERNS["c_vs_cpp"]["c"]
297
- for pattern in c_patterns:
298
- if pattern in content:
299
- c_score += 1
300
-
301
- # Objective-C features
302
- objc_patterns = self.CONTENT_PATTERNS["objc_vs_matlab"]["objc"]
303
- for pattern in objc_patterns:
304
- if pattern in content:
305
- objc_score += 3 # 強い指標なので重み大
306
-
307
- # Select best-scoring language
308
- scores = {"cpp": cpp_score, "c": c_score, "objc": objc_score}
309
- best_language = max(scores, key=lambda x: scores[x])
310
-
311
- # If objc not a candidate, fallback to C/C++
312
- if best_language == "objc" and "objc" not in candidates:
313
- best_language = "cpp" if cpp_score > c_score else "c"
314
-
315
- return best_language if scores[best_language] > 0 else candidates[0]
316
-
317
- def _detect_objc_vs_matlab(self, content: str, candidates: list[str]) -> str:
318
- """Detect between Objective-C and MATLAB"""
319
- objc_score = 0
320
- matlab_score = 0
321
-
322
- # Objective-C patterns
323
- for pattern in self.CONTENT_PATTERNS["objc_vs_matlab"]["objc"]:
324
- if pattern in content:
325
- objc_score += 1
326
-
327
- # MATLAB patterns
328
- for pattern in self.CONTENT_PATTERNS["objc_vs_matlab"]["matlab"]:
329
- if pattern in content:
330
- matlab_score += 1
331
-
332
- if objc_score > matlab_score:
333
- return "objc"
334
- elif matlab_score > objc_score:
335
- return "matlab"
336
- else:
337
- return candidates[0] # default
338
-
339
- def add_extension_mapping(self, extension: str, language: str) -> None:
340
- """
341
- Add custom extension mapping
342
-
343
- Args:
344
- extension: File extension (with dot)
345
- language: Language name
346
- """
347
- self.EXTENSION_MAPPING[extension.lower()] = language
348
-
349
- def get_language_info(self, language: str) -> dict[str, Any]:
350
- """
351
- Get language information
352
-
353
- Args:
354
- language: Language name
355
-
356
- Returns:
357
- Language info dictionary
358
- """
359
- extensions = [
360
- ext for ext, lang in self.EXTENSION_MAPPING.items() if lang == language
361
- ]
362
-
363
- return {
364
- "name": language,
365
- "extensions": extensions,
366
- "supported": self.is_supported(language),
367
- "tree_sitter_available": language in self.SUPPORTED_LANGUAGES,
368
- }
369
-
370
-
371
- # Global instance
372
- detector = LanguageDetector()
373
-
374
-
375
- def detect_language_from_file(file_path: str) -> str:
376
- """
377
- Detect language from path (simple API)
378
-
379
- Args:
380
- file_path: File path
381
-
382
- Returns:
383
- Detected language name
384
- """
385
- return detector.detect_from_extension(file_path)
386
-
387
-
388
- def is_language_supported(language: str) -> bool:
389
- """
390
- Check if language is supported (simple API)
391
-
392
- Args:
393
- language: Language name
394
-
395
- Returns:
396
- Support status
397
- """
398
- return detector.is_supported(language)
1
+ #!/usr/bin/env python3
2
+ """
3
+ Language Detection System
4
+
5
+ Automatically detects programming language from file extensions and content.
6
+ Supports multiple languages with extensible configuration.
7
+ """
8
+
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+
13
+ class LanguageDetector:
14
+ """Automatic programming language detector"""
15
+
16
+ # Basic extension mapping
17
+ EXTENSION_MAPPING: dict[str, str] = {
18
+ # Java系
19
+ ".java": "java",
20
+ ".jsp": "jsp",
21
+ ".jspx": "jsp",
22
+ # JavaScript/TypeScript系
23
+ ".js": "javascript",
24
+ ".jsx": "jsx",
25
+ ".ts": "typescript",
26
+ ".tsx": "tsx",
27
+ ".mjs": "javascript",
28
+ ".cjs": "javascript",
29
+ # Python系
30
+ ".py": "python",
31
+ ".pyx": "python",
32
+ ".pyi": "python",
33
+ ".pyw": "python",
34
+ # C/C++系
35
+ ".c": "c",
36
+ ".cpp": "cpp",
37
+ ".cxx": "cpp",
38
+ ".cc": "cpp",
39
+ ".h": "c", # Ambiguous
40
+ ".hpp": "cpp",
41
+ ".hxx": "cpp",
42
+ # その他の言語
43
+ ".rs": "rust",
44
+ ".go": "go",
45
+ ".rb": "ruby",
46
+ ".php": "php",
47
+ ".kt": "kotlin",
48
+ ".swift": "swift",
49
+ ".cs": "csharp",
50
+ ".vb": "vbnet",
51
+ ".fs": "fsharp",
52
+ ".scala": "scala",
53
+ ".clj": "clojure",
54
+ ".hs": "haskell",
55
+ ".ml": "ocaml",
56
+ ".lua": "lua",
57
+ ".pl": "perl",
58
+ ".r": "r",
59
+ ".m": "objc", # Ambiguous (MATLAB as well)
60
+ ".dart": "dart",
61
+ ".elm": "elm",
62
+ }
63
+
64
+ # Ambiguous extensions (map to multiple languages)
65
+ AMBIGUOUS_EXTENSIONS: dict[str, list[str]] = {
66
+ ".h": ["c", "cpp", "objc"],
67
+ ".m": ["objc", "matlab"],
68
+ ".sql": ["sql", "plsql", "mysql"],
69
+ ".xml": ["xml", "html", "jsp"],
70
+ ".json": ["json", "jsonc"],
71
+ }
72
+
73
+ # Content-based detection patterns
74
+ CONTENT_PATTERNS: dict[str, dict[str, list[str]]] = {
75
+ "c_vs_cpp": {
76
+ "cpp": ["#include <iostream>", "std::", "namespace", "class ", "template<"],
77
+ "c": ["#include <stdio.h>", "printf(", "malloc(", "typedef struct"],
78
+ },
79
+ "objc_vs_matlab": {
80
+ "objc": ["#import", "@interface", "@implementation", "NSString", "alloc]"],
81
+ "matlab": ["function ", "end;", "disp(", "clc;", "clear all"],
82
+ },
83
+ }
84
+
85
+ # Tree-sitter supported languages
86
+ SUPPORTED_LANGUAGES = {
87
+ "java",
88
+ "javascript",
89
+ "typescript",
90
+ "python",
91
+ "c",
92
+ "cpp",
93
+ "rust",
94
+ "go",
95
+ }
96
+
97
+ def __init__(self) -> None:
98
+ """Initialize detector"""
99
+ self.extension_map = {
100
+ ".java": ("java", 0.9),
101
+ ".js": ("javascript", 0.9),
102
+ ".jsx": ("javascript", 0.8),
103
+ ".ts": ("typescript", 0.9),
104
+ ".tsx": ("typescript", 0.8),
105
+ ".py": ("python", 0.9),
106
+ ".pyw": ("python", 0.8),
107
+ ".c": ("c", 0.9),
108
+ ".h": ("c", 0.7),
109
+ ".cpp": ("cpp", 0.9),
110
+ ".cxx": ("cpp", 0.9),
111
+ ".cc": ("cpp", 0.9),
112
+ ".hpp": ("cpp", 0.8),
113
+ ".rs": ("rust", 0.9),
114
+ ".go": ("go", 0.9),
115
+ ".cs": ("csharp", 0.9),
116
+ ".php": ("php", 0.9),
117
+ ".rb": ("ruby", 0.9),
118
+ ".swift": ("swift", 0.9),
119
+ ".kt": ("kotlin", 0.9),
120
+ ".scala": ("scala", 0.9),
121
+ ".clj": ("clojure", 0.9),
122
+ ".hs": ("haskell", 0.9),
123
+ ".ml": ("ocaml", 0.9),
124
+ ".fs": ("fsharp", 0.9),
125
+ ".elm": ("elm", 0.9),
126
+ ".dart": ("dart", 0.9),
127
+ ".lua": ("lua", 0.9),
128
+ ".r": ("r", 0.9),
129
+ ".m": ("objectivec", 0.7),
130
+ ".mm": ("objectivec", 0.8),
131
+ }
132
+
133
+ # Content-based detection patterns
134
+ self.content_patterns = {
135
+ "java": [
136
+ (r"package\s+[\w\.]+\s*;", 0.3),
137
+ (r"public\s+class\s+\w+", 0.3),
138
+ (r"import\s+[\w\.]+\s*;", 0.2),
139
+ (r"@\w+\s*\(", 0.2), # Annotations
140
+ ],
141
+ "python": [
142
+ (r"def\s+\w+\s*\(", 0.3),
143
+ (r"import\s+\w+", 0.2),
144
+ (r"from\s+\w+\s+import", 0.2),
145
+ (r'if\s+__name__\s*==\s*["\']__main__["\']', 0.3),
146
+ ],
147
+ "javascript": [
148
+ (r"function\s+\w+\s*\(", 0.3),
149
+ (r"var\s+\w+\s*=", 0.2),
150
+ (r"let\s+\w+\s*=", 0.2),
151
+ (r"const\s+\w+\s*=", 0.2),
152
+ (r"console\.log\s*\(", 0.1),
153
+ ],
154
+ "typescript": [
155
+ (r"interface\s+\w+", 0.3),
156
+ (r"type\s+\w+\s*=", 0.2),
157
+ (r":\s*\w+\s*=", 0.2), # Type annotations
158
+ (r"export\s+(interface|type|class)", 0.2),
159
+ ],
160
+ "c": [
161
+ (r"#include\s*<[\w\.]+>", 0.3),
162
+ (r"int\s+main\s*\(", 0.3),
163
+ (r"printf\s*\(", 0.2),
164
+ (r"#define\s+\w+", 0.2),
165
+ ],
166
+ "cpp": [
167
+ (r"#include\s*<[\w\.]+>", 0.2),
168
+ (r"using\s+namespace\s+\w+", 0.3),
169
+ (r"std::\w+", 0.2),
170
+ (r"class\s+\w+\s*{", 0.3),
171
+ ],
172
+ }
173
+
174
+ from .utils import log_debug, log_warning
175
+
176
+ self._log_debug = log_debug
177
+ self._log_warning = log_warning
178
+
179
+ def detect_language(
180
+ self, file_path: str, content: str | None = None
181
+ ) -> tuple[str, float]:
182
+ """
183
+ ファイルパスとコンテンツから言語を判定
184
+
185
+ Args:
186
+ file_path: ファイルパス
187
+ content: ファイルコンテンツ(任意、曖昧性解決用)
188
+
189
+ Returns:
190
+ (言語名, 信頼度) のタプル
191
+ """
192
+ path = Path(file_path)
193
+ extension = path.suffix.lower()
194
+
195
+ # Direct mapping by extension
196
+ if extension in self.EXTENSION_MAPPING:
197
+ language = self.EXTENSION_MAPPING[extension]
198
+
199
+ # No ambiguity -> high confidence
200
+ if extension not in self.AMBIGUOUS_EXTENSIONS:
201
+ return language, 1.0
202
+
203
+ # Resolve ambiguity using content
204
+ if content:
205
+ refined_language = self._resolve_ambiguity(extension, content)
206
+ return refined_language, 0.9 if refined_language != language else 0.7
207
+ else:
208
+ return language, 0.7 # Lower confidence without content
209
+
210
+ # Unknown extension
211
+ return "unknown", 0.0
212
+
213
+ def detect_from_extension(self, file_path: str) -> str:
214
+ """
215
+ Quick detection using extension only
216
+
217
+ Args:
218
+ file_path: File path
219
+
220
+ Returns:
221
+ Detected language name
222
+ """
223
+ language, _ = self.detect_language(file_path)
224
+ return language
225
+
226
+ def is_supported(self, language: str) -> bool:
227
+ """
228
+ Check if language is supported by Tree-sitter
229
+
230
+ Args:
231
+ language: Language name
232
+
233
+ Returns:
234
+ Support status
235
+ """
236
+ return language in self.SUPPORTED_LANGUAGES
237
+
238
+ def get_supported_extensions(self) -> list[str]:
239
+ """
240
+ Get list of supported extensions
241
+
242
+ Returns:
243
+ List of extensions
244
+ """
245
+ return sorted(self.EXTENSION_MAPPING.keys())
246
+
247
+ def get_supported_languages(self) -> list[str]:
248
+ """
249
+ Get list of supported languages
250
+
251
+ Returns:
252
+ List of languages
253
+ """
254
+ return sorted(self.SUPPORTED_LANGUAGES)
255
+
256
+ def _resolve_ambiguity(self, extension: str, content: str) -> str:
257
+ """
258
+ Resolve ambiguous extension using content
259
+
260
+ Args:
261
+ extension: File extension
262
+ content: File content
263
+
264
+ Returns:
265
+ Resolved language name
266
+ """
267
+ if extension not in self.AMBIGUOUS_EXTENSIONS:
268
+ return self.EXTENSION_MAPPING.get(extension, "unknown")
269
+
270
+ candidates = self.AMBIGUOUS_EXTENSIONS[extension]
271
+
272
+ # .h: C vs C++ vs Objective-C
273
+ if extension == ".h":
274
+ return self._detect_c_family(content, candidates)
275
+
276
+ # .m: Objective-C vs MATLAB
277
+ elif extension == ".m":
278
+ return self._detect_objc_vs_matlab(content, candidates)
279
+
280
+ # Fallback to first candidate
281
+ return candidates[0]
282
+
283
+ def _detect_c_family(self, content: str, candidates: list[str]) -> str:
284
+ """Detect among C-family languages"""
285
+ cpp_score = 0
286
+ c_score = 0
287
+ objc_score = 0
288
+
289
+ # C++ features
290
+ cpp_patterns = self.CONTENT_PATTERNS["c_vs_cpp"]["cpp"]
291
+ for pattern in cpp_patterns:
292
+ if pattern in content:
293
+ cpp_score += 1
294
+
295
+ # C features
296
+ c_patterns = self.CONTENT_PATTERNS["c_vs_cpp"]["c"]
297
+ for pattern in c_patterns:
298
+ if pattern in content:
299
+ c_score += 1
300
+
301
+ # Objective-C features
302
+ objc_patterns = self.CONTENT_PATTERNS["objc_vs_matlab"]["objc"]
303
+ for pattern in objc_patterns:
304
+ if pattern in content:
305
+ objc_score += 3 # 強い指標なので重み大
306
+
307
+ # Select best-scoring language
308
+ scores = {"cpp": cpp_score, "c": c_score, "objc": objc_score}
309
+ best_language = max(scores, key=lambda x: scores[x])
310
+
311
+ # If objc not a candidate, fallback to C/C++
312
+ if best_language == "objc" and "objc" not in candidates:
313
+ best_language = "cpp" if cpp_score > c_score else "c"
314
+
315
+ return best_language if scores[best_language] > 0 else candidates[0]
316
+
317
+ def _detect_objc_vs_matlab(self, content: str, candidates: list[str]) -> str:
318
+ """Detect between Objective-C and MATLAB"""
319
+ objc_score = 0
320
+ matlab_score = 0
321
+
322
+ # Objective-C patterns
323
+ for pattern in self.CONTENT_PATTERNS["objc_vs_matlab"]["objc"]:
324
+ if pattern in content:
325
+ objc_score += 1
326
+
327
+ # MATLAB patterns
328
+ for pattern in self.CONTENT_PATTERNS["objc_vs_matlab"]["matlab"]:
329
+ if pattern in content:
330
+ matlab_score += 1
331
+
332
+ if objc_score > matlab_score:
333
+ return "objc"
334
+ elif matlab_score > objc_score:
335
+ return "matlab"
336
+ else:
337
+ return candidates[0] # default
338
+
339
+ def add_extension_mapping(self, extension: str, language: str) -> None:
340
+ """
341
+ Add custom extension mapping
342
+
343
+ Args:
344
+ extension: File extension (with dot)
345
+ language: Language name
346
+ """
347
+ self.EXTENSION_MAPPING[extension.lower()] = language
348
+
349
+ def get_language_info(self, language: str) -> dict[str, Any]:
350
+ """
351
+ Get language information
352
+
353
+ Args:
354
+ language: Language name
355
+
356
+ Returns:
357
+ Language info dictionary
358
+ """
359
+ extensions = [
360
+ ext for ext, lang in self.EXTENSION_MAPPING.items() if lang == language
361
+ ]
362
+
363
+ return {
364
+ "name": language,
365
+ "extensions": extensions,
366
+ "supported": self.is_supported(language),
367
+ "tree_sitter_available": language in self.SUPPORTED_LANGUAGES,
368
+ }
369
+
370
+
371
+ # Global instance
372
+ detector = LanguageDetector()
373
+
374
+
375
+ def detect_language_from_file(file_path: str) -> str:
376
+ """
377
+ Detect language from path (simple API)
378
+
379
+ Args:
380
+ file_path: File path
381
+
382
+ Returns:
383
+ Detected language name
384
+ """
385
+ return detector.detect_from_extension(file_path)
386
+
387
+
388
+ def is_language_supported(language: str) -> bool:
389
+ """
390
+ Check if language is supported (simple API)
391
+
392
+ Args:
393
+ language: Language name
394
+
395
+ Returns:
396
+ Support status
397
+ """
398
+ return detector.is_supported(language)