tree-sitter-analyzer 1.9.17.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149)
  1. tree_sitter_analyzer/__init__.py +132 -0
  2. tree_sitter_analyzer/__main__.py +11 -0
  3. tree_sitter_analyzer/api.py +853 -0
  4. tree_sitter_analyzer/cli/__init__.py +39 -0
  5. tree_sitter_analyzer/cli/__main__.py +12 -0
  6. tree_sitter_analyzer/cli/argument_validator.py +89 -0
  7. tree_sitter_analyzer/cli/commands/__init__.py +26 -0
  8. tree_sitter_analyzer/cli/commands/advanced_command.py +226 -0
  9. tree_sitter_analyzer/cli/commands/base_command.py +181 -0
  10. tree_sitter_analyzer/cli/commands/default_command.py +18 -0
  11. tree_sitter_analyzer/cli/commands/find_and_grep_cli.py +188 -0
  12. tree_sitter_analyzer/cli/commands/list_files_cli.py +133 -0
  13. tree_sitter_analyzer/cli/commands/partial_read_command.py +139 -0
  14. tree_sitter_analyzer/cli/commands/query_command.py +109 -0
  15. tree_sitter_analyzer/cli/commands/search_content_cli.py +161 -0
  16. tree_sitter_analyzer/cli/commands/structure_command.py +156 -0
  17. tree_sitter_analyzer/cli/commands/summary_command.py +116 -0
  18. tree_sitter_analyzer/cli/commands/table_command.py +414 -0
  19. tree_sitter_analyzer/cli/info_commands.py +124 -0
  20. tree_sitter_analyzer/cli_main.py +472 -0
  21. tree_sitter_analyzer/constants.py +85 -0
  22. tree_sitter_analyzer/core/__init__.py +15 -0
  23. tree_sitter_analyzer/core/analysis_engine.py +580 -0
  24. tree_sitter_analyzer/core/cache_service.py +333 -0
  25. tree_sitter_analyzer/core/engine.py +585 -0
  26. tree_sitter_analyzer/core/parser.py +293 -0
  27. tree_sitter_analyzer/core/query.py +605 -0
  28. tree_sitter_analyzer/core/query_filter.py +200 -0
  29. tree_sitter_analyzer/core/query_service.py +340 -0
  30. tree_sitter_analyzer/encoding_utils.py +530 -0
  31. tree_sitter_analyzer/exceptions.py +747 -0
  32. tree_sitter_analyzer/file_handler.py +246 -0
  33. tree_sitter_analyzer/formatters/__init__.py +1 -0
  34. tree_sitter_analyzer/formatters/base_formatter.py +201 -0
  35. tree_sitter_analyzer/formatters/csharp_formatter.py +367 -0
  36. tree_sitter_analyzer/formatters/formatter_config.py +197 -0
  37. tree_sitter_analyzer/formatters/formatter_factory.py +84 -0
  38. tree_sitter_analyzer/formatters/formatter_registry.py +377 -0
  39. tree_sitter_analyzer/formatters/formatter_selector.py +96 -0
  40. tree_sitter_analyzer/formatters/go_formatter.py +368 -0
  41. tree_sitter_analyzer/formatters/html_formatter.py +498 -0
  42. tree_sitter_analyzer/formatters/java_formatter.py +423 -0
  43. tree_sitter_analyzer/formatters/javascript_formatter.py +611 -0
  44. tree_sitter_analyzer/formatters/kotlin_formatter.py +268 -0
  45. tree_sitter_analyzer/formatters/language_formatter_factory.py +123 -0
  46. tree_sitter_analyzer/formatters/legacy_formatter_adapters.py +228 -0
  47. tree_sitter_analyzer/formatters/markdown_formatter.py +725 -0
  48. tree_sitter_analyzer/formatters/php_formatter.py +301 -0
  49. tree_sitter_analyzer/formatters/python_formatter.py +830 -0
  50. tree_sitter_analyzer/formatters/ruby_formatter.py +278 -0
  51. tree_sitter_analyzer/formatters/rust_formatter.py +233 -0
  52. tree_sitter_analyzer/formatters/sql_formatter_wrapper.py +689 -0
  53. tree_sitter_analyzer/formatters/sql_formatters.py +536 -0
  54. tree_sitter_analyzer/formatters/typescript_formatter.py +543 -0
  55. tree_sitter_analyzer/formatters/yaml_formatter.py +462 -0
  56. tree_sitter_analyzer/interfaces/__init__.py +9 -0
  57. tree_sitter_analyzer/interfaces/cli.py +535 -0
  58. tree_sitter_analyzer/interfaces/cli_adapter.py +359 -0
  59. tree_sitter_analyzer/interfaces/mcp_adapter.py +224 -0
  60. tree_sitter_analyzer/interfaces/mcp_server.py +428 -0
  61. tree_sitter_analyzer/language_detector.py +553 -0
  62. tree_sitter_analyzer/language_loader.py +271 -0
  63. tree_sitter_analyzer/languages/__init__.py +10 -0
  64. tree_sitter_analyzer/languages/csharp_plugin.py +1076 -0
  65. tree_sitter_analyzer/languages/css_plugin.py +449 -0
  66. tree_sitter_analyzer/languages/go_plugin.py +836 -0
  67. tree_sitter_analyzer/languages/html_plugin.py +496 -0
  68. tree_sitter_analyzer/languages/java_plugin.py +1299 -0
  69. tree_sitter_analyzer/languages/javascript_plugin.py +1622 -0
  70. tree_sitter_analyzer/languages/kotlin_plugin.py +656 -0
  71. tree_sitter_analyzer/languages/markdown_plugin.py +1928 -0
  72. tree_sitter_analyzer/languages/php_plugin.py +862 -0
  73. tree_sitter_analyzer/languages/python_plugin.py +1636 -0
  74. tree_sitter_analyzer/languages/ruby_plugin.py +757 -0
  75. tree_sitter_analyzer/languages/rust_plugin.py +673 -0
  76. tree_sitter_analyzer/languages/sql_plugin.py +2444 -0
  77. tree_sitter_analyzer/languages/typescript_plugin.py +1892 -0
  78. tree_sitter_analyzer/languages/yaml_plugin.py +695 -0
  79. tree_sitter_analyzer/legacy_table_formatter.py +860 -0
  80. tree_sitter_analyzer/mcp/__init__.py +34 -0
  81. tree_sitter_analyzer/mcp/resources/__init__.py +43 -0
  82. tree_sitter_analyzer/mcp/resources/code_file_resource.py +208 -0
  83. tree_sitter_analyzer/mcp/resources/project_stats_resource.py +586 -0
  84. tree_sitter_analyzer/mcp/server.py +869 -0
  85. tree_sitter_analyzer/mcp/tools/__init__.py +28 -0
  86. tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +779 -0
  87. tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +291 -0
  88. tree_sitter_analyzer/mcp/tools/base_tool.py +139 -0
  89. tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +816 -0
  90. tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +686 -0
  91. tree_sitter_analyzer/mcp/tools/list_files_tool.py +413 -0
  92. tree_sitter_analyzer/mcp/tools/output_format_validator.py +148 -0
  93. tree_sitter_analyzer/mcp/tools/query_tool.py +443 -0
  94. tree_sitter_analyzer/mcp/tools/read_partial_tool.py +464 -0
  95. tree_sitter_analyzer/mcp/tools/search_content_tool.py +836 -0
  96. tree_sitter_analyzer/mcp/tools/table_format_tool.py +572 -0
  97. tree_sitter_analyzer/mcp/tools/universal_analyze_tool.py +653 -0
  98. tree_sitter_analyzer/mcp/utils/__init__.py +113 -0
  99. tree_sitter_analyzer/mcp/utils/error_handler.py +569 -0
  100. tree_sitter_analyzer/mcp/utils/file_output_factory.py +217 -0
  101. tree_sitter_analyzer/mcp/utils/file_output_manager.py +322 -0
  102. tree_sitter_analyzer/mcp/utils/gitignore_detector.py +358 -0
  103. tree_sitter_analyzer/mcp/utils/path_resolver.py +414 -0
  104. tree_sitter_analyzer/mcp/utils/search_cache.py +343 -0
  105. tree_sitter_analyzer/models.py +840 -0
  106. tree_sitter_analyzer/mypy_current_errors.txt +2 -0
  107. tree_sitter_analyzer/output_manager.py +255 -0
  108. tree_sitter_analyzer/platform_compat/__init__.py +3 -0
  109. tree_sitter_analyzer/platform_compat/adapter.py +324 -0
  110. tree_sitter_analyzer/platform_compat/compare.py +224 -0
  111. tree_sitter_analyzer/platform_compat/detector.py +67 -0
  112. tree_sitter_analyzer/platform_compat/fixtures.py +228 -0
  113. tree_sitter_analyzer/platform_compat/profiles.py +217 -0
  114. tree_sitter_analyzer/platform_compat/record.py +55 -0
  115. tree_sitter_analyzer/platform_compat/recorder.py +155 -0
  116. tree_sitter_analyzer/platform_compat/report.py +92 -0
  117. tree_sitter_analyzer/plugins/__init__.py +280 -0
  118. tree_sitter_analyzer/plugins/base.py +647 -0
  119. tree_sitter_analyzer/plugins/manager.py +384 -0
  120. tree_sitter_analyzer/project_detector.py +328 -0
  121. tree_sitter_analyzer/queries/__init__.py +27 -0
  122. tree_sitter_analyzer/queries/csharp.py +216 -0
  123. tree_sitter_analyzer/queries/css.py +615 -0
  124. tree_sitter_analyzer/queries/go.py +275 -0
  125. tree_sitter_analyzer/queries/html.py +543 -0
  126. tree_sitter_analyzer/queries/java.py +402 -0
  127. tree_sitter_analyzer/queries/javascript.py +724 -0
  128. tree_sitter_analyzer/queries/kotlin.py +192 -0
  129. tree_sitter_analyzer/queries/markdown.py +258 -0
  130. tree_sitter_analyzer/queries/php.py +95 -0
  131. tree_sitter_analyzer/queries/python.py +859 -0
  132. tree_sitter_analyzer/queries/ruby.py +92 -0
  133. tree_sitter_analyzer/queries/rust.py +223 -0
  134. tree_sitter_analyzer/queries/sql.py +555 -0
  135. tree_sitter_analyzer/queries/typescript.py +871 -0
  136. tree_sitter_analyzer/queries/yaml.py +236 -0
  137. tree_sitter_analyzer/query_loader.py +272 -0
  138. tree_sitter_analyzer/security/__init__.py +22 -0
  139. tree_sitter_analyzer/security/boundary_manager.py +277 -0
  140. tree_sitter_analyzer/security/regex_checker.py +297 -0
  141. tree_sitter_analyzer/security/validator.py +599 -0
  142. tree_sitter_analyzer/table_formatter.py +782 -0
  143. tree_sitter_analyzer/utils/__init__.py +53 -0
  144. tree_sitter_analyzer/utils/logging.py +433 -0
  145. tree_sitter_analyzer/utils/tree_sitter_compat.py +289 -0
  146. tree_sitter_analyzer-1.9.17.1.dist-info/METADATA +485 -0
  147. tree_sitter_analyzer-1.9.17.1.dist-info/RECORD +149 -0
  148. tree_sitter_analyzer-1.9.17.1.dist-info/WHEEL +4 -0
  149. tree_sitter_analyzer-1.9.17.1.dist-info/entry_points.txt +25 -0
@@ -0,0 +1,553 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Language Detection System
4
+
5
+ Automatically detects programming language from file extensions and content.
6
+ Supports multiple languages with extensible configuration.
7
+ """
8
+
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+
13
+ class LanguageDetector:
14
+ """Automatic programming language detector"""
15
+
16
+ # Basic extension mapping
17
+ EXTENSION_MAPPING: dict[str, str] = {
18
+ # Java系
19
+ ".java": "java",
20
+ ".jsp": "jsp",
21
+ ".jspx": "jsp",
22
+ # JavaScript/TypeScript系
23
+ ".js": "javascript",
24
+ ".jsx": "jsx",
25
+ ".ts": "typescript",
26
+ ".tsx": "typescript", # TSX files are TypeScript with JSX
27
+ ".mts": "typescript", # ES module TypeScript
28
+ ".cts": "typescript", # CommonJS TypeScript
29
+ ".mjs": "javascript",
30
+ ".cjs": "javascript",
31
+ # Python系
32
+ ".py": "python",
33
+ ".pyx": "python",
34
+ ".pyi": "python",
35
+ ".pyw": "python",
36
+ # C/C++系
37
+ ".c": "c",
38
+ ".cpp": "cpp",
39
+ ".cxx": "cpp",
40
+ ".cc": "cpp",
41
+ ".h": "c", # Ambiguous
42
+ ".hpp": "cpp",
43
+ ".hxx": "cpp",
44
+ # その他の言語
45
+ ".rs": "rust",
46
+ ".go": "go",
47
+ ".rb": "ruby",
48
+ ".php": "php",
49
+ ".kt": "kotlin",
50
+ ".kts": "kotlin",
51
+ ".swift": "swift",
52
+ ".cs": "csharp",
53
+ ".vb": "vbnet",
54
+ ".fs": "fsharp",
55
+ ".scala": "scala",
56
+ ".clj": "clojure",
57
+ ".hs": "haskell",
58
+ ".ml": "ocaml",
59
+ ".lua": "lua",
60
+ ".pl": "perl",
61
+ ".r": "r",
62
+ ".m": "objc", # Ambiguous (MATLAB as well)
63
+ ".dart": "dart",
64
+ ".elm": "elm",
65
+ # Markdown系
66
+ ".md": "markdown",
67
+ ".markdown": "markdown",
68
+ ".mdown": "markdown",
69
+ ".mkd": "markdown",
70
+ ".mkdn": "markdown",
71
+ ".mdx": "markdown",
72
+ # HTML系
73
+ ".html": "html",
74
+ ".htm": "html",
75
+ ".xhtml": "html",
76
+ # CSS系
77
+ ".css": "css",
78
+ ".scss": "css",
79
+ ".sass": "css",
80
+ ".less": "css",
81
+ # SQL系
82
+ ".sql": "sql",
83
+ # JSON系
84
+ ".json": "json",
85
+ ".jsonc": "json",
86
+ ".json5": "json",
87
+ # YAML系
88
+ ".yaml": "yaml",
89
+ ".yml": "yaml",
90
+ }
91
+
92
+ # Ambiguous extensions (map to multiple languages)
93
+ AMBIGUOUS_EXTENSIONS: dict[str, list[str]] = {
94
+ ".h": ["c", "cpp", "objc"],
95
+ ".m": ["objc", "matlab"],
96
+ ".sql": ["sql", "plsql", "mysql"],
97
+ ".xml": ["xml", "html", "jsp"],
98
+ ".json": ["json", "jsonc"],
99
+ }
100
+
101
+ # Content-based detection patterns
102
+ CONTENT_PATTERNS: dict[str, dict[str, list[str]]] = {
103
+ "c_vs_cpp": {
104
+ "cpp": ["#include <iostream>", "std::", "namespace", "class ", "template<"],
105
+ "c": ["#include <stdio.h>", "printf(", "malloc(", "typedef struct"],
106
+ },
107
+ "objc_vs_matlab": {
108
+ "objc": ["#import", "@interface", "@implementation", "NSString", "alloc]"],
109
+ "matlab": ["function ", "end;", "disp(", "clc;", "clear all"],
110
+ },
111
+ }
112
+
113
+ # Tree-sitter supported languages
114
+ SUPPORTED_LANGUAGES = {
115
+ "java",
116
+ "javascript",
117
+ "typescript",
118
+ "python",
119
+ "c",
120
+ "cpp",
121
+ "rust",
122
+ "go",
123
+ "php",
124
+ "ruby",
125
+ "markdown",
126
+ "html",
127
+ "css",
128
+ "json",
129
+ "sql",
130
+ }
131
+
132
+ def __init__(self) -> None:
133
+ """Initialize detector"""
134
+ self.extension_map = {
135
+ ".java": ("java", 0.9),
136
+ ".js": ("javascript", 0.9),
137
+ ".jsx": ("javascript", 0.8),
138
+ ".ts": ("typescript", 0.9),
139
+ ".tsx": ("typescript", 0.8),
140
+ ".mts": ("typescript", 0.9),
141
+ ".cts": ("typescript", 0.9),
142
+ ".py": ("python", 0.9),
143
+ ".pyw": ("python", 0.8),
144
+ ".c": ("c", 0.9),
145
+ ".h": ("c", 0.7),
146
+ ".cpp": ("cpp", 0.9),
147
+ ".cxx": ("cpp", 0.9),
148
+ ".cc": ("cpp", 0.9),
149
+ ".hpp": ("cpp", 0.8),
150
+ ".rs": ("rust", 0.9),
151
+ ".go": ("go", 0.9),
152
+ ".cs": ("csharp", 0.9),
153
+ ".php": ("php", 0.9),
154
+ ".rb": ("ruby", 0.9),
155
+ ".swift": ("swift", 0.9),
156
+ ".kt": ("kotlin", 0.9),
157
+ ".kts": ("kotlin", 0.9),
158
+ ".scala": ("scala", 0.9),
159
+ ".clj": ("clojure", 0.9),
160
+ ".hs": ("haskell", 0.9),
161
+ ".ml": ("ocaml", 0.9),
162
+ ".fs": ("fsharp", 0.9),
163
+ ".elm": ("elm", 0.9),
164
+ ".dart": ("dart", 0.9),
165
+ ".lua": ("lua", 0.9),
166
+ ".r": ("r", 0.9),
167
+ ".m": ("objectivec", 0.7),
168
+ ".mm": ("objectivec", 0.8),
169
+ # Markdown extensions
170
+ ".md": ("markdown", 0.9),
171
+ ".markdown": ("markdown", 0.9),
172
+ ".mdown": ("markdown", 0.8),
173
+ ".mkd": ("markdown", 0.8),
174
+ ".mkdn": ("markdown", 0.8),
175
+ ".mdx": ("markdown", 0.7), # MDX might be mixed with JSX
176
+ # HTML extensions
177
+ ".html": ("html", 0.9),
178
+ ".htm": ("html", 0.9),
179
+ ".xhtml": ("html", 0.8),
180
+ # CSS extensions
181
+ ".css": ("css", 0.9),
182
+ ".scss": ("css", 0.8), # Sass/SCSS
183
+ ".sass": ("css", 0.8), # Sass
184
+ ".less": ("css", 0.8), # Less
185
+ # JSON extensions
186
+ ".json": ("json", 0.9),
187
+ ".jsonc": ("json", 0.8), # JSON with comments
188
+ ".json5": ("json", 0.8), # JSON5 format
189
+ # SQL extensions
190
+ ".sql": ("sql", 0.9),
191
+ }
192
+
193
+ # Content-based detection patterns
194
+ self.content_patterns = {
195
+ "java": [
196
+ (r"package\s+[\w\.]+\s*;", 0.3),
197
+ (r"public\s+class\s+\w+", 0.3),
198
+ (r"import\s+[\w\.]+\s*;", 0.2),
199
+ (r"@\w+\s*\(", 0.2), # Annotations
200
+ ],
201
+ "python": [
202
+ (r"def\s+\w+\s*\(", 0.3),
203
+ (r"import\s+\w+", 0.2),
204
+ (r"from\s+\w+\s+import", 0.2),
205
+ (r'if\s+__name__\s*==\s*["\']__main__["\']', 0.3),
206
+ ],
207
+ "javascript": [
208
+ (r"function\s+\w+\s*\(", 0.3),
209
+ (r"var\s+\w+\s*=", 0.2),
210
+ (r"let\s+\w+\s*=", 0.2),
211
+ (r"const\s+\w+\s*=", 0.2),
212
+ (r"console\.log\s*\(", 0.1),
213
+ ],
214
+ "typescript": [
215
+ (r"interface\s+\w+", 0.3),
216
+ (r"type\s+\w+\s*=", 0.2),
217
+ (r":\s*\w+\s*=", 0.2), # Type annotations
218
+ (r"export\s+(interface|type|class)", 0.2),
219
+ ],
220
+ "c": [
221
+ (r"#include\s*<[\w\.]+>", 0.3),
222
+ (r"int\s+main\s*\(", 0.3),
223
+ (r"printf\s*\(", 0.2),
224
+ (r"#define\s+\w+", 0.2),
225
+ ],
226
+ "cpp": [
227
+ (r"#include\s*<[\w\.]+>", 0.2),
228
+ (r"using\s+namespace\s+\w+", 0.3),
229
+ (r"std::\w+", 0.2),
230
+ (r"class\s+\w+\s*{", 0.3),
231
+ ],
232
+ "markdown": [
233
+ (r"^#{1,6}\s+", 0.4), # ATX headers
234
+ (r"^\s*[-*+]\s+", 0.3), # List items
235
+ (r"```[\w]*", 0.3), # Fenced code blocks
236
+ (r"\[.*\]\(.*\)", 0.2), # Links
237
+ (r"!\[.*\]\(.*\)", 0.2), # Images
238
+ (r"^\s*>\s+", 0.2), # Blockquotes
239
+ (r"^\s*\|.*\|", 0.2), # Tables
240
+ (r"^[-=]{3,}$", 0.2), # Setext headers or horizontal rules
241
+ ],
242
+ "html": [
243
+ (r"<!DOCTYPE\s+html", 0.4), # HTML5 doctype
244
+ (r"<html[^>]*>", 0.3), # HTML tag
245
+ (r"<head[^>]*>", 0.3), # Head tag
246
+ (r"<body[^>]*>", 0.3), # Body tag
247
+ (r"<div[^>]*>", 0.2), # Div tag
248
+ (r"<p[^>]*>", 0.2), # Paragraph tag
249
+ (r"<a\s+href=", 0.2), # Link tag with href
250
+ (r"<img\s+src=", 0.2), # Image tag with src
251
+ ],
252
+ "css": [
253
+ (r"[.#][\w-]+\s*{", 0.4), # CSS selectors
254
+ (r"@media\s+", 0.3), # Media queries
255
+ (r"@import\s+", 0.3), # Import statements
256
+ (r"@keyframes\s+", 0.3), # Keyframes
257
+ (r":\s*[\w-]+\s*;", 0.2), # Property declarations
258
+ (r"color\s*:", 0.2), # Color property
259
+ (r"font-", 0.2), # Font properties
260
+ (r"margin\s*:", 0.2), # Margin property
261
+ ],
262
+ }
263
+
264
+ from .utils import log_debug, log_warning
265
+
266
+ self._log_debug = log_debug
267
+ self._log_warning = log_warning
268
+
269
+ def detect_language(
270
+ self, file_path: str, content: str | None = None
271
+ ) -> tuple[str, float]:
272
+ """
273
+ ファイルパスとコンテンツから言語を判定
274
+
275
+ Args:
276
+ file_path: ファイルパス
277
+ content: ファイルコンテンツ(任意、曖昧性解決用)
278
+
279
+ Returns:
280
+ (言語名, 信頼度) のタプル - 常に有効な言語名を返す
281
+ """
282
+ # Handle invalid input
283
+ if not file_path or not isinstance(file_path, str):
284
+ return "unknown", 0.0
285
+
286
+ path = Path(file_path)
287
+ extension = path.suffix.lower()
288
+
289
+ # Direct mapping by extension
290
+ if extension in self.EXTENSION_MAPPING:
291
+ language = self.EXTENSION_MAPPING[extension]
292
+
293
+ # Ensure language is valid
294
+ if not language or language.strip() == "":
295
+ return "unknown", 0.0
296
+
297
+ # Use confidence from extension_map if available
298
+ if extension in self.extension_map:
299
+ _, confidence = self.extension_map[extension]
300
+ return language, confidence
301
+
302
+ # No ambiguity -> high confidence
303
+ if extension not in self.AMBIGUOUS_EXTENSIONS:
304
+ return language, 1.0
305
+
306
+ # Resolve ambiguity using content
307
+ if content:
308
+ refined_language = self._resolve_ambiguity(extension, content)
309
+ # Ensure refined language is valid
310
+ if not refined_language or refined_language.strip() == "":
311
+ refined_language = "unknown"
312
+ return refined_language, 0.9 if refined_language != language else 0.7
313
+ else:
314
+ return language, 0.7 # Lower confidence without content
315
+
316
+ # Unknown extension - always return "unknown" instead of None
317
+ return "unknown", 0.0
318
+
319
+ def detect_from_extension(self, file_path: str) -> str:
320
+ """
321
+ Quick detection using extension only
322
+
323
+ Args:
324
+ file_path: File path
325
+
326
+ Returns:
327
+ Detected language name - 常に有効な文字列を返す
328
+ """
329
+ # Handle invalid input
330
+ if not file_path or not isinstance(file_path, str):
331
+ return "unknown"
332
+
333
+ result = self.detect_language(file_path)
334
+ if isinstance(result, tuple):
335
+ language, _ = result
336
+ # Ensure language is valid
337
+ if not language or language.strip() == "":
338
+ return "unknown"
339
+ return language
340
+
341
+ def is_supported(self, language: str) -> bool:
342
+ """
343
+ Check if language is supported by Tree-sitter
344
+
345
+ Args:
346
+ language: Language name
347
+
348
+ Returns:
349
+ Support status
350
+ """
351
+ # First check the static list for basic support
352
+ if language in self.SUPPORTED_LANGUAGES:
353
+ return True
354
+
355
+ # Also check if we have a plugin for this language
356
+ try:
357
+ from .plugins.manager import PluginManager
358
+
359
+ plugin_manager = PluginManager()
360
+ plugin_manager.load_plugins() # Ensure plugins are loaded
361
+ supported_languages = plugin_manager.get_supported_languages()
362
+ return language in supported_languages
363
+ except Exception:
364
+ # Fallback to static list if plugin manager fails
365
+ return language in self.SUPPORTED_LANGUAGES
366
+
367
+ def get_supported_extensions(self) -> list[str]:
368
+ """
369
+ Get list of supported extensions
370
+
371
+ Returns:
372
+ List of extensions
373
+ """
374
+ return sorted(self.EXTENSION_MAPPING.keys())
375
+
376
+ def get_supported_languages(self) -> list[str]:
377
+ """
378
+ Get list of supported languages
379
+
380
+ Returns:
381
+ List of languages
382
+ """
383
+ return sorted(self.SUPPORTED_LANGUAGES)
384
+
385
+ def _resolve_ambiguity(self, extension: str, content: str) -> str:
386
+ """
387
+ Resolve ambiguous extension using content
388
+
389
+ Args:
390
+ extension: File extension
391
+ content: File content
392
+
393
+ Returns:
394
+ Resolved language name
395
+ """
396
+ if extension not in self.AMBIGUOUS_EXTENSIONS:
397
+ return self.EXTENSION_MAPPING.get(extension, "unknown")
398
+
399
+ candidates = self.AMBIGUOUS_EXTENSIONS[extension]
400
+
401
+ # .h: C vs C++ vs Objective-C
402
+ if extension == ".h":
403
+ return self._detect_c_family(content, candidates)
404
+
405
+ # .m: Objective-C vs MATLAB
406
+ elif extension == ".m":
407
+ return self._detect_objc_vs_matlab(content, candidates)
408
+
409
+ # Fallback to first candidate
410
+ return candidates[0]
411
+
412
+ def _detect_c_family(self, content: str, candidates: list[str]) -> str:
413
+ """Detect among C-family languages"""
414
+ cpp_score = 0
415
+ c_score = 0
416
+ objc_score = 0
417
+
418
+ # C++ features
419
+ cpp_patterns = self.CONTENT_PATTERNS["c_vs_cpp"]["cpp"]
420
+ for pattern in cpp_patterns:
421
+ if pattern in content:
422
+ cpp_score += 1
423
+
424
+ # C features
425
+ c_patterns = self.CONTENT_PATTERNS["c_vs_cpp"]["c"]
426
+ for pattern in c_patterns:
427
+ if pattern in content:
428
+ c_score += 1
429
+
430
+ # Objective-C features
431
+ objc_patterns = self.CONTENT_PATTERNS["objc_vs_matlab"]["objc"]
432
+ for pattern in objc_patterns:
433
+ if pattern in content:
434
+ objc_score += 3 # 強い指標なので重み大
435
+
436
+ # Select best-scoring language
437
+ scores = {"cpp": cpp_score, "c": c_score, "objc": objc_score}
438
+ best_language = max(scores, key=lambda x: scores[x])
439
+
440
+ # If objc not a candidate, fallback to C/C++
441
+ if best_language == "objc" and "objc" not in candidates:
442
+ best_language = "cpp" if cpp_score > c_score else "c"
443
+
444
+ return best_language if scores[best_language] > 0 else candidates[0]
445
+
446
+ def _detect_objc_vs_matlab(self, content: str, candidates: list[str]) -> str:
447
+ """Detect between Objective-C and MATLAB"""
448
+ objc_score = 0
449
+ matlab_score = 0
450
+
451
+ # Objective-C patterns
452
+ for pattern in self.CONTENT_PATTERNS["objc_vs_matlab"]["objc"]:
453
+ if pattern in content:
454
+ objc_score += 1
455
+
456
+ # MATLAB patterns
457
+ for pattern in self.CONTENT_PATTERNS["objc_vs_matlab"]["matlab"]:
458
+ if pattern in content:
459
+ matlab_score += 1
460
+
461
+ if objc_score > matlab_score:
462
+ return "objc"
463
+ elif matlab_score > objc_score:
464
+ return "matlab"
465
+ else:
466
+ return candidates[0] # default
467
+
468
+ def add_extension_mapping(self, extension: str, language: str) -> None:
469
+ """
470
+ Add custom extension mapping
471
+
472
+ Args:
473
+ extension: File extension (with dot)
474
+ language: Language name
475
+ """
476
+ self.EXTENSION_MAPPING[extension.lower()] = language
477
+
478
+ def get_language_info(self, language: str) -> dict[str, Any]:
479
+ """
480
+ Get language information
481
+
482
+ Args:
483
+ language: Language name
484
+
485
+ Returns:
486
+ Language info dictionary
487
+ """
488
+ extensions = [
489
+ ext for ext, lang in self.EXTENSION_MAPPING.items() if lang == language
490
+ ]
491
+
492
+ return {
493
+ "name": language,
494
+ "extensions": extensions,
495
+ "supported": self.is_supported(language),
496
+ "tree_sitter_available": language in self.SUPPORTED_LANGUAGES,
497
+ }
498
+
499
+
500
# Global instance: created once at import time and shared by the
# module-level helper is_language_supported().
detector = LanguageDetector()
502
+
503
+
504
def detect_language_from_file(file_path: str) -> str:
    """
    Detect language from path (simple API)

    Args:
        file_path: File path

    Returns:
        Detected language name - always a non-empty string; "unknown" for
        invalid input or when detection yields an empty name
    """
    # Reject anything that is not a non-empty string
    if not isinstance(file_path, str) or not file_path:
        return "unknown"

    # A new detector is built on every call so the latest configuration is used
    language = LanguageDetector().detect_from_extension(file_path)

    # Only hand back a name that is non-empty after stripping whitespace
    if language and language.strip() != "":
        return language
    return "unknown"
527
+
528
+
529
def is_language_supported(language: str) -> bool:
    """
    Check if language is supported (simple API)

    Delegates to the shared module-level ``detector``. Its ``is_supported``
    method already checks the static language list and then the plugin
    manager, so the lookup is not repeated here; repeating it constructed a
    second PluginManager and reloaded plugins on every miss.

    Args:
        language: Language name

    Returns:
        Support status
    """
    return detector.is_supported(language)