tree-sitter-analyzer 1.9.17.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tree_sitter_analyzer/__init__.py +132 -0
- tree_sitter_analyzer/__main__.py +11 -0
- tree_sitter_analyzer/api.py +853 -0
- tree_sitter_analyzer/cli/__init__.py +39 -0
- tree_sitter_analyzer/cli/__main__.py +12 -0
- tree_sitter_analyzer/cli/argument_validator.py +89 -0
- tree_sitter_analyzer/cli/commands/__init__.py +26 -0
- tree_sitter_analyzer/cli/commands/advanced_command.py +226 -0
- tree_sitter_analyzer/cli/commands/base_command.py +181 -0
- tree_sitter_analyzer/cli/commands/default_command.py +18 -0
- tree_sitter_analyzer/cli/commands/find_and_grep_cli.py +188 -0
- tree_sitter_analyzer/cli/commands/list_files_cli.py +133 -0
- tree_sitter_analyzer/cli/commands/partial_read_command.py +139 -0
- tree_sitter_analyzer/cli/commands/query_command.py +109 -0
- tree_sitter_analyzer/cli/commands/search_content_cli.py +161 -0
- tree_sitter_analyzer/cli/commands/structure_command.py +156 -0
- tree_sitter_analyzer/cli/commands/summary_command.py +116 -0
- tree_sitter_analyzer/cli/commands/table_command.py +414 -0
- tree_sitter_analyzer/cli/info_commands.py +124 -0
- tree_sitter_analyzer/cli_main.py +472 -0
- tree_sitter_analyzer/constants.py +85 -0
- tree_sitter_analyzer/core/__init__.py +15 -0
- tree_sitter_analyzer/core/analysis_engine.py +580 -0
- tree_sitter_analyzer/core/cache_service.py +333 -0
- tree_sitter_analyzer/core/engine.py +585 -0
- tree_sitter_analyzer/core/parser.py +293 -0
- tree_sitter_analyzer/core/query.py +605 -0
- tree_sitter_analyzer/core/query_filter.py +200 -0
- tree_sitter_analyzer/core/query_service.py +340 -0
- tree_sitter_analyzer/encoding_utils.py +530 -0
- tree_sitter_analyzer/exceptions.py +747 -0
- tree_sitter_analyzer/file_handler.py +246 -0
- tree_sitter_analyzer/formatters/__init__.py +1 -0
- tree_sitter_analyzer/formatters/base_formatter.py +201 -0
- tree_sitter_analyzer/formatters/csharp_formatter.py +367 -0
- tree_sitter_analyzer/formatters/formatter_config.py +197 -0
- tree_sitter_analyzer/formatters/formatter_factory.py +84 -0
- tree_sitter_analyzer/formatters/formatter_registry.py +377 -0
- tree_sitter_analyzer/formatters/formatter_selector.py +96 -0
- tree_sitter_analyzer/formatters/go_formatter.py +368 -0
- tree_sitter_analyzer/formatters/html_formatter.py +498 -0
- tree_sitter_analyzer/formatters/java_formatter.py +423 -0
- tree_sitter_analyzer/formatters/javascript_formatter.py +611 -0
- tree_sitter_analyzer/formatters/kotlin_formatter.py +268 -0
- tree_sitter_analyzer/formatters/language_formatter_factory.py +123 -0
- tree_sitter_analyzer/formatters/legacy_formatter_adapters.py +228 -0
- tree_sitter_analyzer/formatters/markdown_formatter.py +725 -0
- tree_sitter_analyzer/formatters/php_formatter.py +301 -0
- tree_sitter_analyzer/formatters/python_formatter.py +830 -0
- tree_sitter_analyzer/formatters/ruby_formatter.py +278 -0
- tree_sitter_analyzer/formatters/rust_formatter.py +233 -0
- tree_sitter_analyzer/formatters/sql_formatter_wrapper.py +689 -0
- tree_sitter_analyzer/formatters/sql_formatters.py +536 -0
- tree_sitter_analyzer/formatters/typescript_formatter.py +543 -0
- tree_sitter_analyzer/formatters/yaml_formatter.py +462 -0
- tree_sitter_analyzer/interfaces/__init__.py +9 -0
- tree_sitter_analyzer/interfaces/cli.py +535 -0
- tree_sitter_analyzer/interfaces/cli_adapter.py +359 -0
- tree_sitter_analyzer/interfaces/mcp_adapter.py +224 -0
- tree_sitter_analyzer/interfaces/mcp_server.py +428 -0
- tree_sitter_analyzer/language_detector.py +553 -0
- tree_sitter_analyzer/language_loader.py +271 -0
- tree_sitter_analyzer/languages/__init__.py +10 -0
- tree_sitter_analyzer/languages/csharp_plugin.py +1076 -0
- tree_sitter_analyzer/languages/css_plugin.py +449 -0
- tree_sitter_analyzer/languages/go_plugin.py +836 -0
- tree_sitter_analyzer/languages/html_plugin.py +496 -0
- tree_sitter_analyzer/languages/java_plugin.py +1299 -0
- tree_sitter_analyzer/languages/javascript_plugin.py +1622 -0
- tree_sitter_analyzer/languages/kotlin_plugin.py +656 -0
- tree_sitter_analyzer/languages/markdown_plugin.py +1928 -0
- tree_sitter_analyzer/languages/php_plugin.py +862 -0
- tree_sitter_analyzer/languages/python_plugin.py +1636 -0
- tree_sitter_analyzer/languages/ruby_plugin.py +757 -0
- tree_sitter_analyzer/languages/rust_plugin.py +673 -0
- tree_sitter_analyzer/languages/sql_plugin.py +2444 -0
- tree_sitter_analyzer/languages/typescript_plugin.py +1892 -0
- tree_sitter_analyzer/languages/yaml_plugin.py +695 -0
- tree_sitter_analyzer/legacy_table_formatter.py +860 -0
- tree_sitter_analyzer/mcp/__init__.py +34 -0
- tree_sitter_analyzer/mcp/resources/__init__.py +43 -0
- tree_sitter_analyzer/mcp/resources/code_file_resource.py +208 -0
- tree_sitter_analyzer/mcp/resources/project_stats_resource.py +586 -0
- tree_sitter_analyzer/mcp/server.py +869 -0
- tree_sitter_analyzer/mcp/tools/__init__.py +28 -0
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +779 -0
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +291 -0
- tree_sitter_analyzer/mcp/tools/base_tool.py +139 -0
- tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +816 -0
- tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +686 -0
- tree_sitter_analyzer/mcp/tools/list_files_tool.py +413 -0
- tree_sitter_analyzer/mcp/tools/output_format_validator.py +148 -0
- tree_sitter_analyzer/mcp/tools/query_tool.py +443 -0
- tree_sitter_analyzer/mcp/tools/read_partial_tool.py +464 -0
- tree_sitter_analyzer/mcp/tools/search_content_tool.py +836 -0
- tree_sitter_analyzer/mcp/tools/table_format_tool.py +572 -0
- tree_sitter_analyzer/mcp/tools/universal_analyze_tool.py +653 -0
- tree_sitter_analyzer/mcp/utils/__init__.py +113 -0
- tree_sitter_analyzer/mcp/utils/error_handler.py +569 -0
- tree_sitter_analyzer/mcp/utils/file_output_factory.py +217 -0
- tree_sitter_analyzer/mcp/utils/file_output_manager.py +322 -0
- tree_sitter_analyzer/mcp/utils/gitignore_detector.py +358 -0
- tree_sitter_analyzer/mcp/utils/path_resolver.py +414 -0
- tree_sitter_analyzer/mcp/utils/search_cache.py +343 -0
- tree_sitter_analyzer/models.py +840 -0
- tree_sitter_analyzer/mypy_current_errors.txt +2 -0
- tree_sitter_analyzer/output_manager.py +255 -0
- tree_sitter_analyzer/platform_compat/__init__.py +3 -0
- tree_sitter_analyzer/platform_compat/adapter.py +324 -0
- tree_sitter_analyzer/platform_compat/compare.py +224 -0
- tree_sitter_analyzer/platform_compat/detector.py +67 -0
- tree_sitter_analyzer/platform_compat/fixtures.py +228 -0
- tree_sitter_analyzer/platform_compat/profiles.py +217 -0
- tree_sitter_analyzer/platform_compat/record.py +55 -0
- tree_sitter_analyzer/platform_compat/recorder.py +155 -0
- tree_sitter_analyzer/platform_compat/report.py +92 -0
- tree_sitter_analyzer/plugins/__init__.py +280 -0
- tree_sitter_analyzer/plugins/base.py +647 -0
- tree_sitter_analyzer/plugins/manager.py +384 -0
- tree_sitter_analyzer/project_detector.py +328 -0
- tree_sitter_analyzer/queries/__init__.py +27 -0
- tree_sitter_analyzer/queries/csharp.py +216 -0
- tree_sitter_analyzer/queries/css.py +615 -0
- tree_sitter_analyzer/queries/go.py +275 -0
- tree_sitter_analyzer/queries/html.py +543 -0
- tree_sitter_analyzer/queries/java.py +402 -0
- tree_sitter_analyzer/queries/javascript.py +724 -0
- tree_sitter_analyzer/queries/kotlin.py +192 -0
- tree_sitter_analyzer/queries/markdown.py +258 -0
- tree_sitter_analyzer/queries/php.py +95 -0
- tree_sitter_analyzer/queries/python.py +859 -0
- tree_sitter_analyzer/queries/ruby.py +92 -0
- tree_sitter_analyzer/queries/rust.py +223 -0
- tree_sitter_analyzer/queries/sql.py +555 -0
- tree_sitter_analyzer/queries/typescript.py +871 -0
- tree_sitter_analyzer/queries/yaml.py +236 -0
- tree_sitter_analyzer/query_loader.py +272 -0
- tree_sitter_analyzer/security/__init__.py +22 -0
- tree_sitter_analyzer/security/boundary_manager.py +277 -0
- tree_sitter_analyzer/security/regex_checker.py +297 -0
- tree_sitter_analyzer/security/validator.py +599 -0
- tree_sitter_analyzer/table_formatter.py +782 -0
- tree_sitter_analyzer/utils/__init__.py +53 -0
- tree_sitter_analyzer/utils/logging.py +433 -0
- tree_sitter_analyzer/utils/tree_sitter_compat.py +289 -0
- tree_sitter_analyzer-1.9.17.1.dist-info/METADATA +485 -0
- tree_sitter_analyzer-1.9.17.1.dist-info/RECORD +149 -0
- tree_sitter_analyzer-1.9.17.1.dist-info/WHEEL +4 -0
- tree_sitter_analyzer-1.9.17.1.dist-info/entry_points.txt +25 -0
|
@@ -0,0 +1,553 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Language Detection System
|
|
4
|
+
|
|
5
|
+
Automatically detects programming language from file extensions and content.
|
|
6
|
+
Supports multiple languages with extensible configuration.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class LanguageDetector:
|
|
14
|
+
"""Automatic programming language detector"""
|
|
15
|
+
|
|
16
|
+
# Basic extension mapping
|
|
17
|
+
EXTENSION_MAPPING: dict[str, str] = {
|
|
18
|
+
# Java系
|
|
19
|
+
".java": "java",
|
|
20
|
+
".jsp": "jsp",
|
|
21
|
+
".jspx": "jsp",
|
|
22
|
+
# JavaScript/TypeScript系
|
|
23
|
+
".js": "javascript",
|
|
24
|
+
".jsx": "jsx",
|
|
25
|
+
".ts": "typescript",
|
|
26
|
+
".tsx": "typescript", # TSX files are TypeScript with JSX
|
|
27
|
+
".mts": "typescript", # ES module TypeScript
|
|
28
|
+
".cts": "typescript", # CommonJS TypeScript
|
|
29
|
+
".mjs": "javascript",
|
|
30
|
+
".cjs": "javascript",
|
|
31
|
+
# Python系
|
|
32
|
+
".py": "python",
|
|
33
|
+
".pyx": "python",
|
|
34
|
+
".pyi": "python",
|
|
35
|
+
".pyw": "python",
|
|
36
|
+
# C/C++系
|
|
37
|
+
".c": "c",
|
|
38
|
+
".cpp": "cpp",
|
|
39
|
+
".cxx": "cpp",
|
|
40
|
+
".cc": "cpp",
|
|
41
|
+
".h": "c", # Ambiguous
|
|
42
|
+
".hpp": "cpp",
|
|
43
|
+
".hxx": "cpp",
|
|
44
|
+
# その他の言語
|
|
45
|
+
".rs": "rust",
|
|
46
|
+
".go": "go",
|
|
47
|
+
".rb": "ruby",
|
|
48
|
+
".php": "php",
|
|
49
|
+
".kt": "kotlin",
|
|
50
|
+
".kts": "kotlin",
|
|
51
|
+
".swift": "swift",
|
|
52
|
+
".cs": "csharp",
|
|
53
|
+
".vb": "vbnet",
|
|
54
|
+
".fs": "fsharp",
|
|
55
|
+
".scala": "scala",
|
|
56
|
+
".clj": "clojure",
|
|
57
|
+
".hs": "haskell",
|
|
58
|
+
".ml": "ocaml",
|
|
59
|
+
".lua": "lua",
|
|
60
|
+
".pl": "perl",
|
|
61
|
+
".r": "r",
|
|
62
|
+
".m": "objc", # Ambiguous (MATLAB as well)
|
|
63
|
+
".dart": "dart",
|
|
64
|
+
".elm": "elm",
|
|
65
|
+
# Markdown系
|
|
66
|
+
".md": "markdown",
|
|
67
|
+
".markdown": "markdown",
|
|
68
|
+
".mdown": "markdown",
|
|
69
|
+
".mkd": "markdown",
|
|
70
|
+
".mkdn": "markdown",
|
|
71
|
+
".mdx": "markdown",
|
|
72
|
+
# HTML系
|
|
73
|
+
".html": "html",
|
|
74
|
+
".htm": "html",
|
|
75
|
+
".xhtml": "html",
|
|
76
|
+
# CSS系
|
|
77
|
+
".css": "css",
|
|
78
|
+
".scss": "css",
|
|
79
|
+
".sass": "css",
|
|
80
|
+
".less": "css",
|
|
81
|
+
# SQL系
|
|
82
|
+
".sql": "sql",
|
|
83
|
+
# JSON系
|
|
84
|
+
".json": "json",
|
|
85
|
+
".jsonc": "json",
|
|
86
|
+
".json5": "json",
|
|
87
|
+
# YAML系
|
|
88
|
+
".yaml": "yaml",
|
|
89
|
+
".yml": "yaml",
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
# Ambiguous extensions (map to multiple languages)
|
|
93
|
+
AMBIGUOUS_EXTENSIONS: dict[str, list[str]] = {
|
|
94
|
+
".h": ["c", "cpp", "objc"],
|
|
95
|
+
".m": ["objc", "matlab"],
|
|
96
|
+
".sql": ["sql", "plsql", "mysql"],
|
|
97
|
+
".xml": ["xml", "html", "jsp"],
|
|
98
|
+
".json": ["json", "jsonc"],
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
# Content-based detection patterns
|
|
102
|
+
CONTENT_PATTERNS: dict[str, dict[str, list[str]]] = {
|
|
103
|
+
"c_vs_cpp": {
|
|
104
|
+
"cpp": ["#include <iostream>", "std::", "namespace", "class ", "template<"],
|
|
105
|
+
"c": ["#include <stdio.h>", "printf(", "malloc(", "typedef struct"],
|
|
106
|
+
},
|
|
107
|
+
"objc_vs_matlab": {
|
|
108
|
+
"objc": ["#import", "@interface", "@implementation", "NSString", "alloc]"],
|
|
109
|
+
"matlab": ["function ", "end;", "disp(", "clc;", "clear all"],
|
|
110
|
+
},
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
# Tree-sitter supported languages
|
|
114
|
+
SUPPORTED_LANGUAGES = {
|
|
115
|
+
"java",
|
|
116
|
+
"javascript",
|
|
117
|
+
"typescript",
|
|
118
|
+
"python",
|
|
119
|
+
"c",
|
|
120
|
+
"cpp",
|
|
121
|
+
"rust",
|
|
122
|
+
"go",
|
|
123
|
+
"php",
|
|
124
|
+
"ruby",
|
|
125
|
+
"markdown",
|
|
126
|
+
"html",
|
|
127
|
+
"css",
|
|
128
|
+
"json",
|
|
129
|
+
"sql",
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
def __init__(self) -> None:
|
|
133
|
+
"""Initialize detector"""
|
|
134
|
+
self.extension_map = {
|
|
135
|
+
".java": ("java", 0.9),
|
|
136
|
+
".js": ("javascript", 0.9),
|
|
137
|
+
".jsx": ("javascript", 0.8),
|
|
138
|
+
".ts": ("typescript", 0.9),
|
|
139
|
+
".tsx": ("typescript", 0.8),
|
|
140
|
+
".mts": ("typescript", 0.9),
|
|
141
|
+
".cts": ("typescript", 0.9),
|
|
142
|
+
".py": ("python", 0.9),
|
|
143
|
+
".pyw": ("python", 0.8),
|
|
144
|
+
".c": ("c", 0.9),
|
|
145
|
+
".h": ("c", 0.7),
|
|
146
|
+
".cpp": ("cpp", 0.9),
|
|
147
|
+
".cxx": ("cpp", 0.9),
|
|
148
|
+
".cc": ("cpp", 0.9),
|
|
149
|
+
".hpp": ("cpp", 0.8),
|
|
150
|
+
".rs": ("rust", 0.9),
|
|
151
|
+
".go": ("go", 0.9),
|
|
152
|
+
".cs": ("csharp", 0.9),
|
|
153
|
+
".php": ("php", 0.9),
|
|
154
|
+
".rb": ("ruby", 0.9),
|
|
155
|
+
".swift": ("swift", 0.9),
|
|
156
|
+
".kt": ("kotlin", 0.9),
|
|
157
|
+
".kts": ("kotlin", 0.9),
|
|
158
|
+
".scala": ("scala", 0.9),
|
|
159
|
+
".clj": ("clojure", 0.9),
|
|
160
|
+
".hs": ("haskell", 0.9),
|
|
161
|
+
".ml": ("ocaml", 0.9),
|
|
162
|
+
".fs": ("fsharp", 0.9),
|
|
163
|
+
".elm": ("elm", 0.9),
|
|
164
|
+
".dart": ("dart", 0.9),
|
|
165
|
+
".lua": ("lua", 0.9),
|
|
166
|
+
".r": ("r", 0.9),
|
|
167
|
+
".m": ("objectivec", 0.7),
|
|
168
|
+
".mm": ("objectivec", 0.8),
|
|
169
|
+
# Markdown extensions
|
|
170
|
+
".md": ("markdown", 0.9),
|
|
171
|
+
".markdown": ("markdown", 0.9),
|
|
172
|
+
".mdown": ("markdown", 0.8),
|
|
173
|
+
".mkd": ("markdown", 0.8),
|
|
174
|
+
".mkdn": ("markdown", 0.8),
|
|
175
|
+
".mdx": ("markdown", 0.7), # MDX might be mixed with JSX
|
|
176
|
+
# HTML extensions
|
|
177
|
+
".html": ("html", 0.9),
|
|
178
|
+
".htm": ("html", 0.9),
|
|
179
|
+
".xhtml": ("html", 0.8),
|
|
180
|
+
# CSS extensions
|
|
181
|
+
".css": ("css", 0.9),
|
|
182
|
+
".scss": ("css", 0.8), # Sass/SCSS
|
|
183
|
+
".sass": ("css", 0.8), # Sass
|
|
184
|
+
".less": ("css", 0.8), # Less
|
|
185
|
+
# JSON extensions
|
|
186
|
+
".json": ("json", 0.9),
|
|
187
|
+
".jsonc": ("json", 0.8), # JSON with comments
|
|
188
|
+
".json5": ("json", 0.8), # JSON5 format
|
|
189
|
+
# SQL extensions
|
|
190
|
+
".sql": ("sql", 0.9),
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
# Content-based detection patterns
|
|
194
|
+
self.content_patterns = {
|
|
195
|
+
"java": [
|
|
196
|
+
(r"package\s+[\w\.]+\s*;", 0.3),
|
|
197
|
+
(r"public\s+class\s+\w+", 0.3),
|
|
198
|
+
(r"import\s+[\w\.]+\s*;", 0.2),
|
|
199
|
+
(r"@\w+\s*\(", 0.2), # Annotations
|
|
200
|
+
],
|
|
201
|
+
"python": [
|
|
202
|
+
(r"def\s+\w+\s*\(", 0.3),
|
|
203
|
+
(r"import\s+\w+", 0.2),
|
|
204
|
+
(r"from\s+\w+\s+import", 0.2),
|
|
205
|
+
(r'if\s+__name__\s*==\s*["\']__main__["\']', 0.3),
|
|
206
|
+
],
|
|
207
|
+
"javascript": [
|
|
208
|
+
(r"function\s+\w+\s*\(", 0.3),
|
|
209
|
+
(r"var\s+\w+\s*=", 0.2),
|
|
210
|
+
(r"let\s+\w+\s*=", 0.2),
|
|
211
|
+
(r"const\s+\w+\s*=", 0.2),
|
|
212
|
+
(r"console\.log\s*\(", 0.1),
|
|
213
|
+
],
|
|
214
|
+
"typescript": [
|
|
215
|
+
(r"interface\s+\w+", 0.3),
|
|
216
|
+
(r"type\s+\w+\s*=", 0.2),
|
|
217
|
+
(r":\s*\w+\s*=", 0.2), # Type annotations
|
|
218
|
+
(r"export\s+(interface|type|class)", 0.2),
|
|
219
|
+
],
|
|
220
|
+
"c": [
|
|
221
|
+
(r"#include\s*<[\w\.]+>", 0.3),
|
|
222
|
+
(r"int\s+main\s*\(", 0.3),
|
|
223
|
+
(r"printf\s*\(", 0.2),
|
|
224
|
+
(r"#define\s+\w+", 0.2),
|
|
225
|
+
],
|
|
226
|
+
"cpp": [
|
|
227
|
+
(r"#include\s*<[\w\.]+>", 0.2),
|
|
228
|
+
(r"using\s+namespace\s+\w+", 0.3),
|
|
229
|
+
(r"std::\w+", 0.2),
|
|
230
|
+
(r"class\s+\w+\s*{", 0.3),
|
|
231
|
+
],
|
|
232
|
+
"markdown": [
|
|
233
|
+
(r"^#{1,6}\s+", 0.4), # ATX headers
|
|
234
|
+
(r"^\s*[-*+]\s+", 0.3), # List items
|
|
235
|
+
(r"```[\w]*", 0.3), # Fenced code blocks
|
|
236
|
+
(r"\[.*\]\(.*\)", 0.2), # Links
|
|
237
|
+
(r"!\[.*\]\(.*\)", 0.2), # Images
|
|
238
|
+
(r"^\s*>\s+", 0.2), # Blockquotes
|
|
239
|
+
(r"^\s*\|.*\|", 0.2), # Tables
|
|
240
|
+
(r"^[-=]{3,}$", 0.2), # Setext headers or horizontal rules
|
|
241
|
+
],
|
|
242
|
+
"html": [
|
|
243
|
+
(r"<!DOCTYPE\s+html", 0.4), # HTML5 doctype
|
|
244
|
+
(r"<html[^>]*>", 0.3), # HTML tag
|
|
245
|
+
(r"<head[^>]*>", 0.3), # Head tag
|
|
246
|
+
(r"<body[^>]*>", 0.3), # Body tag
|
|
247
|
+
(r"<div[^>]*>", 0.2), # Div tag
|
|
248
|
+
(r"<p[^>]*>", 0.2), # Paragraph tag
|
|
249
|
+
(r"<a\s+href=", 0.2), # Link tag with href
|
|
250
|
+
(r"<img\s+src=", 0.2), # Image tag with src
|
|
251
|
+
],
|
|
252
|
+
"css": [
|
|
253
|
+
(r"[.#][\w-]+\s*{", 0.4), # CSS selectors
|
|
254
|
+
(r"@media\s+", 0.3), # Media queries
|
|
255
|
+
(r"@import\s+", 0.3), # Import statements
|
|
256
|
+
(r"@keyframes\s+", 0.3), # Keyframes
|
|
257
|
+
(r":\s*[\w-]+\s*;", 0.2), # Property declarations
|
|
258
|
+
(r"color\s*:", 0.2), # Color property
|
|
259
|
+
(r"font-", 0.2), # Font properties
|
|
260
|
+
(r"margin\s*:", 0.2), # Margin property
|
|
261
|
+
],
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
from .utils import log_debug, log_warning
|
|
265
|
+
|
|
266
|
+
self._log_debug = log_debug
|
|
267
|
+
self._log_warning = log_warning
|
|
268
|
+
|
|
269
|
+
def detect_language(
|
|
270
|
+
self, file_path: str, content: str | None = None
|
|
271
|
+
) -> tuple[str, float]:
|
|
272
|
+
"""
|
|
273
|
+
ファイルパスとコンテンツから言語を判定
|
|
274
|
+
|
|
275
|
+
Args:
|
|
276
|
+
file_path: ファイルパス
|
|
277
|
+
content: ファイルコンテンツ(任意、曖昧性解決用)
|
|
278
|
+
|
|
279
|
+
Returns:
|
|
280
|
+
(言語名, 信頼度) のタプル - 常に有効な言語名を返す
|
|
281
|
+
"""
|
|
282
|
+
# Handle invalid input
|
|
283
|
+
if not file_path or not isinstance(file_path, str):
|
|
284
|
+
return "unknown", 0.0
|
|
285
|
+
|
|
286
|
+
path = Path(file_path)
|
|
287
|
+
extension = path.suffix.lower()
|
|
288
|
+
|
|
289
|
+
# Direct mapping by extension
|
|
290
|
+
if extension in self.EXTENSION_MAPPING:
|
|
291
|
+
language = self.EXTENSION_MAPPING[extension]
|
|
292
|
+
|
|
293
|
+
# Ensure language is valid
|
|
294
|
+
if not language or language.strip() == "":
|
|
295
|
+
return "unknown", 0.0
|
|
296
|
+
|
|
297
|
+
# Use confidence from extension_map if available
|
|
298
|
+
if extension in self.extension_map:
|
|
299
|
+
_, confidence = self.extension_map[extension]
|
|
300
|
+
return language, confidence
|
|
301
|
+
|
|
302
|
+
# No ambiguity -> high confidence
|
|
303
|
+
if extension not in self.AMBIGUOUS_EXTENSIONS:
|
|
304
|
+
return language, 1.0
|
|
305
|
+
|
|
306
|
+
# Resolve ambiguity using content
|
|
307
|
+
if content:
|
|
308
|
+
refined_language = self._resolve_ambiguity(extension, content)
|
|
309
|
+
# Ensure refined language is valid
|
|
310
|
+
if not refined_language or refined_language.strip() == "":
|
|
311
|
+
refined_language = "unknown"
|
|
312
|
+
return refined_language, 0.9 if refined_language != language else 0.7
|
|
313
|
+
else:
|
|
314
|
+
return language, 0.7 # Lower confidence without content
|
|
315
|
+
|
|
316
|
+
# Unknown extension - always return "unknown" instead of None
|
|
317
|
+
return "unknown", 0.0
|
|
318
|
+
|
|
319
|
+
def detect_from_extension(self, file_path: str) -> str:
|
|
320
|
+
"""
|
|
321
|
+
Quick detection using extension only
|
|
322
|
+
|
|
323
|
+
Args:
|
|
324
|
+
file_path: File path
|
|
325
|
+
|
|
326
|
+
Returns:
|
|
327
|
+
Detected language name - 常に有効な文字列を返す
|
|
328
|
+
"""
|
|
329
|
+
# Handle invalid input
|
|
330
|
+
if not file_path or not isinstance(file_path, str):
|
|
331
|
+
return "unknown"
|
|
332
|
+
|
|
333
|
+
result = self.detect_language(file_path)
|
|
334
|
+
if isinstance(result, tuple):
|
|
335
|
+
language, _ = result
|
|
336
|
+
# Ensure language is valid
|
|
337
|
+
if not language or language.strip() == "":
|
|
338
|
+
return "unknown"
|
|
339
|
+
return language
|
|
340
|
+
|
|
341
|
+
def is_supported(self, language: str) -> bool:
|
|
342
|
+
"""
|
|
343
|
+
Check if language is supported by Tree-sitter
|
|
344
|
+
|
|
345
|
+
Args:
|
|
346
|
+
language: Language name
|
|
347
|
+
|
|
348
|
+
Returns:
|
|
349
|
+
Support status
|
|
350
|
+
"""
|
|
351
|
+
# First check the static list for basic support
|
|
352
|
+
if language in self.SUPPORTED_LANGUAGES:
|
|
353
|
+
return True
|
|
354
|
+
|
|
355
|
+
# Also check if we have a plugin for this language
|
|
356
|
+
try:
|
|
357
|
+
from .plugins.manager import PluginManager
|
|
358
|
+
|
|
359
|
+
plugin_manager = PluginManager()
|
|
360
|
+
plugin_manager.load_plugins() # Ensure plugins are loaded
|
|
361
|
+
supported_languages = plugin_manager.get_supported_languages()
|
|
362
|
+
return language in supported_languages
|
|
363
|
+
except Exception:
|
|
364
|
+
# Fallback to static list if plugin manager fails
|
|
365
|
+
return language in self.SUPPORTED_LANGUAGES
|
|
366
|
+
|
|
367
|
+
def get_supported_extensions(self) -> list[str]:
|
|
368
|
+
"""
|
|
369
|
+
Get list of supported extensions
|
|
370
|
+
|
|
371
|
+
Returns:
|
|
372
|
+
List of extensions
|
|
373
|
+
"""
|
|
374
|
+
return sorted(self.EXTENSION_MAPPING.keys())
|
|
375
|
+
|
|
376
|
+
def get_supported_languages(self) -> list[str]:
|
|
377
|
+
"""
|
|
378
|
+
Get list of supported languages
|
|
379
|
+
|
|
380
|
+
Returns:
|
|
381
|
+
List of languages
|
|
382
|
+
"""
|
|
383
|
+
return sorted(self.SUPPORTED_LANGUAGES)
|
|
384
|
+
|
|
385
|
+
def _resolve_ambiguity(self, extension: str, content: str) -> str:
|
|
386
|
+
"""
|
|
387
|
+
Resolve ambiguous extension using content
|
|
388
|
+
|
|
389
|
+
Args:
|
|
390
|
+
extension: File extension
|
|
391
|
+
content: File content
|
|
392
|
+
|
|
393
|
+
Returns:
|
|
394
|
+
Resolved language name
|
|
395
|
+
"""
|
|
396
|
+
if extension not in self.AMBIGUOUS_EXTENSIONS:
|
|
397
|
+
return self.EXTENSION_MAPPING.get(extension, "unknown")
|
|
398
|
+
|
|
399
|
+
candidates = self.AMBIGUOUS_EXTENSIONS[extension]
|
|
400
|
+
|
|
401
|
+
# .h: C vs C++ vs Objective-C
|
|
402
|
+
if extension == ".h":
|
|
403
|
+
return self._detect_c_family(content, candidates)
|
|
404
|
+
|
|
405
|
+
# .m: Objective-C vs MATLAB
|
|
406
|
+
elif extension == ".m":
|
|
407
|
+
return self._detect_objc_vs_matlab(content, candidates)
|
|
408
|
+
|
|
409
|
+
# Fallback to first candidate
|
|
410
|
+
return candidates[0]
|
|
411
|
+
|
|
412
|
+
def _detect_c_family(self, content: str, candidates: list[str]) -> str:
|
|
413
|
+
"""Detect among C-family languages"""
|
|
414
|
+
cpp_score = 0
|
|
415
|
+
c_score = 0
|
|
416
|
+
objc_score = 0
|
|
417
|
+
|
|
418
|
+
# C++ features
|
|
419
|
+
cpp_patterns = self.CONTENT_PATTERNS["c_vs_cpp"]["cpp"]
|
|
420
|
+
for pattern in cpp_patterns:
|
|
421
|
+
if pattern in content:
|
|
422
|
+
cpp_score += 1
|
|
423
|
+
|
|
424
|
+
# C features
|
|
425
|
+
c_patterns = self.CONTENT_PATTERNS["c_vs_cpp"]["c"]
|
|
426
|
+
for pattern in c_patterns:
|
|
427
|
+
if pattern in content:
|
|
428
|
+
c_score += 1
|
|
429
|
+
|
|
430
|
+
# Objective-C features
|
|
431
|
+
objc_patterns = self.CONTENT_PATTERNS["objc_vs_matlab"]["objc"]
|
|
432
|
+
for pattern in objc_patterns:
|
|
433
|
+
if pattern in content:
|
|
434
|
+
objc_score += 3 # 強い指標なので重み大
|
|
435
|
+
|
|
436
|
+
# Select best-scoring language
|
|
437
|
+
scores = {"cpp": cpp_score, "c": c_score, "objc": objc_score}
|
|
438
|
+
best_language = max(scores, key=lambda x: scores[x])
|
|
439
|
+
|
|
440
|
+
# If objc not a candidate, fallback to C/C++
|
|
441
|
+
if best_language == "objc" and "objc" not in candidates:
|
|
442
|
+
best_language = "cpp" if cpp_score > c_score else "c"
|
|
443
|
+
|
|
444
|
+
return best_language if scores[best_language] > 0 else candidates[0]
|
|
445
|
+
|
|
446
|
+
def _detect_objc_vs_matlab(self, content: str, candidates: list[str]) -> str:
|
|
447
|
+
"""Detect between Objective-C and MATLAB"""
|
|
448
|
+
objc_score = 0
|
|
449
|
+
matlab_score = 0
|
|
450
|
+
|
|
451
|
+
# Objective-C patterns
|
|
452
|
+
for pattern in self.CONTENT_PATTERNS["objc_vs_matlab"]["objc"]:
|
|
453
|
+
if pattern in content:
|
|
454
|
+
objc_score += 1
|
|
455
|
+
|
|
456
|
+
# MATLAB patterns
|
|
457
|
+
for pattern in self.CONTENT_PATTERNS["objc_vs_matlab"]["matlab"]:
|
|
458
|
+
if pattern in content:
|
|
459
|
+
matlab_score += 1
|
|
460
|
+
|
|
461
|
+
if objc_score > matlab_score:
|
|
462
|
+
return "objc"
|
|
463
|
+
elif matlab_score > objc_score:
|
|
464
|
+
return "matlab"
|
|
465
|
+
else:
|
|
466
|
+
return candidates[0] # default
|
|
467
|
+
|
|
468
|
+
def add_extension_mapping(self, extension: str, language: str) -> None:
|
|
469
|
+
"""
|
|
470
|
+
Add custom extension mapping
|
|
471
|
+
|
|
472
|
+
Args:
|
|
473
|
+
extension: File extension (with dot)
|
|
474
|
+
language: Language name
|
|
475
|
+
"""
|
|
476
|
+
self.EXTENSION_MAPPING[extension.lower()] = language
|
|
477
|
+
|
|
478
|
+
def get_language_info(self, language: str) -> dict[str, Any]:
|
|
479
|
+
"""
|
|
480
|
+
Get language information
|
|
481
|
+
|
|
482
|
+
Args:
|
|
483
|
+
language: Language name
|
|
484
|
+
|
|
485
|
+
Returns:
|
|
486
|
+
Language info dictionary
|
|
487
|
+
"""
|
|
488
|
+
extensions = [
|
|
489
|
+
ext for ext, lang in self.EXTENSION_MAPPING.items() if lang == language
|
|
490
|
+
]
|
|
491
|
+
|
|
492
|
+
return {
|
|
493
|
+
"name": language,
|
|
494
|
+
"extensions": extensions,
|
|
495
|
+
"supported": self.is_supported(language),
|
|
496
|
+
"tree_sitter_available": language in self.SUPPORTED_LANGUAGES,
|
|
497
|
+
}
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
# Global instance: module-level detector shared by is_language_supported()
|
|
501
|
+
detector = LanguageDetector()
|
|
502
|
+
|
|
503
|
+
|
|
504
|
+
def detect_language_from_file(file_path: str) -> str:
    """
    Detect language from path (simple API)

    Only the file extension is considered; the file is not read.

    Args:
        file_path: File path

    Returns:
        Detected language name - always a non-empty string ("unknown" for
        invalid input or when nothing could be detected)
    """
    # Reject anything that is not a non-empty string
    if not isinstance(file_path, str) or not file_path:
        return "unknown"

    # A new detector is built on every call rather than reusing the
    # module-level instance.
    detected = LanguageDetector().detect_from_extension(file_path)

    # Normalise empty / whitespace-only results to "unknown"
    if detected and detected.strip() != "":
        return detected
    return "unknown"
|
|
527
|
+
|
|
528
|
+
|
|
529
|
+
def is_language_supported(language: str) -> bool:
    """
    Check if language is supported (simple API)

    Delegates to the module-level ``detector``. ``detector.is_supported``
    already checks the static language list first and then the plugin
    manager, and already falls back to the static list when the plugin
    manager fails. Repeating the plugin lookup here (as this function used
    to) only loaded the plugins again without being able to change the
    answer.

    Args:
        language: Language name

    Returns:
        Support status
    """
    return detector.is_supported(language)
|