vcode-analysis 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,279 @@
1
+ """Kotlin AST 解析器
2
+
3
+ 提供双模式解析能力:
4
+ - fast: 正则表达式快速解析,无外部依赖
5
+ - precise: tree-sitter AST 精确解析
6
+ - auto: 根据文件特征自动选择模式
7
+ """
8
+
9
+ from dataclasses import dataclass
10
+ from typing import Optional, Literal
11
+ from pathlib import Path
12
+
13
+ from .kotlin.models import KotlinASTResult
14
+ from .kotlin.regex_parser import KotlinRegexParser
15
+
16
+ # 尝试导入 AST 解析器
17
+ try:
18
+ from .kotlin.ast_parser import KotlinTreeSitterParser, TREE_SITTER_AVAILABLE
19
+ except ImportError:
20
+ KotlinTreeSitterParser = None
21
+ TREE_SITTER_AVAILABLE = False
22
+
23
+
24
+ # 解析模式类型
25
+ ParseMode = Literal['fast', 'precise', 'auto']
26
+
27
+
28
@dataclass
class KotlinParserConfig:
    """Settings that steer the dual-mode Kotlin parser.

    Attributes:
        default_mode: Parse mode applied when a parse call does not
            specify one.
        auto_threshold_lines: In 'auto' mode, sources with more lines
            than this value are parsed in 'fast' mode.
        prefer_precise: When true and tree-sitter is importable, the
            parser tries to create the tree-sitter backend at
            construction time; when false it never creates it.
    """
    default_mode: ParseMode = 'auto'
    auto_threshold_lines: int = 500
    prefer_precise: bool = True
34
+
35
+
36
class KotlinASTParser:
    """Dual-mode Kotlin source parser.

    Every parse request is served either by the regex-based parser
    ('fast') or, when it could be created, by the tree-sitter based
    parser ('precise'). In 'auto' mode the backend is chosen per source
    by ``_select_mode``.
    """

    def __init__(self, config: Optional[KotlinParserConfig] = None):
        """Create the parser and, if possible, its tree-sitter backend.

        Args:
            config: Parser settings; a default ``KotlinParserConfig`` is
                used when this is ``None``.
        """
        self.config = config or KotlinParserConfig()
        self._regex_parser = KotlinRegexParser()
        self._ast_parser = None

        # The precise backend is optional: it is only attempted when the
        # tree-sitter import succeeded and the config asks for it.
        if TREE_SITTER_AVAILABLE and self.config.prefer_precise:
            try:
                self._ast_parser = KotlinTreeSitterParser()
            except Exception:
                # Best effort: any construction failure simply leaves the
                # parser in regex-only operation.
                self._ast_parser = None

    def parse_file(
        self,
        file_path: str,
        mode: Optional[ParseMode] = None
    ) -> KotlinASTResult:
        """Parse a Kotlin source file from disk.

        Args:
            file_path: Path of a ``.kt`` or ``.kts`` file.
            mode: 'fast' (regex), 'precise' (tree-sitter) or 'auto'
                (chosen from the source text). ``None`` falls back to the
                configured default mode.

        Returns:
            The parse result. A missing path, a non-Kotlin suffix, or any
            exception raised while reading or parsing yields a result with
            ``success=False`` and an ``error`` message instead of raising.
        """
        path = Path(file_path)

        if not path.exists():
            return KotlinASTResult(
                file_path=file_path,
                success=False,
                error=f"文件不存在: {file_path}"
            )

        if path.suffix not in (".kt", ".kts"):
            return KotlinASTResult(
                file_path=file_path,
                success=False,
                error=f"不是 Kotlin 文件: {file_path}"
            )

        try:
            content = path.read_text(encoding="utf-8")
            return self.parse_code(content, file_path, mode)
        except Exception as e:
            return KotlinASTResult(
                file_path=file_path,
                success=False,
                error=str(e)
            )

    def parse_code(
        self,
        code: str,
        file_path: str = "<string>",
        mode: Optional[ParseMode] = None
    ) -> KotlinASTResult:
        """Parse Kotlin source text.

        Args:
            code: Kotlin source code.
            file_path: Path passed through to the backend parser.
            mode: Requested parse mode; ``None`` uses the configured
                default, and 'auto' is resolved via ``_select_mode``.

        Returns:
            The backend's result with ``parse_mode`` set to the backend
            that actually ran. A 'precise' request is served by the regex
            parser (and labelled 'fast') when no tree-sitter backend
            exists.
        """
        actual_mode = mode or self.config.default_mode

        if actual_mode == 'auto':
            actual_mode = self._select_mode(code)

        if actual_mode == 'precise' and self._ast_parser:
            result = self._ast_parser.parse_code(code, file_path)
            result.parse_mode = 'precise'
            return result

        result = self._regex_parser.parse_code(code, file_path)
        result.parse_mode = 'fast'
        return result

    def _select_mode(self, code: str) -> ParseMode:
        """Choose the parse mode for 'auto' requests.

        'fast' is returned when no tree-sitter backend exists, when the
        source has more lines than ``config.auto_threshold_lines``, when
        the running brace depth (evaluated after each line) exceeds 5, or
        when the source contains more than 50 opening braces. Everything
        else is parsed in 'precise' mode.

        Args:
            code: Kotlin source code.

        Returns:
            'fast' or 'precise'.
        """
        # Without a precise backend every path below ends in 'fast', so
        # the source does not need to be scanned at all.
        if not self._ast_parser:
            return 'fast'

        lines = code.splitlines()
        if len(lines) > self.config.auto_threshold_lines:
            return 'fast'

        # Track the deepest brace nesting reached at the end of any line.
        max_depth = 0
        depth = 0
        for line in lines:
            depth += line.count('{') - line.count('}')
            max_depth = max(max_depth, depth)

        if max_depth > 5 or code.count('{') > 50:
            return 'fast'

        # No keyword scan is needed here: sources that pass the size and
        # nesting checks use the precise backend whether or not they
        # contain constructs such as sealed classes, coroutines or flows.
        return 'precise'

    @property
    def precise_mode_available(self) -> bool:
        """Whether a tree-sitter backend was successfully created."""
        return self._ast_parser is not None

    def get_parser_info(self) -> dict:
        """Return a dict describing backend availability and settings."""
        return {
            'precise_mode_available': self.precise_mode_available,
            'tree_sitter_available': TREE_SITTER_AVAILABLE,
            'default_mode': self.config.default_mode,
            'auto_threshold_lines': self.config.auto_threshold_lines,
        }
196
+
197
+
198
def analyze_kotlin_file(
    file_path: str,
    mode: Optional[ParseMode] = None
) -> KotlinASTResult:
    """Parse one Kotlin file with a freshly built default parser.

    Args:
        file_path: Path of the file to parse.
        mode: Parse mode forwarded to ``KotlinASTParser.parse_file``.

    Returns:
        The result produced by ``KotlinASTParser.parse_file``.
    """
    return KotlinASTParser().parse_file(file_path, mode=mode)
213
+
214
+
215
def analyze_kotlin_code(
    code: str,
    mode: Optional[ParseMode] = None
) -> KotlinASTResult:
    """Parse Kotlin source text with a freshly built default parser.

    Args:
        code: Kotlin source code.
        mode: Parse mode forwarded to ``KotlinASTParser.parse_code``.

    Returns:
        The result produced by ``KotlinASTParser.parse_code``; the file
        path argument is left at that method's default.
    """
    default_parser = KotlinASTParser()
    outcome = default_parser.parse_code(code, mode=mode)
    return outcome
230
+
231
+
232
+ # 为了向后兼容,导出原有类型
233
+ from .kotlin.models import (
234
+ KotlinFunctionInfo,
235
+ KotlinPropertyInfo,
236
+ KotlinClassInfo,
237
+ KotlinImportInfo,
238
+ KotlinSealedClassInfo,
239
+ KotlinWhenExpression,
240
+ KotlinCoroutineInfo,
241
+ KotlinFlowOperator,
242
+ KotlinTypeAlias,
243
+ KotlinValueClass,
244
+ KotlinDelegatedProperty,
245
+ KotlinExtensionFunction,
246
+ KotlinExtensionProperty,
247
+ KotlinObjectDeclaration,
248
+ KotlinEnumClassInfo,
249
+ KotlinDataClassInfo,
250
+ )
251
+
252
+
253
+ __all__ = [
254
+ # 解析器
255
+ "KotlinASTParser",
256
+ "KotlinParserConfig",
257
+ "ParseMode",
258
+ # 便捷函数
259
+ "analyze_kotlin_file",
260
+ "analyze_kotlin_code",
261
+ # 数据类型
262
+ "KotlinASTResult",
263
+ "KotlinFunctionInfo",
264
+ "KotlinPropertyInfo",
265
+ "KotlinClassInfo",
266
+ "KotlinImportInfo",
267
+ "KotlinSealedClassInfo",
268
+ "KotlinWhenExpression",
269
+ "KotlinCoroutineInfo",
270
+ "KotlinFlowOperator",
271
+ "KotlinTypeAlias",
272
+ "KotlinValueClass",
273
+ "KotlinDelegatedProperty",
274
+ "KotlinExtensionFunction",
275
+ "KotlinExtensionProperty",
276
+ "KotlinObjectDeclaration",
277
+ "KotlinEnumClassInfo",
278
+ "KotlinDataClassInfo",
279
+ ]
@@ -0,0 +1,429 @@
1
+ """Python AST 解析器
2
+
3
+ 使用 Python 内置 ast 模块解析代码结构,提供比正则表达式更精确的分析能力。
4
+ """
5
+
6
+ import ast
7
+ from dataclasses import dataclass, field
8
+ from typing import Optional
9
+ from pathlib import Path
10
+
11
+
12
@dataclass
class FunctionInfo:
    """Summary of one function or method definition.

    Attributes:
        name: Function name.
        line_start: ``lineno`` of the definition node.
        line_end: ``end_lineno`` of the node (``lineno`` if unavailable).
        args: Parameter names in signature order; the variadic ones are
            prefixed with ``*`` / ``**``.
        returns: Return annotation text, or ``None`` when unannotated.
        docstring: Docstring text, or ``None``.
        decorators: Decorator names (dotted for attribute access).
        is_async: True for ``async def``.
        is_method: True when parsed as part of a class body.
        arg_annotations: Annotation text keyed by the matching entry of
            ``args``; unannotated parameters have no entry.
    """
    name: str
    line_start: int
    line_end: int
    args: list[str]
    returns: Optional[str]
    docstring: Optional[str]
    decorators: list[str]
    is_async: bool
    is_method: bool = False
    # Added last and defaulted so existing positional construction of
    # FunctionInfo keeps working.
    arg_annotations: dict[str, str] = field(default_factory=dict)


@dataclass
class ClassInfo:
    """Summary of one class definition.

    Attributes:
        name: Class name.
        line_start: ``lineno`` of the class node.
        line_end: ``end_lineno`` of the node (``lineno`` if unavailable).
        docstring: Docstring text, or ``None``.
        decorators: Decorator names.
        bases: Base class expressions as text.
        methods: Functions defined directly in the class body.
        attributes: Names assigned or annotated directly in the class body.
    """
    name: str
    line_start: int
    line_end: int
    docstring: Optional[str]
    decorators: list[str]
    bases: list[str]
    methods: list[FunctionInfo]
    attributes: list[str]


@dataclass
class ImportInfo:
    """Summary of one ``import`` / ``from ... import`` statement."""
    module: str  # source module; empty for plain ``import`` statements
    names: list[str]  # imported names
    aliases: dict[str, str]  # imported name -> ``as`` alias
    line: int
    is_from: bool  # True for ``from ... import``, False for ``import``


@dataclass
class VariableInfo:
    """Summary of one variable binding."""
    name: str
    line: int
    value_type: Optional[str]  # annotation text or inferred type, if any
    is_constant: bool  # True when the name is written in upper case


@dataclass
class PythonASTResult:
    """Outcome of parsing one Python source.

    ``success`` is False and ``error`` is set when the source could not
    be read or parsed; the remaining fields then keep their defaults.
    """
    file_path: str
    success: bool
    error: Optional[str] = None

    # Structure
    imports: list[ImportInfo] = field(default_factory=list)
    classes: list[ClassInfo] = field(default_factory=list)
    functions: list[FunctionInfo] = field(default_factory=list)
    variables: list[VariableInfo] = field(default_factory=list)

    # Statistics (coverage values are percentages in the range 0-100)
    total_lines: int = 0
    docstring_coverage: float = 0.0
    type_hint_coverage: float = 0.0

    # Module-level information
    module_docstring: Optional[str] = None


class PythonASTParser:
    """Extracts structural information from Python source via ``ast``."""

    # Nodes that open a new scope; module-scope iteration stops at them.
    _SCOPE_NODES = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef, ast.Lambda)

    def parse_file(self, file_path: str) -> PythonASTResult:
        """Parse a ``.py`` file from disk.

        Args:
            file_path: Path of the file to parse.

        Returns:
            The parse result. A missing path, a suffix other than ``.py``
            or any exception while reading/parsing yields a result with
            ``success=False`` and an ``error`` message instead of raising.
        """
        path = Path(file_path)

        if not path.exists():
            return PythonASTResult(
                file_path=file_path,
                success=False,
                error=f"文件不存在: {file_path}"
            )

        if path.suffix != ".py":
            return PythonASTResult(
                file_path=file_path,
                success=False,
                error=f"不是 Python 文件: {file_path}"
            )

        try:
            content = path.read_text(encoding="utf-8")
            return self.parse_code(content, file_path)
        except Exception as e:
            return PythonASTResult(
                file_path=file_path,
                success=False,
                error=str(e)
            )

    def parse_code(self, code: str, file_path: str = "<string>") -> PythonASTResult:
        """Parse Python source text.

        Imports and classes are collected at any nesting depth. Functions
        and variables are collected only at module scope: statements
        nested in ``if``/``try``/``with``/loops are included, bodies of
        functions, classes and lambdas are not.

        Args:
            code: Python source code.
            file_path: Path recorded in the result.

        Returns:
            The populated result, or a failed result when the source
            cannot be parsed.
        """
        try:
            tree = ast.parse(code)
        except SyntaxError as e:
            return PythonASTResult(
                file_path=file_path,
                success=False,
                error=f"语法错误: {e.msg} (行 {e.lineno})"
            )
        except ValueError as e:
            # Some interpreter versions reject e.g. null bytes with
            # ValueError rather than SyntaxError.
            return PythonASTResult(
                file_path=file_path,
                success=False,
                error=str(e)
            )

        result = PythonASTResult(
            file_path=file_path,
            success=True,
            total_lines=len(code.splitlines())
        )
        result.module_docstring = ast.get_docstring(tree)

        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                result.imports.append(self._parse_import(node))
            elif isinstance(node, ast.ImportFrom):
                result.imports.append(self._parse_import_from(node))
            elif isinstance(node, ast.ClassDef):
                result.classes.append(self._parse_class(node))

        for node in self._iter_module_scope(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                result.functions.append(self._parse_function(node))
            elif isinstance(node, ast.Assign):
                for target in node.targets:
                    if isinstance(target, ast.Name):
                        result.variables.append(self._parse_variable(target, node))
            elif isinstance(node, ast.AnnAssign) and isinstance(node.target, ast.Name):
                result.variables.append(self._parse_variable(node.target, node))

        result.docstring_coverage = self._calculate_docstring_coverage(result)
        result.type_hint_coverage = self._calculate_type_hint_coverage(result)

        return result

    def _iter_module_scope(self, tree):
        """Yield nodes belonging to the module scope in source order.

        Scope-opening nodes (functions, classes, lambdas) are yielded
        themselves but their contents are not visited. The traversal is
        iterative, so it does not consume Python stack depth.
        """
        pending = list(reversed(list(ast.iter_child_nodes(tree))))
        while pending:
            node = pending.pop()
            yield node
            if isinstance(node, self._SCOPE_NODES):
                continue
            pending.extend(reversed(list(ast.iter_child_nodes(node))))

    @staticmethod
    def _split_aliases(aliases):
        """Return ``(names, {name: asname})`` for a list of ``ast.alias``."""
        names = [alias.name for alias in aliases]
        renamed = {alias.name: alias.asname for alias in aliases if alias.asname}
        return names, renamed

    def _parse_import(self, node: ast.Import) -> ImportInfo:
        """Describe an ``import a, b as c`` statement."""
        names, aliases = self._split_aliases(node.names)
        return ImportInfo(
            module="",
            names=names,
            aliases=aliases,
            line=node.lineno,
            is_from=False
        )

    def _parse_import_from(self, node: ast.ImportFrom) -> ImportInfo:
        """Describe a ``from m import a, b as c`` statement."""
        names, aliases = self._split_aliases(node.names)
        return ImportInfo(
            module=node.module or "",
            names=names,
            aliases=aliases,
            line=node.lineno,
            is_from=True
        )

    def _decorator_names(self, decorator_list, qualified: bool = True) -> list[str]:
        """Return the names of plain and call-style decorators.

        Args:
            decorator_list: Decorator expression nodes.
            qualified: For attribute decorators, emit the dotted path when
                true and only the final attribute when false.
        """
        names = []
        for deco in decorator_list:
            # ``@deco(...)`` is named after the callable being invoked.
            target = deco.func if isinstance(deco, ast.Call) else deco
            if isinstance(target, ast.Name):
                names.append(target.id)
            elif isinstance(target, ast.Attribute):
                names.append(self._get_attribute_name(target) if qualified else target.attr)
        return names

    def _parse_class(self, node: ast.ClassDef) -> ClassInfo:
        """Describe a class: bases, decorators, methods and attributes."""
        methods = []
        attributes = []

        for item in node.body:
            if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
                methods.append(self._parse_function(item, is_method=True))
            elif isinstance(item, ast.Assign):
                attributes.extend(
                    target.id for target in item.targets if isinstance(target, ast.Name)
                )
            elif isinstance(item, ast.AnnAssign) and isinstance(item.target, ast.Name):
                # Annotated fields such as dataclass attributes.
                attributes.append(item.target.id)

        bases = []
        for base in node.bases:
            if isinstance(base, ast.Name):
                bases.append(base.id)
            elif isinstance(base, ast.Attribute):
                bases.append(self._get_attribute_name(base))
            elif isinstance(base, ast.Subscript):
                # Parameterised bases such as ``Generic[T]``.
                bases.append(self._get_annotation_string(base))

        return ClassInfo(
            name=node.name,
            line_start=node.lineno,
            line_end=node.end_lineno or node.lineno,
            docstring=ast.get_docstring(node),
            # Class decorators keep their short (last attribute) form.
            decorators=self._decorator_names(node.decorator_list, qualified=False),
            bases=bases,
            methods=methods,
            attributes=attributes
        )

    def _parse_function(self, node, is_method: bool = False) -> FunctionInfo:
        """Describe a function or method definition.

        Args:
            node: An ``ast.FunctionDef`` or ``ast.AsyncFunctionDef``.
            is_method: Whether the function sits directly in a class body.
        """
        args = []
        annotations = {}

        def record(arg, prefix=""):
            label = f"{prefix}{arg.arg}"
            args.append(label)
            if arg.annotation is not None:
                annotations[label] = self._get_annotation_string(arg.annotation)

        signature = node.args
        for arg in [*signature.posonlyargs, *signature.args]:
            record(arg)
        if signature.vararg:
            record(signature.vararg, "*")
        for arg in signature.kwonlyargs:
            record(arg)
        if signature.kwarg:
            record(signature.kwarg, "**")

        returns = None
        if node.returns is not None:
            returns = self._get_annotation_string(node.returns)

        return FunctionInfo(
            name=node.name,
            line_start=node.lineno,
            line_end=node.end_lineno or node.lineno,
            args=args,
            returns=returns,
            docstring=ast.get_docstring(node),
            decorators=self._decorator_names(node.decorator_list),
            is_async=isinstance(node, ast.AsyncFunctionDef),
            is_method=is_method,
            arg_annotations=annotations
        )

    def _parse_variable(self, target: ast.Name, node) -> VariableInfo:
        """Describe one name bound by an ``Assign`` or ``AnnAssign`` node.

        For annotated assignments the annotation text is used as the type;
        otherwise the type is inferred from the assigned expression.
        """
        if isinstance(node, ast.AnnAssign):
            value_type = self._get_annotation_string(node.annotation)
        else:
            value_type = self._infer_type(node.value)

        return VariableInfo(
            name=target.id,
            line=node.lineno,
            value_type=value_type,
            # str.isupper() ignores underscores and digits, so names such
            # as ``_MAX_SIZE`` qualify without special handling.
            is_constant=target.id.isupper()
        )

    def _is_method(self, tree: ast.Module, node) -> bool:
        """Return True when ``node`` is a direct child of a class body."""
        return any(
            child is node
            for parent in ast.walk(tree)
            if isinstance(parent, ast.ClassDef)
            for child in parent.body
        )

    def _get_annotation_string(self, node) -> str:
        """Render an annotation expression as text.

        Shapes not handled explicitly are rendered with ``ast.unparse``;
        ``"Any"`` is returned only if that fails.
        """
        if isinstance(node, ast.Name):
            return node.id
        if isinstance(node, ast.Constant):
            return repr(node.value)
        if isinstance(node, ast.Subscript):
            value = self._get_annotation_string(node.value)
            slice_str = self._get_annotation_string(node.slice)
            return f"{value}[{slice_str}]"
        if isinstance(node, ast.Attribute):
            return self._get_attribute_name(node)
        if isinstance(node, ast.Tuple):
            return ", ".join(self._get_annotation_string(e) for e in node.elts)
        if isinstance(node, ast.BinOp) and isinstance(node.op, ast.BitOr):
            # Union syntax: X | Y
            left = self._get_annotation_string(node.left)
            right = self._get_annotation_string(node.right)
            return f"{left} | {right}"
        try:
            return ast.unparse(node)
        except Exception:
            return "Any"

    def _get_attribute_name(self, node: ast.Attribute) -> str:
        """Return the dotted name of an attribute chain such as ``a.b.c``."""
        parts = []
        current = node
        while isinstance(current, ast.Attribute):
            parts.append(current.attr)
            current = current.value
        if isinstance(current, ast.Name):
            parts.append(current.id)
        return ".".join(reversed(parts))

    def _infer_type(self, node) -> Optional[str]:
        """Guess a type name from the syntactic form of an expression.

        Returns ``None`` when the form gives no hint.
        """
        if isinstance(node, ast.Constant):
            type_map = {
                str: "str",
                bytes: "bytes",
                int: "int",
                float: "float",
                complex: "complex",
                bool: "bool",
                type(None): "None",
            }
            # Exact type lookup keeps ``True`` as "bool" rather than "int".
            return type_map.get(type(node.value), "Any")
        if isinstance(node, ast.JoinedStr):
            return "str"
        if isinstance(node, (ast.List, ast.ListComp)):
            return "list"
        if isinstance(node, (ast.Dict, ast.DictComp)):
            return "dict"
        if isinstance(node, (ast.Set, ast.SetComp)):
            return "set"
        if isinstance(node, ast.Tuple):
            return "tuple"
        if isinstance(node, ast.GeneratorExp):
            return "generator"
        if isinstance(node, ast.Call):
            # A call is labelled with the name of the callable.
            if isinstance(node.func, ast.Name):
                return node.func.id
            if isinstance(node.func, ast.Attribute):
                return self._get_attribute_name(node.func)
        return None

    def _calculate_docstring_coverage(self, result: PythonASTResult) -> float:
        """Return the percentage of documented items.

        The module, every class, every method and every collected
        function each count as one item.
        """
        documented_flags = [bool(result.module_docstring)]
        for cls in result.classes:
            documented_flags.append(bool(cls.docstring))
            documented_flags.extend(bool(method.docstring) for method in cls.methods)
        documented_flags.extend(bool(func.docstring) for func in result.functions)

        total = len(documented_flags)
        documented = sum(documented_flags)
        return (documented / total * 100) if total > 0 else 100.0

    def _calculate_type_hint_coverage(self, result: PythonASTResult) -> float:
        """Return the percentage of annotated parameters and returns.

        Every parameter and every return slot of each method and
        collected function counts as one item. The first parameter of a
        method is skipped unless the method is a ``staticmethod``.
        """
        callables = [method for cls in result.classes for method in cls.methods]
        callables.extend(result.functions)

        total = 0
        typed = 0
        for func in callables:
            params = func.args
            if func.is_method and "staticmethod" not in func.decorators:
                params = params[1:]

            total += len(params) + 1  # parameters plus the return slot
            typed += sum(1 for param in params if param in func.arg_annotations)
            if func.returns:
                typed += 1

        return (typed / total * 100) if total > 0 else 100.0
418
+
419
+
420
def analyze_python_file(file_path: str) -> PythonASTResult:
    """Parse one Python file with a freshly built parser.

    Args:
        file_path: Path of the file to parse.

    Returns:
        The result produced by ``PythonASTParser.parse_file``.
    """
    return PythonASTParser().parse_file(file_path)
424
+
425
+
426
def analyze_python_code(code: str) -> PythonASTResult:
    """Parse Python source text with a freshly built parser.

    Args:
        code: Python source code.

    Returns:
        The result produced by ``PythonASTParser.parse_code``; the file
        path argument is left at that method's default.
    """
    return PythonASTParser().parse_code(code)