vcode-analysis 0.5.2__tar.gz → 0.5.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. {vcode_analysis-0.5.2/vcode_analysis.egg-info → vcode_analysis-0.5.3}/PKG-INFO +1 -1
  2. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/analyzers/__init__.py +6 -0
  3. vcode_analysis-0.5.3/analyzers/knowledge_graph.py +632 -0
  4. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/cli.py +68 -1
  5. vcode_analysis-0.5.3/docs/design/code-knowledge-graph.md +290 -0
  6. vcode_analysis-0.5.3/docs/design/three-year-roadmap.md +296 -0
  7. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/parsers/__init__.py +63 -0
  8. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/pyproject.toml +1 -1
  9. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3/vcode_analysis.egg-info}/PKG-INFO +1 -1
  10. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/vcode_analysis.egg-info/SOURCES.txt +3 -0
  11. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/LICENSE +0 -0
  12. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/MANIFEST.in +0 -0
  13. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/README.md +0 -0
  14. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/analyzers/architecture.py +0 -0
  15. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/analyzers/code_review.py +0 -0
  16. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/analyzers/context_builder.py +0 -0
  17. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/analyzers/directory.py +0 -0
  18. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/analyzers/documentation.py +0 -0
  19. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/analyzers/security.py +0 -0
  20. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/core/__init__.py +0 -0
  21. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/core/analyzer.py +0 -0
  22. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/core/batch_analyzer.py +0 -0
  23. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/core/batch_planner.py +0 -0
  24. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/core/cache_manager.py +0 -0
  25. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/core/config.py +0 -0
  26. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/core/git_handler.py +0 -0
  27. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/core/ignore.py +0 -0
  28. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/core/json_utils.py +0 -0
  29. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/core/llm_client.py +0 -0
  30. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/core/report_generator.py +0 -0
  31. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/core/token_estimator.py +0 -0
  32. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/docs/PPT/345/244/247/347/272/262-VCode_Analysis_/346/231/272/350/203/275/344/273/243/347/240/201/345/210/206/346/236/220/345/267/245/345/205/267/345/210/206/344/272/253.md" +0 -0
  33. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/docs/PPT/347/224/237/346/210/220/346/217/220/347/244/272/350/257/215-VCode_Analysis/345/210/206/344/272/253.md" +0 -0
  34. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/docs/USER_MANUAL.md +0 -0
  35. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/docs/design/batch-cache-optimization.md +0 -0
  36. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/docs/design/c-parser-design.md +0 -0
  37. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/docs/design/kotlin-parser-design.md +0 -0
  38. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/docs/design/vcode-analysis-tool.md +0 -0
  39. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/docs/security-sharing-article.md +0 -0
  40. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/parsers/c/__init__.py +0 -0
  41. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/parsers/c/ast_parser.py +0 -0
  42. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/parsers/c/models.py +0 -0
  43. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/parsers/c/patterns.py +0 -0
  44. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/parsers/c/regex_parser.py +0 -0
  45. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/parsers/c_parser.py +0 -0
  46. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/parsers/java_parser.py +0 -0
  47. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/parsers/javascript_parser.py +0 -0
  48. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/parsers/kotlin/__init__.py +0 -0
  49. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/parsers/kotlin/ast_parser.py +0 -0
  50. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/parsers/kotlin/models.py +0 -0
  51. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/parsers/kotlin/patterns.py +0 -0
  52. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/parsers/kotlin/regex_parser.py +0 -0
  53. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/parsers/kotlin_parser.py +0 -0
  54. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/parsers/python_parser.py +0 -0
  55. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/parsers/typescript_parser.py +0 -0
  56. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/requirements.txt +0 -0
  57. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/setup.cfg +0 -0
  58. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/tests/test_batch_operations.py +0 -0
  59. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/tests/test_c_parser.py +0 -0
  60. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/tests/test_kotlin_parser.py +0 -0
  61. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/tests/test_report_generator.py +0 -0
  62. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/tests/test_security_rules.py +0 -0
  63. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/vcode_analysis.egg-info/dependency_links.txt +0 -0
  64. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/vcode_analysis.egg-info/entry_points.txt +0 -0
  65. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/vcode_analysis.egg-info/requires.txt +0 -0
  66. {vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/vcode_analysis.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vcode-analysis
3
- Version: 0.5.2
3
+ Version: 0.5.3
4
4
  Summary: 基于大模型的智能代码分析工具,支持代码审查、文档生成、架构分析和安全扫描
5
5
  Author-email: Wellchang <2483808264@qq.com>
6
6
  License: MIT
@@ -6,6 +6,7 @@ from .architecture import ArchitectureAnalyzer, ArchitectureResult, ModuleInfo,
6
6
  from .security import SecurityAnalyzer, SecurityResult, SecurityIssue
7
7
  from .directory import DirectoryAnalyzer, DirectoryAnalysisResult, ProjectInfo
8
8
  from .context_builder import ContextBuilder, AnalysisContext
9
+ from .knowledge_graph import KnowledgeGraphAnalyzer, KnowledgeGraph, KGEntity, KGRelation, KGBusinessRule
9
10
 
10
11
  __all__ = [
11
12
  "CodeReviewAnalyzer",
@@ -24,4 +25,9 @@ __all__ = [
24
25
  "ProjectInfo",
25
26
  "ContextBuilder",
26
27
  "AnalysisContext",
28
+ "KnowledgeGraphAnalyzer",
29
+ "KnowledgeGraph",
30
+ "KGEntity",
31
+ "KGRelation",
32
+ "KGBusinessRule",
27
33
  ]
@@ -0,0 +1,632 @@
1
+ """代码知识图谱分析器 — 自动构建代码库的领域模型图谱(实体、关系、业务规则)"""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import re
7
+ from dataclasses import dataclass, field
8
+ from pathlib import Path
9
+ from typing import TYPE_CHECKING
10
+
11
+ if TYPE_CHECKING:
12
+ from core.analyzer import Analyzer
13
+
14
+ from parsers import get_parser_for_file
15
+
16
+
17
+ # ── 数据模型 ──────────────────────────────────────────────
18
+
19
+
20
@dataclass
class KGEntity:
    """A node of the knowledge graph (module, class, function, ...)."""

    id: str
    name: str
    type: str  # class | function | module | variable | interface
    file_path: str
    line: int = 0
    metadata: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return the entity as a plain dict, keys in declaration order.

        The ``metadata`` value is the entity's own dict, not a copy.
        """
        keys = ("id", "name", "type", "file_path", "line", "metadata")
        return {key: getattr(self, key) for key in keys}
40
+
41
+
42
@dataclass
class KGRelation:
    """A directed, typed edge between two knowledge-graph entities."""

    source_id: str
    target_id: str
    type: str  # imports | calls | inherits | contains | depends_on | implements
    weight: float = 1.0
    metadata: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return the relation as a plain dict, keys in declaration order.

        The ``metadata`` value is the relation's own dict, not a copy.
        """
        keys = ("source_id", "target_id", "type", "weight", "metadata")
        return {key: getattr(self, key) for key in keys}
60
+
61
+
62
@dataclass
class KGBusinessRule:
    """A rule or convention observed in the code base."""

    id: str
    description: str
    category: str  # naming | pattern | constraint | convention
    entities: list = field(default_factory=list)
    source: str = "static"  # static | llm

    def to_dict(self) -> dict:
        """Return the rule as a plain dict, keys in declaration order.

        The ``entities`` value is the rule's own list, not a copy.
        """
        keys = ("id", "description", "category", "entities", "source")
        return {key: getattr(self, key) for key in keys}
80
+
81
+
82
@dataclass
class KnowledgeGraph:
    """Container for the entities, relations and business rules of a code base.

    The lists hold objects exposing the attributes used below (``to_dict()``,
    ``type``, ``id``, ...); in this module those are ``KGEntity``,
    ``KGRelation`` and ``KGBusinessRule`` instances.
    """

    entities: list = field(default_factory=list)
    relations: list = field(default_factory=list)
    business_rules: list = field(default_factory=list)
    metadata: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return the whole graph as a dict of plain, JSON-friendly values."""
        return {
            "entities": [e.to_dict() for e in self.entities],
            "relations": [r.to_dict() for r in self.relations],
            "business_rules": [br.to_dict() for br in self.business_rules],
            "metadata": self.metadata,
        }

    def to_json(self, indent: int = 2) -> str:
        """Serialize the graph to JSON, keeping non-ASCII text readable."""
        return json.dumps(self.to_dict(), indent=indent, ensure_ascii=False)

    @staticmethod
    def _count_by_type(items) -> list[tuple[str, int]]:
        """Count *items* per ``.type``, most frequent first.

        Ties keep first-seen order because ``sorted`` is stable.
        """
        counts: dict[str, int] = {}
        for item in items:
            counts[item.type] = counts.get(item.type, 0) + 1
        return sorted(counts.items(), key=lambda pair: -pair[1])

    def to_markdown(self) -> str:
        """Render the graph as a Markdown report.

        Sections, in order: project metadata (only when present), entity
        statistics, relation statistics, entity list, relation list and
        business rules (only when present).
        """
        lines = ["# 代码知识图谱\n"]

        # Project metadata
        if self.metadata:
            lines.append("## 项目信息\n")
            lines.extend(f"- **{k}**: {v}" for k, v in self.metadata.items())
            lines.append("")

        # Entity statistics
        lines.append("## 实体统计\n")
        lines.append(f"- 总计: {len(self.entities)} 个实体\n")
        lines.extend(f"- {t}: {c}" for t, c in self._count_by_type(self.entities))
        lines.append("")

        # Relation statistics
        lines.append("## 关系统计\n")
        lines.append(f"- 总计: {len(self.relations)} 条关系\n")
        lines.extend(f"- {t}: {c}" for t, c in self._count_by_type(self.relations))
        lines.append("")

        # Entity list
        lines.append("## 实体列表\n")
        for e in sorted(self.entities, key=lambda x: (x.type, x.id)):
            loc = f":{e.line}" if e.line else ""
            lines.append(f"- **[{e.type}]** `{e.id}` — {e.file_path}{loc}")
            if e.metadata:
                for mk, mv in e.metadata.items():
                    # Only a few metadata keys are worth showing in the report.
                    if mv and mk in ("bases", "decorators", "params"):
                        # Two-space indent so Markdown nests the item under
                        # its entity instead of rendering it as a sibling.
                        lines.append(f"  - {mk}: {mv}")
        lines.append("")

        # Relation list
        lines.append("## 关系列表\n")
        for r in sorted(self.relations, key=lambda x: (x.type, x.source_id)):
            lines.append(f"- `{r.source_id}` →[{r.type}]→ `{r.target_id}`")
        lines.append("")

        # Business rules
        if self.business_rules:
            lines.append("## 业务规则\n")
            for br in sorted(self.business_rules, key=lambda x: x.category):
                lines.append(f"- **[{br.category}]** {br.description}")
                if br.entities:
                    # Rules added from parsed LLM output may reference
                    # entities with non-string values; str() keeps join()
                    # from raising.
                    shown = ", ".join(str(ref) for ref in br.entities[:5])
                    lines.append(f"  - 涉及: {shown}")
            lines.append("")

        return "\n".join(lines)
159
+
160
+
161
+ # ── 分析器 ──────────────────────────────────────────────
162
+
163
+
164
class KnowledgeGraphAnalyzer:
    """Build a knowledge graph (entities, relations, rules) from a code base.

    The static pass uses the project's language parsers to extract modules,
    classes and functions, links them with ``contains`` / ``inherits`` /
    ``imports`` relations and derives simple rules. An optional LLM pass can
    add inferred rules and relations on top.
    """

    def __init__(self, analyzer: Analyzer):
        """Keep the project analyzer used for file scanning and LLM config."""
        self.analyzer = analyzer

    def analyze(self, target_path: str, use_llm: bool = False) -> KnowledgeGraph:
        """Analyze *target_path* and return its knowledge graph.

        Args:
            target_path: File or directory to analyze.
            use_llm: When true, run the best-effort LLM enhancement pass.

        Raises:
            FileNotFoundError: If *target_path* does not exist.
        """
        target = Path(target_path).resolve()
        if not target.exists():
            raise FileNotFoundError(f"目标路径不存在: {target_path}")

        file_infos = list(self.analyzer.scan_files(str(target)))
        if not file_infos:
            return KnowledgeGraph(metadata={"target_path": str(target), "file_count": 0})

        entities = self._extract_entities(file_infos)
        relations = self._extract_relations(entities, file_infos)
        business_rules = self._detect_business_rules(entities, relations)

        kg = KnowledgeGraph(
            entities=entities,
            relations=relations,
            business_rules=business_rules,
            metadata={
                "target_path": str(target),
                "file_count": len(file_infos),
                "entity_count": len(entities),
                "relation_count": len(relations),
                "rule_count": len(business_rules),
            },
        )

        if use_llm:
            kg = self._enhance_with_llm(kg)
            # The LLM pass may append relations and rules; refresh the counts
            # so the metadata does not go stale.
            kg.metadata["relation_count"] = len(kg.relations)
            kg.metadata["rule_count"] = len(kg.business_rules)

        return kg

    # ── Extraction ────────────────────────────────────────

    @staticmethod
    def _paths_of(fi) -> tuple:
        """Return ``(relative_path, absolute_path)`` for a scanned file entry.

        Entries without ``relative_path`` / ``path`` attributes fall back to
        ``str(fi)`` for both values.
        """
        file_path = fi.relative_path if hasattr(fi, "relative_path") else str(fi)
        abs_path = str(fi.path) if hasattr(fi, "path") else file_path
        return file_path, abs_path

    @staticmethod
    def _parse_file(file_path, abs_path):
        """Parse one file; return the parser result or ``None``.

        ``None`` means no parser is registered for the file or reading/parsing
        failed. Failures are swallowed on purpose: one unreadable or
        unparsable file must not abort the whole graph.
        """
        parser = get_parser_for_file(file_path)
        if parser is None:
            return None
        try:
            code = Path(abs_path).read_text(encoding="utf-8", errors="ignore")
            return parser.parse_code(code, file_path)
        except Exception:
            return None

    @staticmethod
    def _param_names(node) -> list:
        """Return the parameters of a parsed function as strings.

        Reads ``params`` and falls back to ``args``; a missing or ``None``
        attribute yields an empty list.
        """
        params = getattr(node, "params", None)
        if params is None:
            params = getattr(node, "args", None)
        return [str(p) for p in params or []]

    def _extract_entities(self, file_infos: list) -> list[KGEntity]:
        """Extract module, class and function entities from *file_infos*.

        Every file yields a module entity. Files with a registered parser
        additionally yield entities for their classes, methods and
        module-level functions. An ID that is already taken is disambiguated
        with an ``@<file>:<line>`` suffix.
        """
        entities: list[KGEntity] = []
        seen_ids: set[str] = set()

        def unique_id(candidate: str, file_path, node) -> str:
            """Reserve *candidate*, suffixing it when it is already taken."""
            if candidate in seen_ids:
                candidate = f"{candidate}@{file_path}:{getattr(node, 'line_start', 0)}"
            seen_ids.add(candidate)
            return candidate

        for fi in file_infos:
            file_path, abs_path = self._paths_of(fi)
            module_id = self._file_to_module_id(file_path)

            entities.append(
                KGEntity(
                    id=module_id,
                    name=Path(file_path).stem,
                    type="module",
                    file_path=file_path,
                )
            )
            seen_ids.add(module_id)

            result = self._parse_file(file_path, abs_path)
            if result is None:
                continue

            # ``or []`` guards against parsers that set an attribute to None.
            for cls in getattr(result, "classes", None) or []:
                cls_id = unique_id(f"{module_id}.{cls.name}", file_path, cls)
                methods = getattr(cls, "methods", None) or []
                entities.append(
                    KGEntity(
                        id=cls_id,
                        name=cls.name,
                        type="class",
                        file_path=file_path,
                        line=getattr(cls, "line_start", 0),
                        metadata={
                            "bases": getattr(cls, "bases", None) or [],
                            "methods": [m.name for m in methods],
                            "docstring": getattr(cls, "docstring", None),
                        },
                    )
                )

                # Methods become function entities nested under the class ID;
                # the class -> method edges are built in _extract_relations.
                for m in methods:
                    entities.append(
                        KGEntity(
                            id=unique_id(f"{cls_id}.{m.name}", file_path, m),
                            name=m.name,
                            type="function",
                            file_path=file_path,
                            line=getattr(m, "line_start", 0),
                            metadata={
                                "params": self._param_names(m),
                                "decorators": getattr(m, "decorators", []),
                            },
                        )
                    )

            for func in getattr(result, "functions", None) or []:
                entities.append(
                    KGEntity(
                        id=unique_id(f"{module_id}.{func.name}", file_path, func),
                        name=func.name,
                        type="function",
                        file_path=file_path,
                        line=getattr(func, "line_start", 0),
                        metadata={
                            "params": self._param_names(func),
                            "decorators": getattr(func, "decorators", []),
                            "return_type": getattr(
                                func, "return_type", getattr(func, "returns", None)
                            ),
                        },
                    )
                )

        return entities

    def _extract_relations(self, entities: list[KGEntity], file_infos: list) -> list[KGRelation]:
        """Derive ``contains``, ``inherits`` and ``imports`` relations.

        Relations are emitted in that order: module -> member, class ->
        method, class -> base, module -> imported entity.
        """
        relations: list[KGRelation] = []
        entity_map = {e.id: e for e in entities}

        # Group classes/functions by file so containment is only checked
        # against entities defined in the same file. This avoids a
        # modules x entities scan and stops a package (``pkg/__init__``)
        # from "containing" functions that live in its submodules.
        members_by_file: dict = {}
        for e in entities:
            if e.type in ("class", "function"):
                members_by_file.setdefault(e.file_path, []).append(e)

        # contains: module -> class / function
        for mod in entities:
            if mod.type != "module":
                continue
            prefix = mod.id + "."
            for e in members_by_file.get(mod.file_path, ()):
                if not e.id.startswith(prefix):
                    continue
                depth = e.id[len(prefix):].count(".")
                # Top-level definitions, plus functions one level down
                # (methods), which the module has always been linked to.
                # NOTE(review): IDs disambiguated with "@<file>:<line>"
                # contain extra dots and can be mis-classified here.
                if depth == 0 or (e.type == "function" and depth == 1):
                    relations.append(
                        KGRelation(source_id=mod.id, target_id=e.id, type="contains")
                    )

        # contains: class -> method
        classes = [e for e in entities if e.type == "class"]
        for cls in classes:
            prefix = cls.id + "."
            for e in members_by_file.get(cls.file_path, ()):
                if e.type == "function" and e.id.startswith(prefix):
                    relations.append(
                        KGRelation(source_id=cls.id, target_id=e.id, type="contains")
                    )

        # inherits: class -> resolved base class
        for cls in classes:
            for base in cls.metadata.get("bases") or []:
                target_id = self._resolve_entity_id(base, entity_map, cls.file_path)
                # A class cannot inherit from itself; such a hit is a false
                # resolution (e.g. ``class Foo(other.Foo)``), so drop it.
                if target_id and target_id != cls.id:
                    relations.append(
                        KGRelation(source_id=cls.id, target_id=target_id, type="inherits")
                    )

        # imports: module -> project entity named by the import
        for fi in file_infos:
            # Read through the absolute path, exactly like _extract_entities;
            # the relative path only resolves when the current working
            # directory happens to be the scan root.
            file_path, abs_path = self._paths_of(fi)
            result = self._parse_file(file_path, abs_path)
            if result is None:
                continue

            module_id = self._file_to_module_id(file_path)
            for imp in getattr(result, "imports", None) or []:
                imp_module = getattr(imp, "module", None) or getattr(imp, "source", None) or ""
                if not imp_module:
                    continue
                target_id = self._resolve_import_to_entity(imp_module, entity_map)
                if target_id:
                    relations.append(
                        KGRelation(source_id=module_id, target_id=target_id, type="imports")
                    )

        return relations

    # ── Rule detection ────────────────────────────────────

    def _detect_business_rules(
        self, entities: list[KGEntity], relations: list[KGRelation]
    ) -> list[KGBusinessRule]:
        """Derive static rules: naming, import cycles and pattern hints.

        Rule IDs are ``BR001``, ``BR002``, ... in detection order.
        """
        rules: list[KGBusinessRule] = []

        def add_rule(description: str, category: str, entity_ids: list) -> None:
            rules.append(
                KGBusinessRule(
                    id=f"BR{len(rules) + 1:03d}",
                    description=description,
                    category=category,
                    entities=entity_ids,
                    source="static",
                )
            )

        # Naming conventions (only the first character is inspected).
        for e in entities:
            if not e.name:
                continue
            if e.type == "class" and not re.match(r"^[A-Z]", e.name):
                add_rule(f"类 '{e.name}' 未遵循大写开头命名约定", "naming", [e.id])
            elif (
                e.type == "function"
                and not e.name.startswith("_")
                and not re.match(r"^[a-z_]", e.name)
            ):
                add_rule(f"函数 '{e.name}' 未遵循蛇形命名约定", "naming", [e.id])

        # Architecture constraint: import cycles between modules.
        graph: dict[str, set[str]] = {e.id: set() for e in entities if e.type == "module"}
        for r in relations:
            if r.type == "imports" and r.source_id in graph and r.target_id in graph:
                graph[r.source_id].add(r.target_id)
        for cycle in self._find_cycles(graph):
            add_rule(f"循环依赖: {' → '.join(cycle)}", "constraint", cycle)

        # Design-pattern hints based on method names.
        for cls in entities:
            if cls.type != "class":
                continue
            bases = cls.metadata.get("bases") or []
            methods = cls.metadata.get("methods") or []

            # Singleton: has __new__ plus a method mentioning "_instance".
            if "__new__" in methods and any("_instance" in m for m in methods):
                add_rule(f"类 '{cls.name}' 可能使用单例模式", "pattern", [cls.id])

            # Factory: base-less class with create/build/make methods.
            if not bases and any(
                "create" in m or "build" in m or "make" in m for m in methods
            ):
                add_rule(f"类 '{cls.name}' 可能使用工厂模式", "pattern", [cls.id])

        return rules

    def _enhance_with_llm(self, kg: KnowledgeGraph) -> KnowledgeGraph:
        """Ask the LLM for implied rules and relations and add them to *kg*.

        Best effort: a failure never raises and leaves the static graph
        usable. The failure is recorded under ``kg.metadata["llm_error"]``
        instead of being dropped silently. Malformed items in the response
        are skipped rather than aborting the rest.
        """
        try:
            from core.llm_client import LLMClient

            llm = LLMClient(self.analyzer.config.llm)

            summary = self._build_kg_summary(kg)
            prompt = (
                "分析以下代码知识图谱摘要,推断隐含的业务规则、领域概念和语义关系。\n\n"
                f"{summary}\n\n"
                "请以 JSON 格式返回,包含:\n"
                '1. "business_rules": [{id, description, category, entities}]\n'
                '2. "suggested_relations": [{source_id, target_id, type, description}]\n'
            )

            response = llm.analyze(prompt)
            if not response:
                return kg

            from core.json_utils import JSONUtils

            data = JSONUtils.parse_json(response)
            if not isinstance(data, dict):
                return kg

            for br in data.get("business_rules") or []:
                if not isinstance(br, dict):
                    continue
                refs = br.get("entities") or []
                if not isinstance(refs, (list, tuple)):
                    refs = [refs]
                kg.business_rules.append(
                    KGBusinessRule(
                        id=br.get("id", f"BR_LLM_{len(kg.business_rules)}"),
                        description=br.get("description", ""),
                        category=br.get("category", "convention"),
                        # Normalize to strings so reports can join them.
                        entities=[str(ref) for ref in refs],
                        source="llm",
                    )
                )

            for sr in data.get("suggested_relations") or []:
                if not isinstance(sr, dict):
                    continue
                source_id = sr.get("source_id") or ""
                target_id = sr.get("target_id") or ""
                # An edge without both endpoints would dangle in the graph.
                if not source_id or not target_id:
                    continue
                kg.relations.append(
                    KGRelation(
                        source_id=str(source_id),
                        target_id=str(target_id),
                        type=sr.get("type") or "semantic",
                        metadata={"description": sr.get("description", "")},
                    )
                )
        except Exception as exc:
            # Deliberately broad: the LLM pass is optional and must not
            # break the static graph.
            kg.metadata["llm_error"] = f"{type(exc).__name__}: {exc}"

        return kg

    # ── Helpers ───────────────────────────────────────────

    @staticmethod
    def _file_to_module_id(file_path: str) -> str:
        """Convert a file path to a dotted module ID.

        The extension is dropped and an ``__init__`` file maps to its
        directory: ``pkg/mod.py`` -> ``pkg.mod``, ``pkg/__init__.py`` ->
        ``pkg``. A bare ``__init__.py`` falls back to ``__init__``.
        """
        parts = list(Path(file_path).parts)
        if parts:
            parts[-1] = Path(parts[-1]).stem
        if parts and parts[-1] == "__init__":
            parts.pop()
        return ".".join(parts) if parts else Path(file_path).stem

    @staticmethod
    def _resolve_entity_id(name: str, entity_map: dict, current_file: str) -> str | None:
        """Resolve a (base-class) name to an entity ID, or ``None``.

        Tried in order: the exact name, the name qualified with the module of
        *current_file*, then the first entity whose ID ends in ``.<name>``.
        """
        if name in entity_map:
            return name

        qualified = f"{KnowledgeGraphAnalyzer._file_to_module_id(current_file)}.{name}"
        if qualified in entity_map:
            return qualified

        dotted = f".{name}"
        for eid in entity_map:
            if eid.endswith(dotted):
                return eid
        return None

    @staticmethod
    def _resolve_import_to_entity(module_name: str, entity_map: dict) -> str | None:
        """Resolve an imported module name to a project entity ID, or ``None``.

        After an exact lookup, progressively shorter dotted suffixes of
        *module_name* are tried (``a.b.c``, ``b.c``, ``c``). A suffix only
        matches an ID that equals it or ends with ``.<suffix>``, so
        ``json`` no longer matches ``...to_json``. Leading dots of relative
        imports are ignored, and a name made only of dots resolves to
        ``None`` instead of matching every entity.
        """
        if not module_name:
            return None
        if module_name in entity_map:
            return module_name

        parts = [part for part in module_name.split(".") if part]
        for start in range(len(parts)):
            suffix = ".".join(parts[start:])
            if suffix in entity_map:
                return suffix
            dotted = "." + suffix
            for eid in entity_map:
                if eid.endswith(dotted):
                    return eid
        return None

    @staticmethod
    def _find_cycles(graph: dict[str, set[str]]) -> list[list[str]]:
        """Return cycles found by a depth-first walk of the directed *graph*.

        Each cycle is the node path closed with its first node, e.g.
        ``["a", "b", "a"]``. Nodes and neighbors are visited in sorted order
        so the result is stable between runs, and the walk uses an explicit
        stack so long import chains cannot hit Python's recursion limit.
        """
        cycles: list[list[str]] = []
        visited: set[str] = set()

        for root in sorted(graph):
            if root in visited:
                continue

            visited.add(root)
            path = [root]
            on_path = {root}
            # One neighbor iterator per node on the current path; iterators
            # keep their position, so a node resumes where it left off.
            stack = [iter(sorted(graph.get(root, ())))]

            while stack:
                for neighbor in stack[-1]:
                    if neighbor not in visited:
                        visited.add(neighbor)
                        path.append(neighbor)
                        on_path.add(neighbor)
                        stack.append(iter(sorted(graph.get(neighbor, ()))))
                        break
                    if neighbor in on_path:
                        # Back edge: the cycle is the path from neighbor on.
                        cycles.append(path[path.index(neighbor):] + [neighbor])
                else:
                    # Neighbors exhausted: backtrack.
                    stack.pop()
                    on_path.discard(path.pop())

        return cycles

    @staticmethod
    def _build_kg_summary(kg: KnowledgeGraph) -> str:
        """Build the short text summary of *kg* that is sent to the LLM.

        Contains the totals, the entity count per type and the first 30
        entities and relations.
        """
        lines = [f"项目包含 {len(kg.entities)} 个实体, {len(kg.relations)} 条关系"]

        lines.append("\n实体类型分布:")
        type_counts: dict[str, int] = {}
        for e in kg.entities:
            type_counts[e.type] = type_counts.get(e.type, 0) + 1
        lines.extend(f" {t}: {c}" for t, c in sorted(type_counts.items()))

        lines.append("\n关键实体 (前 30):")
        lines.extend(f" [{e.type}] {e.id}" for e in kg.entities[:30])

        lines.append("\n关键关系 (前 30):")
        lines.extend(
            f" {r.source_id} --[{r.type}]--> {r.target_id}" for r in kg.relations[:30]
        )

        return "\n".join(lines)