PyPI - vcode-analysis - Versions diffs - 0.5.2__tar.gz → 0.5.3__tar.gz - Mend

vcode-analysis 0.5.2tar.gz → 0.5.3tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (66) hide show

{vcode_analysis-0.5.2/vcode_analysis.egg-info → vcode_analysis-0.5.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: vcode-analysis
-Version: 0.5.2
+Version: 0.5.3
 Summary: 基于大模型的智能代码分析工具，支持代码审查、文档生成、架构分析和安全扫描
 Author-email: Wellchang <2483808264@qq.com>
 License: MIT

{vcode_analysis-0.5.2 → vcode_analysis-0.5.3}/analyzers/__init__.py RENAMED Viewed

@@ -6,6 +6,7 @@ from .architecture import ArchitectureAnalyzer, ArchitectureResult, ModuleInfo,
 from .security import SecurityAnalyzer, SecurityResult, SecurityIssue
 from .directory import DirectoryAnalyzer, DirectoryAnalysisResult, ProjectInfo
 from .context_builder import ContextBuilder, AnalysisContext
+from .knowledge_graph import KnowledgeGraphAnalyzer, KnowledgeGraph, KGEntity, KGRelation, KGBusinessRule
 __all__ = [
     "CodeReviewAnalyzer",
@@ -24,4 +25,9 @@ __all__ = [
     "ProjectInfo",
     "ContextBuilder",
     "AnalysisContext",
+    "KnowledgeGraphAnalyzer",
+    "KnowledgeGraph",
+    "KGEntity",
+    "KGRelation",
+    "KGBusinessRule",
 ]

vcode_analysis-0.5.3/analyzers/knowledge_graph.py ADDED Viewed

@@ -0,0 +1,632 @@
+"""代码知识图谱分析器 — 自动构建代码库的领域模型图谱（实体、关系、业务规则）"""
+from __future__ import annotations
+import json
+import re
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from core.analyzer import Analyzer
+from parsers import get_parser_for_file
+# ── 数据模型 ──────────────────────────────────────────────
+@dataclass
+class KGEntity:
+    """A node of the knowledge graph (module, class, function, ...)."""
+    id: str  # identifier referenced by KGRelation.source_id / target_id
+    name: str
+    type: str  # class | function | module | variable | interface
+    file_path: str
+    line: int = 0  # 0 is treated as "no line information" when rendering markdown
+    metadata: dict = field(default_factory=dict)
+    def to_dict(self) -> dict:
+        """Return the entity as a plain dict, one key per field, in declaration order."""
+        exported = ("id", "name", "type", "file_path", "line", "metadata")
+        return {key: getattr(self, key) for key in exported}
+@dataclass
+class KGRelation:
+    """A directed, typed edge between two knowledge-graph entities."""
+    source_id: str
+    target_id: str
+    type: str  # imports | calls | inherits | contains | depends_on | implements
+    weight: float = 1.0
+    metadata: dict = field(default_factory=dict)
+    def to_dict(self) -> dict:
+        """Return the relation as a plain dict, one key per field, in declaration order."""
+        exported = ("source_id", "target_id", "type", "weight", "metadata")
+        return {key: getattr(self, key) for key in exported}
+@dataclass
+class KGBusinessRule:
+    """A rule or observation attached to one or more entities."""
+    id: str
+    description: str
+    category: str  # naming | pattern | constraint | convention
+    entities: list = field(default_factory=list)  # ids of the entities the rule refers to
+    source: str = "static"  # static | llm
+    def to_dict(self) -> dict:
+        """Return the rule as a plain dict, one key per field, in declaration order."""
+        exported = ("id", "description", "category", "entities", "source")
+        return {key: getattr(self, key) for key in exported}
+@dataclass
+class KnowledgeGraph:
+    """Container for the entities, relations and business rules of one analysis run."""
+    entities: list = field(default_factory=list)
+    relations: list = field(default_factory=list)
+    business_rules: list = field(default_factory=list)
+    metadata: dict = field(default_factory=dict)
+    def to_dict(self) -> dict:
+        """Return a plain-dict view; every item is serialised through its own ``to_dict()``."""
+        serialised: dict = {}
+        for section in ("entities", "relations", "business_rules"):
+            serialised[section] = [item.to_dict() for item in getattr(self, section)]
+        serialised["metadata"] = self.metadata
+        return serialised
+    def to_json(self, indent: int = 2) -> str:
+        """Return ``to_dict()`` encoded as JSON, keeping non-ASCII characters unescaped."""
+        payload = self.to_dict()
+        return json.dumps(payload, ensure_ascii=False, indent=indent)
+    def to_markdown(self) -> str:
+        """Render the graph as a markdown report.
+        Sections, in order: project info (only when metadata is set), entity
+        statistics, relation statistics, entity list, relation list and business
+        rules (only when there are any).
+        """
+        def tally(items) -> list:
+            # Count items per ``type``; most frequent first, ties keep first-seen order.
+            counts: dict[str, int] = {}
+            for item in items:
+                counts[item.type] = counts.get(item.type, 0) + 1
+            return sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
+        out = ["# 代码知识图谱\n"]
+        if self.metadata:
+            out.append("## 项目信息\n")
+            out.extend(f"- **{key}**: {value}" for key, value in self.metadata.items())
+            out.append("")
+        out.append("## 实体统计\n")
+        out.append(f"- 总计: {len(self.entities)} 个实体\n")
+        out.extend(f"- {kind}: {count}" for kind, count in tally(self.entities))
+        out.append("")
+        out.append("## 关系统计\n")
+        out.append(f"- 总计: {len(self.relations)} 条关系\n")
+        out.extend(f"- {kind}: {count}" for kind, count in tally(self.relations))
+        out.append("")
+        out.append("## 实体列表\n")
+        # Only these metadata keys are shown under an entity, and only when non-empty.
+        shown_keys = ("bases", "decorators", "params")
+        for entity in sorted(self.entities, key=lambda x: (x.type, x.id)):
+            location = f":{entity.line}" if entity.line else ""
+            out.append(f"- **[{entity.type}]** `{entity.id}` — {entity.file_path}{location}")
+            out.extend(
+                f"  - {key}: {value}"
+                for key, value in (entity.metadata or {}).items()
+                if value and key in shown_keys
+            )
+        out.append("")
+        out.append("## 关系列表\n")
+        out.extend(
+            f"- `{rel.source_id}` →[{rel.type}]→ `{rel.target_id}`"
+            for rel in sorted(self.relations, key=lambda x: (x.type, x.source_id))
+        )
+        out.append("")
+        if self.business_rules:
+            out.append("## 业务规则\n")
+            for rule in sorted(self.business_rules, key=lambda x: x.category):
+                out.append(f"- **[{rule.category}]** {rule.description}")
+                if rule.entities:
+                    # At most five entity ids are listed per rule.
+                    out.append(f"  - 涉及: {', '.join(rule.entities[:5])}")
+            out.append("")
+        return "\n".join(out)
+# ── 分析器 ──────────────────────────────────────────────
+class KnowledgeGraphAnalyzer:
+    """知识图谱分析器 — 从代码库自动构建领域模型图谱"""
+    def __init__(self, analyzer: Analyzer) -> None:
+        """Keep the project analyzer this class delegates to.
+        Args:
+            analyzer: Core analyzer; its ``scan_files()`` is used to enumerate the
+                files to analyze and its ``config.llm`` to configure the optional
+                LLM pass.
+        """
+        self.analyzer = analyzer
+    def analyze(self, target_path: str, use_llm: bool = False) -> KnowledgeGraph:
+        """Build the knowledge graph for ``target_path``.
+        Args:
+            target_path: File or directory to analyze; resolved to an absolute path.
+            use_llm: When true, run the best-effort LLM pass that may append extra
+                business rules and relations.
+        Returns:
+            The graph with entities, relations, business rules and a ``metadata``
+            dict holding the target path and the file/entity/relation/rule counts.
+            When the scan yields no files, only ``target_path`` and
+            ``file_count`` (0) are set.
+        Raises:
+            FileNotFoundError: If ``target_path`` does not exist.
+        """
+        target = Path(target_path).resolve()
+        if not target.exists():
+            raise FileNotFoundError(f"目标路径不存在: {target_path}")
+        # Enumerate the files through the core analyzer.
+        file_infos = list(self.analyzer.scan_files(str(target)))
+        if not file_infos:
+            return KnowledgeGraph(metadata={"target_path": str(target), "file_count": 0})
+        # Static extraction: entities first, then the relations between them.
+        entities = self._extract_entities(file_infos)
+        relations = self._extract_relations(entities, file_infos)
+        business_rules = self._detect_business_rules(entities, relations)
+        kg = KnowledgeGraph(
+            entities=entities,
+            relations=relations,
+            business_rules=business_rules,
+            metadata={
+                "target_path": str(target),
+                "file_count": len(file_infos),
+                "entity_count": len(entities),
+                "relation_count": len(relations),
+                "rule_count": len(business_rules),
+            },
+        )
+        if use_llm:
+            kg = self._enhance_with_llm(kg)
+            # The LLM pass appends to the graph's lists, so the counts computed
+            # above would be stale; refresh them from the final graph.
+            kg.metadata.update(
+                entity_count=len(kg.entities),
+                relation_count=len(kg.relations),
+                rule_count=len(kg.business_rules),
+            )
+        return kg
+    def _extract_entities(self, file_infos: list) -> list[KGEntity]:
+        """Collect module, class, method and module-level function entities.
+        Every file yields one ``module`` entity. Files for which
+        ``get_parser_for_file`` returns a parser are parsed, and each reported
+        class, each of its methods and each module-level function becomes an entity.
+        Files that cannot be read or parsed keep only their module entity.
+        Args:
+            file_infos: Scanned file descriptors; ``relative_path`` / ``path`` are
+                used when present, otherwise ``str(fi)`` stands in for both.
+        Returns:
+            Entities in discovery order (module, then its classes with their
+            methods, then its functions).
+        """
+        collected: list = []
+        taken_ids: set[str] = set()
+        def claim(candidate: str, rel_path: str, node) -> str:
+            # Reserve an id; on a clash disambiguate with "@<file>:<line_start>".
+            if candidate in taken_ids:
+                candidate = f"{candidate}@{rel_path}:{getattr(node, 'line_start', 0)}"
+            taken_ids.add(candidate)
+            return candidate
+        def params_of(node) -> list:
+            # Parsers expose parameters as either ``params`` or ``args``.
+            return [str(p) for p in getattr(node, "params", getattr(node, "args", []))]
+        for info in file_infos:
+            rel_path = info.relative_path if hasattr(info, "relative_path") else str(info)
+            disk_path = str(info.path) if hasattr(info, "path") else rel_path
+            mod_id = self._file_to_module_id(rel_path)
+            collected.append(
+                KGEntity(id=mod_id, name=Path(rel_path).stem, type="module", file_path=rel_path)
+            )
+            taken_ids.add(mod_id)
+            parser = get_parser_for_file(rel_path)
+            if parser is None:
+                continue
+            try:
+                source_text = Path(disk_path).read_text(encoding="utf-8", errors="ignore")
+                parsed = parser.parse_code(source_text, rel_path)
+            except Exception:
+                # Unreadable or unparsable file: keep just the module entity.
+                continue
+            for klass in getattr(parsed, "classes", []):
+                klass_id = claim(f"{mod_id}.{klass.name}", rel_path, klass)
+                collected.append(
+                    KGEntity(
+                        id=klass_id,
+                        name=klass.name,
+                        type="class",
+                        file_path=rel_path,
+                        line=getattr(klass, "line_start", 0),
+                        metadata={
+                            "bases": getattr(klass, "bases", []),
+                            "methods": [m.name for m in getattr(klass, "methods", [])],
+                            "docstring": getattr(klass, "docstring", None),
+                        },
+                    )
+                )
+                # Methods are id-prefixed with their class id; the class -> method
+                # ``contains`` edges are produced later by _extract_relations.
+                for method in getattr(klass, "methods", []):
+                    method_id = claim(f"{klass_id}.{method.name}", rel_path, method)
+                    collected.append(
+                        KGEntity(
+                            id=method_id,
+                            name=method.name,
+                            type="function",
+                            file_path=rel_path,
+                            line=getattr(method, "line_start", 0),
+                            metadata={
+                                "params": params_of(method),
+                                "decorators": getattr(method, "decorators", []),
+                            },
+                        )
+                    )
+            for func in getattr(parsed, "functions", []):
+                func_id = claim(f"{mod_id}.{func.name}", rel_path, func)
+                collected.append(
+                    KGEntity(
+                        id=func_id,
+                        name=func.name,
+                        type="function",
+                        file_path=rel_path,
+                        line=getattr(func, "line_start", 0),
+                        metadata={
+                            "params": params_of(func),
+                            "decorators": getattr(func, "decorators", []),
+                            "return_type": getattr(func, "return_type", getattr(func, "returns", None)),
+                        },
+                    )
+                )
+        return collected
+    def _extract_relations(self, entities: list[KGEntity], file_infos: list) -> list[KGRelation]:
+        """Derive ``contains``, ``inherits`` and ``imports`` relations.
+        Args:
+            entities: Entities produced by ``_extract_entities``.
+            file_infos: Scanned file descriptors; ``relative_path`` / ``path`` are
+                used when present, otherwise ``str(fi)`` stands in for both.
+        Returns:
+            Relations in this order: module ``contains``, class ``contains``,
+            ``inherits``, then ``imports``. Files that cannot be read or parsed
+            contribute no ``imports`` relations.
+        """
+        relations = []
+        entity_map = {e.id: e for e in entities}
+        # contains: module -> classes / functions declared in that module's own file.
+        # Requiring the same file_path (not only the id prefix) keeps a package
+        # module (id "pkg") from claiming functions of its submodules
+        # (id "pkg.mod.func"), which also start with "pkg.".
+        modules = [e for e in entities if e.type == "module"]
+        for mod in modules:
+            mod_prefix = mod.id + "."
+            for e in entities:
+                if (
+                    e.type in ("class", "function")
+                    and e.file_path == mod.file_path
+                    and e.id.startswith(mod_prefix)
+                ):
+                    remainder = e.id[len(mod_prefix) :]
+                    # Direct children have no further dot. Functions exactly one
+                    # level deeper (class methods) are linked from the module too.
+                    # NOTE(review): methods therefore get a ``contains`` edge from
+                    # both module and class — confirm that is intended.
+                    if "." not in remainder or (
+                        e.type == "function" and remainder.count(".") == 1
+                    ):
+                        relations.append(
+                            KGRelation(
+                                source_id=mod.id,
+                                target_id=e.id,
+                                type="contains",
+                            )
+                        )
+        # contains: class -> methods (functions whose id is prefixed by the class id)
+        classes = [e for e in entities if e.type == "class"]
+        for cls in classes:
+            cls_prefix = cls.id + "."
+            for e in entities:
+                if e.type == "function" and e.id.startswith(cls_prefix):
+                    relations.append(
+                        KGRelation(
+                            source_id=cls.id,
+                            target_id=e.id,
+                            type="contains",
+                        )
+                    )
+        # inherits: class -> each base that resolves to a known entity
+        for cls in classes:
+            for base in cls.metadata.get("bases", []):
+                target_id = self._resolve_entity_id(base, entity_map, cls.file_path)
+                if target_id:
+                    relations.append(
+                        KGRelation(
+                            source_id=cls.id,
+                            target_id=target_id,
+                            type="inherits",
+                        )
+                    )
+        # imports: module -> imported module, for imports that resolve inside the project
+        for fi in file_infos:
+            file_path = fi.relative_path if hasattr(fi, "relative_path") else str(fi)
+            # Read via the absolute path, as _extract_entities does. The relative
+            # path only resolves when the process runs from the project root, and
+            # the failed read would be swallowed below, dropping every import edge.
+            abs_path = str(fi.path) if hasattr(fi, "path") else file_path
+            module_id = self._file_to_module_id(file_path)
+            parser = get_parser_for_file(file_path)
+            if parser is None:
+                continue
+            try:
+                code = Path(abs_path).read_text(encoding="utf-8", errors="ignore")
+                result = parser.parse_code(code, file_path)
+            except Exception:
+                # Best effort: an unreadable or unparsable file contributes no imports.
+                continue
+            for imp in getattr(result, "imports", []):
+                # Parsers expose the imported module as ``module`` or ``source``.
+                imp_module = getattr(imp, "module", None) or getattr(imp, "source", None) or ""
+                if imp_module:
+                    target_id = self._resolve_import_to_entity(imp_module, entity_map)
+                    if target_id:
+                        relations.append(
+                            KGRelation(
+                                source_id=module_id,
+                                target_id=target_id,
+                                type="imports",
+                            )
+                        )
+        return relations
+    def _detect_business_rules(
+        self, entities: list[KGEntity], relations: list[KGRelation]
+    ) -> list[KGBusinessRule]:
+        """Run the static heuristics and return the rules they produce.
+        Three passes, in order: naming checks on classes and functions, import
+        cycles between modules, and name-based design-pattern hints on classes.
+        Rule ids are ``BR001``, ``BR002``, ... in order of detection.
+        """
+        found: list = []
+        def record(description: str, category: str, entity_ids: list) -> None:
+            # Ids are sequential, so the next number is always len(found) + 1.
+            found.append(
+                KGBusinessRule(
+                    id=f"BR{len(found) + 1:03d}",
+                    description=description,
+                    category=category,
+                    entities=entity_ids,
+                    source="static",
+                )
+            )
+        # Pass 1: naming — only the first character of the name is inspected.
+        for ent in entities:
+            if ent.type == "class":
+                if ent.name and not re.match(r"^[A-Z]", ent.name):
+                    record(f"类 '{ent.name}' 未遵循大写开头命名约定", "naming", [ent.id])
+            elif ent.type == "function":
+                if (
+                    ent.name
+                    and not ent.name.startswith("_")
+                    and not re.match(r"^[a-z_]", ent.name)
+                ):
+                    record(f"函数 '{ent.name}' 未遵循蛇形命名约定", "naming", [ent.id])
+        # Pass 2: import cycles, restricted to edges whose two ends are modules.
+        module_ids = {e.id for e in entities if e.type == "module"}
+        graph: dict[str, set[str]] = {m: set() for m in module_ids}
+        for rel in relations:
+            if rel.type != "imports":
+                continue
+            if rel.source_id in graph and rel.target_id in graph:
+                graph[rel.source_id].add(rel.target_id)
+        for cycle in self._find_cycles(graph):
+            record(f"循环依赖: {' → '.join(cycle)}", "constraint", cycle)
+        # Pass 3: design-pattern hints derived from method names.
+        factory_markers = ("create", "build", "make")
+        for ent in entities:
+            if ent.type != "class":
+                continue
+            bases = ent.metadata.get("bases", [])
+            methods = ent.metadata.get("methods", [])
+            # Singleton hint: a method name containing "_instance" plus __new__.
+            if any("_instance" in m for m in methods) and "__new__" in methods:
+                record(f"类 '{ent.name}' 可能使用单例模式", "pattern", [ent.id])
+            # Factory hint: a create/build/make method on a class without bases.
+            if any(any(marker in m for marker in factory_markers) for m in methods) and not bases:
+                record(f"类 '{ent.name}' 可能使用工厂模式", "pattern", [ent.id])
+        return found
+    def _enhance_with_llm(self, kg: KnowledgeGraph) -> KnowledgeGraph:
+        """Ask the LLM for implied business rules and relations and append them.
+        Best effort: any failure (missing client, request error, unparsable or
+        malformed answer) leaves ``kg`` exactly as it was. Suggestions are validated
+        and staged first, so the graph is never left half-updated.
+        Args:
+            kg: Graph to enrich; it is modified in place.
+        Returns:
+            The same ``kg`` object.
+        """
+        try:
+            from core.llm_client import LLMClient
+            llm = LLMClient(self.analyzer.config.llm)
+            # The prompt carries a compact summary, not the whole graph.
+            summary = self._build_kg_summary(kg)
+            prompt = (
+                "分析以下代码知识图谱摘要，推断隐含的业务规则、领域概念和语义关系。\n\n"
+                f"{summary}\n\n"
+                "请以 JSON 格式返回，包含:\n"
+                '1. "business_rules": [{id, description, category, entities}]\n'
+                '2. "suggested_relations": [{source_id, target_id, type, description}]\n'
+            )
+            response = llm.analyze(prompt)
+            if not response:
+                return kg
+            from core.json_utils import JSONUtils
+            data = JSONUtils.parse_json(response)
+            if not isinstance(data, dict):
+                # Model output is untrusted: anything but a JSON object is ignored.
+                return kg
+            raw_rules = data.get("business_rules")
+            raw_relations = data.get("suggested_relations")
+            new_rules = []
+            for br in raw_rules if isinstance(raw_rules, list) else []:
+                if not isinstance(br, dict):
+                    continue
+                raw_entities = br.get("entities")
+                new_rules.append(
+                    KGBusinessRule(
+                        # Same default id the rule would get if appended directly.
+                        id=br.get("id", f"BR_LLM_{len(kg.business_rules) + len(new_rules)}"),
+                        description=br.get("description", ""),
+                        category=br.get("category", "convention"),
+                        # to_markdown() joins these as strings, so normalise them.
+                        entities=(
+                            [str(item) for item in raw_entities]
+                            if isinstance(raw_entities, list)
+                            else []
+                        ),
+                        source="llm",
+                    )
+                )
+            new_relations = []
+            for sr in raw_relations if isinstance(raw_relations, list) else []:
+                if not isinstance(sr, dict):
+                    continue
+                source_id = sr.get("source_id")
+                target_id = sr.get("target_id")
+                # An edge without both endpoints cannot be placed in the graph.
+                if not (isinstance(source_id, str) and source_id):
+                    continue
+                if not (isinstance(target_id, str) and target_id):
+                    continue
+                new_relations.append(
+                    KGRelation(
+                        source_id=source_id,
+                        target_id=target_id,
+                        type=sr.get("type", "semantic"),
+                        metadata={"description": sr.get("description", "")},
+                    )
+                )
+            # Commit only after everything validated.
+            kg.business_rules.extend(new_rules)
+            kg.relations.extend(new_relations)
+        except Exception:
+            # A failed LLM pass must not affect the statically built graph; leave a
+            # debug trace instead of dropping the error silently.
+            import logging
+            logging.getLogger(__name__).debug("LLM enhancement skipped", exc_info=True)
+        return kg
+    # ── 辅助方法 ──────────────────────────────────────────
+    @staticmethod
+    def _file_to_module_id(file_path: str) -> str:
+        """Turn a file path into a dotted module id.
+        The path components are joined with ``.`` after the extension is dropped
+        from the last one; a trailing ``__init__`` is removed so a package's init
+        file maps to the package directory. If nothing is left, the file's stem is
+        returned.
+        """
+        segments = list(Path(file_path).parts)
+        if not segments:
+            return Path(file_path).stem
+        segments[-1] = Path(segments[-1]).stem
+        if segments[-1] == "__init__":
+            segments.pop()
+        return ".".join(segments) if segments else Path(file_path).stem
+    @staticmethod
+    def _resolve_entity_id(name: str, entity_map: dict, current_file: str) -> str | None:
+        """Resolve a bare or dotted name to a known entity id.
+        Lookup order: the name itself, the name qualified with the module id of
+        ``current_file``, then the first id in ``entity_map`` ending in
+        ``.<name>``. Returns ``None`` when nothing matches.
+        """
+        if name in entity_map:
+            return name
+        # Same-module lookup; the bare name itself was already ruled out above.
+        local_id = f"{KnowledgeGraphAnalyzer._file_to_module_id(current_file)}.{name}"
+        if local_id in entity_map:
+            return local_id
+        # Fallback: first entity (in map order) whose last id component(s) equal the name.
+        dotted = f".{name}"
+        return next((eid for eid in entity_map if eid.endswith(dotted)), None)
+    @staticmethod
+    def _resolve_import_to_entity(module_name: str, entity_map: dict) -> str | None:
+        """Resolve an imported module name to the id of an in-project entity.
+        The full name is tried first, then progressively shorter trailing parts
+        (``a.b.c`` -> ``b.c`` -> ``c``). A candidate matches an id only when it
+        equals the id or lines up with it on a dot boundary, so ``json`` does not
+        match ``pkg.Graph.to_json`` and ``os`` does not match ``pkg.pos``.
+        Args:
+            module_name: Dotted module name taken from an import statement.
+            entity_map: Mapping of entity id to entity.
+        Returns:
+            The matching entity id, or ``None`` when the import is not part of the
+            project (or ``module_name`` is empty).
+        """
+        if not module_name:
+            return None
+        if module_name in entity_map:
+            return module_name
+        parts = module_name.split(".")
+        for i in range(len(parts)):
+            suffix = ".".join(parts[i:])
+            if not suffix:
+                # Produced by leading/trailing dots; an empty suffix would match every id.
+                continue
+            dotted = "." + suffix
+            for eid in entity_map:
+                if eid == suffix or eid.endswith(dotted):
+                    return eid
+        return None
+    @staticmethod
+    def _find_cycles(graph: dict[str, set[str]]) -> list[list[str]]:
+        """Find dependency cycles in a directed graph.
+        Depth-first search with an explicit stack, so long dependency chains do
+        not hit Python's recursion limit. Nodes and neighbours are visited in
+        sorted order, which makes the result independent of set iteration order.
+        Args:
+            graph: Adjacency mapping, node -> set of successor nodes. Successors
+                that are not keys of ``graph`` are treated as having no edges.
+        Returns:
+            One list per back edge found, holding the nodes of the cycle with the
+            starting node repeated at the end (``["a", "b", "a"]``).
+        """
+        cycles: list[list[str]] = []
+        visited: set[str] = set()
+        on_path: set[str] = set()
+        path: list[str] = []
+        for root in sorted(graph):
+            if root in visited:
+                continue
+            visited.add(root)
+            on_path.add(root)
+            path.append(root)
+            # One neighbour iterator per node on the current path.
+            stack = [iter(sorted(graph.get(root, ())))]
+            while stack:
+                for neighbor in stack[-1]:
+                    if neighbor not in visited:
+                        # Descend; the iterator on the stack resumes after this node.
+                        visited.add(neighbor)
+                        on_path.add(neighbor)
+                        path.append(neighbor)
+                        stack.append(iter(sorted(graph.get(neighbor, ()))))
+                        break
+                    if neighbor in on_path:
+                        # Back edge: the cycle is the path slice from that node on.
+                        cycles.append(path[path.index(neighbor) :] + [neighbor])
+                else:
+                    # All neighbours handled: leave the node.
+                    stack.pop()
+                    on_path.discard(path.pop())
+        return cycles
+    @staticmethod
+    def _build_kg_summary(kg: KnowledgeGraph) -> str:
+        """Build the text summary of the graph that is embedded in the LLM prompt.
+        Contains the entity/relation totals, the entity count per type (sorted by
+        type name), and the first 30 entities and first 30 relations in list order.
+        """
+        per_type: dict[str, int] = {}
+        for entity in kg.entities:
+            per_type[entity.type] = per_type.get(entity.type, 0) + 1
+        report = [
+            f"项目包含 {len(kg.entities)} 个实体, {len(kg.relations)} 条关系",
+            "\n实体类型分布:",
+            *(f"  {kind}: {count}" for kind, count in sorted(per_type.items())),
+            "\n关键实体 (前 30):",
+            *(f"  [{entity.type}] {entity.id}" for entity in kg.entities[:30]),
+            "\n关键关系 (前 30):",
+            *(f"  {rel.source_id} --[{rel.type}]--> {rel.target_id}" for rel in kg.relations[:30]),
+        ]
+        return "\n".join(report)

vcode-analysis 0.5.2__tar.gz → 0.5.3__tar.gz

vcode-analysis 0.5.2tar.gz → 0.5.3tar.gz