PyPI - source-kb - Versions diffs - 0.2.2__py3-none-any.whl - Mend

source-kb 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (228) hide show

cli/__init__.py +50 -0
cli/__main__.py +5 -0
cli/commands/__init__.py +1 -0
cli/commands/anchor_fix.py +47 -0
cli/commands/diff_doc.py +52 -0
cli/commands/dispatch.py +77 -0
cli/commands/extract.py +72 -0
cli/commands/file_list.py +74 -0
cli/commands/index.py +84 -0
cli/commands/lock.py +89 -0
cli/commands/merge.py +60 -0
cli/commands/merge_delta.py +19 -0
cli/commands/metadata.py +24 -0
cli/commands/pipeline.py +45 -0
cli/commands/post_merge.py +43 -0
cli/commands/query.py +52 -0
cli/commands/render.py +101 -0
cli/commands/scan_repos.py +46 -0
cli/commands/setup.py +94 -0
cli/commands/split.py +196 -0
cli/commands/stale_files.py +98 -0
cli/commands/validate.py +191 -0
core/__init__.py +32 -0
core/config.py +261 -0
core/docs/__init__.py +7 -0
core/docs/section_updater.py +286 -0
core/docs/shared.py +149 -0
core/git.py +294 -0
core/interfaces.py +249 -0
core/monitor/__init__.py +5 -0
core/monitor/progress.py +83 -0
core/monitor/prompt_store.py +49 -0
core/paths.py +141 -0
core/preset.py +237 -0
core/preset_accessors.py +202 -0
core/preset_classify.py +132 -0
core/preset_hooks.py +129 -0
core/preset_profile.py +89 -0
core/prompt/__init__.py +7 -0
core/prompt/__main__.py +147 -0
core/prompt/content.py +320 -0
core/prompt/context_manager.py +164 -0
core/prompt/renderer.py +236 -0
core/prompt/response_parser.py +274 -0
core/prompt/templates.py +357 -0
core/prompt/validate_parity.py +162 -0
core/prompt/variables.py +339 -0
core/rag/__init__.py +22 -0
core/rag/__main__.py +136 -0
core/rag/bm25_index.py +268 -0
core/rag/chunker.py +273 -0
core/rag/embedder.py +151 -0
core/rag/indexer.py +292 -0
core/rag/loader.py +89 -0
core/rag/retriever.py +82 -0
core/skeleton/__init__.py +11 -0
core/skeleton/__main__.py +934 -0
core/skeleton/anchor_fix.py +250 -0
core/skeleton/classify.py +331 -0
core/skeleton/cmd_anchor_fix.py +43 -0
core/skeleton/cmd_diff_doc.py +44 -0
core/skeleton/cmd_lock.py +87 -0
core/skeleton/cmd_merge_delta.py +41 -0
core/skeleton/community.py +233 -0
core/skeleton/dependency_graph.py +306 -0
core/skeleton/diff_doc.py +248 -0
core/skeleton/dispatch.py +273 -0
core/skeleton/dispatch_render.py +319 -0
core/skeleton/dispatch_source.py +111 -0
core/skeleton/extract.py +218 -0
core/skeleton/extract_methods.py +298 -0
core/skeleton/file_list.py +239 -0
core/skeleton/impact.py +278 -0
core/skeleton/jar_download.py +177 -0
core/skeleton/jar_resolver.py +186 -0
core/skeleton/loader.py +162 -0
core/skeleton/merge.py +278 -0
core/skeleton/merge_delta.py +229 -0
core/skeleton/metadata.py +96 -0
core/skeleton/metadata_builders.py +264 -0
core/skeleton/module_dag.py +330 -0
core/skeleton/parsers/__init__.py +71 -0
core/skeleton/parsers/jqassistant.py +300 -0
core/skeleton/parsers/jqassistant_cypher.py +225 -0
core/skeleton/parsers/regex.py +171 -0
core/skeleton/parsers/treesitter.py +324 -0
core/skeleton/parsers/treesitter_java.py +284 -0
core/skeleton/parsers/treesitter_multi.py +289 -0
core/skeleton/pom_parser.py +299 -0
core/skeleton/post_merge.py +295 -0
core/skeleton/post_merge_llm.py +82 -0
core/skeleton/query.py +195 -0
core/skeleton/shard_context.py +177 -0
core/skeleton/split.py +180 -0
core/skeleton/split_cache.py +107 -0
core/skeleton/split_feedback.py +174 -0
core/skeleton/split_plan.py +219 -0
core/skeleton/split_plan_helpers.py +305 -0
core/skeleton/split_plan_llm.py +274 -0
core/utils.py +135 -0
core/validators/__init__.py +65 -0
core/validators/__main__.py +215 -0
core/validators/consistency.py +203 -0
core/validators/coverage.py +171 -0
core/validators/duplicates.py +76 -0
core/validators/engine.py +224 -0
core/validators/links.py +76 -0
core/validators/sampling.py +169 -0
core/validators/structure.py +144 -0
engine/__init__.py +7 -0
engine/assembler.py +231 -0
engine/confirm.py +65 -0
engine/dedup.py +106 -0
engine/main.py +211 -0
engine/pipeline/__init__.py +163 -0
engine/pipeline/recovery.py +250 -0
engine/pipeline/steps/__init__.py +23 -0
engine/pipeline/steps/audit.py +220 -0
engine/pipeline/steps/audit_apply.py +195 -0
engine/pipeline/steps/audit_helpers.py +155 -0
engine/pipeline/steps/classify_llm.py +236 -0
engine/pipeline/steps/classify_prompt.py +223 -0
engine/pipeline/steps/finalize.py +160 -0
engine/pipeline/steps/generate.py +169 -0
engine/pipeline/steps/generate_batch.py +197 -0
engine/pipeline/steps/generate_recovery.py +170 -0
engine/pipeline/steps/llm_plan_split.py +253 -0
engine/pipeline/steps/lock.py +64 -0
engine/pipeline/steps/preflight.py +237 -0
engine/pipeline/steps/preflight_adjust.py +147 -0
engine/pipeline/steps/pregenerate.py +130 -0
engine/pipeline/steps/quality.py +81 -0
engine/pipeline/steps/skeleton.py +149 -0
engine/pipeline/steps/source.py +163 -0
engine/pipeline/steps/sync.py +117 -0
engine/pipeline/steps/sync_finalize.py +237 -0
engine/pipeline/steps/sync_update.py +341 -0
engine/pipelines.py +91 -0
engine/runner.py +335 -0
engine/strategies/__init__.py +86 -0
engine/strategies/api.py +128 -0
engine/strategies/delegated.py +50 -0
engine/strategies/dryrun.py +25 -0
engine/two_phase.py +143 -0
mcp_server/__init__.py +73 -0
mcp_server/__main__.py +5 -0
mcp_server/tools/__init__.py +1 -0
mcp_server/tools/config.py +63 -0
mcp_server/tools/discovery.py +276 -0
mcp_server/tools/generation.py +184 -0
mcp_server/tools/planning.py +144 -0
mcp_server/tools/source.py +175 -0
mcp_server/tools/validation.py +140 -0
mcp_server/tools/workflow.py +166 -0
mcp_server/workflow_loader.py +204 -0
presets/generic/audit_dimensions.md +132 -0
presets/generic/doc_types.yaml +152 -0
presets/generic/preset.yaml +115 -0
presets/java-spring/audit_dimensions.md +228 -0
presets/java-spring/audit_dimensions.yaml +203 -0
presets/java-spring/doc_types.yaml +269 -0
presets/java-spring/hooks.py +122 -0
presets/java-spring/preset.yaml +341 -0
presets/java-spring/templates/README.md +34 -0
presets/java-spring/templates/audit-system.md +15 -0
presets/java-spring/templates/subagent-aop.md +105 -0
presets/java-spring/templates/subagent-api.md +63 -0
presets/java-spring/templates/subagent-architecture.md +111 -0
presets/java-spring/templates/subagent-async-events.md +107 -0
presets/java-spring/templates/subagent-audit-api-contracts.md +40 -0
presets/java-spring/templates/subagent-audit-architecture.md +38 -0
presets/java-spring/templates/subagent-audit-business.md +40 -0
presets/java-spring/templates/subagent-audit-data-models.md +40 -0
presets/java-spring/templates/subagent-business.md +129 -0
presets/java-spring/templates/subagent-caching.md +75 -0
presets/java-spring/templates/subagent-database-access.md +114 -0
presets/java-spring/templates/subagent-enum.md +75 -0
presets/java-spring/templates/subagent-error-handling.md +91 -0
presets/java-spring/templates/subagent-external-integrations.md +80 -0
presets/java-spring/templates/subagent-index.md +122 -0
presets/java-spring/templates/subagent-messaging.md +97 -0
presets/java-spring/templates/subagent-model.md +88 -0
presets/java-spring/templates/subagent-observability.md +91 -0
presets/java-spring/templates/subagent-scheduled.md +81 -0
presets/java-spring/templates/subagent-security.md +102 -0
presets/java-spring/templates/subagent-structure.md +101 -0
presets/java-spring/templates/subagent-sync-section.md +34 -0
presets/java-spring/templates/subagent-utils.md +73 -0
presets/java-spring/templates/sync-system.md +8 -0
presets/java-spring/workflow-extensions.md +112 -0
skills/__init__.py +1 -0
skills/_shared/README.md +30 -0
skills/_shared/doc-coverage-shared.md +134 -0
skills/_shared/doc-quality-standard.md +1058 -0
skills/_shared/doc-subagent-rules.md +762 -0
skills/_shared/windows-compat.md +89 -0
skills/kb-audit/SKILL.md +52 -0
skills/kb-audit/rules.md +88 -0
skills/kb-audit/steps/step-01-prepare.md +75 -0
skills/kb-audit/steps/step-02-audit.md +96 -0
skills/kb-audit/steps/step-03-verify.md +65 -0
skills/kb-audit/steps/step-04-report.md +64 -0
skills/kb-init/SKILL.md +146 -0
skills/kb-init/rules.md +187 -0
skills/kb-init/steps/step-01-scope.md +62 -0
skills/kb-init/steps/step-02-source.md +410 -0
skills/kb-init/steps/step-03-generate.md +307 -0
skills/kb-init/steps/step-04-quality.md +92 -0
skills/kb-init/steps/step-05-finalize.md +132 -0
skills/kb-init/templates/core/execution-modes.md +29 -0
skills/kb-init/templates/core/output-only.md +4 -0
skills/kb-init/templates/core/readwrite.md +33 -0
skills/kb-search/SKILL.md +138 -0
skills/kb-search/rules.md +64 -0
skills/kb-sync/SKILL.md +43 -0
skills/kb-sync/rules.md +70 -0
skills/kb-sync/scripts/rebuild_module.py +91 -0
skills/kb-sync/scripts/scan_repos.py +687 -0
skills/kb-sync/steps/step-01-detect.md +72 -0
skills/kb-sync/steps/step-02-update.md +71 -0
skills/kb-sync/steps/step-03-verify.md +47 -0
skills/kb-sync/steps/step-04-finalize.md +52 -0
source_kb-0.2.2.dist-info/METADATA +194 -0
source_kb-0.2.2.dist-info/RECORD +228 -0
source_kb-0.2.2.dist-info/WHEEL +5 -0
source_kb-0.2.2.dist-info/entry_points.txt +3 -0
source_kb-0.2.2.dist-info/licenses/LICENSE +21 -0
source_kb-0.2.2.dist-info/top_level.txt +6 -0

core/docs/section_updater.py ADDED Viewed

@@ -0,0 +1,286 @@
+"""Section-level document manipulation.
+Locates and replaces/appends/removes specific sections in Markdown documents.
+Shared by both sync and audit pipelines.
+Usage:
+    from core.docs.section_updater import replace_section, append_section, remove_section, list_sections
+    success = replace_section(doc_path, "## User Management", new_content, level=2)
+"""
+from __future__ import annotations
+import re
+import unicodedata
+from pathlib import Path
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+def replace_section(
+    doc_path: Path,
+    heading: str,
+    new_content: str,
+    level: int = 2,
+) -> bool:
+    """Replace content of a specific section.
+    Section boundary: from target heading (inclusive) to next heading of
+    equal or higher level (exclusive), or end of file.
+    Args:
+        doc_path: Path to the markdown file
+        heading: Full heading text (e.g., "## User Management") or just the text part
+        new_content: Replacement content (heading line is preserved, only body replaced)
+        level: Heading level (2 = ##, 3 = ###)
+    Returns:
+        True if section found and replaced, False if heading not found.
+    """
+    lines, bom = _read_lines(doc_path)
+    normalized_heading = _ensure_heading_prefix(heading, level)
+    bounds = find_section_boundaries(lines, normalized_heading, level)
+    if bounds is None:
+        return False
+    start_idx, end_idx = bounds
+    # Build replacement: keep heading line + new content
+    heading_line = lines[start_idx]
+    content_lines = new_content.rstrip("\n").split("\n") if new_content.strip() else []
+    # Ensure blank line after heading if content exists
+    replacement = [heading_line]
+    if content_lines:
+        if content_lines[0].strip():  # no blank line at start
+            replacement.append("")
+        replacement.extend(content_lines)
+        replacement.append("")  # blank line before next section
+    new_lines = lines[:start_idx] + replacement + lines[end_idx:]
+    _write_lines(doc_path, new_lines, bom)
+    return True
+def append_section(
+    doc_path: Path,
+    heading: str,
+    content: str,
+    parent_heading: str | None = None,
+    level: int = 2,
+) -> bool:
+    """Append a new section to the document.
+    If parent_heading is specified, inserts as last child of that section.
+    Otherwise appends at end of file.
+    Returns:
+        True if successfully appended.
+    """
+    lines, bom = _read_lines(doc_path)
+    normalized_heading = _ensure_heading_prefix(heading, level)
+    content_lines = content.rstrip("\n").split("\n") if content.strip() else []
+    section_block = ["", normalized_heading]
+    if content_lines:
+        section_block.append("")
+        section_block.extend(content_lines)
+    section_block.append("")
+    if parent_heading:
+        parent_normalized = _ensure_heading_prefix(parent_heading, level - 1 if level > 1 else level)
+        parent_level = _count_hashes(parent_normalized)
+        bounds = find_section_boundaries(lines, parent_normalized, parent_level)
+        if bounds is not None:
+            _, end_idx = bounds
+            # Insert before the end of parent section
+            new_lines = lines[:end_idx] + section_block + lines[end_idx:]
+            _write_lines(doc_path, new_lines, bom)
+            return True
+    # Fallback: append at end
+    # Ensure file ends with newline before appending
+    if lines and lines[-1].strip():
+        lines.append("")
+    new_lines = lines + section_block
+    _write_lines(doc_path, new_lines, bom)
+    return True
+def remove_section(
+    doc_path: Path,
+    heading: str,
+    level: int = 2,
+) -> bool:
+    """Remove a section and its content.
+    Returns:
+        True if section found and removed, False if not found.
+    """
+    lines, bom = _read_lines(doc_path)
+    normalized_heading = _ensure_heading_prefix(heading, level)
+    bounds = find_section_boundaries(lines, normalized_heading, level)
+    if bounds is None:
+        return False
+    start_idx, end_idx = bounds
+    new_lines = lines[:start_idx] + lines[end_idx:]
+    # Clean up double blank lines at removal point
+    if start_idx > 0 and start_idx < len(new_lines):
+        if not new_lines[start_idx - 1].strip() and (
+            start_idx >= len(new_lines) or not new_lines[start_idx].strip()
+        ):
+            new_lines.pop(start_idx)
+    _write_lines(doc_path, new_lines, bom)
+    return True
+def list_sections(
+    doc_path: Path,
+    level: int = 2,
+) -> list[str]:
+    """List all section headings at the specified level.
+    Returns:
+        List of heading strings (e.g., ["## User Management", "## Order Processing"])
+    """
+    lines, _ = _read_lines(doc_path)
+    prefix = "#" * level + " "
+    result: list[str] = []
+    for line in lines:
+        if line.startswith(prefix) and not line.startswith("#" * (level + 1)):
+            result.append(line)
+    return result
+def find_section_boundaries(
+    lines: list[str],
+    heading: str,
+    level: int = 2,
+) -> tuple[int, int] | None:
+    """Find start and end line indices for a section.
+    Args:
+        lines: Document lines (no trailing newlines)
+        heading: Normalized heading with prefix (e.g., "## User Management")
+        level: Heading level
+    Returns:
+        (start_idx, end_idx) where start is the heading line (inclusive) and
+        end is the first line of the next section (exclusive), or len(lines).
+        Returns None if heading not found.
+    """
+    target_norm = _normalize_heading(heading)
+    start_idx: int | None = None
+    for i, line in enumerate(lines):
+        if _normalize_heading(line) == target_norm:
+            start_idx = i
+            break
+    if start_idx is None:
+        return None
+    # Find end: next heading of equal or higher level
+    target_level = _count_hashes(heading)
+    for j in range(start_idx + 1, len(lines)):
+        if lines[j].startswith("#"):
+            current_level = _count_hashes(lines[j])
+            if current_level <= target_level:
+                return (start_idx, j)
+    return (start_idx, len(lines))
+# ---------------------------------------------------------------------------
+# Internal helpers
+# ---------------------------------------------------------------------------
+_EMOJI_PATTERN = re.compile(
+    r"[\U0001F300-\U0001F9FF\U00002600-\U000027BF\U0001FA00-\U0001FA6F"
+    r"\U0001FA70-\U0001FAFF\U00002702-\U000027B0]+\s*"
+)
+def _normalize_heading(text: str) -> str:
+    """Normalize a heading for comparison.
+    Strips whitespace, removes emoji prefixes after #, lowercases.
+    """
+    text = text.strip()
+    if not text.startswith("#"):
+        return text.lower()
+    # Split into prefix (###) and title
+    match = re.match(r"(#+)\s*(.*)", text)
+    if not match:
+        return text.lower()
+    hashes = match.group(1)
+    title = match.group(2)
+    # Remove leading emoji from title
+    title = _EMOJI_PATTERN.sub("", title).strip()
+    return f"{hashes} {title}".lower()
+def _count_hashes(line: str) -> int:
+    """Count leading # characters."""
+    count = 0
+    for ch in line:
+        if ch == "#":
+            count += 1
+        else:
+            break
+    return count
+def _ensure_heading_prefix(heading: str, level: int) -> str:
+    """Ensure heading has the correct # prefix."""
+    heading = heading.strip()
+    if heading.startswith("#"):
+        return heading
+    return "#" * level + " " + heading
+def _read_lines(doc_path: Path) -> tuple[list[str], str]:
+    """Read file into lines, detecting BOM.
+    Returns:
+        (lines_without_newlines, bom_prefix)
+    """
+    raw = doc_path.read_bytes()
+    bom = ""
+    if raw.startswith(b"\xef\xbb\xbf"):
+        bom = "\ufeff"
+        raw = raw[3:]
+    text = raw.decode("utf-8")
+    # Normalize line endings
+    text = text.replace("\r\n", "\n").replace("\r", "\n")
+    lines = text.split("\n")
+    # Remove trailing empty line if file ended with \n (split artifact)
+    if lines and lines[-1] == "":
+        lines.pop()
+    return lines, bom
+def _write_lines(doc_path: Path, lines: list[str], bom: str = "") -> None:
+    """Write lines back to file, preserving BOM and ensuring trailing newline."""
+    content = "\n".join(lines)
+    if not content.endswith("\n"):
+        content += "\n"
+    if bom:
+        doc_path.write_bytes(b"\xef\xbb\xbf" + content.encode("utf-8"))
+    else:
+        doc_path.write_text(content, encoding="utf-8")

core/docs/shared.py ADDED Viewed

@@ -0,0 +1,149 @@
+"""Shared document generation — cross-module summaries.
+Generates _shared/ documents that aggregate information across all modules.
+Usage:
+    from core.docs.shared import generate_shared_docs
+    generated = generate_shared_docs(knowledge_dir, config, kb_name)
+"""
+from __future__ import annotations
+import logging
+from pathlib import Path
+from typing import Any
+from core.paths import ensure_dir
+logger = logging.getLogger(__name__)
+def generate_shared_docs(
+    knowledge_dir: Path,
+    config: dict[str, Any],
+    kb_name: str,
+) -> list[str]:
+    """Generate _shared/ cross-module documents.
+    Returns list of generated file names.
+    """
+    shared_dir = knowledge_dir / "_shared"
+    ensure_dir(shared_dir)
+    generated: list[str] = []
+    # Always generate project overview
+    overview = _generate_project_overview(shared_dir, knowledge_dir, config, kb_name)
+    if overview:
+        generated.append("project-overview.md")
+    # Cross-module calls (if multiple modules)
+    modules = _get_module_dirs(knowledge_dir)
+    if len(modules) >= 2:
+        cross = _generate_cross_module_calls(shared_dir, modules)
+        if cross:
+            generated.append("cross-module-calls.md")
+    return generated
+def _generate_project_overview(
+    shared_dir: Path, knowledge_dir: Path, config: dict, kb_name: str
+) -> bool:
+    """Generate project-overview.md from module index files."""
+    kb_config = config["knowledge_bases"][kb_name]
+    modules = _get_module_dirs(knowledge_dir)
+    lines = [f"# {kb_config.get('name', kb_name)} — Project Overview", ""]
+    for module_dir in modules:
+        index_file = module_dir / "index.md"
+        if index_file.exists():
+            # Extract first few lines as summary
+            content = index_file.read_text(encoding="utf-8")
+            first_lines = content.splitlines()[:5]
+            lines.append(f"## {module_dir.name}")
+            lines.extend(first_lines)
+            lines.append("")
+        else:
+            lines.append(f"## {module_dir.name}")
+            lines.append("(index.md not yet generated)")
+            lines.append("")
+    output = shared_dir / "project-overview.md"
+    output.write_text("\n".join(lines), encoding="utf-8")
+    return True
+def _generate_cross_module_calls(shared_dir: Path, modules: list[Path]) -> bool:
+    """Generate cross-module-calls.md by scanning for inter-module references."""
+    lines = ["# Cross-module call relationships", ""]
+    for module_dir in modules:
+        bl_file = module_dir / "business-logic.md"
+        if not bl_file.exists():
+            continue
+        content = bl_file.read_text(encoding="utf-8")
+        # Find references to other modules
+        other_modules = [m.name for m in modules if m != module_dir]
+        refs = []
+        for other in other_modules:
+            if other in content:
+                refs.append(other)
+        if refs:
+            lines.append(f"## {module_dir.name}")
+            lines.append(f"References modules: {', '.join(refs)}")
+            lines.append("")
+    if len(lines) <= 2:
+        return False
+    output = shared_dir / "cross-module-calls.md"
+    output.write_text("\n".join(lines), encoding="utf-8")
+    return True
+def _get_module_dirs(knowledge_dir: Path) -> list[Path]:
+    """Get all module directories (non-hidden, non-_shared)."""
+    if not knowledge_dir.is_dir():
+        return []
+    return sorted(
+        d for d in knowledge_dir.iterdir()
+        if d.is_dir() and not d.name.startswith(".") and d.name != "_shared"
+    )
+def module_order_topo(config: dict, kb_name: str) -> list[str]:
+    """Compute topological order of modules based on inter-module dependencies.
+    Reads pom.xml or package.json to determine dependency DAG.
+    Falls back to alphabetical order if no dependencies detected.
+    Returns ordered list of module names (upstream first).
+    """
+    kb_config = config["knowledge_bases"][kb_name]
+    source = kb_config.get("source", {})
+    if source.get("structure") == "multi-repo":
+        modules = [r["name"] for r in source.get("repos", [])]
+    elif source.get("structure") == "monorepo":
+        modules = [m["name"] for m in source.get("modules", [])]
+    else:
+        return []
+    # TODO: Parse pom.xml/package.json for actual dependency graph
+    # For now, heuristic: base-lib types first, then services
+    base_libs = []
+    services = []
+    for mod_cfg in source.get("repos", source.get("modules", [])):
+        name = mod_cfg.get("name", "")
+        mod_type = mod_cfg.get("type", "service")
+        if mod_type in ("base-lib", "api-contract"):
+            base_libs.append(name)
+        else:
+            services.append(name)
+    return base_libs + services