source-kb 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/__init__.py +50 -0
- cli/__main__.py +5 -0
- cli/commands/__init__.py +1 -0
- cli/commands/anchor_fix.py +47 -0
- cli/commands/diff_doc.py +52 -0
- cli/commands/dispatch.py +77 -0
- cli/commands/extract.py +72 -0
- cli/commands/file_list.py +74 -0
- cli/commands/index.py +84 -0
- cli/commands/lock.py +89 -0
- cli/commands/merge.py +60 -0
- cli/commands/merge_delta.py +19 -0
- cli/commands/metadata.py +24 -0
- cli/commands/pipeline.py +45 -0
- cli/commands/post_merge.py +43 -0
- cli/commands/query.py +52 -0
- cli/commands/render.py +101 -0
- cli/commands/scan_repos.py +46 -0
- cli/commands/setup.py +94 -0
- cli/commands/split.py +196 -0
- cli/commands/stale_files.py +98 -0
- cli/commands/validate.py +191 -0
- core/__init__.py +32 -0
- core/config.py +261 -0
- core/docs/__init__.py +7 -0
- core/docs/section_updater.py +286 -0
- core/docs/shared.py +149 -0
- core/git.py +294 -0
- core/interfaces.py +249 -0
- core/monitor/__init__.py +5 -0
- core/monitor/progress.py +83 -0
- core/monitor/prompt_store.py +49 -0
- core/paths.py +141 -0
- core/preset.py +237 -0
- core/preset_accessors.py +202 -0
- core/preset_classify.py +132 -0
- core/preset_hooks.py +129 -0
- core/preset_profile.py +89 -0
- core/prompt/__init__.py +7 -0
- core/prompt/__main__.py +147 -0
- core/prompt/content.py +320 -0
- core/prompt/context_manager.py +164 -0
- core/prompt/renderer.py +236 -0
- core/prompt/response_parser.py +274 -0
- core/prompt/templates.py +357 -0
- core/prompt/validate_parity.py +162 -0
- core/prompt/variables.py +339 -0
- core/rag/__init__.py +22 -0
- core/rag/__main__.py +136 -0
- core/rag/bm25_index.py +268 -0
- core/rag/chunker.py +273 -0
- core/rag/embedder.py +151 -0
- core/rag/indexer.py +292 -0
- core/rag/loader.py +89 -0
- core/rag/retriever.py +82 -0
- core/skeleton/__init__.py +11 -0
- core/skeleton/__main__.py +934 -0
- core/skeleton/anchor_fix.py +250 -0
- core/skeleton/classify.py +331 -0
- core/skeleton/cmd_anchor_fix.py +43 -0
- core/skeleton/cmd_diff_doc.py +44 -0
- core/skeleton/cmd_lock.py +87 -0
- core/skeleton/cmd_merge_delta.py +41 -0
- core/skeleton/community.py +233 -0
- core/skeleton/dependency_graph.py +306 -0
- core/skeleton/diff_doc.py +248 -0
- core/skeleton/dispatch.py +273 -0
- core/skeleton/dispatch_render.py +319 -0
- core/skeleton/dispatch_source.py +111 -0
- core/skeleton/extract.py +218 -0
- core/skeleton/extract_methods.py +298 -0
- core/skeleton/file_list.py +239 -0
- core/skeleton/impact.py +278 -0
- core/skeleton/jar_download.py +177 -0
- core/skeleton/jar_resolver.py +186 -0
- core/skeleton/loader.py +162 -0
- core/skeleton/merge.py +278 -0
- core/skeleton/merge_delta.py +229 -0
- core/skeleton/metadata.py +96 -0
- core/skeleton/metadata_builders.py +264 -0
- core/skeleton/module_dag.py +330 -0
- core/skeleton/parsers/__init__.py +71 -0
- core/skeleton/parsers/jqassistant.py +300 -0
- core/skeleton/parsers/jqassistant_cypher.py +225 -0
- core/skeleton/parsers/regex.py +171 -0
- core/skeleton/parsers/treesitter.py +324 -0
- core/skeleton/parsers/treesitter_java.py +284 -0
- core/skeleton/parsers/treesitter_multi.py +289 -0
- core/skeleton/pom_parser.py +299 -0
- core/skeleton/post_merge.py +295 -0
- core/skeleton/post_merge_llm.py +82 -0
- core/skeleton/query.py +195 -0
- core/skeleton/shard_context.py +177 -0
- core/skeleton/split.py +180 -0
- core/skeleton/split_cache.py +107 -0
- core/skeleton/split_feedback.py +174 -0
- core/skeleton/split_plan.py +219 -0
- core/skeleton/split_plan_helpers.py +305 -0
- core/skeleton/split_plan_llm.py +274 -0
- core/utils.py +135 -0
- core/validators/__init__.py +65 -0
- core/validators/__main__.py +215 -0
- core/validators/consistency.py +203 -0
- core/validators/coverage.py +171 -0
- core/validators/duplicates.py +76 -0
- core/validators/engine.py +224 -0
- core/validators/links.py +76 -0
- core/validators/sampling.py +169 -0
- core/validators/structure.py +144 -0
- engine/__init__.py +7 -0
- engine/assembler.py +231 -0
- engine/confirm.py +65 -0
- engine/dedup.py +106 -0
- engine/main.py +211 -0
- engine/pipeline/__init__.py +163 -0
- engine/pipeline/recovery.py +250 -0
- engine/pipeline/steps/__init__.py +23 -0
- engine/pipeline/steps/audit.py +220 -0
- engine/pipeline/steps/audit_apply.py +195 -0
- engine/pipeline/steps/audit_helpers.py +155 -0
- engine/pipeline/steps/classify_llm.py +236 -0
- engine/pipeline/steps/classify_prompt.py +223 -0
- engine/pipeline/steps/finalize.py +160 -0
- engine/pipeline/steps/generate.py +169 -0
- engine/pipeline/steps/generate_batch.py +197 -0
- engine/pipeline/steps/generate_recovery.py +170 -0
- engine/pipeline/steps/llm_plan_split.py +253 -0
- engine/pipeline/steps/lock.py +64 -0
- engine/pipeline/steps/preflight.py +237 -0
- engine/pipeline/steps/preflight_adjust.py +147 -0
- engine/pipeline/steps/pregenerate.py +130 -0
- engine/pipeline/steps/quality.py +81 -0
- engine/pipeline/steps/skeleton.py +149 -0
- engine/pipeline/steps/source.py +163 -0
- engine/pipeline/steps/sync.py +117 -0
- engine/pipeline/steps/sync_finalize.py +237 -0
- engine/pipeline/steps/sync_update.py +341 -0
- engine/pipelines.py +91 -0
- engine/runner.py +335 -0
- engine/strategies/__init__.py +86 -0
- engine/strategies/api.py +128 -0
- engine/strategies/delegated.py +50 -0
- engine/strategies/dryrun.py +25 -0
- engine/two_phase.py +143 -0
- mcp_server/__init__.py +73 -0
- mcp_server/__main__.py +5 -0
- mcp_server/tools/__init__.py +1 -0
- mcp_server/tools/config.py +63 -0
- mcp_server/tools/discovery.py +276 -0
- mcp_server/tools/generation.py +184 -0
- mcp_server/tools/planning.py +144 -0
- mcp_server/tools/source.py +175 -0
- mcp_server/tools/validation.py +140 -0
- mcp_server/tools/workflow.py +166 -0
- mcp_server/workflow_loader.py +204 -0
- presets/generic/audit_dimensions.md +132 -0
- presets/generic/doc_types.yaml +152 -0
- presets/generic/preset.yaml +115 -0
- presets/java-spring/audit_dimensions.md +228 -0
- presets/java-spring/audit_dimensions.yaml +203 -0
- presets/java-spring/doc_types.yaml +269 -0
- presets/java-spring/hooks.py +122 -0
- presets/java-spring/preset.yaml +341 -0
- presets/java-spring/templates/README.md +34 -0
- presets/java-spring/templates/audit-system.md +15 -0
- presets/java-spring/templates/subagent-aop.md +105 -0
- presets/java-spring/templates/subagent-api.md +63 -0
- presets/java-spring/templates/subagent-architecture.md +111 -0
- presets/java-spring/templates/subagent-async-events.md +107 -0
- presets/java-spring/templates/subagent-audit-api-contracts.md +40 -0
- presets/java-spring/templates/subagent-audit-architecture.md +38 -0
- presets/java-spring/templates/subagent-audit-business.md +40 -0
- presets/java-spring/templates/subagent-audit-data-models.md +40 -0
- presets/java-spring/templates/subagent-business.md +129 -0
- presets/java-spring/templates/subagent-caching.md +75 -0
- presets/java-spring/templates/subagent-database-access.md +114 -0
- presets/java-spring/templates/subagent-enum.md +75 -0
- presets/java-spring/templates/subagent-error-handling.md +91 -0
- presets/java-spring/templates/subagent-external-integrations.md +80 -0
- presets/java-spring/templates/subagent-index.md +122 -0
- presets/java-spring/templates/subagent-messaging.md +97 -0
- presets/java-spring/templates/subagent-model.md +88 -0
- presets/java-spring/templates/subagent-observability.md +91 -0
- presets/java-spring/templates/subagent-scheduled.md +81 -0
- presets/java-spring/templates/subagent-security.md +102 -0
- presets/java-spring/templates/subagent-structure.md +101 -0
- presets/java-spring/templates/subagent-sync-section.md +34 -0
- presets/java-spring/templates/subagent-utils.md +73 -0
- presets/java-spring/templates/sync-system.md +8 -0
- presets/java-spring/workflow-extensions.md +112 -0
- skills/__init__.py +1 -0
- skills/_shared/README.md +30 -0
- skills/_shared/doc-coverage-shared.md +134 -0
- skills/_shared/doc-quality-standard.md +1058 -0
- skills/_shared/doc-subagent-rules.md +762 -0
- skills/_shared/windows-compat.md +89 -0
- skills/kb-audit/SKILL.md +52 -0
- skills/kb-audit/rules.md +88 -0
- skills/kb-audit/steps/step-01-prepare.md +75 -0
- skills/kb-audit/steps/step-02-audit.md +96 -0
- skills/kb-audit/steps/step-03-verify.md +65 -0
- skills/kb-audit/steps/step-04-report.md +64 -0
- skills/kb-init/SKILL.md +146 -0
- skills/kb-init/rules.md +187 -0
- skills/kb-init/steps/step-01-scope.md +62 -0
- skills/kb-init/steps/step-02-source.md +410 -0
- skills/kb-init/steps/step-03-generate.md +307 -0
- skills/kb-init/steps/step-04-quality.md +92 -0
- skills/kb-init/steps/step-05-finalize.md +132 -0
- skills/kb-init/templates/core/execution-modes.md +29 -0
- skills/kb-init/templates/core/output-only.md +4 -0
- skills/kb-init/templates/core/readwrite.md +33 -0
- skills/kb-search/SKILL.md +138 -0
- skills/kb-search/rules.md +64 -0
- skills/kb-sync/SKILL.md +43 -0
- skills/kb-sync/rules.md +70 -0
- skills/kb-sync/scripts/rebuild_module.py +91 -0
- skills/kb-sync/scripts/scan_repos.py +687 -0
- skills/kb-sync/steps/step-01-detect.md +72 -0
- skills/kb-sync/steps/step-02-update.md +71 -0
- skills/kb-sync/steps/step-03-verify.md +47 -0
- skills/kb-sync/steps/step-04-finalize.md +52 -0
- source_kb-0.2.2.dist-info/METADATA +194 -0
- source_kb-0.2.2.dist-info/RECORD +228 -0
- source_kb-0.2.2.dist-info/WHEEL +5 -0
- source_kb-0.2.2.dist-info/entry_points.txt +3 -0
- source_kb-0.2.2.dist-info/licenses/LICENSE +21 -0
- source_kb-0.2.2.dist-info/top_level.txt +6 -0
engine/two_phase.py
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
"""Two-phase generation — CLI-only strategy for split documents.
|
|
2
|
+
|
|
3
|
+
Phase 1: Each shard generates a structural outline (2-4KB)
|
|
4
|
+
Phase 2: All outlines merged → each shard generates full content with global context
|
|
5
|
+
|
|
6
|
+
This eliminates information isolation between shards in output-only mode
|
|
7
|
+
(where source is inlined and shards can't read each other's files).
|
|
8
|
+
|
|
9
|
+
Agent mode doesn't need this — sub-agents can read any file directly.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import logging
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
from core.interfaces import LlmStrategy
|
|
18
|
+
from engine.runner import SubagentTask, SubagentResult, run_batch
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
OUTLINE_SUFFIX = """
|
|
23
|
+
|
|
24
|
+
## ⚠️ Current Task: Generate document outline only (Phase 1/2)
|
|
25
|
+
|
|
26
|
+
Please **output only the document outline**, not the full content.
|
|
27
|
+
|
|
28
|
+
### Outline Format Requirements
|
|
29
|
+
|
|
30
|
+
```markdown
|
|
31
|
+
## [Section Title]
|
|
32
|
+
|
|
33
|
+
**Summary**: 1-2 sentences describing the business process covered by this section
|
|
34
|
+
|
|
35
|
+
**Methods covered**:
|
|
36
|
+
- ClassName.method1 — one-sentence responsibility
|
|
37
|
+
- ClassName.method2 — one-sentence responsibility
|
|
38
|
+
|
|
39
|
+
**Key business rules** (numbered list, one sentence each):
|
|
40
|
+
1. ...
|
|
41
|
+
2. ...
|
|
42
|
+
|
|
43
|
+
---
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### Rules
|
|
47
|
+
1. Each `##` section corresponds to one business process
|
|
48
|
+
2. Only write title + summary + method list + key rules
|
|
49
|
+
3. Keep the size within **2-4KB**
|
|
50
|
+
"""
|
|
51
|
+
|
|
52
|
+
FULL_PHASE_PREFIX = """
|
|
53
|
+
|
|
54
|
+
## Global Document Outline (section plans for all shards)
|
|
55
|
+
|
|
56
|
+
Below is the combined document outline generated by all shards in Phase 1. You only need to expand **the sections you are responsible for**.
|
|
57
|
+
|
|
58
|
+
{combined_outline}
|
|
59
|
+
|
|
60
|
+
---
|
|
61
|
+
|
|
62
|
+
## Sections You Are Responsible For
|
|
63
|
+
|
|
64
|
+
Based on your file list, expand the sections in the outline above that belong to you.
|
|
65
|
+
"""
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def run_two_phase(
    tasks: list[SubagentTask],
    strategy: LlmStrategy,
    max_concurrent: int = 5,
) -> list[SubagentResult]:
    """Execute two-phase generation for split documents.

    Phase 1 re-issues every task with ``OUTLINE_SUFFIX`` appended so each shard
    produces only an outline. The successful outlines are merged and, in
    Phase 2, injected into each original prompt before the full run. If no
    outline at all is produced, the original tasks are run directly instead.

    Args:
        tasks: Original full-generation tasks (one per shard).
        strategy: LLM execution strategy handed to ``run_batch``.
        max_concurrent: Concurrency limit handed to ``run_batch``.

    Returns:
        SubagentResult list from phase 2 (full content), or from the direct
        fallback run when phase 1 yields nothing. Empty when ``tasks`` is empty.
    """
    if not tasks:
        return []

    outline_marker = "__outline"

    # Phase 1: Generate outlines
    logger.info("[two-phase] Phase 1: generating %d outlines", len(tasks))
    outline_tasks = [
        SubagentTask(
            task_id=f"{t.task_id}{outline_marker}",
            prompt=t.prompt + OUTLINE_SUFFIX,
            output_path=t.output_path.parent / ".meta" / "outlines" / f"{t.task_id}.md",
            doc_type=t.doc_type,
            timeout=300,
        )
        for t in tasks
    ]

    outline_results = run_batch(outline_tasks, strategy, max_concurrent)

    # Collect outlines
    outlines: list[tuple[str, str]] = []
    for result in outline_results:
        if result.status == "done" and result.content:
            name = result.task_id
            # Strip only the trailing marker added above; a blanket replace()
            # would also mangle shard ids that contain "__outline" themselves.
            if name.endswith(outline_marker):
                name = name[: -len(outline_marker)]
            outlines.append((name, result.content))

    if not outlines:
        logger.warning("[two-phase] Phase 1 failed, falling back to direct generation")
        return run_batch(tasks, strategy, max_concurrent)

    if len(outlines) < len(tasks):
        # Phase 2 still runs for every shard; the global outline is just partial.
        logger.warning(
            "[two-phase] Phase 1 produced only %d/%d outlines",
            len(outlines), len(tasks),
        )

    # Merge outlines
    combined = "\n\n---\n\n".join(f"### Shard: {name}\n\n{content}" for name, content in outlines)
    logger.info("[two-phase] Phase 1 done: %d outlines, %d chars", len(outlines), len(combined))

    # Phase 2: Full generation with global outline context
    logger.info("[two-phase] Phase 2: generating full content")
    prefix = FULL_PHASE_PREFIX.format(combined_outline=combined)

    full_tasks = [
        SubagentTask(
            task_id=t.task_id,
            prompt=_inject_outline(t.prompt, prefix),
            output_path=t.output_path,
            doc_type=t.doc_type,
            timeout=t.timeout,
        )
        for t in tasks
    ]

    results = run_batch(full_tasks, strategy, max_concurrent)
    logger.info("[two-phase] Phase 2 done: %d/%d succeeded",
                sum(1 for r in results if r.status == "done"), len(results))
    return results
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _inject_outline(prompt: str, prefix: str) -> str:
|
|
138
|
+
"""Inject outline context into the original prompt."""
|
|
139
|
+
marker = "## Rules you must follow"
|
|
140
|
+
if marker in prompt:
|
|
141
|
+
pos = prompt.index(marker)
|
|
142
|
+
return prompt[:pos] + prefix + "\n" + prompt[pos:]
|
|
143
|
+
return prompt + prefix
|
mcp_server/__init__.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""source-kb MCP Server — code knowledge base documentation generation toolkit.
|
|
2
|
+
|
|
3
|
+
Provides AI agents with project discovery, workflow orchestration, skeleton extraction,
|
|
4
|
+
document generation, and coverage validation capabilities via the MCP protocol.
|
|
5
|
+
|
|
6
|
+
After installation:
|
|
7
|
+
uvx source-kb-mcp
|
|
8
|
+
|
|
9
|
+
For local development:
|
|
10
|
+
python -m mcp_server
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
import sys
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
|
|
19
|
+
PROJECT_ROOT = Path(__file__).resolve().parent.parent
|
|
20
|
+
if str(PROJECT_ROOT) not in sys.path:
|
|
21
|
+
sys.path.insert(0, str(PROJECT_ROOT))
|
|
22
|
+
|
|
23
|
+
from mcp.server.fastmcp import FastMCP
|
|
24
|
+
|
|
25
|
+
mcp = FastMCP(
|
|
26
|
+
"source-kb",
|
|
27
|
+
instructions=(
|
|
28
|
+
"A toolkit for automatically generating structured knowledge base documentation from source code. "
|
|
29
|
+
"Start by calling discover() to understand project status, then use get_workflow() to get workflow guidance."
|
|
30
|
+
),
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def project_root() -> Path:
    """Return the user's project root.

    The ``SOURCE_KB_ROOT`` environment variable wins when it is set;
    otherwise the current working directory is used.
    """
    fallback = os.getcwd()
    configured = os.environ.get("SOURCE_KB_ROOT", fallback)
    return Path(configured)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def find_config(root: Path | None = None) -> Path | None:
    """Locate the project configuration file under ``root``.

    ``kb-project.yaml`` is preferred over ``kb-project.yml``. ``root``
    defaults to ``project_root()``. Returns ``None`` when neither file exists.
    """
    if root is None:
        root = project_root()
    candidates = ("kb-project.yaml", "kb-project.yml")
    for filename in candidates:
        candidate = root / filename
        if candidate.exists():
            return candidate
    return None
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# Register all tools
|
|
50
|
+
from mcp_server.tools.discovery import register as _reg_discovery # noqa: E402
|
|
51
|
+
from mcp_server.tools.workflow import register as _reg_workflow # noqa: E402
|
|
52
|
+
from mcp_server.tools.source import register as _reg_source # noqa: E402
|
|
53
|
+
from mcp_server.tools.planning import register as _reg_planning # noqa: E402
|
|
54
|
+
from mcp_server.tools.validation import register as _reg_validation # noqa: E402
|
|
55
|
+
from mcp_server.tools.generation import register as _reg_generation # noqa: E402
|
|
56
|
+
from mcp_server.tools.config import register as _reg_config # noqa: E402
|
|
57
|
+
|
|
58
|
+
_reg_discovery(mcp)
|
|
59
|
+
_reg_workflow(mcp)
|
|
60
|
+
_reg_source(mcp)
|
|
61
|
+
_reg_planning(mcp)
|
|
62
|
+
_reg_validation(mcp)
|
|
63
|
+
_reg_generation(mcp)
|
|
64
|
+
_reg_config(mcp)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def main():
    """Entry point: run the module-level FastMCP server instance."""
    mcp.run()
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
if __name__ == "__main__":
|
|
73
|
+
main()
|
mcp_server/__main__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""MCP Server tool modules."""
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""Config tools — preset listing and configuration queries."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from mcp.server.fastmcp import FastMCP
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def register(mcp: FastMCP) -> None:
    """Register config tools on the MCP server.

    Defines the ``list_presets`` tool on ``mcp``. It scans the ``presets``
    directory under the package root and returns a JSON-encoded summary of
    every preset that ships a ``preset.yaml``.
    """

    @mcp.tool()
    def list_presets() -> str:
        """List available language presets and their supported document types.

        Returns the name, description, and supported doc_types list for each preset.
        """
        from mcp_server import PROJECT_ROOT

        presets_dir = PROJECT_ROOT / "presets"
        result = {}

        if presets_dir.is_dir():
            import yaml

            for preset_dir in sorted(presets_dir.iterdir()):
                if not preset_dir.is_dir():
                    continue
                preset_yaml = preset_dir / "preset.yaml"
                if not preset_yaml.exists():
                    continue

                # safe_load returns None for an empty document; normalise to a
                # mapping so the lookups below cannot raise on an empty preset.
                cfg = yaml.safe_load(preset_yaml.read_text(encoding="utf-8")) or {}
                if not isinstance(cfg, dict):
                    cfg = {}

                # Merge doc_types.yaml if exists; it takes precedence over an
                # inline "doc_types" key in preset.yaml.
                doc_types_yaml = preset_dir / "doc_types.yaml"
                doc_types = {}
                if doc_types_yaml.exists():
                    raw = yaml.safe_load(doc_types_yaml.read_text(encoding="utf-8")) or {}
                    # The file may wrap the mapping in a top-level "doc_types" key.
                    doc_types = raw.get("doc_types", raw) if isinstance(raw, dict) else {}
                elif "doc_types" in cfg:
                    doc_types = cfg["doc_types"]
                if not isinstance(doc_types, dict):
                    # A null or list-valued doc_types has nothing to enumerate.
                    doc_types = {}

                doc_type_list = [
                    {
                        "name": dt_key,
                        "filename": dt_cfg.get("filename", f"{dt_key}.md"),
                        "batch": dt_cfg.get("batch", 99),
                        "conditional": dt_cfg.get("conditional", False),
                        "global_view": dt_cfg.get("global_view", False),
                    }
                    for dt_key, dt_cfg in doc_types.items()
                    if isinstance(dt_cfg, dict)
                ]

                result[preset_dir.name] = {
                    "name": cfg.get("name", preset_dir.name),
                    "description": cfg.get("description", ""),
                    "doc_types": doc_type_list,
                    "doc_type_count": len(doc_type_list),
                }

        return json.dumps({"status": "ok", "presets": result}, ensure_ascii=False, indent=2)
|
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
"""Discovery tools — project detection and initialization."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import re
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from mcp.server.fastmcp import FastMCP
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def register(mcp: FastMCP) -> None:
    """Register discovery tools on the MCP server.

    Defines three tools on ``mcp``: ``discover`` (report configuration and
    knowledge-base status), ``detect_project`` (infer a suggested
    configuration from files on disk) and ``init_project`` (write
    ``kb-project.yaml``). Every tool returns a JSON-encoded string.
    """

    def _json_list(raw: str, label: str, warnings: list[str]) -> list:
        """Parse ``raw`` as a JSON list; record a warning and return [] otherwise."""
        try:
            parsed = json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            warnings.append(f"'{label}' is not valid JSON; a placeholder entry was used instead.")
            return []
        if not isinstance(parsed, list):
            warnings.append(f"'{label}' must be a JSON list; a placeholder entry was used instead.")
            return []
        return parsed

    @mcp.tool()
    def discover() -> str:
        """Discover project status and available workflows. This is the entry-point tool that agents should call first.

        Scans the current directory, detects whether a kb-project.yaml configuration exists,
        checks the state of any generated knowledge bases, and returns available workflows
        with suggested next actions.
        """
        from mcp_server import project_root, find_config
        from mcp_server.workflow_loader import list_workflows

        root = project_root()
        config_path = find_config(root)

        result: dict[str, Any] = {
            "project_root": str(root),
            "project_detected": config_path is not None,
            "config_path": str(config_path) if config_path else None,
            "knowledge_bases": [],
            "available_workflows": list_workflows(),
            "suggested_action": "",
        }

        if config_path:
            import yaml
            try:
                # An empty file parses to None. Normalising it lets the
                # "Configuration file is empty" branch below actually fire
                # instead of surfacing an AttributeError as a parse failure.
                cfg = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {}
                base_dir = config_path.parent
                for kb_name, kb_cfg in (cfg.get("knowledge_bases") or {}).items():
                    kb_dir = Path(kb_cfg.get("knowledge_dir", ""))
                    if not kb_dir.is_absolute():
                        # Relative paths are interpreted against the config file's directory.
                        kb_dir = (base_dir / kb_dir).resolve()

                    source = kb_cfg.get("source") or {}
                    if source.get("structure") == "monorepo":
                        modules = [m["name"] for m in source.get("modules") or []]
                    else:
                        modules = [r["name"] for r in source.get("repos") or []]

                    has_docs = kb_dir.is_dir() and any(kb_dir.rglob("*.md"))

                    result["knowledge_bases"].append({
                        "name": kb_name,
                        "preset": kb_cfg.get("preset", "generic"),
                        "modules": modules,
                        "has_docs": has_docs,
                        "knowledge_dir": str(kb_dir),
                    })

                if result["knowledge_bases"]:
                    has_any_docs = any(kb["has_docs"] for kb in result["knowledge_bases"])
                    if has_any_docs:
                        result["suggested_action"] = (
                            "Knowledge base exists. Use get_workflow('kb-sync') for incremental updates, "
                            "or get_workflow('kb-audit') to audit quality."
                        )
                    else:
                        result["suggested_action"] = (
                            "Configuration is ready but no documents exist yet. Call get_workflow('kb-init') to start initialization."
                        )
                else:
                    result["suggested_action"] = "Configuration file is empty. Call init_project() to generate configuration."
            except Exception as e:
                # Tool boundary: report any malformed configuration to the agent
                # as data rather than letting the tool call fail.
                result["error"] = f"Configuration parsing failed: {e}"
                result["suggested_action"] = "Configuration file parsing failed. Please check the YAML format."
        else:
            result["suggested_action"] = (
                "No kb-project.yaml detected. Call detect_project() to scan project structure, "
                "then call init_project() to generate configuration."
            )

        return json.dumps(result, ensure_ascii=False, indent=2)

    @mcp.tool()
    def detect_project(path: str = ".") -> str:
        """Detect project structure and recommend kb-project.yaml configuration.

        Scans for pom.xml, build.gradle, .git, etc. to automatically infer project type,
        module list, and recommended preset. Returns a configuration suggestion ready to be written.

        Args:
            path: Project root directory path (defaults to current directory)
        """
        from mcp_server import project_root

        root = Path(path).resolve() if path != "." else project_root()

        result: dict[str, Any] = {
            "project_root": str(root),
            "preset": "generic",
            "structure": "unknown",
            "modules": [],
            "git_url": None,
            "branch": "main",
        }

        # Detect Java/Spring
        pom = root / "pom.xml"
        if pom.exists():
            result["preset"] = "java-spring"
            content = pom.read_text(encoding="utf-8", errors="replace")
            if "<modules>" in content:
                result["structure"] = "monorepo"
                for raw_name in re.findall(r"<module>([^<]+)</module>", content):
                    # Tolerate "<module> foo </module>" and skip blank entries.
                    m = raw_name.strip()
                    if not m:
                        continue
                    lowered = m.lower()
                    # Module type is a name-based heuristic only.
                    mod_type = "service"
                    if "api" in lowered or "contract" in lowered:
                        mod_type = "api-contract"
                    elif "common" in lowered or "base" in lowered or "lib" in lowered:
                        mod_type = "library"
                    result["modules"].append({"name": m, "path": m, "type": mod_type})
            else:
                result["structure"] = "multi-repo"
                result["modules"].append({"name": root.name, "type": "service"})

        # Detect git remote
        git_config = root / ".git" / "config"
        if git_config.exists():
            content = git_config.read_text(encoding="utf-8", errors="replace")
            # Anchor at line start so "pushurl = ..." is not mistaken for "url = ...".
            urls = re.findall(r"^[ \t]*url[ \t]*=[ \t]*(.+)$", content, re.MULTILINE)
            if urls:
                result["git_url"] = urls[0].strip()
            # Detect current branch
            head = root / ".git" / "HEAD"
            if head.exists():
                head_content = head.read_text(encoding="utf-8").strip()
                ref_prefix = "ref: refs/heads/"
                if head_content.startswith(ref_prefix):
                    # Drop only the leading prefix; a detached HEAD keeps the default branch.
                    result["branch"] = head_content[len(ref_prefix):]

        # Generate suggested config
        project_name = root.name.lower().replace(" ", "-")
        if result["structure"] == "monorepo" and result["modules"]:
            source_block = {
                "structure": "monorepo",
                "url": result["git_url"] or "https://your-git-server/repo.git",
                "repo_name": project_name,
                "branch": result["branch"],
                "cache_dir": "./.source-cache",
                "modules": result["modules"],
            }
        else:
            repos = [
                {
                    "name": m["name"],
                    "url": result["git_url"] or "https://your-git-server/repo.git",
                    "branch": result["branch"],
                    "type": m.get("type", "service"),
                }
                for m in result["modules"]
            ]
            source_block = {
                "structure": "multi-repo",
                "cache_dir": "./.source-cache",
                "repos": repos,
            }

        import yaml
        suggested = {
            "version": 1,
            "knowledge_bases": {
                project_name: {
                    "name": project_name,
                    "collection": f"{project_name.replace('-', '_')}_knowledge",
                    "knowledge_dir": f"./knowledge/{project_name}",
                    "preset": result["preset"],
                    "source": source_block,
                }
            },
            "embedding": {"backend": "chromadb-default"},
            "agent": {"model": "delegated"},
        }
        result["suggested_config"] = yaml.dump(
            suggested, allow_unicode=True, default_flow_style=False, sort_keys=False
        )

        return json.dumps(result, ensure_ascii=False, indent=2)

    @mcp.tool()
    def init_project(
        project_name: str,
        preset: str = "generic",
        structure: str = "multi-repo",
        knowledge_dir: str = "",
        repos: str = "[]",
        modules: str = "[]",
        git_url: str = "",
        branch: str = "main",
    ) -> str:
        """Generate a kb-project.yaml configuration file.

        Args:
            project_name: Project/knowledge base name (lowercase with hyphens)
            preset: Language preset (generic | java-spring)
            structure: Repository structure (multi-repo | monorepo)
            knowledge_dir: Knowledge base output directory (defaults to ./knowledge/{project_name})
            repos: JSON list of repositories for multi-repo [{name, url, branch, type}]
            modules: JSON list of modules for monorepo [{name, path, type}]
            git_url: Repository URL for monorepo
            branch: Branch name for monorepo
        """
        import yaml
        from mcp_server import project_root

        root = project_root()
        if not knowledge_dir:
            knowledge_dir = f"./knowledge/{project_name}"

        # Collected input problems, reported back instead of being silently dropped.
        warnings: list[str] = []

        if structure == "monorepo":
            modules_list = _json_list(modules, "modules", warnings)
            source_block: dict[str, Any] = {
                "structure": "monorepo",
                "url": git_url or "https://your-git-server/repo.git",
                "repo_name": project_name,
                "branch": branch,
                "cache_dir": "./.source-cache",
                # An empty list gets a placeholder entry for the user to edit.
                "modules": modules_list if modules_list else [
                    {"name": "example-service", "path": "services/example", "type": "service"}
                ],
            }
        else:
            repos_list = _json_list(repos, "repos", warnings)
            source_block = {
                "structure": "multi-repo",
                "cache_dir": "./.source-cache",
                "repos": repos_list if repos_list else [
                    {"name": "example-service", "url": "https://git.example.com/repo.git",
                     "branch": "main", "type": "service"}
                ],
            }

        config = {
            "version": 1,
            "knowledge_bases": {
                project_name: {
                    "name": project_name,
                    "collection": f"{project_name.replace('-', '_')}_knowledge",
                    "knowledge_dir": knowledge_dir,
                    "preset": preset,
                    "source": source_block,
                }
            },
            "embedding": {"backend": "chromadb-default"},
            "agent": {"model": "delegated"},
        }

        # NOTE(review): this overwrites any existing kb-project.yaml without
        # confirmation — confirm that is the intended behavior.
        output_path = root / "kb-project.yaml"
        output_path.write_text(
            yaml.dump(config, allow_unicode=True, default_flow_style=False, sort_keys=False),
            encoding="utf-8",
        )

        response: dict[str, Any] = {
            "status": "ok",
            "config_path": str(output_path),
            "message": f"Created {output_path}. Please review the repository URLs and other settings, then call get_workflow('kb-init') to start initialization.",
        }
        if warnings:
            response["warnings"] = warnings
        return json.dumps(response, ensure_ascii=False, indent=2)
|