source-kb 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. cli/__init__.py +50 -0
  2. cli/__main__.py +5 -0
  3. cli/commands/__init__.py +1 -0
  4. cli/commands/anchor_fix.py +47 -0
  5. cli/commands/diff_doc.py +52 -0
  6. cli/commands/dispatch.py +77 -0
  7. cli/commands/extract.py +72 -0
  8. cli/commands/file_list.py +74 -0
  9. cli/commands/index.py +84 -0
  10. cli/commands/lock.py +89 -0
  11. cli/commands/merge.py +60 -0
  12. cli/commands/merge_delta.py +19 -0
  13. cli/commands/metadata.py +24 -0
  14. cli/commands/pipeline.py +45 -0
  15. cli/commands/post_merge.py +43 -0
  16. cli/commands/query.py +52 -0
  17. cli/commands/render.py +101 -0
  18. cli/commands/scan_repos.py +46 -0
  19. cli/commands/setup.py +94 -0
  20. cli/commands/split.py +196 -0
  21. cli/commands/stale_files.py +98 -0
  22. cli/commands/validate.py +191 -0
  23. core/__init__.py +32 -0
  24. core/config.py +261 -0
  25. core/docs/__init__.py +7 -0
  26. core/docs/section_updater.py +286 -0
  27. core/docs/shared.py +149 -0
  28. core/git.py +294 -0
  29. core/interfaces.py +249 -0
  30. core/monitor/__init__.py +5 -0
  31. core/monitor/progress.py +83 -0
  32. core/monitor/prompt_store.py +49 -0
  33. core/paths.py +141 -0
  34. core/preset.py +237 -0
  35. core/preset_accessors.py +202 -0
  36. core/preset_classify.py +132 -0
  37. core/preset_hooks.py +129 -0
  38. core/preset_profile.py +89 -0
  39. core/prompt/__init__.py +7 -0
  40. core/prompt/__main__.py +147 -0
  41. core/prompt/content.py +320 -0
  42. core/prompt/context_manager.py +164 -0
  43. core/prompt/renderer.py +236 -0
  44. core/prompt/response_parser.py +274 -0
  45. core/prompt/templates.py +357 -0
  46. core/prompt/validate_parity.py +162 -0
  47. core/prompt/variables.py +339 -0
  48. core/rag/__init__.py +22 -0
  49. core/rag/__main__.py +136 -0
  50. core/rag/bm25_index.py +268 -0
  51. core/rag/chunker.py +273 -0
  52. core/rag/embedder.py +151 -0
  53. core/rag/indexer.py +292 -0
  54. core/rag/loader.py +89 -0
  55. core/rag/retriever.py +82 -0
  56. core/skeleton/__init__.py +11 -0
  57. core/skeleton/__main__.py +934 -0
  58. core/skeleton/anchor_fix.py +250 -0
  59. core/skeleton/classify.py +331 -0
  60. core/skeleton/cmd_anchor_fix.py +43 -0
  61. core/skeleton/cmd_diff_doc.py +44 -0
  62. core/skeleton/cmd_lock.py +87 -0
  63. core/skeleton/cmd_merge_delta.py +41 -0
  64. core/skeleton/community.py +233 -0
  65. core/skeleton/dependency_graph.py +306 -0
  66. core/skeleton/diff_doc.py +248 -0
  67. core/skeleton/dispatch.py +273 -0
  68. core/skeleton/dispatch_render.py +319 -0
  69. core/skeleton/dispatch_source.py +111 -0
  70. core/skeleton/extract.py +218 -0
  71. core/skeleton/extract_methods.py +298 -0
  72. core/skeleton/file_list.py +239 -0
  73. core/skeleton/impact.py +278 -0
  74. core/skeleton/jar_download.py +177 -0
  75. core/skeleton/jar_resolver.py +186 -0
  76. core/skeleton/loader.py +162 -0
  77. core/skeleton/merge.py +278 -0
  78. core/skeleton/merge_delta.py +229 -0
  79. core/skeleton/metadata.py +96 -0
  80. core/skeleton/metadata_builders.py +264 -0
  81. core/skeleton/module_dag.py +330 -0
  82. core/skeleton/parsers/__init__.py +71 -0
  83. core/skeleton/parsers/jqassistant.py +300 -0
  84. core/skeleton/parsers/jqassistant_cypher.py +225 -0
  85. core/skeleton/parsers/regex.py +171 -0
  86. core/skeleton/parsers/treesitter.py +324 -0
  87. core/skeleton/parsers/treesitter_java.py +284 -0
  88. core/skeleton/parsers/treesitter_multi.py +289 -0
  89. core/skeleton/pom_parser.py +299 -0
  90. core/skeleton/post_merge.py +295 -0
  91. core/skeleton/post_merge_llm.py +82 -0
  92. core/skeleton/query.py +195 -0
  93. core/skeleton/shard_context.py +177 -0
  94. core/skeleton/split.py +180 -0
  95. core/skeleton/split_cache.py +107 -0
  96. core/skeleton/split_feedback.py +174 -0
  97. core/skeleton/split_plan.py +219 -0
  98. core/skeleton/split_plan_helpers.py +305 -0
  99. core/skeleton/split_plan_llm.py +274 -0
  100. core/utils.py +135 -0
  101. core/validators/__init__.py +65 -0
  102. core/validators/__main__.py +215 -0
  103. core/validators/consistency.py +203 -0
  104. core/validators/coverage.py +171 -0
  105. core/validators/duplicates.py +76 -0
  106. core/validators/engine.py +224 -0
  107. core/validators/links.py +76 -0
  108. core/validators/sampling.py +169 -0
  109. core/validators/structure.py +144 -0
  110. engine/__init__.py +7 -0
  111. engine/assembler.py +231 -0
  112. engine/confirm.py +65 -0
  113. engine/dedup.py +106 -0
  114. engine/main.py +211 -0
  115. engine/pipeline/__init__.py +163 -0
  116. engine/pipeline/recovery.py +250 -0
  117. engine/pipeline/steps/__init__.py +23 -0
  118. engine/pipeline/steps/audit.py +220 -0
  119. engine/pipeline/steps/audit_apply.py +195 -0
  120. engine/pipeline/steps/audit_helpers.py +155 -0
  121. engine/pipeline/steps/classify_llm.py +236 -0
  122. engine/pipeline/steps/classify_prompt.py +223 -0
  123. engine/pipeline/steps/finalize.py +160 -0
  124. engine/pipeline/steps/generate.py +169 -0
  125. engine/pipeline/steps/generate_batch.py +197 -0
  126. engine/pipeline/steps/generate_recovery.py +170 -0
  127. engine/pipeline/steps/llm_plan_split.py +253 -0
  128. engine/pipeline/steps/lock.py +64 -0
  129. engine/pipeline/steps/preflight.py +237 -0
  130. engine/pipeline/steps/preflight_adjust.py +147 -0
  131. engine/pipeline/steps/pregenerate.py +130 -0
  132. engine/pipeline/steps/quality.py +81 -0
  133. engine/pipeline/steps/skeleton.py +149 -0
  134. engine/pipeline/steps/source.py +163 -0
  135. engine/pipeline/steps/sync.py +117 -0
  136. engine/pipeline/steps/sync_finalize.py +237 -0
  137. engine/pipeline/steps/sync_update.py +341 -0
  138. engine/pipelines.py +91 -0
  139. engine/runner.py +335 -0
  140. engine/strategies/__init__.py +86 -0
  141. engine/strategies/api.py +128 -0
  142. engine/strategies/delegated.py +50 -0
  143. engine/strategies/dryrun.py +25 -0
  144. engine/two_phase.py +143 -0
  145. mcp_server/__init__.py +73 -0
  146. mcp_server/__main__.py +5 -0
  147. mcp_server/tools/__init__.py +1 -0
  148. mcp_server/tools/config.py +63 -0
  149. mcp_server/tools/discovery.py +276 -0
  150. mcp_server/tools/generation.py +184 -0
  151. mcp_server/tools/planning.py +144 -0
  152. mcp_server/tools/source.py +175 -0
  153. mcp_server/tools/validation.py +140 -0
  154. mcp_server/tools/workflow.py +166 -0
  155. mcp_server/workflow_loader.py +204 -0
  156. presets/generic/audit_dimensions.md +132 -0
  157. presets/generic/doc_types.yaml +152 -0
  158. presets/generic/preset.yaml +115 -0
  159. presets/java-spring/audit_dimensions.md +228 -0
  160. presets/java-spring/audit_dimensions.yaml +203 -0
  161. presets/java-spring/doc_types.yaml +269 -0
  162. presets/java-spring/hooks.py +122 -0
  163. presets/java-spring/preset.yaml +341 -0
  164. presets/java-spring/templates/README.md +34 -0
  165. presets/java-spring/templates/audit-system.md +15 -0
  166. presets/java-spring/templates/subagent-aop.md +105 -0
  167. presets/java-spring/templates/subagent-api.md +63 -0
  168. presets/java-spring/templates/subagent-architecture.md +111 -0
  169. presets/java-spring/templates/subagent-async-events.md +107 -0
  170. presets/java-spring/templates/subagent-audit-api-contracts.md +40 -0
  171. presets/java-spring/templates/subagent-audit-architecture.md +38 -0
  172. presets/java-spring/templates/subagent-audit-business.md +40 -0
  173. presets/java-spring/templates/subagent-audit-data-models.md +40 -0
  174. presets/java-spring/templates/subagent-business.md +129 -0
  175. presets/java-spring/templates/subagent-caching.md +75 -0
  176. presets/java-spring/templates/subagent-database-access.md +114 -0
  177. presets/java-spring/templates/subagent-enum.md +75 -0
  178. presets/java-spring/templates/subagent-error-handling.md +91 -0
  179. presets/java-spring/templates/subagent-external-integrations.md +80 -0
  180. presets/java-spring/templates/subagent-index.md +122 -0
  181. presets/java-spring/templates/subagent-messaging.md +97 -0
  182. presets/java-spring/templates/subagent-model.md +88 -0
  183. presets/java-spring/templates/subagent-observability.md +91 -0
  184. presets/java-spring/templates/subagent-scheduled.md +81 -0
  185. presets/java-spring/templates/subagent-security.md +102 -0
  186. presets/java-spring/templates/subagent-structure.md +101 -0
  187. presets/java-spring/templates/subagent-sync-section.md +34 -0
  188. presets/java-spring/templates/subagent-utils.md +73 -0
  189. presets/java-spring/templates/sync-system.md +8 -0
  190. presets/java-spring/workflow-extensions.md +112 -0
  191. skills/__init__.py +1 -0
  192. skills/_shared/README.md +30 -0
  193. skills/_shared/doc-coverage-shared.md +134 -0
  194. skills/_shared/doc-quality-standard.md +1058 -0
  195. skills/_shared/doc-subagent-rules.md +762 -0
  196. skills/_shared/windows-compat.md +89 -0
  197. skills/kb-audit/SKILL.md +52 -0
  198. skills/kb-audit/rules.md +88 -0
  199. skills/kb-audit/steps/step-01-prepare.md +75 -0
  200. skills/kb-audit/steps/step-02-audit.md +96 -0
  201. skills/kb-audit/steps/step-03-verify.md +65 -0
  202. skills/kb-audit/steps/step-04-report.md +64 -0
  203. skills/kb-init/SKILL.md +146 -0
  204. skills/kb-init/rules.md +187 -0
  205. skills/kb-init/steps/step-01-scope.md +62 -0
  206. skills/kb-init/steps/step-02-source.md +410 -0
  207. skills/kb-init/steps/step-03-generate.md +307 -0
  208. skills/kb-init/steps/step-04-quality.md +92 -0
  209. skills/kb-init/steps/step-05-finalize.md +132 -0
  210. skills/kb-init/templates/core/execution-modes.md +29 -0
  211. skills/kb-init/templates/core/output-only.md +4 -0
  212. skills/kb-init/templates/core/readwrite.md +33 -0
  213. skills/kb-search/SKILL.md +138 -0
  214. skills/kb-search/rules.md +64 -0
  215. skills/kb-sync/SKILL.md +43 -0
  216. skills/kb-sync/rules.md +70 -0
  217. skills/kb-sync/scripts/rebuild_module.py +91 -0
  218. skills/kb-sync/scripts/scan_repos.py +687 -0
  219. skills/kb-sync/steps/step-01-detect.md +72 -0
  220. skills/kb-sync/steps/step-02-update.md +71 -0
  221. skills/kb-sync/steps/step-03-verify.md +47 -0
  222. skills/kb-sync/steps/step-04-finalize.md +52 -0
  223. source_kb-0.2.2.dist-info/METADATA +194 -0
  224. source_kb-0.2.2.dist-info/RECORD +228 -0
  225. source_kb-0.2.2.dist-info/WHEEL +5 -0
  226. source_kb-0.2.2.dist-info/entry_points.txt +3 -0
  227. source_kb-0.2.2.dist-info/licenses/LICENSE +21 -0
  228. source_kb-0.2.2.dist-info/top_level.txt +6 -0
engine/two_phase.py ADDED
@@ -0,0 +1,143 @@
1
+ """Two-phase generation — CLI-only strategy for split documents.
2
+
3
+ Phase 1: Each shard generates a structural outline (2-4KB)
4
+ Phase 2: All outlines merged → each shard generates full content with global context
5
+
6
+ This eliminates information isolation between shards in output-only mode
7
+ (where source is inlined and shards can't read each other's files).
8
+
9
+ Agent mode doesn't need this — sub-agents can read any file directly.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import logging
15
+ from pathlib import Path
16
+
17
+ from core.interfaces import LlmStrategy
18
+ from engine.runner import SubagentTask, SubagentResult, run_batch
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+ OUTLINE_SUFFIX = """
23
+
24
+ ## ⚠️ Current Task: Generate document outline only (Phase 1/2)
25
+
26
+ Please **output only the document outline**, not the full content.
27
+
28
+ ### Outline Format Requirements
29
+
30
+ ```markdown
31
+ ## [Section Title]
32
+
33
+ **Summary**: 1-2 sentences describing the business process covered by this section
34
+
35
+ **Methods covered**:
36
+ - ClassName.method1 — one-sentence responsibility
37
+ - ClassName.method2 — one-sentence responsibility
38
+
39
+ **Key business rules** (numbered list, one sentence each):
40
+ 1. ...
41
+ 2. ...
42
+
43
+ ---
44
+ ```
45
+
46
+ ### Rules
47
+ 1. Each `##` section corresponds to one business process
48
+ 2. Only write title + summary + method list + key rules
49
+ 3. Keep the size within **2-4KB**
50
+ """
51
+
52
+ FULL_PHASE_PREFIX = """
53
+
54
+ ## Global Document Outline (section plans for all shards)
55
+
56
+ Below is the combined document outline generated by all shards in Phase 1. You only need to expand **the sections you are responsible for**.
57
+
58
+ {combined_outline}
59
+
60
+ ---
61
+
62
+ ## Sections You Are Responsible For
63
+
64
+ Based on your file list, expand the sections in the outline above that belong to you.
65
+ """
66
+
67
+
68
def run_two_phase(
    tasks: list[SubagentTask],
    strategy: LlmStrategy,
    max_concurrent: int = 5,
) -> list[SubagentResult]:
    """Execute two-phase generation for split documents.

    Phase 1 runs every shard with ``OUTLINE_SUFFIX`` appended to its prompt so
    that it produces an outline only.  The successful outlines are merged and
    injected (via ``FULL_PHASE_PREFIX``) into each original prompt for Phase 2,
    which generates the full content.

    Args:
        tasks: Original full-generation tasks (one per shard).
        strategy: LLM execution strategy, passed through to ``run_batch``.
        max_concurrent: Concurrency limit, passed through to ``run_batch``.

    Returns:
        The ``SubagentResult`` list from Phase 2.  If no outline at all could
        be produced in Phase 1, the result of running the original ``tasks``
        directly.  An empty list when ``tasks`` is empty.
    """
    if not tasks:
        return []

    outline_tag = "__outline"

    # Phase 1: Generate outlines
    logger.info("[two-phase] Phase 1: generating %d outlines", len(tasks))
    outline_tasks = [
        SubagentTask(
            task_id=f"{t.task_id}{outline_tag}",
            prompt=t.prompt + OUTLINE_SUFFIX,
            output_path=t.output_path.parent / ".meta" / "outlines" / f"{t.task_id}.md",
            doc_type=t.doc_type,
            timeout=300,
        )
        for t in tasks
    ]

    outline_results = run_batch(outline_tasks, strategy, max_concurrent)

    # Collect outlines, keyed by the original shard id.
    outlines: list[tuple[str, str]] = []
    for result in outline_results:
        if result.status == "done" and result.content:
            name = result.task_id
            # Strip only the trailing tag: a plain ``replace`` would also eat
            # "__outline" occurring inside the original task id.
            if name.endswith(outline_tag):
                name = name[: -len(outline_tag)]
            outlines.append((name, result.content))

    if not outlines:
        logger.warning("[two-phase] Phase 1 failed, falling back to direct generation")
        return run_batch(tasks, strategy, max_concurrent)

    if len(outlines) < len(outline_tasks):
        # Phase 2 still runs for every shard, but the global outline is incomplete.
        logger.warning(
            "[two-phase] Phase 1 produced only %d/%d outlines",
            len(outlines), len(outline_tasks),
        )

    # Merge outlines
    combined = "\n\n---\n\n".join(f"### Shard: {name}\n\n{content}" for name, content in outlines)
    logger.info("[two-phase] Phase 1 done: %d outlines, %d chars", len(outlines), len(combined))

    # Phase 2: Full generation with global outline context
    logger.info("[two-phase] Phase 2: generating full content")
    prefix = FULL_PHASE_PREFIX.format(combined_outline=combined)

    full_tasks = [
        SubagentTask(
            task_id=t.task_id,
            prompt=_inject_outline(t.prompt, prefix),
            output_path=t.output_path,
            doc_type=t.doc_type,
            timeout=t.timeout,
        )
        for t in tasks
    ]

    results = run_batch(full_tasks, strategy, max_concurrent)
    logger.info("[two-phase] Phase 2 done: %d/%d succeeded",
                sum(1 for r in results if r.status == "done"), len(results))
    return results
135
+
136
+
137
+ def _inject_outline(prompt: str, prefix: str) -> str:
138
+ """Inject outline context into the original prompt."""
139
+ marker = "## Rules you must follow"
140
+ if marker in prompt:
141
+ pos = prompt.index(marker)
142
+ return prompt[:pos] + prefix + "\n" + prompt[pos:]
143
+ return prompt + prefix
mcp_server/__init__.py ADDED
@@ -0,0 +1,73 @@
1
+ """source-kb MCP Server — code knowledge base documentation generation toolkit.
2
+
3
+ Provides AI agents with project discovery, workflow orchestration, skeleton extraction,
4
+ document generation, and coverage validation capabilities via the MCP protocol.
5
+
6
+ After installation:
7
+ uvx source-kb-mcp
8
+
9
+ For local development:
10
+ python -m mcp_server
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import os
16
+ import sys
17
+ from pathlib import Path
18
+
19
+ PROJECT_ROOT = Path(__file__).resolve().parent.parent
20
+ if str(PROJECT_ROOT) not in sys.path:
21
+ sys.path.insert(0, str(PROJECT_ROOT))
22
+
23
+ from mcp.server.fastmcp import FastMCP
24
+
25
+ mcp = FastMCP(
26
+ "source-kb",
27
+ instructions=(
28
+ "A toolkit for automatically generating structured knowledge base documentation from source code. "
29
+ "Start by calling discover() to understand project status, then use get_workflow() to get workflow guidance."
30
+ ),
31
+ )
32
+
33
+
34
def project_root() -> Path:
    """Get the user's project root directory.

    Returns:
        The path given by the ``SOURCE_KB_ROOT`` environment variable, or the
        current working directory when that variable is unset or empty.
    """
    configured = os.environ.get("SOURCE_KB_ROOT")
    if configured:
        return Path(configured)
    # Only consult the cwd when it is actually needed; an empty variable is
    # treated as "not set" instead of silently becoming the relative path ".".
    return Path(os.getcwd())
37
+
38
+
39
def find_config(root: Path | None = None) -> Path | None:
    """Find the kb-project.yaml configuration file.

    Args:
        root: Directory to look in; defaults to ``project_root()``.

    Returns:
        Path of ``kb-project.yaml`` if present, otherwise ``kb-project.yml``
        if present, otherwise ``None``.
    """
    base = root or project_root()
    for name in ("kb-project.yaml", "kb-project.yml"):
        candidate = base / name
        # is_file() rather than exists(): a directory with the same name is
        # not a configuration file and must not be returned as one.
        if candidate.is_file():
            return candidate
    return None
47
+
48
+
49
+ # Register all tools
50
+ from mcp_server.tools.discovery import register as _reg_discovery # noqa: E402
51
+ from mcp_server.tools.workflow import register as _reg_workflow # noqa: E402
52
+ from mcp_server.tools.source import register as _reg_source # noqa: E402
53
+ from mcp_server.tools.planning import register as _reg_planning # noqa: E402
54
+ from mcp_server.tools.validation import register as _reg_validation # noqa: E402
55
+ from mcp_server.tools.generation import register as _reg_generation # noqa: E402
56
+ from mcp_server.tools.config import register as _reg_config # noqa: E402
57
+
58
+ _reg_discovery(mcp)
59
+ _reg_workflow(mcp)
60
+ _reg_source(mcp)
61
+ _reg_planning(mcp)
62
+ _reg_validation(mcp)
63
+ _reg_generation(mcp)
64
+ _reg_config(mcp)
65
+
66
+
67
def main():
    """Run the module-level MCP server instance (console entry point)."""
    mcp.run()
70
+
71
+
72
+ if __name__ == "__main__":
73
+ main()
mcp_server/__main__.py ADDED
@@ -0,0 +1,5 @@
1
+ """__main__ support for python -m mcp_server."""
2
+
3
+ from mcp_server import main
4
+
5
+ main()
mcp_server/tools/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """MCP Server tool modules."""
mcp_server/tools/config.py ADDED
@@ -0,0 +1,63 @@
1
+ """Config tools — preset listing and configuration queries."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from pathlib import Path
7
+
8
+ from mcp.server.fastmcp import FastMCP
9
+
10
+
11
def register(mcp: FastMCP) -> None:
    """Register config tools on the MCP server.

    Args:
        mcp: Server instance whose ``tool()`` decorator is used to expose
            ``list_presets``.
    """

    @mcp.tool()
    def list_presets() -> str:
        """List available language presets and their supported document types.

        Returns the name, description, and supported doc_types list for each preset.
        """
        from mcp_server import PROJECT_ROOT

        presets_dir = PROJECT_ROOT / "presets"
        result = {}

        if presets_dir.is_dir():
            import yaml

            for preset_dir in sorted(presets_dir.iterdir()):
                if not preset_dir.is_dir():
                    continue
                preset_yaml = preset_dir / "preset.yaml"
                if not preset_yaml.exists():
                    continue

                # safe_load returns None for an empty document; normalise so
                # the lookups below cannot fail on a blank preset.yaml.
                cfg = yaml.safe_load(preset_yaml.read_text(encoding="utf-8"))
                if not isinstance(cfg, dict):
                    cfg = {}

                # doc_types.yaml, when present, takes precedence over an
                # inline "doc_types" key in preset.yaml.
                doc_types_yaml = preset_dir / "doc_types.yaml"
                doc_types = {}
                if doc_types_yaml.exists():
                    raw = yaml.safe_load(doc_types_yaml.read_text(encoding="utf-8")) or {}
                    if isinstance(raw, dict):
                        # Either wrapped in a "doc_types" key or the mapping itself.
                        doc_types = raw.get("doc_types", raw)
                elif "doc_types" in cfg:
                    doc_types = cfg["doc_types"]
                if not isinstance(doc_types, dict):
                    doc_types = {}

                doc_type_list = []
                for dt_key, dt_cfg in doc_types.items():
                    if isinstance(dt_cfg, dict):
                        doc_type_list.append({
                            "name": dt_key,
                            "filename": dt_cfg.get("filename", f"{dt_key}.md"),
                            "batch": dt_cfg.get("batch", 99),
                            "conditional": dt_cfg.get("conditional", False),
                            "global_view": dt_cfg.get("global_view", False),
                        })

                result[preset_dir.name] = {
                    "name": cfg.get("name", preset_dir.name),
                    "description": cfg.get("description", ""),
                    "doc_types": doc_type_list,
                    "doc_type_count": len(doc_type_list),
                }

        return json.dumps({"status": "ok", "presets": result}, ensure_ascii=False, indent=2)
mcp_server/tools/discovery.py ADDED
@@ -0,0 +1,276 @@
1
+ """Discovery tools — project detection and initialization."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import re
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ from mcp.server.fastmcp import FastMCP
11
+
12
+
13
def _load_json_list(raw: str) -> list[Any]:
    """Parse ``raw`` as JSON and return it only if it is a list, else ``[]``."""
    try:
        parsed = json.loads(raw)
    except (json.JSONDecodeError, TypeError):
        return []
    return parsed if isinstance(parsed, list) else []


def register(mcp: FastMCP) -> None:
    """Register discovery tools on the MCP server.

    Args:
        mcp: Server instance whose ``tool()`` decorator is used to expose
            ``discover``, ``detect_project`` and ``init_project``.
    """

    @mcp.tool()
    def discover() -> str:
        """Discover project status and available workflows. This is the entry-point tool that agents should call first.

        Scans the current directory, detects whether a kb-project.yaml configuration exists,
        checks the state of any generated knowledge bases, and returns available workflows
        with suggested next actions.
        """
        from mcp_server import project_root, find_config
        from mcp_server.workflow_loader import list_workflows

        root = project_root()
        config_path = find_config(root)

        result: dict[str, Any] = {
            "project_root": str(root),
            "project_detected": config_path is not None,
            "config_path": str(config_path) if config_path else None,
            "knowledge_bases": [],
            "available_workflows": list_workflows(),
            "suggested_action": "",
        }

        if config_path:
            import yaml
            try:
                # safe_load returns None for an empty file; treat that as an
                # empty mapping so the "Configuration file is empty" branch
                # below is reached instead of an AttributeError.
                cfg = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {}
                if not isinstance(cfg, dict):
                    raise ValueError("top-level YAML value must be a mapping")
                base_dir = config_path.parent
                for kb_name, kb_cfg in (cfg.get("knowledge_bases") or {}).items():
                    kb_cfg = kb_cfg or {}
                    kb_dir = Path(kb_cfg.get("knowledge_dir", ""))
                    if not kb_dir.is_absolute():
                        # Relative paths are resolved against the config file's directory.
                        kb_dir = (base_dir / kb_dir).resolve()

                    source = kb_cfg.get("source") or {}
                    if source.get("structure") == "monorepo":
                        modules = [m["name"] for m in source.get("modules") or []]
                    else:
                        modules = [r["name"] for r in source.get("repos") or []]

                    has_docs = kb_dir.is_dir() and any(kb_dir.rglob("*.md"))

                    result["knowledge_bases"].append({
                        "name": kb_name,
                        "preset": kb_cfg.get("preset", "generic"),
                        "modules": modules,
                        "has_docs": has_docs,
                        "knowledge_dir": str(kb_dir),
                    })

                if result["knowledge_bases"]:
                    has_any_docs = any(kb["has_docs"] for kb in result["knowledge_bases"])
                    if has_any_docs:
                        result["suggested_action"] = (
                            "Knowledge base exists. Use get_workflow('kb-sync') for incremental updates, "
                            "or get_workflow('kb-audit') to audit quality."
                        )
                    else:
                        result["suggested_action"] = (
                            "Configuration is ready but no documents exist yet. Call get_workflow('kb-init') to start initialization."
                        )
                else:
                    result["suggested_action"] = "Configuration file is empty. Call init_project() to generate configuration."
            except Exception as e:
                # Tool boundary: report the problem in the payload rather than
                # letting a malformed config abort discovery.
                result["error"] = f"Configuration parsing failed: {e}"
                result["suggested_action"] = "Configuration file parsing failed. Please check the YAML format."
        else:
            result["suggested_action"] = (
                "No kb-project.yaml detected. Call detect_project() to scan project structure, "
                "then call init_project() to generate configuration."
            )

        return json.dumps(result, ensure_ascii=False, indent=2)

    @mcp.tool()
    def detect_project(path: str = ".") -> str:
        """Detect project structure and recommend kb-project.yaml configuration.

        Inspects pom.xml and the .git directory to infer the preset, module list,
        remote URL and current branch. Returns a configuration suggestion ready to be written.

        Args:
            path: Project root directory path (defaults to current directory)
        """
        from mcp_server import project_root

        root = Path(path).resolve() if path != "." else project_root()

        result: dict[str, Any] = {
            "project_root": str(root),
            "preset": "generic",
            "structure": "unknown",
            "modules": [],
            "git_url": None,
            "branch": "main",
        }

        # Detect Java/Spring
        pom = root / "pom.xml"
        if pom.exists():
            result["preset"] = "java-spring"
            content = pom.read_text(encoding="utf-8", errors="replace")
            if "<modules>" in content:
                result["structure"] = "monorepo"
                for raw_name in re.findall(r"<module>([^<]+)</module>", content):
                    # The capture can include whitespace/newlines around the name.
                    m = raw_name.strip()
                    if not m:
                        continue
                    lowered = m.lower()
                    # Name-based heuristic for the module type.
                    mod_type = "service"
                    if "api" in lowered or "contract" in lowered:
                        mod_type = "api-contract"
                    elif "common" in lowered or "base" in lowered or "lib" in lowered:
                        mod_type = "library"
                    result["modules"].append({"name": m, "path": m, "type": mod_type})
            else:
                result["structure"] = "multi-repo"
                result["modules"].append({"name": root.name, "type": "service"})

        # Detect git remote
        git_config = root / ".git" / "config"
        if git_config.exists():
            content = git_config.read_text(encoding="utf-8", errors="replace")
            # Anchor on the key so that "pushurl = ..." lines are not mistaken
            # for the fetch URL.
            urls = re.findall(r"^\s*url\s*=\s*(.+)$", content, flags=re.MULTILINE)
            if urls:
                result["git_url"] = urls[0].strip()
            # Detect current branch
            head = root / ".git" / "HEAD"
            if head.exists():
                head_content = head.read_text(encoding="utf-8").strip()
                ref_prefix = "ref: refs/heads/"
                if head_content.startswith(ref_prefix):
                    result["branch"] = head_content[len(ref_prefix):]

        # Generate suggested config
        project_name = root.name.lower().replace(" ", "-") or "project"
        if result["structure"] == "monorepo" and result["modules"]:
            source_block = {
                "structure": "monorepo",
                "url": result["git_url"] or "https://your-git-server/repo.git",
                "repo_name": project_name,
                "branch": result["branch"],
                "cache_dir": "./.source-cache",
                "modules": result["modules"],
            }
        else:
            # When nothing was detected, still suggest one repo named after
            # the directory instead of an empty "repos" list.
            repo_sources = result["modules"] or [
                {"name": root.name or project_name, "type": "service"}
            ]
            repos = [
                {
                    "name": m["name"],
                    "url": result["git_url"] or "https://your-git-server/repo.git",
                    "branch": result["branch"],
                    "type": m.get("type", "service"),
                }
                for m in repo_sources
            ]
            source_block = {
                "structure": "multi-repo",
                "cache_dir": "./.source-cache",
                "repos": repos,
            }

        import yaml
        suggested = {
            "version": 1,
            "knowledge_bases": {
                project_name: {
                    "name": project_name,
                    "collection": f"{project_name.replace('-', '_')}_knowledge",
                    "knowledge_dir": f"./knowledge/{project_name}",
                    "preset": result["preset"],
                    "source": source_block,
                }
            },
            "embedding": {"backend": "chromadb-default"},
            "agent": {"model": "delegated"},
        }
        result["suggested_config"] = yaml.dump(
            suggested, allow_unicode=True, default_flow_style=False, sort_keys=False
        )

        return json.dumps(result, ensure_ascii=False, indent=2)

    @mcp.tool()
    def init_project(
        project_name: str,
        preset: str = "generic",
        structure: str = "multi-repo",
        knowledge_dir: str = "",
        repos: str = "[]",
        modules: str = "[]",
        git_url: str = "",
        branch: str = "main",
    ) -> str:
        """Generate a kb-project.yaml configuration file.

        Args:
            project_name: Project/knowledge base name (lowercase with hyphens)
            preset: Language preset (generic | java-spring)
            structure: Repository structure (multi-repo | monorepo)
            knowledge_dir: Knowledge base output directory (defaults to ./knowledge/{project_name})
            repos: JSON list of repositories for multi-repo [{name, url, branch, type}]
            modules: JSON list of modules for monorepo [{name, path, type}]
            git_url: Repository URL for monorepo
            branch: Branch name for monorepo
        """
        import yaml
        from mcp_server import project_root

        root = project_root()
        if not knowledge_dir:
            knowledge_dir = f"./knowledge/{project_name}"

        if structure == "monorepo":
            # Invalid JSON, or JSON that is not a list, falls back to the
            # example entry rather than being written into the config.
            modules_list = _load_json_list(modules)
            source_block: dict[str, Any] = {
                "structure": "monorepo",
                "url": git_url or "https://your-git-server/repo.git",
                "repo_name": project_name,
                "branch": branch,
                "cache_dir": "./.source-cache",
                "modules": modules_list if modules_list else [
                    {"name": "example-service", "path": "services/example", "type": "service"}
                ],
            }
        else:
            repos_list = _load_json_list(repos)
            source_block = {
                "structure": "multi-repo",
                "cache_dir": "./.source-cache",
                "repos": repos_list if repos_list else [
                    {"name": "example-service", "url": "https://git.example.com/repo.git",
                     "branch": "main", "type": "service"}
                ],
            }

        config = {
            "version": 1,
            "knowledge_bases": {
                project_name: {
                    "name": project_name,
                    "collection": f"{project_name.replace('-', '_')}_knowledge",
                    "knowledge_dir": knowledge_dir,
                    "preset": preset,
                    "source": source_block,
                }
            },
            "embedding": {"backend": "chromadb-default"},
            "agent": {"model": "delegated"},
        }

        # NOTE(review): write_text replaces any existing kb-project.yaml
        # without confirmation — confirm that overwriting is intended.
        output_path = root / "kb-project.yaml"
        output_path.write_text(
            yaml.dump(config, allow_unicode=True, default_flow_style=False, sort_keys=False),
            encoding="utf-8",
        )

        return json.dumps({
            "status": "ok",
            "config_path": str(output_path),
            "message": f"Created {output_path}. Please review the repository URLs and other settings, then call get_workflow('kb-init') to start initialization.",
        }, ensure_ascii=False, indent=2)