source-kb 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. cli/__init__.py +50 -0
  2. cli/__main__.py +5 -0
  3. cli/commands/__init__.py +1 -0
  4. cli/commands/anchor_fix.py +47 -0
  5. cli/commands/diff_doc.py +52 -0
  6. cli/commands/dispatch.py +77 -0
  7. cli/commands/extract.py +72 -0
  8. cli/commands/file_list.py +74 -0
  9. cli/commands/index.py +84 -0
  10. cli/commands/lock.py +89 -0
  11. cli/commands/merge.py +60 -0
  12. cli/commands/merge_delta.py +19 -0
  13. cli/commands/metadata.py +24 -0
  14. cli/commands/pipeline.py +45 -0
  15. cli/commands/post_merge.py +43 -0
  16. cli/commands/query.py +52 -0
  17. cli/commands/render.py +101 -0
  18. cli/commands/scan_repos.py +46 -0
  19. cli/commands/setup.py +94 -0
  20. cli/commands/split.py +196 -0
  21. cli/commands/stale_files.py +98 -0
  22. cli/commands/validate.py +191 -0
  23. core/__init__.py +32 -0
  24. core/config.py +261 -0
  25. core/docs/__init__.py +7 -0
  26. core/docs/section_updater.py +286 -0
  27. core/docs/shared.py +149 -0
  28. core/git.py +294 -0
  29. core/interfaces.py +249 -0
  30. core/monitor/__init__.py +5 -0
  31. core/monitor/progress.py +83 -0
  32. core/monitor/prompt_store.py +49 -0
  33. core/paths.py +141 -0
  34. core/preset.py +237 -0
  35. core/preset_accessors.py +202 -0
  36. core/preset_classify.py +132 -0
  37. core/preset_hooks.py +129 -0
  38. core/preset_profile.py +89 -0
  39. core/prompt/__init__.py +7 -0
  40. core/prompt/__main__.py +147 -0
  41. core/prompt/content.py +320 -0
  42. core/prompt/context_manager.py +164 -0
  43. core/prompt/renderer.py +236 -0
  44. core/prompt/response_parser.py +274 -0
  45. core/prompt/templates.py +357 -0
  46. core/prompt/validate_parity.py +162 -0
  47. core/prompt/variables.py +339 -0
  48. core/rag/__init__.py +22 -0
  49. core/rag/__main__.py +136 -0
  50. core/rag/bm25_index.py +268 -0
  51. core/rag/chunker.py +273 -0
  52. core/rag/embedder.py +151 -0
  53. core/rag/indexer.py +292 -0
  54. core/rag/loader.py +89 -0
  55. core/rag/retriever.py +82 -0
  56. core/skeleton/__init__.py +11 -0
  57. core/skeleton/__main__.py +934 -0
  58. core/skeleton/anchor_fix.py +250 -0
  59. core/skeleton/classify.py +331 -0
  60. core/skeleton/cmd_anchor_fix.py +43 -0
  61. core/skeleton/cmd_diff_doc.py +44 -0
  62. core/skeleton/cmd_lock.py +87 -0
  63. core/skeleton/cmd_merge_delta.py +41 -0
  64. core/skeleton/community.py +233 -0
  65. core/skeleton/dependency_graph.py +306 -0
  66. core/skeleton/diff_doc.py +248 -0
  67. core/skeleton/dispatch.py +273 -0
  68. core/skeleton/dispatch_render.py +319 -0
  69. core/skeleton/dispatch_source.py +111 -0
  70. core/skeleton/extract.py +218 -0
  71. core/skeleton/extract_methods.py +298 -0
  72. core/skeleton/file_list.py +239 -0
  73. core/skeleton/impact.py +278 -0
  74. core/skeleton/jar_download.py +177 -0
  75. core/skeleton/jar_resolver.py +186 -0
  76. core/skeleton/loader.py +162 -0
  77. core/skeleton/merge.py +278 -0
  78. core/skeleton/merge_delta.py +229 -0
  79. core/skeleton/metadata.py +96 -0
  80. core/skeleton/metadata_builders.py +264 -0
  81. core/skeleton/module_dag.py +330 -0
  82. core/skeleton/parsers/__init__.py +71 -0
  83. core/skeleton/parsers/jqassistant.py +300 -0
  84. core/skeleton/parsers/jqassistant_cypher.py +225 -0
  85. core/skeleton/parsers/regex.py +171 -0
  86. core/skeleton/parsers/treesitter.py +324 -0
  87. core/skeleton/parsers/treesitter_java.py +284 -0
  88. core/skeleton/parsers/treesitter_multi.py +289 -0
  89. core/skeleton/pom_parser.py +299 -0
  90. core/skeleton/post_merge.py +295 -0
  91. core/skeleton/post_merge_llm.py +82 -0
  92. core/skeleton/query.py +195 -0
  93. core/skeleton/shard_context.py +177 -0
  94. core/skeleton/split.py +180 -0
  95. core/skeleton/split_cache.py +107 -0
  96. core/skeleton/split_feedback.py +174 -0
  97. core/skeleton/split_plan.py +219 -0
  98. core/skeleton/split_plan_helpers.py +305 -0
  99. core/skeleton/split_plan_llm.py +274 -0
  100. core/utils.py +135 -0
  101. core/validators/__init__.py +65 -0
  102. core/validators/__main__.py +215 -0
  103. core/validators/consistency.py +203 -0
  104. core/validators/coverage.py +171 -0
  105. core/validators/duplicates.py +76 -0
  106. core/validators/engine.py +224 -0
  107. core/validators/links.py +76 -0
  108. core/validators/sampling.py +169 -0
  109. core/validators/structure.py +144 -0
  110. engine/__init__.py +7 -0
  111. engine/assembler.py +231 -0
  112. engine/confirm.py +65 -0
  113. engine/dedup.py +106 -0
  114. engine/main.py +211 -0
  115. engine/pipeline/__init__.py +163 -0
  116. engine/pipeline/recovery.py +250 -0
  117. engine/pipeline/steps/__init__.py +23 -0
  118. engine/pipeline/steps/audit.py +220 -0
  119. engine/pipeline/steps/audit_apply.py +195 -0
  120. engine/pipeline/steps/audit_helpers.py +155 -0
  121. engine/pipeline/steps/classify_llm.py +236 -0
  122. engine/pipeline/steps/classify_prompt.py +223 -0
  123. engine/pipeline/steps/finalize.py +160 -0
  124. engine/pipeline/steps/generate.py +169 -0
  125. engine/pipeline/steps/generate_batch.py +197 -0
  126. engine/pipeline/steps/generate_recovery.py +170 -0
  127. engine/pipeline/steps/llm_plan_split.py +253 -0
  128. engine/pipeline/steps/lock.py +64 -0
  129. engine/pipeline/steps/preflight.py +237 -0
  130. engine/pipeline/steps/preflight_adjust.py +147 -0
  131. engine/pipeline/steps/pregenerate.py +130 -0
  132. engine/pipeline/steps/quality.py +81 -0
  133. engine/pipeline/steps/skeleton.py +149 -0
  134. engine/pipeline/steps/source.py +163 -0
  135. engine/pipeline/steps/sync.py +117 -0
  136. engine/pipeline/steps/sync_finalize.py +237 -0
  137. engine/pipeline/steps/sync_update.py +341 -0
  138. engine/pipelines.py +91 -0
  139. engine/runner.py +335 -0
  140. engine/strategies/__init__.py +86 -0
  141. engine/strategies/api.py +128 -0
  142. engine/strategies/delegated.py +50 -0
  143. engine/strategies/dryrun.py +25 -0
  144. engine/two_phase.py +143 -0
  145. mcp_server/__init__.py +73 -0
  146. mcp_server/__main__.py +5 -0
  147. mcp_server/tools/__init__.py +1 -0
  148. mcp_server/tools/config.py +63 -0
  149. mcp_server/tools/discovery.py +276 -0
  150. mcp_server/tools/generation.py +184 -0
  151. mcp_server/tools/planning.py +144 -0
  152. mcp_server/tools/source.py +175 -0
  153. mcp_server/tools/validation.py +140 -0
  154. mcp_server/tools/workflow.py +166 -0
  155. mcp_server/workflow_loader.py +204 -0
  156. presets/generic/audit_dimensions.md +132 -0
  157. presets/generic/doc_types.yaml +152 -0
  158. presets/generic/preset.yaml +115 -0
  159. presets/java-spring/audit_dimensions.md +228 -0
  160. presets/java-spring/audit_dimensions.yaml +203 -0
  161. presets/java-spring/doc_types.yaml +269 -0
  162. presets/java-spring/hooks.py +122 -0
  163. presets/java-spring/preset.yaml +341 -0
  164. presets/java-spring/templates/README.md +34 -0
  165. presets/java-spring/templates/audit-system.md +15 -0
  166. presets/java-spring/templates/subagent-aop.md +105 -0
  167. presets/java-spring/templates/subagent-api.md +63 -0
  168. presets/java-spring/templates/subagent-architecture.md +111 -0
  169. presets/java-spring/templates/subagent-async-events.md +107 -0
  170. presets/java-spring/templates/subagent-audit-api-contracts.md +40 -0
  171. presets/java-spring/templates/subagent-audit-architecture.md +38 -0
  172. presets/java-spring/templates/subagent-audit-business.md +40 -0
  173. presets/java-spring/templates/subagent-audit-data-models.md +40 -0
  174. presets/java-spring/templates/subagent-business.md +129 -0
  175. presets/java-spring/templates/subagent-caching.md +75 -0
  176. presets/java-spring/templates/subagent-database-access.md +114 -0
  177. presets/java-spring/templates/subagent-enum.md +75 -0
  178. presets/java-spring/templates/subagent-error-handling.md +91 -0
  179. presets/java-spring/templates/subagent-external-integrations.md +80 -0
  180. presets/java-spring/templates/subagent-index.md +122 -0
  181. presets/java-spring/templates/subagent-messaging.md +97 -0
  182. presets/java-spring/templates/subagent-model.md +88 -0
  183. presets/java-spring/templates/subagent-observability.md +91 -0
  184. presets/java-spring/templates/subagent-scheduled.md +81 -0
  185. presets/java-spring/templates/subagent-security.md +102 -0
  186. presets/java-spring/templates/subagent-structure.md +101 -0
  187. presets/java-spring/templates/subagent-sync-section.md +34 -0
  188. presets/java-spring/templates/subagent-utils.md +73 -0
  189. presets/java-spring/templates/sync-system.md +8 -0
  190. presets/java-spring/workflow-extensions.md +112 -0
  191. skills/__init__.py +1 -0
  192. skills/_shared/README.md +30 -0
  193. skills/_shared/doc-coverage-shared.md +134 -0
  194. skills/_shared/doc-quality-standard.md +1058 -0
  195. skills/_shared/doc-subagent-rules.md +762 -0
  196. skills/_shared/windows-compat.md +89 -0
  197. skills/kb-audit/SKILL.md +52 -0
  198. skills/kb-audit/rules.md +88 -0
  199. skills/kb-audit/steps/step-01-prepare.md +75 -0
  200. skills/kb-audit/steps/step-02-audit.md +96 -0
  201. skills/kb-audit/steps/step-03-verify.md +65 -0
  202. skills/kb-audit/steps/step-04-report.md +64 -0
  203. skills/kb-init/SKILL.md +146 -0
  204. skills/kb-init/rules.md +187 -0
  205. skills/kb-init/steps/step-01-scope.md +62 -0
  206. skills/kb-init/steps/step-02-source.md +410 -0
  207. skills/kb-init/steps/step-03-generate.md +307 -0
  208. skills/kb-init/steps/step-04-quality.md +92 -0
  209. skills/kb-init/steps/step-05-finalize.md +132 -0
  210. skills/kb-init/templates/core/execution-modes.md +29 -0
  211. skills/kb-init/templates/core/output-only.md +4 -0
  212. skills/kb-init/templates/core/readwrite.md +33 -0
  213. skills/kb-search/SKILL.md +138 -0
  214. skills/kb-search/rules.md +64 -0
  215. skills/kb-sync/SKILL.md +43 -0
  216. skills/kb-sync/rules.md +70 -0
  217. skills/kb-sync/scripts/rebuild_module.py +91 -0
  218. skills/kb-sync/scripts/scan_repos.py +687 -0
  219. skills/kb-sync/steps/step-01-detect.md +72 -0
  220. skills/kb-sync/steps/step-02-update.md +71 -0
  221. skills/kb-sync/steps/step-03-verify.md +47 -0
  222. skills/kb-sync/steps/step-04-finalize.md +52 -0
  223. source_kb-0.2.2.dist-info/METADATA +194 -0
  224. source_kb-0.2.2.dist-info/RECORD +228 -0
  225. source_kb-0.2.2.dist-info/WHEEL +5 -0
  226. source_kb-0.2.2.dist-info/entry_points.txt +3 -0
  227. source_kb-0.2.2.dist-info/licenses/LICENSE +21 -0
  228. source_kb-0.2.2.dist-info/top_level.txt +6 -0
@@ -0,0 +1,184 @@
1
+ """Generation fallback tool — direct LLM API call for agents without sub-agent support."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import time
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ from mcp.server.fastmcp import FastMCP
11
+
12
+
13
# Environment variables that must all be set before the LLM can be called.
_LLM_ENV_VARS = ("LLM_BASE_URL", "LLM_API_KEY", "LLM_MODEL")


def _to_json(payload: dict[str, Any]) -> str:
    """Serialize a tool result identically on every return path."""
    return json.dumps(payload, ensure_ascii=False, indent=2)


def _resolve_module_dir(base_dir: Path, knowledge_dir: str, module_name: str) -> Path:
    """Return ``<knowledge_dir>/<module_name>``, resolving a relative knowledge_dir against base_dir."""
    kb_dir = Path(knowledge_dir)
    if not kb_dir.is_absolute():
        kb_dir = (base_dir / kb_dir).resolve()
    return kb_dir / module_name


def _post_chat_completion(prompt: str, model: str, base_url: str, api_key: str) -> Any:
    """POST the prompt to ``<base_url>/chat/completions`` and return the first choice's content."""
    # Imported lazily so this module can be loaded without requests installed.
    import requests

    resp = requests.post(
        f"{base_url.rstrip('/')}/chat/completions",
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        },
        json={
            "model": model,
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": 8192,
        },
        timeout=600,
    )
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"]


def register(mcp: FastMCP) -> None:
    """Register generation tools on the MCP server."""

    @mcp.tool()
    def generate_doc(
        kb_name: str,
        module_name: str,
        doc_type: str,
        shard: int = 0,
    ) -> str:
        """Directly call the LLM API to generate a single document (fallback when sub-agent capability is unavailable).

        Requires environment variables:
        - LLM_BASE_URL: API endpoint (e.g., https://api.anthropic.com)
        - LLM_API_KEY: API key
        - LLM_MODEL: Model name (e.g., claude-sonnet-4-6)

        Internal flow: render_prompt -> call LLM -> write output file

        Args:
            kb_name: Knowledge base name
            module_name: Module name
            doc_type: Document type
            shard: Shard index (0 = no sharding)

        Returns:
            A JSON string whose "status" is "done", "failed" or "error".
        """
        import os

        # Check LLM config before touching any project state.
        llm_env = {name: os.environ.get(name, "") for name in _LLM_ENV_VARS}
        missing = [name for name, value in llm_env.items() if not value]
        if missing:
            return _to_json({
                "status": "error",
                "message": f"Missing environment variables: {', '.join(missing)}. generate_doc requires direct LLM API access.",
                "hint": "If your agent supports sub-agents, use get_subagent_prompt to get the prompt and dispatch a sub-agent instead.",
            })
        base_url = llm_env["LLM_BASE_URL"]
        api_key = llm_env["LLM_API_KEY"]
        model = llm_env["LLM_MODEL"]

        from mcp_server import PROJECT_ROOT, find_config
        from core.config import load_config
        from core.preset import find_preset_template, get_template_path, load_preset
        from core.prompt.renderer import render_prompt
        from core.prompt.variables import ReferencePromptAssembler

        config_path = find_config()
        if not config_path:
            return _to_json({"status": "error", "message": "kb-project.yaml not found"})

        config = load_config(config_path)
        kb_cfg = config.get_kb(kb_name)
        preset_name = kb_cfg.get("preset", "generic")
        preset = load_preset(preset_name)

        # Resolve template: preset mapping first, then the conventional name.
        template_name = get_template_path(preset, doc_type, preset_name) or f"subagent-{doc_type}.md"
        template_path = find_preset_template(preset_name, template_name)
        if not template_path:
            candidate = PROJECT_ROOT / "skills" / "kb-init" / "templates" / template_name
            if candidate.exists():
                template_path = candidate
        if not template_path:
            return _to_json({
                "status": "error",
                "message": f"Template not found: {template_name}",
            })

        base_dir = config.config_path.parent
        module_dir = _resolve_module_dir(base_dir, kb_cfg["knowledge_dir"], module_name)

        # Render prompt; a shard only narrows the file list when its list file exists.
        extra_vars: dict[str, str] = {}
        if shard > 0:
            shard_file = module_dir / ".meta" / "shards" / f"{doc_type}-shard-{shard}.txt"
            if shard_file.exists():
                extra_vars["file_list_override"] = str(shard_file)

        assembler = ReferencePromptAssembler(
            project_root=base_dir,
            preset=preset,
        )
        rendered_prompt = render_prompt(
            template_path=template_path,
            config=config.raw,
            kb_name=kb_name,
            module_name=module_name,
            doc_type=doc_type,
            assembler=assembler,
            extras=extra_vars,
            preset=preset,
        )

        # Determine output path.
        # NOTE(review): the path does not depend on `shard`, so every shard of a
        # doc_type writes to the same file — confirm this is intended.
        dt_cfg = preset.get("doc_types", {}).get(doc_type, {})
        default_name = f"{doc_type}.md"
        filename = dt_cfg.get("filename", default_name) if isinstance(dt_cfg, dict) else default_name
        output_path = module_dir / filename

        # Prefer the project's LLM client; fall back to a plain HTTP call only
        # when that client module cannot be imported.
        try:
            from engine.llm_client import call_llm
        except ImportError:
            call_llm = None

        start_time = time.time()
        try:
            if call_llm is not None:
                response = call_llm(
                    prompt=rendered_prompt,
                    model=model,
                    base_url=base_url,
                    api_key=api_key,
                )
            else:
                response = _post_chat_completion(rendered_prompt, model, base_url, api_key)
        except Exception as exc:  # tool boundary: report every failure as a result
            return _to_json({
                "status": "failed",
                "error": str(exc),
                "elapsed_seconds": round(time.time() - start_time, 1),
            })
        elapsed = time.time() - start_time

        # A missing or non-text answer cannot be written to disk.
        if not isinstance(response, str):
            return _to_json({
                "status": "failed",
                "error": "LLM response contained no text content",
                "elapsed_seconds": round(elapsed, 1),
            })

        # Write output; the directory is created only once there is something to write.
        module_dir.mkdir(parents=True, exist_ok=True)
        output_path.write_text(response, encoding="utf-8")

        return _to_json({
            "status": "done",
            "output_path": str(output_path),
            "file_size_kb": round(output_path.stat().st_size / 1024, 1),
            "elapsed_seconds": round(elapsed, 1),
        })
@@ -0,0 +1,144 @@
1
+ """Planning tools — dispatch plan computation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ from mcp.server.fastmcp import FastMCP
10
+
11
+
12
# Planning heuristics used by dispatch_plan.
_SHARD_LINE_BUDGET = 6000      # source lines one sub-agent is planned to cover
_MAX_SHARDS = 4                # upper bound on shards per doc type
_MINUTES_PER_SUBAGENT = 2      # rough time estimate per sub-agent
_DEFAULT_BATCH = 99            # batch used when a doc type declares none


def _to_json(payload: dict[str, Any]) -> str:
    """Serialize a tool result identically on every return path."""
    return json.dumps(payload, ensure_ascii=False, indent=2)


def _read_file_list(path: Path) -> list[str]:
    """Return the stripped, non-blank, non-comment lines of a file list ([] if the file is absent)."""
    if not path.exists():
        return []
    entries: list[str] = []
    for raw in path.read_text(encoding="utf-8").splitlines():
        line = raw.strip()
        if line and not line.startswith("#"):
            entries.append(line)
    return entries


def _split_count(total_lines: int) -> int:
    """Return how many shards to plan: one per 6000 lines (rounded up), between 1 and 4."""
    if total_lines <= _SHARD_LINE_BUDGET:
        return 1
    return min(_MAX_SHARDS, -(-total_lines // _SHARD_LINE_BUDGET))


def register(mcp: FastMCP) -> None:
    """Register planning tools on the MCP server."""

    @mcp.tool()
    def dispatch_plan(kb_name: str, module_name: str) -> str:
        """Compute a document generation dispatch plan. The agent should present this to the user for confirmation before execution.

        Based on the skeleton and file lists, determines the generation strategy for each doc_type:
        - Whether sharding is needed (based on file count and line count)
        - Batch order (based on depends_on dependencies)
        - Estimated time

        Args:
            kb_name: Knowledge base name
            module_name: Module name

        Returns:
            A JSON string with per-doc-type entries, batch order and totals,
            or an error result when the config or skeleton is missing.
        """
        from mcp_server import find_config
        from core.config import load_config
        from core.preset import load_preset

        config_path = find_config()
        if not config_path:
            return _to_json({"status": "error", "message": "kb-project.yaml not found"})

        config = load_config(config_path)
        kb_cfg = config.get_kb(kb_name)
        preset = load_preset(kb_cfg.get("preset", "generic"))

        # Resolve module dir (a relative knowledge_dir is relative to the config file).
        kb_dir = Path(kb_cfg["knowledge_dir"])
        if not kb_dir.is_absolute():
            kb_dir = (config.config_path.parent / kb_dir).resolve()
        module_dir = kb_dir / module_name

        # Load skeleton stats
        skeleton_path = module_dir / ".meta" / "skeleton" / "skeleton.json"
        if not skeleton_path.exists():
            return _to_json({
                "status": "error",
                "message": f"Skeleton does not exist: {skeleton_path}. Please call skeleton_extract first.",
            })

        from core.skeleton.query import load_skeleton, stats
        entries = load_skeleton(skeleton_path)
        skel_stats = stats(entries)

        file_list_dir = module_dir / ".meta" / "file-lists"
        plan_entries: list[dict[str, Any]] = []
        total_subagents = 0

        for dt_key, dt_cfg in preset.get("doc_types", {}).items():
            if not isinstance(dt_cfg, dict):
                continue

            conditional = dt_cfg.get("conditional", False)
            global_view = dt_cfg.get("global_view", False)

            # Size the doc type from its file list, using skeleton line counts.
            files = _read_file_list(file_list_dir / f"{dt_key}.txt")
            file_count = len(files)
            wanted = set(files)
            total_lines = sum(
                e.get("total_lines", 0) for e in entries if e.get("file") in wanted
            )

            # Skip conditional doc types with no files
            if conditional and file_count == 0:
                continue

            # Global view docs don't need file lists: they cover the whole skeleton.
            if global_view:
                file_count = skel_stats.get("files", 0)
                total_lines = skel_stats.get("total_lines", 0)

            split_count = _split_count(total_lines)
            total_subagents += split_count

            plan_entries.append({
                "doc_type": dt_key,
                "filename": dt_cfg.get("filename", f"{dt_key}.md"),
                "batch": dt_cfg.get("batch", _DEFAULT_BATCH),
                "file_count": file_count,
                "total_lines": total_lines,
                "split_count": split_count,
                "conditional": conditional,
                "global_view": global_view,
            })

        # Sort by batch (stable, so doc types keep preset order within a batch).
        plan_entries.sort(key=lambda entry: entry["batch"])

        # Group by batch
        batches: dict[int, list[str]] = {}
        for entry in plan_entries:
            batches.setdefault(entry["batch"], []).append(entry["doc_type"])

        return _to_json({
            "status": "ok",
            "module_stats": {
                "source_files": skel_stats.get("files", 0),
                "total_lines": skel_stats.get("total_lines", 0),
                "methods": skel_stats.get("methods", 0),
                "skeleton_size_kb": round(skeleton_path.stat().st_size / 1024, 1),
            },
            "entries": plan_entries,
            "batch_order": [{"batch": k, "doc_types": v} for k, v in sorted(batches.items())],
            "total_subagents": total_subagents,
            "total_doc_types": len(plan_entries),
            "estimated_minutes": total_subagents * _MINUTES_PER_SUBAGENT,
        })
@@ -0,0 +1,175 @@
1
+ """Source management tools — skeleton extraction and file classification."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ from mcp.server.fastmcp import FastMCP
10
+
11
+
12
def _to_json(payload: dict[str, Any]) -> str:
    """Serialize a tool result identically on every return path."""
    return json.dumps(payload, ensure_ascii=False, indent=2)


def _resolve_source_cache(source: dict[str, Any], base_dir: Path, module_name: str) -> Path:
    """Return the cached source directory of a module.

    A relative ``cache_dir`` is resolved against base_dir. For a monorepo the
    result is ``<cache>/<repo_name>/<module path>``, where the path comes from
    the matching ``modules`` entry (falling back to the module name);
    otherwise it is ``<cache>/<module_name>``.
    """
    cache_dir = Path(source.get("cache_dir", "./.source-cache"))
    if not cache_dir.is_absolute():
        cache_dir = (base_dir / cache_dir).resolve()

    if source.get("structure") != "monorepo":
        return cache_dir / module_name

    module_path = module_name
    # `modules` may be present but empty/null; entries may lack a name.
    for module in source.get("modules") or []:
        if module.get("name") == module_name:
            module_path = module.get("path", module_name)
            break
    return cache_dir / source.get("repo_name", "repo") / module_path


def register(mcp: FastMCP) -> None:
    """Register source tools on the MCP server."""

    @mcp.tool()
    def skeleton_extract(
        repo_path: str,
        preset: str = "generic",
        output: str = "",
        summary: bool = True,
        subpath: str = "",
    ) -> str:
        """Extract code skeleton (classes, methods, field signatures) from a source repository.

        The skeleton is a structured summary of the source code containing class names, method signatures,
        field lists, and complexity assessments for each file. Used for subsequent document generation
        and coverage validation.

        Args:
            repo_path: Local repository path (e.g., .source-cache/backend)
            preset: Language preset (generic | java-spring)
            output: Output directory (module knowledge base directory; skeleton is written to .meta/skeleton/)
            summary: Whether to include the summary file path in the result when one was written (recommended true)
            subpath: Monorepo module subdirectory path (e.g., services/user-service)

        Returns:
            A JSON string with parse counts and, when output is given, the skeleton paths.
        """
        # Fail fast on a bad path before loading the preset or any parser.
        repo = Path(repo_path)
        if not repo.exists():
            return _to_json({
                "status": "error",
                "message": f"Repository path does not exist: {repo_path}",
            })

        from core.skeleton.extract import extract_skeleton
        from core.preset import load_preset

        preset_cfg = load_preset(preset)
        output_dir = Path(output) if output else None

        entries = extract_skeleton(
            repo, preset_cfg,
            ref="HEAD",
            subpath=subpath or None,
            output_dir=output_dir,
            compact=True,
        )

        result: dict[str, Any] = {
            "status": "ok",
            "files_parsed": len(entries),
            "methods": sum(len(e.get("methods", [])) for e in entries),
            "classes": sum(len(e.get("classes", [])) for e in entries),
        }

        if output_dir is None:
            result["note"] = f"Total {len(entries)} entries. Specify the output parameter to save to disk."
            return _to_json(result)

        skeleton_dir = output_dir / ".meta" / "skeleton"
        skeleton_path = skeleton_dir / "skeleton.json"
        summary_path = skeleton_dir / "skeleton-summary.json"
        result["skeleton_path"] = str(skeleton_path)
        if skeleton_path.exists():
            result["skeleton_size_kb"] = round(skeleton_path.stat().st_size / 1024, 1)
        if summary and summary_path.exists():
            result["summary_path"] = str(summary_path)

        return _to_json(result)

    @mcp.tool()
    def classify_files(kb_name: str, module_name: str) -> str:
        """Extract file lists for all doc_types in a module and check coverage.

        Performs in one pass: load skeleton -> match against preset classification rules ->
        write to .meta/file-lists/ -> compute coverage statistics.

        Args:
            kb_name: Knowledge base name (key in kb-project.yaml)
            module_name: Module name

        Returns:
            A JSON string with per-doc-type file counts and coverage figures
            (at most 20 uncovered files are listed).
        """
        from mcp_server import find_config
        from core.config import load_config
        from core.preset import load_preset
        from core.skeleton.file_list import load_skeleton, extract_file_list

        config_path = find_config()
        if not config_path:
            return _to_json({"status": "error", "message": "kb-project.yaml not found"})

        config = load_config(config_path)
        kb_cfg = config.get_kb(kb_name)
        preset = load_preset(kb_cfg.get("preset", "generic"))

        # Resolve paths (a relative knowledge_dir is relative to the config file).
        base_dir = config.config_path.parent
        kb_dir = Path(kb_cfg["knowledge_dir"])
        if not kb_dir.is_absolute():
            kb_dir = (base_dir / kb_dir).resolve()
        module_dir = kb_dir / module_name

        # Find skeleton
        skeleton_path = module_dir / ".meta" / "skeleton" / "skeleton.json"
        if not skeleton_path.exists():
            return _to_json({
                "status": "error",
                "message": f"Skeleton file does not exist: {skeleton_path}. Please call skeleton_extract first.",
            })

        entries = load_skeleton(module_dir)
        source_cache = _resolve_source_cache(kb_cfg.get("source") or {}, base_dir, module_name)

        # Extract file lists for all doc types
        file_list_dir = module_dir / ".meta" / "file-lists"
        file_list_dir.mkdir(parents=True, exist_ok=True)

        per_doc_type: dict[str, int] = {}
        all_classified_files: set[str] = set()

        for dt_key, dt_cfg in preset.get("doc_types", {}).items():
            # Global-view doc types get no file list.
            if not isinstance(dt_cfg, dict) or dt_cfg.get("global_view", False):
                continue

            files = extract_file_list(entries, preset, dt_key, source_cache)
            per_doc_type[dt_key] = len(files)
            all_classified_files.update(files)

            # One path per line; an empty list yields an empty file.
            listing = "\n".join(files) + "\n" if files else ""
            (file_list_dir / f"{dt_key}.txt").write_text(listing, encoding="utf-8")

        # Compute coverage
        all_source_files = {e["file"] for e in entries}
        uncovered = sorted(all_source_files - all_classified_files)
        covered_count = len(all_source_files) - len(uncovered)
        # max(..., 1) avoids dividing by zero for an empty skeleton.
        coverage_pct = round(covered_count / max(len(all_source_files), 1) * 100, 1)

        return _to_json({
            "status": "ok",
            "per_doc_type": per_doc_type,
            "total_source_files": len(all_source_files),
            "classified_files": len(all_classified_files),
            "uncovered_files": uncovered[:20],
            "uncovered_count": len(uncovered),
            "coverage_pct": coverage_pct,
            "file_list_dir": str(file_list_dir),
        })
@@ -0,0 +1,140 @@
1
+ """Validation tools — coverage check and progress monitoring."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import time
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ from mcp.server.fastmcp import FastMCP
11
+
12
+
13
# A progress file untouched for longer than this is reported as stalled.
_STALL_SECONDS = 300


def _to_json(payload: dict[str, Any]) -> str:
    """Serialize a tool result identically on every return path."""
    return json.dumps(payload, ensure_ascii=False, indent=2)


def _progress_status(last_line: str, age_seconds: float) -> str:
    """Classify a progress file from its last line and the seconds since it was modified."""
    if "DONE" in last_line:
        return "done"
    if "ERROR" in last_line or "FAILED" in last_line:
        return "failed"
    return "stalled" if age_seconds > _STALL_SECONDS else "in_progress"


def register(mcp: FastMCP) -> None:
    """Register validation tools on the MCP server."""

    @mcp.tool()
    def coverage_check(module_dir: str, module_type: str = "service") -> str:
        """Check documentation coverage.

        Compares methods/classes in the skeleton against generated document content to compute coverage.
        Target: >= 80%.

        Args:
            module_dir: Module knowledge base directory (containing .md documents and .meta/ skeleton)
            module_type: Module type (service | library | api-contract)

        Returns:
            A JSON string with the pass/fail verdict, error and warning counts,
            and at most the first 10 errors and warnings.
        """
        mod_dir = Path(module_dir)
        if not mod_dir.is_dir():
            return _to_json({"status": "error", "message": f"Directory does not exist: {module_dir}"})

        from core.validators.coverage import CoverageValidator

        result = CoverageValidator().validate(mod_dir, module_type=module_type)

        return _to_json({
            "status": "ok" if result.passed else "fail",
            "passed": result.passed,
            "errors": result.errors[:10],
            "warnings": result.warnings[:10],
            "error_count": len(result.errors),
            "warning_count": len(result.warnings),
        })

    @mcp.tool()
    def check_progress(module_dir: str) -> str:
        """Check module document generation progress.

        Scans the .meta/progress/ directory and target document files to determine the status
        of each doc_type. Used to monitor sub-agent execution progress and detect stalled tasks.

        Args:
            module_dir: Module knowledge base directory

        Returns:
            A JSON string with per-document status, a status summary and recommendations.
        """
        mod_dir = Path(module_dir)
        if not mod_dir.is_dir():
            return _to_json({"status": "error", "message": f"Directory does not exist: {module_dir}"})

        progress_dir = mod_dir / ".meta" / "progress"
        file_list_dir = mod_dir / ".meta" / "file-lists"

        # Determine expected doc types from non-empty file lists.
        expected_docs: set[str] = set()
        if file_list_dir.is_dir():
            expected_docs = {fl.stem for fl in file_list_dir.glob("*.txt") if fl.stat().st_size > 0}

        # Decide once whether every .md counts. `pending` is tracked separately
        # so that emptying it never turns unrelated .md files into results.
        accept_any_md = not expected_docs
        pending = set(expected_docs)

        per_doc: list[dict[str, Any]] = []
        seen: set[str] = set()
        summary = {"done": 0, "in_progress": 0, "failed": 0, "pending": 0}

        # Existing .md files are documents that were already generated.
        # NOTE(review): this matches on the file stem, so it presumably relies on
        # each output file being named after its doc type — confirm against presets.
        for md in sorted(mod_dir.glob("*.md")):
            doc_name = md.stem
            if not (accept_any_md or doc_name in expected_docs):
                continue
            file_size = md.stat().st_size
            per_doc.append({
                "doc_type": doc_name,
                "status": "done",
                "file_size": file_size,
                "file_size_kb": round(file_size / 1024, 1),
            })
            seen.add(doc_name)
            summary["done"] += 1
            pending.discard(doc_name)

        # Progress files describe documents that are not finished on disk yet.
        if progress_dir.is_dir():
            for pf in sorted(progress_dir.iterdir()):
                if not pf.is_file():
                    continue
                doc_type = pf.stem
                if doc_type in seen:
                    continue  # already reported, e.g. as done via its .md file
                try:
                    content = pf.read_text(encoding="utf-8").strip()
                    age = time.time() - pf.stat().st_mtime
                except (OSError, UnicodeDecodeError):
                    # Best effort: an unreadable progress file is skipped.
                    continue

                lines = content.splitlines()
                last_line = lines[-1] if lines else ""
                status = _progress_status(last_line, age)

                per_doc.append({
                    "doc_type": doc_type,
                    "status": status,
                    "last_update": last_line[:80],
                })
                seen.add(doc_type)
                # Stalled tasks are counted with the failures in the summary.
                summary["failed" if status == "stalled" else status] += 1
                pending.discard(doc_type)

        # Remaining expected docs are pending
        for doc_type in sorted(pending):
            per_doc.append({"doc_type": doc_type, "status": "pending"})
            summary["pending"] += 1

        # Generate recommendations
        recommendations = []
        if summary["failed"] > 0:
            recommendations.append("Some documents failed. Check errors and retry with generate_doc or get_subagent_prompt")
        if summary["pending"] > 0:
            recommendations.append(f"{summary['pending']} documents still pending generation")
        nothing_outstanding = (
            summary["pending"] == 0 and summary["in_progress"] == 0 and summary["failed"] == 0
        )
        if summary["done"] > 0 and nothing_outstanding:
            recommendations.append("All documents complete. Call coverage_check to validate quality")

        return _to_json({
            "status": "ok",
            "per_doc": sorted(per_doc, key=lambda doc: doc["doc_type"]),
            "summary": summary,
            "recommendations": recommendations,
        })