source-kb 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. cli/__init__.py +50 -0
  2. cli/__main__.py +5 -0
  3. cli/commands/__init__.py +1 -0
  4. cli/commands/anchor_fix.py +47 -0
  5. cli/commands/diff_doc.py +52 -0
  6. cli/commands/dispatch.py +77 -0
  7. cli/commands/extract.py +72 -0
  8. cli/commands/file_list.py +74 -0
  9. cli/commands/index.py +84 -0
  10. cli/commands/lock.py +89 -0
  11. cli/commands/merge.py +60 -0
  12. cli/commands/merge_delta.py +19 -0
  13. cli/commands/metadata.py +24 -0
  14. cli/commands/pipeline.py +45 -0
  15. cli/commands/post_merge.py +43 -0
  16. cli/commands/query.py +52 -0
  17. cli/commands/render.py +101 -0
  18. cli/commands/scan_repos.py +46 -0
  19. cli/commands/setup.py +94 -0
  20. cli/commands/split.py +196 -0
  21. cli/commands/stale_files.py +98 -0
  22. cli/commands/validate.py +191 -0
  23. core/__init__.py +32 -0
  24. core/config.py +261 -0
  25. core/docs/__init__.py +7 -0
  26. core/docs/section_updater.py +286 -0
  27. core/docs/shared.py +149 -0
  28. core/git.py +294 -0
  29. core/interfaces.py +249 -0
  30. core/monitor/__init__.py +5 -0
  31. core/monitor/progress.py +83 -0
  32. core/monitor/prompt_store.py +49 -0
  33. core/paths.py +141 -0
  34. core/preset.py +237 -0
  35. core/preset_accessors.py +202 -0
  36. core/preset_classify.py +132 -0
  37. core/preset_hooks.py +129 -0
  38. core/preset_profile.py +89 -0
  39. core/prompt/__init__.py +7 -0
  40. core/prompt/__main__.py +147 -0
  41. core/prompt/content.py +320 -0
  42. core/prompt/context_manager.py +164 -0
  43. core/prompt/renderer.py +236 -0
  44. core/prompt/response_parser.py +274 -0
  45. core/prompt/templates.py +357 -0
  46. core/prompt/validate_parity.py +162 -0
  47. core/prompt/variables.py +339 -0
  48. core/rag/__init__.py +22 -0
  49. core/rag/__main__.py +136 -0
  50. core/rag/bm25_index.py +268 -0
  51. core/rag/chunker.py +273 -0
  52. core/rag/embedder.py +151 -0
  53. core/rag/indexer.py +292 -0
  54. core/rag/loader.py +89 -0
  55. core/rag/retriever.py +82 -0
  56. core/skeleton/__init__.py +11 -0
  57. core/skeleton/__main__.py +934 -0
  58. core/skeleton/anchor_fix.py +250 -0
  59. core/skeleton/classify.py +331 -0
  60. core/skeleton/cmd_anchor_fix.py +43 -0
  61. core/skeleton/cmd_diff_doc.py +44 -0
  62. core/skeleton/cmd_lock.py +87 -0
  63. core/skeleton/cmd_merge_delta.py +41 -0
  64. core/skeleton/community.py +233 -0
  65. core/skeleton/dependency_graph.py +306 -0
  66. core/skeleton/diff_doc.py +248 -0
  67. core/skeleton/dispatch.py +273 -0
  68. core/skeleton/dispatch_render.py +319 -0
  69. core/skeleton/dispatch_source.py +111 -0
  70. core/skeleton/extract.py +218 -0
  71. core/skeleton/extract_methods.py +298 -0
  72. core/skeleton/file_list.py +239 -0
  73. core/skeleton/impact.py +278 -0
  74. core/skeleton/jar_download.py +177 -0
  75. core/skeleton/jar_resolver.py +186 -0
  76. core/skeleton/loader.py +162 -0
  77. core/skeleton/merge.py +278 -0
  78. core/skeleton/merge_delta.py +229 -0
  79. core/skeleton/metadata.py +96 -0
  80. core/skeleton/metadata_builders.py +264 -0
  81. core/skeleton/module_dag.py +330 -0
  82. core/skeleton/parsers/__init__.py +71 -0
  83. core/skeleton/parsers/jqassistant.py +300 -0
  84. core/skeleton/parsers/jqassistant_cypher.py +225 -0
  85. core/skeleton/parsers/regex.py +171 -0
  86. core/skeleton/parsers/treesitter.py +324 -0
  87. core/skeleton/parsers/treesitter_java.py +284 -0
  88. core/skeleton/parsers/treesitter_multi.py +289 -0
  89. core/skeleton/pom_parser.py +299 -0
  90. core/skeleton/post_merge.py +295 -0
  91. core/skeleton/post_merge_llm.py +82 -0
  92. core/skeleton/query.py +195 -0
  93. core/skeleton/shard_context.py +177 -0
  94. core/skeleton/split.py +180 -0
  95. core/skeleton/split_cache.py +107 -0
  96. core/skeleton/split_feedback.py +174 -0
  97. core/skeleton/split_plan.py +219 -0
  98. core/skeleton/split_plan_helpers.py +305 -0
  99. core/skeleton/split_plan_llm.py +274 -0
  100. core/utils.py +135 -0
  101. core/validators/__init__.py +65 -0
  102. core/validators/__main__.py +215 -0
  103. core/validators/consistency.py +203 -0
  104. core/validators/coverage.py +171 -0
  105. core/validators/duplicates.py +76 -0
  106. core/validators/engine.py +224 -0
  107. core/validators/links.py +76 -0
  108. core/validators/sampling.py +169 -0
  109. core/validators/structure.py +144 -0
  110. engine/__init__.py +7 -0
  111. engine/assembler.py +231 -0
  112. engine/confirm.py +65 -0
  113. engine/dedup.py +106 -0
  114. engine/main.py +211 -0
  115. engine/pipeline/__init__.py +163 -0
  116. engine/pipeline/recovery.py +250 -0
  117. engine/pipeline/steps/__init__.py +23 -0
  118. engine/pipeline/steps/audit.py +220 -0
  119. engine/pipeline/steps/audit_apply.py +195 -0
  120. engine/pipeline/steps/audit_helpers.py +155 -0
  121. engine/pipeline/steps/classify_llm.py +236 -0
  122. engine/pipeline/steps/classify_prompt.py +223 -0
  123. engine/pipeline/steps/finalize.py +160 -0
  124. engine/pipeline/steps/generate.py +169 -0
  125. engine/pipeline/steps/generate_batch.py +197 -0
  126. engine/pipeline/steps/generate_recovery.py +170 -0
  127. engine/pipeline/steps/llm_plan_split.py +253 -0
  128. engine/pipeline/steps/lock.py +64 -0
  129. engine/pipeline/steps/preflight.py +237 -0
  130. engine/pipeline/steps/preflight_adjust.py +147 -0
  131. engine/pipeline/steps/pregenerate.py +130 -0
  132. engine/pipeline/steps/quality.py +81 -0
  133. engine/pipeline/steps/skeleton.py +149 -0
  134. engine/pipeline/steps/source.py +163 -0
  135. engine/pipeline/steps/sync.py +117 -0
  136. engine/pipeline/steps/sync_finalize.py +237 -0
  137. engine/pipeline/steps/sync_update.py +341 -0
  138. engine/pipelines.py +91 -0
  139. engine/runner.py +335 -0
  140. engine/strategies/__init__.py +86 -0
  141. engine/strategies/api.py +128 -0
  142. engine/strategies/delegated.py +50 -0
  143. engine/strategies/dryrun.py +25 -0
  144. engine/two_phase.py +143 -0
  145. mcp_server/__init__.py +73 -0
  146. mcp_server/__main__.py +5 -0
  147. mcp_server/tools/__init__.py +1 -0
  148. mcp_server/tools/config.py +63 -0
  149. mcp_server/tools/discovery.py +276 -0
  150. mcp_server/tools/generation.py +184 -0
  151. mcp_server/tools/planning.py +144 -0
  152. mcp_server/tools/source.py +175 -0
  153. mcp_server/tools/validation.py +140 -0
  154. mcp_server/tools/workflow.py +166 -0
  155. mcp_server/workflow_loader.py +204 -0
  156. presets/generic/audit_dimensions.md +132 -0
  157. presets/generic/doc_types.yaml +152 -0
  158. presets/generic/preset.yaml +115 -0
  159. presets/java-spring/audit_dimensions.md +228 -0
  160. presets/java-spring/audit_dimensions.yaml +203 -0
  161. presets/java-spring/doc_types.yaml +269 -0
  162. presets/java-spring/hooks.py +122 -0
  163. presets/java-spring/preset.yaml +341 -0
  164. presets/java-spring/templates/README.md +34 -0
  165. presets/java-spring/templates/audit-system.md +15 -0
  166. presets/java-spring/templates/subagent-aop.md +105 -0
  167. presets/java-spring/templates/subagent-api.md +63 -0
  168. presets/java-spring/templates/subagent-architecture.md +111 -0
  169. presets/java-spring/templates/subagent-async-events.md +107 -0
  170. presets/java-spring/templates/subagent-audit-api-contracts.md +40 -0
  171. presets/java-spring/templates/subagent-audit-architecture.md +38 -0
  172. presets/java-spring/templates/subagent-audit-business.md +40 -0
  173. presets/java-spring/templates/subagent-audit-data-models.md +40 -0
  174. presets/java-spring/templates/subagent-business.md +129 -0
  175. presets/java-spring/templates/subagent-caching.md +75 -0
  176. presets/java-spring/templates/subagent-database-access.md +114 -0
  177. presets/java-spring/templates/subagent-enum.md +75 -0
  178. presets/java-spring/templates/subagent-error-handling.md +91 -0
  179. presets/java-spring/templates/subagent-external-integrations.md +80 -0
  180. presets/java-spring/templates/subagent-index.md +122 -0
  181. presets/java-spring/templates/subagent-messaging.md +97 -0
  182. presets/java-spring/templates/subagent-model.md +88 -0
  183. presets/java-spring/templates/subagent-observability.md +91 -0
  184. presets/java-spring/templates/subagent-scheduled.md +81 -0
  185. presets/java-spring/templates/subagent-security.md +102 -0
  186. presets/java-spring/templates/subagent-structure.md +101 -0
  187. presets/java-spring/templates/subagent-sync-section.md +34 -0
  188. presets/java-spring/templates/subagent-utils.md +73 -0
  189. presets/java-spring/templates/sync-system.md +8 -0
  190. presets/java-spring/workflow-extensions.md +112 -0
  191. skills/__init__.py +1 -0
  192. skills/_shared/README.md +30 -0
  193. skills/_shared/doc-coverage-shared.md +134 -0
  194. skills/_shared/doc-quality-standard.md +1058 -0
  195. skills/_shared/doc-subagent-rules.md +762 -0
  196. skills/_shared/windows-compat.md +89 -0
  197. skills/kb-audit/SKILL.md +52 -0
  198. skills/kb-audit/rules.md +88 -0
  199. skills/kb-audit/steps/step-01-prepare.md +75 -0
  200. skills/kb-audit/steps/step-02-audit.md +96 -0
  201. skills/kb-audit/steps/step-03-verify.md +65 -0
  202. skills/kb-audit/steps/step-04-report.md +64 -0
  203. skills/kb-init/SKILL.md +146 -0
  204. skills/kb-init/rules.md +187 -0
  205. skills/kb-init/steps/step-01-scope.md +62 -0
  206. skills/kb-init/steps/step-02-source.md +410 -0
  207. skills/kb-init/steps/step-03-generate.md +307 -0
  208. skills/kb-init/steps/step-04-quality.md +92 -0
  209. skills/kb-init/steps/step-05-finalize.md +132 -0
  210. skills/kb-init/templates/core/execution-modes.md +29 -0
  211. skills/kb-init/templates/core/output-only.md +4 -0
  212. skills/kb-init/templates/core/readwrite.md +33 -0
  213. skills/kb-search/SKILL.md +138 -0
  214. skills/kb-search/rules.md +64 -0
  215. skills/kb-sync/SKILL.md +43 -0
  216. skills/kb-sync/rules.md +70 -0
  217. skills/kb-sync/scripts/rebuild_module.py +91 -0
  218. skills/kb-sync/scripts/scan_repos.py +687 -0
  219. skills/kb-sync/steps/step-01-detect.md +72 -0
  220. skills/kb-sync/steps/step-02-update.md +71 -0
  221. skills/kb-sync/steps/step-03-verify.md +47 -0
  222. skills/kb-sync/steps/step-04-finalize.md +52 -0
  223. source_kb-0.2.2.dist-info/METADATA +194 -0
  224. source_kb-0.2.2.dist-info/RECORD +228 -0
  225. source_kb-0.2.2.dist-info/WHEEL +5 -0
  226. source_kb-0.2.2.dist-info/entry_points.txt +3 -0
  227. source_kb-0.2.2.dist-info/licenses/LICENSE +21 -0
  228. source_kb-0.2.2.dist-info/top_level.txt +6 -0
cli/__init__.py ADDED
@@ -0,0 +1,50 @@
1
+ """source-kb — unified CLI entry point.
2
+
3
+ Usage:
4
+ source-kb <command> [options]
5
+ source-kb --help
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import argparse
11
+ import importlib
12
+ import pkgutil
13
+ import sys
14
+
15
+
16
def _discover_commands(subparsers: argparse._SubParsersAction) -> None:
    """Import each module found in ``cli.commands`` and let it register itself.

    A module participates by exposing a module-level ``register`` attribute,
    which is called with ``subparsers``. Modules without one are still
    imported but contribute no sub-command.
    """
    import cli.commands as commands_pkg

    for module_info in pkgutil.iter_modules(commands_pkg.__path__):
        command_module = importlib.import_module(f"cli.commands.{module_info.name}")
        if not hasattr(command_module, "register"):
            continue
        command_module.register(subparsers)
24
+
25
+
26
def main(argv: list[str] | None = None) -> None:
    """Parse the command line and dispatch to the selected sub-command.

    Args:
        argv: Arguments to parse. ``None`` lets argparse fall back to
            ``sys.argv[1:]``.

    Exits with status 0 after printing help when no sub-command was given,
    and with status 1 when the chosen sub-command did not attach a ``func``
    handler via ``set_defaults``.
    """
    # Switch both standard streams to UTF-8. A stream may have been replaced
    # by an object without ``reconfigure`` (e.g. io.StringIO) or be None;
    # those are left untouched instead of raising AttributeError.
    for stream in (sys.stdout, sys.stderr):
        reconfigure = getattr(stream, "reconfigure", None)
        if callable(reconfigure):
            reconfigure(encoding="utf-8")

    from core import __version__

    parser = argparse.ArgumentParser(
        prog="source-kb",
        description="Auto-generate structured knowledge base documents from source code.",
    )
    parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}")

    subparsers = parser.add_subparsers(dest="command")
    _discover_commands(subparsers)

    args = parser.parse_args(argv)
    if not args.command:
        parser.print_help()
        sys.exit(0)

    if not hasattr(args, "func"):
        # The sub-command parsed fine but registered no handler.
        parser.print_help()
        sys.exit(1)

    args.func(args)
cli/__main__.py ADDED
@@ -0,0 +1,5 @@
1
+ """Allow running as: python -m cli"""
2
+
3
import cli

# Running ``python -m cli`` executes this module, which hands over to the
# package-level entry point.
cli.main()
@@ -0,0 +1 @@
1
+ """Command registry — modules here are auto-discovered by the entry point."""
@@ -0,0 +1,47 @@
1
+ """source-kb anchor-fix — Fix broken cross-document anchor links."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import json
7
+ import sys
8
+ from pathlib import Path
9
+
10
+
11
def register(subparsers: argparse._SubParsersAction) -> None:
    """Add the ``anchor-fix`` sub-command and bind it to ``run``."""
    command = subparsers.add_parser("anchor-fix", help="Fix broken cross-document anchor links")
    option_specs = (
        ("--module-dir", {"required": True, "help": "Module documentation directory"}),
        ("--dry-run", {"action": "store_true", "help": "Report without fixing"}),
        ("--threshold", {"type": float, "default": 0.8, "help": "Fuzzy match threshold (0.0-1.0)"}),
    )
    for flag, options in option_specs:
        command.add_argument(flag, **options)
    command.set_defaults(func=run)
17
+
18
+
19
def run(args: argparse.Namespace) -> None:
    """Run the anchor fixer over ``--module-dir`` and report the outcome.

    Exits with status 1 when the directory does not exist. The readable
    report is printed to stdout; a one-line JSON summary goes to stderr.
    """
    from core.skeleton.anchor_fix import fix_anchors

    target_dir = Path(args.module_dir)
    if not target_dir.is_dir():
        print(f"Error: directory not found: {target_dir}", file=sys.stderr)
        sys.exit(1)

    outcome = fix_anchors(target_dir, dry_run=args.dry_run, similarity_threshold=args.threshold)

    suffix = " (dry-run)" if args.dry_run else ""
    print(f"Anchor fix{suffix}: scanned {outcome.files_scanned} files, checked {outcome.links_checked} links")
    if not (outcome.links_fixed or outcome.links_degraded):
        print(" All links valid.")
    else:
        print(f" Fixed: {outcome.links_fixed}, Degraded: {outcome.links_degraded}")
        for detail in outcome.details:
            # Any action other than "fixed" is reported as "degraded".
            label = "fixed" if detail["action"] == "fixed" else "degraded"
            print(f" [{label}] {detail['file']}: {detail['old_link']} -> {detail['new_link']}")

    summary = {
        "status": "ok",
        "files_scanned": outcome.files_scanned,
        "links_checked": outcome.links_checked,
        "links_fixed": outcome.links_fixed,
        "links_degraded": outcome.links_degraded,
    }
    print(json.dumps(summary, ensure_ascii=False), file=sys.stderr)
@@ -0,0 +1,52 @@
1
+ """source-kb diff-doc — Compare document against skeleton."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import json
7
+ import sys
8
+ from pathlib import Path
9
+
10
+
11
def register(subparsers: argparse._SubParsersAction) -> None:
    """Add the ``diff-doc`` sub-command and bind it to ``run``."""
    command = subparsers.add_parser("diff-doc", help="Compare document against skeleton for inconsistencies")
    option_specs = (
        ("--doc-path", {"required": True, "help": "Path to markdown document"}),
        ("--skeleton-path", {"required": True, "help": "Path to skeleton JSON or directory"}),
        ("--doc-type", {"help": "Document type for specific comparison rules"}),
        ("--output", {"help": "Output JSON path (default: stdout)"}),
    )
    for flag, options in option_specs:
        command.add_argument(flag, **options)
    command.set_defaults(func=run)
18
+
19
+
20
def run(args: argparse.Namespace) -> None:
    """Compare a document with a skeleton and emit the findings as JSON.

    Exits with status 1 when either ``--doc-path`` or ``--skeleton-path``
    does not exist. The full report is written to ``--output`` when given,
    otherwise printed to stdout; a short JSON summary always goes to stderr.
    """
    from core.skeleton.diff_doc import diff_doc

    doc_path = Path(args.doc_path)
    skeleton_path = Path(args.skeleton_path)

    for label, path in (("document", doc_path), ("skeleton", skeleton_path)):
        if not path.exists():
            print(f"Error: {label} not found: {path}", file=sys.stderr)
            sys.exit(1)

    result = diff_doc(doc_path, skeleton_path, doc_type=args.doc_type)

    report = {
        "doc_type": result.doc_type,
        "doc_items": result.doc_items_count,
        "skeleton_items": result.skeleton_items_count,
        "missing": result.missing_count,
        "stale": result.stale_count,
        "findings": result.to_json(),
    }
    report_json = json.dumps(report, ensure_ascii=False, indent=2)

    if args.output:
        destination = Path(args.output)
        # Create missing parent directories so a fresh output location does
        # not fail with FileNotFoundError (the file-list command does the same).
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(report_json, encoding="utf-8")
        print(f"Written to: {args.output}")
    else:
        print(report_json)

    summary = {"status": "ok", "missing": result.missing_count, "stale": result.stale_count}
    print(json.dumps(summary, ensure_ascii=False), file=sys.stderr)
@@ -0,0 +1,77 @@
1
+ """source-kb dispatch — Generate dispatch plan preview."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import json
7
+ import sys
8
+ from pathlib import Path
9
+
10
+
11
def register(subparsers: argparse._SubParsersAction) -> None:
    """Add the ``dispatch`` sub-command and bind it to ``run``."""
    command = subparsers.add_parser("dispatch", help="Generate dispatch plan preview")
    option_specs = (
        ("--config", {"help": "kb-project.yaml path"}),
        ("--kb", {"required": True, "help": "Knowledge base name"}),
        ("--module", {"required": True, "help": "Module name"}),
        ("--mode", {"default": "readwrite", "choices": ["readwrite", "output-only"]}),
    )
    for flag, options in option_specs:
        command.add_argument(flag, **options)
    command.set_defaults(func=run)
18
+
19
+
20
def run(args: argparse.Namespace) -> None:
    """Compute the dispatch plan for one module and persist it under ``.meta``.

    Writes ``dispatch-plan.md`` and ``dispatch-tasks.json`` into
    ``<knowledge_dir>/<module>/.meta``, then calls ``write_shard_files`` for
    the module directory. An overview per plan entry is printed to stdout and
    a one-line JSON summary to stderr.
    """
    from core.config import load_config
    from core.preset import load_preset
    from core.skeleton.dispatch import compute_dispatch_plan
    from core.skeleton.dispatch_render import render_markdown, plan_to_tasks, write_shard_files

    config = load_config(Path(args.config) if args.config else None)
    kb_config = config.get_kb(args.kb)
    preset_name = kb_config.get("preset", "generic")
    preset = load_preset(preset_name)

    knowledge_dir = Path(kb_config["knowledge_dir"])
    source_cfg = kb_config.get("source", {})
    module_name = args.module
    module_dir = knowledge_dir / module_name
    source_cache = Path(source_cfg.get("cache_dir", "./.source-cache")) / module_name

    # The module type is taken from the first repo entry whose name equals
    # the module name; with no such entry it stays "service".
    module_type = next(
        (
            repo.get("type", "service")
            for repo in source_cfg.get("repos", [])
            if repo.get("name") == module_name
        ),
        "service",
    )

    mode = args.mode
    plan = compute_dispatch_plan(
        preset=preset,
        module_dir=module_dir,
        source_cache=source_cache,
        mode=mode,
        module_name=module_name,
        module_type=module_type,
    )
    markdown = render_markdown(plan, mode=mode)

    meta_dir = module_dir / ".meta"
    meta_dir.mkdir(parents=True, exist_ok=True)
    plan_file = meta_dir / "dispatch-plan.md"
    plan_file.write_text(markdown, encoding="utf-8")

    tasks = plan_to_tasks(
        plan=plan,
        kb_name=args.kb,
        preset_name=preset_name,
        preset=preset,
        knowledge_dir=knowledge_dir,
        mode=mode,
    )
    tasks_file = meta_dir / "dispatch-tasks.json"
    tasks_file.write_text(json.dumps(tasks, ensure_ascii=False, indent=2), encoding="utf-8")

    write_shard_files(plan, module_dir)

    print(f"Dispatch plan: {len(plan.entries)} doc types, {plan.total_shards()} shards (mode={mode})")
    print(f" Written to: {plan_file}")
    print(f" Tasks JSON: {tasks_file}")
    for entry in plan.entries:
        # The multiplier is shown only for entries split into several shards.
        multiplier = f" x{entry.split_count}" if entry.split_count > 1 else ""
        print(f" - {entry.doc_type} ({entry.file_count} files, {entry.total_lines} lines){multiplier}")

    summary = {"status": "ok", "entries": len(plan.entries), "total_shards": plan.total_shards()}
    print(json.dumps(summary, ensure_ascii=False), file=sys.stderr)
@@ -0,0 +1,72 @@
1
+ """source-kb extract — Extract skeleton from source repository."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import json
7
+ import sys
8
+ from pathlib import Path
9
+
10
+
11
def register(subparsers: argparse._SubParsersAction) -> None:
    """Add the ``extract`` sub-command and bind it to ``run``."""
    command = subparsers.add_parser("extract", help="Extract skeleton from source repo")
    option_specs = (
        ("--repo", {"required": True, "help": "Repository path"}),
        ("--preset", {"required": True, "help": "Preset name"}),
        ("--ref", {"default": "HEAD", "help": "Git reference"}),
        ("--output", {"help": "Output directory"}),
        ("--split-by-package", {"action": "store_true"}),
        ("--no-compact", {"action": "store_true"}),
        ("--summary", {"action": "store_true"}),
        ("--subpath", {"help": "Subdirectory scope (monorepo module path)"}),
        ("--files", {"nargs": "*", "help": "Extract only specified file paths (incremental)"}),
        ("--doc", {"help": "Extract only files relevant to specified doc type"}),
    )
    for flag, options in option_specs:
        command.add_argument(flag, **options)
    command.set_defaults(func=run)
24
+
25
+
26
def run(args: argparse.Namespace) -> None:
    """Extract a skeleton from ``--repo`` and print a JSON summary to stderr.

    When ``--output`` is omitted, the extracted entries are dumped to
    ``<repo>/.meta/skeleton/skeleton.json``. An unknown ``--doc`` value
    (checked only when ``--files`` is empty) exits with status 1.
    """
    from core.skeleton.extract import extract_skeleton
    from core.preset import load_preset

    preset = load_preset(args.preset)
    repo_path = Path(args.repo)
    output_dir = Path(args.output) if args.output else None

    files_filter = args.files
    if args.doc and not files_filter:
        # NOTE(review): --doc is validated here and echoed in the summary,
        # but nothing visible in this function narrows the extraction to that
        # doc type. Confirm whether filtering is expected to happen here.
        known_doc_types = preset.get("doc_types", {})
        if args.doc not in known_doc_types:
            valid = ", ".join(known_doc_types.keys())
            print(f"Error: unknown doc type '{args.doc}'. Valid: {valid}", file=sys.stderr)
            sys.exit(1)

    entries = extract_skeleton(
        repo_path,
        preset,
        ref=args.ref or "HEAD",
        subpath=args.subpath,
        output_dir=output_dir,
        split_by_package=args.split_by_package,
        compact=not args.no_compact,
        files=files_filter,
    )

    def _total(key: str) -> int:
        # Sum the lengths of one list-valued field over all entries.
        return sum(len(entry.get(key, [])) for entry in entries)

    result = {
        "status": "ok",
        "files": len(entries),
        "methods": _total("methods"),
        "classes": _total("classes"),
    }
    if args.doc:
        result["doc_filter"] = args.doc

    if args.summary and output_dir:
        # Only the path is reported here; this function does not itself
        # write the summary file.
        result["summary"] = str(output_dir / ".meta" / "skeleton-summary.json")

    if not output_dir:
        fallback_dir = repo_path / ".meta" / "skeleton"
        fallback_dir.mkdir(parents=True, exist_ok=True)
        skeleton_file = fallback_dir / "skeleton.json"
        skeleton_file.write_text(json.dumps(entries, ensure_ascii=False, indent=1), encoding="utf-8")
        result["output"] = str(skeleton_file)
        print(f"Extracted to: {skeleton_file}")

    print(json.dumps(result, ensure_ascii=False), file=sys.stderr)
@@ -0,0 +1,74 @@
1
+ """source-kb file-list — Extract file list for a doc type."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import json
7
+ import sys
8
+ from pathlib import Path
9
+
10
+
11
def register(subparsers: argparse._SubParsersAction) -> None:
    """Add the ``file-list`` sub-command and bind it to ``run``."""
    command = subparsers.add_parser("file-list", help="Extract file list for a doc type")
    option_specs = (
        ("--skeleton", {"help": "Skeleton JSON path"}),
        ("--module-dir", {"help": "Module directory"}),
        ("--preset", {"required": True, "help": "Preset name"}),
        ("--doc-type", {"help": "Document type"}),
        ("--source-cache", {"help": "Source cache path"}),
        ("--output", {"help": "Output file path"}),
        ("--coverage-check", {"action": "store_true"}),
    )
    for flag, options in option_specs:
        command.add_argument(flag, **options)
    command.set_defaults(func=run)
21
+
22
+
23
def run(args: argparse.Namespace) -> None:
    """List the files for a doc type, or report coverage with ``--coverage-check``.

    Skeleton entries come from ``--skeleton`` when given, otherwise from
    ``--module-dir``; with neither the command exits with status 1. The file
    list goes to ``--output``, else to ``<file_list_dir>/<doc-type>.txt`` when
    both a module directory and a doc type are known, else to stdout. A
    one-line JSON summary is always printed to stderr.
    """
    from core.skeleton.file_list import load_skeleton, extract_file_list, check_coverage
    from core.preset import load_preset

    preset = load_preset(args.preset)
    module_dir = Path(args.module_dir) if args.module_dir else None
    skeleton_path = Path(args.skeleton) if args.skeleton else None

    if skeleton_path is not None:
        from core.skeleton.query import load_skeleton as load_skeleton_file
        entries = load_skeleton_file(skeleton_path)
    elif module_dir is not None:
        entries = load_skeleton(module_dir)
    else:
        print("Error: --skeleton or --module-dir required", file=sys.stderr)
        sys.exit(1)

    source_cache = Path(args.source_cache) if args.source_cache else Path(".")

    if args.coverage_check:
        from core.paths import file_list_dir
        lists_dir = file_list_dir(module_dir) if module_dir else None
        report = check_coverage(entries, preset, lists_dir)
        print(f"Coverage: {report.coverage_pct:.1f}% ({report.covered}/{report.total_files})")
        if report.uncovered_files:
            print(f"Uncovered ({report.uncovered_count}):")
            # Only the first 20 uncovered paths are echoed to the console.
            for uncovered in report.uncovered_files[:20]:
                print(f" {uncovered}")
        # NOTE(review): assumed to be a sibling of the check above, so the
        # output file is written even when nothing is uncovered - confirm.
        if args.output:
            destination = Path(args.output)
            destination.parent.mkdir(parents=True, exist_ok=True)
            destination.write_text("\n".join(report.uncovered_files), encoding="utf-8")
        coverage_summary = {
            "status": "ok",
            "coverage_pct": round(report.coverage_pct, 1),
            "uncovered": report.uncovered_count,
        }
        print(json.dumps(coverage_summary, ensure_ascii=False), file=sys.stderr)
        return

    files = extract_file_list(entries, preset, args.doc_type, source_cache)
    listing = "\n".join(files) + "\n"
    if args.output:
        destination = Path(args.output)
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(listing, encoding="utf-8")
        print(f"Written {len(files)} files to: {args.output}")
    elif module_dir and args.doc_type:
        from core.paths import file_list_dir
        lists_dir = file_list_dir(module_dir)
        lists_dir.mkdir(parents=True, exist_ok=True)
        destination = lists_dir / f"{args.doc_type}.txt"
        destination.write_text(listing, encoding="utf-8")
        print(f"Written {len(files)} files to: {destination}")
    else:
        for path in files:
            print(path)

    print(json.dumps({"status": "ok", "files": len(files)}, ensure_ascii=False), file=sys.stderr)
cli/commands/index.py ADDED
@@ -0,0 +1,84 @@
1
+ """source-kb index / search — RAG index and search operations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import json
7
+ import sys
8
+ from pathlib import Path
9
+
10
+
11
def register(subparsers: argparse._SubParsersAction) -> None:
    """Add the ``index`` and ``search`` sub-commands.

    ``index`` is bound to ``run_index`` and ``search`` to ``run_search``.
    """
    index_cmd = subparsers.add_parser("index", help="Build/rebuild vector index for knowledge base")
    index_specs = (
        ("--config", {"help": "kb-project.yaml path"}),
        ("--kb", {"required": True, "help": "Knowledge base name"}),
        ("--module", {"help": "Module name (for incremental rebuild)"}),
        ("--files", {"nargs": "*", "help": "Specific files to rebuild (incremental)"}),
    )
    for name, options in index_specs:
        index_cmd.add_argument(name, **options)
    index_cmd.set_defaults(func=run_index)

    search_cmd = subparsers.add_parser("search", help="Search knowledge base via RAG")
    search_specs = (
        ("--config", {"help": "kb-project.yaml path"}),
        ("--kb", {"required": True, "help": "Knowledge base name"}),
        ("query", {"help": "Search query"}),
    )
    for name, options in search_specs:
        search_cmd.add_argument(name, **options)
    search_cmd.set_defaults(func=run_search)
24
+
25
+
26
def run_index(args: argparse.Namespace) -> None:
    """Load, chunk and index the documents of one knowledge base.

    ``--module`` narrows the document directory to that sub-directory and
    ``--files`` is passed to the loader as ``file_filter``; giving either one
    makes ``build_index`` run with ``incremental=True``. When no documents
    are found, nothing is indexed and the summary reports ``docs: 0``.
    """
    from core.config import load_config, find_config
    from core.rag.loader import load_documents
    from core.rag.chunker import chunk_documents
    from core.rag.indexer import build_index

    config = load_config(Path(args.config) if args.config else find_config())
    kb_cfg = config.get_kb(args.kb)
    knowledge_dir = Path(kb_cfg["knowledge_dir"])
    collection_name = kb_cfg["collection"]

    incremental = bool(args.module or args.files)
    if args.module:
        knowledge_dir = knowledge_dir / args.module

    # The filter keyword is passed only when --files was actually supplied.
    loader_kwargs = {"file_filter": args.files} if args.files else {}
    docs = load_documents(knowledge_dir, **loader_kwargs)

    if not docs:
        print(f"No documents found in {knowledge_dir}")
        print(json.dumps({"status": "ok", "docs": 0}, ensure_ascii=False), file=sys.stderr)
        return

    chunks = chunk_documents(docs)
    print(f"Indexing {len(docs)} docs, {len(chunks)} chunks...")
    build_index(chunks, collection_name, config, kb_name=args.kb, incremental=incremental)
    print(f"Index built: {len(chunks)} chunks -> collection '{collection_name}'")

    summary = {
        "status": "ok",
        "docs": len(docs),
        "chunks": len(chunks),
        "collection": collection_name,
    }
    print(json.dumps(summary, ensure_ascii=False), file=sys.stderr)
58
+
59
+
60
def run_search(args: argparse.Namespace) -> None:
    """Query the knowledge base and print the hits to stdout.

    Each hit is shown as a numbered header (source, optional section, score)
    followed by the first 200 characters of its text. The number of matches
    is reported as JSON on stderr.
    """
    from core.config import load_config, find_config
    from core.rag.retriever import retrieve

    config = load_config(Path(args.config) if args.config else find_config())
    hits = retrieve(args.query, config, kb_name=args.kb)

    if not hits:
        print(f"No results for '{args.query}'")
        print(json.dumps({"status": "ok", "matches": 0}, ensure_ascii=False), file=sys.stderr)
        return

    for rank, hit in enumerate(hits, 1):
        score = hit["score"]
        metadata = hit["metadata"]
        section = metadata.get("section", "")
        header = f"[{rank}] {metadata.get('source', '?')}"
        if section:
            header = f"{header} > {section}"
        print(f"{header} (score: {score:.3f})")
        print(f" {hit['text'][:200]}...")
        print()

    print(json.dumps({"status": "ok", "matches": len(hits)}, ensure_ascii=False), file=sys.stderr)
cli/commands/lock.py ADDED
@@ -0,0 +1,89 @@
1
+ """source-kb lock — Knowledge base lock management."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import json
7
+ import os
8
+ import sys
9
+ import time
10
+ from datetime import datetime, timezone
11
+ from pathlib import Path
12
+
13
+
14
def register(subparsers: argparse._SubParsersAction) -> None:
    """Add the ``lock`` sub-command and bind it to ``run``."""
    command = subparsers.add_parser("lock", help="Acquire/release/check knowledge base lock")
    option_specs = (
        ("--action", {"required": True, "choices": ["acquire", "release", "status"]}),
        ("--dir", {"required": True, "help": "Knowledge directory"}),
        ("--operation", {"help": "Operation name (kb-init|kb-sync|kb-audit)"}),
        ("--timeout", {"type": int, "default": 30, "help": "Timeout in minutes"}),
    )
    for flag, options in option_specs:
        command.add_argument(flag, **options)
    command.set_defaults(func=run)
21
+
22
+
23
def run(args: argparse.Namespace) -> None:
    """Acquire, release, or report the lock of the directory given by ``--dir``.

    The lock consists of two files in that directory: ``.kb-lock`` holding
    the owner PID and ``.kb-lock.json`` holding ``pid``, ``timestamp`` and
    ``operation``. Every outcome is reported as one JSON object on stderr.
    A refused ``acquire`` exits with status 1.

    ``--timeout`` (minutes) is the age beyond which an existing lock counts
    as stale and may be overridden; it falls back to 30 when absent.
    """
    lock_dir = Path(args.dir)
    lock_path = lock_dir / ".kb-lock"
    lock_meta_path = lock_dir / ".kb-lock.json"

    # The option used to be parsed but never consulted.
    timeout_minutes = getattr(args, "timeout", None)
    if timeout_minutes is None:
        timeout_minutes = 30

    if args.action == "acquire":
        lock_dir.mkdir(parents=True, exist_ok=True)

        if lock_path.exists():
            if not _is_stale_lock(lock_meta_path, timeout_minutes):
                holder = _read_meta(lock_meta_path)
                print(json.dumps({"status": "error", "message": "Already locked",
                                  **holder}, ensure_ascii=False), file=sys.stderr)
                sys.exit(1)
            lock_path.unlink(missing_ok=True)
            lock_meta_path.unlink(missing_ok=True)
            print(json.dumps({"status": "warning", "message": "Stale lock overridden"},
                             ensure_ascii=False), file=sys.stderr)

        lock_path.write_text(str(os.getpid()), encoding="utf-8")
        meta = {
            "pid": os.getpid(),
            # Local time with an explicit "+HH:MM" offset, which
            # datetime.fromisoformat reads back on every supported version.
            "timestamp": datetime.now().astimezone().isoformat(timespec="seconds"),
            "operation": args.operation or "unknown",
        }
        lock_meta_path.write_text(json.dumps(meta, ensure_ascii=False), encoding="utf-8")
        print(json.dumps({"status": "ok", "action": "acquired"}, ensure_ascii=False), file=sys.stderr)

    elif args.action == "release":
        lock_path.unlink(missing_ok=True)
        lock_meta_path.unlink(missing_ok=True)
        print(json.dumps({"status": "ok", "action": "released"}, ensure_ascii=False), file=sys.stderr)

    elif args.action == "status":
        if lock_path.exists():
            meta = _read_meta(lock_meta_path)
            stale = _is_stale_lock(lock_meta_path, timeout_minutes)
            print(json.dumps({"status": "locked", "stale": stale, **meta},
                             ensure_ascii=False), file=sys.stderr)
        else:
            print(json.dumps({"status": "unlocked"}, ensure_ascii=False), file=sys.stderr)


def _read_meta(lock_meta_path: Path) -> dict:
    """Return the lock metadata as a dict, or ``{}`` when it is unusable.

    Unusable means the file is missing, unreadable, not valid JSON, or its
    top-level JSON value is not an object.
    """
    if not lock_meta_path.exists():
        return {}
    try:
        meta = json.loads(lock_meta_path.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, OSError):
        return {}
    # Callers splat the result with **, so anything but a dict would crash them.
    return meta if isinstance(meta, dict) else {}


def _parse_lock_timestamp(raw: str) -> datetime:
    """Parse a lock timestamp into a timezone-aware ``datetime``.

    Accepts a trailing ``Z``, offsets written as ``+HH:MM`` and the
    colon-less ``+HHMM`` form produced by ``strftime("%z")``. A timestamp
    without any offset is interpreted as local time.

    Raises:
        ValueError: if the text is not an ISO-8601 date-time.
    """
    text = raw.strip().replace("Z", "+00:00")
    # datetime.fromisoformat only accepts "+HHMM" from Python 3.11 on, so the
    # colon is inserted by hand for older interpreters.
    if len(text) > 5 and text[-5] in "+-" and text[-4:].isdigit():
        text = f"{text[:-2]}:{text[-2:]}"
    parsed = datetime.fromisoformat(text)
    if parsed.tzinfo is None:
        # astimezone() on a naive value assumes the system local zone.
        parsed = parsed.astimezone()
    return parsed


def _is_stale_lock(lock_meta_path: Path, timeout_minutes: float = 30) -> bool:
    """Tell whether the lock described by ``lock_meta_path`` may be overridden.

    Args:
        lock_meta_path: Path of the ``.kb-lock.json`` metadata file.
        timeout_minutes: Age in minutes above which the lock is stale.

    Returns:
        ``True`` when the metadata is missing, unreadable, lacks a usable
        timestamp, or the timestamp is older than ``timeout_minutes``.
    """
    if not lock_meta_path.exists():
        return True
    try:
        meta = json.loads(lock_meta_path.read_text(encoding="utf-8"))
        timestamp = meta.get("timestamp", "") if isinstance(meta, dict) else ""
        if not isinstance(timestamp, str) or not timestamp:
            return True
        lock_time = _parse_lock_timestamp(timestamp)
        age_minutes = (datetime.now(timezone.utc) - lock_time).total_seconds() / 60
        return age_minutes > timeout_minutes
    except (json.JSONDecodeError, OSError, ValueError, OverflowError):
        return True
cli/commands/merge.py ADDED
@@ -0,0 +1,60 @@
1
+ """source-kb merge — Merge document shards into single files."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import json
7
+ import sys
8
+ from pathlib import Path
9
+
10
+
11
def register(subparsers: argparse._SubParsersAction) -> None:
    """Add the ``merge`` sub-command and bind it to ``run``."""
    command = subparsers.add_parser("merge", help="Merge document shards into single files")
    option_specs = (
        ("--dir", {"required": True, "help": "Module directory containing shards"}),
        ("--doc-type", {"help": "Merge only this doc type (default: all)"}),
        ("--keep-shards", {"action": "store_true", "help": "Keep shard files after merge"}),
    )
    for flag, options in option_specs:
        command.add_argument(flag, **options)
    command.set_defaults(func=run)
17
+
18
+
19
def run(args: argparse.Namespace) -> None:
    """Merge the shard files in ``--dir`` into one ``<prefix>.md`` per prefix.

    With ``--doc-type`` only that prefix is merged. Otherwise prefixes are
    collected from files named ``<prefix>-shard-*.md`` and
    ``.shard-<prefix>-*.md``. Shards are deleted after a merge unless
    ``--keep-shards`` is set. Exits with status 1 when the directory does
    not exist.
    """
    from core.skeleton.merge import find_shards, merge_shards

    module_dir = Path(args.dir)
    if not module_dir.is_dir():
        print(f"Error: directory not found: {module_dir}", file=sys.stderr)
        sys.exit(1)

    if args.doc_type:
        prefixes = [args.doc_type]
    else:
        # Everything before the last "-shard-" marker is the prefix.
        visible = {
            shard.name.rsplit("-shard-", 1)[0]
            for shard in module_dir.glob("*-shard-*.md")
        }
        # Dot-prefixed shards: drop the leading ".shard-" and the last "-..." part.
        hidden = {
            shard.name.split(".shard-", 1)[1].rsplit("-", 1)[0]
            for shard in module_dir.glob(".shard-*-*.md")
        }
        prefixes = sorted(visible | hidden)

    if not prefixes:
        print("No shards found to merge.")
        print(json.dumps({"status": "ok", "merged": 0}, ensure_ascii=False), file=sys.stderr)
        return

    merged_count = 0
    for prefix in prefixes:
        shards = find_shards(module_dir, prefix)
        if not shards:
            continue

        target = module_dir / f"{prefix}.md"
        target.write_text(merge_shards(shards), encoding="utf-8")
        print(f" Merged {len(shards)} shards -> {target.name}")

        if not args.keep_shards:
            for shard in shards:
                shard.unlink()

        merged_count += 1

    print(json.dumps({"status": "ok", "merged": merged_count}, ensure_ascii=False), file=sys.stderr)
@@ -0,0 +1,19 @@
1
+ """source-kb merge-delta — Merge skeleton delta into existing skeleton."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+
7
+
8
def register(subparsers: argparse._SubParsersAction) -> None:
    """Add the ``merge-delta`` sub-command and bind it to ``run``."""
    command = subparsers.add_parser("merge-delta", help="Merge skeleton delta into existing skeleton")
    option_specs = (
        ("--delta", {"required": True, "help": "Path to delta JSON file"}),
        ("--target", {"required": True, "help": "Target module directory"}),
        ("--dry-run", {"action": "store_true", "help": "Preview merge without writing"}),
        ("--no-cleanup", {"action": "store_true", "help": "Keep delta file after merge"}),
    )
    for flag, options in option_specs:
        command.add_argument(flag, **options)
    command.set_defaults(func=run)
15
+
16
+
17
def run(args: argparse.Namespace) -> None:
    """Forward the parsed arguments to ``core.skeleton.cmd_merge_delta``.

    The implementation is imported only when the command actually runs.
    """
    from core.skeleton.cmd_merge_delta import cmd_merge_delta as merge_delta_impl

    merge_delta_impl(args)
@@ -0,0 +1,24 @@
1
+ """source-kb metadata — Pre-generate global metadata."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import json
7
+ import sys
8
+ from pathlib import Path
9
+
10
+
11
def register(subparsers: argparse._SubParsersAction) -> None:
    """Add the ``metadata`` sub-command and bind it to ``run``."""
    command = subparsers.add_parser("metadata", help="Pre-generate global metadata for a module")
    option_specs = (
        ("--module-dir", {"required": True, "help": "Module directory"}),
        ("--module-name", {"help": "Module name (defaults to directory name)"}),
    )
    for flag, options in option_specs:
        command.add_argument(flag, **options)
    command.set_defaults(func=run)
16
+
17
+
18
def run(args: argparse.Namespace) -> None:
    """Pre-generate metadata for ``--module-dir`` and report the result.

    The module name falls back to the directory's own name when
    ``--module-name`` is empty. Whatever ``pregenerate`` returns is printed
    to stdout and repeated, as a string, in the JSON summary on stderr.
    """
    from core.skeleton.metadata import pregenerate

    module_dir = Path(args.module_dir)
    module_name = args.module_name if args.module_name else module_dir.name
    output = pregenerate(module_dir, module_name=module_name)

    print(f"Generated: {output}")
    summary = {"status": "ok", "output": str(output)}
    print(json.dumps(summary, ensure_ascii=False), file=sys.stderr)