source-kb 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (301)
  1. source_kb-0.2.4/LICENSE +21 -0
  2. source_kb-0.2.4/PKG-INFO +173 -0
  3. source_kb-0.2.4/README.en.md +140 -0
  4. source_kb-0.2.4/README.md +139 -0
  5. source_kb-0.2.4/cli/__init__.py +50 -0
  6. source_kb-0.2.4/cli/__main__.py +5 -0
  7. source_kb-0.2.4/cli/commands/__init__.py +1 -0
  8. source_kb-0.2.4/cli/commands/anchor_fix.py +47 -0
  9. source_kb-0.2.4/cli/commands/audit.py +18 -0
  10. source_kb-0.2.4/cli/commands/diff_doc.py +52 -0
  11. source_kb-0.2.4/cli/commands/dispatch.py +77 -0
  12. source_kb-0.2.4/cli/commands/extract.py +72 -0
  13. source_kb-0.2.4/cli/commands/file_list.py +74 -0
  14. source_kb-0.2.4/cli/commands/index.py +84 -0
  15. source_kb-0.2.4/cli/commands/jar_resolve.py +45 -0
  16. source_kb-0.2.4/cli/commands/lock.py +89 -0
  17. source_kb-0.2.4/cli/commands/merge.py +60 -0
  18. source_kb-0.2.4/cli/commands/merge_delta.py +19 -0
  19. source_kb-0.2.4/cli/commands/metadata.py +24 -0
  20. source_kb-0.2.4/cli/commands/module_dag.py +17 -0
  21. source_kb-0.2.4/cli/commands/post_merge.py +43 -0
  22. source_kb-0.2.4/cli/commands/query.py +52 -0
  23. source_kb-0.2.4/cli/commands/record_feedback.py +22 -0
  24. source_kb-0.2.4/cli/commands/render.py +101 -0
  25. source_kb-0.2.4/cli/commands/scan_repos.py +44 -0
  26. source_kb-0.2.4/cli/commands/setup.py +94 -0
  27. source_kb-0.2.4/cli/commands/split.py +196 -0
  28. source_kb-0.2.4/cli/commands/stale_files.py +98 -0
  29. source_kb-0.2.4/cli/commands/validate.py +191 -0
  30. source_kb-0.2.4/core/__init__.py +36 -0
  31. source_kb-0.2.4/core/config.py +261 -0
  32. source_kb-0.2.4/core/docs/__init__.py +7 -0
  33. source_kb-0.2.4/core/docs/section_updater.py +286 -0
  34. source_kb-0.2.4/core/docs/shared.py +149 -0
  35. source_kb-0.2.4/core/git.py +294 -0
  36. source_kb-0.2.4/core/interfaces.py +249 -0
  37. source_kb-0.2.4/core/monitor/__init__.py +5 -0
  38. source_kb-0.2.4/core/monitor/progress.py +83 -0
  39. source_kb-0.2.4/core/monitor/prompt_store.py +49 -0
  40. source_kb-0.2.4/core/paths.py +141 -0
  41. source_kb-0.2.4/core/preset.py +237 -0
  42. source_kb-0.2.4/core/preset_accessors.py +202 -0
  43. source_kb-0.2.4/core/preset_classify.py +132 -0
  44. source_kb-0.2.4/core/preset_hooks.py +129 -0
  45. source_kb-0.2.4/core/preset_profile.py +89 -0
  46. source_kb-0.2.4/core/prompt/__init__.py +7 -0
  47. source_kb-0.2.4/core/prompt/__main__.py +147 -0
  48. source_kb-0.2.4/core/prompt/content.py +320 -0
  49. source_kb-0.2.4/core/prompt/context_manager.py +164 -0
  50. source_kb-0.2.4/core/prompt/renderer.py +236 -0
  51. source_kb-0.2.4/core/prompt/response_parser.py +274 -0
  52. source_kb-0.2.4/core/prompt/templates.py +357 -0
  53. source_kb-0.2.4/core/prompt/validate_parity.py +162 -0
  54. source_kb-0.2.4/core/prompt/variables.py +339 -0
  55. source_kb-0.2.4/core/rag/__init__.py +22 -0
  56. source_kb-0.2.4/core/rag/__main__.py +136 -0
  57. source_kb-0.2.4/core/rag/bm25_index.py +268 -0
  58. source_kb-0.2.4/core/rag/chunker.py +273 -0
  59. source_kb-0.2.4/core/rag/embedder.py +151 -0
  60. source_kb-0.2.4/core/rag/indexer.py +292 -0
  61. source_kb-0.2.4/core/rag/loader.py +89 -0
  62. source_kb-0.2.4/core/rag/retriever.py +82 -0
  63. source_kb-0.2.4/core/scan_repos.py +664 -0
  64. source_kb-0.2.4/core/skeleton/__init__.py +11 -0
  65. source_kb-0.2.4/core/skeleton/__main__.py +934 -0
  66. source_kb-0.2.4/core/skeleton/anchor_fix.py +250 -0
  67. source_kb-0.2.4/core/skeleton/classify.py +331 -0
  68. source_kb-0.2.4/core/skeleton/cmd_anchor_fix.py +43 -0
  69. source_kb-0.2.4/core/skeleton/cmd_diff_doc.py +44 -0
  70. source_kb-0.2.4/core/skeleton/cmd_lock.py +87 -0
  71. source_kb-0.2.4/core/skeleton/cmd_merge_delta.py +41 -0
  72. source_kb-0.2.4/core/skeleton/community.py +233 -0
  73. source_kb-0.2.4/core/skeleton/dependency_graph.py +306 -0
  74. source_kb-0.2.4/core/skeleton/diff_doc.py +248 -0
  75. source_kb-0.2.4/core/skeleton/dispatch.py +273 -0
  76. source_kb-0.2.4/core/skeleton/dispatch_render.py +319 -0
  77. source_kb-0.2.4/core/skeleton/dispatch_source.py +111 -0
  78. source_kb-0.2.4/core/skeleton/extract.py +218 -0
  79. source_kb-0.2.4/core/skeleton/extract_methods.py +298 -0
  80. source_kb-0.2.4/core/skeleton/file_list.py +239 -0
  81. source_kb-0.2.4/core/skeleton/impact.py +278 -0
  82. source_kb-0.2.4/core/skeleton/jar_download.py +177 -0
  83. source_kb-0.2.4/core/skeleton/jar_resolver.py +186 -0
  84. source_kb-0.2.4/core/skeleton/loader.py +162 -0
  85. source_kb-0.2.4/core/skeleton/merge.py +278 -0
  86. source_kb-0.2.4/core/skeleton/merge_delta.py +229 -0
  87. source_kb-0.2.4/core/skeleton/metadata.py +96 -0
  88. source_kb-0.2.4/core/skeleton/metadata_builders.py +264 -0
  89. source_kb-0.2.4/core/skeleton/module_dag.py +330 -0
  90. source_kb-0.2.4/core/skeleton/parsers/__init__.py +71 -0
  91. source_kb-0.2.4/core/skeleton/parsers/jqassistant.py +300 -0
  92. source_kb-0.2.4/core/skeleton/parsers/jqassistant_cypher.py +225 -0
  93. source_kb-0.2.4/core/skeleton/parsers/regex.py +171 -0
  94. source_kb-0.2.4/core/skeleton/parsers/treesitter.py +324 -0
  95. source_kb-0.2.4/core/skeleton/parsers/treesitter_java.py +284 -0
  96. source_kb-0.2.4/core/skeleton/parsers/treesitter_multi.py +289 -0
  97. source_kb-0.2.4/core/skeleton/pom_parser.py +299 -0
  98. source_kb-0.2.4/core/skeleton/post_merge.py +295 -0
  99. source_kb-0.2.4/core/skeleton/post_merge_llm.py +82 -0
  100. source_kb-0.2.4/core/skeleton/query.py +195 -0
  101. source_kb-0.2.4/core/skeleton/shard_context.py +177 -0
  102. source_kb-0.2.4/core/skeleton/split.py +180 -0
  103. source_kb-0.2.4/core/skeleton/split_cache.py +107 -0
  104. source_kb-0.2.4/core/skeleton/split_feedback.py +174 -0
  105. source_kb-0.2.4/core/skeleton/split_plan.py +219 -0
  106. source_kb-0.2.4/core/skeleton/split_plan_helpers.py +305 -0
  107. source_kb-0.2.4/core/skeleton/split_plan_llm.py +274 -0
  108. source_kb-0.2.4/core/utils.py +135 -0
  109. source_kb-0.2.4/core/validators/__init__.py +65 -0
  110. source_kb-0.2.4/core/validators/__main__.py +215 -0
  111. source_kb-0.2.4/core/validators/consistency.py +203 -0
  112. source_kb-0.2.4/core/validators/coverage.py +171 -0
  113. source_kb-0.2.4/core/validators/duplicates.py +76 -0
  114. source_kb-0.2.4/core/validators/engine.py +224 -0
  115. source_kb-0.2.4/core/validators/links.py +76 -0
  116. source_kb-0.2.4/core/validators/sampling.py +169 -0
  117. source_kb-0.2.4/core/validators/structure.py +144 -0
  118. source_kb-0.2.4/engine/__init__.py +7 -0
  119. source_kb-0.2.4/engine/assembler.py +231 -0
  120. source_kb-0.2.4/engine/confirm.py +65 -0
  121. source_kb-0.2.4/engine/dedup.py +106 -0
  122. source_kb-0.2.4/engine/main.py +211 -0
  123. source_kb-0.2.4/engine/pipeline/__init__.py +163 -0
  124. source_kb-0.2.4/engine/pipeline/recovery.py +250 -0
  125. source_kb-0.2.4/engine/pipeline/steps/__init__.py +23 -0
  126. source_kb-0.2.4/engine/pipeline/steps/audit.py +220 -0
  127. source_kb-0.2.4/engine/pipeline/steps/audit_apply.py +195 -0
  128. source_kb-0.2.4/engine/pipeline/steps/audit_helpers.py +155 -0
  129. source_kb-0.2.4/engine/pipeline/steps/classify_llm.py +236 -0
  130. source_kb-0.2.4/engine/pipeline/steps/classify_prompt.py +223 -0
  131. source_kb-0.2.4/engine/pipeline/steps/finalize.py +160 -0
  132. source_kb-0.2.4/engine/pipeline/steps/generate.py +169 -0
  133. source_kb-0.2.4/engine/pipeline/steps/generate_batch.py +197 -0
  134. source_kb-0.2.4/engine/pipeline/steps/generate_recovery.py +170 -0
  135. source_kb-0.2.4/engine/pipeline/steps/llm_plan_split.py +253 -0
  136. source_kb-0.2.4/engine/pipeline/steps/lock.py +64 -0
  137. source_kb-0.2.4/engine/pipeline/steps/preflight.py +237 -0
  138. source_kb-0.2.4/engine/pipeline/steps/preflight_adjust.py +147 -0
  139. source_kb-0.2.4/engine/pipeline/steps/pregenerate.py +130 -0
  140. source_kb-0.2.4/engine/pipeline/steps/quality.py +81 -0
  141. source_kb-0.2.4/engine/pipeline/steps/skeleton.py +149 -0
  142. source_kb-0.2.4/engine/pipeline/steps/source.py +163 -0
  143. source_kb-0.2.4/engine/pipeline/steps/sync.py +117 -0
  144. source_kb-0.2.4/engine/pipeline/steps/sync_finalize.py +237 -0
  145. source_kb-0.2.4/engine/pipeline/steps/sync_update.py +341 -0
  146. source_kb-0.2.4/engine/pipelines.py +91 -0
  147. source_kb-0.2.4/engine/runner.py +335 -0
  148. source_kb-0.2.4/engine/strategies/__init__.py +86 -0
  149. source_kb-0.2.4/engine/strategies/api.py +128 -0
  150. source_kb-0.2.4/engine/strategies/delegated.py +50 -0
  151. source_kb-0.2.4/engine/strategies/dryrun.py +25 -0
  152. source_kb-0.2.4/engine/two_phase.py +143 -0
  153. source_kb-0.2.4/mcp_server/__init__.py +73 -0
  154. source_kb-0.2.4/mcp_server/__main__.py +5 -0
  155. source_kb-0.2.4/mcp_server/tools/__init__.py +1 -0
  156. source_kb-0.2.4/mcp_server/tools/config.py +63 -0
  157. source_kb-0.2.4/mcp_server/tools/discovery.py +276 -0
  158. source_kb-0.2.4/mcp_server/tools/generation.py +184 -0
  159. source_kb-0.2.4/mcp_server/tools/planning.py +144 -0
  160. source_kb-0.2.4/mcp_server/tools/source.py +175 -0
  161. source_kb-0.2.4/mcp_server/tools/validation.py +140 -0
  162. source_kb-0.2.4/mcp_server/tools/workflow.py +166 -0
  163. source_kb-0.2.4/mcp_server/workflow_loader.py +204 -0
  164. source_kb-0.2.4/presets/generic/audit_dimensions.md +132 -0
  165. source_kb-0.2.4/presets/generic/doc_types.yaml +152 -0
  166. source_kb-0.2.4/presets/generic/preset.yaml +115 -0
  167. source_kb-0.2.4/presets/java-spring/audit_dimensions.md +228 -0
  168. source_kb-0.2.4/presets/java-spring/audit_dimensions.yaml +203 -0
  169. source_kb-0.2.4/presets/java-spring/doc_types.yaml +269 -0
  170. source_kb-0.2.4/presets/java-spring/hooks.py +122 -0
  171. source_kb-0.2.4/presets/java-spring/preset.yaml +341 -0
  172. source_kb-0.2.4/presets/java-spring/templates/README.md +34 -0
  173. source_kb-0.2.4/presets/java-spring/templates/audit-system.md +15 -0
  174. source_kb-0.2.4/presets/java-spring/templates/subagent-aop.md +105 -0
  175. source_kb-0.2.4/presets/java-spring/templates/subagent-api.md +63 -0
  176. source_kb-0.2.4/presets/java-spring/templates/subagent-architecture.md +111 -0
  177. source_kb-0.2.4/presets/java-spring/templates/subagent-async-events.md +107 -0
  178. source_kb-0.2.4/presets/java-spring/templates/subagent-audit-api-contracts.md +40 -0
  179. source_kb-0.2.4/presets/java-spring/templates/subagent-audit-architecture.md +38 -0
  180. source_kb-0.2.4/presets/java-spring/templates/subagent-audit-business.md +40 -0
  181. source_kb-0.2.4/presets/java-spring/templates/subagent-audit-data-models.md +40 -0
  182. source_kb-0.2.4/presets/java-spring/templates/subagent-business.md +129 -0
  183. source_kb-0.2.4/presets/java-spring/templates/subagent-caching.md +75 -0
  184. source_kb-0.2.4/presets/java-spring/templates/subagent-database-access.md +114 -0
  185. source_kb-0.2.4/presets/java-spring/templates/subagent-enum.md +75 -0
  186. source_kb-0.2.4/presets/java-spring/templates/subagent-error-handling.md +91 -0
  187. source_kb-0.2.4/presets/java-spring/templates/subagent-external-integrations.md +80 -0
  188. source_kb-0.2.4/presets/java-spring/templates/subagent-index.md +122 -0
  189. source_kb-0.2.4/presets/java-spring/templates/subagent-messaging.md +97 -0
  190. source_kb-0.2.4/presets/java-spring/templates/subagent-model.md +88 -0
  191. source_kb-0.2.4/presets/java-spring/templates/subagent-observability.md +91 -0
  192. source_kb-0.2.4/presets/java-spring/templates/subagent-scheduled.md +81 -0
  193. source_kb-0.2.4/presets/java-spring/templates/subagent-security.md +102 -0
  194. source_kb-0.2.4/presets/java-spring/templates/subagent-structure.md +101 -0
  195. source_kb-0.2.4/presets/java-spring/templates/subagent-sync-section.md +34 -0
  196. source_kb-0.2.4/presets/java-spring/templates/subagent-utils.md +73 -0
  197. source_kb-0.2.4/presets/java-spring/templates/sync-system.md +8 -0
  198. source_kb-0.2.4/presets/java-spring/workflow-extensions.md +112 -0
  199. source_kb-0.2.4/pyproject.toml +62 -0
  200. source_kb-0.2.4/setup.cfg +4 -0
  201. source_kb-0.2.4/skills/__init__.py +1 -0
  202. source_kb-0.2.4/skills/_shared/README.md +30 -0
  203. source_kb-0.2.4/skills/_shared/doc-coverage-shared.md +134 -0
  204. source_kb-0.2.4/skills/_shared/doc-quality-standard.md +1058 -0
  205. source_kb-0.2.4/skills/_shared/doc-subagent-rules.md +762 -0
  206. source_kb-0.2.4/skills/_shared/windows-compat.md +89 -0
  207. source_kb-0.2.4/skills/kb-audit/SKILL.md +52 -0
  208. source_kb-0.2.4/skills/kb-audit/rules.md +88 -0
  209. source_kb-0.2.4/skills/kb-audit/steps/step-01-prepare.md +75 -0
  210. source_kb-0.2.4/skills/kb-audit/steps/step-02-audit.md +96 -0
  211. source_kb-0.2.4/skills/kb-audit/steps/step-03-verify.md +65 -0
  212. source_kb-0.2.4/skills/kb-audit/steps/step-04-report.md +64 -0
  213. source_kb-0.2.4/skills/kb-init/SKILL.md +142 -0
  214. source_kb-0.2.4/skills/kb-init/rules.md +187 -0
  215. source_kb-0.2.4/skills/kb-init/steps/step-01-scope.md +62 -0
  216. source_kb-0.2.4/skills/kb-init/steps/step-02-source.md +410 -0
  217. source_kb-0.2.4/skills/kb-init/steps/step-03-generate.md +279 -0
  218. source_kb-0.2.4/skills/kb-init/steps/step-04-quality.md +92 -0
  219. source_kb-0.2.4/skills/kb-init/steps/step-05-finalize.md +132 -0
  220. source_kb-0.2.4/skills/kb-init/templates/core/execution-modes.md +29 -0
  221. source_kb-0.2.4/skills/kb-init/templates/core/output-only.md +4 -0
  222. source_kb-0.2.4/skills/kb-init/templates/core/readwrite.md +33 -0
  223. source_kb-0.2.4/skills/kb-search/SKILL.md +138 -0
  224. source_kb-0.2.4/skills/kb-search/rules.md +64 -0
  225. source_kb-0.2.4/skills/kb-sync/SKILL.md +43 -0
  226. source_kb-0.2.4/skills/kb-sync/rules.md +70 -0
  227. source_kb-0.2.4/skills/kb-sync/steps/step-01-detect.md +72 -0
  228. source_kb-0.2.4/skills/kb-sync/steps/step-02-update.md +71 -0
  229. source_kb-0.2.4/skills/kb-sync/steps/step-03-verify.md +47 -0
  230. source_kb-0.2.4/skills/kb-sync/steps/step-04-finalize.md +52 -0
  231. source_kb-0.2.4/source_kb.egg-info/PKG-INFO +173 -0
  232. source_kb-0.2.4/source_kb.egg-info/SOURCES.txt +299 -0
  233. source_kb-0.2.4/source_kb.egg-info/dependency_links.txt +1 -0
  234. source_kb-0.2.4/source_kb.egg-info/entry_points.txt +2 -0
  235. source_kb-0.2.4/source_kb.egg-info/requires.txt +19 -0
  236. source_kb-0.2.4/source_kb.egg-info/top_level.txt +3 -0
  237. source_kb-0.2.4/tests/test_anchor_fix.py +133 -0
  238. source_kb-0.2.4/tests/test_assembler.py +144 -0
  239. source_kb-0.2.4/tests/test_bm25_index.py +174 -0
  240. source_kb-0.2.4/tests/test_classify.py +94 -0
  241. source_kb-0.2.4/tests/test_classify_files.py +142 -0
  242. source_kb-0.2.4/tests/test_cli_commands.py +73 -0
  243. source_kb-0.2.4/tests/test_cli_runner.py +116 -0
  244. source_kb-0.2.4/tests/test_cli_strategies.py +176 -0
  245. source_kb-0.2.4/tests/test_cli_sync_steps.py +371 -0
  246. source_kb-0.2.4/tests/test_community.py +155 -0
  247. source_kb-0.2.4/tests/test_config.py +219 -0
  248. source_kb-0.2.4/tests/test_context_manager.py +107 -0
  249. source_kb-0.2.4/tests/test_core_config.py +231 -0
  250. source_kb-0.2.4/tests/test_core_remaining.py +174 -0
  251. source_kb-0.2.4/tests/test_dependency_graph.py +151 -0
  252. source_kb-0.2.4/tests/test_diff_doc.py +106 -0
  253. source_kb-0.2.4/tests/test_dispatch.py +62 -0
  254. source_kb-0.2.4/tests/test_dispatch_render.py +183 -0
  255. source_kb-0.2.4/tests/test_e2e_agent.py +433 -0
  256. source_kb-0.2.4/tests/test_e2e_engine.py +385 -0
  257. source_kb-0.2.4/tests/test_engine_pipeline_full.py +168 -0
  258. source_kb-0.2.4/tests/test_engine_remaining.py +173 -0
  259. source_kb-0.2.4/tests/test_engine_steps_batch.py +306 -0
  260. source_kb-0.2.4/tests/test_generation.py +144 -0
  261. source_kb-0.2.4/tests/test_git.py +83 -0
  262. source_kb-0.2.4/tests/test_impact.py +310 -0
  263. source_kb-0.2.4/tests/test_mcp_config.py +58 -0
  264. source_kb-0.2.4/tests/test_mcp_discovery.py +182 -0
  265. source_kb-0.2.4/tests/test_mcp_planning.py +103 -0
  266. source_kb-0.2.4/tests/test_mcp_source.py +179 -0
  267. source_kb-0.2.4/tests/test_mcp_validation.py +174 -0
  268. source_kb-0.2.4/tests/test_mcp_workflow.py +100 -0
  269. source_kb-0.2.4/tests/test_merge.py +191 -0
  270. source_kb-0.2.4/tests/test_merge_delta.py +170 -0
  271. source_kb-0.2.4/tests/test_module_dag.py +144 -0
  272. source_kb-0.2.4/tests/test_phase1_quick.py +167 -0
  273. source_kb-0.2.4/tests/test_pipeline_infra.py +216 -0
  274. source_kb-0.2.4/tests/test_post_merge.py +62 -0
  275. source_kb-0.2.4/tests/test_preset.py +81 -0
  276. source_kb-0.2.4/tests/test_progress.py +77 -0
  277. source_kb-0.2.4/tests/test_prompt_assembler.py +84 -0
  278. source_kb-0.2.4/tests/test_rag_chunker.py +95 -0
  279. source_kb-0.2.4/tests/test_rag_indexer.py +100 -0
  280. source_kb-0.2.4/tests/test_rag_loader.py +91 -0
  281. source_kb-0.2.4/tests/test_recovery.py +158 -0
  282. source_kb-0.2.4/tests/test_regex_parser.py +204 -0
  283. source_kb-0.2.4/tests/test_response_parser_quick.py +69 -0
  284. source_kb-0.2.4/tests/test_shard_context.py +157 -0
  285. source_kb-0.2.4/tests/test_skeleton_main_cmds.py +194 -0
  286. source_kb-0.2.4/tests/test_skeleton_query.py +172 -0
  287. source_kb-0.2.4/tests/test_split.py +73 -0
  288. source_kb-0.2.4/tests/test_split_cache.py +153 -0
  289. source_kb-0.2.4/tests/test_split_feedback.py +57 -0
  290. source_kb-0.2.4/tests/test_split_plan.py +213 -0
  291. source_kb-0.2.4/tests/test_templates.py +117 -0
  292. source_kb-0.2.4/tests/test_utils.py +80 -0
  293. source_kb-0.2.4/tests/test_validate_parity.py +120 -0
  294. source_kb-0.2.4/tests/test_validators_consistency.py +116 -0
  295. source_kb-0.2.4/tests/test_validators_coverage.py +176 -0
  296. source_kb-0.2.4/tests/test_validators_duplicates.py +62 -0
  297. source_kb-0.2.4/tests/test_validators_engine.py +157 -0
  298. source_kb-0.2.4/tests/test_validators_links.py +81 -0
  299. source_kb-0.2.4/tests/test_validators_registry.py +61 -0
  300. source_kb-0.2.4/tests/test_validators_sampling.py +163 -0
  301. source_kb-0.2.4/tests/test_workflow_loader.py +140 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 source-kb contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,173 @@
1
+ Metadata-Version: 2.4
2
+ Name: source-kb
3
+ Version: 0.2.4
4
+ Summary: Auto-generate structured knowledge base documents from source code. Supports AI agent mode (skill-based) and standalone CLI.
5
+ License-Expression: MIT
6
+ Keywords: knowledge-base,documentation,code-analysis,llm,rag
7
+ Classifier: Development Status :: 4 - Beta
8
+ Classifier: Intended Audience :: Developers
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.10
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Topic :: Software Development :: Documentation
14
+ Requires-Python: >=3.10
15
+ Description-Content-Type: text/markdown
16
+ License-File: LICENSE
17
+ Requires-Dist: requests>=2.28.0
18
+ Requires-Dist: pyyaml>=6.0
19
+ Requires-Dist: filelock>=3.12.0
20
+ Provides-Extra: skeleton
21
+ Requires-Dist: tree-sitter<0.22.0,>=0.21.0; extra == "skeleton"
22
+ Requires-Dist: tree-sitter-languages>=1.10.0; extra == "skeleton"
23
+ Provides-Extra: rag
24
+ Requires-Dist: chromadb>=0.4.0; extra == "rag"
25
+ Provides-Extra: full
26
+ Requires-Dist: chromadb>=0.4.0; extra == "full"
27
+ Requires-Dist: tree-sitter<0.22.0,>=0.21.0; extra == "full"
28
+ Requires-Dist: tree-sitter-languages>=1.10.0; extra == "full"
29
+ Provides-Extra: dev
30
+ Requires-Dist: pytest>=7.0; extra == "dev"
31
+ Requires-Dist: ruff>=0.4.0; extra == "dev"
32
+ Dynamic: license-file
33
+
34
+ # source-kb
35
+
36
+ English | [中文](README.md)
37
+
38
+ Auto-generate structured knowledge base documents from source code, build vector indexes, and support RAG retrieval.
39
+
40
+ ## Features
41
+
42
+ - **CLI toolchain**: skeleton extraction, prompt rendering, intelligent splitting, quality validation, vector indexing — all standalone Python scripts
43
+ - **Platform-agnostic**: works in Kiro / Cursor / Claude Code / Windsurf / any AI Agent
44
+ - **LLM-agnostic**: without an Agent, use the engine with any OpenAI-compatible API (Anthropic / OpenAI / DeepSeek / Ollama / vLLM, etc.)
45
+ - **Intelligent subagent splitting**: semantic grouping by business domain (LLM-based) or package-aware greedy algorithm (code-based)
46
+ - **Two-phase generation**: outline first, then expand — eliminates information isolation between shards
47
+ - **Method-level source injection**: prioritized by complexity (high=full body / medium=first 20 lines / low=signature only)
48
+ - **Real-time quality validation**: each subagent output is verified immediately, with automatic retry on failure
49
+ - **Document deduplication**: LLM-powered post-merge dedup (internal redundancy + cross-doc ownership)
50
+ - **Module doc distillation**: summarize multiple microservice docs into domain-level documentation
51
+ - Supports any language via the preset system (java-spring is built in)
52
+ - Source code access through git remote (multi-repo & monorepo)
53
+ - Lightweight self-built RAG engine (ChromaDB + pluggable embedding)
54
+ - Auto-publish docs to source repo knowledge branch
55
+
56
+ ## Quick Start
57
+
58
+ ### Option 1: Skill Mode (recommended)
59
+
60
+ Install the CLI toolchain:
61
+
62
+ ```bash
63
+ pip install source-kb[full]
64
+ ```
65
+
66
+ Clone the presets repo into your project:
67
+
68
+ ```bash
69
+ git clone https://github.com/anthropics/source-kb-presets.git
70
+ cp -r source-kb-presets/skills ./skills
71
+ cp -r source-kb-presets/presets ./presets
72
+ cp -r source-kb-presets/examples ./examples
73
+ ```
74
+
75
+ Then talk to your AI Agent (Kiro / Claude Code / Cursor / Windsurf):
76
+
77
+ ```
78
+ Please read skills/kb-init/SKILL.md and follow the steps to initialize the knowledge base.
79
+ ```
80
+
81
+ Or add the guide to your project rules file (`.kiro/steering/`, `.cursorrules`, `.claude/CLAUDE.md`, etc.). See [Getting Started](docs/getting-started.md).
82
+
83
+ **Non-Java project?** Works the same way. Configure `kb-project.yaml` with the `generic` preset:
84
+
85
+ ```bash
86
+ # Use generic preset (works for any language)
87
+ source-kb extract --repo .source-cache/my-app --preset generic --output knowledge/my-app
88
+ ```
89
+
90
+ > The generic preset uses path-based rules + regex parser for file classification — no tree-sitter required. For higher accuracy, create a custom preset following `presets/java-spring/` as reference.
91
+
92
+ ### Option 2: CLI Mode (no Agent, requires LLM API config)
93
+
94
+ ```bash
95
+ pip install source-kb[full]
96
+
97
+ export LLM_BASE_URL="https://api.anthropic.com"
98
+ export LLM_MODEL="claude-sonnet-4-6"
99
+ export LLM_API_KEY="sk-xxx"
100
+ source-kb extract --repo .source-cache/my-app --preset java-spring --summary --output knowledge/my-app
101
+ source-kb index --kb my-app
102
+ ```
103
+
104
+ > Full guide: [Getting Started](docs/getting-started.md).
105
+
106
+ ## CLI Quick Reference
107
+
108
+ ```bash
109
+ # Skeleton extraction
110
+ source-kb extract --repo .source-cache/xxx --preset java-spring --summary --output knowledge/xxx
111
+
112
+ # Prompt rendering
113
+ source-kb render --template presets/java-spring/templates/subagent-business.md --module xxx --kb yyy --doc-type business-logic --mode readwrite
114
+
115
+ # File list extraction
116
+ source-kb file-list --skeleton knowledge/xxx/.meta/skeleton/skeleton.json --preset java-spring --doc-type business-logic --output knowledge/xxx/.meta/file-lists/business-logic.txt
117
+
118
+ # Coverage validation
119
+ source-kb validate coverage check --skeleton knowledge/xxx/.meta/skeleton/skeleton.json --docs-dir knowledge/xxx --type service
120
+
121
+ # Index & search
122
+ source-kb index --kb my-project
123
+ source-kb search --kb my-project "query"
124
+ ```
125
+
126
+ ## Skills
127
+
128
+ | Skill | Purpose |
129
+ |-------|---------|
130
+ | kb-init | Generate all docs from source + build index |
131
+ | kb-audit | Compare docs vs source, fix inconsistencies |
132
+ | kb-sync | Detect git changes, incrementally update docs and index |
133
+ | kb-search | Vector retrieval + contextual answers |
134
+
135
+ Skill files are Agent operation guides — no platform-specific instructions.
136
+
137
+ ## Requirements
138
+
139
+ - Python 3.10–3.12 (⚠️ 3.13 is not supported — tree-sitter-languages has no prebuilt wheel for it)
140
+ - Git
141
+ - Embedding backend (Ollama / OpenAI-compatible / DashScope / ChromaDB built-in)
142
+
143
+ ## Installation
144
+
145
+ ```bash
146
+ # Basic install (CLI toolchain)
147
+ pip install source-kb
148
+
149
+ # Full install (skeleton parsing + vector index)
150
+ pip install source-kb[full]
151
+
152
+ # Skeleton parsing only (tree-sitter)
153
+ pip install source-kb[skeleton]
154
+
155
+ # Vector index only (ChromaDB)
156
+ pip install source-kb[rag]
157
+ ```
158
+
159
+ Verify installation:
160
+
161
+ ```bash
162
+ source-kb --version
163
+ source-kb --help
164
+ ```
165
+
166
+ ## Documentation
167
+
168
+ - [Getting Started](docs/getting-started.md) — Installation, configuration, first run
169
+ - [Configuration Reference](docs/configuration.md) — Full kb-project.yaml field reference
170
+ - [Custom Presets](docs/custom-presets.md) — Create and customize language presets
171
+ - [Preset Development](docs/presets.md) — Built-in preset internals
172
+ - [Design Document](docs/design-v5.md) — Architecture design and technical decisions
173
+ - [CLI vs Agent Mode](docs/cli-vs-agent-mode-analysis.md) — Two modes compared
@@ -0,0 +1,140 @@
1
+ # source-kb
2
+
3
+ English | [中文](README.md)
4
+
5
+ Auto-generate structured knowledge base documents from source code, build vector indexes, and support RAG retrieval.
6
+
7
+ ## Features
8
+
9
+ - **CLI toolchain**: skeleton extraction, prompt rendering, intelligent splitting, quality validation, vector indexing — all standalone Python scripts
10
+ - **Platform-agnostic**: works in Kiro / Cursor / Claude Code / Windsurf / any AI Agent
11
+ - **LLM-agnostic**: without an Agent, use the engine with any OpenAI-compatible API (Anthropic / OpenAI / DeepSeek / Ollama / vLLM, etc.)
12
+ - **Intelligent subagent splitting**: semantic grouping by business domain (LLM-based) or package-aware greedy algorithm (code-based)
13
+ - **Two-phase generation**: outline first, then expand — eliminates information isolation between shards
14
+ - **Method-level source injection**: prioritized by complexity (high=full body / medium=first 20 lines / low=signature only)
15
+ - **Real-time quality validation**: each subagent output is verified immediately, with automatic retry on failure
16
+ - **Document deduplication**: LLM-powered post-merge dedup (internal redundancy + cross-doc ownership)
17
+ - **Module doc distillation**: summarize multiple microservice docs into domain-level documentation
18
+ - Supports any language via the preset system (java-spring is built in)
19
+ - Source code access through git remote (multi-repo & monorepo)
20
+ - Lightweight self-built RAG engine (ChromaDB + pluggable embedding)
21
+ - Auto-publish docs to source repo knowledge branch
22
+
23
+ ## Quick Start
24
+
25
+ ### Option 1: Skill Mode (recommended)
26
+
27
+ Install the CLI toolchain:
28
+
29
+ ```bash
30
+ pip install source-kb[full]
31
+ ```
32
+
33
+ Clone the presets repo into your project:
34
+
35
+ ```bash
36
+ git clone https://github.com/anthropics/source-kb-presets.git
37
+ cp -r source-kb-presets/skills ./skills
38
+ cp -r source-kb-presets/presets ./presets
39
+ cp -r source-kb-presets/examples ./examples
40
+ ```
41
+
42
+ Then talk to your AI Agent (Kiro / Claude Code / Cursor / Windsurf):
43
+
44
+ ```
45
+ Please read skills/kb-init/SKILL.md and follow the steps to initialize the knowledge base.
46
+ ```
47
+
48
+ Or add the guide to your project rules file (`.kiro/steering/`, `.cursorrules`, `.claude/CLAUDE.md`, etc.). See [Getting Started](docs/getting-started.md).
49
+
50
+ **Non-Java project?** Works the same way. Configure `kb-project.yaml` with the `generic` preset:
51
+
52
+ ```bash
53
+ # Use generic preset (works for any language)
54
+ source-kb extract --repo .source-cache/my-app --preset generic --output knowledge/my-app
55
+ ```
56
+
57
+ > The generic preset uses path-based rules + regex parser for file classification — no tree-sitter required. For higher accuracy, create a custom preset following `presets/java-spring/` as reference.
58
+
59
+ ### Option 2: CLI Mode (no Agent, requires LLM API config)
60
+
61
+ ```bash
62
+ pip install source-kb[full]
63
+
64
+ export LLM_BASE_URL="https://api.anthropic.com"
65
+ export LLM_MODEL="claude-sonnet-4-6"
66
+ export LLM_API_KEY="sk-xxx"
67
+ source-kb extract --repo .source-cache/my-app --preset java-spring --summary --output knowledge/my-app
68
+ source-kb index --kb my-app
69
+ ```
70
+
71
+ > Full guide: [Getting Started](docs/getting-started.md).
72
+
73
+ ## CLI Quick Reference
74
+
75
+ ```bash
76
+ # Skeleton extraction
77
+ source-kb extract --repo .source-cache/xxx --preset java-spring --summary --output knowledge/xxx
78
+
79
+ # Prompt rendering
80
+ source-kb render --template presets/java-spring/templates/subagent-business.md --module xxx --kb yyy --doc-type business-logic --mode readwrite
81
+
82
+ # File list extraction
83
+ source-kb file-list --skeleton knowledge/xxx/.meta/skeleton/skeleton.json --preset java-spring --doc-type business-logic --output knowledge/xxx/.meta/file-lists/business-logic.txt
84
+
85
+ # Coverage validation
86
+ source-kb validate coverage check --skeleton knowledge/xxx/.meta/skeleton/skeleton.json --docs-dir knowledge/xxx --type service
87
+
88
+ # Index & search
89
+ source-kb index --kb my-project
90
+ source-kb search --kb my-project "query"
91
+ ```
92
+
93
+ ## Skills
94
+
95
+ | Skill | Purpose |
96
+ |-------|---------|
97
+ | kb-init | Generate all docs from source + build index |
98
+ | kb-audit | Compare docs vs source, fix inconsistencies |
99
+ | kb-sync | Detect git changes, incrementally update docs and index |
100
+ | kb-search | Vector retrieval + contextual answers |
101
+
102
+ Skill files are Agent operation guides — no platform-specific instructions.
103
+
104
+ ## Requirements
105
+
106
+ - Python 3.10–3.12 (⚠️ 3.13 is not supported — tree-sitter-languages has no prebuilt wheel for it)
107
+ - Git
108
+ - Embedding backend (Ollama / OpenAI-compatible / DashScope / ChromaDB built-in)
109
+
110
+ ## Installation
111
+
112
+ ```bash
113
+ # Basic install (CLI toolchain)
114
+ pip install source-kb
115
+
116
+ # Full install (skeleton parsing + vector index)
117
+ pip install source-kb[full]
118
+
119
+ # Skeleton parsing only (tree-sitter)
120
+ pip install source-kb[skeleton]
121
+
122
+ # Vector index only (ChromaDB)
123
+ pip install source-kb[rag]
124
+ ```
125
+
126
+ Verify installation:
127
+
128
+ ```bash
129
+ source-kb --version
130
+ source-kb --help
131
+ ```
132
+
133
+ ## Documentation
134
+
135
+ - [Getting Started](docs/getting-started.md) — Installation, configuration, first run
136
+ - [Configuration Reference](docs/configuration.md) — Full kb-project.yaml field reference
137
+ - [Custom Presets](docs/custom-presets.md) — Create and customize language presets
138
+ - [Preset Development](docs/presets.md) — Built-in preset internals
139
+ - [Design Document](docs/design-v5.md) — Architecture design and technical decisions
140
+ - [CLI vs Agent Mode](docs/cli-vs-agent-mode-analysis.md) — Two modes compared
@@ -0,0 +1,139 @@
1
+ # source-kb
2
+
3
+ [English](README.en.md) | 中文
4
+
5
+ 从源码自动生成结构化知识库文档,构建向量索引,支持 RAG 检索。
6
+
7
+ ## 特性
8
+
9
+ - **CLI 工具链**:骨架提取、prompt 渲染、智能拆分、质量校验、向量索引 — 全部是独立的 Python 脚本
10
+ - **不绑定任何 AI 平台**:在 Kiro / Cursor / Claude Code / Windsurf / 任何 Agent 中使用
11
+ - **不绑定任何 LLM**:无 Agent 环境时通过 engine 调用任意 OpenAI 兼容接口(Anthropic / OpenAI / DeepSeek / Ollama / vLLM 等)
12
+ - **智能子代理拆分**:按业务域语义分组(LLM 智能拆分)或包感知贪心算法(代码规则拆分)
13
+ - **两阶段生成**:分片场景下先生成 outline 再展开,消除信息隔离
14
+ - **方法级精准注入**:按复杂度分级注入源码(high=完整/medium=前20行/low=签名)
15
+ - **实时质量校验**:每个子代理完成后立即验证,失败自动重试
16
+ - **文档去重优化**:合并后 LLM 自动去重(内部冗余 + 跨文档归属)
17
+ - **Module 文档萃取**:从多个微服务文档汇总为领域级文档
18
+ - 支持任意语言(通过 preset 预设系统,内置 java-spring)
19
+ - 源码通过 git remote 读取,支持 multi-repo 和 monorepo
20
+ - 自建轻量 RAG 引擎(ChromaDB + 可插拔 embedding)
21
+ - 文档自动发布到源码仓库 knowledge 分支
22
+
23
+ ## 快速开始
24
+
25
+ ### 方式一:Skill 模式(推荐)
26
+
27
+ 安装 CLI 工具包:
28
+
29
+ ```bash
30
+ pip install source-kb[full]
31
+ ```
32
+
33
+ 克隆 presets 仓库到你的项目中:
34
+
35
+ ```bash
36
+ git clone https://github.com/anthropics/source-kb-presets.git
37
+ cp -r source-kb-presets/skills ./skills
38
+ cp -r source-kb-presets/presets ./presets
39
+ cp -r source-kb-presets/examples ./examples
40
+ ```
41
+
42
+ 在 AI Agent(Kiro / Claude Code / Cursor / Windsurf)中直接对话:
43
+
44
+ ```
45
+ 请读取 skills/kb-init/SKILL.md,按里面的流程为我初始化知识库。
46
+ ```
47
+
48
+ 或将操作指南加入项目规则文件(`.kiro/steering/`、`.cursorrules`、`.claude/CLAUDE.md` 等),详见 [快速开始](docs/getting-started.md)。
49
+
50
+ **非 Java 项目?** 同样支持。配置 `kb-project.yaml` 时选择 `generic` preset:
51
+
52
+ ```bash
53
+ # 使用 generic preset(适用于任意语言)
54
+ source-kb extract --repo .source-cache/my-app --preset generic --output knowledge/my-app
55
+ ```
56
+
57
+ > generic preset 使用路径规则 + regex 解析器分类文件,无需 tree-sitter。如需更高精度,可参考 `presets/java-spring/` 创建自定义 preset。
58
+
59
+ ### 方式二:CLI 模式(无 Agent 环境,需配置 LLM API)
60
+
61
+ ```bash
62
+ pip install source-kb[full]
63
+
64
+ export LLM_BASE_URL="https://api.anthropic.com"
65
+ export LLM_MODEL="claude-sonnet-4-6"
66
+ export LLM_API_KEY="sk-xxx"
67
+ source-kb extract --repo .source-cache/my-app --preset java-spring --summary --output knowledge/my-app
68
+ source-kb index --kb my-app
69
+ ```
70
+
71
+ > 完整指南见 [快速开始](docs/getting-started.md)。
72
+
73
+ ## CLI 工具速查
74
+
75
+ ```bash
76
+ # 骨架提取
77
+ source-kb extract --repo .source-cache/xxx --preset java-spring --summary --output knowledge/xxx
78
+
79
+ # Prompt 渲染
80
+ source-kb render --template presets/java-spring/templates/subagent-business.md --module xxx --kb yyy --doc-type business-logic --mode readwrite
81
+
82
+ # 文件清单提取
83
+ source-kb file-list --skeleton knowledge/xxx/.meta/skeleton/skeleton.json --preset java-spring --doc-type business-logic --output knowledge/xxx/.meta/file-lists/business-logic.txt
84
+
85
+ # 覆盖率校验
86
+ source-kb validate coverage check --skeleton knowledge/xxx/.meta/skeleton/skeleton.json --docs-dir knowledge/xxx --type service
87
+
88
+ # 索引构建与检索
89
+ source-kb index --kb my-project
90
+ source-kb search --kb my-project "查询词"
91
+ ```
92
+
93
+ ## Skill 说明
94
+
95
+ | Skill | 用途 |
96
+ |-------|------|
97
+ | kb-init | 从源码生成全部文档 + 构建索引 |
98
+ | kb-audit | 逐章节对比文档与源码,修复不一致 |
99
+ | kb-sync | 检测 git 变动,增量更新文档和索引 |
100
+ | kb-search | 向量检索 + 上下文回答 |
101
+
102
+ Skill 文件是 Agent 的操作指南,不包含任何平台特定指令。
103
+
104
+ ## 环境要求
105
+
106
+ - Python 3.10~3.12(⚠️ 3.13 不支持,tree-sitter-languages 无预编译 wheel)
107
+ - Git
108
+ - Embedding 后端(Ollama / OpenAI 兼容 / DashScope / ChromaDB 内置)
109
+
110
+ ## 安装
111
+
112
+ ```bash
113
+ # 基础安装(CLI 工具链)
114
+ pip install source-kb
115
+
116
+ # 完整安装(骨架解析 + 向量索引)
117
+ pip install source-kb[full]
118
+
119
+ # 仅骨架解析(tree-sitter)
120
+ pip install source-kb[skeleton]
121
+
122
+ # 仅向量索引(ChromaDB)
123
+ pip install source-kb[rag]
124
+ ```
125
+
126
+ 验证安装:
127
+
128
+ ```bash
129
+ source-kb --version
130
+ source-kb --help
131
+ ```
132
+
133
+ ## 文档
134
+
135
+ - [快速开始](docs/getting-started.md) — 安装、配置、首次运行
136
+ - [配置参考](docs/configuration.md) — kb-project.yaml 完整字段说明
137
+ - [设计文档](docs/design-v5.md) — 架构设计与技术决策
138
+ - [CLI vs Agent 模式](docs/cli-vs-agent-mode-analysis.md) — 两种模式对比
139
+ - [Preset 开发](docs/presets.md) — 自定义语言预设指南
@@ -0,0 +1,50 @@
1
+ """source-kb — unified CLI entry point.
2
+
3
+ Usage:
4
+ source-kb <command> [options]
5
+ source-kb --help
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import argparse
11
+ import importlib
12
+ import pkgutil
13
+ import sys
14
+
15
+
16
def _discover_commands(subparsers: argparse._SubParsersAction) -> None:
    """Import every module found under ``cli.commands`` and let it register itself.

    A command module takes part by exposing a ``register`` attribute, which is
    called with *subparsers*. Modules without that attribute are still
    imported but otherwise ignored.
    """
    import cli.commands as commands_pkg

    for module_info in pkgutil.iter_modules(commands_pkg.__path__):
        loaded = importlib.import_module(f"cli.commands.{module_info.name}")
        if not hasattr(loaded, "register"):
            continue
        loaded.register(subparsers)
24
+
25
+
26
def _use_utf8(stream) -> None:
    """Switch *stream* to UTF-8 if it supports in-place reconfiguration."""
    reconfigure = getattr(stream, "reconfigure", None)
    if reconfigure is not None:
        reconfigure(encoding="utf-8")


def main(argv: list[str] | None = None) -> None:
    """Parse the command line and dispatch to the selected sub-command.

    Args:
        argv: Argument list to parse. ``None`` is passed straight to
            ``ArgumentParser.parse_args``, which then reads ``sys.argv``.

    Exits with status 0 after printing help when no sub-command is given, and
    with status 1 after printing help when the chosen sub-command did not
    attach a ``func`` handler to the parsed namespace.
    """
    # sys.stdout / sys.stderr can be None (no console attached) or replaced by
    # an object without reconfigure() (e.g. io.StringIO while output is being
    # captured); calling reconfigure() unconditionally would crash there.
    for stream in (sys.stdout, sys.stderr):
        _use_utf8(stream)

    from core import __version__

    parser = argparse.ArgumentParser(
        prog="source-kb",
        description="Auto-generate structured knowledge base documents from source code.",
    )
    parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}")

    subparsers = parser.add_subparsers(dest="command")
    _discover_commands(subparsers)

    args = parser.parse_args(argv)
    if not args.command:
        parser.print_help()
        sys.exit(0)

    if hasattr(args, "func"):
        args.func(args)
    else:
        # A sub-command was recognised but registered no handler.
        parser.print_help()
        sys.exit(1)
@@ -0,0 +1,5 @@
1
"""Allow running as: python -m cli

Importing this module runs the CLI immediately: ``main()`` is called at
module level with no arguments.
"""

from cli import main

# No ``if __name__ == "__main__"`` fence: the call below is unconditional.
main()
@@ -0,0 +1 @@
1
+ """Command registry — modules here are auto-discovered by the entry point."""
@@ -0,0 +1,47 @@
1
+ """source-kb anchor-fix — Fix broken cross-document anchor links."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import json
7
+ import sys
8
+ from pathlib import Path
9
+
10
+
11
def register(subparsers: argparse._SubParsersAction) -> None:
    """Add the ``anchor-fix`` sub-command and its options to *subparsers*.

    The created parser stores ``run`` as its ``func`` default so the entry
    point can dispatch to it.
    """
    anchor_parser = subparsers.add_parser(
        "anchor-fix",
        help="Fix broken cross-document anchor links",
    )
    option_specs = (
        ("--module-dir", {"required": True, "help": "Module documentation directory"}),
        ("--dry-run", {"action": "store_true", "help": "Report without fixing"}),
        ("--threshold", {"type": float, "default": 0.8, "help": "Fuzzy match threshold (0.0-1.0)"}),
    )
    for flag, options in option_specs:
        anchor_parser.add_argument(flag, **options)
    anchor_parser.set_defaults(func=run)
17
+
18
+
19
def run(args: argparse.Namespace) -> None:
    """Run the anchor fixer on ``args.module_dir`` and report the outcome.

    Exits with status 1 (message on stderr) when the module directory does not
    exist. Otherwise prints a human-readable summary to stdout, listing each
    changed link when any were fixed or degraded, followed by a one-line JSON
    status object on stderr.
    """
    from core.skeleton.anchor_fix import fix_anchors

    target_dir = Path(args.module_dir)
    if not target_dir.is_dir():
        print(f"Error: directory not found: {target_dir}", file=sys.stderr)
        sys.exit(1)

    outcome = fix_anchors(
        target_dir,
        dry_run=args.dry_run,
        similarity_threshold=args.threshold,
    )

    suffix = " (dry-run)" if args.dry_run else ""
    print(f"Anchor fix{suffix}: scanned {outcome.files_scanned} files, checked {outcome.links_checked} links")

    if not (outcome.links_fixed or outcome.links_degraded):
        print(" All links valid.")
    else:
        print(f" Fixed: {outcome.links_fixed}, Degraded: {outcome.links_degraded}")
        for entry in outcome.details:
            # Any action other than "fixed" is reported as "degraded".
            label = "fixed" if entry["action"] == "fixed" else "degraded"
            print(f" [{label}] {entry['file']}: {entry['old_link']} -> {entry['new_link']}")

    summary = {
        "status": "ok",
        "files_scanned": outcome.files_scanned,
        "links_checked": outcome.links_checked,
        "links_fixed": outcome.links_fixed,
        "links_degraded": outcome.links_degraded,
    }
    print(json.dumps(summary, ensure_ascii=False), file=sys.stderr)
@@ -0,0 +1,18 @@
1
+ """source-kb audit — Low-confidence classification audit."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+
7
+
8
def register(subparsers: argparse._SubParsersAction) -> None:
    """Add the ``audit`` sub-command and its options to *subparsers*.

    The created parser stores ``run`` as its ``func`` default so the entry
    point can dispatch to it.
    """
    audit_parser = subparsers.add_parser(
        "audit",
        help="Low-confidence classification audit",
    )
    option_specs = (
        ("--skeleton", {"required": True, "help": "Skeleton JSON path"}),
        ("--preset", {"required": True, "help": "Preset name"}),
        ("--threshold", {"type": float, "default": 0.7, "help": "Confidence threshold (default: 0.7)"}),
    )
    for flag, options in option_specs:
        audit_parser.add_argument(flag, **options)
    audit_parser.set_defaults(func=run)
14
+
15
+
16
def run(args: argparse.Namespace) -> None:
    """Hand the parsed arguments to ``core.skeleton.cmd_audit``.

    The import happens at call time rather than at module load.
    """
    from core.skeleton import cmd_audit as audit_command

    audit_command(args)
@@ -0,0 +1,52 @@
1
+ """source-kb diff-doc — Compare document against skeleton."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import json
7
+ import sys
8
+ from pathlib import Path
9
+
10
+
11
def register(subparsers: argparse._SubParsersAction) -> None:
    """Add the ``diff-doc`` sub-command and its options to *subparsers*.

    The created parser stores ``run`` as its ``func`` default so the entry
    point can dispatch to it.
    """
    diff_parser = subparsers.add_parser(
        "diff-doc",
        help="Compare document against skeleton for inconsistencies",
    )
    option_specs = (
        ("--doc-path", {"required": True, "help": "Path to markdown document"}),
        ("--skeleton-path", {"required": True, "help": "Path to skeleton JSON or directory"}),
        ("--doc-type", {"help": "Document type for specific comparison rules"}),
        ("--output", {"help": "Output JSON path (default: stdout)"}),
    )
    for flag, options in option_specs:
        diff_parser.add_argument(flag, **options)
    diff_parser.set_defaults(func=run)
18
+
19
+
20
def run(args: argparse.Namespace) -> None:
    """Compare a document with a skeleton and emit the findings as JSON.

    Exits with status 1 (message on stderr) when the document or the skeleton
    path does not exist. The indented JSON report is written to
    ``args.output`` when that is set, otherwise printed to stdout. A compact
    JSON status line with the missing/stale counts always goes to stderr.
    """
    from core.skeleton.diff_doc import diff_doc

    doc_path = Path(args.doc_path)
    skeleton_path = Path(args.skeleton_path)

    # Checked in this order so a missing document is reported first.
    for label, path in (("document", doc_path), ("skeleton", skeleton_path)):
        if not path.exists():
            print(f"Error: {label} not found: {path}", file=sys.stderr)
            sys.exit(1)

    result = diff_doc(doc_path, skeleton_path, doc_type=args.doc_type)

    output = {
        "doc_type": result.doc_type,
        "doc_items": result.doc_items_count,
        "skeleton_items": result.skeleton_items_count,
        "missing": result.missing_count,
        "stale": result.stale_count,
        "findings": result.to_json(),
    }

    output_json = json.dumps(output, ensure_ascii=False, indent=2)
    if args.output:
        output_path = Path(args.output)
        # Create missing parent directories so the finished report is not
        # lost to a FileNotFoundError on write.
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(output_json, encoding="utf-8")
        print(f"Written to: {args.output}")
    else:
        print(output_json)

    print(json.dumps({"status": "ok", "missing": result.missing_count,
                      "stale": result.stale_count}, ensure_ascii=False), file=sys.stderr)