source-kb 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. cli/__init__.py +50 -0
  2. cli/__main__.py +5 -0
  3. cli/commands/__init__.py +1 -0
  4. cli/commands/anchor_fix.py +47 -0
  5. cli/commands/diff_doc.py +52 -0
  6. cli/commands/dispatch.py +77 -0
  7. cli/commands/extract.py +72 -0
  8. cli/commands/file_list.py +74 -0
  9. cli/commands/index.py +84 -0
  10. cli/commands/lock.py +89 -0
  11. cli/commands/merge.py +60 -0
  12. cli/commands/merge_delta.py +19 -0
  13. cli/commands/metadata.py +24 -0
  14. cli/commands/pipeline.py +45 -0
  15. cli/commands/post_merge.py +43 -0
  16. cli/commands/query.py +52 -0
  17. cli/commands/render.py +101 -0
  18. cli/commands/scan_repos.py +46 -0
  19. cli/commands/setup.py +94 -0
  20. cli/commands/split.py +196 -0
  21. cli/commands/stale_files.py +98 -0
  22. cli/commands/validate.py +191 -0
  23. core/__init__.py +32 -0
  24. core/config.py +261 -0
  25. core/docs/__init__.py +7 -0
  26. core/docs/section_updater.py +286 -0
  27. core/docs/shared.py +149 -0
  28. core/git.py +294 -0
  29. core/interfaces.py +249 -0
  30. core/monitor/__init__.py +5 -0
  31. core/monitor/progress.py +83 -0
  32. core/monitor/prompt_store.py +49 -0
  33. core/paths.py +141 -0
  34. core/preset.py +237 -0
  35. core/preset_accessors.py +202 -0
  36. core/preset_classify.py +132 -0
  37. core/preset_hooks.py +129 -0
  38. core/preset_profile.py +89 -0
  39. core/prompt/__init__.py +7 -0
  40. core/prompt/__main__.py +147 -0
  41. core/prompt/content.py +320 -0
  42. core/prompt/context_manager.py +164 -0
  43. core/prompt/renderer.py +236 -0
  44. core/prompt/response_parser.py +274 -0
  45. core/prompt/templates.py +357 -0
  46. core/prompt/validate_parity.py +162 -0
  47. core/prompt/variables.py +339 -0
  48. core/rag/__init__.py +22 -0
  49. core/rag/__main__.py +136 -0
  50. core/rag/bm25_index.py +268 -0
  51. core/rag/chunker.py +273 -0
  52. core/rag/embedder.py +151 -0
  53. core/rag/indexer.py +292 -0
  54. core/rag/loader.py +89 -0
  55. core/rag/retriever.py +82 -0
  56. core/skeleton/__init__.py +11 -0
  57. core/skeleton/__main__.py +934 -0
  58. core/skeleton/anchor_fix.py +250 -0
  59. core/skeleton/classify.py +331 -0
  60. core/skeleton/cmd_anchor_fix.py +43 -0
  61. core/skeleton/cmd_diff_doc.py +44 -0
  62. core/skeleton/cmd_lock.py +87 -0
  63. core/skeleton/cmd_merge_delta.py +41 -0
  64. core/skeleton/community.py +233 -0
  65. core/skeleton/dependency_graph.py +306 -0
  66. core/skeleton/diff_doc.py +248 -0
  67. core/skeleton/dispatch.py +273 -0
  68. core/skeleton/dispatch_render.py +319 -0
  69. core/skeleton/dispatch_source.py +111 -0
  70. core/skeleton/extract.py +218 -0
  71. core/skeleton/extract_methods.py +298 -0
  72. core/skeleton/file_list.py +239 -0
  73. core/skeleton/impact.py +278 -0
  74. core/skeleton/jar_download.py +177 -0
  75. core/skeleton/jar_resolver.py +186 -0
  76. core/skeleton/loader.py +162 -0
  77. core/skeleton/merge.py +278 -0
  78. core/skeleton/merge_delta.py +229 -0
  79. core/skeleton/metadata.py +96 -0
  80. core/skeleton/metadata_builders.py +264 -0
  81. core/skeleton/module_dag.py +330 -0
  82. core/skeleton/parsers/__init__.py +71 -0
  83. core/skeleton/parsers/jqassistant.py +300 -0
  84. core/skeleton/parsers/jqassistant_cypher.py +225 -0
  85. core/skeleton/parsers/regex.py +171 -0
  86. core/skeleton/parsers/treesitter.py +324 -0
  87. core/skeleton/parsers/treesitter_java.py +284 -0
  88. core/skeleton/parsers/treesitter_multi.py +289 -0
  89. core/skeleton/pom_parser.py +299 -0
  90. core/skeleton/post_merge.py +295 -0
  91. core/skeleton/post_merge_llm.py +82 -0
  92. core/skeleton/query.py +195 -0
  93. core/skeleton/shard_context.py +177 -0
  94. core/skeleton/split.py +180 -0
  95. core/skeleton/split_cache.py +107 -0
  96. core/skeleton/split_feedback.py +174 -0
  97. core/skeleton/split_plan.py +219 -0
  98. core/skeleton/split_plan_helpers.py +305 -0
  99. core/skeleton/split_plan_llm.py +274 -0
  100. core/utils.py +135 -0
  101. core/validators/__init__.py +65 -0
  102. core/validators/__main__.py +215 -0
  103. core/validators/consistency.py +203 -0
  104. core/validators/coverage.py +171 -0
  105. core/validators/duplicates.py +76 -0
  106. core/validators/engine.py +224 -0
  107. core/validators/links.py +76 -0
  108. core/validators/sampling.py +169 -0
  109. core/validators/structure.py +144 -0
  110. engine/__init__.py +7 -0
  111. engine/assembler.py +231 -0
  112. engine/confirm.py +65 -0
  113. engine/dedup.py +106 -0
  114. engine/main.py +211 -0
  115. engine/pipeline/__init__.py +163 -0
  116. engine/pipeline/recovery.py +250 -0
  117. engine/pipeline/steps/__init__.py +23 -0
  118. engine/pipeline/steps/audit.py +220 -0
  119. engine/pipeline/steps/audit_apply.py +195 -0
  120. engine/pipeline/steps/audit_helpers.py +155 -0
  121. engine/pipeline/steps/classify_llm.py +236 -0
  122. engine/pipeline/steps/classify_prompt.py +223 -0
  123. engine/pipeline/steps/finalize.py +160 -0
  124. engine/pipeline/steps/generate.py +169 -0
  125. engine/pipeline/steps/generate_batch.py +197 -0
  126. engine/pipeline/steps/generate_recovery.py +170 -0
  127. engine/pipeline/steps/llm_plan_split.py +253 -0
  128. engine/pipeline/steps/lock.py +64 -0
  129. engine/pipeline/steps/preflight.py +237 -0
  130. engine/pipeline/steps/preflight_adjust.py +147 -0
  131. engine/pipeline/steps/pregenerate.py +130 -0
  132. engine/pipeline/steps/quality.py +81 -0
  133. engine/pipeline/steps/skeleton.py +149 -0
  134. engine/pipeline/steps/source.py +163 -0
  135. engine/pipeline/steps/sync.py +117 -0
  136. engine/pipeline/steps/sync_finalize.py +237 -0
  137. engine/pipeline/steps/sync_update.py +341 -0
  138. engine/pipelines.py +91 -0
  139. engine/runner.py +335 -0
  140. engine/strategies/__init__.py +86 -0
  141. engine/strategies/api.py +128 -0
  142. engine/strategies/delegated.py +50 -0
  143. engine/strategies/dryrun.py +25 -0
  144. engine/two_phase.py +143 -0
  145. mcp_server/__init__.py +73 -0
  146. mcp_server/__main__.py +5 -0
  147. mcp_server/tools/__init__.py +1 -0
  148. mcp_server/tools/config.py +63 -0
  149. mcp_server/tools/discovery.py +276 -0
  150. mcp_server/tools/generation.py +184 -0
  151. mcp_server/tools/planning.py +144 -0
  152. mcp_server/tools/source.py +175 -0
  153. mcp_server/tools/validation.py +140 -0
  154. mcp_server/tools/workflow.py +166 -0
  155. mcp_server/workflow_loader.py +204 -0
  156. presets/generic/audit_dimensions.md +132 -0
  157. presets/generic/doc_types.yaml +152 -0
  158. presets/generic/preset.yaml +115 -0
  159. presets/java-spring/audit_dimensions.md +228 -0
  160. presets/java-spring/audit_dimensions.yaml +203 -0
  161. presets/java-spring/doc_types.yaml +269 -0
  162. presets/java-spring/hooks.py +122 -0
  163. presets/java-spring/preset.yaml +341 -0
  164. presets/java-spring/templates/README.md +34 -0
  165. presets/java-spring/templates/audit-system.md +15 -0
  166. presets/java-spring/templates/subagent-aop.md +105 -0
  167. presets/java-spring/templates/subagent-api.md +63 -0
  168. presets/java-spring/templates/subagent-architecture.md +111 -0
  169. presets/java-spring/templates/subagent-async-events.md +107 -0
  170. presets/java-spring/templates/subagent-audit-api-contracts.md +40 -0
  171. presets/java-spring/templates/subagent-audit-architecture.md +38 -0
  172. presets/java-spring/templates/subagent-audit-business.md +40 -0
  173. presets/java-spring/templates/subagent-audit-data-models.md +40 -0
  174. presets/java-spring/templates/subagent-business.md +129 -0
  175. presets/java-spring/templates/subagent-caching.md +75 -0
  176. presets/java-spring/templates/subagent-database-access.md +114 -0
  177. presets/java-spring/templates/subagent-enum.md +75 -0
  178. presets/java-spring/templates/subagent-error-handling.md +91 -0
  179. presets/java-spring/templates/subagent-external-integrations.md +80 -0
  180. presets/java-spring/templates/subagent-index.md +122 -0
  181. presets/java-spring/templates/subagent-messaging.md +97 -0
  182. presets/java-spring/templates/subagent-model.md +88 -0
  183. presets/java-spring/templates/subagent-observability.md +91 -0
  184. presets/java-spring/templates/subagent-scheduled.md +81 -0
  185. presets/java-spring/templates/subagent-security.md +102 -0
  186. presets/java-spring/templates/subagent-structure.md +101 -0
  187. presets/java-spring/templates/subagent-sync-section.md +34 -0
  188. presets/java-spring/templates/subagent-utils.md +73 -0
  189. presets/java-spring/templates/sync-system.md +8 -0
  190. presets/java-spring/workflow-extensions.md +112 -0
  191. skills/__init__.py +1 -0
  192. skills/_shared/README.md +30 -0
  193. skills/_shared/doc-coverage-shared.md +134 -0
  194. skills/_shared/doc-quality-standard.md +1058 -0
  195. skills/_shared/doc-subagent-rules.md +762 -0
  196. skills/_shared/windows-compat.md +89 -0
  197. skills/kb-audit/SKILL.md +52 -0
  198. skills/kb-audit/rules.md +88 -0
  199. skills/kb-audit/steps/step-01-prepare.md +75 -0
  200. skills/kb-audit/steps/step-02-audit.md +96 -0
  201. skills/kb-audit/steps/step-03-verify.md +65 -0
  202. skills/kb-audit/steps/step-04-report.md +64 -0
  203. skills/kb-init/SKILL.md +146 -0
  204. skills/kb-init/rules.md +187 -0
  205. skills/kb-init/steps/step-01-scope.md +62 -0
  206. skills/kb-init/steps/step-02-source.md +410 -0
  207. skills/kb-init/steps/step-03-generate.md +307 -0
  208. skills/kb-init/steps/step-04-quality.md +92 -0
  209. skills/kb-init/steps/step-05-finalize.md +132 -0
  210. skills/kb-init/templates/core/execution-modes.md +29 -0
  211. skills/kb-init/templates/core/output-only.md +4 -0
  212. skills/kb-init/templates/core/readwrite.md +33 -0
  213. skills/kb-search/SKILL.md +138 -0
  214. skills/kb-search/rules.md +64 -0
  215. skills/kb-sync/SKILL.md +43 -0
  216. skills/kb-sync/rules.md +70 -0
  217. skills/kb-sync/scripts/rebuild_module.py +91 -0
  218. skills/kb-sync/scripts/scan_repos.py +687 -0
  219. skills/kb-sync/steps/step-01-detect.md +72 -0
  220. skills/kb-sync/steps/step-02-update.md +71 -0
  221. skills/kb-sync/steps/step-03-verify.md +47 -0
  222. skills/kb-sync/steps/step-04-finalize.md +52 -0
  223. source_kb-0.2.2.dist-info/METADATA +194 -0
  224. source_kb-0.2.2.dist-info/RECORD +228 -0
  225. source_kb-0.2.2.dist-info/WHEEL +5 -0
  226. source_kb-0.2.2.dist-info/entry_points.txt +3 -0
  227. source_kb-0.2.2.dist-info/licenses/LICENSE +21 -0
  228. source_kb-0.2.2.dist-info/top_level.txt +6 -0
core/paths.py ADDED
@@ -0,0 +1,141 @@
1
+ """Canonical path resolution for knowledge base artifacts.
2
+
3
+ All modules use this single source of truth for file/directory paths.
4
+ No hard-coded path strings elsewhere in the codebase.
5
+
6
+ Directory structure:
7
+ knowledge/{kb}/{module}/
8
+ ├── .meta/
9
+ │ ├── skeleton/ # Skeleton files (JSON)
10
+ │ ├── file-lists/ # File classification lists
11
+ │ ├── prompts/ # Rendered sub-agent prompts
12
+ │ └── progress/ # Progress tracking
13
+ └── *.md # Final documents
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ from pathlib import Path
19
+
20
+ META_DIR = ".meta"
21
+
22
+
23
+ # ─── Directory paths ────────────────────────────────────────
24
+
25
def meta_dir(module_dir: Path) -> Path:
    """Return the metadata directory (named by ``META_DIR``) inside *module_dir*."""
    metadata_root = module_dir / META_DIR
    return metadata_root
27
+
28
+
29
def skeleton_dir(module_dir: Path) -> Path:
    """Return the ``skeleton`` directory under the module's metadata directory."""
    return meta_dir(module_dir).joinpath("skeleton")
31
+
32
+
33
def file_list_dir(module_dir: Path) -> Path:
    """Return the ``file-lists`` directory under the module's metadata directory."""
    return meta_dir(module_dir).joinpath("file-lists")
35
+
36
+
37
def prompts_dir(module_dir: Path) -> Path:
    """Return the ``prompts`` directory under the module's metadata directory."""
    return meta_dir(module_dir).joinpath("prompts")
39
+
40
+
41
def progress_dir(module_dir: Path) -> Path:
    """Return the ``progress`` directory under the module's metadata directory."""
    return meta_dir(module_dir).joinpath("progress")
43
+
44
+
45
+ # ─── File paths ─────────────────────────────────────────────
46
+
47
def skeleton_path(module_dir: Path) -> Path:
    """Return the path of ``skeleton.json`` inside the skeleton directory."""
    return skeleton_dir(module_dir).joinpath("skeleton.json")
49
+
50
+
51
def skeleton_shards_dir(module_dir: Path) -> Path:
    """Return the ``shards`` directory inside the skeleton directory."""
    return skeleton_dir(module_dir).joinpath("shards")
53
+
54
+
55
def skeleton_summary_path(module_dir: Path) -> Path:
    """Return the path of ``summary.json`` inside the skeleton directory."""
    return skeleton_dir(module_dir).joinpath("summary.json")
57
+
58
+
59
def skeleton_stats_path(module_dir: Path) -> Path:
    """Return the path of ``stats.json`` inside the skeleton directory."""
    return skeleton_dir(module_dir).joinpath("stats.json")
61
+
62
+
63
def file_list_path(module_dir: Path, doc_type: str) -> Path:
    """Return the file-list path for *doc_type*: ``<file-lists dir>/{doc_type}.txt``."""
    filename = f"{doc_type}.txt"
    return file_list_dir(module_dir).joinpath(filename)
65
+
66
+
67
def prompt_path(module_dir: Path, doc_type: str) -> Path:
    """Return the prompt path for *doc_type*: ``<prompts dir>/{doc_type}.md``."""
    filename = f"{doc_type}.md"
    return prompts_dir(module_dir).joinpath(filename)
69
+
70
+
71
def progress_path(module_dir: Path, doc_name: str) -> Path:
    """Return the progress file path for *doc_name* (used verbatim as the file name)."""
    return progress_dir(module_dir).joinpath(doc_name)
73
+
74
+
75
def progress_pid_path(module_dir: Path, doc_name: str) -> Path:
    """Return ``<progress dir>/{doc_name}.hb-pid`` for *doc_name*."""
    filename = f"{doc_name}.hb-pid"
    return progress_dir(module_dir).joinpath(filename)
77
+
78
+
79
+ # ─── Shard paths (unified naming for both modes) ───────────
80
+
81
def shard_doc_path(module_dir: Path, doc_basename: str, shard_name: str) -> Path:
    """Return the output path of a shard document.

    The file lives directly in *module_dir* and is named
    ``{doc_basename}-{shard_name}.md``.
    """
    filename = f"{doc_basename}-{shard_name}.md"
    return module_dir / filename
84
+
85
+
86
def shard_file_list_path(module_dir: Path, doc_type: str, shard_name: str) -> Path:
    """Return the file-list path of a shard: ``<file-lists dir>/{doc_type}-{shard_name}.txt``."""
    filename = f"{doc_type}-{shard_name}.txt"
    return file_list_dir(module_dir).joinpath(filename)
89
+
90
+
91
+ # ─── Resolution (prefer new paths, fall back to legacy) ─────
92
+
93
def resolve_skeleton(module_dir: Path) -> Path | None:
    """Locate the skeleton artifact for a module.

    Lookup order: the current ``skeleton.json`` file, then the current shards
    directory, then the legacy ``.skeleton.json`` and ``.skeleton`` entries
    directly inside *module_dir*.

    Returns:
        The first candidate that exists, or ``None`` when none does.
    """
    skeleton_file = skeleton_path(module_dir)
    if skeleton_file.exists():
        return skeleton_file

    shards = skeleton_shards_dir(module_dir)
    if shards.is_dir():
        return shards

    # Legacy layout kept the skeleton next to the documents.
    legacy_candidates = (module_dir / ".skeleton.json", module_dir / ".skeleton")
    return next((path for path in legacy_candidates if path.exists()), None)
106
+
107
+
108
def resolve_skeleton_summary(module_dir: Path) -> Path | None:
    """Locate the skeleton summary file.

    Prefers the current location and falls back to the legacy
    ``.skeleton-summary.json`` inside *module_dir*. Returns ``None`` when
    neither exists.
    """
    candidates = (
        skeleton_summary_path(module_dir),
        module_dir / ".skeleton-summary.json",  # legacy location
    )
    for candidate in candidates:
        if candidate.exists():
            return candidate
    return None
117
+
118
+
119
def resolve_file_list(module_dir: Path, doc_type: str) -> Path | None:
    """Locate the file list for *doc_type*.

    Prefers the current location. Otherwise tries two legacy names inside
    *module_dir*: ``.file-list-{doc_type}.txt`` and the same pattern using
    only the part of *doc_type* before its first ``-``.

    Returns:
        The first existing path, or ``None`` when nothing is found.
    """
    current = file_list_path(module_dir, doc_type)
    if current.exists():
        return current

    # Legacy fallback
    base_type = doc_type.split("-")[0]
    legacy_names = (f".file-list-{doc_type}.txt", f".file-list-{base_type}.txt")
    for legacy_name in legacy_names:
        legacy = module_dir / legacy_name
        if legacy.exists():
            return legacy
    return None
130
+
131
+
132
def resolve_progress(module_dir: Path, doc_name: str) -> Path | None:
    """Return the progress file for *doc_name* if it exists, otherwise ``None``."""
    candidate = progress_path(module_dir, doc_name)
    if candidate.exists():
        return candidate
    return None
136
+
137
+
138
def ensure_dir(path: Path) -> Path:
    """Make sure *path* exists as a directory and hand it back.

    Missing parent directories are created as well; an already existing
    directory is not an error.
    """
    path.mkdir(exist_ok=True, parents=True)
    return path
core/preset.py ADDED
@@ -0,0 +1,237 @@
1
+ """Preset configuration loading and file classification.
2
+
3
+ Loads language presets (java-spring, generic, etc.) and provides rule-based
4
+ file classification. All classification rules come from preset.yaml config.
5
+
6
+ Public API:
7
+ load_preset(name) → dict
8
+ classify_file(preset, file_path, skeleton_entry) → list[str]
9
+ get_doc_filename(preset, doc_type) → str
10
+ get_affected_docs(preset, categories) → set[str]
11
+ get_doc_type_mapping(preset) → dict
12
+ CompiledClassifier(preset) — optimized batch classifier
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
import copy
import fnmatch
import os
import threading
from pathlib import Path
from typing import Any

import yaml
24
+
25
# Built-in presets live in a "presets" directory two levels above this file.
_BUILTIN_PRESETS_DIR = Path(__file__).resolve().parent.parent / "presets"
PRESETS_DIR = _BUILTIN_PRESETS_DIR  # backward compat

# Loaded presets keyed by preset name; load_preset guards access with _preset_lock.
_preset_cache: dict[str, dict] = {}
_preset_lock = threading.Lock()
30
+
31
+
32
+ # ---------------------------------------------------------------------------
33
+ # Preset search path
34
+ # ---------------------------------------------------------------------------
35
+
36
+
37
def _get_preset_search_dirs() -> list[Path]:
    """Build the ordered list of directories searched for presets.

    Earlier entries take priority:
        1. The directory named by the ``SOURCE_KB_PRESETS_DIR`` environment
           variable, when set and an existing directory.
        2. ``./presets`` under the current working directory, when it exists
           and does not resolve to the built-in presets directory.
        3. The built-in presets directory (always included, always last).
    """
    search_dirs: list[Path] = []

    override = os.environ.get("SOURCE_KB_PRESETS_DIR")
    if override:
        override_dir = Path(override)
        if override_dir.is_dir():
            search_dirs.append(override_dir)

    project_presets = Path.cwd() / "presets"
    if project_presets.is_dir():
        # Skip the local directory when it is the built-in one (avoids a duplicate entry).
        if project_presets.resolve() != _BUILTIN_PRESETS_DIR.resolve():
            search_dirs.append(project_presets)

    return [*search_dirs, _BUILTIN_PRESETS_DIR]
59
+
60
+
61
def _find_preset_dir(preset_name: str) -> Path | None:
    """Return the first ``<search dir>/<preset_name>`` that contains ``preset.yaml``.

    Directories are tried in the order given by ``_get_preset_search_dirs``.
    Returns ``None`` when no search directory holds the preset.
    """
    for search_root in _get_preset_search_dirs():
        preset_dir = search_root / preset_name
        if (preset_dir / "preset.yaml").exists():
            return preset_dir
    return None
68
+
69
+
70
+ # ---------------------------------------------------------------------------
71
+ # Preset loading
72
+ # ---------------------------------------------------------------------------
73
+
74
+
75
def _load_yaml_mapping(path: Path) -> dict:
    """Parse the YAML file at *path*; an empty document yields ``{}``.

    ``yaml.safe_load`` returns ``None`` for an empty file, which would break
    the dict operations performed on the result.
    """
    with open(path, encoding="utf-8") as f:
        return yaml.safe_load(f) or {}


def load_preset(preset_name: str) -> dict:
    """Load a preset's configuration, merged with its optional companion files.

    Search order: SOURCE_KB_PRESETS_DIR > ./presets/ > built-in presets.

    Loading steps:
        1. ``preset.yaml`` is the base configuration.
        2. ``custom_rules.yaml`` (if present) is merged via ``_merge_custom_rules``.
        3. ``doc_types.yaml`` (if present) replaces the top-level keys
           ``doc_types``, ``split``, ``limits`` and ``parsers``.
        4. ``_preset_dir`` is set to the directory the preset was loaded from.

    Results are cached per *preset_name* only, so a cached entry is returned
    even if the search path has changed since it was loaded.

    Args:
        preset_name: Preset directory name; must pass ``_validate_name``.

    Returns:
        A deep copy of the merged configuration, safe for callers to mutate.

    Raises:
        ValueError: If *preset_name* is not a valid preset name.
        FileNotFoundError: If no search directory contains the preset.
    """
    with _preset_lock:
        if preset_name in _preset_cache:
            return copy.deepcopy(_preset_cache[preset_name])

    _validate_name(preset_name)
    preset_dir = _find_preset_dir(preset_name)
    if preset_dir is None:
        searched = [str(d) for d in _get_preset_search_dirs()]
        raise FileNotFoundError(
            f"Preset '{preset_name}' not found. Searched: {searched}"
        )

    # An empty preset.yaml loads as {} rather than None, so the merges and
    # the "_preset_dir" assignment below cannot fail on it.
    base = _load_yaml_mapping(preset_dir / "preset.yaml")

    # Merge custom_rules.yaml if exists
    custom_path = preset_dir / "custom_rules.yaml"
    if custom_path.exists():
        base = _merge_custom_rules(base, _load_yaml_mapping(custom_path))

    # Merge doc_types.yaml if exists (doc_type definitions, split config, limits, parsers)
    doc_types_path = preset_dir / "doc_types.yaml"
    if doc_types_path.exists():
        doc_types_data = _load_yaml_mapping(doc_types_path)
        for key in ("doc_types", "split", "limits", "parsers"):
            if key in doc_types_data:
                base[key] = doc_types_data[key]

    base["_preset_dir"] = str(preset_dir)

    with _preset_lock:
        _preset_cache[preset_name] = base
    return copy.deepcopy(base)
118
+
119
+
120
def find_preset_template(preset_name: str, template_name: str) -> Path | None:
    """Look up *template_name* in the ``templates/`` directory of a preset.

    The preset directory is located with the same search path as
    ``load_preset`` (env > local > built-in).

    Returns:
        The template path when it exists; ``None`` when the preset or the
        template cannot be found.
    """
    preset_dir = _find_preset_dir(preset_name)
    if preset_dir is None:
        return None
    template = preset_dir / "templates" / template_name
    return template if template.exists() else None
132
+
133
+
134
def _validate_name(name: str) -> None:
    """Reject preset names that are not plain identifiers.

    A valid name starts with an ASCII letter or digit and continues with
    ASCII letters, digits, ``_`` or ``-`` only.

    Raises:
        ValueError: If *name* is empty or contains any other character.
    """
    import re

    # fullmatch, not match + "$": "$" also matches just before a trailing
    # newline, which would let a name such as "abc\n" through.
    if not name or not re.fullmatch(r"[a-zA-Z0-9][a-zA-Z0-9_-]*", name):
        raise ValueError(f"Invalid preset name: {name!r}")
138
+
139
+
140
def _merge_custom_rules(base: dict, custom: dict) -> dict:
    """Merge custom classification rules into a copy of the base preset.

    Only the ``file_classification`` section of *custom* is considered:

    * Keys starting with ``_`` inside a category are ignored.
    * For a category that already exists in *base*, only keys ending in
      ``_append`` with a list value are applied: their items are appended to
      the list stored under the key without the suffix, skipping items that
      are already present. Other keys do not override the base category.
    * A category that does not exist in *base* is added as given (minus the
      ``_``-prefixed keys).

    A ``file_classification`` section or an existing category that is
    ``None`` in *base* (an empty YAML mapping) is treated as empty.

    Args:
        base: Base preset configuration; not modified.
        custom: Parsed custom rules.

    Returns:
        A new, merged configuration dict.
    """
    merged = copy.deepcopy(base or {})
    custom_cls = (custom or {}).get("file_classification", {})
    if not custom_cls:
        return merged

    base_cls = merged.setdefault("file_classification", {})
    if base_cls is None:
        base_cls = merged["file_classification"] = {}

    for cat, cfg in custom_cls.items():
        if not isinstance(cfg, dict):
            continue
        clean = {k: v for k, v in cfg.items() if not k.startswith("_")}

        if cat not in base_cls:
            base_cls[cat] = clean
            continue

        if base_cls[cat] is None:
            base_cls[cat] = {}
        target = base_cls[cat]

        for key, value in clean.items():
            if not (key.endswith("_append") and isinstance(value, list)):
                continue
            real_key = key.removesuffix("_append")
            existing = target.get(real_key, [])
            if not isinstance(existing, list):
                continue  # cannot append to a non-list value; leave it untouched
            for item in value:
                if item not in existing:
                    existing.append(item)
            target[real_key] = existing
    return merged
165
+
166
+
167
+ # ---------------------------------------------------------------------------
168
+ # Doc type mapping
169
+ # ---------------------------------------------------------------------------
170
+
171
+
172
def get_doc_filename(preset: dict, doc_type: str, *, strict: bool = False) -> str:
    """Get the output filename for a doc type key.

    Args:
        preset: Loaded preset dict.
        doc_type: Key into the preset's ``doc_types`` section.
        strict: When true, an unknown *doc_type* raises instead of falling
            back to the default name.

    Returns:
        The configured ``filename`` of the doc type, or ``{doc_type}.md``
        when none is configured, the entry is not a mapping, or the doc type
        is unknown and *strict* is false.

    Raises:
        KeyError: If *strict* is true and *doc_type* is not configured.
    """
    doc_types = preset.get("doc_types") or {}
    default_name = f"{doc_type}.md"
    if doc_type in doc_types:
        cfg = doc_types[doc_type]
        # A declared but empty entry is not a dict; it still counts as
        # known and simply has no filename override.
        if isinstance(cfg, dict):
            return cfg.get("filename", default_name)
        return default_name
    if strict:
        raise KeyError(f"Unknown doc-type '{doc_type}'. Valid: {sorted(doc_types.keys())}")
    return default_name
180
+
181
+
182
def get_doc_type_mapping(preset: dict) -> dict[str, str]:
    """Return the full doc-type -> filename mapping built from ``doc_types``.

    Each doc type maps to its configured ``filename``, or to ``{key}.md``
    when no filename is configured or the entry is not a mapping. A missing
    or empty ``doc_types`` section yields an empty dict.
    """
    doc_types = preset.get("doc_types") or {}
    mapping: dict[str, str] = {}
    for dt, cfg in doc_types.items():
        default_name = f"{dt}.md"
        # Entries that are not dicts have no filename override.
        mapping[dt] = cfg.get("filename", default_name) if isinstance(cfg, dict) else default_name
    return mapping
188
+
189
+
190
def get_affected_docs(preset: dict, categories: list[str]) -> set[str]:
    """Given category names, return the affected document filenames.

    Collects the ``affects`` entries of every listed category found in the
    preset's ``file_classification`` section. Unknown categories, categories
    without a mapping config, and missing or empty ``affects`` values
    contribute nothing. A single string ``affects`` value counts as one
    filename.
    """
    classification = preset.get("file_classification") or {}
    docs: set[str] = set()
    for cat in categories:
        cfg = classification.get(cat)
        if not isinstance(cfg, dict):
            continue
        affects = cfg.get("affects") or []
        if isinstance(affects, str):
            # set.update on a string would add its individual characters.
            affects = [affects]
        docs.update(affects)
    return docs
198
+
199
+
200
def get_file_classification(preset: dict) -> dict:
    """Return a shallow copy of the preset's ``file_classification`` section.

    A preset without that section yields an empty dict.
    """
    section = preset.get("file_classification", {})
    return dict(section)
203
+
204
+
205
+ # ---------------------------------------------------------------------------
206
+ # File classification (delegated to preset_classify.py)
207
+ # ---------------------------------------------------------------------------
208
+
209
+ from core.preset_classify import classify_file # noqa: E402, F401
210
+
211
+
212
+ # ---------------------------------------------------------------------------
213
+ # Utility accessors (delegated to preset_accessors.py)
214
+ # ---------------------------------------------------------------------------
215
+
216
+ from core.preset_accessors import ( # noqa: E402, F401
217
+ get_coverage_skip_patterns,
218
+ get_generation_order,
219
+ get_doc_types,
220
+ get_doc_type_config,
221
+ get_template_path,
222
+ get_split_override,
223
+ get_ownership_keywords,
224
+ get_global_view_types,
225
+ get_limits,
226
+ get_search_routing,
227
+ get_dependency_chain,
228
+ get_dedup_rules,
229
+ get_batch_plan,
230
+ )
231
+
232
+
233
+ # ---------------------------------------------------------------------------
234
+ # Profile resolution and show-config (delegated to preset_profile.py)
235
+ # ---------------------------------------------------------------------------
236
+
237
+ from core.preset_profile import resolve_profile, show_config # noqa: E402, F401
core/preset_accessors.py ADDED
@@ -0,0 +1,202 @@
1
+ """Preset utility accessors — query preset config for various subsystems.
2
+
3
+ All functions take a loaded preset dict and return derived configuration.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from typing import Any
9
+
10
+
11
def get_coverage_skip_patterns(preset: dict) -> list[str]:
    """Return a new list holding the preset's ``coverage_skip_patterns`` (empty if unset)."""
    patterns = preset.get("coverage_skip_patterns", [])
    return [*patterns]
13
+
14
+
15
def get_generation_order(preset: dict) -> list[list[str]]:
    """Return the batch generation order derived from the ``doc_types`` config.

    Doc types are grouped by their ``batch`` field (99 when unset) and the
    groups are returned in ascending batch order; within a group the keys
    keep their configuration order. Entries that are not dicts are skipped.

    When the preset has no ``doc_types``, a copy of its ``generation_order``
    list is returned instead, defaulting to ``[["source-tree-analysis"]]``.
    """
    doc_types = preset.get("doc_types", {})
    if not doc_types:
        fallback = preset.get("generation_order", [["source-tree-analysis"]])
        return list(fallback)

    grouped: dict[int, list[str]] = {}
    for name, cfg in doc_types.items():
        if isinstance(cfg, dict):
            batch = cfg.get("batch", 99)
            if batch not in grouped:
                grouped[batch] = []
            grouped[batch].append(name)

    return [grouped[batch] for batch in sorted(grouped)]
33
+
34
+
35
def get_doc_types(preset: dict) -> dict[str, dict]:
    """Return a shallow copy of the preset's ``doc_types`` section (empty if unset)."""
    section = preset.get("doc_types", {})
    return dict(section)
38
+
39
+
40
def get_doc_type_config(preset: dict, doc_type: str) -> dict:
    """Get the config for a specific doc_type.

    Returns:
        The doc type's config dict, or an empty dict when the doc type is
        not configured or its entry is not a mapping (e.g. declared but left
        empty), so callers can always use ``.get`` on the result.
    """
    doc_types = preset.get("doc_types") or {}
    config = doc_types.get(doc_type)
    return config if isinstance(config, dict) else {}
44
+
45
+
46
def get_template_path(preset: dict, doc_type: str, preset_name: str) -> str | None:
    """Return the ``template`` value configured for *doc_type*.

    The value is a template filename such as ``'subagent-business.md'``;
    ``None`` is returned when the doc type has no ``template`` key.
    *preset_name* is accepted but not used by this function.
    """
    config = get_doc_type_config(preset, doc_type)
    template = config.get("template")
    return template
53
+
54
+
55
def get_split_override(preset: dict, doc_type: str, mode: str) -> dict:
    """Return the split threshold overrides of *doc_type* for *mode*.

    Reads ``split_override[mode]`` from the doc type's config; an empty dict
    is returned when either level is missing.
    """
    config = get_doc_type_config(preset, doc_type)
    per_mode = config.get("split_override", {})
    return per_mode.get(mode, {})
60
+
61
+
62
def get_ownership_keywords(preset: dict) -> dict[str, list[str]]:
    """Map each document filename to its ownership keywords (for duplicate detection).

    Only doc types with a non-empty ``owns_keywords`` value are included.
    The key is the doc type's ``filename``, defaulting to ``{key}.md``.
    Entries that are not dicts are skipped.
    """
    ownership: dict[str, list[str]] = {}
    for name, cfg in preset.get("doc_types", {}).items():
        if not isinstance(cfg, dict):
            continue
        keywords = cfg.get("owns_keywords", [])
        if not keywords:
            continue
        ownership[cfg.get("filename", f"{name}.md")] = keywords
    return ownership
74
+
75
+
76
def get_global_view_types(preset: dict) -> frozenset[str]:
    """Return the doc_type keys whose config sets a truthy ``global_view``.

    When the preset defines no doc types at all, the fixed set
    ``{"source-tree-analysis", "index"}`` is returned instead.
    """
    doc_types = preset.get("doc_types", {})

    flagged = {
        name
        for name, cfg in doc_types.items()
        if isinstance(cfg, dict) and cfg.get("global_view", False)
    }

    if flagged or doc_types:
        return frozenset(flagged)
    return frozenset(("source-tree-analysis", "index"))
89
+
90
+
91
def get_limits(preset: dict, project_config: dict | None = None) -> dict[str, int]:
    """Get configurable limits (timeouts, size thresholds, etc.).

    Resolution order: project_config (kb-project.yaml) > preset (doc_types.yaml) > defaults.

    Args:
        preset: Loaded preset dict; its ``limits`` section overrides the
            built-in defaults.
        project_config: Optional project configuration; its ``limits``
            section overrides both the preset and the defaults.

    Returns:
        A new dict with every default key present plus any extra keys
        supplied by the preset or project. A ``limits`` section that is
        missing or ``None`` contributes nothing.
    """
    defaults = {
        "min_doc_size_bytes": 500,
        "max_source_inline_bytes": 300000,
        "max_skeleton_inline_bytes": 50000,
        "max_output_tokens": 8192,
        "prior_docs_max_chars": 2000,
        "shard_context_max_chars": 1500,
        "spawn_timeout_default": 900,
        "heartbeat_interval": 30,
        "max_retries": 2,
    }
    # "limits:" left empty in YAML loads as None; treat it as no overrides.
    result = {**defaults, **(preset.get("limits") or {})}
    if project_config:
        result.update(project_config.get("limits") or {})
    return result
113
+
114
+
115
def get_search_routing(preset: dict) -> dict[str, list[str]]:
    """Map each document filename to its search keywords for question routing.

    Uses the doc type's ``search_keywords``; when that is missing or empty,
    the single keyword is the doc type key with ``-`` replaced by spaces.
    The filename defaults to ``{key}.md``. Entries that are not dicts are
    skipped.
    """
    routing: dict[str, list[str]] = {}
    for name, cfg in preset.get("doc_types", {}).items():
        if isinstance(cfg, dict):
            target = cfg.get("filename", f"{name}.md")
            routing[target] = cfg.get("search_keywords", []) or [name.replace("-", " ")]
    return routing
128
+
129
+
130
def get_dependency_chain(preset: dict) -> list[str]:
    """Return doc_type keys in dependency-safe generation/update order.

    Performs a topological sort over the ``depends_on`` lists of the doc
    types. Among doc types whose dependencies are all satisfied, the one
    with the lowest ``(batch, key)`` comes first (``batch`` defaults to 99).

    Only doc types configured as mappings take part. A dependency on an
    unknown key, or on a key whose entry is not a mapping, is ignored. A
    missing or ``None`` ``depends_on`` means no dependencies, and a single
    string is treated as one dependency.

    Doc types that can never be scheduled (they are part of a dependency
    cycle, or depend on one) are appended at the end in configuration order.
    """
    import heapq

    doc_types = preset.get("doc_types") or {}
    if not doc_types:
        return []

    # Keys that can actually be scheduled; computed once rather than per entry.
    valid = {key for key, cfg in doc_types.items() if isinstance(cfg, dict)}

    def priority(key: str) -> tuple:
        return (doc_types[key].get("batch", 99), key)

    # pending[key]: dependencies of key that have not been emitted yet.
    pending: dict[str, set[str]] = {}
    for key, cfg in doc_types.items():
        if key not in valid:
            continue
        raw = cfg.get("depends_on") or []
        if isinstance(raw, str):
            raw = [raw]  # set("abc") would split the name into characters
        pending[key] = {dep for dep in raw if dep in valid}

    # Reverse edges, so emitting a node only touches its direct dependents.
    dependents: dict[str, list[str]] = {key: [] for key in pending}
    for key, dep_set in pending.items():
        for dep in dep_set:
            dependents[dep].append(key)

    ready = [priority(key) for key, dep_set in pending.items() if not dep_set]
    heapq.heapify(ready)

    result: list[str] = []
    while ready:
        _, node = heapq.heappop(ready)
        result.append(node)
        for child in dependents[node]:
            remaining = pending[child]
            remaining.discard(node)
            if not remaining:
                heapq.heappush(ready, priority(child))

    # Whatever is left could not be ordered; keep it rather than dropping it.
    emitted = set(result)
    result.extend(key for key in pending if key not in emitted)
    return result
166
+
167
+
168
def get_dedup_rules(preset: dict) -> dict[str, list[str]]:
    """Map each document filename to its dedup detection patterns.

    The patterns are the doc type's ``dedup_patterns`` followed by its
    ``owns_keywords``. Doc types with neither are left out, and entries that
    are not dicts are skipped. The filename defaults to ``{key}.md``.
    """
    rules: dict[str, list[str]] = {}
    for name, cfg in preset.get("doc_types", {}).items():
        if not isinstance(cfg, dict):
            continue
        explicit = list(cfg.get("dedup_patterns", []))
        owned = cfg.get("owns_keywords", [])
        if not (explicit or owned):
            continue
        rules[cfg.get("filename", f"{name}.md")] = explicit + owned
    return rules
181
+
182
+
183
def get_batch_plan(preset: dict) -> dict[int, list[dict]]:
    """Group doc types by batch number for generation planning.

    Each doc type becomes an info dict with the keys ``name``, ``filename``,
    ``conditional``, ``depends_on``, ``global_view`` and ``template``, filed
    under its ``batch`` value (99 when unset). The entries of every batch
    are sorted by ``name``. Doc-type entries that are not dicts are skipped.
    """
    plan: dict[int, list[dict]] = {}
    for name, cfg in preset.get("doc_types", {}).items():
        if isinstance(cfg, dict):
            info = {
                "name": name,
                "filename": cfg.get("filename", f"{name}.md"),
                "conditional": cfg.get("conditional", False),
                "depends_on": cfg.get("depends_on", []),
                "global_view": cfg.get("global_view", False),
                "template": cfg.get("template"),
            }
            plan.setdefault(cfg.get("batch", 99), []).append(info)

    for entries in plan.values():
        entries.sort(key=lambda entry: entry["name"])
    return plan