source-kb 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. cli/__init__.py +50 -0
  2. cli/__main__.py +5 -0
  3. cli/commands/__init__.py +1 -0
  4. cli/commands/anchor_fix.py +47 -0
  5. cli/commands/diff_doc.py +52 -0
  6. cli/commands/dispatch.py +77 -0
  7. cli/commands/extract.py +72 -0
  8. cli/commands/file_list.py +74 -0
  9. cli/commands/index.py +84 -0
  10. cli/commands/lock.py +89 -0
  11. cli/commands/merge.py +60 -0
  12. cli/commands/merge_delta.py +19 -0
  13. cli/commands/metadata.py +24 -0
  14. cli/commands/pipeline.py +45 -0
  15. cli/commands/post_merge.py +43 -0
  16. cli/commands/query.py +52 -0
  17. cli/commands/render.py +101 -0
  18. cli/commands/scan_repos.py +46 -0
  19. cli/commands/setup.py +94 -0
  20. cli/commands/split.py +196 -0
  21. cli/commands/stale_files.py +98 -0
  22. cli/commands/validate.py +191 -0
  23. core/__init__.py +32 -0
  24. core/config.py +261 -0
  25. core/docs/__init__.py +7 -0
  26. core/docs/section_updater.py +286 -0
  27. core/docs/shared.py +149 -0
  28. core/git.py +294 -0
  29. core/interfaces.py +249 -0
  30. core/monitor/__init__.py +5 -0
  31. core/monitor/progress.py +83 -0
  32. core/monitor/prompt_store.py +49 -0
  33. core/paths.py +141 -0
  34. core/preset.py +237 -0
  35. core/preset_accessors.py +202 -0
  36. core/preset_classify.py +132 -0
  37. core/preset_hooks.py +129 -0
  38. core/preset_profile.py +89 -0
  39. core/prompt/__init__.py +7 -0
  40. core/prompt/__main__.py +147 -0
  41. core/prompt/content.py +320 -0
  42. core/prompt/context_manager.py +164 -0
  43. core/prompt/renderer.py +236 -0
  44. core/prompt/response_parser.py +274 -0
  45. core/prompt/templates.py +357 -0
  46. core/prompt/validate_parity.py +162 -0
  47. core/prompt/variables.py +339 -0
  48. core/rag/__init__.py +22 -0
  49. core/rag/__main__.py +136 -0
  50. core/rag/bm25_index.py +268 -0
  51. core/rag/chunker.py +273 -0
  52. core/rag/embedder.py +151 -0
  53. core/rag/indexer.py +292 -0
  54. core/rag/loader.py +89 -0
  55. core/rag/retriever.py +82 -0
  56. core/skeleton/__init__.py +11 -0
  57. core/skeleton/__main__.py +934 -0
  58. core/skeleton/anchor_fix.py +250 -0
  59. core/skeleton/classify.py +331 -0
  60. core/skeleton/cmd_anchor_fix.py +43 -0
  61. core/skeleton/cmd_diff_doc.py +44 -0
  62. core/skeleton/cmd_lock.py +87 -0
  63. core/skeleton/cmd_merge_delta.py +41 -0
  64. core/skeleton/community.py +233 -0
  65. core/skeleton/dependency_graph.py +306 -0
  66. core/skeleton/diff_doc.py +248 -0
  67. core/skeleton/dispatch.py +273 -0
  68. core/skeleton/dispatch_render.py +319 -0
  69. core/skeleton/dispatch_source.py +111 -0
  70. core/skeleton/extract.py +218 -0
  71. core/skeleton/extract_methods.py +298 -0
  72. core/skeleton/file_list.py +239 -0
  73. core/skeleton/impact.py +278 -0
  74. core/skeleton/jar_download.py +177 -0
  75. core/skeleton/jar_resolver.py +186 -0
  76. core/skeleton/loader.py +162 -0
  77. core/skeleton/merge.py +278 -0
  78. core/skeleton/merge_delta.py +229 -0
  79. core/skeleton/metadata.py +96 -0
  80. core/skeleton/metadata_builders.py +264 -0
  81. core/skeleton/module_dag.py +330 -0
  82. core/skeleton/parsers/__init__.py +71 -0
  83. core/skeleton/parsers/jqassistant.py +300 -0
  84. core/skeleton/parsers/jqassistant_cypher.py +225 -0
  85. core/skeleton/parsers/regex.py +171 -0
  86. core/skeleton/parsers/treesitter.py +324 -0
  87. core/skeleton/parsers/treesitter_java.py +284 -0
  88. core/skeleton/parsers/treesitter_multi.py +289 -0
  89. core/skeleton/pom_parser.py +299 -0
  90. core/skeleton/post_merge.py +295 -0
  91. core/skeleton/post_merge_llm.py +82 -0
  92. core/skeleton/query.py +195 -0
  93. core/skeleton/shard_context.py +177 -0
  94. core/skeleton/split.py +180 -0
  95. core/skeleton/split_cache.py +107 -0
  96. core/skeleton/split_feedback.py +174 -0
  97. core/skeleton/split_plan.py +219 -0
  98. core/skeleton/split_plan_helpers.py +305 -0
  99. core/skeleton/split_plan_llm.py +274 -0
  100. core/utils.py +135 -0
  101. core/validators/__init__.py +65 -0
  102. core/validators/__main__.py +215 -0
  103. core/validators/consistency.py +203 -0
  104. core/validators/coverage.py +171 -0
  105. core/validators/duplicates.py +76 -0
  106. core/validators/engine.py +224 -0
  107. core/validators/links.py +76 -0
  108. core/validators/sampling.py +169 -0
  109. core/validators/structure.py +144 -0
  110. engine/__init__.py +7 -0
  111. engine/assembler.py +231 -0
  112. engine/confirm.py +65 -0
  113. engine/dedup.py +106 -0
  114. engine/main.py +211 -0
  115. engine/pipeline/__init__.py +163 -0
  116. engine/pipeline/recovery.py +250 -0
  117. engine/pipeline/steps/__init__.py +23 -0
  118. engine/pipeline/steps/audit.py +220 -0
  119. engine/pipeline/steps/audit_apply.py +195 -0
  120. engine/pipeline/steps/audit_helpers.py +155 -0
  121. engine/pipeline/steps/classify_llm.py +236 -0
  122. engine/pipeline/steps/classify_prompt.py +223 -0
  123. engine/pipeline/steps/finalize.py +160 -0
  124. engine/pipeline/steps/generate.py +169 -0
  125. engine/pipeline/steps/generate_batch.py +197 -0
  126. engine/pipeline/steps/generate_recovery.py +170 -0
  127. engine/pipeline/steps/llm_plan_split.py +253 -0
  128. engine/pipeline/steps/lock.py +64 -0
  129. engine/pipeline/steps/preflight.py +237 -0
  130. engine/pipeline/steps/preflight_adjust.py +147 -0
  131. engine/pipeline/steps/pregenerate.py +130 -0
  132. engine/pipeline/steps/quality.py +81 -0
  133. engine/pipeline/steps/skeleton.py +149 -0
  134. engine/pipeline/steps/source.py +163 -0
  135. engine/pipeline/steps/sync.py +117 -0
  136. engine/pipeline/steps/sync_finalize.py +237 -0
  137. engine/pipeline/steps/sync_update.py +341 -0
  138. engine/pipelines.py +91 -0
  139. engine/runner.py +335 -0
  140. engine/strategies/__init__.py +86 -0
  141. engine/strategies/api.py +128 -0
  142. engine/strategies/delegated.py +50 -0
  143. engine/strategies/dryrun.py +25 -0
  144. engine/two_phase.py +143 -0
  145. mcp_server/__init__.py +73 -0
  146. mcp_server/__main__.py +5 -0
  147. mcp_server/tools/__init__.py +1 -0
  148. mcp_server/tools/config.py +63 -0
  149. mcp_server/tools/discovery.py +276 -0
  150. mcp_server/tools/generation.py +184 -0
  151. mcp_server/tools/planning.py +144 -0
  152. mcp_server/tools/source.py +175 -0
  153. mcp_server/tools/validation.py +140 -0
  154. mcp_server/tools/workflow.py +166 -0
  155. mcp_server/workflow_loader.py +204 -0
  156. presets/generic/audit_dimensions.md +132 -0
  157. presets/generic/doc_types.yaml +152 -0
  158. presets/generic/preset.yaml +115 -0
  159. presets/java-spring/audit_dimensions.md +228 -0
  160. presets/java-spring/audit_dimensions.yaml +203 -0
  161. presets/java-spring/doc_types.yaml +269 -0
  162. presets/java-spring/hooks.py +122 -0
  163. presets/java-spring/preset.yaml +341 -0
  164. presets/java-spring/templates/README.md +34 -0
  165. presets/java-spring/templates/audit-system.md +15 -0
  166. presets/java-spring/templates/subagent-aop.md +105 -0
  167. presets/java-spring/templates/subagent-api.md +63 -0
  168. presets/java-spring/templates/subagent-architecture.md +111 -0
  169. presets/java-spring/templates/subagent-async-events.md +107 -0
  170. presets/java-spring/templates/subagent-audit-api-contracts.md +40 -0
  171. presets/java-spring/templates/subagent-audit-architecture.md +38 -0
  172. presets/java-spring/templates/subagent-audit-business.md +40 -0
  173. presets/java-spring/templates/subagent-audit-data-models.md +40 -0
  174. presets/java-spring/templates/subagent-business.md +129 -0
  175. presets/java-spring/templates/subagent-caching.md +75 -0
  176. presets/java-spring/templates/subagent-database-access.md +114 -0
  177. presets/java-spring/templates/subagent-enum.md +75 -0
  178. presets/java-spring/templates/subagent-error-handling.md +91 -0
  179. presets/java-spring/templates/subagent-external-integrations.md +80 -0
  180. presets/java-spring/templates/subagent-index.md +122 -0
  181. presets/java-spring/templates/subagent-messaging.md +97 -0
  182. presets/java-spring/templates/subagent-model.md +88 -0
  183. presets/java-spring/templates/subagent-observability.md +91 -0
  184. presets/java-spring/templates/subagent-scheduled.md +81 -0
  185. presets/java-spring/templates/subagent-security.md +102 -0
  186. presets/java-spring/templates/subagent-structure.md +101 -0
  187. presets/java-spring/templates/subagent-sync-section.md +34 -0
  188. presets/java-spring/templates/subagent-utils.md +73 -0
  189. presets/java-spring/templates/sync-system.md +8 -0
  190. presets/java-spring/workflow-extensions.md +112 -0
  191. skills/__init__.py +1 -0
  192. skills/_shared/README.md +30 -0
  193. skills/_shared/doc-coverage-shared.md +134 -0
  194. skills/_shared/doc-quality-standard.md +1058 -0
  195. skills/_shared/doc-subagent-rules.md +762 -0
  196. skills/_shared/windows-compat.md +89 -0
  197. skills/kb-audit/SKILL.md +52 -0
  198. skills/kb-audit/rules.md +88 -0
  199. skills/kb-audit/steps/step-01-prepare.md +75 -0
  200. skills/kb-audit/steps/step-02-audit.md +96 -0
  201. skills/kb-audit/steps/step-03-verify.md +65 -0
  202. skills/kb-audit/steps/step-04-report.md +64 -0
  203. skills/kb-init/SKILL.md +146 -0
  204. skills/kb-init/rules.md +187 -0
  205. skills/kb-init/steps/step-01-scope.md +62 -0
  206. skills/kb-init/steps/step-02-source.md +410 -0
  207. skills/kb-init/steps/step-03-generate.md +307 -0
  208. skills/kb-init/steps/step-04-quality.md +92 -0
  209. skills/kb-init/steps/step-05-finalize.md +132 -0
  210. skills/kb-init/templates/core/execution-modes.md +29 -0
  211. skills/kb-init/templates/core/output-only.md +4 -0
  212. skills/kb-init/templates/core/readwrite.md +33 -0
  213. skills/kb-search/SKILL.md +138 -0
  214. skills/kb-search/rules.md +64 -0
  215. skills/kb-sync/SKILL.md +43 -0
  216. skills/kb-sync/rules.md +70 -0
  217. skills/kb-sync/scripts/rebuild_module.py +91 -0
  218. skills/kb-sync/scripts/scan_repos.py +687 -0
  219. skills/kb-sync/steps/step-01-detect.md +72 -0
  220. skills/kb-sync/steps/step-02-update.md +71 -0
  221. skills/kb-sync/steps/step-03-verify.md +47 -0
  222. skills/kb-sync/steps/step-04-finalize.md +52 -0
  223. source_kb-0.2.2.dist-info/METADATA +194 -0
  224. source_kb-0.2.2.dist-info/RECORD +228 -0
  225. source_kb-0.2.2.dist-info/WHEEL +5 -0
  226. source_kb-0.2.2.dist-info/entry_points.txt +3 -0
  227. source_kb-0.2.2.dist-info/licenses/LICENSE +21 -0
  228. source_kb-0.2.2.dist-info/top_level.txt +6 -0
core/git.py ADDED
@@ -0,0 +1,294 @@
1
+ """Git operations wrapper — clone, fetch, diff, file reading.
2
+
3
+ Provides retry, timeout, and input validation for all git commands.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import re
9
+ import subprocess
10
+ import time
11
+ from pathlib import Path
12
+
13
+
14
+ # ─── Errors ─────────────────────────────────────────────────
15
+
16
class GitError(Exception):
    """Base class for every error raised by this git wrapper."""


class GitAuthError(GitError):
    """Git reported an authentication or permission failure."""


class GitNetworkError(GitError):
    """A git command timed out or kept failing after all retries."""


class GitRepoNotFoundError(GitError):
    """The remote repository could not be found during clone."""


class GitBranchNotFoundError(GitError):
    """The requested branch could not be found during clone."""


class ConfigError(GitError):
    """A caller-supplied name, URL, ref or module configuration is invalid."""
33
+
34
+
35
+ # ─── Validation ─────────────────────────────────────────────
36
+
37
+ _SAFE_NAME = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9._-]*$")
38
+ _SAFE_URL = re.compile(r"^(?:https?://|git://|ssh://|git@)[^\s]+$")
39
+ _SAFE_REF = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9._/~^{}\-]*$")
40
+
41
+
42
def _validate_name(name: str, label: str = "name") -> None:
    """Reject names that are empty, contain "..", or use characters outside _SAFE_NAME.

    Args:
        name: The value to check (e.g. a module name used as a directory name).
        label: Word used for the value in the error message.

    Raises:
        ConfigError: If the name is not acceptable.
    """
    # fullmatch (not match): the whole string must fit the pattern, so a
    # trailing newline cannot slip past the "$" anchor.
    if not name or ".." in name or not _SAFE_NAME.fullmatch(name):
        raise ConfigError(f"Invalid {label}: {name!r}")
45
+
46
+
47
def _validate_url(url: str) -> None:
    """Check that a remote URL is safe to hand to git.

    An empty URL is accepted silently (nothing to validate).

    Raises:
        ConfigError: If the URL starts with "-" (it would be read as a git
            option) or does not fit _SAFE_URL.
    """
    if not url:
        return
    # fullmatch (not match): a trailing newline must not pass the "$" anchor.
    if url.startswith("-") or not _SAFE_URL.fullmatch(url):
        raise ConfigError(f"Invalid URL: {url!r}")
54
+
55
+
56
def _validate_ref(ref: str) -> None:
    """Check that a branch / commit / revision string is safe to pass to git.

    Raises:
        ConfigError: If the ref is empty, starts with "-" (would be read as an
            option), contains "..", or does not fit _SAFE_REF.
    """
    if not ref or ref.startswith("-"):
        raise ConfigError(f"Invalid ref: {ref!r}")
    # ".." is never valid inside a git ref name, and callers in this module
    # build "old..new" ranges from validated refs, so reject it anywhere
    # (this also covers the "/.." and "../" traversal forms).
    if ".." in ref:
        raise ConfigError(f"Invalid ref: {ref!r}")
    # fullmatch (not match): a trailing newline must not pass the "$" anchor.
    if not _SAFE_REF.fullmatch(ref):
        raise ConfigError(f"Invalid ref: {ref!r}")
63
+
64
+
65
+ # ─── Core ───────────────────────────────────────────────────
66
+
67
def git(repo_path: str | Path, *args, timeout: int = 60) -> tuple[int, str, str]:
    """Run ``git -C <repo_path> <args...>`` and capture its output.

    Args:
        repo_path: Directory passed to ``git -C``.
        *args: Remaining git command-line arguments.
        timeout: Seconds to wait before giving up.

    Returns:
        ``(returncode, stdout, stderr)``; output is decoded as UTF-8 with
        undecodable bytes replaced.

    Raises:
        GitNetworkError: If the command exceeds ``timeout``.
        GitError: If the git executable cannot be found.
    """
    command = ["git", "-C", str(repo_path), *args]
    try:
        proc = subprocess.run(
            command,
            capture_output=True,
            encoding="utf-8",
            errors="replace",
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        raise GitNetworkError(f"Git command timed out: {' '.join(command)}")
    except FileNotFoundError:
        raise GitError("git not found — please install git")
    return proc.returncode, proc.stdout, proc.stderr
77
+
78
+
79
+ # ─── Repository management ──────────────────────────────────
80
+
81
def fetch_with_retry(repo_path: Path, branch: str = "main", max_retries: int = 3) -> bool:
    """Run ``git fetch origin <branch>``, retrying with exponential backoff.

    Returns:
        True once a fetch succeeds; False only when ``max_retries`` allows no
        attempt at all.

    Raises:
        ConfigError: If ``branch`` fails ref validation.
        GitAuthError: On exit code 128 with an authentication / permission message.
        GitNetworkError: When the last allowed attempt also fails.
    """
    _validate_ref(branch)
    attempts_done = 0
    while attempts_done < max_retries:
        code, _, stderr = git(repo_path, "fetch", "origin", branch)
        if code == 0:
            return True
        if code == 128:
            lowered = stderr.lower()
            if "authentication" in lowered or "permission denied" in lowered:
                raise GitAuthError(f"Authentication failed: {stderr.strip()}")
        attempts_done += 1
        if attempts_done >= max_retries:
            raise GitNetworkError(f"Fetch failed after {max_retries} retries: {stderr.strip()}")
        # Backoff: 2s, 4s, 8s, ...
        time.sleep(2 ** attempts_done)
    return False
95
+
96
+
97
def clone_with_retry(url: str, target: Path, branch: str = "main", max_retries: int = 3) -> bool:
    """Shallow-clone a single branch of ``url`` into ``target``, with retries.

    If ``target`` already contains a ``.git`` entry nothing is done.

    Args:
        url: Remote repository URL (validated).
        target: Destination directory; its parent is created if needed.
        branch: Branch to clone (validated like other refs in this module).
        max_retries: Maximum number of clone attempts.

    Returns:
        True on success (or if the clone already exists); False only when
        ``max_retries`` allows no attempt at all.

    Raises:
        ConfigError: If the URL or branch fails validation.
        GitAuthError, GitRepoNotFoundError, GitBranchNotFoundError: On the
            corresponding exit-code-128 failures (not retried).
        GitNetworkError: When the last allowed attempt times out or fails.
        GitError: If the git executable cannot be found.
    """
    import shutil

    _validate_url(url)
    if (target / ".git").exists():
        return True
    # Validate the branch like fetch_with_retry does; done after the
    # early return so an existing clone is still reported as available.
    _validate_ref(branch)
    target.parent.mkdir(parents=True, exist_ok=True)

    cmd = ["git", "clone", "--depth", "1", "--single-branch", "-b", branch, "--", url, str(target)]
    for attempt in range(max_retries):
        is_last = attempt >= max_retries - 1
        try:
            r = subprocess.run(cmd, capture_output=True, encoding="utf-8", errors="replace", timeout=300)
        except subprocess.TimeoutExpired:
            # The killed git process cannot clean up after itself; a leftover
            # partial directory would make the next attempt fail immediately.
            shutil.rmtree(target, ignore_errors=True)
            if is_last:
                raise GitNetworkError(f"Clone timed out after {max_retries} retries")
            time.sleep(2 ** (attempt + 1))
            continue
        except FileNotFoundError:
            # Same error the git() helper reports when the binary is missing.
            raise GitError("git not found — please install git")

        if r.returncode == 0:
            return True
        err = r.stderr.lower()
        if r.returncode == 128:
            # Permanent failures: retrying cannot help.
            if "authentication" in err or "permission denied" in err:
                raise GitAuthError(f"Authentication failed: {r.stderr.strip()}")
            if "not found" in err or "does not exist" in err:
                raise GitRepoNotFoundError(f"Repository not found: {url}")
            if "branch" in err:
                raise GitBranchNotFoundError(f"Branch not found: {branch}")
        # Clean partial clone before retrying or giving up.
        if target.exists():
            shutil.rmtree(target, ignore_errors=True)
        if is_last:
            raise GitNetworkError(f"Clone failed after {max_retries} retries: {r.stderr.strip()}")
        time.sleep(2 ** (attempt + 1))
    return False
133
+
134
+
135
def ensure_repo(url: str | None, local: str | None, cache_dir: Path, module_name: str, branch: str = "main") -> Path:
    """Return a path to a usable checkout for ``module_name``.

    Order of preference: an existing clone under ``cache_dir`` (fetched),
    a fresh clone of ``url`` into ``cache_dir``, then the ``local`` path.

    Raises:
        ConfigError: If the module name or URL is invalid, ``local`` is not a
            git repository, or neither ``url`` nor ``local`` is given.
    """
    _validate_name(module_name, "module_name")
    target = cache_dir / module_name

    if (target / ".git").exists():
        # Cached clone already present: just refresh it.
        fetch_with_retry(target, branch)
    elif url:
        _validate_url(url)
        clone_with_retry(url, target, branch)
    elif local:
        local_path = Path(local).expanduser()
        if (local_path / ".git").exists():
            return local_path
        raise ConfigError(f"Not a git repo: {local_path}")
    else:
        raise ConfigError(f"Module {module_name} has neither url nor local configured")
    return target
153
+
154
+
155
+ # ─── File reading ───────────────────────────────────────────
156
+
157
def read_file(repo_path: Path, ref: str, filepath: str) -> str | None:
    """Return the content of ``filepath`` at ``ref`` via ``git show``.

    Returns:
        The file text, or None when git's error output says the path
        "does not exist" / is "not found".

    Raises:
        ConfigError: If ``ref`` fails validation.
        GitError: For any other ``git show`` failure.
    """
    _validate_ref(ref)
    code, stdout, stderr = git(repo_path, "show", f"{ref}:{filepath}")
    if code != 0:
        is_missing = "does not exist" in stderr or "not found" in stderr
        if is_missing:
            return None
        raise GitError(f"Failed to read file: {stderr.strip()}")
    return stdout
166
+
167
+
168
def ls_tree(repo_path: Path, ref: str = "origin/main") -> list[str]:
    """Return the paths printed by ``git ls-tree -r --name-only <ref>``.

    Raises:
        ConfigError: If ``ref`` fails validation.
        GitError: If the git command fails.
    """
    _validate_ref(ref)
    code, stdout, stderr = git(repo_path, "ls-tree", "-r", "--name-only", ref)
    if code != 0:
        raise GitError(f"ls-tree failed: {stderr.strip()}")
    paths = []
    for raw in stdout.splitlines():
        entry = raw.strip()
        if entry:  # drop blank lines
            paths.append(entry)
    return paths
175
+
176
+
177
+ # ─── Diff operations ────────────────────────────────────────
178
+
179
def get_head_commit(repo_path: Path, ref: str = "origin/main") -> str | None:
    """Resolve ``ref`` with ``git rev-parse``.

    Returns:
        The stripped command output, or None when git exits non-zero.

    Raises:
        ConfigError: If ``ref`` fails validation.
    """
    _validate_ref(ref)
    code, stdout, _ = git(repo_path, "rev-parse", ref)
    if code != 0:
        return None
    return stdout.strip()
184
+
185
+
186
def diff_files(repo_path: Path, old_commit: str, new_commit: str) -> list[dict]:
    """List files changed between two commits (``git diff --name-status``).

    Returns:
        One dict per changed file with ``status`` (first letter of git's
        status code) and ``file``. Renames ("R") and copies ("C") also carry
        ``old_path`` and ``new_path``; for both, ``file`` is the destination.

    Raises:
        ConfigError: If either commit fails ref validation.
        GitError: If the git command fails.
    """
    _validate_ref(old_commit)
    _validate_ref(new_commit)
    code, stdout, stderr = git(repo_path, "diff", "--name-status", f"{old_commit}..{new_commit}")
    if code != 0:
        raise GitError(f"diff failed: {stderr.strip()}")

    changes: list[dict] = []
    for line in stdout.splitlines():
        if not line.strip():
            continue
        parts = line.split("\t")
        if not parts[0]:
            # Line starts with a tab: no status token to read, skip it.
            continue
        # Rename/copy codes carry a similarity score ("R100"); keep the letter.
        status = parts[0][0]
        if status in ("R", "C") and len(parts) >= 3:
            # Two-path entries: <status>\t<source>\t<destination>.
            changes.append({"status": status, "old_path": parts[1], "new_path": parts[2], "file": parts[2]})
        elif len(parts) >= 2:
            changes.append({"status": status, "file": parts[1]})
    return changes
205
+
206
+
207
def diff_stat(repo_path: Path, old_commit: str, new_commit: str) -> str:
    """Return the stripped ``git diff --shortstat`` summary for a commit range.

    Raises:
        ConfigError: If either commit fails ref validation.
        GitError: If the git command fails.
    """
    for commit in (old_commit, new_commit):
        _validate_ref(commit)
    revision_range = f"{old_commit}..{new_commit}"
    code, stdout, stderr = git(repo_path, "diff", "--shortstat", revision_range)
    if code == 0:
        return stdout.strip()
    raise GitError(f"diff stat failed: {stderr.strip()}")
215
+
216
+
217
def detect_changes(repo_path: Path, from_ref: str, to_ref: str) -> list[dict]:
    """Describe the changes between two refs using readable status words.

    Returns:
        A list of ``{"path": str, "status": str}`` where status is one of
        "added", "modified", "deleted", "renamed" or "copied". Status letters
        not in the table are reported as "modified".
    """
    labels = {
        "A": "added",
        "M": "modified",
        "D": "deleted",
        "R": "renamed",
        "C": "copied",
        "T": "modified",
    }
    return [
        {"path": change["file"], "status": labels.get(change["status"], "modified")}
        for change in diff_files(repo_path, from_ref, to_ref)
    ]
229
+
230
+
231
def get_commit_log(
    repo_path: Path, from_ref: str, to_ref: str, max_count: int = 50
) -> list[dict]:
    """Return the commits in ``from_ref..to_ref`` as dicts.

    Each dict has ``hash``, ``author``, ``date`` and ``message`` (the subject
    line), read from a five-line-per-commit ``git log`` format.

    Raises:
        ConfigError: If either ref fails validation.
        GitError: If ``git log`` fails.
    """
    _validate_ref(from_ref)
    _validate_ref(to_ref)
    # Four data lines per commit followed by a "---" separator line.
    fmt = "%H%n%an%n%aI%n%s%n---"
    code, stdout, stderr = git(
        repo_path, "log", f"--max-count={max_count}", f"--format={fmt}", f"{from_ref}..{to_ref}"
    )
    if code != 0:
        raise GitError(f"git log failed: {stderr.strip()}")

    lines = stdout.strip().split("\n")
    record_size = 5  # 4 fields + separator
    commits = []
    # Only start a record when all four data lines are present.
    for start in range(0, len(lines) - 3, record_size):
        sha, author, date, subject = (text.strip() for text in lines[start:start + 4])
        commits.append({"hash": sha, "author": author, "date": date, "message": subject})
    return commits
259
+
260
+
261
def clone_repo(
    url: str, dest: Path, branch: str = "main", auth: dict | None = None
) -> Path:
    """Clone a repository with optional authentication.

    Args:
        url: Repository URL.
        dest: Destination path.
        branch: Branch to clone.
        auth: Optional dict with "type" ("ssh_key"|"token") and credentials.
            - {"type": "ssh_key", "key_path": "/path/to/key"}
            - {"type": "token", "token": "ghp_xxx"}

    Returns:
        Path to the cloned repository.

    Raises:
        ConfigError: If the URL fails validation.
        GitError: Any error raised by clone_with_retry; when a token was
            injected into the URL it is masked in the error message.
    """
    import os
    import shlex

    _validate_url(url)
    auth = auth or {}
    auth_type = auth.get("type", "")
    token = auth.get("token") if auth_type == "token" else None
    key_path = auth.get("key_path") if auth_type == "ssh_key" else None

    if token and url.startswith("https://"):
        # Inject token into HTTPS URL
        url = url.replace("https://", f"https://x-access-token:{token}@", 1)

    env_name = "GIT_SSH_COMMAND"
    unset = object()
    previous = unset
    if key_path:
        previous = os.environ.get(env_name, unset)
        # Expand "~" ourselves, then quote: git runs this value through a
        # shell, so a path with spaces would otherwise split into several args.
        quoted_key = shlex.quote(os.path.expanduser(str(key_path)))
        # NOTE(security): StrictHostKeyChecking=no disables host key
        # verification; kept for compatibility with existing behaviour.
        os.environ[env_name] = f"ssh -i {quoted_key} -o StrictHostKeyChecking=no"
    try:
        clone_with_retry(url, dest, branch)
    except GitError as exc:
        # Error messages may contain the URL; never let the token escape in them.
        if token and token in str(exc):
            raise type(exc)(str(exc).replace(token, "***")) from None
        raise
    finally:
        # Restore the environment even when the clone fails, and put back
        # any value that was set before this call.
        if key_path:
            if previous is unset:
                os.environ.pop(env_name, None)
            else:
                os.environ[env_name] = previous
    return dest
core/interfaces.py ADDED
@@ -0,0 +1,249 @@
1
+ """Abstract interfaces and shared data classes for the core package.
2
+
3
+ All ABCs and dataclasses that define contracts between layers live here.
4
+ No imports from cli/ or skill/ are permitted.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import logging
10
+ from abc import ABC, abstractmethod
11
+ from dataclasses import dataclass, field
12
+ from pathlib import Path
13
+ from typing import Any, Literal
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
+ # ---------------------------------------------------------------------------
19
+ # LLM data transfer objects
20
+ # ---------------------------------------------------------------------------
21
+
22
+
23
@dataclass
class LlmRequest:
    """Input for one LLM call: the two prompt strings plus call settings."""

    system: str
    user: str
    # None = use default from config
    model: str | None = None
    max_tokens: int = 8192
    temperature: float = 0.3
32
+
33
+
34
@dataclass
class LlmResponse:
    """Outcome of one LLM call."""

    content: str
    status: Literal["done", "delegated", "dry-run", "failed"]
    # Fresh dict per instance (never shared between responses).
    usage: dict[str, int] = field(default_factory=dict)
    elapsed: float = 0.0
    error: str = ""
    # For delegated mode
    prompt_file: str = ""
44
+
45
+
46
+ # ---------------------------------------------------------------------------
47
+ # LlmStrategy ABC
48
+ # ---------------------------------------------------------------------------
49
+
50
+
51
class LlmStrategy(ABC):
    """Interface for an LLM execution backend.

    Implementations: ApiLlmStrategy, DelegatedLlmStrategy, DryRunLlmStrategy.
    """

    @abstractmethod
    def call(self, request: LlmRequest) -> LlmResponse:
        """Execute one request and return its response."""
        ...

    def call_batch(
        self, requests: list[LlmRequest], max_concurrent: int = 5
    ) -> list[LlmResponse]:
        """Execute several requests and return responses in the same order.

        This default runs them one after another and does not use
        ``max_concurrent``; CLI overrides with ThreadPoolExecutor.
        """
        responses = []
        for request in requests:
            responses.append(self.call(request))
        return responses
65
+
66
+
67
+ # ---------------------------------------------------------------------------
68
+ # PromptAssembler ABC
69
+ # ---------------------------------------------------------------------------
70
+
71
+
72
class PromptAssembler(ABC):
    """Interface for strategies that assemble prompt content.

    CLI injects InlineAssembler (inlines source); Agent uses ReferenceAssembler
    (emits file path references).
    """

    @abstractmethod
    def resolve_file_list(
        self,
        module_dir: Path,
        doc_type: str,
        file_list_override: str | None = None,
    ) -> str:
        ...

    @abstractmethod
    def resolve_source_content(
        self, module_dir: Path, doc_type: str, source_cache: Path
    ) -> str:
        ...

    def resolve_source_content_from_paths(
        self,
        module_dir: Path,
        doc_type: str,
        source_cache: Path,
        file_paths: list[str],
    ) -> str:
        """Resolve source content for an explicit list of file paths (shard override).

        The base implementation does not use ``file_paths``; it delegates to
        resolve_source_content. Subclasses may override for shard-aware handling.
        """
        fallback = self.resolve_source_content(module_dir, doc_type, source_cache)
        return fallback

    @abstractmethod
    def resolve_skeleton_content(self, module_dir: Path) -> str:
        ...

    @abstractmethod
    def should_append_source(self) -> bool:
        ...
103
+
104
+
105
+ # ---------------------------------------------------------------------------
106
+ # SkeletonParser ABC
107
+ # ---------------------------------------------------------------------------
108
+
109
+
110
class SkeletonParser(ABC):
    """Interface for a language-specific skeleton parser, registered by name."""

    name: str
    # Higher = preferred (jqassistant=100, treesitter=80, regex=50)
    priority: int

    @abstractmethod
    def can_parse(self, repo_path: Path, preset: dict[str, Any]) -> bool:
        ...

    @abstractmethod
    def parse(
        self,
        repo_path: Path,
        preset: dict[str, Any],
        **kwargs: Any,
    ) -> list[dict[str, Any]]:
        ...
123
+
124
+
125
+ # ---------------------------------------------------------------------------
126
+ # Step ABC and StepResult
127
+ # ---------------------------------------------------------------------------
128
+
129
+
130
@dataclass
class StepResult:
    """Result of running one pipeline step."""

    status: Literal["ok", "failed", "skipped", "delegated"]
    message: str = ""
    details: dict[str, Any] = field(default_factory=dict)

    def _has_status(self, expected: str) -> bool:
        """Return True when the stored status equals ``expected``."""
        return self.status == expected

    @property
    def ok(self) -> bool:
        """True when status is "ok"."""
        return self._has_status("ok")

    @property
    def skipped(self) -> bool:
        """True when status is "skipped"."""
        return self._has_status("skipped")

    @property
    def delegated(self) -> bool:
        """True when status is "delegated"."""
        return self._has_status("delegated")

    def __str__(self) -> str:
        # Rendered as "[STATUS] message".
        return "[{}] {}".format(self.status.upper(), self.message)
152
+
153
+
154
class Step(ABC):
    """Base class for pipeline steps; a step may carry a checkpoint and a rollback."""

    name: str
    checkpoint: str | None = None

    def __init__(self, name: str, checkpoint: str | None = None):
        self.name, self.checkpoint = name, checkpoint

    @abstractmethod
    def run(self, ctx: PipelineContext) -> StepResult:
        ...

    def rollback(self, ctx: PipelineContext) -> None:
        """Hook for cleanup after a failure; does nothing unless overridden."""
        return None

    def __repr__(self) -> str:
        # The checkpoint suffix only appears when a checkpoint is set.
        suffix = ""
        if self.checkpoint:
            suffix = f" (cp={self.checkpoint})"
        return f"<{type(self).__name__} '{self.name}'{suffix}>"
173
+
174
+
175
+ # ---------------------------------------------------------------------------
176
+ # PipelineContext
177
+ # ---------------------------------------------------------------------------
178
+
179
+
180
@dataclass
class PipelineContext:
    """Shared state carried across pipeline steps."""

    config: dict[str, Any]
    kb_name: str
    kb_config: dict[str, Any]
    knowledge_dir: Path
    project_root: Path
    module: str | None = None
    cache_dir: Path = field(default_factory=lambda: Path(".source-cache"))
    state: dict[str, Any] = field(default_factory=dict)

    @classmethod
    def from_config(
        cls,
        config: dict[str, Any],
        kb_name: str,
        *,
        module: str | None = None,
    ) -> PipelineContext:
        """Construct context from a loaded configuration dict.

        Reads ``config["knowledge_bases"][kb_name]`` for ``knowledge_dir`` and
        the optional ``source.cache_dir``; ``project_root`` is the current
        working directory.

        Raises:
            KeyError: If the knowledge base or its ``knowledge_dir`` is missing.
        """
        kb_config = config["knowledge_bases"][kb_name]
        # "source" may be absent or explicitly null (e.g. an empty YAML key);
        # treat both as "no source settings" instead of failing on None.get().
        source = kb_config.get("source") or {}
        cache_dir = Path(source.get("cache_dir") or "./.source-cache")

        return cls(
            config=config,
            kb_name=kb_name,
            kb_config=kb_config,
            knowledge_dir=Path(kb_config["knowledge_dir"]),
            project_root=Path.cwd(),
            module=module,
            cache_dir=cache_dir,
        )
217
+
218
+
219
+ # ---------------------------------------------------------------------------
220
+ # Validator ABC and ValidationResult
221
+ # ---------------------------------------------------------------------------
222
+
223
+
224
@dataclass
class ValidationResult:
    """Errors and warnings collected by a validator run."""

    errors: list[str] = field(default_factory=list)
    warnings: list[str] = field(default_factory=list)

    @property
    def passed(self) -> bool:
        """True when no errors were recorded (warnings do not count)."""
        error_count = len(self.errors)
        return error_count == 0

    def merge(self, other: ValidationResult) -> ValidationResult:
        """Return a new result holding this result's entries followed by ``other``'s."""
        combined_errors = self.errors + other.errors
        combined_warnings = self.warnings + other.warnings
        return ValidationResult(errors=combined_errors, warnings=combined_warnings)
241
+
242
+
243
class Validator(ABC):
    """Interface for a document quality validator, registered by name."""

    name: str

    @abstractmethod
    def validate(self, module_dir: Path, **kwargs: Any) -> ValidationResult:
        ...
@@ -0,0 +1,5 @@
1
+ """core.monitor — progress monitoring and heartbeat detection.
2
+
3
+ Provides structured progress file read/write and heartbeat timeout detection.
4
+ Used by both CLI (automatic heartbeat) and Agent (manual progress check).
5
+ """
@@ -0,0 +1,83 @@
1
+ """Progress monitoring — read/write progress files, detect heartbeat timeouts.
2
+
3
+ Usage:
4
+ from core.monitor.progress import write_progress, read_progress, check_heartbeat
5
+
6
+ write_progress(module_dir, "business-logic", "RUNNING")
7
+ status = read_progress(module_dir, "business-logic")
8
+ stale = check_heartbeat(module_dir, timeout_seconds=60)
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import time
14
+ from pathlib import Path
15
+
16
+ from core.paths import progress_path, progress_dir, ensure_dir
17
+
18
+
19
def write_progress(module_dir: Path, doc_name: str, message: str) -> None:
    """Write ``message`` plus a UTC timestamp to the progress file for ``doc_name``.

    The file's parent directory is created first; existing content is replaced.
    """
    target = progress_path(module_dir, doc_name)
    ensure_dir(target.parent)
    stamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    line = f"{message} ({stamp})"
    target.write_text(line, encoding="utf-8")
25
+
26
+
27
def read_progress(module_dir: Path, doc_name: str) -> str | None:
    """Return the stripped content of the progress file for ``doc_name``.

    Returns None when the progress file does not exist.
    """
    target = progress_path(module_dir, doc_name)
    if not target.exists():
        return None
    text = target.read_text(encoding="utf-8")
    return text.strip()
33
+
34
+
35
def check_heartbeat(module_dir: Path, timeout_seconds: int = 60) -> list[str]:
    """Find progress files that look stuck.

    A file is reported when its modification time is older than
    ``timeout_seconds`` and its content still contains "RUNNING" or
    "HEARTBEAT". Files ending in ".hb-pid" are ignored.

    Returns:
        File names (as found in the progress directory) of the stale entries;
        empty when the progress directory does not exist.
    """
    stale: list[str] = []
    prog_dir = progress_dir(module_dir)
    if not prog_dir.is_dir():
        return stale

    now = time.time()
    for entry in prog_dir.iterdir():
        if entry.name.endswith(".hb-pid") or not entry.is_file():
            continue
        age = now - entry.stat().st_mtime
        if age <= timeout_seconds:
            continue
        text = entry.read_text(encoding="utf-8").strip()
        # Only flag if still in RUNNING state
        still_active = "RUNNING" in text or "HEARTBEAT" in text
        if still_active:
            stale.append(entry.name)
    return stale
57
+
58
+
59
def cleanup_progress(module_dir: Path) -> int:
    """Remove all progress files (after successful completion).

    Every non-directory entry in the progress directory is deleted.
    Subdirectories are left untouched instead of aborting the cleanup with an
    error from ``unlink()``.

    Returns:
        Number of entries processed for removal; 0 when the progress
        directory does not exist.
    """
    prog_dir = progress_dir(module_dir)
    if not prog_dir.is_dir():
        return 0
    count = 0
    for f in prog_dir.iterdir():
        # unlink() cannot remove a real directory; symlinks (even to a
        # directory) are still removable and handled as before.
        if f.is_dir() and not f.is_symlink():
            continue
        f.unlink(missing_ok=True)
        count += 1
    return count
69
+
70
+
71
def check_progress(module_dir: Path) -> dict[str, str]:
    """Collect the content of every progress file.

    Returns:
        Mapping of file stem to stripped file content, skipping non-files and
        ".hb-pid" files; empty when the progress directory does not exist.
    """
    prog_dir = progress_dir(module_dir)
    if not prog_dir.is_dir():
        return {}
    return {
        entry.stem: entry.read_text(encoding="utf-8").strip()
        for entry in prog_dir.iterdir()
        if entry.is_file() and not entry.name.endswith(".hb-pid")
    }
@@ -0,0 +1,49 @@
1
+ """Prompt persistence — save rendered prompts to disk for debugging.
2
+
3
+ Usage:
4
+ from core.monitor.prompt_store import save_prompt
5
+
6
+ save_prompt(module_dir, task_id, system_prompt, user_prompt)
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import time
12
+ from pathlib import Path
13
+
14
+
15
def save_prompt(
    module_dir: Path,
    task_id: str,
    system: str,
    user: str,
    *,
    model: str = "",
) -> Path:
    """Write a rendered prompt to ``.meta/prompts/{task_id}.md`` under ``module_dir``.

    Slashes and backslashes in ``task_id`` become underscores in the file
    name. The file starts with an HTML comment header (task id, model, UTC
    save time); the system section is omitted when ``system`` is empty.

    Returns:
        Path of the written file.
    """
    out_dir = module_dir / ".meta" / "prompts"
    out_dir.mkdir(parents=True, exist_ok=True)

    file_stem = task_id
    for separator in ("/", "\\"):
        file_stem = file_stem.replace(separator, "_")
    out_path = out_dir / f"{file_stem}.md"

    stamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    sections = [f"<!-- task_id: {task_id} | model: {model} | saved: {stamp} -->\n\n"]
    if system:
        sections.append(f"## System Prompt\n\n{system}\n\n")
    sections.append(f"## User Prompt\n\n{user}\n")

    out_path.write_text("".join(sections), encoding="utf-8")
    return out_path
42
+
43
+
44
def should_save_prompts(config: dict) -> bool:
    """Return True when ``config["debug"]["save_prompts"]`` is set to a truthy value.

    A missing ``debug`` key, or a ``debug`` value that is not a dict, yields False.
    """
    debug_section = config.get("debug", {})
    if not isinstance(debug_section, dict):
        return False
    return bool(debug_section.get("save_prompts", False))