source-kb 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. cli/__init__.py +50 -0
  2. cli/__main__.py +5 -0
  3. cli/commands/__init__.py +1 -0
  4. cli/commands/anchor_fix.py +47 -0
  5. cli/commands/diff_doc.py +52 -0
  6. cli/commands/dispatch.py +77 -0
  7. cli/commands/extract.py +72 -0
  8. cli/commands/file_list.py +74 -0
  9. cli/commands/index.py +84 -0
  10. cli/commands/lock.py +89 -0
  11. cli/commands/merge.py +60 -0
  12. cli/commands/merge_delta.py +19 -0
  13. cli/commands/metadata.py +24 -0
  14. cli/commands/pipeline.py +45 -0
  15. cli/commands/post_merge.py +43 -0
  16. cli/commands/query.py +52 -0
  17. cli/commands/render.py +101 -0
  18. cli/commands/scan_repos.py +46 -0
  19. cli/commands/setup.py +94 -0
  20. cli/commands/split.py +196 -0
  21. cli/commands/stale_files.py +98 -0
  22. cli/commands/validate.py +191 -0
  23. core/__init__.py +32 -0
  24. core/config.py +261 -0
  25. core/docs/__init__.py +7 -0
  26. core/docs/section_updater.py +286 -0
  27. core/docs/shared.py +149 -0
  28. core/git.py +294 -0
  29. core/interfaces.py +249 -0
  30. core/monitor/__init__.py +5 -0
  31. core/monitor/progress.py +83 -0
  32. core/monitor/prompt_store.py +49 -0
  33. core/paths.py +141 -0
  34. core/preset.py +237 -0
  35. core/preset_accessors.py +202 -0
  36. core/preset_classify.py +132 -0
  37. core/preset_hooks.py +129 -0
  38. core/preset_profile.py +89 -0
  39. core/prompt/__init__.py +7 -0
  40. core/prompt/__main__.py +147 -0
  41. core/prompt/content.py +320 -0
  42. core/prompt/context_manager.py +164 -0
  43. core/prompt/renderer.py +236 -0
  44. core/prompt/response_parser.py +274 -0
  45. core/prompt/templates.py +357 -0
  46. core/prompt/validate_parity.py +162 -0
  47. core/prompt/variables.py +339 -0
  48. core/rag/__init__.py +22 -0
  49. core/rag/__main__.py +136 -0
  50. core/rag/bm25_index.py +268 -0
  51. core/rag/chunker.py +273 -0
  52. core/rag/embedder.py +151 -0
  53. core/rag/indexer.py +292 -0
  54. core/rag/loader.py +89 -0
  55. core/rag/retriever.py +82 -0
  56. core/skeleton/__init__.py +11 -0
  57. core/skeleton/__main__.py +934 -0
  58. core/skeleton/anchor_fix.py +250 -0
  59. core/skeleton/classify.py +331 -0
  60. core/skeleton/cmd_anchor_fix.py +43 -0
  61. core/skeleton/cmd_diff_doc.py +44 -0
  62. core/skeleton/cmd_lock.py +87 -0
  63. core/skeleton/cmd_merge_delta.py +41 -0
  64. core/skeleton/community.py +233 -0
  65. core/skeleton/dependency_graph.py +306 -0
  66. core/skeleton/diff_doc.py +248 -0
  67. core/skeleton/dispatch.py +273 -0
  68. core/skeleton/dispatch_render.py +319 -0
  69. core/skeleton/dispatch_source.py +111 -0
  70. core/skeleton/extract.py +218 -0
  71. core/skeleton/extract_methods.py +298 -0
  72. core/skeleton/file_list.py +239 -0
  73. core/skeleton/impact.py +278 -0
  74. core/skeleton/jar_download.py +177 -0
  75. core/skeleton/jar_resolver.py +186 -0
  76. core/skeleton/loader.py +162 -0
  77. core/skeleton/merge.py +278 -0
  78. core/skeleton/merge_delta.py +229 -0
  79. core/skeleton/metadata.py +96 -0
  80. core/skeleton/metadata_builders.py +264 -0
  81. core/skeleton/module_dag.py +330 -0
  82. core/skeleton/parsers/__init__.py +71 -0
  83. core/skeleton/parsers/jqassistant.py +300 -0
  84. core/skeleton/parsers/jqassistant_cypher.py +225 -0
  85. core/skeleton/parsers/regex.py +171 -0
  86. core/skeleton/parsers/treesitter.py +324 -0
  87. core/skeleton/parsers/treesitter_java.py +284 -0
  88. core/skeleton/parsers/treesitter_multi.py +289 -0
  89. core/skeleton/pom_parser.py +299 -0
  90. core/skeleton/post_merge.py +295 -0
  91. core/skeleton/post_merge_llm.py +82 -0
  92. core/skeleton/query.py +195 -0
  93. core/skeleton/shard_context.py +177 -0
  94. core/skeleton/split.py +180 -0
  95. core/skeleton/split_cache.py +107 -0
  96. core/skeleton/split_feedback.py +174 -0
  97. core/skeleton/split_plan.py +219 -0
  98. core/skeleton/split_plan_helpers.py +305 -0
  99. core/skeleton/split_plan_llm.py +274 -0
  100. core/utils.py +135 -0
  101. core/validators/__init__.py +65 -0
  102. core/validators/__main__.py +215 -0
  103. core/validators/consistency.py +203 -0
  104. core/validators/coverage.py +171 -0
  105. core/validators/duplicates.py +76 -0
  106. core/validators/engine.py +224 -0
  107. core/validators/links.py +76 -0
  108. core/validators/sampling.py +169 -0
  109. core/validators/structure.py +144 -0
  110. engine/__init__.py +7 -0
  111. engine/assembler.py +231 -0
  112. engine/confirm.py +65 -0
  113. engine/dedup.py +106 -0
  114. engine/main.py +211 -0
  115. engine/pipeline/__init__.py +163 -0
  116. engine/pipeline/recovery.py +250 -0
  117. engine/pipeline/steps/__init__.py +23 -0
  118. engine/pipeline/steps/audit.py +220 -0
  119. engine/pipeline/steps/audit_apply.py +195 -0
  120. engine/pipeline/steps/audit_helpers.py +155 -0
  121. engine/pipeline/steps/classify_llm.py +236 -0
  122. engine/pipeline/steps/classify_prompt.py +223 -0
  123. engine/pipeline/steps/finalize.py +160 -0
  124. engine/pipeline/steps/generate.py +169 -0
  125. engine/pipeline/steps/generate_batch.py +197 -0
  126. engine/pipeline/steps/generate_recovery.py +170 -0
  127. engine/pipeline/steps/llm_plan_split.py +253 -0
  128. engine/pipeline/steps/lock.py +64 -0
  129. engine/pipeline/steps/preflight.py +237 -0
  130. engine/pipeline/steps/preflight_adjust.py +147 -0
  131. engine/pipeline/steps/pregenerate.py +130 -0
  132. engine/pipeline/steps/quality.py +81 -0
  133. engine/pipeline/steps/skeleton.py +149 -0
  134. engine/pipeline/steps/source.py +163 -0
  135. engine/pipeline/steps/sync.py +117 -0
  136. engine/pipeline/steps/sync_finalize.py +237 -0
  137. engine/pipeline/steps/sync_update.py +341 -0
  138. engine/pipelines.py +91 -0
  139. engine/runner.py +335 -0
  140. engine/strategies/__init__.py +86 -0
  141. engine/strategies/api.py +128 -0
  142. engine/strategies/delegated.py +50 -0
  143. engine/strategies/dryrun.py +25 -0
  144. engine/two_phase.py +143 -0
  145. mcp_server/__init__.py +73 -0
  146. mcp_server/__main__.py +5 -0
  147. mcp_server/tools/__init__.py +1 -0
  148. mcp_server/tools/config.py +63 -0
  149. mcp_server/tools/discovery.py +276 -0
  150. mcp_server/tools/generation.py +184 -0
  151. mcp_server/tools/planning.py +144 -0
  152. mcp_server/tools/source.py +175 -0
  153. mcp_server/tools/validation.py +140 -0
  154. mcp_server/tools/workflow.py +166 -0
  155. mcp_server/workflow_loader.py +204 -0
  156. presets/generic/audit_dimensions.md +132 -0
  157. presets/generic/doc_types.yaml +152 -0
  158. presets/generic/preset.yaml +115 -0
  159. presets/java-spring/audit_dimensions.md +228 -0
  160. presets/java-spring/audit_dimensions.yaml +203 -0
  161. presets/java-spring/doc_types.yaml +269 -0
  162. presets/java-spring/hooks.py +122 -0
  163. presets/java-spring/preset.yaml +341 -0
  164. presets/java-spring/templates/README.md +34 -0
  165. presets/java-spring/templates/audit-system.md +15 -0
  166. presets/java-spring/templates/subagent-aop.md +105 -0
  167. presets/java-spring/templates/subagent-api.md +63 -0
  168. presets/java-spring/templates/subagent-architecture.md +111 -0
  169. presets/java-spring/templates/subagent-async-events.md +107 -0
  170. presets/java-spring/templates/subagent-audit-api-contracts.md +40 -0
  171. presets/java-spring/templates/subagent-audit-architecture.md +38 -0
  172. presets/java-spring/templates/subagent-audit-business.md +40 -0
  173. presets/java-spring/templates/subagent-audit-data-models.md +40 -0
  174. presets/java-spring/templates/subagent-business.md +129 -0
  175. presets/java-spring/templates/subagent-caching.md +75 -0
  176. presets/java-spring/templates/subagent-database-access.md +114 -0
  177. presets/java-spring/templates/subagent-enum.md +75 -0
  178. presets/java-spring/templates/subagent-error-handling.md +91 -0
  179. presets/java-spring/templates/subagent-external-integrations.md +80 -0
  180. presets/java-spring/templates/subagent-index.md +122 -0
  181. presets/java-spring/templates/subagent-messaging.md +97 -0
  182. presets/java-spring/templates/subagent-model.md +88 -0
  183. presets/java-spring/templates/subagent-observability.md +91 -0
  184. presets/java-spring/templates/subagent-scheduled.md +81 -0
  185. presets/java-spring/templates/subagent-security.md +102 -0
  186. presets/java-spring/templates/subagent-structure.md +101 -0
  187. presets/java-spring/templates/subagent-sync-section.md +34 -0
  188. presets/java-spring/templates/subagent-utils.md +73 -0
  189. presets/java-spring/templates/sync-system.md +8 -0
  190. presets/java-spring/workflow-extensions.md +112 -0
  191. skills/__init__.py +1 -0
  192. skills/_shared/README.md +30 -0
  193. skills/_shared/doc-coverage-shared.md +134 -0
  194. skills/_shared/doc-quality-standard.md +1058 -0
  195. skills/_shared/doc-subagent-rules.md +762 -0
  196. skills/_shared/windows-compat.md +89 -0
  197. skills/kb-audit/SKILL.md +52 -0
  198. skills/kb-audit/rules.md +88 -0
  199. skills/kb-audit/steps/step-01-prepare.md +75 -0
  200. skills/kb-audit/steps/step-02-audit.md +96 -0
  201. skills/kb-audit/steps/step-03-verify.md +65 -0
  202. skills/kb-audit/steps/step-04-report.md +64 -0
  203. skills/kb-init/SKILL.md +146 -0
  204. skills/kb-init/rules.md +187 -0
  205. skills/kb-init/steps/step-01-scope.md +62 -0
  206. skills/kb-init/steps/step-02-source.md +410 -0
  207. skills/kb-init/steps/step-03-generate.md +307 -0
  208. skills/kb-init/steps/step-04-quality.md +92 -0
  209. skills/kb-init/steps/step-05-finalize.md +132 -0
  210. skills/kb-init/templates/core/execution-modes.md +29 -0
  211. skills/kb-init/templates/core/output-only.md +4 -0
  212. skills/kb-init/templates/core/readwrite.md +33 -0
  213. skills/kb-search/SKILL.md +138 -0
  214. skills/kb-search/rules.md +64 -0
  215. skills/kb-sync/SKILL.md +43 -0
  216. skills/kb-sync/rules.md +70 -0
  217. skills/kb-sync/scripts/rebuild_module.py +91 -0
  218. skills/kb-sync/scripts/scan_repos.py +687 -0
  219. skills/kb-sync/steps/step-01-detect.md +72 -0
  220. skills/kb-sync/steps/step-02-update.md +71 -0
  221. skills/kb-sync/steps/step-03-verify.md +47 -0
  222. skills/kb-sync/steps/step-04-finalize.md +52 -0
  223. source_kb-0.2.2.dist-info/METADATA +194 -0
  224. source_kb-0.2.2.dist-info/RECORD +228 -0
  225. source_kb-0.2.2.dist-info/WHEEL +5 -0
  226. source_kb-0.2.2.dist-info/entry_points.txt +3 -0
  227. source_kb-0.2.2.dist-info/licenses/LICENSE +21 -0
  228. source_kb-0.2.2.dist-info/top_level.txt +6 -0
@@ -0,0 +1,269 @@
1
+ # Doc type definitions for java-spring preset
2
+ # Each doc_type defines: template, batch order, generation conditions, split overrides, and size expectations.
3
+ # core/ code reads this config — no doc_type names are hardcoded in Python.
4
+
5
+ doc_types:
6
+ source-tree-analysis:
7
+ filename: "source-tree-analysis.md"
8
+ template: "subagent-structure.md"
9
+ batch: 1
10
+ global_view: true
11
+ conditional: false
12
+ search_keywords: ["目录结构", "文件树", "类职责", "包结构", "文件", "源码结构"]
13
+ size_expectations: {"min": 50, "warn_max": 1000, "hard_max": 2000}
14
+
15
+ data-models:
16
+ filename: "data-models.md"
17
+ template: "subagent-model.md"
18
+ batch: 2
19
+ conditional: true
20
+ search_keywords: ["表", "字段", "实体", "模型", "DTO", "VO", "Entity", "Domain"]
21
+ dedup_patterns: ["| 字段 | 类型 |", "字段表格"]
22
+ split_override:
23
+ readwrite:
24
+ max_lines: 30000
25
+ max_files_per_shard: 500
26
+ size_expectations: {"min": 50, "warn_max": 1500, "hard_max": 3000}
27
+ validators:
28
+ - type: sampling_check
29
+ params:
30
+ source: field_names
31
+ sample_size: 5
32
+
33
+ enums-and-constants:
34
+ filename: "enums-and-constants.md"
35
+ template: "subagent-enum.md"
36
+ batch: 2
37
+ conditional: true
38
+ search_keywords: ["枚举", "常量", "状态码", "Enum", "Constant"]
39
+ split_override:
40
+ readwrite:
41
+ max_lines: 20000
42
+ max_files_per_shard: 300
43
+ size_expectations: {"min": 30, "warn_max": 2000, "hard_max": 4000}
44
+ validators:
45
+ - type: sampling_check
46
+ params:
47
+ source: enum_values
48
+ sample_size: 5
49
+
50
+ database-access:
51
+ filename: "database-access.md"
52
+ template: "subagent-database-access.md"
53
+ batch: 2
54
+ conditional: true
55
+ search_keywords: ["SQL", "Mapper", "DAO", "数据访问", "Repository", "查询", "事务"]
56
+ depends_on: ["data-models"]
57
+ size_expectations: {"min": 50, "warn_max": 1500, "hard_max": 3000}
58
+
59
+ api-contracts:
60
+ filename: "api-contracts.md"
61
+ template: "subagent-api.md"
62
+ batch: 3
63
+ conditional: true
64
+ search_keywords: ["接口", "API", "参数", "错误码", "URL", "Controller", "端点"]
65
+ depends_on: ["data-models"]
66
+ size_expectations: {"min": 50, "warn_max": 1500, "hard_max": 3000}
67
+ validators:
68
+ - type: skeleton_count_match
69
+ params:
70
+ skeleton_field: "controllers"
71
+ annotations: ["@RestController", "@Controller"]
72
+ doc_heading_level: 2
73
+ min_ratio: 0.5
74
+
75
+ architecture:
76
+ filename: "architecture.md"
77
+ template: "subagent-architecture.md"
78
+ batch: 3
79
+ conditional: false
80
+ search_keywords: ["架构", "依赖", "配置", "技术栈", "中间件", "分层", "部署"]
81
+ size_expectations: {"min": 50, "warn_max": 1000, "hard_max": 2000}
82
+
83
+ caching:
84
+ filename: "caching.md"
85
+ template: "subagent-caching.md"
86
+ batch: 3
87
+ conditional: true
88
+ search_keywords: ["Redis", "缓存", "key", "Memcached", "TTL", "Cache", "Caffeine"]
89
+ owns_keywords: ["Redis", "缓存键", "RedisConstant", "Jedis", "Memcached", "Caffeine", "EhCache", "GuavaCache", "CacheManager"]
90
+ dedup_patterns: ["缓存 Key 常量", "TTL", "CAS 流程"]
91
+ depends_on: ["data-models"]
92
+ size_expectations: {"min": 30, "warn_max": 800, "hard_max": 1500}
93
+
94
+ messaging:
95
+ filename: "messaging.md"
96
+ template: "subagent-messaging.md"
97
+ batch: 3
98
+ conditional: true
99
+ search_keywords: ["Kafka", "Pulsar", "topic", "消费", "消息", "MQ", "RabbitMQ", "Producer"]
100
+ owns_keywords: ["Kafka", "Topic", "Consumer", "Producer", "KafkaConfig", "Pulsar", "RabbitMQ", "RocketMQ", "MQListener"]
101
+ dedup_patterns: ["消息体 JSON", "消费逻辑", "Topic 清单"]
102
+ depends_on: ["data-models"]
103
+ size_expectations: {"min": 30, "warn_max": 600, "hard_max": 1200}
104
+
105
+ scheduled-tasks:
106
+ filename: "scheduled-tasks.md"
107
+ template: "subagent-scheduled.md"
108
+ batch: 3
109
+ conditional: true
110
+ search_keywords: ["定时任务", "Job", "cron", "调度", "Scheduled", "XxlJob"]
111
+ size_expectations: {"min": 20, "warn_max": 500, "hard_max": 1000}
112
+
113
+ error-handling:
114
+ filename: "error-handling.md"
115
+ template: "subagent-error-handling.md"
116
+ batch: 3
117
+ conditional: true
118
+ search_keywords: ["异常", "错误码", "Exception", "错误处理", "ControllerAdvice"]
119
+ size_expectations: {"min": 10, "warn_max": 400, "hard_max": 800}
120
+
121
+ security:
122
+ filename: "security.md"
123
+ template: "subagent-security.md"
124
+ batch: 3
125
+ conditional: true
126
+ search_keywords: ["认证", "鉴权", "权限", "Token", "Security", "登录", "Filter"]
127
+ size_expectations: {"min": 20, "warn_max": 500, "hard_max": 1000}
128
+
129
+ aop-and-interceptors:
130
+ filename: "aop-and-interceptors.md"
131
+ template: "subagent-aop.md"
132
+ batch: 3
133
+ conditional: true
134
+ search_keywords: ["切面", "AOP", "拦截器", "Aspect", "Interceptor", "Pointcut"]
135
+ size_expectations: {"min": 20, "warn_max": 500, "hard_max": 1000}
136
+
137
+ observability:
138
+ filename: "observability.md"
139
+ template: "subagent-observability.md"
140
+ batch: 3
141
+ conditional: true
142
+ search_keywords: ["监控", "指标", "日志", "告警", "Metrics", "Actuator", "健康检查"]
143
+ owns_keywords: ["MeterRegistry", "Counter", "Timer", "Gauge", "Actuator", "Micrometer"]
144
+ size_expectations: {"min": 20, "warn_max": 500, "hard_max": 1000}
145
+
146
+ async-and-events:
147
+ filename: "async-and-events.md"
148
+ template: "subagent-async-events.md"
149
+ batch: 3
150
+ conditional: true
151
+ search_keywords: ["异步", "事件", "@Async", "EventListener", "线程池", "CompletableFuture"]
152
+ owns_keywords: ["Spring Event", "ApplicationEvent", "EventListener"]
153
+ size_expectations: {"min": 30, "warn_max": 600, "hard_max": 1200}
154
+
155
+ external-integrations:
156
+ filename: "external-integrations.md"
157
+ template: "subagent-external-integrations.md"
158
+ batch: 3
159
+ conditional: true
160
+ search_keywords: ["外部调用", "Feign", "HTTP", "SDK", "第三方", "远程服务", "WebClient"]
161
+ owns_keywords: ["Feign", "外部调用", "远程服务"]
162
+ size_expectations: {"min": 30, "warn_max": 800, "hard_max": 1500}
163
+
164
+ business-logic:
165
+ filename: "business-logic.md"
166
+ template: "subagent-business.md"
167
+ batch: 4
168
+ conditional: true
169
+ search_keywords: ["怎么实现", "流程", "调用链", "业务逻辑", "方法实现", "Service"]
170
+ split_override:
171
+ output-only:
172
+ max_lines: 4000
173
+ readwrite:
174
+ max_lines: 8000
175
+ max_files_per_shard: 60
176
+ depends_on: ["data-models", "api-contracts", "enums-and-constants"]
177
+ size_expectations: {"min": 100, "warn_max": 3000, "hard_max": 5000}
178
+
179
+ utils:
180
+ filename: "utils.md"
181
+ template: "subagent-utils.md"
182
+ batch: 4
183
+ conditional: true
184
+ search_keywords: ["工具类", "工具方法", "Util", "Helper", "Common"]
185
+ split_override:
186
+ readwrite:
187
+ max_lines: 15000
188
+ max_files_per_shard: 100
189
+ depends_on: ["data-models"]
190
+ size_expectations: {"min": 30, "warn_max": 1000, "hard_max": 2000}
191
+
192
+ index:
193
+ filename: "index.md"
194
+ template: "subagent-index.md"
195
+ batch: 5
196
+ conditional: false
197
+ global_view: true
198
+ search_keywords: ["概览", "模块介绍", "有哪些功能", "导航", "索引"]
199
+ depends_on: ["business-logic", "architecture", "data-models"]
200
+ size_expectations: {"min": 20, "warn_max": 300, "hard_max": 600}
201
+
202
+ # Split thresholds (mode-specific defaults)
203
+ # Users can override in kb-project.yaml under `split:` key.
204
+ # Adjust based on your LLM model's context window:
205
+ # - GPT-3.5/4: use output-only defaults
206
+ # - Claude Sonnet/Opus: can increase readwrite limits
207
+ # - Local models (Qwen/DeepSeek): may need lower limits
208
+ split:
209
+ output-only:
210
+ max_bytes: 307200 # 300KB — prompt token budget constraint
211
+ max_lines: 8000
212
+ max_files_per_shard: 80
213
+ llm_sample_limit: 200 # [CLI only] Max files sampled for LLM-based split grouping
214
+ merge_threshold_ratio: 0.25
215
+ readwrite:
216
+ max_bytes: 2097152 # 2MB — Agent reads files individually, not token-limited
217
+ max_lines: 15000 # Controls doc granularity, not prompt size
218
+ max_files_per_shard: 200
219
+ merge_threshold_ratio: 0.25
220
+ hysteresis_ratio: 0.10
221
+ # [Both modes] Noise words filtered when deriving shard group names from class names
222
+ noise_words:
223
+ - service
224
+ - impl
225
+ - base
226
+ - controller
227
+ - manager
228
+ - handler
229
+ - listener
230
+ - api
231
+ - java
232
+ - abstract
233
+ - default
234
+
235
+ # Timeouts and limits
236
+ # All values can be overridden in kb-project.yaml under `limits:` key.
237
+ # Adjust based on your LLM model's capabilities.
238
+ limits:
239
+ min_doc_size_bytes: 500
240
+ max_source_inline_bytes: 300000
241
+ max_skeleton_inline_bytes: 50000
242
+ max_output_tokens: 8192
243
+ prior_docs_max_chars: 2000
244
+ shard_context_max_chars: 1500
245
+ spawn_timeout_default: 900
246
+ heartbeat_interval: 30
247
+ max_retries: 2
248
+ max_consecutive_failures: 3
249
+ failure_rate_threshold: 0.5
250
+ # Audit thresholds
251
+ audit_single_agent_threshold_bytes: 51200 # 50KB — docs below this use single-agent audit
252
+ audit_skeleton_threshold_bytes: 81920 # 80KB — skeleton inline limit for audit prompts
253
+ # Sync thresholds
254
+ sync_max_changed_files: 20 # above this, suggest kb-audit instead of kb-sync
255
+ # LLM call limits
256
+ classify_max_tokens: 4096 # max tokens for LLM classification calls
257
+ # Dedup thresholds
258
+ dedup_min_doc_size_bytes: 15360 # 15KB — docs smaller than this skip dedup
259
+ garbage_patterns:
260
+ - "<web_search>"
261
+ - "<read_file>"
262
+ - "<search_results>"
263
+ - "Search results for"
264
+
265
+ # Parser preference chain
266
+ parsers:
267
+ - jqassistant
268
+ - treesitter
269
+ - regex
@@ -0,0 +1,122 @@
1
+ """Java-Spring preset hooks — language/framework-specific logic.
2
+
3
+ Moves all hardcoded Java/Spring assumptions out of core/ into this preset hook.
4
+ Core modules call hooks.get_*() instead of hardcoding these values.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import os
10
+ from pathlib import Path
11
+
12
+ from core.preset_hooks import PresetHooks
13
+
14
+
15
class Hooks(PresetHooks):
    """Java/Spring Boot specific hooks.

    Each ``get_*`` method returns a fresh literal collection describing a
    Java/Spring naming or annotation convention; the remaining methods are
    small helpers (source counting, FQN parsing, split-planning hints).
    """

    # Per-doc-type focus hints returned by ``get_focus_hint``.  Kept as a
    # class-level constant so the mapping is not rebuilt on every call; it is
    # only ever read, never handed out to callers.
    _FOCUS_HINTS = {
        "business-logic": "关注 Service、Handler、Processor 等业务处理类",
        "data-models": "关注 Entity、DTO、VO 等数据模型类",
        "architecture": "关注 Config、Interceptor、Filter 等架构组件",
        "utils": "关注 Helper、Converter、Mapper 等工具类",
        "api-contracts": "关注 Controller、API、Client 等接口类",
    }

    def get_business_suffixes(self) -> tuple[str, ...]:
        """Return the class-name suffixes this preset lists as business classes.

        NOTE(review): longer suffixes such as ``ServiceImpl`` are listed before
        ``Service``; presumably the consumer does first-match-wins — confirm
        against the caller before reordering.
        """
        return (
            "ServiceImpl", "Service", "Handler", "Processor", "Manager",
            "Facade", "Strategy", "Validator", "Factory",
            "Listener", "Consumer", "Producer", "Client", "Feign", "Biz", "BizImpl",
        )

    def get_infra_suffixes(self) -> tuple[str, ...]:
        """Return the class-name suffixes this preset lists as infrastructure classes."""
        return (
            "Config", "Configuration", "Properties", "Interceptor",
            "Filter", "Aspect", "Advisor", "Converter", "Mapper",
        )

    def get_data_suffixes(self) -> tuple[str, ...]:
        """Return the class-name suffixes this preset lists as data/model classes."""
        return ("DTO", "VO", "DO", "Entity", "Enum", "Constant", "Constants")

    def get_framework_types(self) -> set[str]:
        """Return simple type names this preset treats as framework/infrastructure types.

        A new ``set`` is built on every call, so callers may mutate the result
        without affecting later calls.
        """
        return {
            "RedisTemplate", "StringRedisTemplate", "RedissonClient", "RdcRedissonClient",
            "RdcStringCacheTemplate", "RedisUtil",
            "KafkaTemplate", "KafkaProducer", "KafkaConsumer",
            "RestTemplate", "WebClient", "HttpClient",
            "JdbcTemplate", "SqlSessionFactory", "DataSource",
            "ObjectMapper", "Gson", "BeanMapperImplDozer",
            "ThreadPoolExecutor", "ExecutorService", "ScheduledExecutorService",
            "ThreadPoolTaskExecutor", "Executor",
            "ApplicationContext", "BeanFactory", "Environment",
            "ApplicationEventPublisher",
            "TransactionTemplate", "PlatformTransactionManager",
            "MongoTemplate", "ElasticsearchRestTemplate",
            "KafkaListenerEndpointRegistry",
            "UniqueNumUtil", "DictionaryUtil", "Logger",
        }

    def get_common_types(self) -> frozenset[str]:
        """Return simple names of ubiquitous JDK/servlet/Spring types."""
        return frozenset({
            "String", "Integer", "Long", "Double", "Float", "Boolean", "Byte", "Short",
            "Object", "Void", "BigDecimal", "BigInteger", "Date",
            "LocalDate", "LocalDateTime", "Instant", "Duration",
            "List", "Map", "Set", "Collection", "Optional", "Stream",
            "HashMap", "ArrayList", "HashSet", "LinkedList",
            "Serializable", "Comparable", "Iterable", "Iterator",
            "Exception", "RuntimeException", "Throwable", "Logger", "Log",
            "HttpServletRequest", "HttpServletResponse", "ResponseEntity",
        })

    def get_noise_terms(self) -> set[str]:
        """Return generic terms this preset lists as noise (too common to be distinctive)."""
        return {
            "String", "Integer", "Long", "Boolean", "List", "Map", "Set",
            "JSON", "HTTP", "API", "SQL", "Redis", "Kafka", "Spring", "Java",
        }

    def get_inject_annotations(self) -> set[str]:
        """Return dependency-injection annotation names.

        Most names appear both with and without the leading ``@``.
        NOTE(review): ``@Value`` has no bare ``Value`` counterpart; left as-is
        because it may be intentional — confirm with the consumer.
        """
        return {"@Autowired", "@Resource", "@Inject", "@Value",
                "Autowired", "Resource", "Inject"}

    def get_controller_annotations(self) -> list[str]:
        """Return the annotations that mark a web controller class."""
        return ["@RestController", "@Controller"]

    def get_entity_annotations(self) -> list[str]:
        """Return the annotations that mark a persistence entity class."""
        return ["@Entity", "@Table", "@TableName"]

    def get_source_extensions(self) -> list[str]:
        """Return the file extensions (with leading dot) counted as source files."""
        return [".java", ".kt"]

    def get_test_path_patterns(self) -> list[str]:
        """Return ``/``-delimited path fragments that identify test directories."""
        return ["/test/", "/tests/", "/src/test/"]

    def get_split_name_suffixes(self) -> tuple[str, ...]:
        """Return the class-name suffixes listed for split naming.

        NOTE(review): presumably these are stripped from class names when shard
        group names are derived — confirm against the split planner.
        """
        return ("ServiceImpl", "Service", "Handler", "Processor", "Manager", "Controller")

    def count_source_files(self, source_cache: Path) -> tuple[int, int]:
        """Count source files and their lines under *source_cache*, skipping tests.

        The file extensions come from ``get_source_extensions()`` and the
        excluded directories from ``get_test_path_patterns()``, so this count
        stays consistent with the rest of the preset.

        Args:
            source_cache: Root directory to scan.

        Returns:
            ``(file_count, line_count)``; ``(0, 0)`` when *source_cache* is not
            a directory.  A file that cannot be opened or read still
            contributes to ``file_count`` but adds nothing to ``line_count``.
        """
        if not source_cache.is_dir():
            return 0, 0

        extensions = tuple(self.get_source_extensions())
        test_patterns = tuple(self.get_test_path_patterns())
        total_files, total_lines = 0, 0

        for root, dirnames, filenames in os.walk(source_cache):
            # Match against the path *relative to the scan root*, wrapped in
            # slashes: an unrelated "/test/" ancestor of source_cache must not
            # exclude everything, and a directory literally named "test" must
            # match even though os.walk yields it without a trailing slash.
            rel = os.path.relpath(root, source_cache).replace("\\", "/")
            probe = f"/{rel}/"
            if any(pattern in probe for pattern in test_patterns):
                dirnames[:] = []  # prune: nothing below a test dir is counted
                continue

            for fn in filenames:
                if not fn.endswith(extensions):
                    continue
                total_files += 1
                try:
                    with open(os.path.join(root, fn), encoding="utf-8", errors="replace") as f:
                        total_lines += sum(1 for _ in f)
                except OSError:
                    # Best-effort statistic: an unreadable file keeps its file
                    # count but contributes no lines.
                    pass
        return total_files, total_lines

    def extract_package_from_fqn(self, fqn: str) -> str:
        """Extract the package from a Java fully-qualified name.

        Returns everything before the last ``.``, or ``""`` when *fqn*
        contains no dot.
        """
        package, _, _ = fqn.rpartition(".")
        return package

    def get_focus_hint(self, doc_type: str) -> str:
        """Return the focus hint for LLM-based split planning.

        Returns ``""`` for a *doc_type* that has no hint defined.
        """
        return self._FOCUS_HINTS.get(doc_type, "")