source-kb 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/__init__.py +50 -0
- cli/__main__.py +5 -0
- cli/commands/__init__.py +1 -0
- cli/commands/anchor_fix.py +47 -0
- cli/commands/diff_doc.py +52 -0
- cli/commands/dispatch.py +77 -0
- cli/commands/extract.py +72 -0
- cli/commands/file_list.py +74 -0
- cli/commands/index.py +84 -0
- cli/commands/lock.py +89 -0
- cli/commands/merge.py +60 -0
- cli/commands/merge_delta.py +19 -0
- cli/commands/metadata.py +24 -0
- cli/commands/pipeline.py +45 -0
- cli/commands/post_merge.py +43 -0
- cli/commands/query.py +52 -0
- cli/commands/render.py +101 -0
- cli/commands/scan_repos.py +46 -0
- cli/commands/setup.py +94 -0
- cli/commands/split.py +196 -0
- cli/commands/stale_files.py +98 -0
- cli/commands/validate.py +191 -0
- core/__init__.py +32 -0
- core/config.py +261 -0
- core/docs/__init__.py +7 -0
- core/docs/section_updater.py +286 -0
- core/docs/shared.py +149 -0
- core/git.py +294 -0
- core/interfaces.py +249 -0
- core/monitor/__init__.py +5 -0
- core/monitor/progress.py +83 -0
- core/monitor/prompt_store.py +49 -0
- core/paths.py +141 -0
- core/preset.py +237 -0
- core/preset_accessors.py +202 -0
- core/preset_classify.py +132 -0
- core/preset_hooks.py +129 -0
- core/preset_profile.py +89 -0
- core/prompt/__init__.py +7 -0
- core/prompt/__main__.py +147 -0
- core/prompt/content.py +320 -0
- core/prompt/context_manager.py +164 -0
- core/prompt/renderer.py +236 -0
- core/prompt/response_parser.py +274 -0
- core/prompt/templates.py +357 -0
- core/prompt/validate_parity.py +162 -0
- core/prompt/variables.py +339 -0
- core/rag/__init__.py +22 -0
- core/rag/__main__.py +136 -0
- core/rag/bm25_index.py +268 -0
- core/rag/chunker.py +273 -0
- core/rag/embedder.py +151 -0
- core/rag/indexer.py +292 -0
- core/rag/loader.py +89 -0
- core/rag/retriever.py +82 -0
- core/skeleton/__init__.py +11 -0
- core/skeleton/__main__.py +934 -0
- core/skeleton/anchor_fix.py +250 -0
- core/skeleton/classify.py +331 -0
- core/skeleton/cmd_anchor_fix.py +43 -0
- core/skeleton/cmd_diff_doc.py +44 -0
- core/skeleton/cmd_lock.py +87 -0
- core/skeleton/cmd_merge_delta.py +41 -0
- core/skeleton/community.py +233 -0
- core/skeleton/dependency_graph.py +306 -0
- core/skeleton/diff_doc.py +248 -0
- core/skeleton/dispatch.py +273 -0
- core/skeleton/dispatch_render.py +319 -0
- core/skeleton/dispatch_source.py +111 -0
- core/skeleton/extract.py +218 -0
- core/skeleton/extract_methods.py +298 -0
- core/skeleton/file_list.py +239 -0
- core/skeleton/impact.py +278 -0
- core/skeleton/jar_download.py +177 -0
- core/skeleton/jar_resolver.py +186 -0
- core/skeleton/loader.py +162 -0
- core/skeleton/merge.py +278 -0
- core/skeleton/merge_delta.py +229 -0
- core/skeleton/metadata.py +96 -0
- core/skeleton/metadata_builders.py +264 -0
- core/skeleton/module_dag.py +330 -0
- core/skeleton/parsers/__init__.py +71 -0
- core/skeleton/parsers/jqassistant.py +300 -0
- core/skeleton/parsers/jqassistant_cypher.py +225 -0
- core/skeleton/parsers/regex.py +171 -0
- core/skeleton/parsers/treesitter.py +324 -0
- core/skeleton/parsers/treesitter_java.py +284 -0
- core/skeleton/parsers/treesitter_multi.py +289 -0
- core/skeleton/pom_parser.py +299 -0
- core/skeleton/post_merge.py +295 -0
- core/skeleton/post_merge_llm.py +82 -0
- core/skeleton/query.py +195 -0
- core/skeleton/shard_context.py +177 -0
- core/skeleton/split.py +180 -0
- core/skeleton/split_cache.py +107 -0
- core/skeleton/split_feedback.py +174 -0
- core/skeleton/split_plan.py +219 -0
- core/skeleton/split_plan_helpers.py +305 -0
- core/skeleton/split_plan_llm.py +274 -0
- core/utils.py +135 -0
- core/validators/__init__.py +65 -0
- core/validators/__main__.py +215 -0
- core/validators/consistency.py +203 -0
- core/validators/coverage.py +171 -0
- core/validators/duplicates.py +76 -0
- core/validators/engine.py +224 -0
- core/validators/links.py +76 -0
- core/validators/sampling.py +169 -0
- core/validators/structure.py +144 -0
- engine/__init__.py +7 -0
- engine/assembler.py +231 -0
- engine/confirm.py +65 -0
- engine/dedup.py +106 -0
- engine/main.py +211 -0
- engine/pipeline/__init__.py +163 -0
- engine/pipeline/recovery.py +250 -0
- engine/pipeline/steps/__init__.py +23 -0
- engine/pipeline/steps/audit.py +220 -0
- engine/pipeline/steps/audit_apply.py +195 -0
- engine/pipeline/steps/audit_helpers.py +155 -0
- engine/pipeline/steps/classify_llm.py +236 -0
- engine/pipeline/steps/classify_prompt.py +223 -0
- engine/pipeline/steps/finalize.py +160 -0
- engine/pipeline/steps/generate.py +169 -0
- engine/pipeline/steps/generate_batch.py +197 -0
- engine/pipeline/steps/generate_recovery.py +170 -0
- engine/pipeline/steps/llm_plan_split.py +253 -0
- engine/pipeline/steps/lock.py +64 -0
- engine/pipeline/steps/preflight.py +237 -0
- engine/pipeline/steps/preflight_adjust.py +147 -0
- engine/pipeline/steps/pregenerate.py +130 -0
- engine/pipeline/steps/quality.py +81 -0
- engine/pipeline/steps/skeleton.py +149 -0
- engine/pipeline/steps/source.py +163 -0
- engine/pipeline/steps/sync.py +117 -0
- engine/pipeline/steps/sync_finalize.py +237 -0
- engine/pipeline/steps/sync_update.py +341 -0
- engine/pipelines.py +91 -0
- engine/runner.py +335 -0
- engine/strategies/__init__.py +86 -0
- engine/strategies/api.py +128 -0
- engine/strategies/delegated.py +50 -0
- engine/strategies/dryrun.py +25 -0
- engine/two_phase.py +143 -0
- mcp_server/__init__.py +73 -0
- mcp_server/__main__.py +5 -0
- mcp_server/tools/__init__.py +1 -0
- mcp_server/tools/config.py +63 -0
- mcp_server/tools/discovery.py +276 -0
- mcp_server/tools/generation.py +184 -0
- mcp_server/tools/planning.py +144 -0
- mcp_server/tools/source.py +175 -0
- mcp_server/tools/validation.py +140 -0
- mcp_server/tools/workflow.py +166 -0
- mcp_server/workflow_loader.py +204 -0
- presets/generic/audit_dimensions.md +132 -0
- presets/generic/doc_types.yaml +152 -0
- presets/generic/preset.yaml +115 -0
- presets/java-spring/audit_dimensions.md +228 -0
- presets/java-spring/audit_dimensions.yaml +203 -0
- presets/java-spring/doc_types.yaml +269 -0
- presets/java-spring/hooks.py +122 -0
- presets/java-spring/preset.yaml +341 -0
- presets/java-spring/templates/README.md +34 -0
- presets/java-spring/templates/audit-system.md +15 -0
- presets/java-spring/templates/subagent-aop.md +105 -0
- presets/java-spring/templates/subagent-api.md +63 -0
- presets/java-spring/templates/subagent-architecture.md +111 -0
- presets/java-spring/templates/subagent-async-events.md +107 -0
- presets/java-spring/templates/subagent-audit-api-contracts.md +40 -0
- presets/java-spring/templates/subagent-audit-architecture.md +38 -0
- presets/java-spring/templates/subagent-audit-business.md +40 -0
- presets/java-spring/templates/subagent-audit-data-models.md +40 -0
- presets/java-spring/templates/subagent-business.md +129 -0
- presets/java-spring/templates/subagent-caching.md +75 -0
- presets/java-spring/templates/subagent-database-access.md +114 -0
- presets/java-spring/templates/subagent-enum.md +75 -0
- presets/java-spring/templates/subagent-error-handling.md +91 -0
- presets/java-spring/templates/subagent-external-integrations.md +80 -0
- presets/java-spring/templates/subagent-index.md +122 -0
- presets/java-spring/templates/subagent-messaging.md +97 -0
- presets/java-spring/templates/subagent-model.md +88 -0
- presets/java-spring/templates/subagent-observability.md +91 -0
- presets/java-spring/templates/subagent-scheduled.md +81 -0
- presets/java-spring/templates/subagent-security.md +102 -0
- presets/java-spring/templates/subagent-structure.md +101 -0
- presets/java-spring/templates/subagent-sync-section.md +34 -0
- presets/java-spring/templates/subagent-utils.md +73 -0
- presets/java-spring/templates/sync-system.md +8 -0
- presets/java-spring/workflow-extensions.md +112 -0
- skills/__init__.py +1 -0
- skills/_shared/README.md +30 -0
- skills/_shared/doc-coverage-shared.md +134 -0
- skills/_shared/doc-quality-standard.md +1058 -0
- skills/_shared/doc-subagent-rules.md +762 -0
- skills/_shared/windows-compat.md +89 -0
- skills/kb-audit/SKILL.md +52 -0
- skills/kb-audit/rules.md +88 -0
- skills/kb-audit/steps/step-01-prepare.md +75 -0
- skills/kb-audit/steps/step-02-audit.md +96 -0
- skills/kb-audit/steps/step-03-verify.md +65 -0
- skills/kb-audit/steps/step-04-report.md +64 -0
- skills/kb-init/SKILL.md +146 -0
- skills/kb-init/rules.md +187 -0
- skills/kb-init/steps/step-01-scope.md +62 -0
- skills/kb-init/steps/step-02-source.md +410 -0
- skills/kb-init/steps/step-03-generate.md +307 -0
- skills/kb-init/steps/step-04-quality.md +92 -0
- skills/kb-init/steps/step-05-finalize.md +132 -0
- skills/kb-init/templates/core/execution-modes.md +29 -0
- skills/kb-init/templates/core/output-only.md +4 -0
- skills/kb-init/templates/core/readwrite.md +33 -0
- skills/kb-search/SKILL.md +138 -0
- skills/kb-search/rules.md +64 -0
- skills/kb-sync/SKILL.md +43 -0
- skills/kb-sync/rules.md +70 -0
- skills/kb-sync/scripts/rebuild_module.py +91 -0
- skills/kb-sync/scripts/scan_repos.py +687 -0
- skills/kb-sync/steps/step-01-detect.md +72 -0
- skills/kb-sync/steps/step-02-update.md +71 -0
- skills/kb-sync/steps/step-03-verify.md +47 -0
- skills/kb-sync/steps/step-04-finalize.md +52 -0
- source_kb-0.2.2.dist-info/METADATA +194 -0
- source_kb-0.2.2.dist-info/RECORD +228 -0
- source_kb-0.2.2.dist-info/WHEEL +5 -0
- source_kb-0.2.2.dist-info/entry_points.txt +3 -0
- source_kb-0.2.2.dist-info/licenses/LICENSE +21 -0
- source_kb-0.2.2.dist-info/top_level.txt +6 -0
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
# Doc type definitions for java-spring preset
|
|
2
|
+
# Each doc_type defines: template, batch order, generation conditions, split overrides, and size expectations.
|
|
3
|
+
# core/ code reads this config — no doc_type names are hardcoded in Python.
|
|
4
|
+
|
|
5
|
+
doc_types:
|
|
6
|
+
source-tree-analysis:
|
|
7
|
+
filename: "source-tree-analysis.md"
|
|
8
|
+
template: "subagent-structure.md"
|
|
9
|
+
batch: 1
|
|
10
|
+
global_view: true
|
|
11
|
+
conditional: false
|
|
12
|
+
search_keywords: ["目录结构", "文件树", "类职责", "包结构", "文件", "源码结构"]
|
|
13
|
+
size_expectations: {"min": 50, "warn_max": 1000, "hard_max": 2000}
|
|
14
|
+
|
|
15
|
+
data-models:
|
|
16
|
+
filename: "data-models.md"
|
|
17
|
+
template: "subagent-model.md"
|
|
18
|
+
batch: 2
|
|
19
|
+
conditional: true
|
|
20
|
+
search_keywords: ["表", "字段", "实体", "模型", "DTO", "VO", "Entity", "Domain"]
|
|
21
|
+
dedup_patterns: ["| 字段 | 类型 |", "字段表格"]
|
|
22
|
+
split_override:
|
|
23
|
+
readwrite:
|
|
24
|
+
max_lines: 30000
|
|
25
|
+
max_files_per_shard: 500
|
|
26
|
+
size_expectations: {"min": 50, "warn_max": 1500, "hard_max": 3000}
|
|
27
|
+
validators:
|
|
28
|
+
- type: sampling_check
|
|
29
|
+
params:
|
|
30
|
+
source: field_names
|
|
31
|
+
sample_size: 5
|
|
32
|
+
|
|
33
|
+
enums-and-constants:
|
|
34
|
+
filename: "enums-and-constants.md"
|
|
35
|
+
template: "subagent-enum.md"
|
|
36
|
+
batch: 2
|
|
37
|
+
conditional: true
|
|
38
|
+
search_keywords: ["枚举", "常量", "状态码", "Enum", "Constant"]
|
|
39
|
+
split_override:
|
|
40
|
+
readwrite:
|
|
41
|
+
max_lines: 20000
|
|
42
|
+
max_files_per_shard: 300
|
|
43
|
+
size_expectations: {"min": 30, "warn_max": 2000, "hard_max": 4000}
|
|
44
|
+
validators:
|
|
45
|
+
- type: sampling_check
|
|
46
|
+
params:
|
|
47
|
+
source: enum_values
|
|
48
|
+
sample_size: 5
|
|
49
|
+
|
|
50
|
+
database-access:
|
|
51
|
+
filename: "database-access.md"
|
|
52
|
+
template: "subagent-database-access.md"
|
|
53
|
+
batch: 2
|
|
54
|
+
conditional: true
|
|
55
|
+
search_keywords: ["SQL", "Mapper", "DAO", "数据访问", "Repository", "查询", "事务"]
|
|
56
|
+
depends_on: ["data-models"]
|
|
57
|
+
size_expectations: {"min": 50, "warn_max": 1500, "hard_max": 3000}
|
|
58
|
+
|
|
59
|
+
api-contracts:
|
|
60
|
+
filename: "api-contracts.md"
|
|
61
|
+
template: "subagent-api.md"
|
|
62
|
+
batch: 3
|
|
63
|
+
conditional: true
|
|
64
|
+
search_keywords: ["接口", "API", "参数", "错误码", "URL", "Controller", "端点"]
|
|
65
|
+
depends_on: ["data-models"]
|
|
66
|
+
size_expectations: {"min": 50, "warn_max": 1500, "hard_max": 3000}
|
|
67
|
+
validators:
|
|
68
|
+
- type: skeleton_count_match
|
|
69
|
+
params:
|
|
70
|
+
skeleton_field: "controllers"
|
|
71
|
+
annotations: ["@RestController", "@Controller"]
|
|
72
|
+
doc_heading_level: 2
|
|
73
|
+
min_ratio: 0.5
|
|
74
|
+
|
|
75
|
+
architecture:
|
|
76
|
+
filename: "architecture.md"
|
|
77
|
+
template: "subagent-architecture.md"
|
|
78
|
+
batch: 3
|
|
79
|
+
conditional: false
|
|
80
|
+
search_keywords: ["架构", "依赖", "配置", "技术栈", "中间件", "分层", "部署"]
|
|
81
|
+
size_expectations: {"min": 50, "warn_max": 1000, "hard_max": 2000}
|
|
82
|
+
|
|
83
|
+
caching:
|
|
84
|
+
filename: "caching.md"
|
|
85
|
+
template: "subagent-caching.md"
|
|
86
|
+
batch: 3
|
|
87
|
+
conditional: true
|
|
88
|
+
search_keywords: ["Redis", "缓存", "key", "Memcached", "TTL", "Cache", "Caffeine"]
|
|
89
|
+
owns_keywords: ["Redis", "缓存键", "RedisConstant", "Jedis", "Memcached", "Caffeine", "EhCache", "GuavaCache", "CacheManager"]
|
|
90
|
+
dedup_patterns: ["缓存 Key 常量", "TTL", "CAS 流程"]
|
|
91
|
+
depends_on: ["data-models"]
|
|
92
|
+
size_expectations: {"min": 30, "warn_max": 800, "hard_max": 1500}
|
|
93
|
+
|
|
94
|
+
messaging:
|
|
95
|
+
filename: "messaging.md"
|
|
96
|
+
template: "subagent-messaging.md"
|
|
97
|
+
batch: 3
|
|
98
|
+
conditional: true
|
|
99
|
+
search_keywords: ["Kafka", "Pulsar", "topic", "消费", "消息", "MQ", "RabbitMQ", "Producer"]
|
|
100
|
+
owns_keywords: ["Kafka", "Topic", "Consumer", "Producer", "KafkaConfig", "Pulsar", "RabbitMQ", "RocketMQ", "MQListener"]
|
|
101
|
+
dedup_patterns: ["消息体 JSON", "消费逻辑", "Topic 清单"]
|
|
102
|
+
depends_on: ["data-models"]
|
|
103
|
+
size_expectations: {"min": 30, "warn_max": 600, "hard_max": 1200}
|
|
104
|
+
|
|
105
|
+
scheduled-tasks:
|
|
106
|
+
filename: "scheduled-tasks.md"
|
|
107
|
+
template: "subagent-scheduled.md"
|
|
108
|
+
batch: 3
|
|
109
|
+
conditional: true
|
|
110
|
+
search_keywords: ["定时任务", "Job", "cron", "调度", "Scheduled", "XxlJob"]
|
|
111
|
+
size_expectations: {"min": 20, "warn_max": 500, "hard_max": 1000}
|
|
112
|
+
|
|
113
|
+
error-handling:
|
|
114
|
+
filename: "error-handling.md"
|
|
115
|
+
template: "subagent-error-handling.md"
|
|
116
|
+
batch: 3
|
|
117
|
+
conditional: true
|
|
118
|
+
search_keywords: ["异常", "错误码", "Exception", "错误处理", "ControllerAdvice"]
|
|
119
|
+
size_expectations: {"min": 10, "warn_max": 400, "hard_max": 800}
|
|
120
|
+
|
|
121
|
+
security:
|
|
122
|
+
filename: "security.md"
|
|
123
|
+
template: "subagent-security.md"
|
|
124
|
+
batch: 3
|
|
125
|
+
conditional: true
|
|
126
|
+
search_keywords: ["认证", "鉴权", "权限", "Token", "Security", "登录", "Filter"]
|
|
127
|
+
size_expectations: {"min": 20, "warn_max": 500, "hard_max": 1000}
|
|
128
|
+
|
|
129
|
+
aop-and-interceptors:
|
|
130
|
+
filename: "aop-and-interceptors.md"
|
|
131
|
+
template: "subagent-aop.md"
|
|
132
|
+
batch: 3
|
|
133
|
+
conditional: true
|
|
134
|
+
search_keywords: ["切面", "AOP", "拦截器", "Aspect", "Interceptor", "Pointcut"]
|
|
135
|
+
size_expectations: {"min": 20, "warn_max": 500, "hard_max": 1000}
|
|
136
|
+
|
|
137
|
+
observability:
|
|
138
|
+
filename: "observability.md"
|
|
139
|
+
template: "subagent-observability.md"
|
|
140
|
+
batch: 3
|
|
141
|
+
conditional: true
|
|
142
|
+
search_keywords: ["监控", "指标", "日志", "告警", "Metrics", "Actuator", "健康检查"]
|
|
143
|
+
owns_keywords: ["MeterRegistry", "Counter", "Timer", "Gauge", "Actuator", "Micrometer"]
|
|
144
|
+
size_expectations: {"min": 20, "warn_max": 500, "hard_max": 1000}
|
|
145
|
+
|
|
146
|
+
async-and-events:
|
|
147
|
+
filename: "async-and-events.md"
|
|
148
|
+
template: "subagent-async-events.md"
|
|
149
|
+
batch: 3
|
|
150
|
+
conditional: true
|
|
151
|
+
search_keywords: ["异步", "事件", "@Async", "EventListener", "线程池", "CompletableFuture"]
|
|
152
|
+
owns_keywords: ["Spring Event", "ApplicationEvent", "EventListener"]
|
|
153
|
+
size_expectations: {"min": 30, "warn_max": 600, "hard_max": 1200}
|
|
154
|
+
|
|
155
|
+
external-integrations:
|
|
156
|
+
filename: "external-integrations.md"
|
|
157
|
+
template: "subagent-external-integrations.md"
|
|
158
|
+
batch: 3
|
|
159
|
+
conditional: true
|
|
160
|
+
search_keywords: ["外部调用", "Feign", "HTTP", "SDK", "第三方", "远程服务", "WebClient"]
|
|
161
|
+
owns_keywords: ["Feign", "外部调用", "远程服务"]
|
|
162
|
+
size_expectations: {"min": 30, "warn_max": 800, "hard_max": 1500}
|
|
163
|
+
|
|
164
|
+
business-logic:
|
|
165
|
+
filename: "business-logic.md"
|
|
166
|
+
template: "subagent-business.md"
|
|
167
|
+
batch: 4
|
|
168
|
+
conditional: true
|
|
169
|
+
search_keywords: ["怎么实现", "流程", "调用链", "业务逻辑", "方法实现", "Service"]
|
|
170
|
+
split_override:
|
|
171
|
+
output-only:
|
|
172
|
+
max_lines: 4000
|
|
173
|
+
readwrite:
|
|
174
|
+
max_lines: 8000
|
|
175
|
+
max_files_per_shard: 60
|
|
176
|
+
depends_on: ["data-models", "api-contracts", "enums-and-constants"]
|
|
177
|
+
size_expectations: {"min": 100, "warn_max": 3000, "hard_max": 5000}
|
|
178
|
+
|
|
179
|
+
utils:
|
|
180
|
+
filename: "utils.md"
|
|
181
|
+
template: "subagent-utils.md"
|
|
182
|
+
batch: 4
|
|
183
|
+
conditional: true
|
|
184
|
+
search_keywords: ["工具类", "工具方法", "Util", "Helper", "Common"]
|
|
185
|
+
split_override:
|
|
186
|
+
readwrite:
|
|
187
|
+
max_lines: 15000
|
|
188
|
+
max_files_per_shard: 100
|
|
189
|
+
depends_on: ["data-models"]
|
|
190
|
+
size_expectations: {"min": 30, "warn_max": 1000, "hard_max": 2000}
|
|
191
|
+
|
|
192
|
+
index:
|
|
193
|
+
filename: "index.md"
|
|
194
|
+
template: "subagent-index.md"
|
|
195
|
+
batch: 5
|
|
196
|
+
conditional: false
|
|
197
|
+
global_view: true
|
|
198
|
+
search_keywords: ["概览", "模块介绍", "有哪些功能", "导航", "索引"]
|
|
199
|
+
depends_on: ["business-logic", "architecture", "data-models"]
|
|
200
|
+
size_expectations: {"min": 20, "warn_max": 300, "hard_max": 600}
|
|
201
|
+
|
|
202
|
+
# Split thresholds (mode-specific defaults)
|
|
203
|
+
# Users can override in kb-project.yaml under `split:` key.
|
|
204
|
+
# Adjust based on your LLM model's context window:
|
|
205
|
+
# - GPT-3.5/4: use output-only defaults
|
|
206
|
+
# - Claude Sonnet/Opus: can increase readwrite limits
|
|
207
|
+
# - Local models (Qwen/DeepSeek): may need lower limits
|
|
208
|
+
split:
|
|
209
|
+
output-only:
|
|
210
|
+
max_bytes: 307200 # 300KB — prompt token budget constraint
|
|
211
|
+
max_lines: 8000
|
|
212
|
+
max_files_per_shard: 80
|
|
213
|
+
llm_sample_limit: 200 # [CLI only] Max files sampled for LLM-based split grouping
|
|
214
|
+
merge_threshold_ratio: 0.25
|
|
215
|
+
readwrite:
|
|
216
|
+
max_bytes: 2097152 # 2MB — Agent reads files individually, not token-limited
|
|
217
|
+
max_lines: 15000 # Controls doc granularity, not prompt size
|
|
218
|
+
max_files_per_shard: 200
|
|
219
|
+
merge_threshold_ratio: 0.25
|
|
220
|
+
hysteresis_ratio: 0.10
|
|
221
|
+
# [Both modes] Noise words filtered when deriving shard group names from class names
|
|
222
|
+
noise_words:
|
|
223
|
+
- service
|
|
224
|
+
- impl
|
|
225
|
+
- base
|
|
226
|
+
- controller
|
|
227
|
+
- manager
|
|
228
|
+
- handler
|
|
229
|
+
- listener
|
|
230
|
+
- api
|
|
231
|
+
- java
|
|
232
|
+
- abstract
|
|
233
|
+
- default
|
|
234
|
+
|
|
235
|
+
# Timeouts and limits
|
|
236
|
+
# All values can be overridden in kb-project.yaml under `limits:` key.
|
|
237
|
+
# Adjust based on your LLM model's capabilities.
|
|
238
|
+
limits:
|
|
239
|
+
min_doc_size_bytes: 500
|
|
240
|
+
max_source_inline_bytes: 300000
|
|
241
|
+
max_skeleton_inline_bytes: 50000
|
|
242
|
+
max_output_tokens: 8192
|
|
243
|
+
prior_docs_max_chars: 2000
|
|
244
|
+
shard_context_max_chars: 1500
|
|
245
|
+
spawn_timeout_default: 900
|
|
246
|
+
heartbeat_interval: 30
|
|
247
|
+
max_retries: 2
|
|
248
|
+
max_consecutive_failures: 3
|
|
249
|
+
failure_rate_threshold: 0.5
|
|
250
|
+
# Audit thresholds
|
|
251
|
+
audit_single_agent_threshold_bytes: 51200 # 50KB — docs below this use single-agent audit
|
|
252
|
+
audit_skeleton_threshold_bytes: 81920 # 80KB — skeleton inline limit for audit prompts
|
|
253
|
+
# Sync thresholds
|
|
254
|
+
sync_max_changed_files: 20 # above this, suggest kb-audit instead of kb-sync
|
|
255
|
+
# LLM call limits
|
|
256
|
+
classify_max_tokens: 4096 # max tokens for LLM classification calls
|
|
257
|
+
# Dedup thresholds
|
|
258
|
+
dedup_min_doc_size_bytes: 15360 # 15KB — docs smaller than this skip dedup
|
|
259
|
+
garbage_patterns:
|
|
260
|
+
- "<web_search>"
|
|
261
|
+
- "<read_file>"
|
|
262
|
+
- "<search_results>"
|
|
263
|
+
- "Search results for"
|
|
264
|
+
|
|
265
|
+
# Parser preference chain
|
|
266
|
+
parsers:
|
|
267
|
+
- jqassistant
|
|
268
|
+
- treesitter
|
|
269
|
+
- regex
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
"""Java-Spring preset hooks — language/framework-specific logic.
|
|
2
|
+
|
|
3
|
+
Moves all hardcoded Java/Spring assumptions out of core/ into this preset hook.
|
|
4
|
+
Core modules call hooks.get_*() instead of hardcoding these values.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
from core.preset_hooks import PresetHooks
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class Hooks(PresetHooks):
|
|
16
|
+
"""Java/Spring Boot specific hooks."""
|
|
17
|
+
|
|
18
|
+
def get_business_suffixes(self) -> tuple[str, ...]:
    """Return the class-name suffixes this preset labels as business classes.

    The order of the original tuple is preserved ("ServiceImpl" precedes
    "Service"); how callers consume the order is not visible here.
    """
    processing = ("ServiceImpl", "Service", "Handler", "Processor", "Manager")
    patterns = ("Facade", "Strategy", "Validator", "Factory")
    integration = (
        "Listener",
        "Consumer",
        "Producer",
        "Client",
        "Feign",
        "Biz",
        "BizImpl",
    )
    return processing + patterns + integration
|
|
24
|
+
|
|
25
|
+
def get_infra_suffixes(self) -> tuple[str, ...]:
    """Return the class-name suffixes this preset labels as infrastructure classes."""
    configuration = ("Config", "Configuration", "Properties")
    cross_cutting = ("Interceptor", "Filter", "Aspect", "Advisor")
    mapping = ("Converter", "Mapper")
    return configuration + cross_cutting + mapping
|
|
30
|
+
|
|
31
|
+
def get_data_suffixes(self) -> tuple[str, ...]:
    """Return the class-name suffixes this preset labels as data classes."""
    data_suffixes = (
        "DTO",
        "VO",
        "DO",
        "Entity",
        "Enum",
        "Constant",
        "Constants",
    )
    return data_suffixes
|
|
33
|
+
|
|
34
|
+
def get_framework_types(self) -> set[str]:
    """Return the simple type names this preset classifies as framework types.

    A fresh mutable set is built on every call, so callers may modify the
    result without affecting later calls.
    """
    type_names = (
        # Redis / cache clients
        "RedisTemplate",
        "StringRedisTemplate",
        "RedissonClient",
        "RdcRedissonClient",
        "RdcStringCacheTemplate",
        "RedisUtil",
        # Kafka
        "KafkaTemplate",
        "KafkaProducer",
        "KafkaConsumer",
        "KafkaListenerEndpointRegistry",
        # HTTP clients
        "RestTemplate",
        "WebClient",
        "HttpClient",
        # Persistence and transactions
        "JdbcTemplate",
        "SqlSessionFactory",
        "DataSource",
        "TransactionTemplate",
        "PlatformTransactionManager",
        "MongoTemplate",
        "ElasticsearchRestTemplate",
        # Serialization / bean mapping
        "ObjectMapper",
        "Gson",
        "BeanMapperImplDozer",
        # Executors
        "ThreadPoolExecutor",
        "ExecutorService",
        "ScheduledExecutorService",
        "ThreadPoolTaskExecutor",
        "Executor",
        # Spring container
        "ApplicationContext",
        "BeanFactory",
        "Environment",
        "ApplicationEventPublisher",
        # Miscellaneous helpers
        "UniqueNumUtil",
        "DictionaryUtil",
        "Logger",
    )
    return set(type_names)
|
|
51
|
+
|
|
52
|
+
def get_common_types(self) -> frozenset[str]:
    """Return the immutable set of simple type names this preset treats as common types."""
    scalars = (
        "String", "Integer", "Long", "Double", "Float", "Boolean", "Byte", "Short",
        "Object", "Void", "BigDecimal", "BigInteger",
    )
    temporal = ("Date", "LocalDate", "LocalDateTime", "Instant", "Duration")
    containers = (
        "List", "Map", "Set", "Collection", "Optional", "Stream",
        "HashMap", "ArrayList", "HashSet", "LinkedList",
    )
    core_interfaces = ("Serializable", "Comparable", "Iterable", "Iterator")
    errors_and_logging = ("Exception", "RuntimeException", "Throwable", "Logger", "Log")
    servlet_and_web = ("HttpServletRequest", "HttpServletResponse", "ResponseEntity")
    return frozenset(
        scalars
        + temporal
        + containers
        + core_interfaces
        + errors_and_logging
        + servlet_and_web
    )
|
|
63
|
+
|
|
64
|
+
def get_noise_terms(self) -> set[str]:
    """Return the terms this preset declares as noise.

    A new set is created on each call.
    """
    language_terms = ("String", "Integer", "Long", "Boolean", "List", "Map", "Set")
    technology_terms = ("JSON", "HTTP", "API", "SQL", "Redis", "Kafka", "Spring", "Java")
    noise = set(language_terms)
    noise.update(technology_terms)
    return noise
|
|
69
|
+
|
|
70
|
+
def get_inject_annotations(self) -> set[str]:
    """Return the annotation names this preset treats as injection markers.

    Autowired, Resource and Inject are listed both with and without the
    leading "@"; Value is listed only as "@Value".
    """
    bare_names = ("Autowired", "Resource", "Inject")
    annotations = {f"@{name}" for name in bare_names}
    annotations.update(bare_names)
    # NOTE(review): there is no bare "Value" entry — presumably deliberate; confirm.
    annotations.add("@Value")
    return annotations
|
|
73
|
+
|
|
74
|
+
def get_controller_annotations(self) -> list[str]:
    """Return the annotations that mark a class as a controller, as a new list."""
    controller_markers = ("@RestController", "@Controller")
    return list(controller_markers)
|
|
76
|
+
|
|
77
|
+
def get_entity_annotations(self) -> list[str]:
    """Return the annotations that mark a class as an entity, as a new list."""
    entity_markers = ("@Entity", "@Table", "@TableName")
    return list(entity_markers)
|
|
79
|
+
|
|
80
|
+
def get_source_extensions(self) -> list[str]:
    """Return the source-file extensions (leading dot included) handled by this preset."""
    extensions = [".java"]
    extensions.append(".kt")
    return extensions
|
|
82
|
+
|
|
83
|
+
def get_test_path_patterns(self) -> list[str]:
    """Return the slash-delimited path fragments that identify test locations."""
    test_locations = ("test", "tests", "src/test")
    return [f"/{location}/" for location in test_locations]
|
|
85
|
+
|
|
86
|
+
def get_split_name_suffixes(self) -> tuple[str, ...]:
    """Return the class-name suffixes this preset declares for split naming.

    The original ordering is kept ("ServiceImpl" ahead of "Service").
    """
    split_suffixes = (
        "ServiceImpl",
        "Service",
        "Handler",
        "Processor",
        "Manager",
        "Controller",
    )
    return split_suffixes
|
|
88
|
+
|
|
89
|
+
def count_source_files(self, source_cache: Path) -> tuple[int, int]:
    """Count ``.java``/``.kt`` source files and their lines, skipping test directories.

    Directories named ``test`` or ``tests`` anywhere below *source_cache* are
    excluded together with everything underneath them. Only directories
    below *source_cache* are considered, so the location of the cache itself
    does not influence the result.

    Args:
        source_cache: Root directory to scan.

    Returns:
        ``(file_count, line_count)``. ``(0, 0)`` when *source_cache* is not a
        directory.
    """
    if not source_cache.is_dir():
        return 0, 0

    source_suffixes = (".java", ".kt")
    # Directory names behind the "/test/", "/tests/" and "/src/test/" fragments
    # returned by get_test_path_patterns().
    test_dir_names = {"test", "tests"}

    total_files, total_lines = 0, 0
    for root, dirnames, filenames in os.walk(source_cache):
        # Prune in place: os.walk then never enters a test tree, so files
        # sitting directly inside a test directory are excluded as well.
        dirnames[:] = [name for name in dirnames if name not in test_dir_names]
        for fn in filenames:
            if not fn.endswith(source_suffixes):
                continue
            total_files += 1
            try:
                with open(os.path.join(root, fn), encoding="utf-8", errors="replace") as f:
                    total_lines += sum(1 for _ in f)
            except OSError:
                # Best effort: an unreadable file is still counted as a file
                # but contributes no lines.
                continue
    return total_files, total_lines
|
|
107
|
+
|
|
108
|
+
def extract_package_from_fqn(self, fqn: str) -> str:
    """Return everything before the last dot of *fqn*, or "" when it has no dot."""
    package, _dot, _simple_name = fqn.rpartition(".")
    return package
|
|
112
|
+
|
|
113
|
+
def get_focus_hint(self, doc_type: str) -> str:
    """Return the split-planning focus hint for *doc_type*.

    Hints exist for "business-logic", "data-models", "architecture",
    "utils" and "api-contracts"; any other doc type yields an empty string.
    """
    focus_by_doc_type = {
        "business-logic": "关注 Service、Handler、Processor 等业务处理类",
        "data-models": "关注 Entity、DTO、VO 等数据模型类",
        "architecture": "关注 Config、Interceptor、Filter 等架构组件",
        "utils": "关注 Helper、Converter、Mapper 等工具类",
        "api-contracts": "关注 Controller、API、Client 等接口类",
    }
    try:
        return focus_by_doc_type[doc_type]
    except KeyError:
        # Doc types without a dedicated hint get no extra guidance.
        return ""
|