sourcecode 0.49.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sourcecode/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "0.49.0"
3
+ __version__ = "1.1.0"
@@ -176,6 +176,36 @@ class ArchitectureAnalyzer:
176
176
 
177
177
  # Step 1: filter paths
178
178
  filtered = self._filter_paths(sm.file_paths)
179
+
180
+ # Step 1b: DDD filesystem detection — runs before the filtered-paths guard
181
+ # because DDD signals live in directory structure, not just file extensions.
182
+ ddd_result = self._detect_ddd(sm.file_paths)
183
+ if ddd_result is not None:
184
+ ddd_pattern, ddd_layers, ddd_contexts, ddd_layer_names = ddd_result
185
+ domains_for_ddd = self._cluster_domains(filtered) if len(filtered) >= 2 else []
186
+ bc_list = [BoundedContext(name=n, confidence="high") for n in ddd_contexts]
187
+ return ArchitectureAnalysis(
188
+ requested=True,
189
+ pattern=ddd_pattern,
190
+ domains=domains_for_ddd,
191
+ layers=ddd_layers,
192
+ bounded_contexts=bc_list,
193
+ ddd_layers_detected=ddd_layer_names,
194
+ confidence="high",
195
+ method="filesystem_inference",
196
+ limitations=[],
197
+ evidence=[{
198
+ "type": "filesystem_naming",
199
+ "paths": [f"{ddd_contexts[0]}/" if ddd_contexts else ""],
200
+ "reason": (
201
+ f"DDD layout detected: {len(ddd_contexts)} modules under common prefix "
202
+ "each contain application/, domain/, infrastructure/ subdirectories."
203
+ ),
204
+ "confidence": "high",
205
+ }],
206
+ tentative=False,
207
+ )
208
+
179
209
  if len(filtered) < 2:
180
210
  return ArchitectureAnalysis(
181
211
  requested=True,
@@ -333,6 +363,57 @@ class ArchitectureAnalyzer:
333
363
  # Private helpers
334
364
  # ------------------------------------------------------------------
335
365
 
366
+ def _detect_ddd(
367
+ self, paths: list[str]
368
+ ) -> "Optional[tuple[str, list[ArchitectureLayer], list[str], list[str]]]":
369
+ """Detect DDD: ≥5 modules under a common prefix each with application/domain/infrastructure."""
370
+ _DDD_LAYERS = frozenset({"application", "domain", "infrastructure"})
371
+ _DDD_MIN_MODULES = 5
372
+
373
+ # Map (prefix, module) → set of DDD layer names found under that module
374
+ prefix_module_layers: dict[tuple[str, str], set[str]] = {}
375
+
376
+ for p in paths:
377
+ parts = p.replace("\\", "/").split("/")
378
+ for i, part in enumerate(parts):
379
+ if part in _DDD_LAYERS and i >= 2:
380
+ module = parts[i - 1]
381
+ prefix = "/".join(parts[:i - 1])
382
+ key = (prefix, module)
383
+ prefix_module_layers.setdefault(key, set()).add(part)
384
+ break
385
+
386
+ # Group by prefix; find prefixes where ≥5 modules have all 3 DDD layers
387
+ prefix_modules: dict[str, list[str]] = {}
388
+ for (prefix, module), layers_found in prefix_module_layers.items():
389
+ if _DDD_LAYERS <= layers_found: # module has all 3
390
+ prefix_modules.setdefault(prefix, []).append(module)
391
+
392
+ best_prefix = max(
393
+ prefix_modules,
394
+ key=lambda p: len(prefix_modules[p]),
395
+ default=None,
396
+ )
397
+ if best_prefix is None or len(prefix_modules[best_prefix]) < _DDD_MIN_MODULES:
398
+ return None
399
+
400
+ bounded_context_names = sorted(set(prefix_modules[best_prefix]))
401
+ ddd_layer_names = sorted(_DDD_LAYERS)
402
+
403
+ arch_layers: list[ArchitectureLayer] = [
404
+ ArchitectureLayer(
405
+ name=layer,
406
+ pattern="ddd",
407
+ files=[
408
+ p for p in paths
409
+ if f"/{layer}/" in p.replace("\\", "/")
410
+ ],
411
+ confidence="high",
412
+ )
413
+ for layer in ddd_layer_names
414
+ ]
415
+ return "ddd", arch_layers, bounded_context_names, ddd_layer_names
416
+
336
417
  def _is_tooling(self, path: str) -> bool:
337
418
  norm = path.replace("\\", "/")
338
419
  return any(norm.startswith(p) for p in _TOOLING_PREFIXES)
@@ -84,6 +84,11 @@ class ArchitectureSummarizer:
84
84
  elif suffix in {".cs", ".fs", ".vb"}:
85
85
  lang_lines = self._summarize_dotnet_entry(sm.stacks)
86
86
 
87
+ # MyBatis XML mapper count line (Java projects)
88
+ mybatis_line = self._mybatis_summary_line(file_paths)
89
+ if mybatis_line:
90
+ lang_lines.append(mybatis_line)
91
+
87
92
  # Merge: rich lines first, stack-specific details appended (deduped)
88
93
  lines = rich_lines + [l for l in lang_lines if l not in rich_lines]
89
94
 
@@ -296,6 +301,13 @@ class ArchitectureSummarizer:
296
301
  lines.append("Orquesta el arranque de la aplicacion JVM.")
297
302
  return lines
298
303
 
304
+ def _mybatis_summary_line(self, file_paths: list[str]) -> str | None:
305
+ """Return a summary line when >5 MyBatis XML mappers are detected."""
306
+ mapper_xml_count = sum(1 for p in file_paths if p.endswith("Mapper.xml"))
307
+ if mapper_xml_count > 5:
308
+ return f"MyBatis XML mappers: {mapper_xml_count} *Mapper.xml detected."
309
+ return None
310
+
299
311
  def _summarize_dotnet_entry(self, stacks: list[StackDetection]) -> list[str]:
300
312
  dotnet_stacks = [s for s in stacks if s.stack == "dotnet"]
301
313
  if not dotnet_stacks:
@@ -15,12 +15,19 @@ from sourcecode.schema import FrameworkDetection
15
15
  from sourcecode.tree_utils import flatten_file_tree
16
16
 
17
17
  _MAX_FILE_SIZE = 256 * 1024 # 256 KB
18
- _MAX_JAVA_ENTRY_SCAN = 200
19
- _MAX_ANNOTATION_ENTRY_POINTS = 20
18
+ _MAX_JAVA_ENTRY_SCAN = 1000
19
+ _MAX_ANNOTATION_ENTRY_POINTS = 500
20
20
 
21
- _REST_CONTROLLER_RE = re.compile(r'@(?:Rest)?Controller\b')
21
+ _REST_CONTROLLER_RE = re.compile(r'@RestController\b')
22
+ _MVC_CONTROLLER_RE = re.compile(r'@Controller\b')
23
+ _REQUEST_MAPPING_RE = re.compile(r'@RequestMapping\b')
24
+ _CONTROLLER_ADVICE_RE = re.compile(r'@ControllerAdvice\b')
22
25
  _WEB_FILTER_RE = re.compile(r'@WebFilter\b')
23
26
  _FILTER_BEAN_RE = re.compile(r'FilterRegistrationBean\b')
27
+ # Extracts path from @RequestMapping("/v1/foo"), @GetMapping("/bar"), etc.
28
+ _HTTP_PATH_RE = re.compile(
29
+ r'@(?:Request|Get|Post|Put|Delete|Patch)Mapping\s*\(\s*(?:value\s*=\s*)?["\']([^"\']+)["\']'
30
+ )
24
31
 
25
32
 
26
33
  class JavaDetector(AbstractDetector):
@@ -81,6 +88,8 @@ class JavaDetector(AbstractDetector):
81
88
  frameworks.append(FrameworkDetection(name="Vert.x", source=source))
82
89
  if "jakarta.ee" in text or "javax.ws.rs" in text:
83
90
  frameworks.append(FrameworkDetection(name="Jakarta EE", source=source))
91
+ if "mybatis" in text:
92
+ frameworks.append(FrameworkDetection(name="MyBatis", source=source))
84
93
  return frameworks
85
94
 
86
95
  def _collect_entry_points(self, context: DetectionContext) -> list[EntryPoint]:
@@ -139,13 +148,30 @@ class JavaDetector(AbstractDetector):
139
148
  return []
140
149
 
141
150
  # Quick pre-filter before running regexes
142
- if "Controller" not in content and "Filter" not in content:
151
+ if ("Controller" not in content and "Filter" not in content
152
+ and "ControllerAdvice" not in content):
143
153
  return []
144
154
 
145
155
  if _REST_CONTROLLER_RE.search(content):
156
+ http_path_match = _HTTP_PATH_RE.search(content)
157
+ http_path = http_path_match.group(1) if http_path_match else None
146
158
  return [EntryPoint(
147
- path=rel_path, stack="java", kind="http_handler",
159
+ path=rel_path, stack="java", kind="rest_controller",
148
160
  source="annotation", confidence="high",
161
+ http_path=http_path,
162
+ )]
163
+ if _CONTROLLER_ADVICE_RE.search(content):
164
+ return [EntryPoint(
165
+ path=rel_path, stack="java", kind="exception_handler",
166
+ source="annotation", confidence="medium",
167
+ )]
168
+ if _MVC_CONTROLLER_RE.search(content) and _REQUEST_MAPPING_RE.search(content):
169
+ http_path_match = _HTTP_PATH_RE.search(content)
170
+ http_path = http_path_match.group(1) if http_path_match else None
171
+ return [EntryPoint(
172
+ path=rel_path, stack="java", kind="mvc_controller",
173
+ source="annotation", confidence="medium",
174
+ http_path=http_path,
149
175
  )]
150
176
  if _WEB_FILTER_RE.search(content):
151
177
  return [EntryPoint(
@@ -35,6 +35,15 @@ _SPRING_ENV_VAR_RE = re.compile(r'\$\{([A-Z][A-Z0-9_]*)(?::([^}]*))?\}')
35
35
  # These are internal property cross-references, not OS env vars, but still config signals.
36
36
  _SPRING_PROP_REF_RE = re.compile(r'\$\{([a-z][a-z0-9]*(?:\.[a-z][a-z0-9_-]*)*)(?::([^}]*))?\}')
37
37
 
38
+ # Known Spring-internal namespaces — NOT emitted as custom application properties.
39
+ _SPRING_BUILTIN_NAMESPACES: frozenset[str] = frozenset({
40
+ "spring", "logging", "management", "server", "info", "debug",
41
+ "endpoints", "security", "eureka", "feign", "ribbon", "hystrix",
42
+ "zuul", "cloud", "flyway", "liquibase", "jpa", "datasource",
43
+ "kafka", "rabbitmq", "redis", "mail", "thymeleaf", "mvc",
44
+ "web", "actuator", "metrics", "tracing",
45
+ })
46
+
38
47
  # Patterns where absence of the variable causes a hard runtime error (not just None/null).
39
48
  # py_environ_bracket → os.environ["KEY"] raises KeyError
40
49
  # java_spring_value → Spring fails to start if ${KEY} has no default
@@ -223,6 +232,66 @@ def _extract_spring_profile(filename: str) -> Optional[str]:
223
232
  return None
224
233
 
225
234
 
235
+ def _parse_yaml_custom_properties(
236
+ content: str,
237
+ rel_path: str,
238
+ profile: Optional[str],
239
+ findings: dict,
240
+ ) -> None:
241
+ """Extract custom namespace leaf properties from YAML (e.g. saint.ldap.url).
242
+
243
+ Builds dotted key paths by tracking indentation levels. Emits only properties
244
+ whose top-level namespace is NOT a well-known Spring built-in namespace.
245
+ """
246
+ # Stack of (indent, key_segment)
247
+ key_stack: list[tuple[int, str]] = []
248
+
249
+ for line in content.splitlines():
250
+ stripped = line.lstrip()
251
+ if not stripped or stripped.startswith('#'):
252
+ continue
253
+ if ':' not in stripped:
254
+ continue
255
+
256
+ indent = len(line) - len(stripped)
257
+ colon_idx = stripped.index(':')
258
+ key_part = stripped[:colon_idx].strip()
259
+ value_part = stripped[colon_idx + 1:].strip() if colon_idx + 1 < len(stripped) else ""
260
+
261
+ # Only plain identifiers (no special chars)
262
+ if not re.match(r'^[a-zA-Z][a-zA-Z0-9_-]*$', key_part):
263
+ continue
264
+
265
+ # Pop stack entries at same or deeper indent
266
+ while key_stack and key_stack[-1][0] >= indent:
267
+ key_stack.pop()
268
+
269
+ key_stack.append((indent, key_part))
270
+
271
+ # Only emit leaf values (non-empty, not a nested mapping start)
272
+ if not value_part or value_part.startswith('{') or value_part.startswith('['):
273
+ continue
274
+
275
+ # Reconstruct full dotted key
276
+ full_key = '.'.join(seg for _, seg in key_stack)
277
+ top_ns = key_stack[0][1].lower()
278
+
279
+ # Skip Spring built-in namespaces
280
+ if top_ns in _SPRING_BUILTIN_NAMESPACES:
281
+ continue
282
+
283
+ # Skip entries that look like ${...} references (already handled elsewhere)
284
+ if value_part.startswith('${'):
285
+ continue
286
+
287
+ # Strip inline YAML comments
288
+ clean_value = value_part.split('#')[0].strip()
289
+ if not clean_value:
290
+ continue
291
+
292
+ findings[full_key].append((rel_path, clean_value, False, profile))
293
+
294
+
226
295
  def _parse_spring_config(
227
296
  path: Path,
228
297
  rel_path: str,
@@ -234,6 +303,7 @@ def _parse_spring_config(
234
303
  Returns the total number of ${...} placeholders found (candidates).
235
304
  Captures default values from ${VAR:default} syntax.
236
305
  Marks vars without defaults as hard-required (Spring fails to start if missing).
306
+ Also extracts custom namespace properties (saint.*, app.*, etc.) as yml_property entries.
237
307
  """
238
308
  try:
239
309
  content = path.read_text(encoding="utf-8", errors="replace")
@@ -267,6 +337,10 @@ def _parse_spring_config(
267
337
  findings[key].append((f"{rel_path}:{line_num}", default, False, profile))
268
338
  candidates += 1
269
339
 
340
+ # 3. Custom YAML namespace properties (YAML/YML files only)
341
+ if rel_path.endswith((".yml", ".yaml")):
342
+ _parse_yaml_custom_properties(content, rel_path, profile, findings)
343
+
270
344
  return candidates
271
345
 
272
346
 
@@ -320,14 +394,17 @@ class EnvAnalyzer:
320
394
  first_profile = prof
321
395
  if len(unique_files) >= _MAX_FILES_PER_KEY:
322
396
  break
397
+ # Custom YAML properties use lowercase.dotted keys and category "application"
398
+ is_yml_prop = '.' in key and key[0].islower()
323
399
  records[key] = EnvVarRecord(
324
400
  key=key,
325
401
  required=required,
326
402
  default=default_val,
327
403
  type_hint=_infer_type_hint(key),
328
- category=_infer_category(key),
404
+ category="application" if is_yml_prop else _infer_category(key),
329
405
  files=unique_files,
330
406
  profile=first_profile,
407
+ source="yml_property" if is_yml_prop else None,
331
408
  )
332
409
 
333
410
  # 2. Supplement with .env.example entries (fill description + add missing keys)
@@ -372,6 +449,8 @@ class EnvAnalyzer:
372
449
  "extracted. Duplicates across profiles collapsed."
373
450
  )
374
451
 
452
+ # spring_profiles: named profiles only (exclude "default")
453
+ _named_profiles = sorted({p for p in profiles_scanned if p != "default"})
375
454
  summary = EnvSummary(
376
455
  requested=True,
377
456
  total=len(sorted_records),
@@ -383,6 +462,7 @@ class EnvAnalyzer:
383
462
  profiles_scanned=sorted(set(profiles_scanned)),
384
463
  spring_candidates=spring_candidates,
385
464
  coverage_note=coverage_note,
465
+ spring_profiles=_named_profiles,
386
466
  )
387
467
 
388
468
  return sorted_records, summary
@@ -78,6 +78,29 @@ _IMPORT_RE = re.compile(
78
78
  )
79
79
  _DEF_RE = re.compile(r"\b(class|def|function|const|export\s+class|interface|type)\s+[A-Za-z_]", re.MULTILINE)
80
80
 
81
+ # Java Spring stereotype annotation detection
82
+ _JAVA_ANNOTATION_RE = re.compile(r'@(RestController|Controller|Service|Repository|Mapper|Entity|Data|Configuration|EnableWebSecurity|ControllerAdvice|Transactional)\b')
83
+
84
+ # (annotation_set, category, relevance, why_template)
85
+ # Checked in priority order; first match wins.
86
+ _JAVA_STEREOTYPE_RULES: list[tuple[frozenset, str, float, str]] = [
87
+ (frozenset({"EnableWebSecurity"}), "security", 0.85, "Spring Security configuration"),
88
+ (frozenset({"RestController"}), "api_endpoint", 0.90, "Spring REST controller — defines HTTP API surface"),
89
+ (frozenset({"Controller", "RequestMapping"}), "api_endpoint", 0.80, "Spring MVC controller"),
90
+ (frozenset({"Service", "Transactional"}), "business_logic", 0.75, "Transactional service — business logic boundary"),
91
+ (frozenset({"Service"}), "business_logic", 0.65, "Spring service component"),
92
+ (frozenset({"Repository"}), "data_access", 0.65, "Spring repository — data access layer"),
93
+ (frozenset({"Mapper"}), "data_access", 0.65, "MyBatis mapper — SQL data access"),
94
+ (frozenset({"Configuration"}), "configuration", 0.70, "Spring configuration class"),
95
+ (frozenset({"Entity"}), "domain_model", 0.50, "JPA entity — domain model"),
96
+ (frozenset({"Data"}), "dto", 0.40, "Lombok DTO"),
97
+ ]
98
+
99
+ # Categories produced by Java stereotype detection — used downstream to apply direct relevance
100
+ JAVA_STEREOTYPE_CATEGORIES: frozenset[str] = frozenset(
101
+ cat for _, cat, _, _ in _JAVA_STEREOTYPE_RULES
102
+ )
103
+
81
104
 
82
105
  class FileClassifier:
83
106
  def __init__(
@@ -138,6 +161,12 @@ class FileClassifier:
138
161
  if norm in self.production_entry_paths:
139
162
  return FileClassification(norm, "runtime_core", "high", 0.95, "declared production runtime entrypoint", ["entry_points"])
140
163
 
164
+ # Java Spring stereotype detection (Java/Kotlin files only)
165
+ if suffix in {".java", ".kt"}:
166
+ java_class = self._classify_java_stereotype(norm, content)
167
+ if java_class is not None:
168
+ return java_class
169
+
141
170
  if self._has_any_import(imports, _API_IMPORTS):
142
171
  evidence = self._matched_imports(imports, _API_IMPORTS)
143
172
  return FileClassification(norm, "api_layer", "high", 0.82, "imports API/server framework", evidence)
@@ -213,3 +242,21 @@ class FileClassifier:
213
242
  def _sample(self, imports: list[str]) -> list[str]:
214
243
  return [f"import:{imp}" for imp in imports[:4]]
215
244
 
245
+ def _classify_java_stereotype(self, path: str, content: str) -> "FileClassification | None":
246
+ """Classify Java file by Spring/JPA/MyBatis annotation stereotypes."""
247
+ if not content:
248
+ return None
249
+ found = frozenset(m.group(1) for m in _JAVA_ANNOTATION_RE.finditer(content))
250
+ if not found:
251
+ return None
252
+ for required_annotations, category, relevance, why in _JAVA_STEREOTYPE_RULES:
253
+ # For @Data DTO: must have @Data but NOT @Entity
254
+ if required_annotations == frozenset({"Data"}):
255
+ if "Data" in found and "Entity" not in found:
256
+ return FileClassification(path, category, "high", relevance, why, list(found))
257
+ continue
258
+ # For compound rules (Service+Transactional, Controller+RequestMapping): all required
259
+ if required_annotations <= found:
260
+ return FileClassification(path, category, "high", relevance, why, list(found))
261
+ return None
262
+
@@ -332,6 +332,50 @@ _SOURCE_EXTENSIONS: frozenset[str] = frozenset({
332
332
  ".go", ".rs", ".rb", ".php", ".cs", ".dart",
333
333
  })
334
334
 
335
+
336
+ def _extract_ddd_domain(path: str) -> str:
337
+ """Extract domain name from DDD package path.
338
+
339
+ For m3informatica.saint.ddd.{domain}.infrastructure.rest.*RestController
340
+ the domain is the segment just before application/ domain/ or infrastructure/.
341
+ """
342
+ parts = path.replace("\\", "/").split("/")
343
+ _DDD_LAYERS = {"application", "domain", "infrastructure"}
344
+ for i, part in enumerate(parts):
345
+ if part in _DDD_LAYERS and i >= 1:
346
+ return parts[i - 1]
347
+ # Fallback: penultimate directory segment
348
+ if len(parts) >= 2:
349
+ return parts[-2]
350
+ return ""
351
+
352
+
353
+ def _java_why(path: str, file_class: "Optional[object]") -> str:
354
+ """Generate why string for Java files based on stereotype classification."""
355
+ if file_class is None:
356
+ return ""
357
+ from sourcecode.file_classifier import JAVA_STEREOTYPE_CATEGORIES
358
+ category = getattr(file_class, "category", "")
359
+ if category not in JAVA_STEREOTYPE_CATEGORIES:
360
+ return ""
361
+ domain = _extract_ddd_domain(path)
362
+ class_name = Path(path).stem
363
+ if category == "api_endpoint":
364
+ return f"Defines HTTP endpoints for the {domain} domain" if domain else "Defines HTTP API endpoints"
365
+ if category == "business_logic":
366
+ return f"Orchestrates {domain} business logic" if domain else "Business logic service"
367
+ if category == "data_access":
368
+ return f"SQL queries for {domain} data access" if domain else "Data access layer"
369
+ if category == "domain_model":
370
+ return f"JPA entity for {class_name} persistence"
371
+ if category == "configuration":
372
+ return getattr(file_class, "reason", "Spring configuration class")
373
+ if category == "security":
374
+ return getattr(file_class, "reason", "Spring Security configuration")
375
+ if category == "dto":
376
+ return f"Lombok DTO — {class_name}"
377
+ return getattr(file_class, "reason", "")
378
+
335
379
  _ALL_EXTENSIONS: frozenset[str] = _SOURCE_EXTENSIONS | frozenset({
336
380
  ".md", ".toml", ".yaml", ".yml", ".json", ".xml",
337
381
  })
@@ -726,12 +770,14 @@ class TaskContextBuilder:
726
770
  )
727
771
  all_reasons = [r for r in fs.reasons if r != "source file"] + content_reasons
728
772
  reason_str = ", ".join(all_reasons) if all_reasons else "source file"
773
+ why_str = _java_why(path, file_class)
729
774
 
730
775
  scored.append((total, path, RelevantFile(
731
776
  path=path,
732
777
  role=role,
733
778
  score=round(min(total / 3.0, 1.0), 2),
734
779
  reason=reason_str,
780
+ why=why_str,
735
781
  )))
736
782
 
737
783
  # Deterministic: score desc, then path asc as tiebreaker
sourcecode/schema.py CHANGED
@@ -79,6 +79,7 @@ class EntryPoint:
79
79
  classification: Optional[Literal["production", "development", "auxiliary"]] = None
80
80
  runtime_relevance: Optional[Literal["high", "medium", "low"]] = None
81
81
  produced_by: Optional[str] = None # which detector emitted this
82
+ http_path: Optional[str] = None # extracted from @RequestMapping / @GetMapping (Java REST controllers)
82
83
 
83
84
 
84
85
  @dataclass
@@ -413,6 +414,7 @@ class ArchitectureAnalysis:
413
414
  # True when pattern is inferred from weak signals (e.g. directory names only).
414
415
  # Agents must not treat tentative patterns as confirmed facts.
415
416
  tentative: bool = False
417
+ ddd_layers_detected: list[str] = field(default_factory=list) # e.g. ["application", "domain", "infrastructure"]
416
418
 
417
419
 
418
420
  # --- Env Map ---
@@ -425,10 +427,11 @@ class EnvVarRecord:
425
427
  required: bool = True
426
428
  default: Optional[str] = None
427
429
  type_hint: Optional[str] = None # string | int | bool | url | path | enum
428
- category: Optional[str] = None # database | cache | storage | auth | service | observability | feature_flag | server | general
430
+ category: Optional[str] = None # database | cache | storage | auth | service | observability | feature_flag | server | general | application
429
431
  description: Optional[str] = None
430
432
  files: list[str] = field(default_factory=list) # "path:line"
431
433
  profile: Optional[str] = None # Spring profile if first occurrence is in application-{profile}.yml
434
+ source: Optional[str] = None # yml_property | env_var | source_code
432
435
 
433
436
 
434
437
  @dataclass
@@ -446,6 +449,7 @@ class EnvSummary:
446
449
  profiles_scanned: list[str] = field(default_factory=list)
447
450
  spring_candidates: int = 0 # total ${VAR} refs found across Spring config files
448
451
  coverage_note: Optional[str] = None # explicit note about partial coverage
452
+ spring_profiles: list[str] = field(default_factory=list) # canonical list: profile names from application-{profile}.yml
449
453
 
450
454
 
451
455
  # --- Code Notes ---
sourcecode/serializer.py CHANGED
@@ -258,11 +258,19 @@ def _file_relevance(sm: SourceMap, *, limit: int = _FILE_RELEVANCE_LIMIT) -> lis
258
258
  and combined < 0.45):
259
259
  continue
260
260
 
261
+ # For Java stereotype annotations use the table relevance directly —
262
+ # the combined/2 formula would dilute the stereotype signal.
263
+ from sourcecode.file_classifier import JAVA_STEREOTYPE_CATEGORIES
264
+ if file_class and file_class.category in JAVA_STEREOTYPE_CATEGORIES:
265
+ relevance_val = round(file_class.relevance, 3)
266
+ else:
267
+ relevance_val = round(max(0.0, min(1.0, combined / 2.0)), 3)
268
+
261
269
  item: dict[str, Any] = {
262
270
  "path": path,
263
271
  "category": file_class.category if file_class else "source",
264
272
  "confidence": file_class.confidence if file_class else "low",
265
- "relevance": round(max(0.0, min(1.0, combined / 2.0)), 3),
273
+ "relevance": relevance_val,
266
274
  "reason": file_class.reason if file_class else (fs.reasons[0] if fs.reasons else "source file"),
267
275
  "evidence": file_class.evidence if file_class else [],
268
276
  }
@@ -301,6 +309,10 @@ def _architecture_context(sm: SourceMap) -> dict[str, Any]:
301
309
  ]
302
310
  else:
303
311
  ctx["no_layers_detected"] = True
312
+ if arch.bounded_contexts:
313
+ ctx["bounded_contexts"] = [bc.name for bc in arch.bounded_contexts]
314
+ if arch.ddd_layers_detected:
315
+ ctx["ddd_layers_detected"] = arch.ddd_layers_detected
304
316
  if arch.confidence == "low" and not pattern:
305
317
  ctx["note"] = "directory structure insufficient for reliable architectural inference; use --semantics for higher accuracy"
306
318
  if arch.limitations:
@@ -350,49 +362,72 @@ def _section_confidence(sm: SourceMap) -> dict[str, str]:
350
362
 
351
363
 
352
364
  def compact_view(sm: SourceMap, *, no_tree: bool = False) -> dict[str, Any]:
353
- """Context package ready for prompt or handoff (~600-800 tokens).
365
+ """Context package ready for prompt or handoff (~300-500 tokens).
354
366
 
355
367
  Answers: what it is, where it enters, what depends on what,
356
368
  what signals matter, and what uncertainty exists.
357
369
 
358
370
  Includes: project_type, project_summary, architecture_summary,
359
- stacks, entry_points, dependency_summary + key_dependencies (when analyzed),
371
+ stacks (minimal), entry_points (path+kind only), key_dependencies (name+version+role),
360
372
  env_summary (when analyzed), code_notes_summary (when analyzed),
361
- confidence_summary, anomalies, analysis_gaps.
373
+ confidence (overall only), analysis_gaps.
362
374
 
363
- Excludes: file_tree, raw dependency lists, docs, module_graph.
364
- Empty sections are explained when relevant.
375
+ Excludes: file_tree, raw dependency lists, docs, module_graph, verbose metadata.
365
376
  """
366
- dep_summary_dict: Any = None
377
+ # Key dependencies — name + version + role only (no ecosystem, source, manifests)
367
378
  key_deps: Any = None
368
379
  if sm.dependency_summary is not None and sm.dependency_summary.requested:
369
- dep_summary_dict = asdict(sm.dependency_summary)
370
- dep_summary_dict.pop("dependencies", None)
371
380
  key_deps = [
372
- asdict(d) for d in sm.key_dependencies
381
+ {
382
+ "name": d.name,
383
+ **({"version": d.declared_version} if d.declared_version else {}),
384
+ **({"role": d.role} if d.role and d.role != "runtime" else {}),
385
+ }
386
+ for d in sm.key_dependencies
373
387
  if (d.role or "unknown") in _PRODUCTION_DEP_ROLES and d.scope not in {"dev"}
374
388
  ][:_KEY_DEPS_CAP]
375
- elif sm.dependency_summary is None or not sm.dependency_summary.requested:
376
- dep_summary_dict = None # "not analyzed" — agent should add --dependencies
377
389
 
390
+ # Dependency summary — requested flag + count + source only
391
+ dep_summary_dict: Any = None
392
+ if sm.dependency_summary is not None and sm.dependency_summary.requested:
393
+ ds = sm.dependency_summary
394
+ dep_summary_dict = {
395
+ "requested": True,
396
+ "total_count": ds.total_count,
397
+ "direct": ds.direct_count,
398
+ **({"sources": ds.sources} if ds.sources else {}),
399
+ }
400
+
401
+ # Env map — key + required + category only (drop type_hint, files list)
378
402
  env_summary_dict: Any = None
379
403
  env_map_items: Any = None
380
404
  if sm.env_summary is not None and sm.env_summary.requested:
381
- env_summary_dict = asdict(sm.env_summary)
405
+ env_summary_dict = {
406
+ "total": sm.env_summary.total,
407
+ "required": sm.env_summary.required_count,
408
+ **({"categories": sm.env_summary.categories} if sm.env_summary.categories else {}),
409
+ }
382
410
  if sm.env_map:
383
411
  _sorted_env = sorted(
384
412
  sm.env_map,
385
413
  key=lambda e: (not getattr(e, "required", False), getattr(e, "key", "")),
386
414
  )
387
415
  env_map_items = [
388
- {k: v for k, v in asdict(e).items() if v is not None and v != "" and v != []}
416
+ {
417
+ "key": getattr(e, "key", ""),
418
+ **({"required": True} if getattr(e, "required", False) else {}),
419
+ **({"category": getattr(e, "category", None)} if getattr(e, "category", None) else {}),
420
+ }
389
421
  for e in _sorted_env[:_ENV_MAP_CAP]
390
422
  ]
391
423
 
424
+ # Code notes — kind + path + line + truncated text only
392
425
  code_notes_summary_dict: Any = None
393
426
  code_notes_items: Any = None
394
427
  if sm.code_notes_summary is not None and sm.code_notes_summary.requested:
395
- code_notes_summary_dict = asdict(sm.code_notes_summary)
428
+ cn = sm.code_notes_summary
429
+ by_kind = {k: v for k, v in cn.by_kind.items() if v > 0}
430
+ code_notes_summary_dict = {"total": cn.total, **({"by_kind": by_kind} if by_kind else {})}
396
431
  if sm.code_notes:
397
432
  _SEVERITY_ORDER = {"BUG": 0, "FIXME": 1, "DEPRECATED": 2, "TODO": 3, "HACK": 4, "WARNING": 5}
398
433
  _sorted_notes = sorted(
@@ -400,43 +435,62 @@ def compact_view(sm: SourceMap, *, no_tree: bool = False) -> dict[str, Any]:
400
435
  key=lambda n: (_SEVERITY_ORDER.get(getattr(n, "kind", "").upper(), 9), getattr(n, "path", "")),
401
436
  )
402
437
  code_notes_items = [
403
- {k: v for k, v in asdict(n).items() if v is not None}
438
+ {
439
+ "kind": getattr(n, "kind", ""),
440
+ "path": getattr(n, "path", ""),
441
+ "line": getattr(n, "line", None),
442
+ **({"text": getattr(n, "text", "")[:60]} if getattr(n, "text", "") else {}),
443
+ }
404
444
  for n in _sorted_notes[:_CODE_NOTES_CAP]
405
445
  ]
406
446
 
407
- # Entry points: production runtime only, capped.
408
- # Development entries shown separately; auxiliary omitted from compact view.
447
+ # Entry points — path + kind + confidence only
409
448
  ep_groups = _entry_point_groups(sm.entry_points)
410
- entry_points_compact = ep_groups["production"][:_EP_PRODUCTION_CAP]
411
- if not entry_points_compact:
412
- entry_points_compact = [] # truth signal: no production runtime detected
449
+ entry_points_compact = [
450
+ {
451
+ "path": ep["path"],
452
+ **({"kind": ep["kind"]} if ep.get("kind") else {}),
453
+ **({"confidence": ep["confidence"]} if ep.get("confidence") else {}),
454
+ }
455
+ for ep in ep_groups["production"][:_EP_PRODUCTION_CAP]
456
+ ]
457
+
458
+ # Stacks — name + method + confidence + frameworks (names only)
459
+ stacks_compact = [
460
+ {
461
+ "stack": s.stack,
462
+ "detection_method": s.detection_method,
463
+ "confidence": s.confidence,
464
+ **({"primary": True} if s.primary else {}),
465
+ **({"frameworks": [f.name for f in s.frameworks]} if s.frameworks else {}),
466
+ **({"package_manager": s.package_manager} if s.package_manager else {}),
467
+ }
468
+ for s in sm.stacks
469
+ ]
413
470
 
414
- # Confidence summary
471
+ # Confidence — overall, stack and entry-point levels, plus anomalies when present
415
472
  conf_dict: Any = None
416
- anomalies: Any = None
417
473
  if sm.confidence_summary is not None:
418
- conf_dict = asdict(sm.confidence_summary)
419
- if sm.confidence_summary.anomalies:
420
- anomalies = sm.confidence_summary.anomalies
474
+ cs = sm.confidence_summary
475
+ conf_dict = {"overall": cs.overall, "stack": cs.stack_confidence, "entry_points": cs.entry_point_confidence}
476
+ if cs.anomalies:
477
+ conf_dict["anomalies"] = cs.anomalies
421
478
 
422
479
  # Analysis gaps
423
480
  gaps_list: Any = None
424
481
  if sm.analysis_gaps:
425
- gaps_list = [asdict(g) for g in sm.analysis_gaps]
426
-
427
- context_summary_dict: Any = None
428
- if sm.context_summary is not None and sm.context_summary.requested:
429
- context_summary_dict = asdict(sm.context_summary)
482
+ gaps_list = [
483
+ {"area": g.area, "reason": g.reason, "impact": g.impact}
484
+ for g in sm.analysis_gaps
485
+ ]
430
486
 
431
487
  result: dict[str, Any] = {
432
488
  "schema_version": sm.metadata.schema_version,
433
489
  "project_type": sm.project_type,
434
490
  "project_summary": sm.project_summary,
435
491
  "architecture_summary": sm.architecture_summary,
436
- "context_summary": context_summary_dict,
437
- "stacks": [asdict(stack) for stack in sm.stacks],
492
+ "stacks": stacks_compact,
438
493
  "entry_points": entry_points_compact,
439
- "development_entry_points": (ep_groups["development"][:_EP_DEV_CAP] or None),
440
494
  "dependency_summary": dep_summary_dict,
441
495
  "key_dependencies": key_deps,
442
496
  "env_summary": env_summary_dict,
@@ -444,14 +498,10 @@ def compact_view(sm: SourceMap, *, no_tree: bool = False) -> dict[str, Any]:
444
498
  "code_notes_summary": code_notes_summary_dict,
445
499
  "code_notes": code_notes_items,
446
500
  "confidence_summary": conf_dict,
447
- "anomalies": anomalies,
448
501
  "analysis_gaps": gaps_list,
449
502
  }
450
- # Strip keys that are fully None and not informative
451
- return {k: v for k, v in result.items() if v is not None or k in (
452
- "project_type", "project_summary", "architecture_summary",
453
- "dependency_summary", "confidence_summary",
454
- )}
503
+ _always_include = {"project_type", "project_summary", "architecture_summary", "dependency_summary"}
504
+ return {k: v for k, v in result.items() if v is not None or k in _always_include}
455
505
 
456
506
 
457
507
  def normalize_source_map(sm: SourceMap) -> SourceMap:
@@ -827,6 +877,10 @@ def agent_view(sm: SourceMap) -> dict[str, Any]:
827
877
  }
828
878
  if sm.env_summary.categories:
829
879
  signals["env_vars"]["categories"] = sm.env_summary.categories
880
+ _spring_profiles = (sm.env_summary.spring_profiles
881
+ or sm.env_summary.profiles_scanned)
882
+ if _spring_profiles:
883
+ signals["env_vars"]["spring_profiles"] = sorted(set(_spring_profiles))
830
884
  if sm.env_map:
831
885
  _sorted_env = sorted(
832
886
  sm.env_map,
@@ -1005,7 +1059,11 @@ def standard_view(sm: SourceMap, *, include_tree: bool = False) -> dict[str, Any
1005
1059
  ][:_KEY_DEPS_CAP]
1006
1060
 
1007
1061
  if sm.env_summary is not None and sm.env_summary.requested:
1008
- result["env_summary"] = asdict(sm.env_summary)
1062
+ env_sum_dict = asdict(sm.env_summary)
1063
+ _sp = sm.env_summary.spring_profiles or sm.env_summary.profiles_scanned
1064
+ if _sp:
1065
+ env_sum_dict["spring_profiles"] = sorted(set(_sp))
1066
+ result["env_summary"] = env_sum_dict
1009
1067
  result["env_map"] = [asdict(e) for e in sm.env_map[:_ENV_MAP_CAP]]
1010
1068
 
1011
1069
  if sm.code_notes_summary is not None and sm.code_notes_summary.requested:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 0.49.0
3
+ Version: 1.1.0
4
4
  Summary: Deterministic codebase context for AI coding agents
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -221,349 +221,141 @@ Description-Content-Type: text/markdown
221
221
 
222
222
  **Deterministic codebase context for AI coding agents.**
223
223
 
224
- [![PyPI](https://img.shields.io/pypi/v/sourcecode)](https://pypi.org/project/sourcecode/)
225
- [![Python](https://img.shields.io/pypi/pyversions/sourcecode)](https://pypi.org/project/sourcecode/)
226
- [![License](https://img.shields.io/badge/license-Apache%202.0-blue)](LICENSE)
227
- [![CI](https://img.shields.io/github/actions/workflow/status/sourcecode-ai/sourcecode/ci.yml)](https://github.com/sourcecode-ai/sourcecode/actions)
228
-
229
- Turn any repository into structured, reproducible context optimized for AI coding agents — in one command.
230
-
231
- ```bash
232
- pip install sourcecode
233
- sourcecode . --agent
234
- ```
235
-
236
- ```json
237
- {
238
- "project": {
239
- "type": "api",
240
- "summary": "Python REST API built with FastAPI and SQLAlchemy. Layered architecture with domain, service, and infrastructure layers.",
241
- "primary_stack": "python",
242
- "frameworks": ["FastAPI", "SQLAlchemy"]
243
- },
244
- "entry_points": [
245
- { "path": "src/app/main.py", "kind": "server", "confidence": "high" }
246
- ],
247
- "architecture": "FastAPI application. Clean Architecture with domain, application, and infrastructure layers. Hub modules: schema.py, models.py.",
248
- "key_dependencies": [
249
- { "name": "fastapi", "declared_version": ">=0.100", "role": "runtime" },
250
- { "name": "sqlalchemy", "declared_version": "^2.0", "role": "runtime" },
251
- { "name": "pydantic", "declared_version": "^2.0", "role": "runtime" }
252
- ],
253
- "confidence_summary": { "overall": "high" }
254
- }
255
- ```
224
+ ![Version](https://img.shields.io/badge/version-1.1.0-blue)
225
+ ![Status](https://img.shields.io/badge/status-MVP-orange)
226
+ ![Python](https://img.shields.io/badge/python-3.10%2B-green)
256
227
 
257
228
  ---
258
229
 
259
- ## The problem
230
+ ## What is it?
260
231
 
261
- AI coding agents are only as good as the context they receive. In large, real-world repositories, that context is almost always wrong.
262
-
263
- - **Agents start blind.** Without repo structure, they hallucinate imports, file paths, and architecture decisions.
264
- - **Context is noisy.** Raw file trees contain benchmark dirs, generated files, tooling configs, and docs that consume tokens without helping.
265
- - **Architecture is invisible.** LLMs see files, not systems. They miss layers, plugin systems, entry points, and runtime topology.
266
- - **Context decays.** What you paste today is stale tomorrow. There's no reproducible baseline.
267
- - **Manual context doesn't scale.** Handcrafting prompts per project is engineering debt that grows with every new agent, team, and task.
232
+ `sourcecode` analyzes a repository and produces a structured context map (JSON or YAML) designed to be consumed by AI agents or language models. Instead of stuffing the whole repo into the prompt, it produces a deterministic extract: entry points, dependencies, stacks, inline annotations, environment variables, and git activity. It is an MVP tool under active evolution — the semantic analysis and module graph features work but have known limitations that are explicitly documented below.
268
233
 
269
234
  ---
270
235
 
271
- ## The solution
272
-
273
- `sourcecode` analyzes your repository and produces a structured, reproducible context package — ready to inject into any AI coding agent.
274
-
275
- **What it does:**
276
- - Detects stacks, frameworks, entry points, and project type across 10+ languages
277
- - Infers runtime topology: which packages are core, which are plugins, which are noise
278
- - Ranks files by operational relevance for agents: git churn + runtime proximity + bootstrap signal
279
- - Suppresses non-runtime noise: benchmarks, docs, tooling, generated files
280
- - Produces structured JSON/YAML that agents can reason over, not raw file trees
281
- - Runs deterministically — same repo, same output, every time
282
-
283
- **What it outputs:**
284
- - `project_summary` — one-sentence natural language description
285
- - `architecture_summary` — runtime topology: layers, plugin systems, entry flows
286
- - `entry_points` — where execution actually starts (production, not benchmarks)
287
- - `key_dependencies` — runtime dependencies with role classification
288
- - `relevant_files` — ranked by usefulness for coding tasks, not folder position
289
- - `confidence_summary` — detection quality and analysis gaps
290
-
291
- All fields are stable, machine-readable, and designed for LLM consumption.
292
-
293
- ---
236
+ ## Installation
294
237
 
295
- ## Install
238
+ **Prerequisites:** Python 3.10+
296
239
 
297
240
  ```bash
298
241
  pip install sourcecode
242
+ # or with pipx for isolation:
243
+ pipx install sourcecode
299
244
  ```
300
245
 
301
- Requires Python 3.9+. No API keys. No network calls. Runs locally.
302
-
303
- ---
304
-
305
- ## Quickstart
306
-
307
- **Basic analysis:**
308
- ```bash
309
- sourcecode .
310
- ```
311
-
312
- **Agent-optimized output** (structured, noise-free, gap-aware):
313
- ```bash
314
- sourcecode . --agent
315
- ```
316
-
317
- **Task-specific context for coding agents:**
318
- ```bash
319
- # Explain the project architecture
320
- sourcecode . prepare-context explain
321
-
322
- # Find likely bug locations
323
- sourcecode . prepare-context fix-bug
324
-
325
- # Onboard a new agent to the codebase
326
- sourcecode . prepare-context onboard
327
-
328
- # Ranked context for a specific task
329
- sourcecode . prepare-context refactor
330
- ```
331
-
332
- **Pipe directly into Claude Code or any agent:**
333
- ```bash
334
- sourcecode . --agent | claude -p "Review the architecture and suggest improvements"
335
- ```
336
-
337
- **Write to file for session injection:**
338
- ```bash
339
- sourcecode . --agent --output context.json
340
- ```
246
+ Verify installation:
341
247
 
342
- **Include git activity signals:**
343
248
  ```bash
344
- sourcecode . --agent --git-context
249
+ sourcecode version
250
+ # sourcecode 1.1.0
345
251
  ```
346
252
 
347
253
  ---
348
254
 
349
- ## Use cases
350
-
351
- ### Claude Code
352
- ```bash
353
- # Start every session with full context
354
- sourcecode . --agent > .claude/context.json
355
-
356
- # Use with CLAUDE.md for persistent context
357
- echo "$(sourcecode . --agent --compact)" >> CLAUDE.md
358
- ```
359
-
360
- ### Cursor / Windsurf / Copilot
361
- ```bash
362
- # Generate context snapshot before starting a feature
363
- sourcecode . --agent --git-context --output .cursor/context.json
364
- ```
365
-
366
- ### OpenAI / Anthropic API
367
- ```python
368
- import json, subprocess
369
-
370
- context = json.loads(
371
- subprocess.check_output(["sourcecode", ".", "--agent"])
372
- )
373
-
374
- system_prompt = f"""
375
- You are working on: {context['project']['summary']}
376
- Architecture: {context['architecture']}
377
- Entry points: {[ep['path'] for ep in context['entry_points']]}
378
- """
379
- ```
380
-
381
- ### CI / CD pipelines
382
- ```yaml
383
- # .github/workflows/context.yml
384
- - name: Generate codebase context
385
- run: sourcecode . --agent --output context.json
386
-
387
- - name: AI-assisted code review
388
- run: |
389
- CONTEXT=$(cat context.json)
390
- # Inject into your preferred AI review step
391
- ```
255
+ ## Quickstart
392
256
 
393
- ### Onboarding new engineers
394
- ```bash
395
- # Generate human-readable architecture summary
396
- sourcecode . prepare-context onboard --llm-prompt
397
- ```
257
+ The most useful command for integrating `sourcecode` into an AI agent:
398
258
 
399
- ### Architecture audits
400
259
  ```bash
401
- sourcecode . --agent --architecture --graph-modules --dependencies
260
+ sourcecode --agent
402
261
  ```
403
262
 
404
- ---
405
-
406
- ## How it works
407
-
408
- `sourcecode` runs a local, static analysis pipeline on your repository:
263
+ It produces structured JSON containing only the essential sections (no noise, no file tree), ready to paste into an LLM context:
409
264
 
265
+ ```json
266
+ {
267
+ "project": {
268
+ "type": "fullstack",
269
+ "summary": "Full-stack project in Nodejs, mvc, 4075 source files. Domains: atlas-client, atlas-server, atlas-hub, atlas-reports. 3300 dependencies (java, nodejs).",
270
+ "primary_stack": "nodejs",
271
+ "secondary_stacks": ["java"]
272
+ },
273
+ "entry_points": [
274
+ {
275
+ "path": "atlas-server/src/main/java/com/example/atlas/AtlasServerApplication.java",
276
+ "stack": "java",
277
+ "kind": "application",
278
+ "confidence": "high"
279
+ },
280
+ {
281
+ "path": "atlas-client/src/main.ts",
282
+ "stack": "nodejs",
283
+ "kind": "entrypoint",
284
+ "confidence": "high"
285
+ }
286
+ ],
287
+ "runtime_packages": [ ... ],
288
+ "dependencies": { ... },
289
+ "env_map": { ... },
290
+ "code_notes": [ ... ]
291
+ }
410
292
  ```
411
- Repository
412
-
413
- ├── Scanner # File tree, manifests, workspace detection
414
- ├── Stack Detectors # Language, framework, package manager detection
415
- ├── Entry Points # Production entry points (not benchmarks/docs)
416
- ├── Git Analyzer # Churn hotspots, uncommitted changes
417
- ├── Relevance Scorer # Runtime proximity × git churn × bootstrap signal
418
- └── Serializer # Structured JSON/YAML output
419
- ```
420
-
421
- No LLM calls. No network requests. No sampling. Fully deterministic.
422
-
423
- The same repository produces the same output on every run — which means agents can cache it, diff it, and rely on it.
424
-
425
- ---
426
-
427
- ## Output modes
428
-
429
- | Mode | Use case | Size |
430
- |------|----------|------|
431
- | `sourcecode .` | Full analysis | Full |
432
- | `sourcecode . --agent` | AI agent injection | ~600–1000 tokens |
433
- | `sourcecode . --compact` | Prompts, handoffs | ~500–700 tokens |
434
- | `sourcecode . prepare-context <task>` | Task-specific context | ~800–1200 tokens |
435
-
436
- ### Available flags
437
-
438
- | Flag | Description |
439
- |------|-------------|
440
- | `--agent` | Structured, noise-free output for AI agents. Auto-enables `--dependencies`, `--env-map`, `--code-notes`. |
441
- | `--dependencies` | Direct dependencies with versions and role classification. |
442
- | `--git-context` | Recent commits, change hotspots, uncommitted files. |
443
- | `--architecture` | Layer inference: MVC, layered, hexagonal, domain-based. |
444
- | `--graph-modules` | Module import graph and call relationships. |
445
- | `--semantics` | Cross-file symbol resolution and call graph. |
446
- | `--env-map` | All environment variables referenced in source. |
447
- | `--code-notes` | TODOs, FIXMEs, HACKs, and Architecture Decision Records. |
448
- | `--compact` | Minimal output for token-constrained prompts. |
449
- | `--format yaml` | YAML instead of JSON. |
450
- | `--output PATH` | Write to file instead of stdout. |
451
-
452
- Full reference: `sourcecode --help`
453
-
454
- ### Prepare-context tasks
455
-
456
- | Task | What it produces |
457
- |------|-----------------|
458
- | `explain` | Architecture + entry points + key dependencies |
459
- | `fix-bug` | Risk-ranked files + suspected areas + code annotations |
460
- | `refactor` | Structural issues + improvement opportunities |
461
- | `generate-tests` | Untested source files + test gap analysis |
462
- | `onboard` | Full project understanding for new agents/developers |
463
- | `review-pr` | Changed files + architectural impact |
464
- | `delta` | Git-changed files only — incremental context |
465
-
466
- ---
467
-
468
- ## Philosophy
469
-
470
- **Determinism over approximation.** Every run on the same repository produces the same output. Agents, pipelines, and teams can depend on that.
471
293
 
472
- **Runtime topology over file trees.** What matters is where execution starts, what calls what, and which modules are actually critical — not alphabetical file lists.
294
+ For large repositories where context size matters, use `--compact` to reduce the output to ~600-800 tokens:
473
295
 
474
- **Noise suppression by default.** Benchmark dirs, generated files, tooling configs, and docs are suppressed unless explicitly requested. Agents get signal, not inventory.
475
-
476
- **Local-first, privacy-respecting.** No code leaves your machine. No API keys required. Analysis is fully offline.
477
-
478
- **Composable, not monolithic.** Output is structured data. Pipe it, transform it, inject it, cache it. It's infrastructure, not a magic black box.
479
-
480
- **Confidence-aware.** Every analysis includes a confidence summary and gap list. Agents know what they don't know.
481
-
482
- ---
483
-
484
- ## Supported languages and stacks
485
-
486
- | Language | Package detection | Entry points | Frameworks |
487
- |----------|-------------------|--------------|------------|
488
- | Python | `pyproject.toml`, `requirements.txt`, `setup.py` | CLI, scripts, `__main__` | FastAPI, Django, Flask, Typer, Click |
489
- | Node.js | `package.json`, lock files | `main`, `bin`, scripts | Express, Next.js, Fastify, NestJS, React, Vue |
490
- | Go | `go.mod` | `main.go`, `cmd/` | Standard library, Gin, Echo |
491
- | Rust | `Cargo.toml` | `main.rs`, `lib.rs` | Tokio, Actix, Axum |
492
- | Java | `pom.xml`, `build.gradle` | Spring Boot, Quarkus, Micronaut | Spring, Quarkus |
493
- | Kotlin | `build.gradle.kts` | Spring Boot, Ktor | Spring, Ktor |
494
- | .NET / C# | `.csproj`, `.sln` | `Program.cs` | ASP.NET, Blazor |
495
- | PHP | `composer.json` | `index.php` | Laravel, Symfony |
496
- | Ruby | `Gemfile` | `config.ru` | Rails, Sinatra |
497
- | Dart | `pubspec.yaml` | `main.dart` | Flutter |
498
-
499
- Monorepos with mixed stacks are fully supported.
500
-
501
- ---
502
-
503
- ## Roadmap
504
-
505
- **Now — Core stability**
506
- - Ranking improvements (git churn, runtime proximity)
507
- - Better architecture inference
508
- - Broader language coverage
509
-
510
- **Next — Agent integrations**
511
- - MCP server for native Claude Code integration
512
- - VS Code extension
513
- - Context diffing (compare before/after changes)
514
- - Incremental updates (delta mode improvements)
515
-
516
- **Later — Team features**
517
- - Shared context snapshots
518
- - Architecture drift detection
519
- - CI integration templates
520
- - Governance and compliance context
521
-
522
- > Focus is on adoption and utility. No monetization until the core is genuinely useful to the community.
523
-
524
- ---
525
-
526
- ## Contributing
527
-
528
- We welcome contributions. See [CONTRIBUTING.md](CONTRIBUTING.md) for setup, testing, and guidelines.
529
-
530
- **Quick start for contributors:**
531
296
  ```bash
532
- git clone https://github.com/sourcecode-ai/sourcecode
533
- cd sourcecode
534
- pip install -e ".[dev]"
535
- pytest tests/
297
+ sourcecode --compact --copy
298
+ # Copies the summary to the clipboard. Ready to paste.
536
299
  ```
537
300
 
538
301
  ---
539
302
 
540
- ## Security
541
-
542
- `sourcecode` analyzes local repositories. It does not transmit code, paths, or analysis results to any external service. See [SECURITY.md](SECURITY.md) for our security policy and responsible disclosure process.
303
+ ## Flags reference
304
+
305
+ ### Global options
306
+
307
+ | Flag | Alias | Type | Default | Description | Status |
308
+ |------|-------|------|---------|-------------|--------|
309
+ | `--format` | `-f` | `json\|yaml` | `json` | Output format. YAML is more readable, JSON preferred in pipelines. | ✅ CORE |
310
+ | `--output` | `-o` | `PATH` | stdout | Writes output to a file instead of stdout. | ✅ CORE |
311
+ | `--compact` | | flag | off | ~600-800 token output: stacks, entry points, deps, gaps. No file tree. | ✅ CORE |
312
+ | `--agent` | | flag | off | JSON optimized for agents. Automatically enables `--dependencies`, `--env-map`, `--code-notes`. | ✅ CORE |
313
+ | `--dependencies` | | flag | off | Analyzes direct and transitive deps from manifests and lockfiles. | ✅ CORE |
314
+ | `--git-context` | `-g` | flag | off | Includes recent commits, change hotspots, uncommitted changes, contributors. | ✅ CORE |
315
+ | `--git-depth` | | `INT [1–100]` | `20` | Number of recent commits with `--git-context`. | ✅ CORE |
316
+ | `--git-days` | | `INT [1–3650]` | `90` | Window in days to detect hotspots with `--git-context`. | ✅ CORE |
317
+ | `--env-map` | | flag | off | Maps environment variables: key, type, category, files that reference them. | ✅ CORE |
318
+ | `--code-notes` | | flag | off | Extracts inline annotations: TODO, FIXME, HACK, BUG, DEPRECATED, NOTE, etc. | ✅ CORE |
319
+ | `--copy` | `-c` | flag | off | Copies output to the clipboard after successful execution. | ✅ CORE |
320
+ | `--depth` | | `INT [1–20]` | `4` | Maximum file tree traversal depth. Java/Maven requires ≥8. | ✅ CORE |
321
+ | `--mode` | | `contract\|standard\|raw` | `contract` | `contract`: minimal contracts per file. `standard`: full detail. `raw`: project level only. | ✅ CORE |
322
+ | `--tree` | | flag | off | Includes full `file_tree` and `file_paths` in the output. Increases size significantly. | ✅ CORE |
323
+ | `--changed-only` | | flag | off | Contract mode: only files modified in git (staged, unstaged, untracked). | ✅ CORE |
324
+ | `--rank-by` | | `relevance\|centrality\|git-churn` | `relevance` | File ranking strategy in contract mode. | ✅ CORE |
325
+ | `--semantics` | | flag | off | Cross-file symbol resolution, call graph with confidence levels, fan-in/fan-out hotspots. Slower. | 🧪 EXP |
326
+ | `--architecture` | | flag | off | Architectural layer inference (MVC/hexagonal/bounded contexts). Low confidence without `--semantics`. | 🧪 EXP |
327
+ | `--graph-modules` | | flag | off | Structural module graph: nodes (files/symbols) and edges (imports, calls, contains). | 🧪 EXP |
328
+ | `--graph-detail` | | `high\|medium\|full` | `high` | Module graph detail level. | 🧪 EXP |
329
+ | `--max-nodes` | | `INT [≥1]` | — | Maximum nodes in `--graph-modules`. Prevents huge graphs in large repos. | 🧪 EXP |
330
+ | `--graph-edges` | | `TEXT` | all | Edge types for `--graph-modules`, comma-separated: `imports,calls,contains`. | 🧪 EXP |
331
+ | `--docs` | | flag | off | Extracts docstrings, function signatures, and module comments. | 🧪 EXP |
332
+ | `--docs-depth` | | `module\|symbols\|full` | `symbols` | Docs extraction depth. `full` includes private symbols. | 🧪 EXP |
333
+ | `--symbol` | | `TEXT` | — | Contract mode: localized context for a specific symbol. Python, TS, JS only. **Does not support Java.** | 🧪 EXP |
334
+ | `--max-importers` | | `INT [1–10000]` | `50` | Limit on importer files returned by `--symbol`. | 🧪 EXP |
335
+ | `--full-metrics` | | flag | off | Per-file technical metrics: LOC, cyclomatic complexity, coverage. Aimed at CI, not at agents. | 🧪 EXP |
336
+ | `--emit-graph` | | flag | off | Contract mode: includes a compact dependency graph (nodes + edges) in the output. | 🚧 WIP |
337
+ | `--entrypoints-only` | | flag | off | Contract mode: only files with exports or entry points. Note: includes *all* files with exports. | 🚧 WIP |
338
+ | `--max-symbols` | | `INT [≥1]` | — | Limits total exported symbols in contract mode. Discards lower-ranked files. | 🚧 WIP |
339
+ | `--no-redact` | | flag | off | Disables automatic secret redaction. Output may contain sensitive values. | 🚧 WIP |
340
+ | `--trace-pipeline` | | flag | off | Diagnostic mode: includes a trace of each candidate and filtering decision. Debugging only. | 🚧 WIP |
341
+ | `--version` | `-v` | flag | — | Shows version and exits. | ✅ CORE |
543
342
 
544
343
  ---
545
344
 
546
- ## Privacy
345
+ ## Subcommands
547
346
 
548
- Telemetry is **opt-in only** and disabled by default. If you choose to enable it, only anonymous usage metadata is collected — never code, paths, or content. See [docs/privacy.md](docs/privacy.md) for full details.
347
+ ### `prepare-context TASK [PATH]`
549
348
 
550
- ```bash
551
- sourcecode telemetry status # check current setting
552
- sourcecode telemetry enable # opt in
553
- sourcecode telemetry disable # opt out
554
- ```
555
-
556
- ---
349
+ Generates task-specific context for AI agents.
557
350
 
558
- ## License
559
-
560
- Apache License 2.0. See [LICENSE](LICENSE) for details.
561
-
562
- ---
351
+ | Task | Description | Status |
352
+ |------|-------------|--------|
353
+ | `explain` | Architecture, entry points, key dependencies | ✅ CORE |
354
+ | `fix-bug` | Files prioritized by risk, inline annotations | ✅ CORE |
355
+ | `onboard` | Full context for new agents or developers | ✅ CORE |
356
+ | `delta` | Incremental context: only files changed in git | ✅ CORE |
357
+ | `refactor` | Structural problems, improvement opportunities | 🧪 EXP |
358
+ | `generate-tests` | Files without tests, coverage gap analysis | 🧪 EXP |
359
+ | `review-pr` | Changed files + architectural impact | 🧪 EXP |
563
360
 
564
- <p align="center">
565
- Built for the age of AI coding agents.<br>
566
- <a href="https://github.com/sourcecode-ai/sourcecode">GitHub</a> ·
567
- <a href="https://pypi.org/project/sourcecode/">PyPI</a> ·
568
- <a href="docs/getting-started.md">Documentation</a>
569
- </p>
361
+ ...
@@ -1,7 +1,7 @@
1
- sourcecode/__init__.py,sha256=W3DJGnBZMJZBnvn9pO7FSLfHppERKWNuRgtqy1X-umM,103
1
+ sourcecode/__init__.py,sha256=U4-Ic6jRz9YH4wIYlhtl8YFtDO_yG3OsMIWYQbQ3mKE,102
2
2
  sourcecode/adaptive_scanner.py,sha256=6dh34C2qZXyRbw-8xBhbEwDdXanM6CRFRWayVoYITnA,10190
3
- sourcecode/architecture_analyzer.py,sha256=O4AXc7l_WTzIXrcAzstqZy-TGKNaFa6p3MzpgVjaO8g,27749
4
- sourcecode/architecture_summary.py,sha256=rSY5MRiaz4N1YdG0pqDTDuFjSN7PO_Zplx-dtNzv2Yo,19985
3
+ sourcecode/architecture_analyzer.py,sha256=hn2K4c_EknGehXZ3I1KyoJPI-LlBSkphrVGBZMceif4,31249
4
+ sourcecode/architecture_summary.py,sha256=J9yoLgh8wXwIRrT6q6JooB6PekivbOEYpJz4BUXdalk,20545
5
5
  sourcecode/ast_extractor.py,sha256=0OHQwTUBBc9lmqPLryVeB1z8dGIC6NhLlar800CD9oI,41129
6
6
  sourcecode/classifier.py,sha256=GKTMN8qKZX7ponSwDJfN08RrasI4CVpq1_gFBgEopps,7093
7
7
  sourcecode/cli.py,sha256=YusMOF5OfihL3nBw66LcANRFSiVHugPrXE0vPIycjLQ,72016
@@ -15,21 +15,21 @@ sourcecode/coverage_parser.py,sha256=q0LeZJaX1bnntLu-ImksdBsMlpsVmk_iUfSaB4eaJGo
15
15
  sourcecode/dependency_analyzer.py,sha256=Exq0BfInvfS5iAg9xAr6WI2uPNuotkIudTKcYJcRhB8,52757
16
16
  sourcecode/doc_analyzer.py,sha256=TttdS7mndKQhyJCfJnnAsyGCJrf-TIL7oXxDlTLUFKE,21248
17
17
  sourcecode/entrypoint_classifier.py,sha256=a69dMGyxCTd_LOm3oqj-EXWpRmbmeujN7T1mr2eJ1as,3877
18
- sourcecode/env_analyzer.py,sha256=YXlaxFBuf-ladWmb3iLCNMN-rKhP2JuqAIDwZdiIZHQ,18473
19
- sourcecode/file_classifier.py,sha256=_KfFIIolharaIxbSTrCkaWauQIqNHCyor_n47RGyDh8,8577
18
+ sourcecode/env_analyzer.py,sha256=Ifwst0YLvArHHaRQXlf9DCYGO0MdyQBAMWSyEzfpKZo,21650
19
+ sourcecode/file_classifier.py,sha256=48ly5Z6exkzBy8lNy1AkdP4-oJqIA1zT3LZfffuTyDo,11572
20
20
  sourcecode/git_analyzer.py,sha256=PD3eNWydznQ6KLNpxGzBqizIHoPIKevfwz9Xyf_pDt4,11600
21
21
  sourcecode/graph_analyzer.py,sha256=hMOsLLz9B0UnQ4xwbHdgr3bFvqpw0bQ8kN-xmEn3Krk,64156
22
22
  sourcecode/metrics_analyzer.py,sha256=e2cFwB9XubFq_dIVsP2PLjpr4wX0N6ulb3ol3sGDUeo,20777
23
- sourcecode/prepare_context.py,sha256=n7NghZJt8zPt7bzMVpk6gvHlQfhwDYjuLJjgHSOTfD4,33943
23
+ sourcecode/prepare_context.py,sha256=FKh-M5B74r-yztuAgfuSE8RjIZvsq9YRwTr74zmldxI,35901
24
24
  sourcecode/ranking_engine.py,sha256=virVglafZufioHpZpwktjMvUiL0TZELWQCQnQNV8dFo,9360
25
25
  sourcecode/redactor.py,sha256=xuGcadGEHaPw4qZXlMDvzMCsr4VOkdp3oBQptHyJk8c,2884
26
26
  sourcecode/relevance_scorer.py,sha256=MYF4FFkveAQps9SmTeTlh6ODiBz2F--_hWNeHMLtUHQ,8405
27
27
  sourcecode/repo_classifier.py,sha256=FG1vaWKdWXsWdl-S8hjVMiTqcwgaRXkDyvK4rPcOGtQ,22681
28
28
  sourcecode/runtime_classifier.py,sha256=zWX3r3HCKHc-qtIobErOa8aKMmaoPYREtJKvPcBGPjQ,14792
29
29
  sourcecode/scanner.py,sha256=aM3h9-DCQ3xKpeHpHYdo2vX6T5P95HA_YwZbkAVNwmo,8288
30
- sourcecode/schema.py,sha256=ofEge9hTWHOTjeWt7ceCDQWzP-uhhenrYX2usjW2KVU,22759
30
+ sourcecode/schema.py,sha256=5s9Gtiw2Fk-HEVwVcegl2fy-cyYBwS16WSTS0xIv744,23204
31
31
  sourcecode/semantic_analyzer.py,sha256=16EFTgM7ooW0m5gNUKOlTSn7IEMLSzKmzQn-cWaSqjs,82604
32
- sourcecode/serializer.py,sha256=nh8DNGVPVszy60YnWGVH_sLyskgDN973glPIMzNeFWA,62843
32
+ sourcecode/serializer.py,sha256=k-rddaaIlvAA5F2qvizCh_yd4oAlhhsg2obrYoJKtlo,65424
33
33
  sourcecode/summarizer.py,sha256=ZuzIdm3t8A-d5MuQL0TSNLrd-L0IQIuguIxeNXMNJf8,16070
34
34
  sourcecode/tree_utils.py,sha256=Fj9OIuUksBvgibNd3feog0sMDjVypJzPexp5lvMoYWI,1424
35
35
  sourcecode/workspace.py,sha256=X_6NmNnitvT3_38V-JDChydo_sR68s249hLFlrQskU0,8271
@@ -42,7 +42,7 @@ sourcecode/detectors/elixir.py,sha256=jCpvt5Yi6jvplc80ovRtWh17q-11ZGo9qX7o8b57TJ
42
42
  sourcecode/detectors/go.py,sha256=2r66uRQfeTWsqxr4HDhT6vExZErby0t46QXLHVBRv9w,2782
43
43
  sourcecode/detectors/heuristic.py,sha256=bCqqgbHavl4Sse3dqT8mwmo1wAdgeJr7VyXOmfClLKo,3387
44
44
  sourcecode/detectors/hybrid.py,sha256=IGFRUVsAZ1ooRlFdznCeJAV6vy1yVDx-VyghvLtddXc,9101
45
- sourcecode/detectors/java.py,sha256=cZvB13cqJ76zHDncEG-TOCuK8gJjJN2mZGS2DGEcZy8,7715
45
+ sourcecode/detectors/java.py,sha256=H5qicYbpIFqThCuT4Aocn-d2zEZ_6vJc-kLjHZITIBw,9084
46
46
  sourcecode/detectors/jvm_ext.py,sha256=EgHJ5W8EE-ZTN9V607mVzohyKgZE8Mc2jCi-DF8RAZU,2616
47
47
  sourcecode/detectors/nodejs.py,sha256=7fsyAmrGkkguX6U80HUQpIe9MRaYyi_A7zbaRtmFmGc,13097
48
48
  sourcecode/detectors/parsers.py,sha256=ugPg8yNUf0Ai1gA7Fnn6wAkYGFjTxRodSP3IeViYJJ4,2290
@@ -60,8 +60,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
60
60
  sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
61
61
  sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
62
62
  sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
63
- sourcecode-0.49.0.dist-info/METADATA,sha256=5FVQYOuzhccMc8oiJ-tPJPr3XJqrdDzWRWf32W8HqWk,25209
64
- sourcecode-0.49.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
65
- sourcecode-0.49.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
66
- sourcecode-0.49.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
67
- sourcecode-0.49.0.dist-info/RECORD,,
63
+ sourcecode-1.1.0.dist-info/METADATA,sha256=pM02mysiHsgFzu3TzkDPTNX78ts7wt0S4R4We6zDA_w,20411
64
+ sourcecode-1.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
65
+ sourcecode-1.1.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
66
+ sourcecode-1.1.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
67
+ sourcecode-1.1.0.dist-info/RECORD,,