PyPI - sourcecode - Versions diffs - 0.49.0__py3-none-any.whl → 1.1.0__py3-none-any.whl - Mend

sourcecode 0.49.0 (py3-none-any.whl) → 1.1.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

sourcecode/__init__.py +1 -1
sourcecode/architecture_analyzer.py +81 -0
sourcecode/architecture_summary.py +12 -0
sourcecode/detectors/java.py +31 -5
sourcecode/env_analyzer.py +81 -1
sourcecode/file_classifier.py +47 -0
sourcecode/prepare_context.py +46 -0
sourcecode/schema.py +5 -1
sourcecode/serializer.py +99 -41
{sourcecode-0.49.0.dist-info → sourcecode-1.1.0.dist-info}/METADATA +99 -307
{sourcecode-0.49.0.dist-info → sourcecode-1.1.0.dist-info}/RECORD +14 -14
{sourcecode-0.49.0.dist-info → sourcecode-1.1.0.dist-info}/WHEEL +0 -0
{sourcecode-0.49.0.dist-info → sourcecode-1.1.0.dist-info}/entry_points.txt +0 -0
{sourcecode-0.49.0.dist-info → sourcecode-1.1.0.dist-info}/licenses/LICENSE +0 -0

sourcecode/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
 """sourcecode — Deterministic codebase context maps for AI coding agents."""
-__version__ = "0.49.0"
+__version__ = "1.1.0"

sourcecode/architecture_analyzer.py CHANGED Viewed

@@ -176,6 +176,36 @@ class ArchitectureAnalyzer:
         # Step 1: filter paths
         filtered = self._filter_paths(sm.file_paths)
+        # Step 1b: DDD filesystem detection — runs before the filtered-paths guard
+        # because DDD signals live in directory structure, not just file extensions.
+        ddd_result = self._detect_ddd(sm.file_paths)
+        if ddd_result is not None:
+            ddd_pattern, ddd_layers, ddd_contexts, ddd_layer_names = ddd_result
+            domains_for_ddd = self._cluster_domains(filtered) if len(filtered) >= 2 else []
+            bc_list = [BoundedContext(name=n, confidence="high") for n in ddd_contexts]
+            return ArchitectureAnalysis(
+                requested=True,
+                pattern=ddd_pattern,
+                domains=domains_for_ddd,
+                layers=ddd_layers,
+                bounded_contexts=bc_list,
+                ddd_layers_detected=ddd_layer_names,
+                confidence="high",
+                method="filesystem_inference",
+                limitations=[],
+                evidence=[{
+                    "type": "filesystem_naming",
+                    "paths": [f"{ddd_contexts[0]}/" if ddd_contexts else ""],
+                    "reason": (
+                        f"DDD layout detected: {len(ddd_contexts)} modules under common prefix "
+                        "each contain application/, domain/, infrastructure/ subdirectories."
+                    ),
+                    "confidence": "high",
+                }],
+                tentative=False,
+            )
         if len(filtered) < 2:
             return ArchitectureAnalysis(
                 requested=True,
@@ -333,6 +363,57 @@ class ArchitectureAnalyzer:
     # Private helpers
     # ------------------------------------------------------------------
+    def _detect_ddd(
+        self, paths: list[str]
+    ) -> "Optional[tuple[str, list[ArchitectureLayer], list[str], list[str]]]":
+        """Detect DDD: ≥5 modules under a common prefix each with application/domain/infrastructure."""
+        _DDD_LAYERS = frozenset({"application", "domain", "infrastructure"})
+        _DDD_MIN_MODULES = 5
+        # Map (prefix, module) → set of DDD layer names found under that module
+        prefix_module_layers: dict[tuple[str, str], set[str]] = {}
+        for p in paths:
+            parts = p.replace("\\", "/").split("/")
+            for i, part in enumerate(parts):
+                if part in _DDD_LAYERS and i >= 2:
+                    module = parts[i - 1]
+                    prefix = "/".join(parts[:i - 1])
+                    key = (prefix, module)
+                    prefix_module_layers.setdefault(key, set()).add(part)
+                    break
+        # Group by prefix; find prefixes where ≥5 modules have all 3 DDD layers
+        prefix_modules: dict[str, list[str]] = {}
+        for (prefix, module), layers_found in prefix_module_layers.items():
+            if _DDD_LAYERS <= layers_found:  # module has all 3
+                prefix_modules.setdefault(prefix, []).append(module)
+        best_prefix = max(
+            prefix_modules,
+            key=lambda p: len(prefix_modules[p]),
+            default=None,
+        )
+        if best_prefix is None or len(prefix_modules[best_prefix]) < _DDD_MIN_MODULES:
+            return None
+        bounded_context_names = sorted(set(prefix_modules[best_prefix]))
+        ddd_layer_names = sorted(_DDD_LAYERS)
+        arch_layers: list[ArchitectureLayer] = [
+            ArchitectureLayer(
+                name=layer,
+                pattern="ddd",
+                files=[
+                    p for p in paths
+                    if f"/{layer}/" in p.replace("\\", "/")
+                ],
+                confidence="high",
+            )
+            for layer in ddd_layer_names
+        ]
+        return "ddd", arch_layers, bounded_context_names, ddd_layer_names
     def _is_tooling(self, path: str) -> bool:
         norm = path.replace("\\", "/")
         return any(norm.startswith(p) for p in _TOOLING_PREFIXES)

sourcecode/architecture_summary.py CHANGED Viewed

@@ -84,6 +84,11 @@ class ArchitectureSummarizer:
                 elif suffix in {".cs", ".fs", ".vb"}:
                     lang_lines = self._summarize_dotnet_entry(sm.stacks)
+        # MyBatis XML mapper count line (Java projects)
+        mybatis_line = self._mybatis_summary_line(file_paths)
+        if mybatis_line:
+            lang_lines.append(mybatis_line)
         # Merge: rich lines first, stack-specific details appended (deduped)
         lines = rich_lines + [l for l in lang_lines if l not in rich_lines]
@@ -296,6 +301,13 @@ class ArchitectureSummarizer:
             lines.append("Orquesta el arranque de la aplicacion JVM.")
         return lines
+    def _mybatis_summary_line(self, file_paths: list[str]) -> str | None:
+        """Return a summary line when >5 MyBatis XML mappers are detected."""
+        mapper_xml_count = sum(1 for p in file_paths if p.endswith("Mapper.xml"))
+        if mapper_xml_count > 5:
+            return f"MyBatis XML mappers: {mapper_xml_count} *Mapper.xml detected."
+        return None
     def _summarize_dotnet_entry(self, stacks: list[StackDetection]) -> list[str]:
         dotnet_stacks = [s for s in stacks if s.stack == "dotnet"]
         if not dotnet_stacks:

sourcecode/detectors/java.py CHANGED Viewed

@@ -15,12 +15,19 @@ from sourcecode.schema import FrameworkDetection
 from sourcecode.tree_utils import flatten_file_tree
 _MAX_FILE_SIZE = 256 * 1024  # 256 KB
-_MAX_JAVA_ENTRY_SCAN = 200
-_MAX_ANNOTATION_ENTRY_POINTS = 20
+_MAX_JAVA_ENTRY_SCAN = 1000
+_MAX_ANNOTATION_ENTRY_POINTS = 500
-_REST_CONTROLLER_RE = re.compile(r'@(?:Rest)?Controller\b')
+_REST_CONTROLLER_RE = re.compile(r'@RestController\b')
+_MVC_CONTROLLER_RE = re.compile(r'@Controller\b')
+_REQUEST_MAPPING_RE = re.compile(r'@RequestMapping\b')
+_CONTROLLER_ADVICE_RE = re.compile(r'@ControllerAdvice\b')
 _WEB_FILTER_RE = re.compile(r'@WebFilter\b')
 _FILTER_BEAN_RE = re.compile(r'FilterRegistrationBean\b')
+# Extracts path from @RequestMapping("/v1/foo"), @GetMapping("/bar"), etc.
+_HTTP_PATH_RE = re.compile(
+    r'@(?:Request|Get|Post|Put|Delete|Patch)Mapping\s*\(\s*(?:value\s*=\s*)?["\']([^"\']+)["\']'
+)
 class JavaDetector(AbstractDetector):
@@ -81,6 +88,8 @@ class JavaDetector(AbstractDetector):
             frameworks.append(FrameworkDetection(name="Vert.x", source=source))
         if "jakarta.ee" in text or "javax.ws.rs" in text:
             frameworks.append(FrameworkDetection(name="Jakarta EE", source=source))
+        if "mybatis" in text:
+            frameworks.append(FrameworkDetection(name="MyBatis", source=source))
         return frameworks
     def _collect_entry_points(self, context: DetectionContext) -> list[EntryPoint]:
@@ -139,13 +148,30 @@ class JavaDetector(AbstractDetector):
             return []
         # Quick pre-filter before running regexes
-        if "Controller" not in content and "Filter" not in content:
+        if ("Controller" not in content and "Filter" not in content
+                and "ControllerAdvice" not in content):
             return []
         if _REST_CONTROLLER_RE.search(content):
+            http_path_match = _HTTP_PATH_RE.search(content)
+            http_path = http_path_match.group(1) if http_path_match else None
             return [EntryPoint(
-                path=rel_path, stack="java", kind="http_handler",
+                path=rel_path, stack="java", kind="rest_controller",
                 source="annotation", confidence="high",
+                http_path=http_path,
+            )]
+        if _CONTROLLER_ADVICE_RE.search(content):
+            return [EntryPoint(
+                path=rel_path, stack="java", kind="exception_handler",
+                source="annotation", confidence="medium",
+            )]
+        if _MVC_CONTROLLER_RE.search(content) and _REQUEST_MAPPING_RE.search(content):
+            http_path_match = _HTTP_PATH_RE.search(content)
+            http_path = http_path_match.group(1) if http_path_match else None
+            return [EntryPoint(
+                path=rel_path, stack="java", kind="mvc_controller",
+                source="annotation", confidence="medium",
+                http_path=http_path,
             )]
         if _WEB_FILTER_RE.search(content):
             return [EntryPoint(

sourcecode/env_analyzer.py CHANGED Viewed

@@ -35,6 +35,15 @@ _SPRING_ENV_VAR_RE = re.compile(r'\$\{([A-Z][A-Z0-9_]*)(?::([^}]*))?\}')
 # These are internal property cross-references, not OS env vars, but still config signals.
 _SPRING_PROP_REF_RE = re.compile(r'\$\{([a-z][a-z0-9]*(?:\.[a-z][a-z0-9_-]*)*)(?::([^}]*))?\}')
+# Known Spring-internal namespaces — NOT emitted as custom application properties.
+_SPRING_BUILTIN_NAMESPACES: frozenset[str] = frozenset({
+    "spring", "logging", "management", "server", "info", "debug",
+    "endpoints", "security", "eureka", "feign", "ribbon", "hystrix",
+    "zuul", "cloud", "flyway", "liquibase", "jpa", "datasource",
+    "kafka", "rabbitmq", "redis", "mail", "thymeleaf", "mvc",
+    "web", "actuator", "metrics", "tracing",
+})
 # Patterns where absence of the variable causes a hard runtime error (not just None/null).
 # py_environ_bracket → os.environ["KEY"] raises KeyError
 # java_spring_value   → Spring fails to start if ${KEY} has no default
@@ -223,6 +232,66 @@ def _extract_spring_profile(filename: str) -> Optional[str]:
     return None
+def _parse_yaml_custom_properties(
+    content: str,
+    rel_path: str,
+    profile: Optional[str],
+    findings: dict,
+) -> None:
+    """Extract custom namespace leaf properties from YAML (e.g. saint.ldap.url).
+    Builds dotted key paths by tracking indentation levels. Emits only properties
+    whose top-level namespace is NOT a well-known Spring built-in namespace.
+    """
+    # Stack of (indent, key_segment)
+    key_stack: list[tuple[int, str]] = []
+    for line in content.splitlines():
+        stripped = line.lstrip()
+        if not stripped or stripped.startswith('#'):
+            continue
+        if ':' not in stripped:
+            continue
+        indent = len(line) - len(stripped)
+        colon_idx = stripped.index(':')
+        key_part = stripped[:colon_idx].strip()
+        value_part = stripped[colon_idx + 1:].strip() if colon_idx + 1 < len(stripped) else ""
+        # Only plain identifiers (no special chars)
+        if not re.match(r'^[a-zA-Z][a-zA-Z0-9_-]*$', key_part):
+            continue
+        # Pop stack entries at same or deeper indent
+        while key_stack and key_stack[-1][0] >= indent:
+            key_stack.pop()
+        key_stack.append((indent, key_part))
+        # Only emit leaf values (non-empty, not a nested mapping start)
+        if not value_part or value_part.startswith('{') or value_part.startswith('['):
+            continue
+        # Reconstruct full dotted key
+        full_key = '.'.join(seg for _, seg in key_stack)
+        top_ns = key_stack[0][1].lower()
+        # Skip Spring built-in namespaces
+        if top_ns in _SPRING_BUILTIN_NAMESPACES:
+            continue
+        # Skip entries that look like ${...} references (already handled elsewhere)
+        if value_part.startswith('${'):
+            continue
+        # Strip inline YAML comments
+        clean_value = value_part.split('#')[0].strip()
+        if not clean_value:
+            continue
+        findings[full_key].append((rel_path, clean_value, False, profile))
 def _parse_spring_config(
     path: Path,
     rel_path: str,
@@ -234,6 +303,7 @@ def _parse_spring_config(
     Returns the total number of ${...} placeholders found (candidates).
     Captures default values from ${VAR:default} syntax.
     Marks vars without defaults as hard-required (Spring fails to start if missing).
+    Also extracts custom namespace properties (saint.*, app.*, etc.) as yml_property entries.
     """
     try:
         content = path.read_text(encoding="utf-8", errors="replace")
@@ -267,6 +337,10 @@ def _parse_spring_config(
         findings[key].append((f"{rel_path}:{line_num}", default, False, profile))
         candidates += 1
+    # 3. Custom YAML namespace properties (YAML/YML files only)
+    if rel_path.endswith((".yml", ".yaml")):
+        _parse_yaml_custom_properties(content, rel_path, profile, findings)
     return candidates
@@ -320,14 +394,17 @@ class EnvAnalyzer:
                     first_profile = prof
                 if len(unique_files) >= _MAX_FILES_PER_KEY:
                     break
+            # Custom YAML properties use lowercase.dotted keys and category "application"
+            is_yml_prop = '.' in key and key[0].islower()
             records[key] = EnvVarRecord(
                 key=key,
                 required=required,
                 default=default_val,
                 type_hint=_infer_type_hint(key),
-                category=_infer_category(key),
+                category="application" if is_yml_prop else _infer_category(key),
                 files=unique_files,
                 profile=first_profile,
+                source="yml_property" if is_yml_prop else None,
             )
         # 2. Supplement with .env.example entries (fill description + add missing keys)
@@ -372,6 +449,8 @@ class EnvAnalyzer:
                     "extracted. Duplicates across profiles collapsed."
                 )
+        # spring_profiles: named profiles only (exclude "default")
+        _named_profiles = sorted({p for p in profiles_scanned if p != "default"})
         summary = EnvSummary(
             requested=True,
             total=len(sorted_records),
@@ -383,6 +462,7 @@ class EnvAnalyzer:
             profiles_scanned=sorted(set(profiles_scanned)),
             spring_candidates=spring_candidates,
             coverage_note=coverage_note,
+            spring_profiles=_named_profiles,
         )
         return sorted_records, summary

sourcecode/file_classifier.py CHANGED Viewed

@@ -78,6 +78,29 @@ _IMPORT_RE = re.compile(
 )
 _DEF_RE = re.compile(r"\b(class|def|function|const|export\s+class|interface|type)\s+[A-Za-z_]", re.MULTILINE)
+# Java Spring stereotype annotation detection
+_JAVA_ANNOTATION_RE = re.compile(r'@(RestController|Controller|Service|Repository|Mapper|Entity|Data|Configuration|EnableWebSecurity|ControllerAdvice|Transactional)\b')
+# (annotation_set, category, relevance, why_template)
+# Checked in priority order; first match wins.
+_JAVA_STEREOTYPE_RULES: list[tuple[frozenset, str, float, str]] = [
+    (frozenset({"EnableWebSecurity"}),               "security",        0.85, "Spring Security configuration"),
+    (frozenset({"RestController"}),                  "api_endpoint",    0.90, "Spring REST controller — defines HTTP API surface"),
+    (frozenset({"Controller", "RequestMapping"}),    "api_endpoint",    0.80, "Spring MVC controller"),
+    (frozenset({"Service", "Transactional"}),        "business_logic",  0.75, "Transactional service — business logic boundary"),
+    (frozenset({"Service"}),                         "business_logic",  0.65, "Spring service component"),
+    (frozenset({"Repository"}),                      "data_access",     0.65, "Spring repository — data access layer"),
+    (frozenset({"Mapper"}),                          "data_access",     0.65, "MyBatis mapper — SQL data access"),
+    (frozenset({"Configuration"}),                   "configuration",   0.70, "Spring configuration class"),
+    (frozenset({"Entity"}),                          "domain_model",    0.50, "JPA entity — domain model"),
+    (frozenset({"Data"}),                            "dto",             0.40, "Lombok DTO"),
+]
+# Categories produced by Java stereotype detection — used downstream to apply direct relevance
+JAVA_STEREOTYPE_CATEGORIES: frozenset[str] = frozenset(
+    cat for _, cat, _, _ in _JAVA_STEREOTYPE_RULES
+)
 class FileClassifier:
     def __init__(
@@ -138,6 +161,12 @@ class FileClassifier:
         if norm in self.production_entry_paths:
             return FileClassification(norm, "runtime_core", "high", 0.95, "declared production runtime entrypoint", ["entry_points"])
+        # Java Spring stereotype detection (Java/Kotlin files only)
+        if suffix in {".java", ".kt"}:
+            java_class = self._classify_java_stereotype(norm, content)
+            if java_class is not None:
+                return java_class
         if self._has_any_import(imports, _API_IMPORTS):
             evidence = self._matched_imports(imports, _API_IMPORTS)
             return FileClassification(norm, "api_layer", "high", 0.82, "imports API/server framework", evidence)
@@ -213,3 +242,21 @@ class FileClassifier:
     def _sample(self, imports: list[str]) -> list[str]:
         return [f"import:{imp}" for imp in imports[:4]]
+    def _classify_java_stereotype(self, path: str, content: str) -> "FileClassification | None":
+        """Classify Java file by Spring/JPA/MyBatis annotation stereotypes."""
+        if not content:
+            return None
+        found = frozenset(m.group(1) for m in _JAVA_ANNOTATION_RE.finditer(content))
+        if not found:
+            return None
+        for required_annotations, category, relevance, why in _JAVA_STEREOTYPE_RULES:
+            # For @Data DTO: must have @Data but NOT @Entity
+            if required_annotations == frozenset({"Data"}):
+                if "Data" in found and "Entity" not in found:
+                    return FileClassification(path, category, "high", relevance, why, list(found))
+                continue
+            # For compound rules (Service+Transactional, Controller+RequestMapping): all required
+            if required_annotations <= found:
+                return FileClassification(path, category, "high", relevance, why, list(found))
+        return None

sourcecode/prepare_context.py CHANGED Viewed

@@ -332,6 +332,50 @@ _SOURCE_EXTENSIONS: frozenset[str] = frozenset({
     ".go", ".rs", ".rb", ".php", ".cs", ".dart",
 })
+def _extract_ddd_domain(path: str) -> str:
+    """Extract domain name from DDD package path.
+    For m3informatica.saint.ddd.{domain}.infrastructure.rest.*RestController
+    the domain is the segment just before application/ domain/ or infrastructure/.
+    """
+    parts = path.replace("\\", "/").split("/")
+    _DDD_LAYERS = {"application", "domain", "infrastructure"}
+    for i, part in enumerate(parts):
+        if part in _DDD_LAYERS and i >= 1:
+            return parts[i - 1]
+    # Fallback: penultimate directory segment
+    if len(parts) >= 2:
+        return parts[-2]
+    return ""
+def _java_why(path: str, file_class: "Optional[object]") -> str:
+    """Generate why string for Java files based on stereotype classification."""
+    if file_class is None:
+        return ""
+    from sourcecode.file_classifier import JAVA_STEREOTYPE_CATEGORIES
+    category = getattr(file_class, "category", "")
+    if category not in JAVA_STEREOTYPE_CATEGORIES:
+        return ""
+    domain = _extract_ddd_domain(path)
+    class_name = Path(path).stem
+    if category == "api_endpoint":
+        return f"Defines HTTP endpoints for the {domain} domain" if domain else "Defines HTTP API endpoints"
+    if category == "business_logic":
+        return f"Orchestrates {domain} business logic" if domain else "Business logic service"
+    if category == "data_access":
+        return f"SQL queries for {domain} data access" if domain else "Data access layer"
+    if category == "domain_model":
+        return f"JPA entity for {class_name} persistence"
+    if category == "configuration":
+        return getattr(file_class, "reason", "Spring configuration class")
+    if category == "security":
+        return getattr(file_class, "reason", "Spring Security configuration")
+    if category == "dto":
+        return f"Lombok DTO — {class_name}"
+    return getattr(file_class, "reason", "")
 _ALL_EXTENSIONS: frozenset[str] = _SOURCE_EXTENSIONS | frozenset({
     ".md", ".toml", ".yaml", ".yml", ".json", ".xml",
 })
@@ -726,12 +770,14 @@ class TaskContextBuilder:
             )
             all_reasons = [r for r in fs.reasons if r != "source file"] + content_reasons
             reason_str = ", ".join(all_reasons) if all_reasons else "source file"
+            why_str = _java_why(path, file_class)
             scored.append((total, path, RelevantFile(
                 path=path,
                 role=role,
                 score=round(min(total / 3.0, 1.0), 2),
                 reason=reason_str,
+                why=why_str,
             )))
         # Deterministic: score desc, then path asc as tiebreaker

sourcecode/schema.py CHANGED Viewed

@@ -79,6 +79,7 @@ class EntryPoint:
     classification: Optional[Literal["production", "development", "auxiliary"]] = None
     runtime_relevance: Optional[Literal["high", "medium", "low"]] = None
     produced_by: Optional[str] = None  # which detector emitted this
+    http_path: Optional[str] = None  # extracted from @RequestMapping / @GetMapping (Java REST controllers)
 @dataclass
@@ -413,6 +414,7 @@ class ArchitectureAnalysis:
     # True when pattern is inferred from weak signals (e.g. directory names only).
     # Agents must not treat tentative patterns as confirmed facts.
     tentative: bool = False
+    ddd_layers_detected: list[str] = field(default_factory=list)  # e.g. ["application", "domain", "infrastructure"]
 # --- Env Map ---
@@ -425,10 +427,11 @@ class EnvVarRecord:
     required: bool = True
     default: Optional[str] = None
     type_hint: Optional[str] = None   # string | int | bool | url | path | enum
-    category: Optional[str] = None    # database | cache | storage | auth | service | observability | feature_flag | server | general
+    category: Optional[str] = None    # database | cache | storage | auth | service | observability | feature_flag | server | general | application
     description: Optional[str] = None
     files: list[str] = field(default_factory=list)  # "path:line"
     profile: Optional[str] = None     # Spring profile if first occurrence is in application-{profile}.yml
+    source: Optional[str] = None      # yml_property | env_var | source_code
 @dataclass
@@ -446,6 +449,7 @@ class EnvSummary:
     profiles_scanned: list[str] = field(default_factory=list)
     spring_candidates: int = 0   # total ${VAR} refs found across Spring config files
     coverage_note: Optional[str] = None  # explicit note about partial coverage
+    spring_profiles: list[str] = field(default_factory=list)  # canonical list: profile names from application-{profile}.yml
 # --- Code Notes ---

sourcecode/serializer.py CHANGED Viewed

@@ -258,11 +258,19 @@ def _file_relevance(sm: SourceMap, *, limit: int = _FILE_RELEVANCE_LIMIT) -> lis
                 and combined < 0.45):
             continue
+        # For Java stereotype annotations use the table relevance directly —
+        # the combined/2 formula would dilute the stereotype signal.
+        from sourcecode.file_classifier import JAVA_STEREOTYPE_CATEGORIES
+        if file_class and file_class.category in JAVA_STEREOTYPE_CATEGORIES:
+            relevance_val = round(file_class.relevance, 3)
+        else:
+            relevance_val = round(max(0.0, min(1.0, combined / 2.0)), 3)
         item: dict[str, Any] = {
             "path": path,
             "category": file_class.category if file_class else "source",
             "confidence": file_class.confidence if file_class else "low",
-            "relevance": round(max(0.0, min(1.0, combined / 2.0)), 3),
+            "relevance": relevance_val,
             "reason": file_class.reason if file_class else (fs.reasons[0] if fs.reasons else "source file"),
             "evidence": file_class.evidence if file_class else [],
         }
@@ -301,6 +309,10 @@ def _architecture_context(sm: SourceMap) -> dict[str, Any]:
             ]
         else:
             ctx["no_layers_detected"] = True
+        if arch.bounded_contexts:
+            ctx["bounded_contexts"] = [bc.name for bc in arch.bounded_contexts]
+        if arch.ddd_layers_detected:
+            ctx["ddd_layers_detected"] = arch.ddd_layers_detected
         if arch.confidence == "low" and not pattern:
             ctx["note"] = "directory structure insufficient for reliable architectural inference; use --semantics for higher accuracy"
         if arch.limitations:
@@ -350,49 +362,72 @@ def _section_confidence(sm: SourceMap) -> dict[str, str]:
 def compact_view(sm: SourceMap, *, no_tree: bool = False) -> dict[str, Any]:
-    """Context package ready for prompt or handoff (~600-800 tokens).
+    """Context package ready for prompt or handoff (~300-500 tokens).
     Answers: what it is, where it enters, what depends on what,
     what signals matter, and what uncertainty exists.
     Includes: project_type, project_summary, architecture_summary,
-    stacks, entry_points, dependency_summary + key_dependencies (when analyzed),
+    stacks (minimal), entry_points (path+kind only), key_dependencies (name+version+role),
     env_summary (when analyzed), code_notes_summary (when analyzed),
-    confidence_summary, anomalies, analysis_gaps.
+    confidence (overall only), analysis_gaps.
-    Excludes: file_tree, raw dependency lists, docs, module_graph.
-    Empty sections are explained when relevant.
+    Excludes: file_tree, raw dependency lists, docs, module_graph, verbose metadata.
     """
-    dep_summary_dict: Any = None
+    # Key dependencies — name + version + role only (no ecosystem, source, manifests)
     key_deps: Any = None
     if sm.dependency_summary is not None and sm.dependency_summary.requested:
-        dep_summary_dict = asdict(sm.dependency_summary)
-        dep_summary_dict.pop("dependencies", None)
         key_deps = [
-            asdict(d) for d in sm.key_dependencies
+            {
+                "name": d.name,
+                **({"version": d.declared_version} if d.declared_version else {}),
+                **({"role": d.role} if d.role and d.role != "runtime" else {}),
+            }
+            for d in sm.key_dependencies
             if (d.role or "unknown") in _PRODUCTION_DEP_ROLES and d.scope not in {"dev"}
         ][:_KEY_DEPS_CAP]
-    elif sm.dependency_summary is None or not sm.dependency_summary.requested:
-        dep_summary_dict = None  # "not analyzed" — agent should add --dependencies
+    # Dependency summary — requested flag + count + source only
+    dep_summary_dict: Any = None
+    if sm.dependency_summary is not None and sm.dependency_summary.requested:
+        ds = sm.dependency_summary
+        dep_summary_dict = {
+            "requested": True,
+            "total_count": ds.total_count,
+            "direct": ds.direct_count,
+            **({"sources": ds.sources} if ds.sources else {}),
+        }
+    # Env map — key + required + category only (drop type_hint, files list)
     env_summary_dict: Any = None
     env_map_items: Any = None
     if sm.env_summary is not None and sm.env_summary.requested:
-        env_summary_dict = asdict(sm.env_summary)
+        env_summary_dict = {
+            "total": sm.env_summary.total,
+            "required": sm.env_summary.required_count,
+            **({"categories": sm.env_summary.categories} if sm.env_summary.categories else {}),
+        }
         if sm.env_map:
             _sorted_env = sorted(
                 sm.env_map,
                 key=lambda e: (not getattr(e, "required", False), getattr(e, "key", "")),
             )
             env_map_items = [
-                {k: v for k, v in asdict(e).items() if v is not None and v != "" and v != []}
+                {
+                    "key": getattr(e, "key", ""),
+                    **({"required": True} if getattr(e, "required", False) else {}),
+                    **({"category": getattr(e, "category", None)} if getattr(e, "category", None) else {}),
+                }
                 for e in _sorted_env[:_ENV_MAP_CAP]
             ]
+    # Code notes — kind + path + line + truncated text only
     code_notes_summary_dict: Any = None
     code_notes_items: Any = None
     if sm.code_notes_summary is not None and sm.code_notes_summary.requested:
-        code_notes_summary_dict = asdict(sm.code_notes_summary)
+        cn = sm.code_notes_summary
+        by_kind = {k: v for k, v in cn.by_kind.items() if v > 0}
+        code_notes_summary_dict = {"total": cn.total, **({"by_kind": by_kind} if by_kind else {})}
         if sm.code_notes:
             _SEVERITY_ORDER = {"BUG": 0, "FIXME": 1, "DEPRECATED": 2, "TODO": 3, "HACK": 4, "WARNING": 5}
             _sorted_notes = sorted(
@@ -400,43 +435,62 @@ def compact_view(sm: SourceMap, *, no_tree: bool = False) -> dict[str, Any]:
                 key=lambda n: (_SEVERITY_ORDER.get(getattr(n, "kind", "").upper(), 9), getattr(n, "path", "")),
             )
             code_notes_items = [
-                {k: v for k, v in asdict(n).items() if v is not None}
+                {
+                    "kind": getattr(n, "kind", ""),
+                    "path": getattr(n, "path", ""),
+                    "line": getattr(n, "line", None),
+                    **({"text": getattr(n, "text", "")[:60]} if getattr(n, "text", "") else {}),
+                }
                 for n in _sorted_notes[:_CODE_NOTES_CAP]
             ]
-    # Entry points: production runtime only, capped.
-    # Development entries shown separately; auxiliary omitted from compact view.
+    # Entry points — path + kind + confidence only
     ep_groups = _entry_point_groups(sm.entry_points)
-    entry_points_compact = ep_groups["production"][:_EP_PRODUCTION_CAP]
-    if not entry_points_compact:
-        entry_points_compact = []  # truth signal: no production runtime detected
+    entry_points_compact = [
+        {
+            "path": ep["path"],
+            **({"kind": ep["kind"]} if ep.get("kind") else {}),
+            **({"confidence": ep["confidence"]} if ep.get("confidence") else {}),
+        }
+        for ep in ep_groups["production"][:_EP_PRODUCTION_CAP]
+    ]
+    # Stacks — name + method + confidence + frameworks (names only)
+    stacks_compact = [
+        {
+            "stack": s.stack,
+            "detection_method": s.detection_method,
+            "confidence": s.confidence,
+            **({"primary": True} if s.primary else {}),
+            **({"frameworks": [f.name for f in s.frameworks]} if s.frameworks else {}),
+            **({"package_manager": s.package_manager} if s.package_manager else {}),
+        }
+        for s in sm.stacks
+    ]
-    # Confidence summary
+    # Confidence — overall only + anomalies
     conf_dict: Any = None
-    anomalies: Any = None
     if sm.confidence_summary is not None:
-        conf_dict = asdict(sm.confidence_summary)
-        if sm.confidence_summary.anomalies:
-            anomalies = sm.confidence_summary.anomalies
+        cs = sm.confidence_summary
+        conf_dict = {"overall": cs.overall, "stack": cs.stack_confidence, "entry_points": cs.entry_point_confidence}
+        if cs.anomalies:
+            conf_dict["anomalies"] = cs.anomalies
     # Analysis gaps
     gaps_list: Any = None
     if sm.analysis_gaps:
-        gaps_list = [asdict(g) for g in sm.analysis_gaps]
-    context_summary_dict: Any = None
-    if sm.context_summary is not None and sm.context_summary.requested:
-        context_summary_dict = asdict(sm.context_summary)
+        gaps_list = [
+            {"area": g.area, "reason": g.reason, "impact": g.impact}
+            for g in sm.analysis_gaps
+        ]
     result: dict[str, Any] = {
         "schema_version": sm.metadata.schema_version,
         "project_type": sm.project_type,
         "project_summary": sm.project_summary,
         "architecture_summary": sm.architecture_summary,
-        "context_summary": context_summary_dict,
-        "stacks": [asdict(stack) for stack in sm.stacks],
+        "stacks": stacks_compact,
         "entry_points": entry_points_compact,
-        "development_entry_points": (ep_groups["development"][:_EP_DEV_CAP] or None),
         "dependency_summary": dep_summary_dict,
         "key_dependencies": key_deps,
         "env_summary": env_summary_dict,
@@ -444,14 +498,10 @@ def compact_view(sm: SourceMap, *, no_tree: bool = False) -> dict[str, Any]:
         "code_notes_summary": code_notes_summary_dict,
         "code_notes": code_notes_items,
         "confidence_summary": conf_dict,
-        "anomalies": anomalies,
         "analysis_gaps": gaps_list,
     }
-    # Strip keys that are fully None and not informative
-    return {k: v for k, v in result.items() if v is not None or k in (
-        "project_type", "project_summary", "architecture_summary",
-        "dependency_summary", "confidence_summary",
-    )}
+    _always_include = {"project_type", "project_summary", "architecture_summary", "dependency_summary"}
+    return {k: v for k, v in result.items() if v is not None or k in _always_include}
 def normalize_source_map(sm: SourceMap) -> SourceMap:
@@ -827,6 +877,10 @@ def agent_view(sm: SourceMap) -> dict[str, Any]:
         }
         if sm.env_summary.categories:
             signals["env_vars"]["categories"] = sm.env_summary.categories
+        _spring_profiles = (sm.env_summary.spring_profiles
+                            or sm.env_summary.profiles_scanned)
+        if _spring_profiles:
+            signals["env_vars"]["spring_profiles"] = sorted(set(_spring_profiles))
         if sm.env_map:
             _sorted_env = sorted(
                 sm.env_map,
@@ -1005,7 +1059,11 @@ def standard_view(sm: SourceMap, *, include_tree: bool = False) -> dict[str, Any
         ][:_KEY_DEPS_CAP]
     if sm.env_summary is not None and sm.env_summary.requested:
-        result["env_summary"] = asdict(sm.env_summary)
+        env_sum_dict = asdict(sm.env_summary)
+        _sp = sm.env_summary.spring_profiles or sm.env_summary.profiles_scanned
+        if _sp:
+            env_sum_dict["spring_profiles"] = sorted(set(_sp))
+        result["env_summary"] = env_sum_dict
         result["env_map"] = [asdict(e) for e in sm.env_map[:_ENV_MAP_CAP]]
     if sm.code_notes_summary is not None and sm.code_notes_summary.requested:

{sourcecode-0.49.0.dist-info → sourcecode-1.1.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sourcecode
-Version: 0.49.0
+Version: 1.1.0
 Summary: Deterministic codebase context for AI coding agents
 License:                                  Apache License
                                    Version 2.0, January 2004
@@ -221,349 +221,141 @@ Description-Content-Type: text/markdown
 **Deterministic codebase context for AI coding agents.**
-[![PyPI](https://img.shields.io/pypi/v/sourcecode)](https://pypi.org/project/sourcecode/)
-[![Python](https://img.shields.io/pypi/pyversions/sourcecode)](https://pypi.org/project/sourcecode/)
-[![License](https://img.shields.io/badge/license-Apache%202.0-blue)](LICENSE)
-[![CI](https://img.shields.io/github/actions/workflow/status/sourcecode-ai/sourcecode/ci.yml)](https://github.com/sourcecode-ai/sourcecode/actions)
-Turn any repository into structured, reproducible context optimized for AI coding agents — in one command.
-```bash
-pip install sourcecode
-sourcecode . --agent
-```
-```json
-{
-  "project": {
-    "type": "api",
-    "summary": "Python REST API built with FastAPI and SQLAlchemy. Layered architecture with domain, service, and infrastructure layers.",
-    "primary_stack": "python",
-    "frameworks": ["FastAPI", "SQLAlchemy"]
-  },
-  "entry_points": [
-    { "path": "src/app/main.py", "kind": "server", "confidence": "high" }
-  ],
-  "architecture": "FastAPI application. Clean Architecture with domain, application, and infrastructure layers. Hub modules: schema.py, models.py.",
-  "key_dependencies": [
-    { "name": "fastapi", "declared_version": ">=0.100", "role": "runtime" },
-    { "name": "sqlalchemy", "declared_version": "^2.0", "role": "runtime" },
-    { "name": "pydantic", "declared_version": "^2.0", "role": "runtime" }
-  ],
-  "confidence_summary": { "overall": "high" }
-}
-```
+![Version](https://img.shields.io/badge/version-1.1.0-blue)
+![Status](https://img.shields.io/badge/status-MVP-orange)
+![Python](https://img.shields.io/badge/python-3.10%2B-green)
 ---
-## The problem
+## What is it?
-AI coding agents are only as good as the context they receive. In large, real-world repositories, that context is almost always wrong.
-- **Agents start blind.** Without repo structure, they hallucinate imports, file paths, and architecture decisions.
-- **Context is noisy.** Raw file trees contain benchmark dirs, generated files, tooling configs, and docs that consume tokens without helping.
-- **Architecture is invisible.** LLMs see files, not systems. They miss layers, plugin systems, entry points, and runtime topology.
-- **Context decays.** What you paste today is stale tomorrow. There's no reproducible baseline.
-- **Manual context doesn't scale.** Handcrafting prompts per project is engineering debt that grows with every new agent, team, and task.
+`sourcecode` analyzes a repository and produces a structured context map (JSON or YAML) designed to be consumed by AI agents or language models. Instead of stuffing the whole repo into the prompt, it produces a deterministic extract: entry points, dependencies, stacks, inline annotations, environment variables, and git activity. It is an MVP tool under active evolution — the semantic analysis and module graph features work but have known limitations that are explicitly documented below.
 ---
-## The solution
-`sourcecode` analyzes your repository and produces a structured, reproducible context package — ready to inject into any AI coding agent.
-**What it does:**
-- Detects stacks, frameworks, entry points, and project type across 10+ languages
-- Infers runtime topology: which packages are core, which are plugins, which are noise
-- Ranks files by operational relevance for agents: git churn + runtime proximity + bootstrap signal
-- Suppresses non-runtime noise: benchmarks, docs, tooling, generated files
-- Produces structured JSON/YAML that agents can reason over, not raw file trees
-- Runs deterministically — same repo, same output, every time
-**What it outputs:**
-- `project_summary` — one-sentence natural language description
-- `architecture_summary` — runtime topology: layers, plugin systems, entry flows
-- `entry_points` — where execution actually starts (production, not benchmarks)
-- `key_dependencies` — runtime dependencies with role classification
-- `relevant_files` — ranked by usefulness for coding tasks, not folder position
-- `confidence_summary` — detection quality and analysis gaps
-All fields are stable, machine-readable, and designed for LLM consumption.
----
+## Installation
-## Install
+**Prerequisites:** Python 3.10+
 ```bash
 pip install sourcecode
+# or with pipx for isolation:
+pipx install sourcecode
 ```
-Requires Python 3.9+. No API keys. No network calls. Runs locally.
----
-## Quickstart
-**Basic analysis:**
-```bash
-sourcecode .
-```
-**Agent-optimized output** (structured, noise-free, gap-aware):
-```bash
-sourcecode . --agent
-```
-**Task-specific context for coding agents:**
-```bash
-# Explain the project architecture
-sourcecode . prepare-context explain
-# Find likely bug locations
-sourcecode . prepare-context fix-bug
-# Onboard a new agent to the codebase
-sourcecode . prepare-context onboard
-# Ranked context for a specific task
-sourcecode . prepare-context refactor
-```
-**Pipe directly into Claude Code or any agent:**
-```bash
-sourcecode . --agent | claude -p "Review the architecture and suggest improvements"
-```
-**Write to file for session injection:**
-```bash
-sourcecode . --agent --output context.json
-```
+Verify installation:
-**Include git activity signals:**
 ```bash
-sourcecode . --agent --git-context
+sourcecode version
+# sourcecode 1.1.0
 ```
 ---
-## Use cases
-### Claude Code
-```bash
-# Start every session with full context
-sourcecode . --agent > .claude/context.json
-# Use with CLAUDE.md for persistent context
-echo "$(sourcecode . --agent --compact)" >> CLAUDE.md
-```
-### Cursor / Windsurf / Copilot
-```bash
-# Generate context snapshot before starting a feature
-sourcecode . --agent --git-context --output .cursor/context.json
-```
-### OpenAI / Anthropic API
-```python
-import json, subprocess
-context = json.loads(
-    subprocess.check_output(["sourcecode", ".", "--agent"])
-)
-system_prompt = f"""
-You are working on: {context['project']['summary']}
-Architecture: {context['architecture']}
-Entry points: {[ep['path'] for ep in context['entry_points']]}
-"""
-```
-### CI / CD pipelines
-```yaml
-# .github/workflows/context.yml
-- name: Generate codebase context
-  run: sourcecode . --agent --output context.json
-- name: AI-assisted code review
-  run: |
-    CONTEXT=$(cat context.json)
-    # Inject into your preferred AI review step
-```
+## Quickstart
-### Onboarding new engineers
-```bash
-# Generate human-readable architecture summary
-sourcecode . prepare-context onboard --llm-prompt
-```
+The most useful command for integrating `sourcecode` into an AI agent:
-### Architecture audits
 ```bash
-sourcecode . --agent --architecture --graph-modules --dependencies
+sourcecode --agent
 ```
----
-## How it works
-`sourcecode` runs a local, static analysis pipeline on your repository:
+It produces a structured JSON with the essential sections (no noise, no file tree), ready to paste into an LLM context:
+```json
+{
+  "project": {
+    "type": "fullstack",
+    "summary": "Full-stack project in Nodejs, mvc, 4075 source files. Domains: atlas-client, atlas-server, atlas-hub, atlas-reports. 3300 dependencies (java, nodejs).",
+    "primary_stack": "nodejs",
+    "secondary_stacks": ["java"]
+  },
+  "entry_points": [
+    {
+      "path": "atlas-server/src/main/java/com/example/atlas/AtlasServerApplication.java",
+      "stack": "java",
+      "kind": "application",
+      "confidence": "high"
+    },
+    {
+      "path": "atlas-client/src/main.ts",
+      "stack": "nodejs",
+      "kind": "entrypoint",
+      "confidence": "high"
+    }
+  ],
+  "runtime_packages": [ ... ],
+  "dependencies": { ... },
+  "env_map": { ... },
+  "code_notes": [ ... ]
+}
 ```
-Repository
-    │
-    ├── Scanner          # File tree, manifests, workspace detection
-    ├── Stack Detectors  # Language, framework, package manager detection
-    ├── Entry Points     # Production entry points (not benchmarks/docs)
-    ├── Git Analyzer     # Churn hotspots, uncommitted changes
-    ├── Relevance Scorer # Runtime proximity × git churn × bootstrap signal
-    └── Serializer       # Structured JSON/YAML output
-```
-No LLM calls. No network requests. No sampling. Fully deterministic.
-The same repository produces the same output on every run — which means agents can cache it, diff it, and rely on it.
----
-## Output modes
-| Mode | Use case | Size |
-|------|----------|------|
-| `sourcecode .` | Full analysis | Full |
-| `sourcecode . --agent` | AI agent injection | ~600–1000 tokens |
-| `sourcecode . --compact` | Prompts, handoffs | ~500–700 tokens |
-| `sourcecode . prepare-context <task>` | Task-specific context | ~800–1200 tokens |
-### Available flags
-| Flag | Description |
-|------|-------------|
-| `--agent` | Structured, noise-free output for AI agents. Auto-enables `--dependencies`, `--env-map`, `--code-notes`. |
-| `--dependencies` | Direct dependencies with versions and role classification. |
-| `--git-context` | Recent commits, change hotspots, uncommitted files. |
-| `--architecture` | Layer inference: MVC, layered, hexagonal, domain-based. |
-| `--graph-modules` | Module import graph and call relationships. |
-| `--semantics` | Cross-file symbol resolution and call graph. |
-| `--env-map` | All environment variables referenced in source. |
-| `--code-notes` | TODOs, FIXMEs, HACKs, and Architecture Decision Records. |
-| `--compact` | Minimal output for token-constrained prompts. |
-| `--format yaml` | YAML instead of JSON. |
-| `--output PATH` | Write to file instead of stdout. |
-Full reference: `sourcecode --help`
-### Prepare-context tasks
-| Task | What it produces |
-|------|-----------------|
-| `explain` | Architecture + entry points + key dependencies |
-| `fix-bug` | Risk-ranked files + suspected areas + code annotations |
-| `refactor` | Structural issues + improvement opportunities |
-| `generate-tests` | Untested source files + test gap analysis |
-| `onboard` | Full project understanding for new agents/developers |
-| `review-pr` | Changed files + architectural impact |
-| `delta` | Git-changed files only — incremental context |
----
-## Philosophy
-**Determinism over approximation.** Every run on the same repository produces the same output. Agents, pipelines, and teams can depend on that.
-**Runtime topology over file trees.** What matters is where execution starts, what calls what, and which modules are actually critical — not alphabetical file lists.
+For large repositories where context size matters, use `--compact` to reduce the output to ~600-800 tokens:
-**Noise suppression by default.** Benchmark dirs, generated files, tooling configs, and docs are suppressed unless explicitly requested. Agents get signal, not inventory.
-**Local-first, privacy-respecting.** No code leaves your machine. No API keys required. Analysis is fully offline.
-**Composable, not monolithic.** Output is structured data. Pipe it, transform it, inject it, cache it. It's infrastructure, not a magic black box.
-**Confidence-aware.** Every analysis includes a confidence summary and gap list. Agents know what they don't know.
----
-## Supported languages and stacks
-| Language | Package detection | Entry points | Frameworks |
-|----------|-------------------|--------------|------------|
-| Python | `pyproject.toml`, `requirements.txt`, `setup.py` | CLI, scripts, `__main__` | FastAPI, Django, Flask, Typer, Click |
-| Node.js | `package.json`, lock files | `main`, `bin`, scripts | Express, Next.js, Fastify, NestJS, React, Vue |
-| Go | `go.mod` | `main.go`, `cmd/` | Standard library, Gin, Echo |
-| Rust | `Cargo.toml` | `main.rs`, `lib.rs` | Tokio, Actix, Axum |
-| Java | `pom.xml`, `build.gradle` | Spring Boot, Quarkus, Micronaut | Spring, Quarkus |
-| Kotlin | `build.gradle.kts` | Spring Boot, Ktor | Spring, Ktor |
-| .NET / C# | `.csproj`, `.sln` | `Program.cs` | ASP.NET, Blazor |
-| PHP | `composer.json` | `index.php` | Laravel, Symfony |
-| Ruby | `Gemfile` | `config.ru` | Rails, Sinatra |
-| Dart | `pubspec.yaml` | `main.dart` | Flutter |
-Monorepos with mixed stacks are fully supported.
----
-## Roadmap
-**Now — Core stability**
-- Ranking improvements (git churn, runtime proximity)
-- Better architecture inference
-- Broader language coverage
-**Next — Agent integrations**
-- MCP server for native Claude Code integration
-- VS Code extension
-- Context diffing (compare before/after changes)
-- Incremental updates (delta mode improvements)
-**Later — Team features**
-- Shared context snapshots
-- Architecture drift detection
-- CI integration templates
-- Governance and compliance context
-> Focus is on adoption and utility. No monetization until the core is genuinely useful to the community.
----
-## Contributing
-We welcome contributions. See [CONTRIBUTING.md](CONTRIBUTING.md) for setup, testing, and guidelines.
-**Quick start for contributors:**
 ```bash
-git clone https://github.com/sourcecode-ai/sourcecode
-cd sourcecode
-pip install -e ".[dev]"
-pytest tests/
+sourcecode --compact --copy
+# Copies the summary to the clipboard. Ready to paste.
 ```
 ---
-## Security
-`sourcecode` analyzes local repositories. It does not transmit code, paths, or analysis results to any external service. See [SECURITY.md](SECURITY.md) for our security policy and responsible disclosure process.
+## Flags reference
+### Global options
+| Flag | Alias | Type | Default | Description | Status |
+|------|-------|------|---------|-------------|--------|
+| `--format` | `-f` | `json\|yaml` | `json` | Output format. YAML is more readable, JSON preferred in pipelines. | ✅ CORE |
+| `--output` | `-o` | `PATH` | stdout | Writes output to a file instead of stdout. | ✅ CORE |
+| `--compact` | | flag | off | ~600-800 token output: stacks, entry points, deps, gaps. No file tree. | ✅ CORE |
+| `--agent` | | flag | off | JSON optimized for agents. Automatically enables `--dependencies`, `--env-map`, `--code-notes`. | ✅ CORE |
+| `--dependencies` | | flag | off | Analyzes direct and transitive deps from manifests and lockfiles. | ✅ CORE |
+| `--git-context` | `-g` | flag | off | Includes recent commits, change hotspots, uncommitted changes, contributors. | ✅ CORE |
+| `--git-depth` | | `INT [1–100]` | `20` | Number of recent commits with `--git-context`. | ✅ CORE |
+| `--git-days` | | `INT [1–3650]` | `90` | Window in days to detect hotspots with `--git-context`. | ✅ CORE |
+| `--env-map` | | flag | off | Maps environment variables: key, type, category, files that reference them. | ✅ CORE |
+| `--code-notes` | | flag | off | Extracts inline annotations: TODO, FIXME, HACK, BUG, DEPRECATED, NOTE, etc. | ✅ CORE |
+| `--copy` | `-c` | flag | off | Copies output to the clipboard after successful execution. | ✅ CORE |
+| `--depth` | | `INT [1–20]` | `4` | Maximum file tree traversal depth. Java/Maven requires ≥8. | ✅ CORE |
+| `--mode` | | `contract\|standard\|raw` | `contract` | `contract`: minimal contracts per file. `standard`: full detail. `raw`: project level only. | ✅ CORE |
+| `--tree` | | flag | off | Includes full `file_tree` and `file_paths` in the output. Increases size significantly. | ✅ CORE |
+| `--changed-only` | | flag | off | Contract mode: only files modified in git (staged, unstaged, untracked). | ✅ CORE |
+| `--rank-by` | | `relevance\|centrality\|git-churn` | `relevance` | File ranking strategy in contract mode. | ✅ CORE |
+| `--semantics` | | flag | off | Cross-file symbol resolution, call graph with confidence levels, fan-in/fan-out hotspots. Slower. | 🧪 EXP |
+| `--architecture` | | flag | off | Architectural layer inference (MVC/hexagonal/bounded contexts). Low confidence without `--semantics`. | 🧪 EXP |
+| `--graph-modules` | | flag | off | Structural module graph: nodes (files/symbols) and edges (imports, calls, contains). | 🧪 EXP |
+| `--graph-detail` | | `high\|medium\|full` | `high` | Module graph detail level. | 🧪 EXP |
+| `--max-nodes` | | `INT [≥1]` | — | Maximum nodes in `--graph-modules`. Prevents huge graphs in large repos. | 🧪 EXP |
+| `--graph-edges` | | `TEXT` | all | Edge types for `--graph-modules`, comma-separated: `imports,calls,contains`. | 🧪 EXP |
+| `--docs` | | flag | off | Extracts docstrings, function signatures, and module comments. | 🧪 EXP |
+| `--docs-depth` | | `module\|symbols\|full` | `symbols` | Docs extraction depth. `full` includes private symbols. | 🧪 EXP |
+| `--symbol` | | `TEXT` | — | Contract mode: localized context for a specific symbol. Python, TS, JS only. **Does not support Java.** | 🧪 EXP |
+| `--max-importers` | | `INT [1–10000]` | `50` | Limit on importer files returned by `--symbol`. | 🧪 EXP |
+| `--full-metrics` | | flag | off | Per-file technical metrics: LOC, cyclomatic complexity, coverage. Aimed at CI, not at agents. | 🧪 EXP |
+| `--emit-graph` | | flag | off | Contract mode: includes a compact dependency graph (nodes + edges) in the output. | 🚧 WIP |
+| `--entrypoints-only` | | flag | off | Contract mode: only files with exports or entry points. Note: includes *all* files with exports. | 🚧 WIP |
+| `--max-symbols` | | `INT [≥1]` | — | Limits total exported symbols in contract mode. Discards lower-ranked files. | 🚧 WIP |
+| `--no-redact` | | flag | off | Disables automatic secret redaction. Output may contain sensitive values. | 🚧 WIP |
+| `--trace-pipeline` | | flag | off | Diagnostic mode: includes a trace of each candidate and filtering decision. Debugging only. | 🚧 WIP |
+| `--version` | `-v` | flag | — | Shows version and exits. | ✅ CORE |
 ---
-## Privacy
+## Subcommands
-Telemetry is **opt-in only** and disabled by default. If you choose to enable it, only anonymous usage metadata is collected — never code, paths, or content. See [docs/privacy.md](docs/privacy.md) for full details.
+### `prepare-context TASK [PATH]`
-```bash
-sourcecode telemetry status   # check current setting
-sourcecode telemetry enable   # opt in
-sourcecode telemetry disable  # opt out
-```
----
+Generates task-specific context for AI agents.
-## License
-Apache License 2.0. See [LICENSE](LICENSE) for details.
----
+| Task | Description | Status |
+|------|-------------|--------|
+| `explain` | Architecture, entry points, key dependencies | ✅ CORE |
+| `fix-bug` | Files prioritized by risk, inline annotations | ✅ CORE |
+| `onboard` | Full context for new agents or developers | ✅ CORE |
+| `delta` | Incremental context: only files changed in git | ✅ CORE |
+| `refactor` | Structural problems, improvement opportunities | 🧪 EXP |
+| `generate-tests` | Files without tests, coverage gap analysis | 🧪 EXP |
+| `review-pr` | Changed files + architectural impact | 🧪 EXP |
-<p align="center">
-  Built for the age of AI coding agents.<br>
-  <a href="https://github.com/sourcecode-ai/sourcecode">GitHub</a> ·
-  <a href="https://pypi.org/project/sourcecode/">PyPI</a> ·
-  <a href="docs/getting-started.md">Documentation</a>
-</p>
+...

{sourcecode-0.49.0.dist-info → sourcecode-1.1.0.dist-info}/RECORD RENAMED Viewed

@@ -1,7 +1,7 @@
-sourcecode/__init__.py,sha256=W3DJGnBZMJZBnvn9pO7FSLfHppERKWNuRgtqy1X-umM,103
+sourcecode/__init__.py,sha256=U4-Ic6jRz9YH4wIYlhtl8YFtDO_yG3OsMIWYQbQ3mKE,102
 sourcecode/adaptive_scanner.py,sha256=6dh34C2qZXyRbw-8xBhbEwDdXanM6CRFRWayVoYITnA,10190
-sourcecode/architecture_analyzer.py,sha256=O4AXc7l_WTzIXrcAzstqZy-TGKNaFa6p3MzpgVjaO8g,27749
-sourcecode/architecture_summary.py,sha256=rSY5MRiaz4N1YdG0pqDTDuFjSN7PO_Zplx-dtNzv2Yo,19985
+sourcecode/architecture_analyzer.py,sha256=hn2K4c_EknGehXZ3I1KyoJPI-LlBSkphrVGBZMceif4,31249
+sourcecode/architecture_summary.py,sha256=J9yoLgh8wXwIRrT6q6JooB6PekivbOEYpJz4BUXdalk,20545
 sourcecode/ast_extractor.py,sha256=0OHQwTUBBc9lmqPLryVeB1z8dGIC6NhLlar800CD9oI,41129
 sourcecode/classifier.py,sha256=GKTMN8qKZX7ponSwDJfN08RrasI4CVpq1_gFBgEopps,7093
 sourcecode/cli.py,sha256=YusMOF5OfihL3nBw66LcANRFSiVHugPrXE0vPIycjLQ,72016
@@ -15,21 +15,21 @@ sourcecode/coverage_parser.py,sha256=q0LeZJaX1bnntLu-ImksdBsMlpsVmk_iUfSaB4eaJGo
 sourcecode/dependency_analyzer.py,sha256=Exq0BfInvfS5iAg9xAr6WI2uPNuotkIudTKcYJcRhB8,52757
 sourcecode/doc_analyzer.py,sha256=TttdS7mndKQhyJCfJnnAsyGCJrf-TIL7oXxDlTLUFKE,21248
 sourcecode/entrypoint_classifier.py,sha256=a69dMGyxCTd_LOm3oqj-EXWpRmbmeujN7T1mr2eJ1as,3877
-sourcecode/env_analyzer.py,sha256=YXlaxFBuf-ladWmb3iLCNMN-rKhP2JuqAIDwZdiIZHQ,18473
-sourcecode/file_classifier.py,sha256=_KfFIIolharaIxbSTrCkaWauQIqNHCyor_n47RGyDh8,8577
+sourcecode/env_analyzer.py,sha256=Ifwst0YLvArHHaRQXlf9DCYGO0MdyQBAMWSyEzfpKZo,21650
+sourcecode/file_classifier.py,sha256=48ly5Z6exkzBy8lNy1AkdP4-oJqIA1zT3LZfffuTyDo,11572
 sourcecode/git_analyzer.py,sha256=PD3eNWydznQ6KLNpxGzBqizIHoPIKevfwz9Xyf_pDt4,11600
 sourcecode/graph_analyzer.py,sha256=hMOsLLz9B0UnQ4xwbHdgr3bFvqpw0bQ8kN-xmEn3Krk,64156
 sourcecode/metrics_analyzer.py,sha256=e2cFwB9XubFq_dIVsP2PLjpr4wX0N6ulb3ol3sGDUeo,20777
-sourcecode/prepare_context.py,sha256=n7NghZJt8zPt7bzMVpk6gvHlQfhwDYjuLJjgHSOTfD4,33943
+sourcecode/prepare_context.py,sha256=FKh-M5B74r-yztuAgfuSE8RjIZvsq9YRwTr74zmldxI,35901
 sourcecode/ranking_engine.py,sha256=virVglafZufioHpZpwktjMvUiL0TZELWQCQnQNV8dFo,9360
 sourcecode/redactor.py,sha256=xuGcadGEHaPw4qZXlMDvzMCsr4VOkdp3oBQptHyJk8c,2884
 sourcecode/relevance_scorer.py,sha256=MYF4FFkveAQps9SmTeTlh6ODiBz2F--_hWNeHMLtUHQ,8405
 sourcecode/repo_classifier.py,sha256=FG1vaWKdWXsWdl-S8hjVMiTqcwgaRXkDyvK4rPcOGtQ,22681
 sourcecode/runtime_classifier.py,sha256=zWX3r3HCKHc-qtIobErOa8aKMmaoPYREtJKvPcBGPjQ,14792
 sourcecode/scanner.py,sha256=aM3h9-DCQ3xKpeHpHYdo2vX6T5P95HA_YwZbkAVNwmo,8288
-sourcecode/schema.py,sha256=ofEge9hTWHOTjeWt7ceCDQWzP-uhhenrYX2usjW2KVU,22759
+sourcecode/schema.py,sha256=5s9Gtiw2Fk-HEVwVcegl2fy-cyYBwS16WSTS0xIv744,23204
 sourcecode/semantic_analyzer.py,sha256=16EFTgM7ooW0m5gNUKOlTSn7IEMLSzKmzQn-cWaSqjs,82604
-sourcecode/serializer.py,sha256=nh8DNGVPVszy60YnWGVH_sLyskgDN973glPIMzNeFWA,62843
+sourcecode/serializer.py,sha256=k-rddaaIlvAA5F2qvizCh_yd4oAlhhsg2obrYoJKtlo,65424
 sourcecode/summarizer.py,sha256=ZuzIdm3t8A-d5MuQL0TSNLrd-L0IQIuguIxeNXMNJf8,16070
 sourcecode/tree_utils.py,sha256=Fj9OIuUksBvgibNd3feog0sMDjVypJzPexp5lvMoYWI,1424
 sourcecode/workspace.py,sha256=X_6NmNnitvT3_38V-JDChydo_sR68s249hLFlrQskU0,8271
@@ -42,7 +42,7 @@ sourcecode/detectors/elixir.py,sha256=jCpvt5Yi6jvplc80ovRtWh17q-11ZGo9qX7o8b57TJ
 sourcecode/detectors/go.py,sha256=2r66uRQfeTWsqxr4HDhT6vExZErby0t46QXLHVBRv9w,2782
 sourcecode/detectors/heuristic.py,sha256=bCqqgbHavl4Sse3dqT8mwmo1wAdgeJr7VyXOmfClLKo,3387
 sourcecode/detectors/hybrid.py,sha256=IGFRUVsAZ1ooRlFdznCeJAV6vy1yVDx-VyghvLtddXc,9101
-sourcecode/detectors/java.py,sha256=cZvB13cqJ76zHDncEG-TOCuK8gJjJN2mZGS2DGEcZy8,7715
+sourcecode/detectors/java.py,sha256=H5qicYbpIFqThCuT4Aocn-d2zEZ_6vJc-kLjHZITIBw,9084
 sourcecode/detectors/jvm_ext.py,sha256=EgHJ5W8EE-ZTN9V607mVzohyKgZE8Mc2jCi-DF8RAZU,2616
 sourcecode/detectors/nodejs.py,sha256=7fsyAmrGkkguX6U80HUQpIe9MRaYyi_A7zbaRtmFmGc,13097
 sourcecode/detectors/parsers.py,sha256=ugPg8yNUf0Ai1gA7Fnn6wAkYGFjTxRodSP3IeViYJJ4,2290
@@ -60,8 +60,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
 sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
 sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
 sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
-sourcecode-0.49.0.dist-info/METADATA,sha256=5FVQYOuzhccMc8oiJ-tPJPr3XJqrdDzWRWf32W8HqWk,25209
-sourcecode-0.49.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
-sourcecode-0.49.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
-sourcecode-0.49.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
-sourcecode-0.49.0.dist-info/RECORD,,
+sourcecode-1.1.0.dist-info/METADATA,sha256=pM02mysiHsgFzu3TzkDPTNX78ts7wt0S4R4We6zDA_w,20411
+sourcecode-1.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
+sourcecode-1.1.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
+sourcecode-1.1.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
+sourcecode-1.1.0.dist-info/RECORD,,

{sourcecode-0.49.0.dist-info → sourcecode-1.1.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{sourcecode-0.49.0.dist-info → sourcecode-1.1.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{sourcecode-0.49.0.dist-info → sourcecode-1.1.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

sourcecode 0.49.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

sourcecode 0.49.0__py3-none-any.whl → 1.1.0__py3-none-any.whl