sourcecode 1.31.22__py3-none-any.whl → 1.31.23__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
sourcecode/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "1.31.22"
3
+ __version__ = "1.31.23"
sourcecode/cli.py CHANGED
@@ -423,7 +423,7 @@ def main(
423
423
  "High-signal summary (typically 1000–3000 tokens depending on repo size): "
424
424
  "stacks, entry points, dependency summary, confidence, and gaps. "
425
425
  "Includes security_surface (when @M3FiltroSeguridad detected), mybatis (when MyBatis framework detected), and transactional_boundaries for Java projects. "
426
- "Use --agent for maximum signal or --slim (when available) for minimal token footprint."
426
+ "Use --agent for maximum signal."
427
427
  ),
428
428
  ),
429
429
  dependencies: bool = typer.Option(
@@ -1093,7 +1093,18 @@ def _resolve_jaxrs_prefixes(
1093
1093
 
1094
1094
  for parent_simple, locator_path in locator_map[cls_simple]:
1095
1095
  parent_full = _resolve_jaxrs_prefixes(parent_simple, class_info, locator_map, new_visited)
1096
- for pp in parent_full:
1096
+ # Skip implementation/unrooted parents: if the parent resolves to only empty
1097
+ # prefixes AND has no class-level @Path annotation, it is a concrete impl class
1098
+ # (e.g. DefaultClientsApi implements ClientsApi) that duplicates a locator method
1099
+ # from its interface. Including it would produce spurious short paths like /{id}
1100
+ # alongside the correctly-resolved full path. The interface version is already
1101
+ # in the locator_map and will produce the correct full path.
1102
+ _parent_has_path_ann = class_info.get(parent_simple, {}).get("has_path_ann", False)
1103
+ _non_empty_parent = [p for p in parent_full if p]
1104
+ if not _non_empty_parent and not _parent_has_path_ann:
1105
+ continue
1106
+ use_parent_paths = _non_empty_parent if _non_empty_parent else parent_full
1107
+ for pp in use_parent_paths:
1097
1108
  for op in own_prefixes:
1098
1109
  combined = _join_path_segments(pp, locator_path, op)
1099
1110
  full_prefixes.append(combined)
@@ -2833,6 +2844,18 @@ def extract_java_endpoints(root: Path) -> "dict[str, Any]":
2833
2844
  entry["required_permission"] = security_info["required_permission"]
2834
2845
  endpoints.append(entry)
2835
2846
 
2847
+ # Filter out endpoints whose path looks like a Java FQN (e.g. dynamic admin routing
2848
+ # in frameworks like Broadleaf Commerce where @AdminSection registers entity class
2849
+ # FQNs as URL segments). These are not real REST paths — they are resolved at
2850
+ # runtime by the framework. Including them pollutes the endpoint surface with 20+
2851
+ # garbage entries that confuse agents and break endpoint count accuracy.
2852
+ # Pattern: path segment that matches a Java package hierarchy (org.foo.Bar).
2853
+ import re as _re_fqn
2854
+ _FQN_PATH_RE = _re_fqn.compile(
2855
+ r"/(org|com|net|io|edu)\.[a-z][a-z0-9]*\.[a-zA-Z]",
2856
+ )
2857
+ endpoints = [e for e in endpoints if not _FQN_PATH_RE.search(e.get("path", ""))]
2858
+
2836
2859
  # "no_security_signal" = no recognized security annotation at method OR class level.
2837
2860
  # Note: repos may use framework-level security (e.g. Keycloak itself) with no
2838
2861
  # per-endpoint annotations — this count reflects annotation-based coverage only.
sourcecode/serializer.py CHANGED
@@ -1771,24 +1771,47 @@ def _angular_analysis(sm: "SourceMap") -> "Optional[dict[str, Any]]":
1771
1771
  if val and val not in route_paths:
1772
1772
  route_paths.append(val)
1773
1773
 
1774
- # Angular version from package.json
1774
+ # Angular version from package.json — check root first, then subdirectories.
1775
+ # In monorepos (Java + Angular), the Angular package.json is in a subdirectory
1776
+ # like frontend/ and not at the repo root. We probe candidate locations.
1775
1777
  angular_version: Optional[str] = None
1776
- pkg_json = root / "package.json"
1777
- if pkg_json.exists():
1778
+
1779
+ def _read_angular_version_from_pkg(pkg_path: Path) -> Optional[str]:
1780
+ """Extract @angular/core version from a package.json file."""
1778
1781
  try:
1779
- pkg = _json.loads(pkg_json.read_text(encoding="utf-8", errors="replace"))
1780
- # Use `or {}` so explicit `null` values in package.json don't
1781
- # raise TypeError when unpacking (BUG-4).
1782
+ pkg = _json.loads(pkg_path.read_text(encoding="utf-8", errors="replace"))
1782
1783
  deps = {
1783
1784
  **(pkg.get("dependencies") or {}),
1784
1785
  **(pkg.get("devDependencies") or {}),
1785
1786
  **(pkg.get("peerDependencies") or {}),
1786
1787
  }
1787
1788
  av = deps.get("@angular/core")
1788
- if av:
1789
- angular_version = av.lstrip("^~>=")
1789
+ if av and isinstance(av, str):
1790
+ return av.lstrip("^~>=")
1790
1791
  except Exception:
1791
1792
  pass
1793
+ return None
1794
+
1795
+ # 1. Try root package.json first (fastest, most common for pure Angular projects)
1796
+ _root_pkg = root / "package.json"
1797
+ if _root_pkg.exists():
1798
+ angular_version = _read_angular_version_from_pkg(_root_pkg)
1799
+
1800
+ # 2. If not found at root, search subdirectory package.json files.
1801
+ # Limit to ts_files-derived subdirs to avoid scanning the whole repo.
1802
+ if angular_version is None and ts_files:
1803
+ _candidate_dirs: set[str] = set()
1804
+ for ts_rel in ts_files[:200]: # sample first 200 ts files
1805
+ parts = ts_rel.replace("\\", "/").split("/")
1806
+ if len(parts) >= 2:
1807
+ _candidate_dirs.add(parts[0]) # top-level subdir (e.g. "frontend")
1808
+ for subdir in sorted(_candidate_dirs):
1809
+ _sub_pkg = root / subdir / "package.json"
1810
+ if _sub_pkg.exists():
1811
+ _v = _read_angular_version_from_pkg(_sub_pkg)
1812
+ if _v:
1813
+ angular_version = _v
1814
+ break
1792
1815
 
1793
1816
  # Also check angular.json for entry point
1794
1817
  entry_point: Optional[str] = None
sourcecode/summarizer.py CHANGED
@@ -203,34 +203,78 @@ class ProjectSummarizer:
203
203
  __import__("re").IGNORECASE,
204
204
  )
205
205
 
206
+ # Patterns that indicate license notices or user-facing marketing text.
207
+ # These describe what the product does FOR users or its licensing terms,
208
+ # not the codebase architecture.
209
+ _LICENSE_MARKETING_RE = __import__("re").compile(
210
+ r"\bfair[- ]use\b" # Fair Use license
211
+ r"|\bcommunity edition\b" # product tier labels
212
+ r"|\benterprise edition\b"
213
+ r"|\bcommercial licen[sc]e\b"
214
+ r"|\bsource.available\b"
215
+ r"|\bavailable to companies\b" # license restriction
216
+ r"|\bunder \$\d+[MK]\b" # revenue threshold
217
+ r"|\bimportant:\s" # WARNING/IMPORTANT caveats
218
+ r"|\badd authentication to\b" # user-facing "add X to Y" marketing
219
+ r"|\bno need to deal with\b"
220
+ r"|\bwith minimum effort\b"
221
+ r"|\bsign up\b.*\bevaluation\b"
222
+ r"|\bcontact us\b.*\bmore information\b",
223
+ __import__("re").IGNORECASE,
224
+ )
225
+
206
226
  def _extract_first_useful_paragraph(self, content: str) -> str | None:
227
+ """Extract the first paragraph that describes the project architecture, not its license or marketing."""
207
228
  import re as _re
208
229
  _BADGE_RE = _re.compile(r"^\[?!\[") # [![badge](...)] or ![img](...)
209
230
  _LINK_ONLY_RE = _re.compile(r"^\[.*?\]\(.*?\)$") # pure link line
210
- lines: list[str] = []
231
+
232
+ paragraphs: list[str] = []
233
+ current_lines: list[str] = []
211
234
  in_code_block = False
235
+
212
236
  for raw_line in content.splitlines():
213
237
  line = raw_line.strip()
214
238
  if line.startswith("```"):
215
239
  in_code_block = not in_code_block
216
240
  continue
217
- if in_code_block or not line or line.startswith(("#", "<!--", ">")):
218
- if lines:
219
- break
241
+ if in_code_block:
220
242
  continue
221
- # Skip badge-only lines and pure-link lines — they are metadata, not descriptions
222
- if _BADGE_RE.match(line) or (not lines and _LINK_ONLY_RE.match(line)):
243
+ if not line or line.startswith(("#", "<!--", ">")):
244
+ if current_lines:
245
+ paragraphs.append(" ".join(current_lines).strip())
246
+ current_lines = []
223
247
  continue
224
- lines.append(line)
225
- if not lines:
226
- return None
227
- paragraph = " ".join(lines).strip()
228
- # Reject paragraphs that are startup/setup snippets, not domain descriptions.
229
- # Count how many startup signals appear; >1 means the paragraph is instructions.
230
- _startup_hits = len(self._STARTUP_RE.findall(paragraph))
231
- if _startup_hits >= 2:
232
- return None
233
- return paragraph
248
+ if _BADGE_RE.match(line) or _LINK_ONLY_RE.match(line):
249
+ if current_lines:
250
+ paragraphs.append(" ".join(current_lines).strip())
251
+ current_lines = []
252
+ continue
253
+ current_lines.append(line)
254
+ if current_lines:
255
+ paragraphs.append(" ".join(current_lines).strip())
256
+
257
+ _MD_LINK_RE = _re.compile(r"\[.+?\]\(.+?\)")
258
+ for paragraph in paragraphs[:6]: # Check up to 6 paragraphs
259
+ if not paragraph:
260
+ continue
261
+ # Reject very short fragments (< 30 chars) — likely just a section title
262
+ if len(paragraph) < 30:
263
+ continue
264
+ # Reject startup/setup snippets
265
+ _startup_hits = len(self._STARTUP_RE.findall(paragraph))
266
+ if _startup_hits >= 2:
267
+ continue
268
+ # Reject license notices and user-facing marketing text
269
+ if self._LICENSE_MARKETING_RE.search(paragraph):
270
+ continue
271
+ # Reject link-list paragraphs (docs/navigation sections):
272
+ # if more than 2 markdown links dominate the paragraph, it's a nav section
273
+ _link_count = len(_MD_LINK_RE.findall(paragraph))
274
+ if _link_count > 2 and _link_count * 30 > len(paragraph):
275
+ continue
276
+ return paragraph
277
+ return None
234
278
 
235
279
  _TYPE_LABELS: dict[str, str] = {
236
280
  "cli": "CLI",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 1.31.22
3
+ Version: 1.31.23
4
4
  Summary: Deterministic codebase context for AI coding agents
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -225,7 +225,7 @@ Description-Content-Type: text/markdown
225
225
 
226
226
  **AI-ready change intelligence for Java/Spring enterprise monoliths.**
227
227
 
228
- ![Version](https://img.shields.io/badge/version-1.31.22-blue)
228
+ ![Version](https://img.shields.io/badge/version-1.31.23-blue)
229
229
  ![Python](https://img.shields.io/badge/python-3.10%2B-green)
230
230
 
231
231
  ---
@@ -263,7 +263,7 @@ pipx install sourcecode
263
263
 
264
264
  ```bash
265
265
  sourcecode version
266
- # sourcecode 1.31.22
266
+ # sourcecode 1.31.23
267
267
  ```
268
268
 
269
269
  ---
@@ -1,4 +1,4 @@
1
- sourcecode/__init__.py,sha256=Wsav7BZkVmw8XZqjz_WUnhLQyGjtZVwjYnyc_N4sraE,104
1
+ sourcecode/__init__.py,sha256=u-AGXNvPNCuCZnA6MlEhddkQlAKa1tp7BeXtzkz6TpU,104
2
2
  sourcecode/adaptive_scanner.py,sha256=XffluXKzJUXrMtjEiAOnSNPZnztdIcts17T9ouHeID0,10521
3
3
  sourcecode/architecture_analyzer.py,sha256=4R13Yb02OrPeB4IH3z6V_g7HWhmGcRHbI8CobCVnRrc,39111
4
4
  sourcecode/architecture_summary.py,sha256=z34_6v7cSwy98cof2UVciGho7SCrZ93tiqMmq5WNzRQ,20405
@@ -6,7 +6,7 @@ sourcecode/ast_extractor.py,sha256=_btmeOJIe3t-NicF94D5ZAesa2YIJ0_QNExGnbHxGFE,5
6
6
  sourcecode/cache.py,sha256=TiYa3ECjBKtvlfCk7GvQ9v6gZkAITpH3ow9PubA7sUo,22946
7
7
  sourcecode/canonical_ir.py,sha256=NZu0XICv__hkQGKzW2LNQLRqb1L28K2p_WQCQKS5Zlk,23141
8
8
  sourcecode/classifier.py,sha256=yWeq6agTjkFa3zuNa-gdVIHtjoBoPoVlJnX-b7tdVJs,7851
9
- sourcecode/cli.py,sha256=qMn-4zD8v03dmkn-AZsf2TSplyhjbq9ZPMAcWl_Lrxg,147576
9
+ sourcecode/cli.py,sha256=juaaKE6QgifwFGSRdIxxB7XoQhp51m_AuG3U7zLNQ4Y,147521
10
10
  sourcecode/code_notes_analyzer.py,sha256=EJemNCNc9Dn-1RZYu-aNbK0ELzmsyC4s6FdHi3XyNEI,9392
11
11
  sourcecode/confidence_analyzer.py,sha256=_jckZSxksV-OU38vbkxfVNBnWCtlCq8Vwfg23x1uspA,19054
12
12
  sourcecode/context_scorer.py,sha256=QpChSpsmaAYz91rXA4Ue5xzQmNz_ZboZN09YOHScq1U,14679
@@ -32,13 +32,13 @@ sourcecode/ranking_engine.py,sha256=ZAucq_YX2KkWUuAZf4P0lhtQ_38vEFnUhuGtSZd1S0E,
32
32
  sourcecode/redactor.py,sha256=xuGcadGEHaPw4qZXlMDvzMCsr4VOkdp3oBQptHyJk8c,2884
33
33
  sourcecode/relevance_scorer.py,sha256=MYF4FFkveAQps9SmTeTlh6ODiBz2F--_hWNeHMLtUHQ,8405
34
34
  sourcecode/repo_classifier.py,sha256=FG1vaWKdWXsWdl-S8hjVMiTqcwgaRXkDyvK4rPcOGtQ,22681
35
- sourcecode/repository_ir.py,sha256=NooCrMJYqycKSYTEroVWTYR8X83hHaAYKTsgYxvlz-I,140221
35
+ sourcecode/repository_ir.py,sha256=vdgJwzfoY2qpQtzcGX7k0E5jWe9fcuyFdkqnF4K6UBs,141767
36
36
  sourcecode/runtime_classifier.py,sha256=uTAD6BDCiBLUZEDRfqk718kM4RTT_vAbfkcOI2_Xx58,18432
37
37
  sourcecode/scanner.py,sha256=WdOQ78mMzjR1NjmKTlbxdgwinnCTfAhxCVLBEFQiFHU,8899
38
38
  sourcecode/schema.py,sha256=aHNXDf8LGyUC8ZDE_VS9kiskC2-Oswhi_WnpdGy6HDw,24897
39
39
  sourcecode/semantic_analyzer.py,sha256=TDuC3wzZR2DPm1mgrAg1YSLk2QzJoueS3TZAmyGGpCU,89417
40
- sourcecode/serializer.py,sha256=V8ZV3Y1j4T6rkpO09-PvpVORioWWWbSnOvDjZ2hmQ2U,122144
41
- sourcecode/summarizer.py,sha256=lPlKhMh28nueXkPo2xKeD3DUFYVGRlJMIdY-8TSM-ls,17486
40
+ sourcecode/serializer.py,sha256=zzhWyaseWSAqSIe77QwJkyehVI_1DJBkGM7KqbDQtXo,123313
41
+ sourcecode/summarizer.py,sha256=KAtU2mvXT1GLJJQ50Rup42BFsJXij5hmklCwMvcxVx4,19514
42
42
  sourcecode/tree_utils.py,sha256=8GAkIfQAsvtEudIeW1l4ooH_oRtrWR8cpJQJsEa_Pfw,2093
43
43
  sourcecode/workspace.py,sha256=X_6NmNnitvT3_38V-JDChydo_sR68s249hLFlrQskU0,8271
44
44
  sourcecode/detectors/__init__.py,sha256=A0AACJFF6HWf_RgatNtWu3PUzstcKtIGM9f1PoFcJug,1987
@@ -76,8 +76,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
76
76
  sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
77
77
  sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
78
78
  sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
79
- sourcecode-1.31.22.dist-info/METADATA,sha256=zjEDrWUQ-08LOjvIfXTDUgQ4UTPkneyr4CFGZc5yaOo,31103
80
- sourcecode-1.31.22.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
81
- sourcecode-1.31.22.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
82
- sourcecode-1.31.22.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
83
- sourcecode-1.31.22.dist-info/RECORD,,
79
+ sourcecode-1.31.23.dist-info/METADATA,sha256=3-xJut96GlWt6KcUJCKPO4trdkPbNuSR9cxUo9O6FlI,31103
80
+ sourcecode-1.31.23.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
81
+ sourcecode-1.31.23.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
82
+ sourcecode-1.31.23.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
83
+ sourcecode-1.31.23.dist-info/RECORD,,