sourcecode 1.31.22__py3-none-any.whl → 1.31.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sourcecode/__init__.py +1 -1
- sourcecode/cli.py +1 -1
- sourcecode/repository_ir.py +24 -1
- sourcecode/serializer.py +31 -8
- sourcecode/summarizer.py +60 -16
- {sourcecode-1.31.22.dist-info → sourcecode-1.31.23.dist-info}/METADATA +3 -3
- {sourcecode-1.31.22.dist-info → sourcecode-1.31.23.dist-info}/RECORD +10 -10
- {sourcecode-1.31.22.dist-info → sourcecode-1.31.23.dist-info}/WHEEL +0 -0
- {sourcecode-1.31.22.dist-info → sourcecode-1.31.23.dist-info}/entry_points.txt +0 -0
- {sourcecode-1.31.22.dist-info → sourcecode-1.31.23.dist-info}/licenses/LICENSE +0 -0
sourcecode/__init__.py
CHANGED
sourcecode/cli.py
CHANGED
|
@@ -423,7 +423,7 @@ def main(
|
|
|
423
423
|
"High-signal summary (typically 1000–3000 tokens depending on repo size): "
|
|
424
424
|
"stacks, entry points, dependency summary, confidence, and gaps. "
|
|
425
425
|
"Includes security_surface (when @M3FiltroSeguridad detected), mybatis (when MyBatis framework detected), and transactional_boundaries for Java projects. "
|
|
426
|
-
"Use --agent for maximum signal
|
|
426
|
+
"Use --agent for maximum signal."
|
|
427
427
|
),
|
|
428
428
|
),
|
|
429
429
|
dependencies: bool = typer.Option(
|
sourcecode/repository_ir.py
CHANGED
|
@@ -1093,7 +1093,18 @@ def _resolve_jaxrs_prefixes(
|
|
|
1093
1093
|
|
|
1094
1094
|
for parent_simple, locator_path in locator_map[cls_simple]:
|
|
1095
1095
|
parent_full = _resolve_jaxrs_prefixes(parent_simple, class_info, locator_map, new_visited)
|
|
1096
|
-
|
|
1096
|
+
# Skip implementation/unrooted parents: if the parent resolves to only empty
|
|
1097
|
+
# prefixes AND has no class-level @Path annotation, it is a concrete impl class
|
|
1098
|
+
# (e.g. DefaultClientsApi implements ClientsApi) that duplicates a locator method
|
|
1099
|
+
# from its interface. Including it would produce spurious short paths like /{id}
|
|
1100
|
+
# alongside the correctly-resolved full path. The interface version is already
|
|
1101
|
+
# in the locator_map and will produce the correct full path.
|
|
1102
|
+
_parent_has_path_ann = class_info.get(parent_simple, {}).get("has_path_ann", False)
|
|
1103
|
+
_non_empty_parent = [p for p in parent_full if p]
|
|
1104
|
+
if not _non_empty_parent and not _parent_has_path_ann:
|
|
1105
|
+
continue
|
|
1106
|
+
use_parent_paths = _non_empty_parent if _non_empty_parent else parent_full
|
|
1107
|
+
for pp in use_parent_paths:
|
|
1097
1108
|
for op in own_prefixes:
|
|
1098
1109
|
combined = _join_path_segments(pp, locator_path, op)
|
|
1099
1110
|
full_prefixes.append(combined)
|
|
@@ -2833,6 +2844,18 @@ def extract_java_endpoints(root: Path) -> "dict[str, Any]":
|
|
|
2833
2844
|
entry["required_permission"] = security_info["required_permission"]
|
|
2834
2845
|
endpoints.append(entry)
|
|
2835
2846
|
|
|
2847
|
+
# Filter out endpoints whose path looks like a Java FQN (e.g. dynamic admin routing
|
|
2848
|
+
# in frameworks like Broadleaf Commerce where @AdminSection registers entity class
|
|
2849
|
+
# FQNs as URL segments). These are not real REST paths — they are resolved at
|
|
2850
|
+
# runtime by the framework. Including them pollutes the endpoint surface with 20+
|
|
2851
|
+
# garbage entries that confuse agents and break endpoint count accuracy.
|
|
2852
|
+
# Pattern: path segment that matches a Java package hierarchy (org.foo.Bar).
|
|
2853
|
+
import re as _re_fqn
|
|
2854
|
+
_FQN_PATH_RE = _re_fqn.compile(
|
|
2855
|
+
r"/(org|com|net|io|edu)\.[a-z][a-z0-9]*\.[a-zA-Z]",
|
|
2856
|
+
)
|
|
2857
|
+
endpoints = [e for e in endpoints if not _FQN_PATH_RE.search(e.get("path", ""))]
|
|
2858
|
+
|
|
2836
2859
|
# "no_security_signal" = no recognized security annotation at method OR class level.
|
|
2837
2860
|
# Note: repos may use framework-level security (e.g. Keycloak itself) with no
|
|
2838
2861
|
# per-endpoint annotations — this count reflects annotation-based coverage only.
|
sourcecode/serializer.py
CHANGED
|
@@ -1771,24 +1771,47 @@ def _angular_analysis(sm: "SourceMap") -> "Optional[dict[str, Any]]":
|
|
|
1771
1771
|
if val and val not in route_paths:
|
|
1772
1772
|
route_paths.append(val)
|
|
1773
1773
|
|
|
1774
|
-
# Angular version from package.json
|
|
1774
|
+
# Angular version from package.json — check root first, then subdirectories.
|
|
1775
|
+
# In monorepos (Java + Angular), the Angular package.json is in a subdirectory
|
|
1776
|
+
# like frontend/ and not at the repo root. We probe candidate locations.
|
|
1775
1777
|
angular_version: Optional[str] = None
|
|
1776
|
-
|
|
1777
|
-
|
|
1778
|
+
|
|
1779
|
+
def _read_angular_version_from_pkg(pkg_path: Path) -> Optional[str]:
|
|
1780
|
+
"""Extract @angular/core version from a package.json file."""
|
|
1778
1781
|
try:
|
|
1779
|
-
pkg = _json.loads(
|
|
1780
|
-
# Use `or {}` so explicit `null` values in package.json don't
|
|
1781
|
-
# raise TypeError when unpacking (BUG-4).
|
|
1782
|
+
pkg = _json.loads(pkg_path.read_text(encoding="utf-8", errors="replace"))
|
|
1782
1783
|
deps = {
|
|
1783
1784
|
**(pkg.get("dependencies") or {}),
|
|
1784
1785
|
**(pkg.get("devDependencies") or {}),
|
|
1785
1786
|
**(pkg.get("peerDependencies") or {}),
|
|
1786
1787
|
}
|
|
1787
1788
|
av = deps.get("@angular/core")
|
|
1788
|
-
if av:
|
|
1789
|
-
|
|
1789
|
+
if av and isinstance(av, str):
|
|
1790
|
+
return av.lstrip("^~>=")
|
|
1790
1791
|
except Exception:
|
|
1791
1792
|
pass
|
|
1793
|
+
return None
|
|
1794
|
+
|
|
1795
|
+
# 1. Try root package.json first (fastest, most common for pure Angular projects)
|
|
1796
|
+
_root_pkg = root / "package.json"
|
|
1797
|
+
if _root_pkg.exists():
|
|
1798
|
+
angular_version = _read_angular_version_from_pkg(_root_pkg)
|
|
1799
|
+
|
|
1800
|
+
# 2. If not found at root, search subdirectory package.json files.
|
|
1801
|
+
# Limit to ts_files-derived subdirs to avoid scanning the whole repo.
|
|
1802
|
+
if angular_version is None and ts_files:
|
|
1803
|
+
_candidate_dirs: set[str] = set()
|
|
1804
|
+
for ts_rel in ts_files[:200]: # sample first 200 ts files
|
|
1805
|
+
parts = ts_rel.replace("\\", "/").split("/")
|
|
1806
|
+
if len(parts) >= 2:
|
|
1807
|
+
_candidate_dirs.add(parts[0]) # top-level subdir (e.g. "frontend")
|
|
1808
|
+
for subdir in sorted(_candidate_dirs):
|
|
1809
|
+
_sub_pkg = root / subdir / "package.json"
|
|
1810
|
+
if _sub_pkg.exists():
|
|
1811
|
+
_v = _read_angular_version_from_pkg(_sub_pkg)
|
|
1812
|
+
if _v:
|
|
1813
|
+
angular_version = _v
|
|
1814
|
+
break
|
|
1792
1815
|
|
|
1793
1816
|
# Also check angular.json for entry point
|
|
1794
1817
|
entry_point: Optional[str] = None
|
sourcecode/summarizer.py
CHANGED
|
@@ -203,34 +203,78 @@ class ProjectSummarizer:
|
|
|
203
203
|
__import__("re").IGNORECASE,
|
|
204
204
|
)
|
|
205
205
|
|
|
206
|
+
# Patterns that indicate license notices or user-facing marketing text.
|
|
207
|
+
# These describe what the product does FOR users or its licensing terms,
|
|
208
|
+
# not the codebase architecture.
|
|
209
|
+
_LICENSE_MARKETING_RE = __import__("re").compile(
|
|
210
|
+
r"\bfair[- ]use\b" # Fair Use license
|
|
211
|
+
r"|\bcommunity edition\b" # product tier labels
|
|
212
|
+
r"|\benterprise edition\b"
|
|
213
|
+
r"|\bcommercial licen[sc]e\b"
|
|
214
|
+
r"|\bsource.available\b"
|
|
215
|
+
r"|\bavailable to companies\b" # license restriction
|
|
216
|
+
r"|\bunder \$\d+[MK]\b" # revenue threshold
|
|
217
|
+
r"|\bimportant:\s" # WARNING/IMPORTANT caveats
|
|
218
|
+
r"|\badd authentication to\b" # user-facing "add X to Y" marketing
|
|
219
|
+
r"|\bno need to deal with\b"
|
|
220
|
+
r"|\bwith minimum effort\b"
|
|
221
|
+
r"|\bsign up\b.*\bevaluation\b"
|
|
222
|
+
r"|\bcontact us\b.*\bmore information\b",
|
|
223
|
+
__import__("re").IGNORECASE,
|
|
224
|
+
)
|
|
225
|
+
|
|
206
226
|
def _extract_first_useful_paragraph(self, content: str) -> str | None:
|
|
227
|
+
"""Extract the first paragraph that describes the project architecture, not its license or marketing."""
|
|
207
228
|
import re as _re
|
|
208
229
|
_BADGE_RE = _re.compile(r"^\[?!\[") # [] or 
|
|
209
230
|
_LINK_ONLY_RE = _re.compile(r"^\[.*?\]\(.*?\)$") # pure link line
|
|
210
|
-
|
|
231
|
+
|
|
232
|
+
paragraphs: list[str] = []
|
|
233
|
+
current_lines: list[str] = []
|
|
211
234
|
in_code_block = False
|
|
235
|
+
|
|
212
236
|
for raw_line in content.splitlines():
|
|
213
237
|
line = raw_line.strip()
|
|
214
238
|
if line.startswith("```"):
|
|
215
239
|
in_code_block = not in_code_block
|
|
216
240
|
continue
|
|
217
|
-
if in_code_block
|
|
218
|
-
if lines:
|
|
219
|
-
break
|
|
241
|
+
if in_code_block:
|
|
220
242
|
continue
|
|
221
|
-
|
|
222
|
-
|
|
243
|
+
if not line or line.startswith(("#", "<!--", ">")):
|
|
244
|
+
if current_lines:
|
|
245
|
+
paragraphs.append(" ".join(current_lines).strip())
|
|
246
|
+
current_lines = []
|
|
223
247
|
continue
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
248
|
+
if _BADGE_RE.match(line) or _LINK_ONLY_RE.match(line):
|
|
249
|
+
if current_lines:
|
|
250
|
+
paragraphs.append(" ".join(current_lines).strip())
|
|
251
|
+
current_lines = []
|
|
252
|
+
continue
|
|
253
|
+
current_lines.append(line)
|
|
254
|
+
if current_lines:
|
|
255
|
+
paragraphs.append(" ".join(current_lines).strip())
|
|
256
|
+
|
|
257
|
+
_MD_LINK_RE = _re.compile(r"\[.+?\]\(.+?\)")
|
|
258
|
+
for paragraph in paragraphs[:6]: # Check up to 6 paragraphs
|
|
259
|
+
if not paragraph:
|
|
260
|
+
continue
|
|
261
|
+
# Reject very short fragments (< 30 chars) — likely just a section title
|
|
262
|
+
if len(paragraph) < 30:
|
|
263
|
+
continue
|
|
264
|
+
# Reject startup/setup snippets
|
|
265
|
+
_startup_hits = len(self._STARTUP_RE.findall(paragraph))
|
|
266
|
+
if _startup_hits >= 2:
|
|
267
|
+
continue
|
|
268
|
+
# Reject license notices and user-facing marketing text
|
|
269
|
+
if self._LICENSE_MARKETING_RE.search(paragraph):
|
|
270
|
+
continue
|
|
271
|
+
# Reject link-list paragraphs (docs/navigation sections):
|
|
272
|
+
# if more than 2 markdown links dominate the paragraph, it's a nav section
|
|
273
|
+
_link_count = len(_MD_LINK_RE.findall(paragraph))
|
|
274
|
+
if _link_count > 2 and _link_count * 30 > len(paragraph):
|
|
275
|
+
continue
|
|
276
|
+
return paragraph
|
|
277
|
+
return None
|
|
234
278
|
|
|
235
279
|
_TYPE_LABELS: dict[str, str] = {
|
|
236
280
|
"cli": "CLI",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sourcecode
|
|
3
|
-
Version: 1.31.
|
|
3
|
+
Version: 1.31.23
|
|
4
4
|
Summary: Deterministic codebase context for AI coding agents
|
|
5
5
|
License: Apache License
|
|
6
6
|
Version 2.0, January 2004
|
|
@@ -225,7 +225,7 @@ Description-Content-Type: text/markdown
|
|
|
225
225
|
|
|
226
226
|
**AI-ready change intelligence for Java/Spring enterprise monoliths.**
|
|
227
227
|
|
|
228
|
-

|
|
229
229
|

|
|
230
230
|
|
|
231
231
|
---
|
|
@@ -263,7 +263,7 @@ pipx install sourcecode
|
|
|
263
263
|
|
|
264
264
|
```bash
|
|
265
265
|
sourcecode version
|
|
266
|
-
# sourcecode 1.31.
|
|
266
|
+
# sourcecode 1.31.23
|
|
267
267
|
```
|
|
268
268
|
|
|
269
269
|
---
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
sourcecode/__init__.py,sha256=
|
|
1
|
+
sourcecode/__init__.py,sha256=u-AGXNvPNCuCZnA6MlEhddkQlAKa1tp7BeXtzkz6TpU,104
|
|
2
2
|
sourcecode/adaptive_scanner.py,sha256=XffluXKzJUXrMtjEiAOnSNPZnztdIcts17T9ouHeID0,10521
|
|
3
3
|
sourcecode/architecture_analyzer.py,sha256=4R13Yb02OrPeB4IH3z6V_g7HWhmGcRHbI8CobCVnRrc,39111
|
|
4
4
|
sourcecode/architecture_summary.py,sha256=z34_6v7cSwy98cof2UVciGho7SCrZ93tiqMmq5WNzRQ,20405
|
|
@@ -6,7 +6,7 @@ sourcecode/ast_extractor.py,sha256=_btmeOJIe3t-NicF94D5ZAesa2YIJ0_QNExGnbHxGFE,5
|
|
|
6
6
|
sourcecode/cache.py,sha256=TiYa3ECjBKtvlfCk7GvQ9v6gZkAITpH3ow9PubA7sUo,22946
|
|
7
7
|
sourcecode/canonical_ir.py,sha256=NZu0XICv__hkQGKzW2LNQLRqb1L28K2p_WQCQKS5Zlk,23141
|
|
8
8
|
sourcecode/classifier.py,sha256=yWeq6agTjkFa3zuNa-gdVIHtjoBoPoVlJnX-b7tdVJs,7851
|
|
9
|
-
sourcecode/cli.py,sha256=
|
|
9
|
+
sourcecode/cli.py,sha256=juaaKE6QgifwFGSRdIxxB7XoQhp51m_AuG3U7zLNQ4Y,147521
|
|
10
10
|
sourcecode/code_notes_analyzer.py,sha256=EJemNCNc9Dn-1RZYu-aNbK0ELzmsyC4s6FdHi3XyNEI,9392
|
|
11
11
|
sourcecode/confidence_analyzer.py,sha256=_jckZSxksV-OU38vbkxfVNBnWCtlCq8Vwfg23x1uspA,19054
|
|
12
12
|
sourcecode/context_scorer.py,sha256=QpChSpsmaAYz91rXA4Ue5xzQmNz_ZboZN09YOHScq1U,14679
|
|
@@ -32,13 +32,13 @@ sourcecode/ranking_engine.py,sha256=ZAucq_YX2KkWUuAZf4P0lhtQ_38vEFnUhuGtSZd1S0E,
|
|
|
32
32
|
sourcecode/redactor.py,sha256=xuGcadGEHaPw4qZXlMDvzMCsr4VOkdp3oBQptHyJk8c,2884
|
|
33
33
|
sourcecode/relevance_scorer.py,sha256=MYF4FFkveAQps9SmTeTlh6ODiBz2F--_hWNeHMLtUHQ,8405
|
|
34
34
|
sourcecode/repo_classifier.py,sha256=FG1vaWKdWXsWdl-S8hjVMiTqcwgaRXkDyvK4rPcOGtQ,22681
|
|
35
|
-
sourcecode/repository_ir.py,sha256=
|
|
35
|
+
sourcecode/repository_ir.py,sha256=vdgJwzfoY2qpQtzcGX7k0E5jWe9fcuyFdkqnF4K6UBs,141767
|
|
36
36
|
sourcecode/runtime_classifier.py,sha256=uTAD6BDCiBLUZEDRfqk718kM4RTT_vAbfkcOI2_Xx58,18432
|
|
37
37
|
sourcecode/scanner.py,sha256=WdOQ78mMzjR1NjmKTlbxdgwinnCTfAhxCVLBEFQiFHU,8899
|
|
38
38
|
sourcecode/schema.py,sha256=aHNXDf8LGyUC8ZDE_VS9kiskC2-Oswhi_WnpdGy6HDw,24897
|
|
39
39
|
sourcecode/semantic_analyzer.py,sha256=TDuC3wzZR2DPm1mgrAg1YSLk2QzJoueS3TZAmyGGpCU,89417
|
|
40
|
-
sourcecode/serializer.py,sha256=
|
|
41
|
-
sourcecode/summarizer.py,sha256=
|
|
40
|
+
sourcecode/serializer.py,sha256=zzhWyaseWSAqSIe77QwJkyehVI_1DJBkGM7KqbDQtXo,123313
|
|
41
|
+
sourcecode/summarizer.py,sha256=KAtU2mvXT1GLJJQ50Rup42BFsJXij5hmklCwMvcxVx4,19514
|
|
42
42
|
sourcecode/tree_utils.py,sha256=8GAkIfQAsvtEudIeW1l4ooH_oRtrWR8cpJQJsEa_Pfw,2093
|
|
43
43
|
sourcecode/workspace.py,sha256=X_6NmNnitvT3_38V-JDChydo_sR68s249hLFlrQskU0,8271
|
|
44
44
|
sourcecode/detectors/__init__.py,sha256=A0AACJFF6HWf_RgatNtWu3PUzstcKtIGM9f1PoFcJug,1987
|
|
@@ -76,8 +76,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
|
|
|
76
76
|
sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
|
|
77
77
|
sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
|
|
78
78
|
sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
|
|
79
|
-
sourcecode-1.31.
|
|
80
|
-
sourcecode-1.31.
|
|
81
|
-
sourcecode-1.31.
|
|
82
|
-
sourcecode-1.31.
|
|
83
|
-
sourcecode-1.31.
|
|
79
|
+
sourcecode-1.31.23.dist-info/METADATA,sha256=3-xJut96GlWt6KcUJCKPO4trdkPbNuSR9cxUo9O6FlI,31103
|
|
80
|
+
sourcecode-1.31.23.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
81
|
+
sourcecode-1.31.23.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
|
|
82
|
+
sourcecode-1.31.23.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
|
|
83
|
+
sourcecode-1.31.23.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|