suitable-loop 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- suitable_loop/__init__.py +3 -0
- suitable_loop/__main__.py +5 -0
- suitable_loop/analyzers/__init__.py +1 -0
- suitable_loop/analyzers/code_analyzer.py +652 -0
- suitable_loop/analyzers/git_analyzer.py +510 -0
- suitable_loop/analyzers/log_analyzer.py +663 -0
- suitable_loop/config.py +60 -0
- suitable_loop/db.py +497 -0
- suitable_loop/graph/__init__.py +1 -0
- suitable_loop/graph/engine.py +341 -0
- suitable_loop/models.py +131 -0
- suitable_loop/server.py +46 -0
- suitable_loop/tools/__init__.py +1 -0
- suitable_loop/tools/code_tools.py +104 -0
- suitable_loop/tools/git_tools.py +52 -0
- suitable_loop/tools/log_tools.py +53 -0
- suitable_loop/tools/util_tools.py +49 -0
- suitable_loop-0.1.0.dist-info/METADATA +12 -0
- suitable_loop-0.1.0.dist-info/RECORD +21 -0
- suitable_loop-0.1.0.dist-info/WHEEL +4 -0
- suitable_loop-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,510 @@
|
|
|
1
|
+
"""Git-based change analysis engine for CodeZero.
|
|
2
|
+
|
|
3
|
+
Reads repository history via GitPython, computes per-commit risk scores
|
|
4
|
+
using a weighted formula over complexity delta, blast radius, churn,
|
|
5
|
+
lines changed, and file count, and persists results to the database.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
from collections import Counter
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
import git
|
|
15
|
+
|
|
16
|
+
from suitable_loop.config import SuitableLoopConfig
|
|
17
|
+
from suitable_loop.db import Database
|
|
18
|
+
from suitable_loop.models import CommitFile, CommitInfo
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class GitAnalyzer:
|
|
24
|
+
"""Analyzes Git repository history to surface risky changes and hotspots."""
|
|
25
|
+
|
|
26
|
+
def __init__(self, db: Database, config: SuitableLoopConfig) -> None:
|
|
27
|
+
self.db = db
|
|
28
|
+
self.config = config
|
|
29
|
+
|
|
30
|
+
# ------------------------------------------------------------------
|
|
31
|
+
# Public API
|
|
32
|
+
# ------------------------------------------------------------------
|
|
33
|
+
|
|
34
|
+
def analyze_recent_changes(
    self, repo_path: str, n_commits: int | None = None
) -> list[dict]:
    """Analyze the last *n_commits* commits and return risk-scored results.

    Each commit is scored with a weighted combination of complexity delta,
    blast radius, churn rate, lines changed, and file count.  Every factor
    is min-max normalized across the analyzed batch before weighting, so a
    score is relative to the other commits in the same batch.  Results are
    persisted to the database and returned sorted by ``risk_score``
    descending.

    Args:
        repo_path: Path handed to ``_open_repo``.
        n_commits: Number of commits to inspect.  A falsy value (``None``
            or ``0``) falls back to ``config.git.default_commit_depth``.

    Returns:
        One dict per commit with the keys ``sha``, ``author``,
        ``timestamp``, ``message``, ``files_changed``, ``insertions``,
        ``deletions``, ``risk_score`` (rounded to four places) and
        ``factors`` (the normalized inputs).  An empty list is returned
        when the repository cannot be opened or yields no commits.
    """
    depth = n_commits or self.config.git.default_commit_depth
    weights = self.config.git.risk_weights

    repo = self._open_repo(repo_path)
    if repo is None:
        return []

    commits = self._iter_commits(repo, depth)
    if not commits:
        logger.info("No commits found in %s", repo_path)
        return []

    # ----------------------------------------------------------
    # Phase 1: extract raw data per commit
    # ----------------------------------------------------------
    raw_records: list[dict] = []
    # How many commits of the batch touched each file path.
    file_appearance: Counter[str] = Counter()

    for commit in commits:
        changed_files = self._extract_changed_files(commit)
        total_insertions = 0
        total_deletions = 0
        commit_file_records: list[dict] = []

        for cf in changed_files:
            total_insertions += cf["insertions"]
            total_deletions += cf["deletions"]
            file_appearance[cf["file_path"]] += 1

            # Complexity delta for .py files
            complexity_before, complexity_after = self._compute_complexity_delta(
                commit, cf["file_path"], cf["change_type"]
            )
            cf["complexity_before"] = complexity_before
            cf["complexity_after"] = complexity_after
            commit_file_records.append(cf)

        raw_records.append({
            "sha": commit.hexsha,
            "author": str(commit.author),
            "timestamp": commit.committed_date,
            "message": commit.message.strip(),
            "files_changed": len(changed_files),
            "insertions": total_insertions,
            "deletions": total_deletions,
            "commit_files": commit_file_records,
            # Raw factors (pre-normalization)
            "_complexity_delta": sum(
                abs((cf.get("complexity_after") or 0) - (cf.get("complexity_before") or 0))
                for cf in commit_file_records
            ),
            "_blast_radius": self._commit_blast_radius(commit_file_records, repo_path),
            "_lines_changed": total_insertions + total_deletions,
            "_file_count": len(changed_files),
        })

    # Churn must be derived only after the whole batch has been counted.
    # Summing inside the loop above would use partial counts, making a
    # commit's churn depend on its position in the iteration order.
    for record in raw_records:
        record["_churn_rate"] = sum(
            file_appearance[cf["file_path"]] for cf in record["commit_files"]
        )

    # ----------------------------------------------------------
    # Phase 2: normalize factors and compute risk scores
    # ----------------------------------------------------------
    complexity_vals = [r["_complexity_delta"] for r in raw_records]
    blast_vals = [r["_blast_radius"] for r in raw_records]
    churn_vals = [float(r["_churn_rate"]) for r in raw_records]
    lines_vals = [float(r["_lines_changed"]) for r in raw_records]
    files_vals = [float(r["_file_count"]) for r in raw_records]

    norm_complexity = self._normalize(complexity_vals)
    norm_blast = self._normalize(blast_vals)
    norm_churn = self._normalize(churn_vals)
    norm_lines = self._normalize(lines_vals)
    norm_files = self._normalize(files_vals)

    # Fallback weights apply when a key is missing from the configuration.
    w_complexity = weights.get("complexity", 0.30)
    w_blast = weights.get("blast_radius", 0.25)
    w_churn = weights.get("churn", 0.20)
    w_lines = weights.get("lines", 0.15)
    w_files = weights.get("files", 0.10)

    results: list[dict] = []
    for i, record in enumerate(raw_records):
        risk_score = (
            w_complexity * norm_complexity[i]
            + w_blast * norm_blast[i]
            + w_churn * norm_churn[i]
            + w_lines * norm_lines[i]
            + w_files * norm_files[i]
        )

        # Persist commit
        commit_info = CommitInfo(
            repo_path=repo_path,
            sha=record["sha"],
            author=record["author"],
            timestamp=record["timestamp"],
            message=record["message"],
            files_changed=record["files_changed"],
            insertions=record["insertions"],
            deletions=record["deletions"],
            risk_score=risk_score,
        )
        commit_id = self.db.upsert_commit(commit_info)

        # Persist commit files
        for cf in record["commit_files"]:
            commit_file = CommitFile(
                commit_id=commit_id,
                file_path=cf["file_path"],
                change_type=cf["change_type"],
                insertions=cf["insertions"],
                deletions=cf["deletions"],
                complexity_before=cf.get("complexity_before"),
                complexity_after=cf.get("complexity_after"),
            )
            self.db.insert_commit_file(commit_file)

        results.append({
            "sha": record["sha"],
            "author": record["author"],
            "timestamp": record["timestamp"],
            "message": record["message"],
            "files_changed": record["files_changed"],
            "insertions": record["insertions"],
            "deletions": record["deletions"],
            "risk_score": round(risk_score, 4),
            "factors": {
                "complexity_delta": norm_complexity[i],
                "blast_radius": norm_blast[i],
                "churn_rate": norm_churn[i],
                "lines_changed": norm_lines[i],
                "file_count": norm_files[i],
            },
        })

    self.db.commit()
    results.sort(key=lambda r: r["risk_score"], reverse=True)
    logger.info(
        "Analyzed %d commits in %s; highest risk %.4f (%s)",
        len(results),
        repo_path,
        results[0]["risk_score"] if results else 0.0,
        results[0]["sha"][:8] if results else "n/a",
    )
    return results
|
|
191
|
+
|
|
192
|
+
def analyze_commit(self, repo_path: str, sha: str) -> dict:
    """Describe one commit file by file.

    Args:
        repo_path: Path handed to ``_open_repo``.
        sha: Revision identifier resolved through ``repo.commit``.

    Returns:
        A dict carrying the commit metadata plus a ``files`` list; each
        entry holds the change type, line counts, complexity before and
        after, the complexity delta, the blast radius, and the diff text.
        When the repository or the commit cannot be resolved, a dict with
        a single ``error`` key is returned instead.
    """
    repo = self._open_repo(repo_path)
    if repo is None:
        return {"error": f"Cannot open repository at {repo_path}"}

    try:
        commit = repo.commit(sha)
    except (git.BadName, git.GitCommandError, ValueError) as exc:
        logger.warning("Cannot resolve commit %s: %s", sha, exc)
        return {"error": f"Cannot resolve commit {sha}: {exc}"}

    entries = self._extract_changed_files(commit)

    def describe(entry: dict) -> dict:
        # Build the per-file breakdown: complexity, dependents, then patch.
        path = entry["file_path"]
        kind = entry["change_type"]
        before, after = self._compute_complexity_delta(commit, path, kind)
        radius = self.blast_radius(path)
        patch = self._file_diff_text(commit, path)
        return {
            "file_path": path,
            "change_type": kind,
            "insertions": entry["insertions"],
            "deletions": entry["deletions"],
            "complexity_before": before,
            "complexity_after": after,
            # A missing measurement counts as zero for the delta.
            "complexity_delta": (after or 0) - (before or 0),
            "blast_radius": radius,
            "diff": patch,
        }

    per_file = [describe(entry) for entry in entries]
    added = sum(entry["insertions"] for entry in entries)
    removed = sum(entry["deletions"] for entry in entries)

    return {
        "sha": commit.hexsha,
        "author": str(commit.author),
        "timestamp": commit.committed_date,
        "message": commit.message.strip(),
        "files_changed": len(entries),
        "insertions": added,
        "deletions": removed,
        "files": per_file,
    }
|
|
247
|
+
|
|
248
|
+
def hotspot_report(
    self, repo_path: str, n_commits: int | None = None
) -> list[dict]:
    """Rank files by how often they change and how many files depend on them.

    Args:
        repo_path: Path handed to ``_open_repo``.
        n_commits: Number of commits to scan; a falsy value falls back to
            ``config.git.default_commit_depth``.

    Returns:
        Dicts with ``file_path``, ``churn``, ``dependency_count`` and
        ``hotspot_score`` (``churn * dependency_count``), highest score
        first.  Empty when the repository cannot be opened or has no
        commits.
    """
    depth = n_commits or self.config.git.default_commit_depth

    repo = self._open_repo(repo_path)
    if repo is None:
        return []

    history = self._iter_commits(repo, depth)
    if not history:
        return []

    # Tally, per path, the number of scanned commits that touched it.
    touches: Counter[str] = Counter(
        entry["file_path"]
        for commit in history
        for entry in self._extract_changed_files(commit)
    )

    ranked: list[dict] = []
    for path, times_changed in touches.items():
        dependents = self._dependency_count(path)
        ranked.append({
            "file_path": path,
            "churn": times_changed,
            "dependency_count": dependents,
            "hotspot_score": times_changed * dependents,
        })

    return sorted(ranked, key=lambda row: row["hotspot_score"], reverse=True)
|
|
286
|
+
|
|
287
|
+
def blast_radius(self, file_path: str) -> dict:
    """Collect every file that depends, directly or indirectly, on *file_path*.

    The file is looked up by exact path first and by suffix second.  The
    dependents relation is then walked from that file, and the starting
    file is left out of the result.

    Returns:
        ``{"count": <int>, "dependents": <sorted list of paths>}``; the
        count is zero and the list empty when the file is unknown to the
        database.
    """
    root = self.db.get_file_by_path(file_path)
    if root is None:
        root = self.db.find_file_by_suffix(file_path)
        if root is None:
            return {"count": 0, "dependents": []}

    seen: set[int] = set()
    pending: list[int] = [root.id]  # type: ignore[list-item]

    while pending:
        node_id = pending.pop()
        if node_id in seen:
            # Reached again through another edge (or a cycle).
            continue
        seen.add(node_id)
        pending.extend(
            dep.id  # type: ignore[misc]
            for dep in self.db.get_file_dependents(node_id)
            if dep.id not in seen
        )

    # The starting file is not its own dependent.
    seen.discard(root.id)  # type: ignore[arg-type]

    resolved = (self.db.get_file_by_id(fid) for fid in seen)
    paths = sorted(entity.path for entity in resolved if entity is not None)
    return {"count": len(paths), "dependents": paths}
|
|
321
|
+
|
|
322
|
+
# ------------------------------------------------------------------
|
|
323
|
+
# Normalization helper
|
|
324
|
+
# ------------------------------------------------------------------
|
|
325
|
+
|
|
326
|
+
@staticmethod
|
|
327
|
+
def _normalize(values: list[float]) -> list[float]:
|
|
328
|
+
"""Normalize *values* to the ``[0, 1]`` range using min-max scaling.
|
|
329
|
+
|
|
330
|
+
Returns a list of zeros when all values are identical.
|
|
331
|
+
"""
|
|
332
|
+
if not values:
|
|
333
|
+
return []
|
|
334
|
+
min_val = min(values)
|
|
335
|
+
max_val = max(values)
|
|
336
|
+
span = max_val - min_val
|
|
337
|
+
if span == 0:
|
|
338
|
+
return [0.0] * len(values)
|
|
339
|
+
return [(v - min_val) / span for v in values]
|
|
340
|
+
|
|
341
|
+
# ------------------------------------------------------------------
|
|
342
|
+
# Git helpers
|
|
343
|
+
# ------------------------------------------------------------------
|
|
344
|
+
|
|
345
|
+
@staticmethod
def _open_repo(repo_path: str) -> git.Repo | None:
    """Open the Git repository at *repo_path* or one of its parent directories.

    Returns ``None``, after logging an error, when the path does not exist
    or is not inside a Git repository.
    """
    try:
        repo = git.Repo(repo_path, search_parent_directories=True)
    except (git.InvalidGitRepositoryError, git.NoSuchPathError) as exc:
        logger.error("Cannot open Git repository at %s: %s", repo_path, exc)
        return None
    return repo
|
|
353
|
+
|
|
354
|
+
@staticmethod
|
|
355
|
+
def _iter_commits(repo: git.Repo, max_count: int) -> list[git.Commit]:
|
|
356
|
+
"""Return up to *max_count* commits from the active branch."""
|
|
357
|
+
try:
|
|
358
|
+
return list(repo.iter_commits(max_count=max_count))
|
|
359
|
+
except (git.GitCommandError, ValueError) as exc:
|
|
360
|
+
logger.warning("Cannot iterate commits: %s", exc)
|
|
361
|
+
return []
|
|
362
|
+
|
|
363
|
+
@staticmethod
def _extract_changed_files(commit: git.Commit) -> list[dict]:
    """Extract changed file metadata from a commit's diff against its parent.

    Only the first parent is considered.  A commit without parents is
    diffed against ``git.NULL_TREE``.

    Returns:
        A list of dicts with keys ``file_path``, ``change_type``,
        ``insertions``, and ``deletions``.  Line counts come from
        ``commit.stats.files`` and default to ``0`` for paths that have no
        entry there, or for every file when the stats cannot be read.
    """
    results: list[dict] = []

    # Determine the parent to diff against.  For the initial commit the
    # tree is diffed against an empty tree (NULL_TREE).
    if commit.parents:
        diffs = commit.parents[0].diff(commit, create_patch=False)
    else:
        diffs = commit.diff(git.NULL_TREE, create_patch=False)

    # Use commit.stats for line-level counts since Diff objects do not
    # always expose insertions/deletions directly.
    stats_files: dict[str, dict] = {}
    try:
        stats_files = commit.stats.files
    except Exception as exc:
        # Best effort: keep going with zero line counts, but leave a trace
        # so that missing numbers can be diagnosed instead of vanishing.
        logger.debug(
            "Cannot read stats for commit %s: %s", commit.hexsha[:8], exc
        )

    for diff_item in diffs:
        # Determine a usable file path from the diff.
        file_path = diff_item.b_path or diff_item.a_path or ""
        if not file_path:
            continue

        # NOTE(review): the stats lookup is by exact path; presumably a
        # renamed file is keyed differently in the stats and therefore
        # reports zero counts here -- confirm against GitPython.
        file_stats = stats_files.get(file_path, {})
        results.append({
            "file_path": file_path,
            # Map GitPython change type to a human-readable label.
            "change_type": _diff_change_type(diff_item),
            "insertions": file_stats.get("insertions", 0),
            "deletions": file_stats.get("deletions", 0),
        })

    return results
|
|
405
|
+
|
|
406
|
+
# ------------------------------------------------------------------
|
|
407
|
+
# Complexity helpers
|
|
408
|
+
# ------------------------------------------------------------------
|
|
409
|
+
|
|
410
|
+
@staticmethod
|
|
411
|
+
def _compute_complexity_delta(
|
|
412
|
+
commit: git.Commit, file_path: str, change_type: str
|
|
413
|
+
) -> tuple[int | None, int | None]:
|
|
414
|
+
"""Compute radon cyclomatic complexity before and after the commit.
|
|
415
|
+
|
|
416
|
+
Only applies to ``.py`` files. Returns ``(None, None)`` for
|
|
417
|
+
non-Python files or when radon is unavailable.
|
|
418
|
+
"""
|
|
419
|
+
if not file_path.endswith(".py"):
|
|
420
|
+
return None, None
|
|
421
|
+
|
|
422
|
+
try:
|
|
423
|
+
from radon.complexity import cc_visit # type: ignore[import-untyped]
|
|
424
|
+
except ImportError:
|
|
425
|
+
return None, None
|
|
426
|
+
|
|
427
|
+
complexity_before: int | None = None
|
|
428
|
+
complexity_after: int | None = None
|
|
429
|
+
|
|
430
|
+
# Complexity *after* the commit (file in the commit's tree).
|
|
431
|
+
if change_type != "D":
|
|
432
|
+
try:
|
|
433
|
+
blob = commit.tree / file_path
|
|
434
|
+
source_after = blob.data_stream.read().decode("utf-8", errors="replace")
|
|
435
|
+
complexity_after = _total_complexity(cc_visit, source_after)
|
|
436
|
+
except (KeyError, TypeError, Exception):
|
|
437
|
+
complexity_after = 0
|
|
438
|
+
|
|
439
|
+
# Complexity *before* the commit (file in the parent's tree).
|
|
440
|
+
if change_type != "A" and commit.parents:
|
|
441
|
+
try:
|
|
442
|
+
parent_blob = commit.parents[0].tree / file_path
|
|
443
|
+
source_before = parent_blob.data_stream.read().decode("utf-8", errors="replace")
|
|
444
|
+
complexity_before = _total_complexity(cc_visit, source_before)
|
|
445
|
+
except (KeyError, TypeError, Exception):
|
|
446
|
+
complexity_before = 0
|
|
447
|
+
|
|
448
|
+
return complexity_before, complexity_after
|
|
449
|
+
|
|
450
|
+
@staticmethod
|
|
451
|
+
def _file_diff_text(commit: git.Commit, file_path: str) -> str:
|
|
452
|
+
"""Return the unified diff text for *file_path* within *commit*."""
|
|
453
|
+
try:
|
|
454
|
+
if commit.parents:
|
|
455
|
+
diffs = commit.parents[0].diff(commit, paths=[file_path], create_patch=True)
|
|
456
|
+
else:
|
|
457
|
+
diffs = commit.diff(git.NULL_TREE, paths=[file_path], create_patch=True)
|
|
458
|
+
|
|
459
|
+
for diff_item in diffs:
|
|
460
|
+
if diff_item.diff:
|
|
461
|
+
raw = diff_item.diff
|
|
462
|
+
if isinstance(raw, bytes):
|
|
463
|
+
return raw.decode("utf-8", errors="replace")
|
|
464
|
+
return str(raw)
|
|
465
|
+
except Exception as exc:
|
|
466
|
+
logger.debug("Cannot get diff for %s in %s: %s", file_path, commit.hexsha[:8], exc)
|
|
467
|
+
return ""
|
|
468
|
+
|
|
469
|
+
# ------------------------------------------------------------------
|
|
470
|
+
# Blast radius / dependency helpers
|
|
471
|
+
# ------------------------------------------------------------------
|
|
472
|
+
|
|
473
|
+
def _commit_blast_radius(self, commit_files: list[dict], repo_path: str) -> float:
|
|
474
|
+
"""Sum transitive dependent counts for all files in a commit."""
|
|
475
|
+
total = 0
|
|
476
|
+
for cf in commit_files:
|
|
477
|
+
total += self._dependency_count(cf["file_path"])
|
|
478
|
+
return float(total)
|
|
479
|
+
|
|
480
|
+
def _dependency_count(self, file_path: str) -> int:
|
|
481
|
+
"""Return the number of files that transitively depend on *file_path*."""
|
|
482
|
+
result = self.blast_radius(file_path)
|
|
483
|
+
return result["count"]
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
# ------------------------------------------------------------------
|
|
487
|
+
# Module-level helpers
|
|
488
|
+
# ------------------------------------------------------------------
|
|
489
|
+
|
|
490
|
+
def _diff_change_type(diff_item: git.Diff) -> str: # type: ignore[name-defined]
|
|
491
|
+
"""Map a GitPython Diff object's change_type to a readable string."""
|
|
492
|
+
mapping = {
|
|
493
|
+
"A": "A",
|
|
494
|
+
"D": "D",
|
|
495
|
+
"M": "M",
|
|
496
|
+
"R": "R",
|
|
497
|
+
"C": "C",
|
|
498
|
+
"T": "T",
|
|
499
|
+
}
|
|
500
|
+
ct = getattr(diff_item, "change_type", None) or "M"
|
|
501
|
+
return mapping.get(ct, ct)
|
|
502
|
+
|
|
503
|
+
|
|
504
|
+
def _total_complexity(cc_visit_fn, source: str) -> int:
|
|
505
|
+
"""Sum cyclomatic complexity across all blocks returned by radon."""
|
|
506
|
+
try:
|
|
507
|
+
blocks = cc_visit_fn(source)
|
|
508
|
+
return sum(block.complexity for block in blocks)
|
|
509
|
+
except Exception:
|
|
510
|
+
return 0
|