vcode-analysis 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
core/git_handler.py ADDED
@@ -0,0 +1,718 @@
1
+ """Git 操作处理模块
2
+
3
+ 支持:
4
+ - 克隆远程仓库
5
+ - 获取提交历史
6
+ - 获取提交差异
7
+ - 分析指定提交
8
+ - 批量 Git 操作
9
+ - 批量克隆
10
+ """
11
+
12
+ import subprocess
13
+ import re
14
+ import concurrent.futures
15
+ from pathlib import Path
16
+ from dataclasses import dataclass, field
17
+ from typing import Optional, Callable
18
+ from datetime import datetime
19
+ from enum import Enum
20
+
21
+
22
@dataclass
class CommitInfo:
    """Metadata describing a single commit."""

    # Full commit hash.
    hash: str
    # Abbreviated commit hash.
    short_hash: str
    # Author name.
    author: str
    # Timestamp associated with the commit.
    date: datetime
    # Commit message text.
    message: str
    # Paths of the files touched by the commit.
    files_changed: list[str]
31
+
32
+
33
@dataclass
class FileDiff:
    """Diff of a single file within a commit."""

    # Path of the file.
    file_path: str
    # Original path when the file was renamed, otherwise None.
    old_path: Optional[str]
    # Change type: A = added, M = modified, D = deleted, R = renamed.
    status: str
    # Number of added lines.
    additions: int
    # Number of deleted lines.
    deletions: int
    # Diff text for this file.
    diff: str
42
+
43
+
44
class GitHandler:
    """Thin wrapper that runs ``git`` commands against one local repository.

    Every operation shells out to the ``git`` executable with ``repo_path`` as
    the working directory and parses the textual output.
    """

    # NUL is used as the field separator for ``git log --format`` output so
    # that a "|" inside an author name or subject cannot shift the fields.
    _FIELD_SEP = "\x00"
    _LOG_FORMAT = "%H%x00%h%x00%an%x00%aI%x00%s"

    def __init__(self, repo_path: str):
        self.repo_path = Path(repo_path)

    def _run_git(self, *args: str, check: bool = True, strip: bool = True) -> str:
        """Run ``git <args>`` inside the repository and return its stdout.

        Args:
            *args: Arguments passed to git verbatim.
            check: Raise ``subprocess.CalledProcessError`` on a non-zero exit.
            strip: Strip surrounding whitespace from the output. Pass ``False``
                when leading whitespace is significant, as it is for
                ``status --porcelain``.

        Returns:
            The captured standard output.
        """
        result = subprocess.run(
            ["git", *args],
            cwd=self.repo_path,
            capture_output=True,
            text=True,
            check=check,
        )
        return result.stdout.strip() if strip else result.stdout

    @staticmethod
    def _repo_name_from_url(url: str) -> str:
        """Derive the default checkout directory name from a clone URL."""
        tail = url.rstrip("/").rsplit("/", 1)[-1]
        # scp-style URLs (git@host:repo.git) have no "/" before the repo name.
        tail = tail.rsplit(":", 1)[-1]
        # Only a trailing ".git" is dropped; ".git" elsewhere in the name
        # (e.g. "my.github.io") must survive.
        return tail.removesuffix(".git")

    @classmethod
    def clone(cls, url: str, target_dir: Optional[str] = None) -> "GitHandler":
        """Clone ``url`` and return a handler for the new working copy.

        Args:
            url: Repository URL.
            target_dir: Destination directory. It is created if missing. When
                omitted, the name is derived from the last URL component.

        Raises:
            subprocess.CalledProcessError: If ``git clone`` fails.
        """
        if target_dir:
            Path(target_dir).mkdir(parents=True, exist_ok=True)
        else:
            target_dir = cls._repo_name_from_url(url)

        subprocess.run(
            # "--" stops a URL that starts with "-" from being read as an option.
            ["git", "clone", "--", url, target_dir],
            capture_output=True,
            check=True,
        )
        return cls(target_dir)

    def is_valid_repo(self) -> bool:
        """Return True when ``repo_path`` is inside a git repository."""
        try:
            # check=True is required: git signals "not a repository" only
            # through its exit status.
            self._run_git("rev-parse", "--git-dir")
        except (subprocess.CalledProcessError, OSError):
            return False
        return True

    def get_current_branch(self) -> str:
        """Return the name of the currently checked-out branch."""
        return self._run_git("branch", "--show-current")

    def get_commit_info(self, commit_hash: str) -> "CommitInfo":
        """Return the metadata of one commit.

        Args:
            commit_hash: Commit to describe.

        Raises:
            subprocess.CalledProcessError: If git rejects the commit.
            ValueError: If the log output does not have the expected fields.
        """
        output = self._run_git(
            "log", "-1", f"--format={self._LOG_FORMAT}", commit_hash
        )
        parts = output.split(self._FIELD_SEP, 4)
        if len(parts) != 5:
            raise ValueError(f"unexpected git log output for {commit_hash!r}")

        # --root makes the initial commit list its files as well.
        files_output = self._run_git(
            "diff-tree", "--no-commit-id", "--name-only", "-r", "--root", commit_hash
        )

        return CommitInfo(
            hash=parts[0],
            short_hash=parts[1],
            author=parts[2],
            date=datetime.fromisoformat(parts[3]),
            message=parts[4],
            files_changed=files_output.splitlines(),
        )

    def get_commit_diff(self, commit_hash: str) -> "list[FileDiff]":
        """Return one ``FileDiff`` per file changed by ``commit_hash``."""
        # Per-file added/deleted line counts.
        numstat = self._run_git(
            "diff-tree", "--no-commit-id", "--numstat", "-r", "--root", commit_hash
        )
        # Full patch text without the commit header.
        diff_output = self._run_git("show", "--format=", commit_hash)

        file_header = re.compile(r"^diff --git a/(.+?) b/(.+?)$")
        diffs = []
        current_file = None
        current_lines = []

        for line in diff_output.split("\n"):
            match = file_header.match(line)
            if match:
                # A new per-file section starts: flush the previous one.
                if current_file and current_lines:
                    diffs.append(
                        self._parse_file_diff(current_file, "\n".join(current_lines), numstat)
                    )
                current_file = match.group(2)
                current_lines = [line]
            elif current_file:
                current_lines.append(line)

        if current_file and current_lines:
            diffs.append(
                self._parse_file_diff(current_file, "\n".join(current_lines), numstat)
            )

        return diffs

    @staticmethod
    def _numstat_counts(numstat: str, file_path: str) -> tuple:
        """Return ``(additions, deletions)`` for ``file_path`` from numstat text."""
        for line in numstat.splitlines():
            parts = line.split("\t", 2)
            # Exact path comparison: a suffix match would let "a.py" pick up
            # the counts that belong to "src/a.py".
            if len(parts) == 3 and parts[2] == file_path:
                # Counts are reported as "-" for binary files.
                added = int(parts[0]) if parts[0] != "-" else 0
                deleted = int(parts[1]) if parts[1] != "-" else 0
                return added, deleted
        return 0, 0

    @staticmethod
    def _diff_status(diff: str) -> tuple:
        """Return ``(status, old_path)`` derived from a per-file diff header."""
        if not diff.startswith("diff --git"):
            return "M", None

        # Only the header (everything before the first hunk) is inspected, so
        # file content mentioning e.g. "new file mode" cannot change the result.
        header_lines = diff.split("\n@@", 1)[0].splitlines()
        if any(line.startswith("new file mode") for line in header_lines):
            return "A", None
        if any(line.startswith("deleted file mode") for line in header_lines):
            return "D", None
        for line in header_lines:
            if line.startswith("rename from "):
                return "R", line[len("rename from "):]
        return "M", None

    def _parse_file_diff(self, file_path: str, diff: str, numstat: str) -> "FileDiff":
        """Build a ``FileDiff`` from one file's patch text and the numstat output.

        Args:
            file_path: Path of the file (new path for renames).
            diff: Patch text for this file, starting at its ``diff --git`` line.
            numstat: Output of ``git diff-tree --numstat`` for the commit.
        """
        additions, deletions = self._numstat_counts(numstat, file_path)
        status, old_path = self._diff_status(diff)
        return FileDiff(
            file_path=file_path,
            old_path=old_path,
            status=status,
            additions=additions,
            deletions=deletions,
            diff=diff,
        )

    def get_commits(
        self,
        branch: Optional[str] = None,
        since: Optional[str] = None,
        until: Optional[str] = None,
        author: Optional[str] = None,
        limit: int = 50,
    ) -> "list[CommitInfo]":
        """Return up to ``limit`` commits, newest first.

        Args:
            branch: Revision to list; git's default (HEAD) when omitted.
            since: Passed to ``git log --since``.
            until: Passed to ``git log --until``.
            author: Passed to ``git log --author``.
            limit: Maximum number of commits.
        """
        args = ["log", "--format=%H", f"-{limit}"]
        if branch:
            args.append(branch)
        if since:
            args.extend(["--since", since])
        if until:
            args.extend(["--until", until])
        if author:
            args.extend(["--author", author])

        output = self._run_git(*args)
        return [
            self.get_commit_info(commit_hash)
            for commit_hash in output.splitlines()
            if commit_hash
        ]

    def get_file_content(self, file_path: str, commit_hash: Optional[str] = None) -> str:
        """Return the content of ``file_path``.

        With ``commit_hash`` the content is read from that commit via
        ``git show``; otherwise it is read from the working tree (UTF-8,
        undecodable bytes ignored).
        """
        if commit_hash:
            return self._run_git("show", f"{commit_hash}:{file_path}")
        return (self.repo_path / file_path).read_text(encoding="utf-8", errors="ignore")

    # ==================== Git operations ====================

    @staticmethod
    def _failure_message(exc: Exception) -> str:
        """Return git's stderr for a failed command, falling back to ``str(exc)``."""
        stderr = getattr(exc, "stderr", None)
        if isinstance(stderr, bytes):
            stderr = stderr.decode(errors="replace")
        return (stderr or "").strip() or str(exc)

    def pull(self, remote: str = "origin", branch: Optional[str] = None) -> dict:
        """Run ``git pull``.

        Args:
            remote: Remote name.
            branch: Branch name; omitted from the command when not given.

        Returns:
            ``{"success", "message", "changes"}``. A failing git command (or a
            git executable that cannot be started) yields ``success: False``.
        """
        args = ["pull", remote]
        if branch:
            args.append(branch)

        try:
            # check=True is what makes a failed pull reach the except branch.
            output = self._run_git(*args)
        except (subprocess.CalledProcessError, OSError) as exc:
            return {"success": False, "message": self._failure_message(exc), "changes": False}

        if "Already up to date" in output or "Already up-to-date" in output:
            return {"success": True, "message": "已是最新", "changes": False}
        if "Updating" in output:
            return {"success": True, "message": "更新成功", "changes": True}
        return {"success": True, "message": output, "changes": True}

    def fetch(self, remote: str = "origin", prune: bool = True) -> dict:
        """Run ``git fetch``.

        Args:
            remote: Remote name.
            prune: Also pass ``--prune`` to drop deleted remote branches.

        Returns:
            ``{"success", "message"}``. A failing git command (or a git
            executable that cannot be started) yields ``success: False``.
        """
        args = ["fetch", remote]
        if prune:
            args.append("--prune")

        try:
            output = self._run_git(*args)
        except (subprocess.CalledProcessError, OSError) as exc:
            return {"success": False, "message": self._failure_message(exc)}
        return {"success": True, "message": output or "获取成功"}

    @staticmethod
    def _parse_porcelain(output: str) -> dict:
        """Parse raw (unstripped) ``git status --porcelain`` output."""
        staged = []
        unstaged = []
        untracked = []
        dirty = False

        for line in output.splitlines():
            # Each entry is "XY <path>": two status columns, a space, the path.
            if len(line) < 4:
                continue
            dirty = True
            index_state, worktree_state = line[0], line[1]
            file_path = line[3:]

            if index_state in "MADRC":
                staged.append({"status": index_state, "file": file_path})
            if worktree_state in "MD":
                unstaged.append({"status": worktree_state, "file": file_path})
            if line[:2] == "??":
                untracked.append(file_path)

        return {
            "staged": staged,
            "unstaged": unstaged,
            "untracked": untracked,
            "is_clean": not dirty,
        }

    def status(self, porcelain: bool = True) -> dict:
        """Return the working-tree status.

        Args:
            porcelain: Kept for backward compatibility. The porcelain format is
                always used for parsing, so passing ``False`` no longer yields
                an unconditional "clean" result.

        Returns:
            ``{"branch", "staged", "unstaged", "untracked", "is_clean"}``.
        """
        branch = self.get_current_branch()
        # strip=False: the first line of porcelain output may start with a
        # significant space (" M path" means modified but not staged).
        output = self._run_git("status", "--porcelain", check=False, strip=False)
        return {"branch": branch, **self._parse_porcelain(output)}

    def get_remote_url(self, remote: str = "origin") -> Optional[str]:
        """Return the URL of ``remote``, or None when it cannot be determined."""
        try:
            return self._run_git("remote", "get-url", remote, check=False) or None
        except (subprocess.SubprocessError, OSError):
            return None

    def get_all_branches(self, include_remote: bool = True) -> list[str]:
        """Return branch names.

        Args:
            include_remote: Also list remote-tracking branches.

        Returns:
            Local branch names followed by remote-tracking ones.
        """
        listings = [self._run_git("branch", "--format=%(refname:short)", check=False)]
        if include_remote:
            listings.append(
                self._run_git("branch", "-r", "--format=%(refname:short)", check=False)
            )
        return [
            name.strip()
            for listing in listings
            for name in listing.split("\n")
            if name.strip()
        ]

    def has_uncommitted_changes(self) -> bool:
        """Return True when the working tree is not clean."""
        return not self.status()["is_clean"]

    def get_ahead_behind(self, branch: Optional[str] = None, remote: str = "origin") -> dict:
        """Return how many commits ``branch`` is ahead of / behind ``remote``.

        Args:
            branch: Branch name; the current branch when omitted.
            remote: Remote name.

        Returns:
            ``{"ahead", "behind"}``; both are 0 when the counts are unavailable.
        """
        if not branch:
            branch = self.get_current_branch()

        try:
            output = self._run_git(
                "rev-list", "--left-right", "--count",
                f"{remote}/{branch}...{branch}",
                check=False,
            )
            counts = output.split()
            if len(counts) == 2:
                # Left side is the remote branch (behind), right side the
                # local branch (ahead).
                return {"ahead": int(counts[1]), "behind": int(counts[0])}
        except (subprocess.SubprocessError, OSError, ValueError):
            pass

        return {"ahead": 0, "behind": 0}
360
+
361
+
362
+ # ==================== 批量操作数据结构 ====================
363
+
364
class RepoStatus(Enum):
    """Overall state of a repository."""

    CLEAN = "clean"
    MODIFIED = "modified"
    AHEAD = "ahead"
    BEHIND = "behind"
    DIVERGED = "diverged"
    ERROR = "error"


@dataclass
class RepoInfo:
    """Summary of one repository."""

    # Filesystem path of the repository.
    path: str
    # Repository name.
    name: str
    # Branch name.
    branch: str
    # Remote URL, if any.
    remote_url: Optional[str]
    # Overall repository state.
    status: RepoStatus
    # Commit counts relative to the remote.
    ahead: int
    behind: int
    # Number of staged, unstaged and untracked files.
    staged_files: int
    unstaged_files: int
    untracked_files: int
    # Error text; None by default.
    error: Optional[str] = None
388
+
389
+
390
@dataclass
class BatchResult:
    """Aggregated outcome of one operation applied to several repositories."""

    # Number of repositories the operation was attempted on.
    total: int
    # Running count of successful entries.
    success: int
    # Running count of failed entries.
    failed: int
    # One dict per repository, in the order the outcomes were recorded.
    results: list[dict] = field(default_factory=list)

    def add_success(self, repo: str, message: str = ""):
        """Record a successful outcome for ``repo`` and bump ``success``."""
        entry = {"repo": repo, "success": True, "message": message}
        self.success = self.success + 1
        self.results.append(entry)

    def add_failure(self, repo: str, error: str):
        """Record a failed outcome for ``repo`` and bump ``failed``."""
        entry = {"repo": repo, "success": False, "error": error}
        self.failed = self.failed + 1
        self.results.append(entry)
405
+
406
+
407
+ # ==================== 批量操作处理器 ====================
408
+
409
class BatchGitHandler:
    """Run git operations across every repository found under one directory."""

    def __init__(self, base_dir: str, max_workers: int = 4):
        """Initialise the batch handler.

        Args:
            base_dir: Directory that is itself a repository or contains
                repositories as direct children.
            max_workers: Maximum number of repositories processed concurrently.
        """
        self.base_dir = Path(base_dir)
        self.max_workers = max_workers

    def discover_repos(self) -> list[Path]:
        """Return the repositories under ``base_dir``.

        Returns:
            ``[base_dir]`` when ``base_dir`` itself contains ``.git``;
            otherwise the sorted direct children that contain ``.git``. An
            empty list when ``base_dir`` is missing or is not a directory.
        """
        if not self.base_dir.is_dir():
            return []

        if (self.base_dir / ".git").exists():
            return [self.base_dir]

        return sorted(
            child
            for child in self.base_dir.iterdir()
            if child.is_dir() and (child / ".git").exists()
        )

    def _select_repos(self, repos: Optional[list[str]]) -> list[Path]:
        """Return discovered repositories, filtered by name or path if given."""
        found = self.discover_repos()
        if not repos:
            return found
        wanted = set(repos)
        return [path for path in found if path.name in wanted or str(path) in wanted]

    def _map_repos(self, func: Callable, repos: list) -> list:
        """Apply ``func`` to each repo with up to ``max_workers`` threads.

        Results are returned in the order of ``repos``. ``func`` is expected
        to handle its own errors.
        """
        if not repos:
            return []
        # Clamp so that a non-positive max_workers degrades to sequential
        # execution instead of making ThreadPoolExecutor raise.
        workers = max(1, min(self.max_workers, len(repos)))
        if workers == 1:
            return [func(repo) for repo in repos]
        with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
            return list(executor.map(func, repos))

    @staticmethod
    def _inspect_repo(repo_path: Path) -> "RepoInfo":
        """Collect the status of one repository; failures become ERROR entries."""
        try:
            handler = GitHandler(str(repo_path))
            status = handler.status()
            ahead_behind = handler.get_ahead_behind()
            ahead = ahead_behind["ahead"]
            behind = ahead_behind["behind"]

            # Local modifications take precedence over the ahead/behind state.
            if not status["is_clean"]:
                repo_status = RepoStatus.MODIFIED
            elif ahead > 0 and behind > 0:
                repo_status = RepoStatus.DIVERGED
            elif ahead > 0:
                repo_status = RepoStatus.AHEAD
            elif behind > 0:
                repo_status = RepoStatus.BEHIND
            else:
                repo_status = RepoStatus.CLEAN

            return RepoInfo(
                path=str(repo_path),
                name=repo_path.name,
                branch=status["branch"],
                remote_url=handler.get_remote_url(),
                status=repo_status,
                ahead=ahead,
                behind=behind,
                staged_files=len(status["staged"]),
                unstaged_files=len(status["unstaged"]),
                untracked_files=len(status["untracked"]),
            )
        except Exception as exc:
            # Deliberate best-effort boundary: one broken repository must not
            # abort the scan, so the failure is reported in the entry itself.
            return RepoInfo(
                path=str(repo_path),
                name=repo_path.name,
                branch="",
                remote_url=None,
                status=RepoStatus.ERROR,
                ahead=0,
                behind=0,
                staged_files=0,
                unstaged_files=0,
                untracked_files=0,
                error=str(exc),
            )

    def get_all_repos_info(self) -> "list[RepoInfo]":
        """Return status information for every discovered repository."""
        return self._map_repos(self._inspect_repo, self.discover_repos())

    def _run_batch(self, repos: Optional[list[str]], operation: str) -> "BatchResult":
        """Call the ``GitHandler`` method named ``operation`` on each selected repo."""
        targets = self._select_repos(repos)

        def run_one(repo_path: Path) -> dict:
            try:
                return getattr(GitHandler(str(repo_path)), operation)()
            except Exception as exc:
                # Best-effort: record the failure and keep processing the rest.
                return {"success": False, "message": str(exc)}

        outcomes = self._map_repos(run_one, targets)

        # The result object is only touched from this thread.
        result = BatchResult(total=len(targets), success=0, failed=0)
        for repo_path, outcome in zip(targets, outcomes):
            if outcome["success"]:
                result.add_success(repo_path.name, outcome["message"])
            else:
                result.add_failure(repo_path.name, outcome["message"])
        return result

    def batch_pull(self, repos: Optional[list[str]] = None) -> "BatchResult":
        """Pull every selected repository.

        Args:
            repos: Repository names or paths to process; None means all.

        Returns:
            The aggregated result, one entry per repository.
        """
        return self._run_batch(repos, "pull")

    def batch_fetch(self, repos: Optional[list[str]] = None) -> "BatchResult":
        """Fetch every selected repository.

        Args:
            repos: Repository names or paths to process; None means all.

        Returns:
            The aggregated result, one entry per repository.
        """
        return self._run_batch(repos, "fetch")

    @staticmethod
    def _repo_name_from_url(url: str) -> str:
        """Derive the checkout directory name from a clone URL."""
        tail = url.rstrip("/").rsplit("/", 1)[-1]
        # scp-style URLs (git@host:repo.git) have no "/" before the repo name.
        tail = tail.rsplit(":", 1)[-1]
        # Only a trailing ".git" is dropped; ".git" elsewhere in the name
        # (e.g. "my.github.io") must survive.
        return tail.removesuffix(".git")

    @staticmethod
    def batch_clone(
        urls: list[str],
        target_dir: str,
        parallel: bool = True,
        max_workers: int = 4,
        progress_callback: Optional[Callable[[str, int, int], None]] = None,
    ) -> "BatchResult":
        """Clone several repositories into ``target_dir``.

        Args:
            urls: Repository URLs.
            target_dir: Directory that receives one sub-directory per clone.
            parallel: Clone concurrently when True.
            max_workers: Maximum number of concurrent clones.
            progress_callback: Called as ``(url, position, total)`` just before
                each clone starts; ``position`` is the 1-based index of the
                URL in ``urls``.

        Returns:
            The aggregated result, with entries in the order of ``urls``.
        """
        target_path = Path(target_dir)
        target_path.mkdir(parents=True, exist_ok=True)
        total = len(urls)

        def clone_single(url: str, index: int) -> dict:
            """Clone one repository and describe the outcome as a dict."""
            repo_dir = target_path / BatchGitHandler._repo_name_from_url(url)

            if repo_dir.exists():
                return {"url": url, "success": False, "error": f"目录已存在: {repo_dir}"}

            try:
                if progress_callback:
                    progress_callback(url, index + 1, total)

                subprocess.run(
                    # "--" stops a URL that starts with "-" from being read
                    # as an option.
                    ["git", "clone", "--", url, str(repo_dir)],
                    capture_output=True,
                    check=True,
                    timeout=300,  # 5 minute timeout
                )
                return {"url": url, "success": True, "path": str(repo_dir)}
            except subprocess.TimeoutExpired:
                return {"url": url, "success": False, "error": "克隆超时"}
            except subprocess.CalledProcessError as exc:
                # errors="replace": undecodable stderr must not raise from
                # inside this handler and abort the whole batch.
                message = exc.stderr.decode(errors="replace") if exc.stderr else str(exc)
                return {"url": url, "success": False, "error": message}
            except Exception as exc:
                return {"url": url, "success": False, "error": str(exc)}

        if parallel and total > 1:
            workers = max(1, min(max_workers, total))
            with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
                futures = [
                    executor.submit(clone_single, url, index)
                    for index, url in enumerate(urls)
                ]
                # Collect in submission order so the result list is deterministic.
                outcomes = [future.result() for future in futures]
        else:
            outcomes = [clone_single(url, index) for index, url in enumerate(urls)]

        result = BatchResult(total=total, success=0, failed=0)
        for outcome in outcomes:
            if outcome["success"]:
                result.add_success(outcome["url"], f"克隆到 {outcome.get('path', '')}")
            else:
                result.add_failure(outcome["url"], outcome["error"])
        return result

    @staticmethod
    def _md_cell(value) -> str:
        """Make ``value`` safe for a single Markdown table cell."""
        return str(value).replace("|", "\\|").replace("\r", " ").replace("\n", " ")

    def generate_status_report(self, repos: "Optional[list[RepoInfo]]" = None) -> str:
        """Render a Markdown status report.

        Args:
            repos: Repository information to report on; gathered with
                ``get_all_repos_info`` when None.

        Returns:
            The report as Markdown text.
        """
        if repos is None:
            repos = self.get_all_repos_info()

        lines = [
            "# Git 仓库状态报告",
            "",
            f"**扫描目录**: {self.base_dir}",
            f"**仓库数量**: {len(repos)}",
            "",
            "---",
            "",
            "| 仓库 | 分支 | 状态 | 领先 | 落后 | 已暂存 | 未暂存 |",
            "|------|------|------|------|------|--------|--------|",
        ]

        status_emoji = {
            RepoStatus.CLEAN: "✅",
            RepoStatus.MODIFIED: "📝",
            RepoStatus.AHEAD: "⬆️",
            RepoStatus.BEHIND: "⬇️",
            RepoStatus.DIVERGED: "↔️",
            RepoStatus.ERROR: "❌",
        }

        cell = self._md_cell
        for repo in repos:
            emoji = status_emoji.get(repo.status, "❓")
            lines.append(
                f"| {cell(repo.name)} | {cell(repo.branch)} | {emoji} {repo.status.value} | "
                f"{repo.ahead} | {repo.behind} | {repo.staged_files} | {repo.unstaged_files} |"
            )
            if repo.error:
                lines.append(f"| ^ | | 错误: {cell(repo.error)} | | | | |")

        # Statistics: repositories per status, most frequent first.
        status_counts = {}
        for repo in repos:
            status_counts[repo.status] = status_counts.get(repo.status, 0) + 1

        lines.extend(["", "---", "", "## 统计", ""])
        for status, count in sorted(status_counts.items(), key=lambda item: -item[1]):
            emoji = status_emoji.get(status, "❓")
            lines.append(f"- {emoji} {status.value}: {count} 个仓库")

        # Repositories that need attention.
        attention = (
            RepoStatus.MODIFIED,
            RepoStatus.BEHIND,
            RepoStatus.DIVERGED,
            RepoStatus.ERROR,
        )
        # Two-space indent so the suggestion nests under its repository bullet.
        suggestions = {
            RepoStatus.BEHIND: "  - 建议: 执行 `git pull` 同步",
            RepoStatus.MODIFIED: "  - 建议: 提交或暂存更改",
            RepoStatus.DIVERGED: "  - 建议: 解决分支分歧",
        }
        need_action = [repo for repo in repos if repo.status in attention]
        if need_action:
            lines.extend(["", "## 需要关注的仓库", ""])
            for repo in need_action:
                lines.append(f"- **{repo.name}**: {repo.status.value}")
                if repo.status in suggestions:
                    lines.append(suggestions[repo.status])

        return "\n".join(lines)