wcgw 2.4.3__py3-none-any.whl → 2.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of wcgw might be problematic.

@@ -0,0 +1,148 @@
+ import os
+ from collections import deque
+ from pathlib import Path  # Still needed for other parts
+ from typing import Optional
+
+ from pygit2 import GitError, Repository
+
+ from .display_tree import DirectoryTree
+ from .path_prob import FastPathAnalyzer
+
+ curr_folder = Path(__file__).parent
+ vocab_file = curr_folder / "paths_model.vocab"
+ model_file = curr_folder / "paths_tokens.model"
+ PATH_SCORER = FastPathAnalyzer(str(model_file), str(vocab_file))
+
+
+ def find_ancestor_with_git(path: Path) -> Optional[Repository]:
+     if path.is_file():
+         path = path.parent
+
+     try:
+         return Repository(str(path))
+     except GitError:
+         return None
+
+
+ MAX_ENTRIES_CHECK = 100_000
+
+
+ def get_all_files_max_depth(
+     abs_folder: str,
+     max_depth: int,
+     repo: Optional[Repository],
+ ) -> list[str]:
+     """BFS implementation using a deque that maintains relative paths during traversal.
+     Returns a list of relative file paths, scanning at most MAX_ENTRIES_CHECK entries."""
+     all_files = []
+     # Queue stores: (folder_path, depth, rel_path_prefix)
+     queue = deque([(abs_folder, 0, "")])
+     entries_check = 0
+     while queue and entries_check < MAX_ENTRIES_CHECK:
+         current_folder, depth, prefix = queue.popleft()
+
+         if depth > max_depth:
+             continue
+
+         try:
+             entries = list(os.scandir(current_folder))
+         except PermissionError:
+             continue
+         except OSError:
+             continue
+         # Split into files and folders with single scan
+         files = []
+         folders = []
+         for entry in entries:
+             entries_check += 1
+             try:
+                 is_file = entry.is_file(follow_symlinks=False)
+             except OSError:
+                 continue
+             name = entry.name
+             rel_path = f"{prefix}{name}" if prefix else name
+
+             if repo and repo.path_is_ignored(rel_path):
+                 continue
+
+             if is_file:
+                 files.append(rel_path)
+             else:
+                 folders.append((entry.path, rel_path))
+
+         # Process files first (maintain priority)
+         chunk = files[: min(10_000, max(0, MAX_ENTRIES_CHECK - entries_check))]
+         all_files.extend(chunk)
+
+         # Add folders to queue for BFS traversal
+         for folder_path, folder_rel_path in folders:
+             next_prefix = f"{folder_rel_path}/"
+             queue.append((folder_path, depth + 1, next_prefix))
+
+     return all_files
+
+
+ def get_repo_context(file_or_repo_path: str, max_files: int) -> tuple[str, Path]:
+     file_or_repo_path_ = Path(file_or_repo_path).absolute()
+
+     repo = find_ancestor_with_git(file_or_repo_path_)
+
+     if repo is not None:
+         context_dir = Path(repo.path).parent
+     else:
+         if file_or_repo_path_.is_file():
+             context_dir = file_or_repo_path_.parent
+         else:
+             context_dir = file_or_repo_path_
+
+     all_files = get_all_files_max_depth(str(context_dir), 10, repo)
+
+     # Calculate probabilities in batch
+     path_scores = PATH_SCORER.calculate_path_probabilities_batch(all_files)
+
+     # Create list of (path, score) tuples and sort by score
+     path_with_scores = list(zip(all_files, (score[0] for score in path_scores)))
+     sorted_files = [
+         path for path, _ in sorted(path_with_scores, key=lambda x: x[1], reverse=True)
+     ]
+
+     top_files = sorted_files[:max_files]
+
+     directory_printer = DirectoryTree(context_dir, max_files=max_files)
+     for file in top_files:
+         directory_printer.expand(file)
+
+     return directory_printer.display(), context_dir
+
+
+ if __name__ == "__main__":
+     import cProfile
+     import pstats
+     import sys
+
+     from line_profiler import LineProfiler
+
+     folder = sys.argv[1]
+
+     # Profile using cProfile for overall function statistics
+     profiler = cProfile.Profile()
+     profiler.enable()
+     result = get_repo_context(folder, 200)[0]
+     profiler.disable()
+
+     # Print cProfile stats
+     stats = pstats.Stats(profiler)
+     stats.sort_stats("cumulative")
+     print("\n=== Function-level profiling ===")
+     stats.print_stats(20)  # Print top 20 functions
+
+     # Profile using line_profiler for line-by-line statistics
+     lp = LineProfiler()
+     lp_wrapper = lp(get_repo_context)
+     lp_wrapper(folder, 200)
+
+     print("\n=== Line-by-line profiling ===")
+     lp.print_stats()
+
+     print("\n=== Result ===")
+     print(result)
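
For orientation, a minimal sketch of how the new get_repo_context entry point could be exercised, based only on the signature shown in the diff. The import path wcgw.client.repo_ops.repo_context and the example repository path are assumptions; the diff does not show where this file lives inside the package.

# Usage sketch (not part of the diff). The module path below is a guess,
# since the diff omits the file name; adjust it to wherever the module is installed.
from pathlib import Path

from wcgw.client.repo_ops.repo_context import get_repo_context  # hypothetical import path

# Returns a rendered directory tree of the most "likely relevant" files
# plus the directory that was used as the repository context.
tree_text, context_dir = get_repo_context("/path/to/some/repo", 200)
assert isinstance(context_dir, Path)
print(f"Context directory: {context_dir}")
print(tree_text)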