wcgw 2.4.3__py3-none-any.whl → 2.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of wcgw might be problematic. Click here for more details.
- wcgw/client/anthropic_client.py +7 -17
- wcgw/client/common.py +3 -1
- wcgw/client/mcp_server/server.py +41 -36
- wcgw/client/openai_client.py +21 -36
- wcgw/client/openai_utils.py +5 -5
- wcgw/client/repo_ops/display_tree.py +127 -0
- wcgw/client/repo_ops/path_prob.py +58 -0
- wcgw/client/repo_ops/paths_model.vocab +20000 -0
- wcgw/client/repo_ops/paths_tokens.model +80042 -0
- wcgw/client/repo_ops/repo_context.py +148 -0
- wcgw/client/tools.py +220 -115
- wcgw/relay/serve.py +3 -3
- wcgw/types_.py +6 -4
- {wcgw-2.4.3.dist-info → wcgw-2.6.1.dist-info}/METADATA +19 -56
- {wcgw-2.4.3.dist-info → wcgw-2.6.1.dist-info}/RECORD +18 -12
- wcgw-2.6.1.dist-info/licenses/LICENSE +213 -0
- {wcgw-2.4.3.dist-info → wcgw-2.6.1.dist-info}/WHEEL +0 -0
- {wcgw-2.4.3.dist-info → wcgw-2.6.1.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from collections import deque
|
|
3
|
+
from pathlib import Path # Still needed for other parts
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
from pygit2 import GitError, Repository
|
|
7
|
+
|
|
8
|
+
from .display_tree import DirectoryTree
|
|
9
|
+
from .path_prob import FastPathAnalyzer
|
|
10
|
+
|
|
11
|
+
# The tokenizer model and its vocabulary are shipped in the same package
# directory as this module, so they are located relative to ``__file__``.
curr_folder = Path(__file__).parent
vocab_file = curr_folder.joinpath("paths_model.vocab")
model_file = curr_folder.joinpath("paths_tokens.model")

# Constructed once at import time and reused by every call in this module.
PATH_SCORER = FastPathAnalyzer(os.fspath(model_file), os.fspath(vocab_file))
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def find_ancestor_with_git(path: Path) -> Optional[Repository]:
    """Open the git repository associated with ``path``, if there is one.

    When ``path`` is a regular file, its containing directory is used as the
    starting point instead.

    Args:
        path: File or directory to start from.

    Returns:
        The opened ``Repository``, or ``None`` when constructing it raises
        ``GitError``.
    """
    # NOTE(review): whether parent directories are searched is decided by
    # pygit2's ``Repository`` constructor, not by this function -- confirm.
    start_dir = path.parent if path.is_file() else path
    try:
        repository = Repository(str(start_dir))
    except GitError:
        return None
    return repository
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Default upper bound on how many directory entries a single traversal may
# inspect before it stops descending.
MAX_ENTRIES_CHECK = 100_000


def get_all_files_max_depth(
    abs_folder: str,
    max_depth: int,
    repo: Optional[Repository],
    max_entries: int = MAX_ENTRIES_CHECK,
) -> list[str]:
    """Collect relative file paths under ``abs_folder`` in breadth-first order.

    Directories are visited level by level, so files closer to the root come
    first in the result. Paths are built with ``/`` separators relative to
    ``abs_folder``.

    Args:
        abs_folder: Directory to start from.
        max_depth: Deepest directory level to scan; ``abs_folder`` itself is
            level 0. A negative value yields an empty list.
        repo: Optional repository; entries for which
            ``repo.path_is_ignored(rel_path)`` is true are skipped (and
            ignored directories are not descended into).
        max_entries: Budget of directory entries to inspect. No new directory
            is opened once the budget is used up, and a directory contributes
            at most the budget that was left when it was opened.

    Returns:
        List of relative file paths. Directories that cannot be listed and
        entries that cannot be stat'ed are silently skipped.
    """
    all_files: list[str] = []
    # Queue stores: (folder_path, depth, rel_path_prefix)
    queue = deque([(abs_folder, 0, "")])
    entries_check = 0
    while queue and entries_check < max_entries:
        current_folder, depth, prefix = queue.popleft()

        if depth > max_depth:
            continue

        # Measure the budget *before* counting this directory's entries;
        # otherwise the directory that crosses the cap would contribute
        # nothing at all, even though budget was still available.
        remaining = max_entries - entries_check

        try:
            # The context manager closes the scandir handle deterministically.
            with os.scandir(current_folder) as scan:
                entries = list(scan)
        except OSError:
            # Covers PermissionError, NotADirectoryError, vanished paths, ...
            continue

        # Split into files and folders with a single pass.
        files = []
        folders = []
        for entry in entries:
            entries_check += 1
            try:
                is_file = entry.is_file(follow_symlinks=False)
            except OSError:
                continue
            # ``prefix`` is either "" or already ends with "/".
            rel_path = f"{prefix}{entry.name}"

            if repo and repo.path_is_ignored(rel_path):
                continue

            if is_file:
                files.append(rel_path)
            else:
                # Everything that is not a regular file (including symlinks)
                # is queued; non-directories fail in scandir and are dropped.
                folders.append((entry.path, rel_path))

        # Files first (keeps BFS priority); at most 10_000 per directory.
        all_files.extend(files[: min(10_000, remaining)])

        # Only queue sub-folders that are still within the depth limit.
        if depth < max_depth:
            for folder_path, folder_rel_path in folders:
                queue.append((folder_path, depth + 1, f"{folder_rel_path}/"))

    return all_files
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def get_repo_context(file_or_repo_path: str, max_files: int) -> tuple[str, Path]:
    """Build a directory-tree rendering of the highest-scoring files.

    The context directory is the working tree of the git repository that
    contains ``file_or_repo_path`` when one is found; otherwise it is the
    path itself (or its parent directory when the path is a file).

    Args:
        file_or_repo_path: File or directory used to locate the context.
        max_files: Number of top-scoring files expanded in the tree; also
            forwarded to ``DirectoryTree`` as its ``max_files``.

    Returns:
        ``(rendered_tree, context_dir)`` where ``rendered_tree`` is whatever
        ``DirectoryTree.display()`` returns.
    """
    file_or_repo_path_ = Path(file_or_repo_path).absolute()

    repo = find_ancestor_with_git(file_or_repo_path_)

    if repo is not None:
        # ``repo.path`` points at the git directory. Its parent is the working
        # tree only in the standard layout; for linked worktrees and
        # submodules the git directory lives under another repository's
        # ``.git``. Prefer ``workdir`` and keep the old derivation as a
        # fallback for repositories that have no working tree.
        workdir = repo.workdir
        context_dir = Path(workdir) if workdir else Path(repo.path).parent
    elif file_or_repo_path_.is_file():
        context_dir = file_or_repo_path_.parent
    else:
        context_dir = file_or_repo_path_

    # Traverse at most 10 directory levels below the context directory.
    all_files = get_all_files_max_depth(str(context_dir), 10, repo)

    # Calculate probabilities in batch
    path_scores = PATH_SCORER.calculate_path_probabilities_batch(all_files)

    # Pair each path with the first element of its score and rank highest
    # first; ``sorted`` is stable, so ties keep traversal order.
    scored_paths = zip(all_files, (score[0] for score in path_scores))
    ranked = sorted(scored_paths, key=lambda item: item[1], reverse=True)
    top_files = [path for path, _ in ranked[:max_files]]

    directory_printer = DirectoryTree(context_dir, max_files=max_files)
    for file in top_files:
        directory_printer.expand(file)

    return directory_printer.display(), context_dir
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
if __name__ == "__main__":
    # Ad-hoc profiling harness: the folder to analyse is the first CLI argument.
    import cProfile
    import pstats
    import sys

    from line_profiler import LineProfiler

    target = sys.argv[1]

    # Pass 1: cProfile gives function-level totals for one full run.
    with cProfile.Profile() as func_profiler:
        result = get_repo_context(target, 200)[0]

    function_stats = pstats.Stats(func_profiler).sort_stats("cumulative")
    print("\n=== Function-level profiling ===")
    function_stats.print_stats(20)  # top 20 functions only

    # Pass 2: line_profiler re-runs the same call for per-line timings.
    line_prof = LineProfiler()
    line_prof(get_repo_context)(target, 200)

    print("\n=== Line-by-line profiling ===")
    line_prof.print_stats()

    print("\n=== Result ===")
    print(result)
|