wcgw 3.0.7__py3-none-any.whl → 4.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of wcgw might be problematic. Click here for more details.
- wcgw/client/bash_state/bash_state.py +182 -13
- wcgw/client/diff-instructions.txt +29 -15
- wcgw/client/file_ops/diff_edit.py +2 -1
- wcgw/client/file_ops/search_replace.py +37 -21
- wcgw/client/memory.py +5 -2
- wcgw/client/modes.py +7 -7
- wcgw/client/repo_ops/display_tree.py +3 -3
- wcgw/client/repo_ops/file_stats.py +152 -0
- wcgw/client/repo_ops/repo_context.py +122 -4
- wcgw/client/tool_prompts.py +13 -16
- wcgw/client/tools.py +479 -80
- wcgw/relay/serve.py +8 -53
- wcgw/types_.py +103 -16
- {wcgw-3.0.7.dist-info → wcgw-4.0.0.dist-info}/METADATA +36 -19
- {wcgw-3.0.7.dist-info → wcgw-4.0.0.dist-info}/RECORD +20 -19
- wcgw_cli/anthropic_client.py +1 -1
- wcgw_cli/openai_client.py +1 -1
- {wcgw-3.0.7.dist-info → wcgw-4.0.0.dist-info}/WHEEL +0 -0
- {wcgw-3.0.7.dist-info → wcgw-4.0.0.dist-info}/entry_points.txt +0 -0
- {wcgw-3.0.7.dist-info → wcgw-4.0.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
import functools
import hashlib
import json
import os
import sys
from typing import Any, Callable, Dict, TypeVar, cast
|
|
6
|
+
|
|
7
|
+
T = TypeVar("T") # Type variable for generic functions
|
|
8
|
+
F = TypeVar("F", bound=Callable[..., Any]) # Type variable for decorated functions
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class FileStats:
    """Per-file counters for read, edit, and write operations."""

    def __init__(self) -> None:
        # A freshly tracked file starts with all counters at zero.
        self.read_count: int = 0
        self.edit_count: int = 0
        self.write_count: int = 0

    def increment_read(self) -> None:
        """Record one read of the file."""
        self.read_count = self.read_count + 1

    def increment_edit(self) -> None:
        """Record one edit of the file."""
        self.edit_count = self.edit_count + 1

    def increment_write(self) -> None:
        """Record one write of the file."""
        self.write_count = self.write_count + 1

    def to_dict(self) -> Dict[str, int]:
        """Serialize the counters into a plain dictionary."""
        return dict(
            read_count=self.read_count,
            edit_count=self.edit_count,
            write_count=self.write_count,
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "FileStats":
        """Rebuild a FileStats from a dictionary produced by to_dict.

        Missing keys default to zero, so partially written data is tolerated.
        """
        instance = cls()
        for attr in ("read_count", "edit_count", "write_count"):
            setattr(instance, attr, data.get(attr, 0))
        return instance
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class WorkspaceStats:
    """Aggregate FileStats for every tracked file in a workspace."""

    def __init__(self) -> None:
        # Maps file path -> its FileStats record.
        self.files: Dict[str, FileStats] = {}

    def to_dict(self) -> Dict[str, Any]:
        """Serialize all per-file stats into a JSON-friendly dictionary."""
        serialized: Dict[str, Any] = {}
        for path, file_stats in self.files.items():
            serialized[path] = file_stats.to_dict()
        return {"files": serialized}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "WorkspaceStats":
        """Rebuild a WorkspaceStats from a dictionary produced by to_dict.

        A missing "files" key yields an empty workspace.
        """
        workspace = cls()
        for path, entry in data.get("files", {}).items():
            workspace.files[path] = FileStats.from_dict(entry)
        return workspace
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def safe_stats_operation(func: F) -> F:
    """
    Decorator that makes stats bookkeeping best-effort.

    Stats tracking must never break the core tool flow, so any exception
    raised by the wrapped function is logged to stderr and swallowed; the
    wrapper then returns None. Callers of decorated functions must
    therefore tolerate a None result.
    """

    @functools.wraps(func)  # preserve __name__/__doc__ for debugging and introspection
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        try:
            return func(*args, **kwargs)
        except Exception as e:
            # Deliberately broad: stats are non-essential, so log and continue.
            print(f"Warning: Stats tracking error - {e}", file=sys.stderr)
            return None

    # cast keeps the decorated function's static signature intact for type checkers
    return cast(F, wrapper)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def get_stats_path(workspace_path: str) -> str:
    """
    Compute the on-disk location of the stats file for a workspace.

    The filename combines the workspace's basename (for readability) with an
    MD5 hash of the full normalized path (for uniqueness), and lives under
    $XDG_DATA_HOME/wcgw/workspace_stats (defaulting to ~/.local/share).
    The directory is created if it does not exist.

    Args:
        workspace_path: The full path of the workspace directory.

    Returns:
        The path to the stats file.
    """
    normalized = os.path.normpath(os.path.expanduser(workspace_path))

    # Fall back to "root" when the path has no basename (e.g. "/").
    workspace_name = os.path.basename(normalized) or "root"

    # MD5 here is a stable fingerprint for filename uniqueness, not security.
    digest = hashlib.md5(normalized.encode()).hexdigest()

    data_home = os.environ.get("XDG_DATA_HOME", os.path.expanduser("~/.local/share"))
    stats_dir = os.path.join(data_home, "wcgw/workspace_stats")
    os.makedirs(stats_dir, exist_ok=True)

    return os.path.join(stats_dir, f"{workspace_name}_{digest}.json")
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
@safe_stats_operation
def load_workspace_stats(workspace_path: str) -> WorkspaceStats:
    """
    Load the stats for a workspace, or create empty stats if not exists.

    Args:
        workspace_path: The full path of the workspace directory.

    Returns:
        WorkspaceStats object containing file operation statistics.
        Note: because of @safe_stats_operation, callers may receive None
        if an unexpected error occurs.
    """
    stats_path = get_stats_path(workspace_path)

    # Guard clause: no stats file yet means a fresh, empty workspace.
    if not os.path.exists(stats_path):
        return WorkspaceStats()

    try:
        with open(stats_path, "r") as stats_file:
            return WorkspaceStats.from_dict(json.load(stats_file))
    except (json.JSONDecodeError, KeyError, ValueError):
        # A corrupted stats file is not fatal; start over with empty stats.
        return WorkspaceStats()
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
@safe_stats_operation
def save_workspace_stats(workspace_path: str, stats: WorkspaceStats) -> None:
    """
    Persist the stats for a workspace to its JSON stats file.

    Args:
        workspace_path: The full path of the workspace directory.
        stats: WorkspaceStats object to save.
    """
    destination = get_stats_path(workspace_path)
    serialized = stats.to_dict()
    with open(destination, "w") as out:
        json.dump(serialized, out, indent=2)
|
|
@@ -4,8 +4,10 @@ from pathlib import Path # Still needed for other parts
|
|
|
4
4
|
from typing import Optional
|
|
5
5
|
|
|
6
6
|
from pygit2 import GitError, Repository
|
|
7
|
+
from pygit2.enums import SortMode
|
|
7
8
|
|
|
8
9
|
from .display_tree import DirectoryTree
|
|
10
|
+
from .file_stats import load_workspace_stats
|
|
9
11
|
from .path_prob import FastPathAnalyzer
|
|
10
12
|
|
|
11
13
|
curr_folder = Path(__file__).parent
|
|
@@ -82,19 +84,84 @@ def get_all_files_max_depth(
|
|
|
82
84
|
return all_files
|
|
83
85
|
|
|
84
86
|
|
|
87
|
+
def get_recent_git_files(repo: Repository, count: int = 10) -> list[str]:
    """
    Get the most recently modified files from git history.

    Walks commits from HEAD (newest first), collecting the paths touched by
    each non-merge commit until `count` unique, still-existing files are
    found. Any git error (e.g. unborn HEAD) yields a best-effort partial
    result rather than raising.

    Args:
        repo: The git repository
        count: Number of recent files to return

    Returns:
        List of relative paths to recently modified files
    """
    # Track seen files to avoid duplicates
    seen_files: set[str] = set()
    recent_files: list[str] = []

    # repo.path points at .git/, so its parent is the working-tree root.
    # Hoisted out of the walk: it is invariant across all commits/patches.
    repo_workdir = Path(repo.path).parent

    try:
        # Get the HEAD reference and walk through recent commits
        head = repo.head
        for commit in repo.walk(head.target, SortMode.TOPOLOGICAL | SortMode.TIME):
            # Skip merge commits which have multiple parents
            if len(commit.parents) > 1:
                continue

            # If we have a parent, get the diff between the commit and its parent
            if commit.parents:
                parent = commit.parents[0]
                diff = repo.diff(parent, commit)  # type: ignore[attr-defined]
            else:
                # For the first (root) commit, get the diff against an empty tree
                diff = commit.tree.diff_to_tree(context_lines=0)

            # Process each changed file in the diff
            for patch in diff:
                file_path = patch.delta.new_file.path

                # Skip duplicates and files that no longer exist in the
                # working tree (i.e. deleted since this commit).
                if file_path in seen_files or not (repo_workdir / file_path).exists():
                    continue

                seen_files.add(file_path)
                recent_files.append(file_path)

                # If we have enough files, stop
                if len(recent_files) >= count:
                    return recent_files

    except Exception:
        # Handle git errors gracefully: return whatever was collected so far.
        pass

    return recent_files
|
|
142
|
+
|
|
143
|
+
|
|
85
144
|
def get_repo_context(file_or_repo_path: str, max_files: int) -> tuple[str, Path]:
|
|
86
145
|
file_or_repo_path_ = Path(file_or_repo_path).absolute()
|
|
87
146
|
|
|
88
147
|
repo = find_ancestor_with_git(file_or_repo_path_)
|
|
148
|
+
recent_git_files: list[str] = []
|
|
89
149
|
|
|
150
|
+
# Determine the context directory
|
|
90
151
|
if repo is not None:
|
|
91
152
|
context_dir = Path(repo.path).parent
|
|
153
|
+
# Get recent git files - get at least 50 or the max_files count, whichever is larger
|
|
154
|
+
recent_files_count = max(10, max_files)
|
|
155
|
+
recent_git_files = get_recent_git_files(repo, recent_files_count)
|
|
92
156
|
else:
|
|
93
157
|
if file_or_repo_path_.is_file():
|
|
94
158
|
context_dir = file_or_repo_path_.parent
|
|
95
159
|
else:
|
|
96
160
|
context_dir = file_or_repo_path_
|
|
97
161
|
|
|
162
|
+
# Load workspace stats from the context directory
|
|
163
|
+
workspace_stats = load_workspace_stats(str(context_dir))
|
|
164
|
+
|
|
98
165
|
all_files = get_all_files_max_depth(str(context_dir), 10, repo)
|
|
99
166
|
|
|
100
167
|
# Calculate probabilities in batch
|
|
@@ -106,10 +173,61 @@ def get_repo_context(file_or_repo_path: str, max_files: int) -> tuple[str, Path]
|
|
|
106
173
|
path for path, _ in sorted(path_with_scores, key=lambda x: x[1], reverse=True)
|
|
107
174
|
]
|
|
108
175
|
|
|
109
|
-
|
|
176
|
+
# Start with recent git files, then add other important files
|
|
177
|
+
top_files = []
|
|
178
|
+
|
|
179
|
+
# If we have workspace stats, prioritize the most active files first
|
|
180
|
+
active_files = []
|
|
181
|
+
if workspace_stats is not None:
|
|
182
|
+
# Get files with activity score (weighted count of operations)
|
|
183
|
+
scored_files = []
|
|
184
|
+
for file_path, file_stats in workspace_stats.files.items():
|
|
185
|
+
try:
|
|
186
|
+
# Convert to relative path if possible
|
|
187
|
+
if str(context_dir) in file_path:
|
|
188
|
+
rel_path = os.path.relpath(file_path, str(context_dir))
|
|
189
|
+
else:
|
|
190
|
+
rel_path = file_path
|
|
191
|
+
|
|
192
|
+
# Calculate activity score - weight reads more for this functionality
|
|
193
|
+
activity_score = (
|
|
194
|
+
file_stats.read_count * 2
|
|
195
|
+
+ (file_stats.edit_count)
|
|
196
|
+
+ (file_stats.write_count)
|
|
197
|
+
)
|
|
198
|
+
|
|
199
|
+
# Only include files that still exist
|
|
200
|
+
if rel_path in all_files or os.path.exists(file_path):
|
|
201
|
+
scored_files.append((rel_path, activity_score))
|
|
202
|
+
except (ValueError, OSError):
|
|
203
|
+
# Skip files that cause path resolution errors
|
|
204
|
+
continue
|
|
205
|
+
|
|
206
|
+
# Sort by activity score (highest first) and get top 5
|
|
207
|
+
active_files = [
|
|
208
|
+
f for f, _ in sorted(scored_files, key=lambda x: x[1], reverse=True)[:5]
|
|
209
|
+
]
|
|
210
|
+
|
|
211
|
+
# Add active files first
|
|
212
|
+
for file in active_files:
|
|
213
|
+
if file not in top_files and file in all_files:
|
|
214
|
+
top_files.append(file)
|
|
215
|
+
|
|
216
|
+
# Add recent git files next - these should be prioritized
|
|
217
|
+
for file in recent_git_files:
|
|
218
|
+
if file not in top_files and file in all_files:
|
|
219
|
+
top_files.append(file)
|
|
220
|
+
|
|
221
|
+
# Use statistical sorting for the remaining files, but respect max_files limit
|
|
222
|
+
# and ensure we don't add duplicates
|
|
223
|
+
if len(top_files) < max_files:
|
|
224
|
+
# Only add statistically important files that aren't already in top_files
|
|
225
|
+
for file in sorted_files:
|
|
226
|
+
if file not in top_files and len(top_files) < max_files:
|
|
227
|
+
top_files.append(file)
|
|
110
228
|
|
|
111
229
|
directory_printer = DirectoryTree(context_dir, max_files=max_files)
|
|
112
|
-
for file in top_files:
|
|
230
|
+
for file in top_files[:max_files]:
|
|
113
231
|
directory_printer.expand(file)
|
|
114
232
|
|
|
115
233
|
return directory_printer.display(), context_dir
|
|
@@ -127,7 +245,7 @@ if __name__ == "__main__":
|
|
|
127
245
|
# Profile using cProfile for overall function statistics
|
|
128
246
|
profiler = cProfile.Profile()
|
|
129
247
|
profiler.enable()
|
|
130
|
-
result = get_repo_context(folder,
|
|
248
|
+
result = get_repo_context(folder, 50)[0]
|
|
131
249
|
profiler.disable()
|
|
132
250
|
|
|
133
251
|
# Print cProfile stats
|
|
@@ -139,7 +257,7 @@ if __name__ == "__main__":
|
|
|
139
257
|
# Profile using line_profiler for line-by-line statistics
|
|
140
258
|
lp = LineProfiler()
|
|
141
259
|
lp_wrapper = lp(get_repo_context)
|
|
142
|
-
lp_wrapper(folder,
|
|
260
|
+
lp_wrapper(folder, 50)
|
|
143
261
|
|
|
144
262
|
print("\n=== Line-by-line profiling ===")
|
|
145
263
|
lp.print_stats()
|
wcgw/client/tool_prompts.py
CHANGED
|
@@ -5,11 +5,10 @@ from typing import Any
|
|
|
5
5
|
from ..types_ import (
|
|
6
6
|
BashCommand,
|
|
7
7
|
ContextSave,
|
|
8
|
-
|
|
8
|
+
FileWriteOrEdit,
|
|
9
9
|
Initialize,
|
|
10
10
|
ReadFiles,
|
|
11
11
|
ReadImage,
|
|
12
|
-
WriteIfEmpty,
|
|
13
12
|
)
|
|
14
13
|
|
|
15
14
|
with open(os.path.join(os.path.dirname(__file__), "diff-instructions.txt")) as f:
|
|
@@ -62,15 +61,9 @@ TOOL_PROMPTS = [
|
|
|
62
61
|
description="""
|
|
63
62
|
- Read full file content of one or more files.
|
|
64
63
|
- Provide absolute paths only (~ allowed)
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
inputSchema=WriteIfEmpty.model_json_schema(),
|
|
69
|
-
name="WriteIfEmpty",
|
|
70
|
-
description="""
|
|
71
|
-
- Write content to an empty or non-existent file. Provide file path and content. Use this instead of BashCommand for writing new files.
|
|
72
|
-
- Provide absolute path only.
|
|
73
|
-
- For editing existing files, use FileEdit instead of this tool.
|
|
64
|
+
- Only if the task requires line numbers understanding:
|
|
65
|
+
- You may populate "show_line_numbers_reason" with your reason, by default null/empty means no line numbers are shown.
|
|
66
|
+
- You may extract a range of lines. E.g., `/path/to/file:1-10` for lines 1-10. You can drop start or end like `/path/to/file:1-` or `/path/to/file:-10`
|
|
74
67
|
""",
|
|
75
68
|
),
|
|
76
69
|
Prompts(
|
|
@@ -79,12 +72,16 @@ TOOL_PROMPTS = [
|
|
|
79
72
|
description="Read an image from the shell.",
|
|
80
73
|
),
|
|
81
74
|
Prompts(
|
|
82
|
-
inputSchema=
|
|
83
|
-
name="
|
|
75
|
+
inputSchema=FileWriteOrEdit.model_json_schema(),
|
|
76
|
+
name="FileWriteOrEdit",
|
|
84
77
|
description="""
|
|
85
|
-
-
|
|
86
|
-
- Use
|
|
87
|
-
-
|
|
78
|
+
- Writes or edits a file based on the percentage of changes.
|
|
79
|
+
- Use absolute path only (~ allowed).
|
|
80
|
+
- First write down percentage of lines that need to be replaced in the file (between 0-100) in percentage_to_change
|
|
81
|
+
- percentage_to_change should be low if mostly new code is to be added. It should be high if a lot of things are to be replaced.
|
|
82
|
+
- If percentage_to_change > 50, provide full file content in file_content_or_search_replace_blocks
|
|
83
|
+
- If percentage_to_change <= 50, file_content_or_search_replace_blocks should be search/replace blocks.
|
|
84
|
+
|
|
88
85
|
"""
|
|
89
86
|
+ diffinstructions,
|
|
90
87
|
),
|