wcgw 3.0.7__py3-none-any.whl → 4.1.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of wcgw might be problematic. Click here for more details.

@@ -4,8 +4,10 @@ from pathlib import Path # Still needed for other parts
4
4
  from typing import Optional
5
5
 
6
6
  from pygit2 import GitError, Repository
7
+ from pygit2.enums import SortMode
7
8
 
8
9
  from .display_tree import DirectoryTree
10
+ from .file_stats import load_workspace_stats
9
11
  from .path_prob import FastPathAnalyzer
10
12
 
11
13
  curr_folder = Path(__file__).parent
@@ -82,11 +84,85 @@ def get_all_files_max_depth(
82
84
  return all_files
83
85
 
84
86
 
85
- def get_repo_context(file_or_repo_path: str, max_files: int) -> tuple[str, Path]:
87
def get_recent_git_files(repo: Repository, count: int = 10) -> list[str]:
    """
    Get the most recently modified files from git history

    Walks the commits reachable from HEAD (topological + time ordering),
    skips merge commits, and collects the paths touched by each remaining
    commit until ``count`` distinct paths that still exist on disk are found.

    Args:
        repo: The git repository
        count: Maximum number of recent files to return; a value <= 0
            yields an empty list

    Returns:
        List of relative paths to recently modified files, in the order they
        were encountered during the walk and without duplicates. The list may
        be shorter than ``count`` if history runs out or if an error
        interrupts the walk (whatever was collected so far is returned).
    """
    # Nothing requested: avoid walking history at all. Without this guard the
    # limit check below would only fire after the first file was appended.
    if count <= 0:
        return []

    # Every path already examined (accepted or rejected), so each path is
    # checked against the filesystem at most once.
    seen_files: set[str] = set()
    recent_files: list[str] = []

    try:
        # Diff paths are resolved against the parent of repo.path; this is
        # loop-invariant, so compute it once.
        repo_path_parent = Path(repo.path).parent

        # Get the HEAD reference and walk through recent commits
        head = repo.head
        for commit in repo.walk(head.target, SortMode.TOPOLOGICAL | SortMode.TIME):
            parents = commit.parents

            # Skip merge commits which have multiple parents
            if len(parents) > 1:
                continue

            if parents:
                # Diff between the commit and its single parent
                diff = repo.diff(parents[0], commit)  # type: ignore[attr-defined]
            else:
                # For the first commit, get the diff against an empty tree
                diff = commit.tree.diff_to_tree(context_lines=0)

            # Process each changed file in the diff
            for patch in diff:
                file_path = patch.delta.new_file.path

                if file_path in seen_files:
                    continue
                seen_files.add(file_path)

                # Skip files that no longer exist (deleted or renamed away)
                if not (repo_path_parent / file_path).exists():
                    continue

                recent_files.append(file_path)

                # If we have enough files, stop
                if len(recent_files) >= count:
                    return recent_files

    except Exception:
        # Deliberate best-effort: recent files are only a ranking hint, so any
        # failure while reading history falls through and returns what was
        # gathered so far instead of propagating to the caller.
        pass

    return recent_files
142
+
143
+
144
def calculate_dynamic_file_limit(total_files: int) -> int:
    """
    Compute how many files to show, scaled linearly with repository size

    Repositories with at most 50 files get a limit of 50. Above that the
    limit grows linearly, reaching 400 at 30000 files, and is capped at 400
    for anything larger.

    Args:
        total_files: Total number of files found in the repository

    Returns:
        The file limit, always between 50 and 400 inclusive
    """
    min_files = 50
    max_files = 400
    # Repository size at which the limit reaches max_files
    saturation_point = 30000

    if total_files <= min_files:
        return min_files

    # Integer floor division gives the exact floor of the linear
    # interpolation. Multiplying by a float slope and truncating can land
    # just below a whole number and lose one file from the limit.
    extra = (
        (total_files - min_files)
        * (max_files - min_files)
        // (saturation_point - min_files)
    )

    return min(max_files, min_files + extra)
157
+
158
+
159
+ def get_repo_context(file_or_repo_path: str) -> tuple[str, Path]:
86
160
  file_or_repo_path_ = Path(file_or_repo_path).absolute()
87
161
 
88
162
  repo = find_ancestor_with_git(file_or_repo_path_)
163
+ recent_git_files: list[str] = []
89
164
 
165
+ # Determine the context directory
90
166
  if repo is not None:
91
167
  context_dir = Path(repo.path).parent
92
168
  else:
@@ -95,8 +171,22 @@ def get_repo_context(file_or_repo_path: str, max_files: int) -> tuple[str, Path]
95
171
  else:
96
172
  context_dir = file_or_repo_path_
97
173
 
174
+ # Load workspace stats from the context directory
175
+ workspace_stats = load_workspace_stats(str(context_dir))
176
+
177
+ # Get all files and calculate dynamic max files limit once
98
178
  all_files = get_all_files_max_depth(str(context_dir), 10, repo)
99
179
 
180
+ # For Git repositories, get recent files
181
+ if repo is not None:
182
+ dynamic_max_files = calculate_dynamic_file_limit(len(all_files))
183
+ # Get recent git files - get at least 10 or 20% of dynamic_max_files, whichever is larger
184
+ recent_files_count = max(10, int(dynamic_max_files * 0.2))
185
+ recent_git_files = get_recent_git_files(repo, recent_files_count)
186
+ else:
187
+ # We don't want dynamic limit for non git folders like /tmp or ~
188
+ dynamic_max_files = 50
189
+
100
190
  # Calculate probabilities in batch
101
191
  path_scores = PATH_SCORER.calculate_path_probabilities_batch(all_files)
102
192
 
@@ -106,10 +196,61 @@ def get_repo_context(file_or_repo_path: str, max_files: int) -> tuple[str, Path]
106
196
  path for path, _ in sorted(path_with_scores, key=lambda x: x[1], reverse=True)
107
197
  ]
108
198
 
109
- top_files = sorted_files[:max_files]
199
+ # Start with recent git files, then add other important files
200
+ top_files = []
201
+
202
+ # If we have workspace stats, prioritize the most active files first
203
+ active_files = []
204
+ if workspace_stats is not None:
205
+ # Get files with activity score (weighted count of operations)
206
+ scored_files = []
207
+ for file_path, file_stats in workspace_stats.files.items():
208
+ try:
209
+ # Convert to relative path if possible
210
+ if str(context_dir) in file_path:
211
+ rel_path = os.path.relpath(file_path, str(context_dir))
212
+ else:
213
+ rel_path = file_path
214
+
215
+ # Calculate activity score - weight reads more for this functionality
216
+ activity_score = (
217
+ file_stats.read_count * 2
218
+ + (file_stats.edit_count)
219
+ + (file_stats.write_count)
220
+ )
221
+
222
+ # Only include files that still exist
223
+ if rel_path in all_files or os.path.exists(file_path):
224
+ scored_files.append((rel_path, activity_score))
225
+ except (ValueError, OSError):
226
+ # Skip files that cause path resolution errors
227
+ continue
110
228
 
111
- directory_printer = DirectoryTree(context_dir, max_files=max_files)
112
- for file in top_files:
229
+ # Sort by activity score (highest first) and get top 5
230
+ active_files = [
231
+ f for f, _ in sorted(scored_files, key=lambda x: x[1], reverse=True)[:5]
232
+ ]
233
+
234
+ # Add active files first
235
+ for file in active_files:
236
+ if file not in top_files and file in all_files:
237
+ top_files.append(file)
238
+
239
+ # Add recent git files next - these should be prioritized
240
+ for file in recent_git_files:
241
+ if file not in top_files and file in all_files:
242
+ top_files.append(file)
243
+
244
+ # Use statistical sorting for the remaining files, but respect dynamic_max_files limit
245
+ # and ensure we don't add duplicates
246
+ if len(top_files) < dynamic_max_files:
247
+ # Only add statistically important files that aren't already in top_files
248
+ for file in sorted_files:
249
+ if file not in top_files and len(top_files) < dynamic_max_files:
250
+ top_files.append(file)
251
+
252
+ directory_printer = DirectoryTree(context_dir, max_files=dynamic_max_files)
253
+ for file in top_files[:dynamic_max_files]:
113
254
  directory_printer.expand(file)
114
255
 
115
256
  return directory_printer.display(), context_dir
@@ -127,7 +268,7 @@ if __name__ == "__main__":
127
268
  # Profile using cProfile for overall function statistics
128
269
  profiler = cProfile.Profile()
129
270
  profiler.enable()
130
- result = get_repo_context(folder, 200)[0]
271
+ result = get_repo_context(folder)[0]
131
272
  profiler.disable()
132
273
 
133
274
  # Print cProfile stats
@@ -139,7 +280,7 @@ if __name__ == "__main__":
139
280
  # Profile using line_profiler for line-by-line statistics
140
281
  lp = LineProfiler()
141
282
  lp_wrapper = lp(get_repo_context)
142
- lp_wrapper(folder, 200)
283
+ lp_wrapper(folder)
143
284
 
144
285
  print("\n=== Line-by-line profiling ===")
145
286
  lp.print_stats()
@@ -5,11 +5,10 @@ from typing import Any
5
5
  from ..types_ import (
6
6
  BashCommand,
7
7
  ContextSave,
8
- FileEdit,
8
+ FileWriteOrEdit,
9
9
  Initialize,
10
10
  ReadFiles,
11
11
  ReadImage,
12
- WriteIfEmpty,
13
12
  )
14
13
 
15
14
  with open(os.path.join(os.path.dirname(__file__), "diff-instructions.txt")) as f:
@@ -62,15 +61,9 @@ TOOL_PROMPTS = [
62
61
  description="""
63
62
  - Read full file content of one or more files.
64
63
  - Provide absolute paths only (~ allowed)
65
- """,
66
- ),
67
- Prompts(
68
- inputSchema=WriteIfEmpty.model_json_schema(),
69
- name="WriteIfEmpty",
70
- description="""
71
- - Write content to an empty or non-existent file. Provide file path and content. Use this instead of BashCommand for writing new files.
72
- - Provide absolute path only.
73
- - For editing existing files, use FileEdit instead of this tool.
64
+ - Only if the task requires line numbers understanding:
65
+ - You may populate "show_line_numbers_reason" with your reason, by default null/empty means no line numbers are shown.
66
+ - You may extract a range of lines. E.g., `/path/to/file:1-10` for lines 1-10. You can drop start or end like `/path/to/file:1-` or `/path/to/file:-10`
74
67
  """,
75
68
  ),
76
69
  Prompts(
@@ -79,12 +72,17 @@ TOOL_PROMPTS = [
79
72
  description="Read an image from the shell.",
80
73
  ),
81
74
  Prompts(
82
- inputSchema=FileEdit.model_json_schema(),
83
- name="FileEdit",
75
+ inputSchema=FileWriteOrEdit.model_json_schema(),
76
+ name="FileWriteOrEdit",
84
77
  description="""
85
- - Use absolute path only.
86
- - Use SEARCH/REPLACE blocks to edit the file.
87
- - If the edit fails due to block not matching, please retry with correct block till it matches. Re-read the file to ensure you've all the lines correct.
78
+ - Writes or edits a file based on the percentage of changes.
79
+ - Use absolute path only (~ allowed).
80
+ - percentage_to_change is calculated as number of existing lines that will have some diff divided by total existing lines.
81
+ - First write down percentage of lines that need to be replaced in the file (between 0-100) in percentage_to_change
82
+ - percentage_to_change should be low if mostly new code is to be added. It should be high if a lot of things are to be replaced.
83
+ - If percentage_to_change > 50, provide full file content in file_content_or_search_replace_blocks
84
+ - If percentage_to_change <= 50, file_content_or_search_replace_blocks should be search/replace blocks.
85
+
88
86
  """
89
87
  + diffinstructions,
90
88
  ),