wcgw 3.0.7__py3-none-any.whl → 4.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of wcgw might be problematic. Click here for more details.
- wcgw/client/bash_state/bash_state.py +182 -13
- wcgw/client/diff-instructions.txt +29 -15
- wcgw/client/file_ops/diff_edit.py +2 -1
- wcgw/client/file_ops/search_replace.py +37 -21
- wcgw/client/memory.py +5 -2
- wcgw/client/modes.py +7 -7
- wcgw/client/repo_ops/display_tree.py +3 -3
- wcgw/client/repo_ops/file_stats.py +152 -0
- wcgw/client/repo_ops/repo_context.py +122 -4
- wcgw/client/tool_prompts.py +13 -16
- wcgw/client/tools.py +479 -80
- wcgw/relay/serve.py +8 -53
- wcgw/types_.py +103 -16
- {wcgw-3.0.7.dist-info → wcgw-4.0.0.dist-info}/METADATA +36 -19
- {wcgw-3.0.7.dist-info → wcgw-4.0.0.dist-info}/RECORD +20 -19
- wcgw_cli/anthropic_client.py +1 -1
- wcgw_cli/openai_client.py +1 -1
- {wcgw-3.0.7.dist-info → wcgw-4.0.0.dist-info}/WHEEL +0 -0
- {wcgw-3.0.7.dist-info → wcgw-4.0.0.dist-info}/entry_points.txt +0 -0
- {wcgw-3.0.7.dist-info → wcgw-4.0.0.dist-info}/licenses/LICENSE +0 -0
wcgw/client/tools.py
CHANGED
|
@@ -7,6 +7,7 @@ import os
|
|
|
7
7
|
import subprocess
|
|
8
8
|
import traceback
|
|
9
9
|
from dataclasses import dataclass
|
|
10
|
+
from hashlib import sha256
|
|
10
11
|
from os.path import expanduser
|
|
11
12
|
from pathlib import Path
|
|
12
13
|
from tempfile import NamedTemporaryFile
|
|
@@ -28,6 +29,11 @@ from pydantic import BaseModel, TypeAdapter, ValidationError
|
|
|
28
29
|
from syntax_checker import check_syntax
|
|
29
30
|
|
|
30
31
|
from wcgw.client.bash_state.bash_state import get_status
|
|
32
|
+
from wcgw.client.repo_ops.file_stats import (
|
|
33
|
+
FileStats,
|
|
34
|
+
load_workspace_stats,
|
|
35
|
+
save_workspace_stats,
|
|
36
|
+
)
|
|
31
37
|
|
|
32
38
|
from ..types_ import (
|
|
33
39
|
BashCommand,
|
|
@@ -36,6 +42,7 @@ from ..types_ import (
|
|
|
36
42
|
Console,
|
|
37
43
|
ContextSave,
|
|
38
44
|
FileEdit,
|
|
45
|
+
FileWriteOrEdit,
|
|
39
46
|
Initialize,
|
|
40
47
|
Modes,
|
|
41
48
|
ModesConfig,
|
|
@@ -48,7 +55,12 @@ from .bash_state.bash_state import (
|
|
|
48
55
|
execute_bash,
|
|
49
56
|
)
|
|
50
57
|
from .encoder import EncoderDecoder, get_default_encoder
|
|
51
|
-
from .file_ops.search_replace import
|
|
58
|
+
from .file_ops.search_replace import (
|
|
59
|
+
DIVIDER_MARKER,
|
|
60
|
+
REPLACE_MARKER,
|
|
61
|
+
SEARCH_MARKER,
|
|
62
|
+
search_replace_edit,
|
|
63
|
+
)
|
|
52
64
|
from .memory import load_memory, save_memory
|
|
53
65
|
from .modes import (
|
|
54
66
|
ARCHITECT_PROMPT,
|
|
@@ -92,7 +104,7 @@ def initialize(
|
|
|
92
104
|
task_id_to_resume: str,
|
|
93
105
|
max_tokens: Optional[int],
|
|
94
106
|
mode: ModesConfig,
|
|
95
|
-
) -> tuple[str, Context]:
|
|
107
|
+
) -> tuple[str, Context, dict[str, list[tuple[int, int]]]]:
|
|
96
108
|
# Expand the workspace path
|
|
97
109
|
any_workspace_path = expand_user(any_workspace_path)
|
|
98
110
|
repo_context = ""
|
|
@@ -129,6 +141,7 @@ def initialize(
|
|
|
129
141
|
if not read_files_:
|
|
130
142
|
read_files_ = [any_workspace_path]
|
|
131
143
|
any_workspace_path = os.path.dirname(any_workspace_path)
|
|
144
|
+
# Let get_repo_context handle loading the workspace stats
|
|
132
145
|
repo_context, folder_to_start = get_repo_context(any_workspace_path, 50)
|
|
133
146
|
|
|
134
147
|
repo_context = f"---\n# Workspace structure\n{repo_context}\n---\n"
|
|
@@ -151,14 +164,18 @@ def initialize(
|
|
|
151
164
|
if loaded_state is not None:
|
|
152
165
|
try:
|
|
153
166
|
parsed_state = BashState.parse_state(loaded_state)
|
|
167
|
+
workspace_root = (
|
|
168
|
+
str(folder_to_start) if folder_to_start else parsed_state[5]
|
|
169
|
+
)
|
|
154
170
|
if mode == "wcgw":
|
|
155
171
|
context.bash_state.load_state(
|
|
156
172
|
parsed_state[0],
|
|
157
173
|
parsed_state[1],
|
|
158
174
|
parsed_state[2],
|
|
159
175
|
parsed_state[3],
|
|
160
|
-
parsed_state[4]
|
|
161
|
-
str(folder_to_start) if folder_to_start else
|
|
176
|
+
{**parsed_state[4], **context.bash_state.whitelist_for_overwrite},
|
|
177
|
+
str(folder_to_start) if folder_to_start else workspace_root,
|
|
178
|
+
workspace_root,
|
|
162
179
|
)
|
|
163
180
|
else:
|
|
164
181
|
state = modes_to_state(mode)
|
|
@@ -167,8 +184,9 @@ def initialize(
|
|
|
167
184
|
state[1],
|
|
168
185
|
state[2],
|
|
169
186
|
state[3],
|
|
170
|
-
parsed_state[4]
|
|
171
|
-
str(folder_to_start) if folder_to_start else
|
|
187
|
+
{**parsed_state[4], **context.bash_state.whitelist_for_overwrite},
|
|
188
|
+
str(folder_to_start) if folder_to_start else workspace_root,
|
|
189
|
+
workspace_root,
|
|
172
190
|
)
|
|
173
191
|
except ValueError:
|
|
174
192
|
context.console.print(traceback.format_exc())
|
|
@@ -178,12 +196,14 @@ def initialize(
|
|
|
178
196
|
else:
|
|
179
197
|
mode_changed = is_mode_change(mode, context.bash_state)
|
|
180
198
|
state = modes_to_state(mode)
|
|
199
|
+
# Use the provided workspace path as the workspace root
|
|
181
200
|
context.bash_state.load_state(
|
|
182
201
|
state[0],
|
|
183
202
|
state[1],
|
|
184
203
|
state[2],
|
|
185
204
|
state[3],
|
|
186
|
-
|
|
205
|
+
dict(context.bash_state.whitelist_for_overwrite),
|
|
206
|
+
str(folder_to_start) if folder_to_start else "",
|
|
187
207
|
str(folder_to_start) if folder_to_start else "",
|
|
188
208
|
)
|
|
189
209
|
if type == "first_call" or mode_changed:
|
|
@@ -194,6 +214,7 @@ def initialize(
|
|
|
194
214
|
del mode
|
|
195
215
|
|
|
196
216
|
initial_files_context = ""
|
|
217
|
+
initial_paths_with_ranges: dict[str, list[tuple[int, int]]] = {}
|
|
197
218
|
if read_files_:
|
|
198
219
|
if folder_to_start:
|
|
199
220
|
read_files_ = [
|
|
@@ -203,7 +224,9 @@ def initialize(
|
|
|
203
224
|
else expand_user(f)
|
|
204
225
|
for f in read_files_
|
|
205
226
|
]
|
|
206
|
-
initial_files = read_files(
|
|
227
|
+
initial_files, initial_paths_with_ranges, _ = read_files(
|
|
228
|
+
read_files_, max_tokens, context
|
|
229
|
+
)
|
|
207
230
|
initial_files_context = f"---\n# Requested files\n{initial_files}\n---\n"
|
|
208
231
|
|
|
209
232
|
uname_sysname = os.uname().sysname
|
|
@@ -228,7 +251,7 @@ Initialized in directory (also cwd): {context.bash_state.cwd}
|
|
|
228
251
|
|
|
229
252
|
global INITIALIZED
|
|
230
253
|
INITIALIZED = True
|
|
231
|
-
return output, context
|
|
254
|
+
return output, context, initial_paths_with_ranges
|
|
232
255
|
|
|
233
256
|
|
|
234
257
|
def is_mode_change(mode_config: ModesConfig, bash_state: BashState) -> bool:
|
|
@@ -267,7 +290,8 @@ def reset_wcgw(
|
|
|
267
290
|
file_edit_mode,
|
|
268
291
|
write_if_empty_mode,
|
|
269
292
|
mode,
|
|
270
|
-
|
|
293
|
+
dict(context.bash_state.whitelist_for_overwrite),
|
|
294
|
+
starting_directory,
|
|
271
295
|
starting_directory,
|
|
272
296
|
)
|
|
273
297
|
mode_prompt = get_mode_prompt(context)
|
|
@@ -291,7 +315,8 @@ def reset_wcgw(
|
|
|
291
315
|
file_edit_mode,
|
|
292
316
|
write_if_empty_mode,
|
|
293
317
|
mode,
|
|
294
|
-
|
|
318
|
+
dict(context.bash_state.whitelist_for_overwrite),
|
|
319
|
+
starting_directory,
|
|
295
320
|
starting_directory,
|
|
296
321
|
)
|
|
297
322
|
INITIALIZED = True
|
|
@@ -405,39 +430,138 @@ def write_file(
|
|
|
405
430
|
error_on_exist: bool,
|
|
406
431
|
max_tokens: Optional[int],
|
|
407
432
|
context: Context,
|
|
408
|
-
) ->
|
|
433
|
+
) -> tuple[
|
|
434
|
+
str, dict[str, list[tuple[int, int]]]
|
|
435
|
+
]: # Updated to return message and file paths with line ranges
|
|
409
436
|
# Expand the path before checking if it's absolute
|
|
410
437
|
path_ = expand_user(writefile.file_path)
|
|
438
|
+
|
|
439
|
+
workspace_path = context.bash_state.workspace_root
|
|
440
|
+
stats = load_workspace_stats(workspace_path)
|
|
441
|
+
|
|
442
|
+
if path_ not in stats.files:
|
|
443
|
+
stats.files[path_] = FileStats()
|
|
444
|
+
|
|
445
|
+
stats.files[path_].increment_write()
|
|
446
|
+
save_workspace_stats(workspace_path, stats)
|
|
447
|
+
|
|
411
448
|
if not os.path.isabs(path_):
|
|
412
|
-
return
|
|
449
|
+
return (
|
|
450
|
+
f"Failure: file_path should be absolute path, current working directory is {context.bash_state.cwd}",
|
|
451
|
+
{}, # Return empty dict instead of empty list for type consistency
|
|
452
|
+
)
|
|
413
453
|
|
|
414
454
|
error_on_exist_ = (
|
|
415
455
|
error_on_exist and path_ not in context.bash_state.whitelist_for_overwrite
|
|
416
456
|
)
|
|
417
457
|
|
|
458
|
+
if error_on_exist and path_ in context.bash_state.whitelist_for_overwrite:
|
|
459
|
+
# Ensure hash has not changed
|
|
460
|
+
if os.path.exists(path_):
|
|
461
|
+
with open(path_, "rb") as f:
|
|
462
|
+
file_content = f.read()
|
|
463
|
+
curr_hash = sha256(file_content).hexdigest()
|
|
464
|
+
|
|
465
|
+
whitelist_data = context.bash_state.whitelist_for_overwrite[path_]
|
|
466
|
+
|
|
467
|
+
# If we haven't fully read the file or hash has changed, require re-reading
|
|
468
|
+
if curr_hash != whitelist_data.file_hash:
|
|
469
|
+
error_on_exist_ = True
|
|
470
|
+
elif not whitelist_data.is_read_enough():
|
|
471
|
+
error_on_exist_ = True
|
|
472
|
+
|
|
418
473
|
# Validate using write_if_empty_mode after checking whitelist
|
|
419
474
|
allowed_globs = context.bash_state.write_if_empty_mode.allowed_globs
|
|
420
475
|
if allowed_globs != "all" and not any(
|
|
421
476
|
fnmatch.fnmatch(path_, pattern) for pattern in allowed_globs
|
|
422
477
|
):
|
|
423
|
-
return
|
|
478
|
+
return (
|
|
479
|
+
f"Error: updating file {path_} not allowed in current mode. Doesn't match allowed globs: {allowed_globs}",
|
|
480
|
+
{}, # Empty dict instead of empty list
|
|
481
|
+
)
|
|
424
482
|
|
|
425
|
-
add_overwrite_warning = ""
|
|
426
483
|
if (error_on_exist or error_on_exist_) and os.path.exists(path_):
|
|
427
484
|
content = Path(path_).read_text().strip()
|
|
428
485
|
if content:
|
|
429
|
-
content = truncate_if_over(content, max_tokens)
|
|
430
|
-
|
|
431
486
|
if error_on_exist_:
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
487
|
+
file_ranges = []
|
|
488
|
+
|
|
489
|
+
if path_ not in context.bash_state.whitelist_for_overwrite:
|
|
490
|
+
# File hasn't been read at all
|
|
491
|
+
msg = f"Error: you need to read existing file {path_} at least once before it can be overwritten.\n\n"
|
|
492
|
+
# Read the entire file
|
|
493
|
+
file_content_str, truncated, _, _, line_range = read_file(
|
|
494
|
+
path_, max_tokens, context, False
|
|
495
|
+
)
|
|
496
|
+
file_ranges = [line_range]
|
|
497
|
+
|
|
498
|
+
final_message = ""
|
|
499
|
+
if not truncated:
|
|
500
|
+
final_message = "You can now safely retry writing immediately considering the above information."
|
|
501
|
+
|
|
502
|
+
return (
|
|
503
|
+
(
|
|
504
|
+
msg
|
|
505
|
+
+ f"Here's the existing file:\n```\n{file_content_str}\n{final_message}\n```"
|
|
506
|
+
),
|
|
507
|
+
{path_: file_ranges},
|
|
508
|
+
)
|
|
509
|
+
|
|
510
|
+
whitelist_data = context.bash_state.whitelist_for_overwrite[path_]
|
|
511
|
+
|
|
512
|
+
if curr_hash != whitelist_data.file_hash:
|
|
513
|
+
msg = "Error: the file has changed since last read.\n\n"
|
|
514
|
+
# Read the entire file again
|
|
515
|
+
file_content_str, truncated, _, _, line_range = read_file(
|
|
516
|
+
path_, max_tokens, context, False
|
|
517
|
+
)
|
|
518
|
+
file_ranges = [line_range]
|
|
519
|
+
|
|
520
|
+
final_message = ""
|
|
521
|
+
if not truncated:
|
|
522
|
+
final_message = "You can now safely retry writing immediately considering the above information."
|
|
523
|
+
|
|
524
|
+
return (
|
|
525
|
+
(
|
|
526
|
+
msg
|
|
527
|
+
+ f"Here's the existing file:\n```\n{file_content_str}\n```\n{final_message}"
|
|
528
|
+
),
|
|
529
|
+
{path_: file_ranges},
|
|
530
|
+
)
|
|
531
|
+
else:
|
|
532
|
+
# The file hasn't changed, but we haven't read enough of it
|
|
533
|
+
unread_ranges = whitelist_data.get_unread_ranges()
|
|
534
|
+
# Format the ranges as a string for display
|
|
535
|
+
ranges_str = ", ".join(
|
|
536
|
+
[f"{start}-{end}" for start, end in unread_ranges]
|
|
537
|
+
)
|
|
538
|
+
msg = f"Error: you need to read more of the file before it can be overwritten.\nUnread line ranges: {ranges_str}\n\n"
|
|
539
|
+
|
|
540
|
+
# Read just the unread ranges
|
|
541
|
+
paths_: list[str] = []
|
|
542
|
+
for start, end in unread_ranges:
|
|
543
|
+
paths_.append(path_ + ":" + f"{start}-{end}")
|
|
544
|
+
paths_readfiles = ReadFiles(
|
|
545
|
+
file_paths=paths_, show_line_numbers_reason=""
|
|
546
|
+
)
|
|
547
|
+
readfiles, file_ranges_dict, truncated = read_files(
|
|
548
|
+
paths_readfiles.file_paths,
|
|
549
|
+
max_tokens,
|
|
550
|
+
context,
|
|
551
|
+
show_line_numbers=False,
|
|
552
|
+
start_line_nums=paths_readfiles.start_line_nums,
|
|
553
|
+
end_line_nums=paths_readfiles.end_line_nums,
|
|
554
|
+
)
|
|
555
|
+
|
|
556
|
+
final_message = ""
|
|
557
|
+
if not truncated:
|
|
558
|
+
final_message = "Now that you have read the rest of the file, you can now safely immediately retry writing but consider the new information above."
|
|
559
|
+
|
|
560
|
+
return (
|
|
561
|
+
(msg + "\n" + readfiles + "\n" + final_message),
|
|
562
|
+
file_ranges_dict,
|
|
563
|
+
)
|
|
564
|
+
# No need to add to whitelist here - will be handled by get_tool_output
|
|
441
565
|
|
|
442
566
|
path = Path(path_)
|
|
443
567
|
path.parent.mkdir(parents=True, exist_ok=True)
|
|
@@ -446,7 +570,7 @@ def write_file(
|
|
|
446
570
|
with path.open("w") as f:
|
|
447
571
|
f.write(writefile.file_content)
|
|
448
572
|
except OSError as e:
|
|
449
|
-
return f"Error: {e}"
|
|
573
|
+
return f"Error: {e}", {}
|
|
450
574
|
|
|
451
575
|
extension = Path(path_).suffix.lstrip(".")
|
|
452
576
|
|
|
@@ -478,19 +602,17 @@ Syntax errors:
|
|
|
478
602
|
except Exception:
|
|
479
603
|
pass
|
|
480
604
|
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
"\n---\nWarning: a file already existed and it's now overwritten. Was it a mistake? If yes please revert your action."
|
|
484
|
-
"\n---\n"
|
|
485
|
-
+ "Here's the previous content:\n```\n"
|
|
486
|
-
+ add_overwrite_warning
|
|
487
|
-
+ "\n```"
|
|
488
|
-
)
|
|
605
|
+
# Count the lines directly from the content we're writing
|
|
606
|
+
total_lines = writefile.file_content.count("\n") + 1
|
|
489
607
|
|
|
490
|
-
return "Success" + "".join(warnings)
|
|
608
|
+
return "Success" + "".join(warnings), {
|
|
609
|
+
path_: [(1, total_lines)]
|
|
610
|
+
} # Return the file path with line range along with success message
|
|
491
611
|
|
|
492
612
|
|
|
493
|
-
def do_diff_edit(
|
|
613
|
+
def do_diff_edit(
|
|
614
|
+
fedit: FileEdit, max_tokens: Optional[int], context: Context
|
|
615
|
+
) -> tuple[str, dict[str, list[tuple[int, int]]]]:
|
|
494
616
|
try:
|
|
495
617
|
return _do_diff_edit(fedit, max_tokens, context)
|
|
496
618
|
except Exception as e:
|
|
@@ -508,16 +630,28 @@ def do_diff_edit(fedit: FileEdit, max_tokens: Optional[int], context: Context) -
|
|
|
508
630
|
raise e
|
|
509
631
|
|
|
510
632
|
|
|
511
|
-
def _do_diff_edit(
|
|
633
|
+
def _do_diff_edit(
|
|
634
|
+
fedit: FileEdit, max_tokens: Optional[int], context: Context
|
|
635
|
+
) -> tuple[str, dict[str, list[tuple[int, int]]]]:
|
|
512
636
|
context.console.log(f"Editing file: {fedit.file_path}")
|
|
513
637
|
|
|
514
638
|
# Expand the path before checking if it's absolute
|
|
515
639
|
path_ = expand_user(fedit.file_path)
|
|
640
|
+
|
|
516
641
|
if not os.path.isabs(path_):
|
|
517
642
|
raise Exception(
|
|
518
643
|
f"Failure: file_path should be absolute path, current working directory is {context.bash_state.cwd}"
|
|
519
644
|
)
|
|
520
645
|
|
|
646
|
+
workspace_path = context.bash_state.workspace_root
|
|
647
|
+
stats = load_workspace_stats(workspace_path)
|
|
648
|
+
|
|
649
|
+
if path_ not in stats.files:
|
|
650
|
+
stats.files[path_] = FileStats()
|
|
651
|
+
|
|
652
|
+
stats.files[path_].increment_edit()
|
|
653
|
+
save_workspace_stats(workspace_path, stats)
|
|
654
|
+
|
|
521
655
|
# Validate using file_edit_mode
|
|
522
656
|
allowed_globs = context.bash_state.file_edit_mode.allowed_globs
|
|
523
657
|
if allowed_globs != "all" and not any(
|
|
@@ -527,8 +661,7 @@ def _do_diff_edit(fedit: FileEdit, max_tokens: Optional[int], context: Context)
|
|
|
527
661
|
f"Error: updating file {path_} not allowed in current mode. Doesn't match allowed globs: {allowed_globs}"
|
|
528
662
|
)
|
|
529
663
|
|
|
530
|
-
#
|
|
531
|
-
context.bash_state.add_to_whitelist_for_overwrite(path_)
|
|
664
|
+
# No need to add to whitelist here - will be handled by get_tool_output
|
|
532
665
|
|
|
533
666
|
if not os.path.exists(path_):
|
|
534
667
|
raise Exception(f"Error: file {path_} does not exist")
|
|
@@ -545,6 +678,9 @@ def _do_diff_edit(fedit: FileEdit, max_tokens: Optional[int], context: Context)
|
|
|
545
678
|
lines, apply_diff_to, context.console.log
|
|
546
679
|
)
|
|
547
680
|
|
|
681
|
+
# Count the lines just once - after the edit but before writing
|
|
682
|
+
total_lines = apply_diff_to.count("\n") + 1
|
|
683
|
+
|
|
548
684
|
with open(path_, "w") as f:
|
|
549
685
|
f.write(apply_diff_to)
|
|
550
686
|
|
|
@@ -561,29 +697,93 @@ def _do_diff_edit(fedit: FileEdit, max_tokens: Optional[int], context: Context)
|
|
|
561
697
|
syntax_errors += "\nNote: Ignore if 'tagged template literals' are used, they may raise false positive errors in tree-sitter."
|
|
562
698
|
|
|
563
699
|
context.console.print(f"W: Syntax errors encountered: {syntax_errors}")
|
|
564
|
-
|
|
700
|
+
|
|
701
|
+
return (
|
|
702
|
+
f"""{comments}
|
|
565
703
|
---
|
|
566
704
|
Warning: tree-sitter reported syntax errors, please re-read the file and fix if there are any errors.
|
|
567
705
|
Syntax errors:
|
|
568
706
|
{syntax_errors}
|
|
569
707
|
|
|
570
708
|
{context_for_errors}
|
|
571
|
-
"""
|
|
709
|
+
""",
|
|
710
|
+
{path_: [(1, total_lines)]},
|
|
711
|
+
) # Return the file path with line range along with the warning message
|
|
572
712
|
except Exception:
|
|
573
713
|
pass
|
|
574
714
|
|
|
575
|
-
return comments
|
|
715
|
+
return comments, {
|
|
716
|
+
path_: [(1, total_lines)]
|
|
717
|
+
} # Return the file path with line range along with the edit comments
|
|
718
|
+
|
|
719
|
+
|
|
720
|
+
def _is_edit(content: str, percentage: int) -> bool:
|
|
721
|
+
lines = content.lstrip().split("\n")
|
|
722
|
+
if not lines:
|
|
723
|
+
return False
|
|
724
|
+
line = lines[0]
|
|
725
|
+
if SEARCH_MARKER.match(line):
|
|
726
|
+
return True
|
|
727
|
+
if percentage <= 50:
|
|
728
|
+
for line in lines:
|
|
729
|
+
if (
|
|
730
|
+
SEARCH_MARKER.match(line)
|
|
731
|
+
or DIVIDER_MARKER.match(line)
|
|
732
|
+
or REPLACE_MARKER.match(line)
|
|
733
|
+
):
|
|
734
|
+
return True
|
|
735
|
+
return False
|
|
736
|
+
|
|
737
|
+
|
|
738
|
+
def file_writing(
|
|
739
|
+
file_writing_args: FileWriteOrEdit,
|
|
740
|
+
max_tokens: Optional[int],
|
|
741
|
+
context: Context,
|
|
742
|
+
) -> tuple[
|
|
743
|
+
str, dict[str, list[tuple[int, int]]]
|
|
744
|
+
]: # Updated to return message and file paths with line ranges
|
|
745
|
+
"""
|
|
746
|
+
Write or edit a file based on percentage of changes.
|
|
747
|
+
If percentage_changed > 50%, treat content as direct file content.
|
|
748
|
+
Otherwise, treat content as search/replace blocks.
|
|
749
|
+
"""
|
|
750
|
+
# Expand the path before checking if it's absolute
|
|
751
|
+
path_ = expand_user(file_writing_args.file_path)
|
|
752
|
+
if not os.path.isabs(path_):
|
|
753
|
+
return (
|
|
754
|
+
f"Failure: file_path should be absolute path, current working directory is {context.bash_state.cwd}",
|
|
755
|
+
{}, # Return empty dict instead of empty list for type consistency
|
|
756
|
+
)
|
|
576
757
|
|
|
758
|
+
# If file doesn't exist, always use direct file_content mode
|
|
759
|
+
content = file_writing_args.file_content_or_search_replace_blocks
|
|
760
|
+
|
|
761
|
+
if not _is_edit(content, file_writing_args.percentage_to_change):
|
|
762
|
+
# Use direct content mode (same as WriteIfEmpty)
|
|
763
|
+
result, paths = write_file(
|
|
764
|
+
WriteIfEmpty(
|
|
765
|
+
file_path=path_,
|
|
766
|
+
file_content=file_writing_args.file_content_or_search_replace_blocks,
|
|
767
|
+
),
|
|
768
|
+
True,
|
|
769
|
+
max_tokens,
|
|
770
|
+
context,
|
|
771
|
+
)
|
|
772
|
+
return result, paths
|
|
773
|
+
else:
|
|
774
|
+
# File exists and percentage <= 50, use search/replace mode
|
|
775
|
+
result, paths = do_diff_edit(
|
|
776
|
+
FileEdit(
|
|
777
|
+
file_path=path_,
|
|
778
|
+
file_edit_using_search_replace_blocks=file_writing_args.file_content_or_search_replace_blocks,
|
|
779
|
+
),
|
|
780
|
+
max_tokens,
|
|
781
|
+
context,
|
|
782
|
+
)
|
|
783
|
+
return result, paths
|
|
577
784
|
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
| WriteIfEmpty
|
|
581
|
-
| FileEdit
|
|
582
|
-
| ReadImage
|
|
583
|
-
| ReadFiles
|
|
584
|
-
| Initialize
|
|
585
|
-
| ContextSave
|
|
586
|
-
)
|
|
785
|
+
|
|
786
|
+
TOOLS = BashCommand | FileWriteOrEdit | ReadImage | ReadFiles | Initialize | ContextSave
|
|
587
787
|
|
|
588
788
|
|
|
589
789
|
def which_tool(args: str) -> TOOLS:
|
|
@@ -594,10 +794,8 @@ def which_tool(args: str) -> TOOLS:
|
|
|
594
794
|
def which_tool_name(name: str) -> Type[TOOLS]:
|
|
595
795
|
if name == "BashCommand":
|
|
596
796
|
return BashCommand
|
|
597
|
-
elif name == "
|
|
598
|
-
return
|
|
599
|
-
elif name == "FileEdit":
|
|
600
|
-
return FileEdit
|
|
797
|
+
elif name == "FileWriteOrEdit":
|
|
798
|
+
return FileWriteOrEdit
|
|
601
799
|
elif name == "ReadImage":
|
|
602
800
|
return ReadImage
|
|
603
801
|
elif name == "ReadFiles":
|
|
@@ -647,32 +845,80 @@ def get_tool_output(
|
|
|
647
845
|
output: tuple[str | ImageData, float]
|
|
648
846
|
TOOL_CALLS.append(arg)
|
|
649
847
|
|
|
848
|
+
# Initialize a dictionary to track file paths and line ranges
|
|
849
|
+
file_paths_with_ranges: dict[str, list[tuple[int, int]]] = {}
|
|
850
|
+
|
|
650
851
|
if isinstance(arg, BashCommand):
|
|
651
852
|
context.console.print("Calling execute bash tool")
|
|
652
853
|
if not INITIALIZED:
|
|
653
854
|
raise Exception("Initialize tool not called yet.")
|
|
654
855
|
|
|
655
|
-
|
|
856
|
+
output_str, cost = execute_bash(
|
|
656
857
|
context.bash_state, enc, arg, max_tokens, arg.wait_for_seconds
|
|
657
858
|
)
|
|
859
|
+
output = output_str, cost
|
|
658
860
|
elif isinstance(arg, WriteIfEmpty):
|
|
659
861
|
context.console.print("Calling write file tool")
|
|
660
862
|
if not INITIALIZED:
|
|
661
863
|
raise Exception("Initialize tool not called yet.")
|
|
662
864
|
|
|
663
|
-
|
|
865
|
+
result, write_paths = write_file(arg, True, max_tokens, context)
|
|
866
|
+
output = result, 0
|
|
867
|
+
# Add write paths with their ranges to our tracking dictionary
|
|
868
|
+
for path, ranges in write_paths.items():
|
|
869
|
+
if path in file_paths_with_ranges:
|
|
870
|
+
file_paths_with_ranges[path].extend(ranges)
|
|
871
|
+
else:
|
|
872
|
+
file_paths_with_ranges[path] = ranges.copy()
|
|
664
873
|
elif isinstance(arg, FileEdit):
|
|
665
874
|
context.console.print("Calling full file edit tool")
|
|
666
875
|
if not INITIALIZED:
|
|
667
876
|
raise Exception("Initialize tool not called yet.")
|
|
668
877
|
|
|
669
|
-
|
|
878
|
+
result, edit_paths = do_diff_edit(arg, max_tokens, context)
|
|
879
|
+
output = result, 0.0
|
|
880
|
+
# Add edit paths with their ranges to our tracking dictionary
|
|
881
|
+
for path, ranges in edit_paths.items():
|
|
882
|
+
if path in file_paths_with_ranges:
|
|
883
|
+
file_paths_with_ranges[path].extend(ranges)
|
|
884
|
+
else:
|
|
885
|
+
file_paths_with_ranges[path] = ranges.copy()
|
|
886
|
+
elif isinstance(arg, FileWriteOrEdit):
|
|
887
|
+
context.console.print("Calling file writing tool")
|
|
888
|
+
if not INITIALIZED:
|
|
889
|
+
raise Exception("Initialize tool not called yet.")
|
|
890
|
+
|
|
891
|
+
result, write_edit_paths = file_writing(arg, max_tokens, context)
|
|
892
|
+
output = result, 0.0
|
|
893
|
+
# Add write/edit paths with their ranges to our tracking dictionary
|
|
894
|
+
for path, ranges in write_edit_paths.items():
|
|
895
|
+
if path in file_paths_with_ranges:
|
|
896
|
+
file_paths_with_ranges[path].extend(ranges)
|
|
897
|
+
else:
|
|
898
|
+
file_paths_with_ranges[path] = ranges.copy()
|
|
670
899
|
elif isinstance(arg, ReadImage):
|
|
671
900
|
context.console.print("Calling read image tool")
|
|
672
|
-
|
|
901
|
+
image_data = read_image_from_shell(arg.file_path, context)
|
|
902
|
+
output = image_data, 0.0
|
|
673
903
|
elif isinstance(arg, ReadFiles):
|
|
674
904
|
context.console.print("Calling read file tool")
|
|
675
|
-
|
|
905
|
+
# Access line numbers through properties
|
|
906
|
+
result, file_ranges_dict, _ = read_files(
|
|
907
|
+
arg.file_paths,
|
|
908
|
+
max_tokens,
|
|
909
|
+
context,
|
|
910
|
+
bool(arg.show_line_numbers_reason),
|
|
911
|
+
arg.start_line_nums,
|
|
912
|
+
arg.end_line_nums,
|
|
913
|
+
)
|
|
914
|
+
output = result, 0.0
|
|
915
|
+
|
|
916
|
+
# Merge the new file ranges into our tracking dictionary
|
|
917
|
+
for path, ranges in file_ranges_dict.items():
|
|
918
|
+
if path in file_paths_with_ranges:
|
|
919
|
+
file_paths_with_ranges[path].extend(ranges)
|
|
920
|
+
else:
|
|
921
|
+
file_paths_with_ranges[path] = ranges
|
|
676
922
|
elif isinstance(arg, Initialize):
|
|
677
923
|
context.console.print("Calling initial info tool")
|
|
678
924
|
if arg.type == "user_asked_mode_change" or arg.type == "reset_shell":
|
|
@@ -694,7 +940,7 @@ def get_tool_output(
|
|
|
694
940
|
0.0,
|
|
695
941
|
)
|
|
696
942
|
else:
|
|
697
|
-
output_, context = initialize(
|
|
943
|
+
output_, context, init_paths = initialize(
|
|
698
944
|
arg.type,
|
|
699
945
|
context,
|
|
700
946
|
arg.any_workspace_path,
|
|
@@ -704,6 +950,13 @@ def get_tool_output(
|
|
|
704
950
|
arg.mode,
|
|
705
951
|
)
|
|
706
952
|
output = output_, 0.0
|
|
953
|
+
# Since init_paths is already a dictionary mapping file paths to line ranges,
|
|
954
|
+
# we just need to merge it with our tracking dictionary
|
|
955
|
+
for path, ranges in init_paths.items():
|
|
956
|
+
if path not in file_paths_with_ranges and os.path.exists(path):
|
|
957
|
+
file_paths_with_ranges[path] = ranges
|
|
958
|
+
elif path in file_paths_with_ranges:
|
|
959
|
+
file_paths_with_ranges[path].extend(ranges)
|
|
707
960
|
|
|
708
961
|
elif isinstance(arg, ContextSave):
|
|
709
962
|
context.console.print("Calling task memory tool")
|
|
@@ -721,7 +974,7 @@ def get_tool_output(
|
|
|
721
974
|
relevant_files.extend(globs[:1000])
|
|
722
975
|
if not globs:
|
|
723
976
|
warnings += f"Warning: No files found for the glob: {fglob}\n"
|
|
724
|
-
relevant_files_data = read_files(relevant_files[:10_000], None, context)
|
|
977
|
+
relevant_files_data, _, _ = read_files(relevant_files[:10_000], None, context)
|
|
725
978
|
save_path = save_memory(
|
|
726
979
|
arg, relevant_files_data, context.bash_state.serialize()
|
|
727
980
|
)
|
|
@@ -736,6 +989,10 @@ def get_tool_output(
|
|
|
736
989
|
output = output_, 0.0
|
|
737
990
|
else:
|
|
738
991
|
raise ValueError(f"Unknown tool: {arg}")
|
|
992
|
+
|
|
993
|
+
if file_paths_with_ranges: # Only add to whitelist if we have paths
|
|
994
|
+
context.bash_state.add_to_whitelist_for_overwrite(file_paths_with_ranges)
|
|
995
|
+
|
|
739
996
|
if isinstance(output[0], str):
|
|
740
997
|
context.console.print(str(output[0]))
|
|
741
998
|
else:
|
|
@@ -749,13 +1006,64 @@ default_enc = get_default_encoder()
|
|
|
749
1006
|
curr_cost = 0.0
|
|
750
1007
|
|
|
751
1008
|
|
|
1009
|
+
def range_format(start_line_num: Optional[int], end_line_num: Optional[int]) -> str:
|
|
1010
|
+
st = "" if not start_line_num else str(start_line_num)
|
|
1011
|
+
end = "" if not end_line_num else str(end_line_num)
|
|
1012
|
+
if not st and not end:
|
|
1013
|
+
return ""
|
|
1014
|
+
return f":{st}-{end}"
|
|
1015
|
+
|
|
1016
|
+
|
|
752
1017
|
def read_files(
|
|
753
|
-
file_paths: list[str],
|
|
754
|
-
|
|
1018
|
+
file_paths: list[str],
|
|
1019
|
+
max_tokens: Optional[int],
|
|
1020
|
+
context: Context,
|
|
1021
|
+
show_line_numbers: bool = False,
|
|
1022
|
+
start_line_nums: Optional[list[Optional[int]]] = None,
|
|
1023
|
+
end_line_nums: Optional[list[Optional[int]]] = None,
|
|
1024
|
+
) -> tuple[
|
|
1025
|
+
str, dict[str, list[tuple[int, int]]], bool
|
|
1026
|
+
]: # Updated to return file paths with ranges
|
|
755
1027
|
message = ""
|
|
1028
|
+
file_ranges_dict: dict[
|
|
1029
|
+
str, list[tuple[int, int]]
|
|
1030
|
+
] = {} # Map file paths to line ranges
|
|
1031
|
+
|
|
1032
|
+
workspace_path = context.bash_state.workspace_root
|
|
1033
|
+
stats = load_workspace_stats(workspace_path)
|
|
1034
|
+
|
|
1035
|
+
for path_ in file_paths:
|
|
1036
|
+
path_ = expand_user(path_)
|
|
1037
|
+
if not os.path.isabs(path_):
|
|
1038
|
+
continue
|
|
1039
|
+
if path_ not in stats.files:
|
|
1040
|
+
stats.files[path_] = FileStats()
|
|
1041
|
+
|
|
1042
|
+
stats.files[path_].increment_read()
|
|
1043
|
+
save_workspace_stats(workspace_path, stats)
|
|
1044
|
+
truncated = False
|
|
756
1045
|
for i, file in enumerate(file_paths):
|
|
757
1046
|
try:
|
|
758
|
-
|
|
1047
|
+
# Use line numbers from parameters if provided
|
|
1048
|
+
start_line_num = None if start_line_nums is None else start_line_nums[i]
|
|
1049
|
+
end_line_num = None if end_line_nums is None else end_line_nums[i]
|
|
1050
|
+
|
|
1051
|
+
# For backward compatibility, we still need to extract line numbers from path
|
|
1052
|
+
# if they weren't provided as parameters
|
|
1053
|
+
content, truncated, tokens, path, line_range = read_file(
|
|
1054
|
+
file,
|
|
1055
|
+
max_tokens,
|
|
1056
|
+
context,
|
|
1057
|
+
show_line_numbers,
|
|
1058
|
+
start_line_num,
|
|
1059
|
+
end_line_num,
|
|
1060
|
+
)
|
|
1061
|
+
|
|
1062
|
+
# Add file path with line range to dictionary
|
|
1063
|
+
if path in file_ranges_dict:
|
|
1064
|
+
file_ranges_dict[path].append(line_range)
|
|
1065
|
+
else:
|
|
1066
|
+
file_ranges_dict[path] = [line_range]
|
|
759
1067
|
except Exception as e:
|
|
760
1068
|
message += f"\n{file}: {str(e)}\n"
|
|
761
1069
|
continue
|
|
@@ -763,7 +1071,8 @@ def read_files(
|
|
|
763
1071
|
if max_tokens:
|
|
764
1072
|
max_tokens = max_tokens - tokens
|
|
765
1073
|
|
|
766
|
-
|
|
1074
|
+
range_formatted = range_format(start_line_num, end_line_num)
|
|
1075
|
+
message += f"\n{file}{range_formatted}\n```\n{content}\n"
|
|
767
1076
|
|
|
768
1077
|
if truncated or (max_tokens and max_tokens <= 0):
|
|
769
1078
|
not_reading = file_paths[i + 1 :]
|
|
@@ -772,15 +1081,21 @@ def read_files(
|
|
|
772
1081
|
break
|
|
773
1082
|
else:
|
|
774
1083
|
message += "```"
|
|
775
|
-
|
|
776
|
-
return message
|
|
1084
|
+
return message, file_ranges_dict, truncated
|
|
777
1085
|
|
|
778
1086
|
|
|
779
1087
|
def read_file(
|
|
780
|
-
file_path: str,
|
|
781
|
-
|
|
1088
|
+
file_path: str,
|
|
1089
|
+
max_tokens: Optional[int],
|
|
1090
|
+
context: Context,
|
|
1091
|
+
show_line_numbers: bool = False,
|
|
1092
|
+
start_line_num: Optional[int] = None,
|
|
1093
|
+
end_line_num: Optional[int] = None,
|
|
1094
|
+
) -> tuple[str, bool, int, str, tuple[int, int]]:
|
|
782
1095
|
context.console.print(f"Reading file: {file_path}")
|
|
783
1096
|
|
|
1097
|
+
# Line numbers are now passed as parameters, no need to parse from path
|
|
1098
|
+
|
|
784
1099
|
# Expand the path before checking if it's absolute
|
|
785
1100
|
file_path = expand_user(file_path)
|
|
786
1101
|
|
|
@@ -789,28 +1104,83 @@ def read_file(
|
|
|
789
1104
|
f"Failure: file_path should be absolute path, current working directory is {context.bash_state.cwd}"
|
|
790
1105
|
)
|
|
791
1106
|
|
|
792
|
-
context.bash_state.add_to_whitelist_for_overwrite(file_path)
|
|
793
|
-
|
|
794
1107
|
path = Path(file_path)
|
|
795
1108
|
if not path.exists():
|
|
796
1109
|
raise ValueError(f"Error: file {file_path} does not exist")
|
|
797
1110
|
|
|
1111
|
+
# Read all lines of the file
|
|
798
1112
|
with path.open("r") as f:
|
|
799
|
-
|
|
1113
|
+
all_lines = f.readlines(10_000_000)
|
|
1114
|
+
|
|
1115
|
+
if all_lines[-1].endswith("\n"):
|
|
1116
|
+
# Special handling of line counts because readlines doesn't consider last empty line as a separate line
|
|
1117
|
+
all_lines[-1] = all_lines[-1][:-1]
|
|
1118
|
+
all_lines.append("")
|
|
1119
|
+
|
|
1120
|
+
total_lines = len(all_lines)
|
|
1121
|
+
|
|
1122
|
+
# Apply line range filtering if specified
|
|
1123
|
+
start_idx = 0
|
|
1124
|
+
if start_line_num is not None:
|
|
1125
|
+
# Convert 1-indexed line number to 0-indexed
|
|
1126
|
+
start_idx = max(0, start_line_num - 1)
|
|
1127
|
+
|
|
1128
|
+
end_idx = len(all_lines)
|
|
1129
|
+
if end_line_num is not None:
|
|
1130
|
+
# end_line_num is inclusive, so we use min to ensure it's within bounds
|
|
1131
|
+
end_idx = min(len(all_lines), end_line_num)
|
|
1132
|
+
|
|
1133
|
+
# Convert back to 1-indexed line numbers for tracking
|
|
1134
|
+
effective_start = start_line_num if start_line_num is not None else 1
|
|
1135
|
+
effective_end = end_line_num if end_line_num is not None else total_lines
|
|
1136
|
+
|
|
1137
|
+
filtered_lines = all_lines[start_idx:end_idx]
|
|
1138
|
+
|
|
1139
|
+
# Create content with or without line numbers
|
|
1140
|
+
if show_line_numbers:
|
|
1141
|
+
content_lines = []
|
|
1142
|
+
for i, line in enumerate(filtered_lines, start=start_idx + 1):
|
|
1143
|
+
content_lines.append(f"{i} {line}")
|
|
1144
|
+
content = "".join(content_lines)
|
|
1145
|
+
else:
|
|
1146
|
+
content = "".join(filtered_lines)
|
|
800
1147
|
|
|
801
1148
|
truncated = False
|
|
802
1149
|
tokens_counts = 0
|
|
1150
|
+
|
|
1151
|
+
# Handle token limit if specified
|
|
803
1152
|
if max_tokens is not None:
|
|
804
1153
|
tokens = default_enc.encoder(content)
|
|
805
1154
|
tokens_counts = len(tokens)
|
|
1155
|
+
|
|
806
1156
|
if len(tokens) > max_tokens:
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
1157
|
+
# Truncate at token boundary first
|
|
1158
|
+
truncated_tokens = tokens[:max_tokens]
|
|
1159
|
+
truncated_content = default_enc.decoder(truncated_tokens)
|
|
1160
|
+
|
|
1161
|
+
# Count how many lines we kept
|
|
1162
|
+
line_count = truncated_content.count("\n")
|
|
1163
|
+
|
|
1164
|
+
# Calculate the last line number shown (1-indexed)
|
|
1165
|
+
last_line_shown = start_idx + line_count
|
|
1166
|
+
|
|
1167
|
+
content = truncated_content
|
|
1168
|
+
# Add informative message about truncation with total line count
|
|
1169
|
+
total_lines = len(all_lines)
|
|
1170
|
+
content += f"\n(...truncated) Only showing till line number {last_line_shown} of {total_lines} total lines due to the token limit, please continue reading from {last_line_shown + 1} if required"
|
|
812
1171
|
truncated = True
|
|
813
|
-
|
|
1172
|
+
|
|
1173
|
+
# Update effective_end if truncated
|
|
1174
|
+
effective_end = last_line_shown
|
|
1175
|
+
|
|
1176
|
+
# Return the content along with the effective line range that was read
|
|
1177
|
+
return (
|
|
1178
|
+
content,
|
|
1179
|
+
truncated,
|
|
1180
|
+
tokens_counts,
|
|
1181
|
+
file_path,
|
|
1182
|
+
(effective_start, effective_end),
|
|
1183
|
+
)
|
|
814
1184
|
|
|
815
1185
|
|
|
816
1186
|
if __name__ == "__main__":
|
|
@@ -851,3 +1221,32 @@ if __name__ == "__main__":
|
|
|
851
1221
|
None,
|
|
852
1222
|
)
|
|
853
1223
|
)
|
|
1224
|
+
|
|
1225
|
+
print(
|
|
1226
|
+
get_tool_output(
|
|
1227
|
+
Context(BASH_STATE, BASH_STATE.console),
|
|
1228
|
+
ReadFiles(
|
|
1229
|
+
file_paths=["/Users/arusia/repos/wcgw/src/wcgw/client/tools.py"],
|
|
1230
|
+
show_line_numbers_reason="true",
|
|
1231
|
+
),
|
|
1232
|
+
default_enc,
|
|
1233
|
+
0,
|
|
1234
|
+
lambda x, y: ("", 0),
|
|
1235
|
+
15000,
|
|
1236
|
+
)[0][0]
|
|
1237
|
+
)
|
|
1238
|
+
|
|
1239
|
+
print(
|
|
1240
|
+
get_tool_output(
|
|
1241
|
+
Context(BASH_STATE, BASH_STATE.console),
|
|
1242
|
+
FileWriteOrEdit(
|
|
1243
|
+
file_path="/Users/arusia/repos/wcgw/src/wcgw/client/tools.py",
|
|
1244
|
+
file_content_or_search_replace_blocks="""test""",
|
|
1245
|
+
percentage_to_change=100,
|
|
1246
|
+
),
|
|
1247
|
+
default_enc,
|
|
1248
|
+
0,
|
|
1249
|
+
lambda x, y: ("", 0),
|
|
1250
|
+
800,
|
|
1251
|
+
)[0][0]
|
|
1252
|
+
)
|