wcgw 3.0.7__py3-none-any.whl → 4.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of wcgw might be problematic. Click here for more details.

wcgw/client/tools.py CHANGED
@@ -7,6 +7,7 @@ import os
7
7
  import subprocess
8
8
  import traceback
9
9
  from dataclasses import dataclass
10
+ from hashlib import sha256
10
11
  from os.path import expanduser
11
12
  from pathlib import Path
12
13
  from tempfile import NamedTemporaryFile
@@ -28,6 +29,11 @@ from pydantic import BaseModel, TypeAdapter, ValidationError
28
29
  from syntax_checker import check_syntax
29
30
 
30
31
  from wcgw.client.bash_state.bash_state import get_status
32
+ from wcgw.client.repo_ops.file_stats import (
33
+ FileStats,
34
+ load_workspace_stats,
35
+ save_workspace_stats,
36
+ )
31
37
 
32
38
  from ..types_ import (
33
39
  BashCommand,
@@ -36,6 +42,7 @@ from ..types_ import (
36
42
  Console,
37
43
  ContextSave,
38
44
  FileEdit,
45
+ FileWriteOrEdit,
39
46
  Initialize,
40
47
  Modes,
41
48
  ModesConfig,
@@ -48,7 +55,12 @@ from .bash_state.bash_state import (
48
55
  execute_bash,
49
56
  )
50
57
  from .encoder import EncoderDecoder, get_default_encoder
51
- from .file_ops.search_replace import search_replace_edit
58
+ from .file_ops.search_replace import (
59
+ DIVIDER_MARKER,
60
+ REPLACE_MARKER,
61
+ SEARCH_MARKER,
62
+ search_replace_edit,
63
+ )
52
64
  from .memory import load_memory, save_memory
53
65
  from .modes import (
54
66
  ARCHITECT_PROMPT,
@@ -92,7 +104,7 @@ def initialize(
92
104
  task_id_to_resume: str,
93
105
  max_tokens: Optional[int],
94
106
  mode: ModesConfig,
95
- ) -> tuple[str, Context]:
107
+ ) -> tuple[str, Context, dict[str, list[tuple[int, int]]]]:
96
108
  # Expand the workspace path
97
109
  any_workspace_path = expand_user(any_workspace_path)
98
110
  repo_context = ""
@@ -129,6 +141,7 @@ def initialize(
129
141
  if not read_files_:
130
142
  read_files_ = [any_workspace_path]
131
143
  any_workspace_path = os.path.dirname(any_workspace_path)
144
+ # Let get_repo_context handle loading the workspace stats
132
145
  repo_context, folder_to_start = get_repo_context(any_workspace_path, 50)
133
146
 
134
147
  repo_context = f"---\n# Workspace structure\n{repo_context}\n---\n"
@@ -151,14 +164,18 @@ def initialize(
151
164
  if loaded_state is not None:
152
165
  try:
153
166
  parsed_state = BashState.parse_state(loaded_state)
167
+ workspace_root = (
168
+ str(folder_to_start) if folder_to_start else parsed_state[5]
169
+ )
154
170
  if mode == "wcgw":
155
171
  context.bash_state.load_state(
156
172
  parsed_state[0],
157
173
  parsed_state[1],
158
174
  parsed_state[2],
159
175
  parsed_state[3],
160
- parsed_state[4] + list(context.bash_state.whitelist_for_overwrite),
161
- str(folder_to_start) if folder_to_start else "",
176
+ {**parsed_state[4], **context.bash_state.whitelist_for_overwrite},
177
+ str(folder_to_start) if folder_to_start else workspace_root,
178
+ workspace_root,
162
179
  )
163
180
  else:
164
181
  state = modes_to_state(mode)
@@ -167,8 +184,9 @@ def initialize(
167
184
  state[1],
168
185
  state[2],
169
186
  state[3],
170
- parsed_state[4] + list(context.bash_state.whitelist_for_overwrite),
171
- str(folder_to_start) if folder_to_start else "",
187
+ {**parsed_state[4], **context.bash_state.whitelist_for_overwrite},
188
+ str(folder_to_start) if folder_to_start else workspace_root,
189
+ workspace_root,
172
190
  )
173
191
  except ValueError:
174
192
  context.console.print(traceback.format_exc())
@@ -178,12 +196,14 @@ def initialize(
178
196
  else:
179
197
  mode_changed = is_mode_change(mode, context.bash_state)
180
198
  state = modes_to_state(mode)
199
+ # Use the provided workspace path as the workspace root
181
200
  context.bash_state.load_state(
182
201
  state[0],
183
202
  state[1],
184
203
  state[2],
185
204
  state[3],
186
- list(context.bash_state.whitelist_for_overwrite),
205
+ dict(context.bash_state.whitelist_for_overwrite),
206
+ str(folder_to_start) if folder_to_start else "",
187
207
  str(folder_to_start) if folder_to_start else "",
188
208
  )
189
209
  if type == "first_call" or mode_changed:
@@ -194,6 +214,7 @@ def initialize(
194
214
  del mode
195
215
 
196
216
  initial_files_context = ""
217
+ initial_paths_with_ranges: dict[str, list[tuple[int, int]]] = {}
197
218
  if read_files_:
198
219
  if folder_to_start:
199
220
  read_files_ = [
@@ -203,7 +224,9 @@ def initialize(
203
224
  else expand_user(f)
204
225
  for f in read_files_
205
226
  ]
206
- initial_files = read_files(read_files_, max_tokens, context)
227
+ initial_files, initial_paths_with_ranges, _ = read_files(
228
+ read_files_, max_tokens, context
229
+ )
207
230
  initial_files_context = f"---\n# Requested files\n{initial_files}\n---\n"
208
231
 
209
232
  uname_sysname = os.uname().sysname
@@ -228,7 +251,7 @@ Initialized in directory (also cwd): {context.bash_state.cwd}
228
251
 
229
252
  global INITIALIZED
230
253
  INITIALIZED = True
231
- return output, context
254
+ return output, context, initial_paths_with_ranges
232
255
 
233
256
 
234
257
  def is_mode_change(mode_config: ModesConfig, bash_state: BashState) -> bool:
@@ -267,7 +290,8 @@ def reset_wcgw(
267
290
  file_edit_mode,
268
291
  write_if_empty_mode,
269
292
  mode,
270
- list(context.bash_state.whitelist_for_overwrite),
293
+ dict(context.bash_state.whitelist_for_overwrite),
294
+ starting_directory,
271
295
  starting_directory,
272
296
  )
273
297
  mode_prompt = get_mode_prompt(context)
@@ -291,7 +315,8 @@ def reset_wcgw(
291
315
  file_edit_mode,
292
316
  write_if_empty_mode,
293
317
  mode,
294
- list(context.bash_state.whitelist_for_overwrite),
318
+ dict(context.bash_state.whitelist_for_overwrite),
319
+ starting_directory,
295
320
  starting_directory,
296
321
  )
297
322
  INITIALIZED = True
@@ -405,39 +430,138 @@ def write_file(
405
430
  error_on_exist: bool,
406
431
  max_tokens: Optional[int],
407
432
  context: Context,
408
- ) -> str:
433
+ ) -> tuple[
434
+ str, dict[str, list[tuple[int, int]]]
435
+ ]: # Updated to return message and file paths with line ranges
409
436
  # Expand the path before checking if it's absolute
410
437
  path_ = expand_user(writefile.file_path)
438
+
439
+ workspace_path = context.bash_state.workspace_root
440
+ stats = load_workspace_stats(workspace_path)
441
+
442
+ if path_ not in stats.files:
443
+ stats.files[path_] = FileStats()
444
+
445
+ stats.files[path_].increment_write()
446
+ save_workspace_stats(workspace_path, stats)
447
+
411
448
  if not os.path.isabs(path_):
412
- return f"Failure: file_path should be absolute path, current working directory is {context.bash_state.cwd}"
449
+ return (
450
+ f"Failure: file_path should be absolute path, current working directory is {context.bash_state.cwd}",
451
+ {}, # Return empty dict instead of empty list for type consistency
452
+ )
413
453
 
414
454
  error_on_exist_ = (
415
455
  error_on_exist and path_ not in context.bash_state.whitelist_for_overwrite
416
456
  )
417
457
 
458
+ if error_on_exist and path_ in context.bash_state.whitelist_for_overwrite:
459
+ # Ensure hash has not changed
460
+ if os.path.exists(path_):
461
+ with open(path_, "rb") as f:
462
+ file_content = f.read()
463
+ curr_hash = sha256(file_content).hexdigest()
464
+
465
+ whitelist_data = context.bash_state.whitelist_for_overwrite[path_]
466
+
467
+ # If we haven't fully read the file or hash has changed, require re-reading
468
+ if curr_hash != whitelist_data.file_hash:
469
+ error_on_exist_ = True
470
+ elif not whitelist_data.is_read_enough():
471
+ error_on_exist_ = True
472
+
418
473
  # Validate using write_if_empty_mode after checking whitelist
419
474
  allowed_globs = context.bash_state.write_if_empty_mode.allowed_globs
420
475
  if allowed_globs != "all" and not any(
421
476
  fnmatch.fnmatch(path_, pattern) for pattern in allowed_globs
422
477
  ):
423
- return f"Error: updating file {path_} not allowed in current mode. Doesn't match allowed globs: {allowed_globs}"
478
+ return (
479
+ f"Error: updating file {path_} not allowed in current mode. Doesn't match allowed globs: {allowed_globs}",
480
+ {}, # Empty dict instead of empty list
481
+ )
424
482
 
425
- add_overwrite_warning = ""
426
483
  if (error_on_exist or error_on_exist_) and os.path.exists(path_):
427
484
  content = Path(path_).read_text().strip()
428
485
  if content:
429
- content = truncate_if_over(content, max_tokens)
430
-
431
486
  if error_on_exist_:
432
- return (
433
- f"Error: can't write to existing file {path_}, use other functions to edit the file"
434
- + f"\nHere's the existing content:\n```\n{content}\n```"
435
- )
436
- else:
437
- add_overwrite_warning = content
438
-
439
- # Since we've already errored once, add this to whitelist
440
- context.bash_state.add_to_whitelist_for_overwrite(path_)
487
+ file_ranges = []
488
+
489
+ if path_ not in context.bash_state.whitelist_for_overwrite:
490
+ # File hasn't been read at all
491
+ msg = f"Error: you need to read existing file {path_} at least once before it can be overwritten.\n\n"
492
+ # Read the entire file
493
+ file_content_str, truncated, _, _, line_range = read_file(
494
+ path_, max_tokens, context, False
495
+ )
496
+ file_ranges = [line_range]
497
+
498
+ final_message = ""
499
+ if not truncated:
500
+ final_message = "You can now safely retry writing immediately considering the above information."
501
+
502
+ return (
503
+ (
504
+ msg
505
+ + f"Here's the existing file:\n```\n{file_content_str}\n{final_message}\n```"
506
+ ),
507
+ {path_: file_ranges},
508
+ )
509
+
510
+ whitelist_data = context.bash_state.whitelist_for_overwrite[path_]
511
+
512
+ if curr_hash != whitelist_data.file_hash:
513
+ msg = "Error: the file has changed since last read.\n\n"
514
+ # Read the entire file again
515
+ file_content_str, truncated, _, _, line_range = read_file(
516
+ path_, max_tokens, context, False
517
+ )
518
+ file_ranges = [line_range]
519
+
520
+ final_message = ""
521
+ if not truncated:
522
+ final_message = "You can now safely retry writing immediately considering the above information."
523
+
524
+ return (
525
+ (
526
+ msg
527
+ + f"Here's the existing file:\n```\n{file_content_str}\n```\n{final_message}"
528
+ ),
529
+ {path_: file_ranges},
530
+ )
531
+ else:
532
+ # The file hasn't changed, but we haven't read enough of it
533
+ unread_ranges = whitelist_data.get_unread_ranges()
534
+ # Format the ranges as a string for display
535
+ ranges_str = ", ".join(
536
+ [f"{start}-{end}" for start, end in unread_ranges]
537
+ )
538
+ msg = f"Error: you need to read more of the file before it can be overwritten.\nUnread line ranges: {ranges_str}\n\n"
539
+
540
+ # Read just the unread ranges
541
+ paths_: list[str] = []
542
+ for start, end in unread_ranges:
543
+ paths_.append(path_ + ":" + f"{start}-{end}")
544
+ paths_readfiles = ReadFiles(
545
+ file_paths=paths_, show_line_numbers_reason=""
546
+ )
547
+ readfiles, file_ranges_dict, truncated = read_files(
548
+ paths_readfiles.file_paths,
549
+ max_tokens,
550
+ context,
551
+ show_line_numbers=False,
552
+ start_line_nums=paths_readfiles.start_line_nums,
553
+ end_line_nums=paths_readfiles.end_line_nums,
554
+ )
555
+
556
+ final_message = ""
557
+ if not truncated:
558
+ final_message = "Now that you have read the rest of the file, you can now safely immediately retry writing but consider the new information above."
559
+
560
+ return (
561
+ (msg + "\n" + readfiles + "\n" + final_message),
562
+ file_ranges_dict,
563
+ )
564
+ # No need to add to whitelist here - will be handled by get_tool_output
441
565
 
442
566
  path = Path(path_)
443
567
  path.parent.mkdir(parents=True, exist_ok=True)
@@ -446,7 +570,7 @@ def write_file(
446
570
  with path.open("w") as f:
447
571
  f.write(writefile.file_content)
448
572
  except OSError as e:
449
- return f"Error: {e}"
573
+ return f"Error: {e}", {}
450
574
 
451
575
  extension = Path(path_).suffix.lstrip(".")
452
576
 
@@ -478,19 +602,17 @@ Syntax errors:
478
602
  except Exception:
479
603
  pass
480
604
 
481
- if add_overwrite_warning:
482
- warnings.append(
483
- "\n---\nWarning: a file already existed and it's now overwritten. Was it a mistake? If yes please revert your action."
484
- "\n---\n"
485
- + "Here's the previous content:\n```\n"
486
- + add_overwrite_warning
487
- + "\n```"
488
- )
605
+ # Count the lines directly from the content we're writing
606
+ total_lines = writefile.file_content.count("\n") + 1
489
607
 
490
- return "Success" + "".join(warnings)
608
+ return "Success" + "".join(warnings), {
609
+ path_: [(1, total_lines)]
610
+ } # Return the file path with line range along with success message
491
611
 
492
612
 
493
- def do_diff_edit(fedit: FileEdit, max_tokens: Optional[int], context: Context) -> str:
613
+ def do_diff_edit(
614
+ fedit: FileEdit, max_tokens: Optional[int], context: Context
615
+ ) -> tuple[str, dict[str, list[tuple[int, int]]]]:
494
616
  try:
495
617
  return _do_diff_edit(fedit, max_tokens, context)
496
618
  except Exception as e:
@@ -508,16 +630,28 @@ def do_diff_edit(fedit: FileEdit, max_tokens: Optional[int], context: Context) -
508
630
  raise e
509
631
 
510
632
 
511
- def _do_diff_edit(fedit: FileEdit, max_tokens: Optional[int], context: Context) -> str:
633
+ def _do_diff_edit(
634
+ fedit: FileEdit, max_tokens: Optional[int], context: Context
635
+ ) -> tuple[str, dict[str, list[tuple[int, int]]]]:
512
636
  context.console.log(f"Editing file: {fedit.file_path}")
513
637
 
514
638
  # Expand the path before checking if it's absolute
515
639
  path_ = expand_user(fedit.file_path)
640
+
516
641
  if not os.path.isabs(path_):
517
642
  raise Exception(
518
643
  f"Failure: file_path should be absolute path, current working directory is {context.bash_state.cwd}"
519
644
  )
520
645
 
646
+ workspace_path = context.bash_state.workspace_root
647
+ stats = load_workspace_stats(workspace_path)
648
+
649
+ if path_ not in stats.files:
650
+ stats.files[path_] = FileStats()
651
+
652
+ stats.files[path_].increment_edit()
653
+ save_workspace_stats(workspace_path, stats)
654
+
521
655
  # Validate using file_edit_mode
522
656
  allowed_globs = context.bash_state.file_edit_mode.allowed_globs
523
657
  if allowed_globs != "all" and not any(
@@ -527,8 +661,7 @@ def _do_diff_edit(fedit: FileEdit, max_tokens: Optional[int], context: Context)
527
661
  f"Error: updating file {path_} not allowed in current mode. Doesn't match allowed globs: {allowed_globs}"
528
662
  )
529
663
 
530
- # The LLM is now aware that the file exists
531
- context.bash_state.add_to_whitelist_for_overwrite(path_)
664
+ # No need to add to whitelist here - will be handled by get_tool_output
532
665
 
533
666
  if not os.path.exists(path_):
534
667
  raise Exception(f"Error: file {path_} does not exist")
@@ -545,6 +678,9 @@ def _do_diff_edit(fedit: FileEdit, max_tokens: Optional[int], context: Context)
545
678
  lines, apply_diff_to, context.console.log
546
679
  )
547
680
 
681
+ # Count the lines just once - after the edit but before writing
682
+ total_lines = apply_diff_to.count("\n") + 1
683
+
548
684
  with open(path_, "w") as f:
549
685
  f.write(apply_diff_to)
550
686
 
@@ -561,29 +697,93 @@ def _do_diff_edit(fedit: FileEdit, max_tokens: Optional[int], context: Context)
561
697
  syntax_errors += "\nNote: Ignore if 'tagged template literals' are used, they may raise false positive errors in tree-sitter."
562
698
 
563
699
  context.console.print(f"W: Syntax errors encountered: {syntax_errors}")
564
- return f"""{comments}
700
+
701
+ return (
702
+ f"""{comments}
565
703
  ---
566
704
  Warning: tree-sitter reported syntax errors, please re-read the file and fix if there are any errors.
567
705
  Syntax errors:
568
706
  {syntax_errors}
569
707
 
570
708
  {context_for_errors}
571
- """
709
+ """,
710
+ {path_: [(1, total_lines)]},
711
+ ) # Return the file path with line range along with the warning message
572
712
  except Exception:
573
713
  pass
574
714
 
575
- return comments
715
+ return comments, {
716
+ path_: [(1, total_lines)]
717
+ } # Return the file path with line range along with the edit comments
718
+
719
+
720
+ def _is_edit(content: str, percentage: int) -> bool:
721
+ lines = content.lstrip().split("\n")
722
+ if not lines:
723
+ return False
724
+ line = lines[0]
725
+ if SEARCH_MARKER.match(line):
726
+ return True
727
+ if percentage <= 50:
728
+ for line in lines:
729
+ if (
730
+ SEARCH_MARKER.match(line)
731
+ or DIVIDER_MARKER.match(line)
732
+ or REPLACE_MARKER.match(line)
733
+ ):
734
+ return True
735
+ return False
736
+
737
+
738
+ def file_writing(
739
+ file_writing_args: FileWriteOrEdit,
740
+ max_tokens: Optional[int],
741
+ context: Context,
742
+ ) -> tuple[
743
+ str, dict[str, list[tuple[int, int]]]
744
+ ]: # Updated to return message and file paths with line ranges
745
+ """
746
+ Write or edit a file based on percentage of changes.
747
+ If percentage_changed > 50%, treat content as direct file content.
748
+ Otherwise, treat content as search/replace blocks.
749
+ """
750
+ # Expand the path before checking if it's absolute
751
+ path_ = expand_user(file_writing_args.file_path)
752
+ if not os.path.isabs(path_):
753
+ return (
754
+ f"Failure: file_path should be absolute path, current working directory is {context.bash_state.cwd}",
755
+ {}, # Return empty dict instead of empty list for type consistency
756
+ )
576
757
 
758
+ # If file doesn't exist, always use direct file_content mode
759
+ content = file_writing_args.file_content_or_search_replace_blocks
760
+
761
+ if not _is_edit(content, file_writing_args.percentage_to_change):
762
+ # Use direct content mode (same as WriteIfEmpty)
763
+ result, paths = write_file(
764
+ WriteIfEmpty(
765
+ file_path=path_,
766
+ file_content=file_writing_args.file_content_or_search_replace_blocks,
767
+ ),
768
+ True,
769
+ max_tokens,
770
+ context,
771
+ )
772
+ return result, paths
773
+ else:
774
+ # File exists and percentage <= 50, use search/replace mode
775
+ result, paths = do_diff_edit(
776
+ FileEdit(
777
+ file_path=path_,
778
+ file_edit_using_search_replace_blocks=file_writing_args.file_content_or_search_replace_blocks,
779
+ ),
780
+ max_tokens,
781
+ context,
782
+ )
783
+ return result, paths
577
784
 
578
- TOOLS = (
579
- BashCommand
580
- | WriteIfEmpty
581
- | FileEdit
582
- | ReadImage
583
- | ReadFiles
584
- | Initialize
585
- | ContextSave
586
- )
785
+
786
+ TOOLS = BashCommand | FileWriteOrEdit | ReadImage | ReadFiles | Initialize | ContextSave
587
787
 
588
788
 
589
789
  def which_tool(args: str) -> TOOLS:
@@ -594,10 +794,8 @@ def which_tool(args: str) -> TOOLS:
594
794
  def which_tool_name(name: str) -> Type[TOOLS]:
595
795
  if name == "BashCommand":
596
796
  return BashCommand
597
- elif name == "WriteIfEmpty":
598
- return WriteIfEmpty
599
- elif name == "FileEdit":
600
- return FileEdit
797
+ elif name == "FileWriteOrEdit":
798
+ return FileWriteOrEdit
601
799
  elif name == "ReadImage":
602
800
  return ReadImage
603
801
  elif name == "ReadFiles":
@@ -647,32 +845,80 @@ def get_tool_output(
647
845
  output: tuple[str | ImageData, float]
648
846
  TOOL_CALLS.append(arg)
649
847
 
848
+ # Initialize a dictionary to track file paths and line ranges
849
+ file_paths_with_ranges: dict[str, list[tuple[int, int]]] = {}
850
+
650
851
  if isinstance(arg, BashCommand):
651
852
  context.console.print("Calling execute bash tool")
652
853
  if not INITIALIZED:
653
854
  raise Exception("Initialize tool not called yet.")
654
855
 
655
- output = execute_bash(
856
+ output_str, cost = execute_bash(
656
857
  context.bash_state, enc, arg, max_tokens, arg.wait_for_seconds
657
858
  )
859
+ output = output_str, cost
658
860
  elif isinstance(arg, WriteIfEmpty):
659
861
  context.console.print("Calling write file tool")
660
862
  if not INITIALIZED:
661
863
  raise Exception("Initialize tool not called yet.")
662
864
 
663
- output = write_file(arg, True, max_tokens, context), 0
865
+ result, write_paths = write_file(arg, True, max_tokens, context)
866
+ output = result, 0
867
+ # Add write paths with their ranges to our tracking dictionary
868
+ for path, ranges in write_paths.items():
869
+ if path in file_paths_with_ranges:
870
+ file_paths_with_ranges[path].extend(ranges)
871
+ else:
872
+ file_paths_with_ranges[path] = ranges.copy()
664
873
  elif isinstance(arg, FileEdit):
665
874
  context.console.print("Calling full file edit tool")
666
875
  if not INITIALIZED:
667
876
  raise Exception("Initialize tool not called yet.")
668
877
 
669
- output = do_diff_edit(arg, max_tokens, context), 0.0
878
+ result, edit_paths = do_diff_edit(arg, max_tokens, context)
879
+ output = result, 0.0
880
+ # Add edit paths with their ranges to our tracking dictionary
881
+ for path, ranges in edit_paths.items():
882
+ if path in file_paths_with_ranges:
883
+ file_paths_with_ranges[path].extend(ranges)
884
+ else:
885
+ file_paths_with_ranges[path] = ranges.copy()
886
+ elif isinstance(arg, FileWriteOrEdit):
887
+ context.console.print("Calling file writing tool")
888
+ if not INITIALIZED:
889
+ raise Exception("Initialize tool not called yet.")
890
+
891
+ result, write_edit_paths = file_writing(arg, max_tokens, context)
892
+ output = result, 0.0
893
+ # Add write/edit paths with their ranges to our tracking dictionary
894
+ for path, ranges in write_edit_paths.items():
895
+ if path in file_paths_with_ranges:
896
+ file_paths_with_ranges[path].extend(ranges)
897
+ else:
898
+ file_paths_with_ranges[path] = ranges.copy()
670
899
  elif isinstance(arg, ReadImage):
671
900
  context.console.print("Calling read image tool")
672
- output = read_image_from_shell(arg.file_path, context), 0.0
901
+ image_data = read_image_from_shell(arg.file_path, context)
902
+ output = image_data, 0.0
673
903
  elif isinstance(arg, ReadFiles):
674
904
  context.console.print("Calling read file tool")
675
- output = read_files(arg.file_paths, max_tokens, context), 0.0
905
+ # Access line numbers through properties
906
+ result, file_ranges_dict, _ = read_files(
907
+ arg.file_paths,
908
+ max_tokens,
909
+ context,
910
+ bool(arg.show_line_numbers_reason),
911
+ arg.start_line_nums,
912
+ arg.end_line_nums,
913
+ )
914
+ output = result, 0.0
915
+
916
+ # Merge the new file ranges into our tracking dictionary
917
+ for path, ranges in file_ranges_dict.items():
918
+ if path in file_paths_with_ranges:
919
+ file_paths_with_ranges[path].extend(ranges)
920
+ else:
921
+ file_paths_with_ranges[path] = ranges
676
922
  elif isinstance(arg, Initialize):
677
923
  context.console.print("Calling initial info tool")
678
924
  if arg.type == "user_asked_mode_change" or arg.type == "reset_shell":
@@ -694,7 +940,7 @@ def get_tool_output(
694
940
  0.0,
695
941
  )
696
942
  else:
697
- output_, context = initialize(
943
+ output_, context, init_paths = initialize(
698
944
  arg.type,
699
945
  context,
700
946
  arg.any_workspace_path,
@@ -704,6 +950,13 @@ def get_tool_output(
704
950
  arg.mode,
705
951
  )
706
952
  output = output_, 0.0
953
+ # Since init_paths is already a dictionary mapping file paths to line ranges,
954
+ # we just need to merge it with our tracking dictionary
955
+ for path, ranges in init_paths.items():
956
+ if path not in file_paths_with_ranges and os.path.exists(path):
957
+ file_paths_with_ranges[path] = ranges
958
+ elif path in file_paths_with_ranges:
959
+ file_paths_with_ranges[path].extend(ranges)
707
960
 
708
961
  elif isinstance(arg, ContextSave):
709
962
  context.console.print("Calling task memory tool")
@@ -721,7 +974,7 @@ def get_tool_output(
721
974
  relevant_files.extend(globs[:1000])
722
975
  if not globs:
723
976
  warnings += f"Warning: No files found for the glob: {fglob}\n"
724
- relevant_files_data = read_files(relevant_files[:10_000], None, context)
977
+ relevant_files_data, _, _ = read_files(relevant_files[:10_000], None, context)
725
978
  save_path = save_memory(
726
979
  arg, relevant_files_data, context.bash_state.serialize()
727
980
  )
@@ -736,6 +989,10 @@ def get_tool_output(
736
989
  output = output_, 0.0
737
990
  else:
738
991
  raise ValueError(f"Unknown tool: {arg}")
992
+
993
+ if file_paths_with_ranges: # Only add to whitelist if we have paths
994
+ context.bash_state.add_to_whitelist_for_overwrite(file_paths_with_ranges)
995
+
739
996
  if isinstance(output[0], str):
740
997
  context.console.print(str(output[0]))
741
998
  else:
@@ -749,13 +1006,64 @@ default_enc = get_default_encoder()
749
1006
  curr_cost = 0.0
750
1007
 
751
1008
 
1009
+ def range_format(start_line_num: Optional[int], end_line_num: Optional[int]) -> str:
1010
+ st = "" if not start_line_num else str(start_line_num)
1011
+ end = "" if not end_line_num else str(end_line_num)
1012
+ if not st and not end:
1013
+ return ""
1014
+ return f":{st}-{end}"
1015
+
1016
+
752
1017
  def read_files(
753
- file_paths: list[str], max_tokens: Optional[int], context: Context
754
- ) -> str:
1018
+ file_paths: list[str],
1019
+ max_tokens: Optional[int],
1020
+ context: Context,
1021
+ show_line_numbers: bool = False,
1022
+ start_line_nums: Optional[list[Optional[int]]] = None,
1023
+ end_line_nums: Optional[list[Optional[int]]] = None,
1024
+ ) -> tuple[
1025
+ str, dict[str, list[tuple[int, int]]], bool
1026
+ ]: # Updated to return file paths with ranges
755
1027
  message = ""
1028
+ file_ranges_dict: dict[
1029
+ str, list[tuple[int, int]]
1030
+ ] = {} # Map file paths to line ranges
1031
+
1032
+ workspace_path = context.bash_state.workspace_root
1033
+ stats = load_workspace_stats(workspace_path)
1034
+
1035
+ for path_ in file_paths:
1036
+ path_ = expand_user(path_)
1037
+ if not os.path.isabs(path_):
1038
+ continue
1039
+ if path_ not in stats.files:
1040
+ stats.files[path_] = FileStats()
1041
+
1042
+ stats.files[path_].increment_read()
1043
+ save_workspace_stats(workspace_path, stats)
1044
+ truncated = False
756
1045
  for i, file in enumerate(file_paths):
757
1046
  try:
758
- content, truncated, tokens = read_file(file, max_tokens, context)
1047
+ # Use line numbers from parameters if provided
1048
+ start_line_num = None if start_line_nums is None else start_line_nums[i]
1049
+ end_line_num = None if end_line_nums is None else end_line_nums[i]
1050
+
1051
+ # For backward compatibility, we still need to extract line numbers from path
1052
+ # if they weren't provided as parameters
1053
+ content, truncated, tokens, path, line_range = read_file(
1054
+ file,
1055
+ max_tokens,
1056
+ context,
1057
+ show_line_numbers,
1058
+ start_line_num,
1059
+ end_line_num,
1060
+ )
1061
+
1062
+ # Add file path with line range to dictionary
1063
+ if path in file_ranges_dict:
1064
+ file_ranges_dict[path].append(line_range)
1065
+ else:
1066
+ file_ranges_dict[path] = [line_range]
759
1067
  except Exception as e:
760
1068
  message += f"\n{file}: {str(e)}\n"
761
1069
  continue
@@ -763,7 +1071,8 @@ def read_files(
763
1071
  if max_tokens:
764
1072
  max_tokens = max_tokens - tokens
765
1073
 
766
- message += f"\n``` {file}\n{content}\n"
1074
+ range_formatted = range_format(start_line_num, end_line_num)
1075
+ message += f"\n{file}{range_formatted}\n```\n{content}\n"
767
1076
 
768
1077
  if truncated or (max_tokens and max_tokens <= 0):
769
1078
  not_reading = file_paths[i + 1 :]
@@ -772,15 +1081,21 @@ def read_files(
772
1081
  break
773
1082
  else:
774
1083
  message += "```"
775
-
776
- return message
1084
+ return message, file_ranges_dict, truncated
777
1085
 
778
1086
 
779
1087
  def read_file(
780
- file_path: str, max_tokens: Optional[int], context: Context
781
- ) -> tuple[str, bool, int]:
1088
+ file_path: str,
1089
+ max_tokens: Optional[int],
1090
+ context: Context,
1091
+ show_line_numbers: bool = False,
1092
+ start_line_num: Optional[int] = None,
1093
+ end_line_num: Optional[int] = None,
1094
+ ) -> tuple[str, bool, int, str, tuple[int, int]]:
782
1095
  context.console.print(f"Reading file: {file_path}")
783
1096
 
1097
+ # Line numbers are now passed as parameters, no need to parse from path
1098
+
784
1099
  # Expand the path before checking if it's absolute
785
1100
  file_path = expand_user(file_path)
786
1101
 
@@ -789,28 +1104,83 @@ def read_file(
789
1104
  f"Failure: file_path should be absolute path, current working directory is {context.bash_state.cwd}"
790
1105
  )
791
1106
 
792
- context.bash_state.add_to_whitelist_for_overwrite(file_path)
793
-
794
1107
  path = Path(file_path)
795
1108
  if not path.exists():
796
1109
  raise ValueError(f"Error: file {file_path} does not exist")
797
1110
 
1111
+ # Read all lines of the file
798
1112
  with path.open("r") as f:
799
- content = f.read(10_000_000)
1113
+ all_lines = f.readlines(10_000_000)
1114
+
1115
+ if all_lines[-1].endswith("\n"):
1116
+ # Special handling of line counts because readlines doesn't consider last empty line as a separate line
1117
+ all_lines[-1] = all_lines[-1][:-1]
1118
+ all_lines.append("")
1119
+
1120
+ total_lines = len(all_lines)
1121
+
1122
+ # Apply line range filtering if specified
1123
+ start_idx = 0
1124
+ if start_line_num is not None:
1125
+ # Convert 1-indexed line number to 0-indexed
1126
+ start_idx = max(0, start_line_num - 1)
1127
+
1128
+ end_idx = len(all_lines)
1129
+ if end_line_num is not None:
1130
+ # end_line_num is inclusive, so we use min to ensure it's within bounds
1131
+ end_idx = min(len(all_lines), end_line_num)
1132
+
1133
+ # Convert back to 1-indexed line numbers for tracking
1134
+ effective_start = start_line_num if start_line_num is not None else 1
1135
+ effective_end = end_line_num if end_line_num is not None else total_lines
1136
+
1137
+ filtered_lines = all_lines[start_idx:end_idx]
1138
+
1139
+ # Create content with or without line numbers
1140
+ if show_line_numbers:
1141
+ content_lines = []
1142
+ for i, line in enumerate(filtered_lines, start=start_idx + 1):
1143
+ content_lines.append(f"{i} {line}")
1144
+ content = "".join(content_lines)
1145
+ else:
1146
+ content = "".join(filtered_lines)
800
1147
 
801
1148
  truncated = False
802
1149
  tokens_counts = 0
1150
+
1151
+ # Handle token limit if specified
803
1152
  if max_tokens is not None:
804
1153
  tokens = default_enc.encoder(content)
805
1154
  tokens_counts = len(tokens)
1155
+
806
1156
  if len(tokens) > max_tokens:
807
- content = default_enc.decoder(tokens[:max_tokens])
808
- rest = save_out_of_context(
809
- default_enc.decoder(tokens[max_tokens:]), Path(file_path).suffix
810
- )
811
- content += f"\n(...truncated)\n---\nI've saved the continuation in a new file. You may want to read: `{rest}`"
1157
+ # Truncate at token boundary first
1158
+ truncated_tokens = tokens[:max_tokens]
1159
+ truncated_content = default_enc.decoder(truncated_tokens)
1160
+
1161
+ # Count how many lines we kept
1162
+ line_count = truncated_content.count("\n")
1163
+
1164
+ # Calculate the last line number shown (1-indexed)
1165
+ last_line_shown = start_idx + line_count
1166
+
1167
+ content = truncated_content
1168
+ # Add informative message about truncation with total line count
1169
+ total_lines = len(all_lines)
1170
+ content += f"\n(...truncated) Only showing till line number {last_line_shown} of {total_lines} total lines due to the token limit, please continue reading from {last_line_shown + 1} if required"
812
1171
  truncated = True
813
- return content, truncated, tokens_counts
1172
+
1173
+ # Update effective_end if truncated
1174
+ effective_end = last_line_shown
1175
+
1176
+ # Return the content along with the effective line range that was read
1177
+ return (
1178
+ content,
1179
+ truncated,
1180
+ tokens_counts,
1181
+ file_path,
1182
+ (effective_start, effective_end),
1183
+ )
814
1184
 
815
1185
 
816
1186
  if __name__ == "__main__":
@@ -851,3 +1221,32 @@ if __name__ == "__main__":
851
1221
  None,
852
1222
  )
853
1223
  )
1224
+
1225
+ print(
1226
+ get_tool_output(
1227
+ Context(BASH_STATE, BASH_STATE.console),
1228
+ ReadFiles(
1229
+ file_paths=["/Users/arusia/repos/wcgw/src/wcgw/client/tools.py"],
1230
+ show_line_numbers_reason="true",
1231
+ ),
1232
+ default_enc,
1233
+ 0,
1234
+ lambda x, y: ("", 0),
1235
+ 15000,
1236
+ )[0][0]
1237
+ )
1238
+
1239
+ print(
1240
+ get_tool_output(
1241
+ Context(BASH_STATE, BASH_STATE.console),
1242
+ FileWriteOrEdit(
1243
+ file_path="/Users/arusia/repos/wcgw/src/wcgw/client/tools.py",
1244
+ file_content_or_search_replace_blocks="""test""",
1245
+ percentage_to_change=100,
1246
+ ),
1247
+ default_enc,
1248
+ 0,
1249
+ lambda x, y: ("", 0),
1250
+ 800,
1251
+ )[0][0]
1252
+ )