wcgw 3.0.7__py3-none-any.whl → 4.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of wcgw might be problematic. Click here for more details.

wcgw/client/tools.py CHANGED
@@ -7,6 +7,7 @@ import os
7
7
  import subprocess
8
8
  import traceback
9
9
  from dataclasses import dataclass
10
+ from hashlib import sha256
10
11
  from os.path import expanduser
11
12
  from pathlib import Path
12
13
  from tempfile import NamedTemporaryFile
@@ -28,6 +29,11 @@ from pydantic import BaseModel, TypeAdapter, ValidationError
28
29
  from syntax_checker import check_syntax
29
30
 
30
31
  from wcgw.client.bash_state.bash_state import get_status
32
+ from wcgw.client.repo_ops.file_stats import (
33
+ FileStats,
34
+ load_workspace_stats,
35
+ save_workspace_stats,
36
+ )
31
37
 
32
38
  from ..types_ import (
33
39
  BashCommand,
@@ -36,6 +42,7 @@ from ..types_ import (
36
42
  Console,
37
43
  ContextSave,
38
44
  FileEdit,
45
+ FileWriteOrEdit,
39
46
  Initialize,
40
47
  Modes,
41
48
  ModesConfig,
@@ -48,7 +55,12 @@ from .bash_state.bash_state import (
48
55
  execute_bash,
49
56
  )
50
57
  from .encoder import EncoderDecoder, get_default_encoder
51
- from .file_ops.search_replace import search_replace_edit
58
+ from .file_ops.search_replace import (
59
+ DIVIDER_MARKER,
60
+ REPLACE_MARKER,
61
+ SEARCH_MARKER,
62
+ search_replace_edit,
63
+ )
52
64
  from .memory import load_memory, save_memory
53
65
  from .modes import (
54
66
  ARCHITECT_PROMPT,
@@ -92,7 +104,7 @@ def initialize(
92
104
  task_id_to_resume: str,
93
105
  max_tokens: Optional[int],
94
106
  mode: ModesConfig,
95
- ) -> tuple[str, Context]:
107
+ ) -> tuple[str, Context, dict[str, list[tuple[int, int]]]]:
96
108
  # Expand the workspace path
97
109
  any_workspace_path = expand_user(any_workspace_path)
98
110
  repo_context = ""
@@ -129,7 +141,8 @@ def initialize(
129
141
  if not read_files_:
130
142
  read_files_ = [any_workspace_path]
131
143
  any_workspace_path = os.path.dirname(any_workspace_path)
132
- repo_context, folder_to_start = get_repo_context(any_workspace_path, 50)
144
+ # Let get_repo_context handle loading the workspace stats
145
+ repo_context, folder_to_start = get_repo_context(any_workspace_path)
133
146
 
134
147
  repo_context = f"---\n# Workspace structure\n{repo_context}\n---\n"
135
148
 
@@ -151,14 +164,18 @@ def initialize(
151
164
  if loaded_state is not None:
152
165
  try:
153
166
  parsed_state = BashState.parse_state(loaded_state)
167
+ workspace_root = (
168
+ str(folder_to_start) if folder_to_start else parsed_state[5]
169
+ )
154
170
  if mode == "wcgw":
155
171
  context.bash_state.load_state(
156
172
  parsed_state[0],
157
173
  parsed_state[1],
158
174
  parsed_state[2],
159
175
  parsed_state[3],
160
- parsed_state[4] + list(context.bash_state.whitelist_for_overwrite),
161
- str(folder_to_start) if folder_to_start else "",
176
+ {**parsed_state[4], **context.bash_state.whitelist_for_overwrite},
177
+ str(folder_to_start) if folder_to_start else workspace_root,
178
+ workspace_root,
162
179
  )
163
180
  else:
164
181
  state = modes_to_state(mode)
@@ -167,8 +184,9 @@ def initialize(
167
184
  state[1],
168
185
  state[2],
169
186
  state[3],
170
- parsed_state[4] + list(context.bash_state.whitelist_for_overwrite),
171
- str(folder_to_start) if folder_to_start else "",
187
+ {**parsed_state[4], **context.bash_state.whitelist_for_overwrite},
188
+ str(folder_to_start) if folder_to_start else workspace_root,
189
+ workspace_root,
172
190
  )
173
191
  except ValueError:
174
192
  context.console.print(traceback.format_exc())
@@ -178,12 +196,14 @@ def initialize(
178
196
  else:
179
197
  mode_changed = is_mode_change(mode, context.bash_state)
180
198
  state = modes_to_state(mode)
199
+ # Use the provided workspace path as the workspace root
181
200
  context.bash_state.load_state(
182
201
  state[0],
183
202
  state[1],
184
203
  state[2],
185
204
  state[3],
186
- list(context.bash_state.whitelist_for_overwrite),
205
+ dict(context.bash_state.whitelist_for_overwrite),
206
+ str(folder_to_start) if folder_to_start else "",
187
207
  str(folder_to_start) if folder_to_start else "",
188
208
  )
189
209
  if type == "first_call" or mode_changed:
@@ -194,6 +214,7 @@ def initialize(
194
214
  del mode
195
215
 
196
216
  initial_files_context = ""
217
+ initial_paths_with_ranges: dict[str, list[tuple[int, int]]] = {}
197
218
  if read_files_:
198
219
  if folder_to_start:
199
220
  read_files_ = [
@@ -203,9 +224,25 @@ def initialize(
203
224
  else expand_user(f)
204
225
  for f in read_files_
205
226
  ]
206
- initial_files = read_files(read_files_, max_tokens, context)
227
+ initial_files, initial_paths_with_ranges, _ = read_files(
228
+ read_files_, max_tokens, context
229
+ )
207
230
  initial_files_context = f"---\n# Requested files\n{initial_files}\n---\n"
208
231
 
232
+ # Check for CLAUDE.md in the workspace folder on first call
233
+ alignment_context = ""
234
+ if folder_to_start:
235
+ alignment_file_path = os.path.join(folder_to_start, "CLAUDE.md")
236
+ if os.path.exists(alignment_file_path):
237
+ try:
238
+ # Read the CLAUDE.md file content
239
+ with open(alignment_file_path, "r") as f:
240
+ alignment_content = f.read()
241
+ alignment_context = f"---\n# CLAUDE.md - Project alignment guidelines\n```\n{alignment_content}\n```\n---\n\n"
242
+ except Exception:
243
+ # Handle any errors when reading the file
244
+ alignment_context = ""
245
+
209
246
  uname_sysname = os.uname().sysname
210
247
  uname_machine = os.uname().machine
211
248
 
@@ -216,9 +253,11 @@ def initialize(
216
253
  System: {uname_sysname}
217
254
  Machine: {uname_machine}
218
255
  Initialized in directory (also cwd): {context.bash_state.cwd}
256
+ User home directory: {expanduser("~")}
219
257
 
220
258
  {repo_context}
221
259
 
260
+ {alignment_context}
222
261
  {initial_files_context}
223
262
 
224
263
  ---
@@ -228,7 +267,7 @@ Initialized in directory (also cwd): {context.bash_state.cwd}
228
267
 
229
268
  global INITIALIZED
230
269
  INITIALIZED = True
231
- return output, context
270
+ return output, context, initial_paths_with_ranges
232
271
 
233
272
 
234
273
  def is_mode_change(mode_config: ModesConfig, bash_state: BashState) -> bool:
@@ -267,7 +306,8 @@ def reset_wcgw(
267
306
  file_edit_mode,
268
307
  write_if_empty_mode,
269
308
  mode,
270
- list(context.bash_state.whitelist_for_overwrite),
309
+ dict(context.bash_state.whitelist_for_overwrite),
310
+ starting_directory,
271
311
  starting_directory,
272
312
  )
273
313
  mode_prompt = get_mode_prompt(context)
@@ -291,7 +331,8 @@ def reset_wcgw(
291
331
  file_edit_mode,
292
332
  write_if_empty_mode,
293
333
  mode,
294
- list(context.bash_state.whitelist_for_overwrite),
334
+ dict(context.bash_state.whitelist_for_overwrite),
335
+ starting_directory,
295
336
  starting_directory,
296
337
  )
297
338
  INITIALIZED = True
@@ -405,39 +446,138 @@ def write_file(
405
446
  error_on_exist: bool,
406
447
  max_tokens: Optional[int],
407
448
  context: Context,
408
- ) -> str:
449
+ ) -> tuple[
450
+ str, dict[str, list[tuple[int, int]]]
451
+ ]: # Updated to return message and file paths with line ranges
409
452
  # Expand the path before checking if it's absolute
410
453
  path_ = expand_user(writefile.file_path)
454
+
455
+ workspace_path = context.bash_state.workspace_root
456
+ stats = load_workspace_stats(workspace_path)
457
+
458
+ if path_ not in stats.files:
459
+ stats.files[path_] = FileStats()
460
+
461
+ stats.files[path_].increment_write()
462
+ save_workspace_stats(workspace_path, stats)
463
+
411
464
  if not os.path.isabs(path_):
412
- return f"Failure: file_path should be absolute path, current working directory is {context.bash_state.cwd}"
465
+ return (
466
+ f"Failure: file_path should be absolute path, current working directory is {context.bash_state.cwd}",
467
+ {}, # Return empty dict instead of empty list for type consistency
468
+ )
413
469
 
414
470
  error_on_exist_ = (
415
471
  error_on_exist and path_ not in context.bash_state.whitelist_for_overwrite
416
472
  )
417
473
 
474
+ if error_on_exist and path_ in context.bash_state.whitelist_for_overwrite:
475
+ # Ensure hash has not changed
476
+ if os.path.exists(path_):
477
+ with open(path_, "rb") as f:
478
+ file_content = f.read()
479
+ curr_hash = sha256(file_content).hexdigest()
480
+
481
+ whitelist_data = context.bash_state.whitelist_for_overwrite[path_]
482
+
483
+ # If we haven't fully read the file or hash has changed, require re-reading
484
+ if curr_hash != whitelist_data.file_hash:
485
+ error_on_exist_ = True
486
+ elif not whitelist_data.is_read_enough():
487
+ error_on_exist_ = True
488
+
418
489
  # Validate using write_if_empty_mode after checking whitelist
419
490
  allowed_globs = context.bash_state.write_if_empty_mode.allowed_globs
420
491
  if allowed_globs != "all" and not any(
421
492
  fnmatch.fnmatch(path_, pattern) for pattern in allowed_globs
422
493
  ):
423
- return f"Error: updating file {path_} not allowed in current mode. Doesn't match allowed globs: {allowed_globs}"
494
+ return (
495
+ f"Error: updating file {path_} not allowed in current mode. Doesn't match allowed globs: {allowed_globs}",
496
+ {}, # Empty dict instead of empty list
497
+ )
424
498
 
425
- add_overwrite_warning = ""
426
499
  if (error_on_exist or error_on_exist_) and os.path.exists(path_):
427
500
  content = Path(path_).read_text().strip()
428
501
  if content:
429
- content = truncate_if_over(content, max_tokens)
430
-
431
502
  if error_on_exist_:
432
- return (
433
- f"Error: can't write to existing file {path_}, use other functions to edit the file"
434
- + f"\nHere's the existing content:\n```\n{content}\n```"
435
- )
436
- else:
437
- add_overwrite_warning = content
438
-
439
- # Since we've already errored once, add this to whitelist
440
- context.bash_state.add_to_whitelist_for_overwrite(path_)
503
+ file_ranges = []
504
+
505
+ if path_ not in context.bash_state.whitelist_for_overwrite:
506
+ # File hasn't been read at all
507
+ msg = f"Error: you need to read existing file {path_} at least once before it can be overwritten.\n\n"
508
+ # Read the entire file
509
+ file_content_str, truncated, _, _, line_range = read_file(
510
+ path_, max_tokens, context, False
511
+ )
512
+ file_ranges = [line_range]
513
+
514
+ final_message = ""
515
+ if not truncated:
516
+ final_message = "You can now safely retry writing immediately considering the above information."
517
+
518
+ return (
519
+ (
520
+ msg
521
+ + f"Here's the existing file:\n```\n{file_content_str}\n{final_message}\n```"
522
+ ),
523
+ {path_: file_ranges},
524
+ )
525
+
526
+ whitelist_data = context.bash_state.whitelist_for_overwrite[path_]
527
+
528
+ if curr_hash != whitelist_data.file_hash:
529
+ msg = "Error: the file has changed since last read.\n\n"
530
+ # Read the entire file again
531
+ file_content_str, truncated, _, _, line_range = read_file(
532
+ path_, max_tokens, context, False
533
+ )
534
+ file_ranges = [line_range]
535
+
536
+ final_message = ""
537
+ if not truncated:
538
+ final_message = "You can now safely retry writing immediately considering the above information."
539
+
540
+ return (
541
+ (
542
+ msg
543
+ + f"Here's the existing file:\n```\n{file_content_str}\n```\n{final_message}"
544
+ ),
545
+ {path_: file_ranges},
546
+ )
547
+ else:
548
+ # The file hasn't changed, but we haven't read enough of it
549
+ unread_ranges = whitelist_data.get_unread_ranges()
550
+ # Format the ranges as a string for display
551
+ ranges_str = ", ".join(
552
+ [f"{start}-{end}" for start, end in unread_ranges]
553
+ )
554
+ msg = f"Error: you need to read more of the file before it can be overwritten.\nUnread line ranges: {ranges_str}\n\n"
555
+
556
+ # Read just the unread ranges
557
+ paths_: list[str] = []
558
+ for start, end in unread_ranges:
559
+ paths_.append(path_ + ":" + f"{start}-{end}")
560
+ paths_readfiles = ReadFiles(
561
+ file_paths=paths_, show_line_numbers_reason=""
562
+ )
563
+ readfiles, file_ranges_dict, truncated = read_files(
564
+ paths_readfiles.file_paths,
565
+ max_tokens,
566
+ context,
567
+ show_line_numbers=False,
568
+ start_line_nums=paths_readfiles.start_line_nums,
569
+ end_line_nums=paths_readfiles.end_line_nums,
570
+ )
571
+
572
+ final_message = ""
573
+ if not truncated:
574
+ final_message = "Now that you have read the rest of the file, you can now safely immediately retry writing but consider the new information above."
575
+
576
+ return (
577
+ (msg + "\n" + readfiles + "\n" + final_message),
578
+ file_ranges_dict,
579
+ )
580
+ # No need to add to whitelist here - will be handled by get_tool_output
441
581
 
442
582
  path = Path(path_)
443
583
  path.parent.mkdir(parents=True, exist_ok=True)
@@ -446,7 +586,7 @@ def write_file(
446
586
  with path.open("w") as f:
447
587
  f.write(writefile.file_content)
448
588
  except OSError as e:
449
- return f"Error: {e}"
589
+ return f"Error: {e}", {}
450
590
 
451
591
  extension = Path(path_).suffix.lstrip(".")
452
592
 
@@ -478,19 +618,17 @@ Syntax errors:
478
618
  except Exception:
479
619
  pass
480
620
 
481
- if add_overwrite_warning:
482
- warnings.append(
483
- "\n---\nWarning: a file already existed and it's now overwritten. Was it a mistake? If yes please revert your action."
484
- "\n---\n"
485
- + "Here's the previous content:\n```\n"
486
- + add_overwrite_warning
487
- + "\n```"
488
- )
621
+ # Count the lines directly from the content we're writing
622
+ total_lines = writefile.file_content.count("\n") + 1
489
623
 
490
- return "Success" + "".join(warnings)
624
+ return "Success" + "".join(warnings), {
625
+ path_: [(1, total_lines)]
626
+ } # Return the file path with line range along with success message
491
627
 
492
628
 
493
- def do_diff_edit(fedit: FileEdit, max_tokens: Optional[int], context: Context) -> str:
629
+ def do_diff_edit(
630
+ fedit: FileEdit, max_tokens: Optional[int], context: Context
631
+ ) -> tuple[str, dict[str, list[tuple[int, int]]]]:
494
632
  try:
495
633
  return _do_diff_edit(fedit, max_tokens, context)
496
634
  except Exception as e:
@@ -508,16 +646,28 @@ def do_diff_edit(fedit: FileEdit, max_tokens: Optional[int], context: Context) -
508
646
  raise e
509
647
 
510
648
 
511
- def _do_diff_edit(fedit: FileEdit, max_tokens: Optional[int], context: Context) -> str:
649
+ def _do_diff_edit(
650
+ fedit: FileEdit, max_tokens: Optional[int], context: Context
651
+ ) -> tuple[str, dict[str, list[tuple[int, int]]]]:
512
652
  context.console.log(f"Editing file: {fedit.file_path}")
513
653
 
514
654
  # Expand the path before checking if it's absolute
515
655
  path_ = expand_user(fedit.file_path)
656
+
516
657
  if not os.path.isabs(path_):
517
658
  raise Exception(
518
659
  f"Failure: file_path should be absolute path, current working directory is {context.bash_state.cwd}"
519
660
  )
520
661
 
662
+ workspace_path = context.bash_state.workspace_root
663
+ stats = load_workspace_stats(workspace_path)
664
+
665
+ if path_ not in stats.files:
666
+ stats.files[path_] = FileStats()
667
+
668
+ stats.files[path_].increment_edit()
669
+ save_workspace_stats(workspace_path, stats)
670
+
521
671
  # Validate using file_edit_mode
522
672
  allowed_globs = context.bash_state.file_edit_mode.allowed_globs
523
673
  if allowed_globs != "all" and not any(
@@ -527,8 +677,7 @@ def _do_diff_edit(fedit: FileEdit, max_tokens: Optional[int], context: Context)
527
677
  f"Error: updating file {path_} not allowed in current mode. Doesn't match allowed globs: {allowed_globs}"
528
678
  )
529
679
 
530
- # The LLM is now aware that the file exists
531
- context.bash_state.add_to_whitelist_for_overwrite(path_)
680
+ # No need to add to whitelist here - will be handled by get_tool_output
532
681
 
533
682
  if not os.path.exists(path_):
534
683
  raise Exception(f"Error: file {path_} does not exist")
@@ -545,6 +694,9 @@ def _do_diff_edit(fedit: FileEdit, max_tokens: Optional[int], context: Context)
545
694
  lines, apply_diff_to, context.console.log
546
695
  )
547
696
 
697
+ # Count the lines just once - after the edit but before writing
698
+ total_lines = apply_diff_to.count("\n") + 1
699
+
548
700
  with open(path_, "w") as f:
549
701
  f.write(apply_diff_to)
550
702
 
@@ -561,29 +713,93 @@ def _do_diff_edit(fedit: FileEdit, max_tokens: Optional[int], context: Context)
561
713
  syntax_errors += "\nNote: Ignore if 'tagged template literals' are used, they may raise false positive errors in tree-sitter."
562
714
 
563
715
  context.console.print(f"W: Syntax errors encountered: {syntax_errors}")
564
- return f"""{comments}
716
+
717
+ return (
718
+ f"""{comments}
565
719
  ---
566
720
  Warning: tree-sitter reported syntax errors, please re-read the file and fix if there are any errors.
567
721
  Syntax errors:
568
722
  {syntax_errors}
569
723
 
570
724
  {context_for_errors}
571
- """
725
+ """,
726
+ {path_: [(1, total_lines)]},
727
+ ) # Return the file path with line range along with the warning message
572
728
  except Exception:
573
729
  pass
574
730
 
575
- return comments
731
+ return comments, {
732
+ path_: [(1, total_lines)]
733
+ } # Return the file path with line range along with the edit comments
734
+
735
+
736
def _is_edit(content: str, percentage: int) -> bool:
    """Decide whether ``content`` should be applied as search/replace blocks.

    Args:
        content: Raw payload, either full file content or search/replace blocks.
        percentage: Caller-declared share of the file that is being changed.

    Returns:
        True when the first line of the payload (after stripping leading
        whitespace from the whole payload) matches ``SEARCH_MARKER``, or when
        ``percentage`` is at most 50 and any line matches one of the
        search/divider/replace markers. False otherwise.
    """
    # str.split with an explicit separator always yields at least one element,
    # so taking the first line is safe even when the payload is empty.
    lines = content.lstrip().split("\n")

    # A payload that opens with a search marker is treated as an edit no matter
    # what percentage the caller declared.
    if SEARCH_MARKER.match(lines[0]):
        return True

    if percentage > 50:
        return False

    # For small declared changes, a marker on any line is enough.
    markers = (SEARCH_MARKER, DIVIDER_MARKER, REPLACE_MARKER)
    return any(marker.match(line) for line in lines for marker in markers)
752
+
753
+
754
def file_writing(
    file_writing_args: FileWriteOrEdit,
    max_tokens: Optional[int],
    context: Context,
) -> tuple[str, dict[str, list[tuple[int, int]]]]:
    """Route a FileWriteOrEdit request to a full write or a search/replace edit.

    The payload is classified by ``_is_edit``. A payload that is not an edit is
    handed to ``write_file`` as the complete file content (with
    ``error_on_exist`` set to True); otherwise it is handed to ``do_diff_edit``
    as search/replace blocks.

    Returns:
        The ``(message, {path: [(start, end), ...]})`` pair produced by the
        chosen helper, or a failure message paired with an empty dict when the
        expanded path is not absolute.
    """
    # Expand "~" first so the absolute-path check sees the real location.
    target = expand_user(file_writing_args.file_path)
    if not os.path.isabs(target):
        failure = f"Failure: file_path should be absolute path, current working directory is {context.bash_state.cwd}"
        return failure, {}

    payload = file_writing_args.file_content_or_search_replace_blocks

    if _is_edit(payload, file_writing_args.percentage_to_change):
        # Search/replace mode: apply the blocks to the existing file.
        edit_request = FileEdit(
            file_path=target,
            file_edit_using_search_replace_blocks=payload,
        )
        message, ranges = do_diff_edit(edit_request, max_tokens, context)
        return message, ranges

    # Direct content mode: same path as the WriteIfEmpty tool.
    write_request = WriteIfEmpty(file_path=target, file_content=payload)
    message, ranges = write_file(write_request, True, max_tokens, context)
    return message, ranges
576
800
 
577
801
 
578
- TOOLS = (
579
- BashCommand
580
- | WriteIfEmpty
581
- | FileEdit
582
- | ReadImage
583
- | ReadFiles
584
- | Initialize
585
- | ContextSave
586
- )
802
+ TOOLS = BashCommand | FileWriteOrEdit | ReadImage | ReadFiles | Initialize | ContextSave
587
803
 
588
804
 
589
805
  def which_tool(args: str) -> TOOLS:
@@ -594,10 +810,8 @@ def which_tool(args: str) -> TOOLS:
594
810
  def which_tool_name(name: str) -> Type[TOOLS]:
595
811
  if name == "BashCommand":
596
812
  return BashCommand
597
- elif name == "WriteIfEmpty":
598
- return WriteIfEmpty
599
- elif name == "FileEdit":
600
- return FileEdit
813
+ elif name == "FileWriteOrEdit":
814
+ return FileWriteOrEdit
601
815
  elif name == "ReadImage":
602
816
  return ReadImage
603
817
  elif name == "ReadFiles":
@@ -647,32 +861,80 @@ def get_tool_output(
647
861
  output: tuple[str | ImageData, float]
648
862
  TOOL_CALLS.append(arg)
649
863
 
864
+ # Initialize a dictionary to track file paths and line ranges
865
+ file_paths_with_ranges: dict[str, list[tuple[int, int]]] = {}
866
+
650
867
  if isinstance(arg, BashCommand):
651
868
  context.console.print("Calling execute bash tool")
652
869
  if not INITIALIZED:
653
870
  raise Exception("Initialize tool not called yet.")
654
871
 
655
- output = execute_bash(
872
+ output_str, cost = execute_bash(
656
873
  context.bash_state, enc, arg, max_tokens, arg.wait_for_seconds
657
874
  )
875
+ output = output_str, cost
658
876
  elif isinstance(arg, WriteIfEmpty):
659
877
  context.console.print("Calling write file tool")
660
878
  if not INITIALIZED:
661
879
  raise Exception("Initialize tool not called yet.")
662
880
 
663
- output = write_file(arg, True, max_tokens, context), 0
881
+ result, write_paths = write_file(arg, True, max_tokens, context)
882
+ output = result, 0
883
+ # Add write paths with their ranges to our tracking dictionary
884
+ for path, ranges in write_paths.items():
885
+ if path in file_paths_with_ranges:
886
+ file_paths_with_ranges[path].extend(ranges)
887
+ else:
888
+ file_paths_with_ranges[path] = ranges.copy()
664
889
  elif isinstance(arg, FileEdit):
665
890
  context.console.print("Calling full file edit tool")
666
891
  if not INITIALIZED:
667
892
  raise Exception("Initialize tool not called yet.")
668
893
 
669
- output = do_diff_edit(arg, max_tokens, context), 0.0
894
+ result, edit_paths = do_diff_edit(arg, max_tokens, context)
895
+ output = result, 0.0
896
+ # Add edit paths with their ranges to our tracking dictionary
897
+ for path, ranges in edit_paths.items():
898
+ if path in file_paths_with_ranges:
899
+ file_paths_with_ranges[path].extend(ranges)
900
+ else:
901
+ file_paths_with_ranges[path] = ranges.copy()
902
+ elif isinstance(arg, FileWriteOrEdit):
903
+ context.console.print("Calling file writing tool")
904
+ if not INITIALIZED:
905
+ raise Exception("Initialize tool not called yet.")
906
+
907
+ result, write_edit_paths = file_writing(arg, max_tokens, context)
908
+ output = result, 0.0
909
+ # Add write/edit paths with their ranges to our tracking dictionary
910
+ for path, ranges in write_edit_paths.items():
911
+ if path in file_paths_with_ranges:
912
+ file_paths_with_ranges[path].extend(ranges)
913
+ else:
914
+ file_paths_with_ranges[path] = ranges.copy()
670
915
  elif isinstance(arg, ReadImage):
671
916
  context.console.print("Calling read image tool")
672
- output = read_image_from_shell(arg.file_path, context), 0.0
917
+ image_data = read_image_from_shell(arg.file_path, context)
918
+ output = image_data, 0.0
673
919
  elif isinstance(arg, ReadFiles):
674
920
  context.console.print("Calling read file tool")
675
- output = read_files(arg.file_paths, max_tokens, context), 0.0
921
+ # Access line numbers through properties
922
+ result, file_ranges_dict, _ = read_files(
923
+ arg.file_paths,
924
+ max_tokens,
925
+ context,
926
+ bool(arg.show_line_numbers_reason),
927
+ arg.start_line_nums,
928
+ arg.end_line_nums,
929
+ )
930
+ output = result, 0.0
931
+
932
+ # Merge the new file ranges into our tracking dictionary
933
+ for path, ranges in file_ranges_dict.items():
934
+ if path in file_paths_with_ranges:
935
+ file_paths_with_ranges[path].extend(ranges)
936
+ else:
937
+ file_paths_with_ranges[path] = ranges
676
938
  elif isinstance(arg, Initialize):
677
939
  context.console.print("Calling initial info tool")
678
940
  if arg.type == "user_asked_mode_change" or arg.type == "reset_shell":
@@ -694,7 +956,7 @@ def get_tool_output(
694
956
  0.0,
695
957
  )
696
958
  else:
697
- output_, context = initialize(
959
+ output_, context, init_paths = initialize(
698
960
  arg.type,
699
961
  context,
700
962
  arg.any_workspace_path,
@@ -704,6 +966,13 @@ def get_tool_output(
704
966
  arg.mode,
705
967
  )
706
968
  output = output_, 0.0
969
+ # Since init_paths is already a dictionary mapping file paths to line ranges,
970
+ # we just need to merge it with our tracking dictionary
971
+ for path, ranges in init_paths.items():
972
+ if path not in file_paths_with_ranges and os.path.exists(path):
973
+ file_paths_with_ranges[path] = ranges
974
+ elif path in file_paths_with_ranges:
975
+ file_paths_with_ranges[path].extend(ranges)
707
976
 
708
977
  elif isinstance(arg, ContextSave):
709
978
  context.console.print("Calling task memory tool")
@@ -721,7 +990,7 @@ def get_tool_output(
721
990
  relevant_files.extend(globs[:1000])
722
991
  if not globs:
723
992
  warnings += f"Warning: No files found for the glob: {fglob}\n"
724
- relevant_files_data = read_files(relevant_files[:10_000], None, context)
993
+ relevant_files_data, _, _ = read_files(relevant_files[:10_000], None, context)
725
994
  save_path = save_memory(
726
995
  arg, relevant_files_data, context.bash_state.serialize()
727
996
  )
@@ -736,6 +1005,10 @@ def get_tool_output(
736
1005
  output = output_, 0.0
737
1006
  else:
738
1007
  raise ValueError(f"Unknown tool: {arg}")
1008
+
1009
+ if file_paths_with_ranges: # Only add to whitelist if we have paths
1010
+ context.bash_state.add_to_whitelist_for_overwrite(file_paths_with_ranges)
1011
+
739
1012
  if isinstance(output[0], str):
740
1013
  context.console.print(str(output[0]))
741
1014
  else:
@@ -749,13 +1022,64 @@ default_enc = get_default_encoder()
749
1022
  curr_cost = 0.0
750
1023
 
751
1024
 
1025
def range_format(start_line_num: Optional[int], end_line_num: Optional[int]) -> str:
    """Render a line range as a ``:start-end`` suffix.

    A falsy bound (``None`` or ``0``) is rendered as an empty string. When both
    bounds are falsy the result is the empty string, so nothing is appended.
    """
    if not (start_line_num or end_line_num):
        return ""
    first = str(start_line_num) if start_line_num else ""
    last = str(end_line_num) if end_line_num else ""
    return ":" + first + "-" + last
1031
+
1032
+
752
1033
  def read_files(
753
- file_paths: list[str], max_tokens: Optional[int], context: Context
754
- ) -> str:
1034
+ file_paths: list[str],
1035
+ max_tokens: Optional[int],
1036
+ context: Context,
1037
+ show_line_numbers: bool = False,
1038
+ start_line_nums: Optional[list[Optional[int]]] = None,
1039
+ end_line_nums: Optional[list[Optional[int]]] = None,
1040
+ ) -> tuple[
1041
+ str, dict[str, list[tuple[int, int]]], bool
1042
+ ]: # Updated to return file paths with ranges
755
1043
  message = ""
1044
+ file_ranges_dict: dict[
1045
+ str, list[tuple[int, int]]
1046
+ ] = {} # Map file paths to line ranges
1047
+
1048
+ workspace_path = context.bash_state.workspace_root
1049
+ stats = load_workspace_stats(workspace_path)
1050
+
1051
+ for path_ in file_paths:
1052
+ path_ = expand_user(path_)
1053
+ if not os.path.isabs(path_):
1054
+ continue
1055
+ if path_ not in stats.files:
1056
+ stats.files[path_] = FileStats()
1057
+
1058
+ stats.files[path_].increment_read()
1059
+ save_workspace_stats(workspace_path, stats)
1060
+ truncated = False
756
1061
  for i, file in enumerate(file_paths):
757
1062
  try:
758
- content, truncated, tokens = read_file(file, max_tokens, context)
1063
+ # Use line numbers from parameters if provided
1064
+ start_line_num = None if start_line_nums is None else start_line_nums[i]
1065
+ end_line_num = None if end_line_nums is None else end_line_nums[i]
1066
+
1067
+ # For backward compatibility, we still need to extract line numbers from path
1068
+ # if they weren't provided as parameters
1069
+ content, truncated, tokens, path, line_range = read_file(
1070
+ file,
1071
+ max_tokens,
1072
+ context,
1073
+ show_line_numbers,
1074
+ start_line_num,
1075
+ end_line_num,
1076
+ )
1077
+
1078
+ # Add file path with line range to dictionary
1079
+ if path in file_ranges_dict:
1080
+ file_ranges_dict[path].append(line_range)
1081
+ else:
1082
+ file_ranges_dict[path] = [line_range]
759
1083
  except Exception as e:
760
1084
  message += f"\n{file}: {str(e)}\n"
761
1085
  continue
@@ -763,7 +1087,8 @@ def read_files(
763
1087
  if max_tokens:
764
1088
  max_tokens = max_tokens - tokens
765
1089
 
766
- message += f"\n``` {file}\n{content}\n"
1090
+ range_formatted = range_format(start_line_num, end_line_num)
1091
+ message += f"\n{file}{range_formatted}\n```\n{content}\n"
767
1092
 
768
1093
  if truncated or (max_tokens and max_tokens <= 0):
769
1094
  not_reading = file_paths[i + 1 :]
@@ -772,15 +1097,21 @@ def read_files(
772
1097
  break
773
1098
  else:
774
1099
  message += "```"
775
-
776
- return message
1100
+ return message, file_ranges_dict, truncated
777
1101
 
778
1102
 
779
1103
  def read_file(
780
- file_path: str, max_tokens: Optional[int], context: Context
781
- ) -> tuple[str, bool, int]:
1104
+ file_path: str,
1105
+ max_tokens: Optional[int],
1106
+ context: Context,
1107
+ show_line_numbers: bool = False,
1108
+ start_line_num: Optional[int] = None,
1109
+ end_line_num: Optional[int] = None,
1110
+ ) -> tuple[str, bool, int, str, tuple[int, int]]:
782
1111
  context.console.print(f"Reading file: {file_path}")
783
1112
 
1113
+ # Line numbers are now passed as parameters, no need to parse from path
1114
+
784
1115
  # Expand the path before checking if it's absolute
785
1116
  file_path = expand_user(file_path)
786
1117
 
@@ -789,28 +1120,83 @@ def read_file(
789
1120
  f"Failure: file_path should be absolute path, current working directory is {context.bash_state.cwd}"
790
1121
  )
791
1122
 
792
- context.bash_state.add_to_whitelist_for_overwrite(file_path)
793
-
794
1123
  path = Path(file_path)
795
1124
  if not path.exists():
796
1125
  raise ValueError(f"Error: file {file_path} does not exist")
797
1126
 
1127
+ # Read all lines of the file
798
1128
  with path.open("r") as f:
799
- content = f.read(10_000_000)
1129
+ all_lines = f.readlines(10_000_000)
1130
+
1131
+ if all_lines[-1].endswith("\n"):
1132
+ # Special handling of line counts because readlines doesn't consider last empty line as a separate line
1133
+ all_lines[-1] = all_lines[-1][:-1]
1134
+ all_lines.append("")
1135
+
1136
+ total_lines = len(all_lines)
1137
+
1138
+ # Apply line range filtering if specified
1139
+ start_idx = 0
1140
+ if start_line_num is not None:
1141
+ # Convert 1-indexed line number to 0-indexed
1142
+ start_idx = max(0, start_line_num - 1)
1143
+
1144
+ end_idx = len(all_lines)
1145
+ if end_line_num is not None:
1146
+ # end_line_num is inclusive, so we use min to ensure it's within bounds
1147
+ end_idx = min(len(all_lines), end_line_num)
1148
+
1149
+ # Convert back to 1-indexed line numbers for tracking
1150
+ effective_start = start_line_num if start_line_num is not None else 1
1151
+ effective_end = end_line_num if end_line_num is not None else total_lines
1152
+
1153
+ filtered_lines = all_lines[start_idx:end_idx]
1154
+
1155
+ # Create content with or without line numbers
1156
+ if show_line_numbers:
1157
+ content_lines = []
1158
+ for i, line in enumerate(filtered_lines, start=start_idx + 1):
1159
+ content_lines.append(f"{i} {line}")
1160
+ content = "".join(content_lines)
1161
+ else:
1162
+ content = "".join(filtered_lines)
800
1163
 
801
1164
  truncated = False
802
1165
  tokens_counts = 0
1166
+
1167
+ # Handle token limit if specified
803
1168
  if max_tokens is not None:
804
1169
  tokens = default_enc.encoder(content)
805
1170
  tokens_counts = len(tokens)
1171
+
806
1172
  if len(tokens) > max_tokens:
807
- content = default_enc.decoder(tokens[:max_tokens])
808
- rest = save_out_of_context(
809
- default_enc.decoder(tokens[max_tokens:]), Path(file_path).suffix
810
- )
811
- content += f"\n(...truncated)\n---\nI've saved the continuation in a new file. You may want to read: `{rest}`"
1173
+ # Truncate at token boundary first
1174
+ truncated_tokens = tokens[:max_tokens]
1175
+ truncated_content = default_enc.decoder(truncated_tokens)
1176
+
1177
+ # Count how many lines we kept
1178
+ line_count = truncated_content.count("\n")
1179
+
1180
+ # Calculate the last line number shown (1-indexed)
1181
+ last_line_shown = start_idx + line_count
1182
+
1183
+ content = truncated_content
1184
+ # Add informative message about truncation with total line count
1185
+ total_lines = len(all_lines)
1186
+ content += f"\n(...truncated) Only showing till line number {last_line_shown} of {total_lines} total lines due to the token limit, please continue reading from {last_line_shown + 1} if required"
812
1187
  truncated = True
813
- return content, truncated, tokens_counts
1188
+
1189
+ # Update effective_end if truncated
1190
+ effective_end = last_line_shown
1191
+
1192
+ # Return the content along with the effective line range that was read
1193
+ return (
1194
+ content,
1195
+ truncated,
1196
+ tokens_counts,
1197
+ file_path,
1198
+ (effective_start, effective_end),
1199
+ )
814
1200
 
815
1201
 
816
1202
  if __name__ == "__main__":
@@ -851,3 +1237,32 @@ if __name__ == "__main__":
851
1237
  None,
852
1238
  )
853
1239
  )
1240
+
1241
+ print(
1242
+ get_tool_output(
1243
+ Context(BASH_STATE, BASH_STATE.console),
1244
+ ReadFiles(
1245
+ file_paths=["/Users/arusia/repos/wcgw/src/wcgw/client/tools.py"],
1246
+ show_line_numbers_reason="true",
1247
+ ),
1248
+ default_enc,
1249
+ 0,
1250
+ lambda x, y: ("", 0),
1251
+ 15000,
1252
+ )[0][0]
1253
+ )
1254
+
1255
+ print(
1256
+ get_tool_output(
1257
+ Context(BASH_STATE, BASH_STATE.console),
1258
+ FileWriteOrEdit(
1259
+ file_path="/Users/arusia/repos/wcgw/src/wcgw/client/tools.py",
1260
+ file_content_or_search_replace_blocks="""test""",
1261
+ percentage_to_change=100,
1262
+ ),
1263
+ default_enc,
1264
+ 0,
1265
+ lambda x, y: ("", 0),
1266
+ 800,
1267
+ )[0][0]
1268
+ )