wcgw 3.0.7__py3-none-any.whl → 4.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of wcgw might be problematic. Click here for more details.

@@ -6,6 +6,7 @@ import threading
6
6
  import time
7
7
  import traceback
8
8
  from dataclasses import dataclass
9
+ from hashlib import sha256
9
10
  from typing import (
10
11
  Any,
11
12
  Literal,
@@ -124,7 +125,6 @@ def cleanup_all_screens_with_name(name: str, console: Console) -> None:
124
125
  session_info = line.split()[0].strip() # e.g., "1234.my_screen"
125
126
  if session_info.endswith(f".{name}"):
126
127
  sessions_to_kill.append(session_info)
127
-
128
128
  # Now, for every session we found, tell screen to quit it.
129
129
  for session in sessions_to_kill:
130
130
  try:
@@ -232,11 +232,13 @@ class BashState:
232
232
  write_if_empty_mode: Optional[WriteIfEmptyMode],
233
233
  mode: Optional[Modes],
234
234
  use_screen: bool,
235
- whitelist_for_overwrite: Optional[set[str]] = None,
235
+ whitelist_for_overwrite: Optional[dict[str, "FileWhitelistData"]] = None,
236
236
  ) -> None:
237
237
  self._last_command: str = ""
238
238
  self.console = console
239
239
  self._cwd = working_dir or os.getcwd()
240
+ # Store the workspace root separately from the current working directory
241
+ self._workspace_root = working_dir or os.getcwd()
240
242
  self._bash_command_mode: BashCommandMode = bash_command_mode or BashCommandMode(
241
243
  "normal_mode", "all"
242
244
  )
@@ -245,7 +247,9 @@ class BashState:
245
247
  write_if_empty_mode or WriteIfEmptyMode("all")
246
248
  )
247
249
  self._mode = mode or "wcgw"
248
- self._whitelist_for_overwrite: set[str] = whitelist_for_overwrite or set()
250
+ self._whitelist_for_overwrite: dict[str, FileWhitelistData] = (
251
+ whitelist_for_overwrite or {}
252
+ )
249
253
  self._bg_expect_thread: Optional[threading.Thread] = None
250
254
  self._bg_expect_thread_stop_event = threading.Event()
251
255
  self._use_screen = use_screen
@@ -253,7 +257,12 @@ class BashState:
253
257
 
254
258
  def expect(self, pattern: Any, timeout: Optional[float] = -1) -> int:
255
259
  self.close_bg_expect_thread()
256
- output = self._shell.expect(pattern, timeout)
260
+ try:
261
+ output = self._shell.expect(pattern, timeout)
262
+ except pexpect.TIMEOUT:
263
+ # Edge case: gets raised when the child fd is not ready in some timeout
264
+ # pexpect/utils.py:143
265
+ return 1
257
266
  return output
258
267
 
259
268
  def send(self, s: str | bytes, set_as_command: Optional[str]) -> int:
@@ -314,9 +323,9 @@ class BashState:
314
323
  self._bg_expect_thread_stop_event = threading.Event()
315
324
 
316
325
  def cleanup(self) -> None:
326
+ cleanup_all_screens_with_name(self._shell_id, self.console)
317
327
  self.close_bg_expect_thread()
318
328
  self._shell.close(True)
319
- cleanup_all_screens_with_name(self._shell_id, self.console)
320
329
 
321
330
  def __enter__(self) -> "BashState":
322
331
  return self
@@ -427,6 +436,15 @@ class BashState:
427
436
  def cwd(self) -> str:
428
437
  return self._cwd
429
438
 
439
+ @property
440
+ def workspace_root(self) -> str:
441
+ """Return the workspace root directory."""
442
+ return self._workspace_root
443
+
444
+ def set_workspace_root(self, workspace_root: str) -> None:
445
+ """Set the workspace root directory."""
446
+ self._workspace_root = workspace_root
447
+
430
448
  @property
431
449
  def prompt(self) -> str:
432
450
  return PROMPT_CONST
@@ -454,20 +472,56 @@ class BashState:
454
472
  "bash_command_mode": self._bash_command_mode.serialize(),
455
473
  "file_edit_mode": self._file_edit_mode.serialize(),
456
474
  "write_if_empty_mode": self._write_if_empty_mode.serialize(),
457
- "whitelist_for_overwrite": list(self._whitelist_for_overwrite),
475
+ "whitelist_for_overwrite": {
476
+ k: v.serialize() for k, v in self._whitelist_for_overwrite.items()
477
+ },
458
478
  "mode": self._mode,
479
+ "workspace_root": self._workspace_root,
459
480
  }
460
481
 
461
482
  @staticmethod
462
483
  def parse_state(
463
484
  state: dict[str, Any],
464
- ) -> tuple[BashCommandMode, FileEditMode, WriteIfEmptyMode, Modes, list[str]]:
485
+ ) -> tuple[
486
+ BashCommandMode,
487
+ FileEditMode,
488
+ WriteIfEmptyMode,
489
+ Modes,
490
+ dict[str, "FileWhitelistData"],
491
+ str,
492
+ ]:
493
+ whitelist_state = state["whitelist_for_overwrite"]
494
+ # Convert serialized whitelist data back to FileWhitelistData objects
495
+ whitelist_dict = {}
496
+ if isinstance(whitelist_state, dict):
497
+ for file_path, data in whitelist_state.items():
498
+ if isinstance(data, dict) and "file_hash" in data:
499
+ # New format
500
+ whitelist_dict[file_path] = FileWhitelistData.deserialize(data)
501
+ else:
502
+ # Legacy format (just a hash string)
503
+ # Try to get line count from file if it exists, otherwise use a large default
504
+ whitelist_dict[file_path] = FileWhitelistData(
505
+ file_hash=data if isinstance(data, str) else "",
506
+ line_ranges_read=[(1, 1000000)], # Assume entire file was read
507
+ total_lines=1000000,
508
+ )
509
+ else:
510
+ # Handle really old format if needed
511
+ whitelist_dict = {
512
+ k: FileWhitelistData(
513
+ file_hash="", line_ranges_read=[(1, 1000000)], total_lines=1000000
514
+ )
515
+ for k in whitelist_state
516
+ }
517
+
465
518
  return (
466
519
  BashCommandMode.deserialize(state["bash_command_mode"]),
467
520
  FileEditMode.deserialize(state["file_edit_mode"]),
468
521
  WriteIfEmptyMode.deserialize(state["write_if_empty_mode"]),
469
522
  state["mode"],
470
- state["whitelist_for_overwrite"],
523
+ whitelist_dict,
524
+ state.get("workspace_root", ""),
471
525
  )
472
526
 
473
527
  def load_state(
@@ -476,15 +530,17 @@ class BashState:
476
530
  file_edit_mode: FileEditMode,
477
531
  write_if_empty_mode: WriteIfEmptyMode,
478
532
  mode: Modes,
479
- whitelist_for_overwrite: list[str],
533
+ whitelist_for_overwrite: dict[str, "FileWhitelistData"],
480
534
  cwd: str,
535
+ workspace_root: str,
481
536
  ) -> None:
482
537
  """Create a new BashState instance from a serialized state dictionary"""
483
538
  self._bash_command_mode = bash_command_mode
484
539
  self._cwd = cwd or self._cwd
540
+ self._workspace_root = workspace_root or cwd or self._workspace_root
485
541
  self._file_edit_mode = file_edit_mode
486
542
  self._write_if_empty_mode = write_if_empty_mode
487
- self._whitelist_for_overwrite = set(whitelist_for_overwrite)
543
+ self._whitelist_for_overwrite = dict(whitelist_for_overwrite)
488
544
  self._mode = mode
489
545
  self.reset_shell()
490
546
 
@@ -505,17 +561,132 @@ class BashState:
505
561
  return "Not pending"
506
562
 
507
563
  @property
508
- def whitelist_for_overwrite(self) -> set[str]:
564
+ def whitelist_for_overwrite(self) -> dict[str, "FileWhitelistData"]:
509
565
  return self._whitelist_for_overwrite
510
566
 
511
- def add_to_whitelist_for_overwrite(self, file_path: str) -> None:
512
- self._whitelist_for_overwrite.add(file_path)
567
+ def add_to_whitelist_for_overwrite(
568
+ self, file_paths_with_ranges: dict[str, list[tuple[int, int]]]
569
+ ) -> None:
570
+ """
571
+ Add files to the whitelist for overwrite.
572
+
573
+ Args:
574
+ file_paths_with_ranges: Dictionary mapping file paths to sequences of
575
+ (start_line, end_line) tuples representing
576
+ the ranges that have been read.
577
+ """
578
+ for file_path, ranges in file_paths_with_ranges.items():
579
+ # Read the file to get its hash and count lines
580
+ with open(file_path, "rb") as f:
581
+ file_content = f.read()
582
+ file_hash = sha256(file_content).hexdigest()
583
+ total_lines = file_content.count(b"\n") + 1
584
+
585
+ # Update or create whitelist entry
586
+ if file_path in self._whitelist_for_overwrite:
587
+ # Update existing entry
588
+ whitelist_data = self._whitelist_for_overwrite[file_path]
589
+ whitelist_data.file_hash = file_hash
590
+ whitelist_data.total_lines = total_lines
591
+ for range_start, range_end in ranges:
592
+ whitelist_data.add_range(range_start, range_end)
593
+ else:
594
+ # Create new entry
595
+ self._whitelist_for_overwrite[file_path] = FileWhitelistData(
596
+ file_hash=file_hash,
597
+ line_ranges_read=list(ranges),
598
+ total_lines=total_lines,
599
+ )
513
600
 
514
601
  @property
515
602
  def pending_output(self) -> str:
516
603
  return self._pending_output
517
604
 
518
605
 
606
+ @dataclass
607
+ class FileWhitelistData:
608
+ """Data about a file that has been read and can be modified."""
609
+
610
+ file_hash: str
611
+ # List of line ranges that have been read (inclusive start, inclusive end)
612
+ # E.g., [(1, 10), (20, 30)] means lines 1-10 and 20-30 have been read
613
+ line_ranges_read: list[tuple[int, int]]
614
+ # Total number of lines in the file
615
+ total_lines: int
616
+
617
+ def get_percentage_read(self) -> float:
618
+ """Calculate percentage of file read based on line ranges."""
619
+ if self.total_lines == 0:
620
+ return 100.0
621
+
622
+ # Count unique lines read
623
+ lines_read: set[int] = set()
624
+ for start, end in self.line_ranges_read:
625
+ lines_read.update(range(start, end + 1))
626
+
627
+ return (len(lines_read) / self.total_lines) * 100.0
628
+
629
+ def is_read_enough(self) -> bool:
630
+ """Check if enough of the file has been read (>=99%)"""
631
+ return self.get_percentage_read() >= 99
632
+
633
+ def get_unread_ranges(self) -> list[tuple[int, int]]:
634
+ """Return a list of line ranges (start, end) that haven't been read yet.
635
+
636
+ Returns line ranges as tuples of (start_line, end_line) in 1-indexed format.
637
+ If the whole file has been read, returns an empty list.
638
+ """
639
+ if self.total_lines == 0:
640
+ return []
641
+
642
+ # First collect all lines that have been read
643
+ lines_read: set[int] = set()
644
+ for start, end in self.line_ranges_read:
645
+ lines_read.update(range(start, end + 1))
646
+
647
+ # Generate unread ranges from the gaps
648
+ unread_ranges: list[tuple[int, int]] = []
649
+ start_range = None
650
+
651
+ for i in range(1, self.total_lines + 1):
652
+ if i not in lines_read:
653
+ if start_range is None:
654
+ start_range = i
655
+ elif start_range is not None:
656
+ # End of an unread range
657
+ unread_ranges.append((start_range, i - 1))
658
+ start_range = None
659
+
660
+ # Don't forget the last range if it extends to the end of the file
661
+ if start_range is not None:
662
+ unread_ranges.append((start_range, self.total_lines))
663
+
664
+ return unread_ranges
665
+
666
+ def add_range(self, start: int, end: int) -> None:
667
+ """Add a new range of lines that have been read."""
668
+ # Merge with existing ranges if possible
669
+ self.line_ranges_read.append((start, end))
670
+ # Could add range merging logic here for optimization
671
+
672
+ def serialize(self) -> dict[str, Any]:
673
+ """Convert to a serializable dictionary."""
674
+ return {
675
+ "file_hash": self.file_hash,
676
+ "line_ranges_read": self.line_ranges_read,
677
+ "total_lines": self.total_lines,
678
+ }
679
+
680
+ @classmethod
681
+ def deserialize(cls, data: dict[str, Any]) -> "FileWhitelistData":
682
+ """Create from a serialized dictionary."""
683
+ return cls(
684
+ file_hash=data.get("file_hash", ""),
685
+ line_ranges_read=data.get("line_ranges_read", []),
686
+ total_lines=data.get("total_lines", 0),
687
+ )
688
+
689
+
519
690
  WAITING_INPUT_MESSAGE = """A command is already running. NOTE: You can't run multiple shell sessions, likely a previous program hasn't exited.
520
691
  1. Get its output using status check.
521
692
  2. Use `send_ascii` or `send_specials` to give inputs to the running program OR
@@ -1,17 +1,40 @@
1
1
 
2
2
  Instructions for editing files.
3
+ # Example
4
+ ## Input file
5
+ ```
6
+ import numpy as np
7
+ from impls import impl1, impl2
8
+
9
+ def hello():
10
+ "print a greeting"
3
11
 
12
+ print("hello")
4
13
 
5
- Only edit the files using the following SEARCH/REPLACE blocks.
14
+ def call_hello():
15
+ "call hello"
16
+
17
+ hello()
18
+ print("Called")
19
+ impl1()
20
+ hello()
21
+ impl2()
22
+
23
+ ```
24
+ ## Edit format on the input file
6
25
  ```
7
- file_edit_using_search_replace_blocks="""
26
+ <<<<<<< SEARCH
27
+ from impls import impl1, impl2
28
+ =======
29
+ from impls import impl1, impl2
30
+ from hello import hello as hello_renamed
31
+ >>>>>>> REPLACE
8
32
  <<<<<<< SEARCH
9
33
  def hello():
10
34
  "print a greeting"
11
35
 
12
36
  print("hello")
13
37
  =======
14
- from hello import hello as hello_renamed
15
38
  >>>>>>> REPLACE
16
39
  <<<<<<< SEARCH
17
40
  def call_hello():
@@ -33,26 +56,17 @@ def call_hello_renamed():
33
56
  hello_renamed()
34
57
  impl2()
35
58
  >>>>>>> REPLACE
36
- """
37
59
  ```
38
60
 
39
61
  # *SEARCH/REPLACE block* Rules:
40
-
41
- Every *SEARCH/REPLACE block* must use this format:
42
- 1. The start of match block: <<<<<<< SEARCH
43
- 2. A contiguous chunk of lines to do exact match for in the existing source code
44
- 3. The dividing line: =======
45
- 4. The lines to replace into the source code
46
- 5. The end of the replace block: >>>>>>> REPLACE
47
-
48
62
  Every "<<<<<<< SEARCH" section must *EXACTLY MATCH* the existing file content, character for character, including all comments, docstrings, whitespaces, etc.
49
63
 
50
64
  Including multiple unique *SEARCH/REPLACE* blocks if needed.
51
- Include enough lines in each SEARCH section to uniquely match each set of lines that need to change.
65
+ Include enough and only enough lines in each SEARCH section to uniquely match each set of lines that need to change.
52
66
 
53
67
  Keep *SEARCH/REPLACE* blocks concise.
54
68
  Break large *SEARCH/REPLACE* blocks into a series of smaller blocks that each change a small portion of the file.
55
- Include just the changing lines, and a few surrounding lines if needed for uniqueness.
56
- Do not include long runs of unchanging lines in *SEARCH/REPLACE* blocks.
69
+ Include just the changing lines, and a few surrounding lines (2-3 lines) if needed for uniqueness.
70
+ Other than for uniqueness, avoid including those lines which do not change in search (and replace) blocks. Target 2-3 non trivial extra lines per block.
57
71
 
58
72
  Preserve leading spaces and indentations in both SEARCH and REPLACE blocks.
@@ -7,7 +7,13 @@ TOLERANCE_TYPES = Literal["SILENT", "WARNING", "ERROR"]
7
7
 
8
8
 
9
9
  class SearchReplaceMatchError(Exception):
10
- pass
10
+ def __init__(self, message: str):
11
+ message = f"""
12
+ {message}
13
+ ---
14
+ Retry immediately with same "percentage_to_change" using search replace blocks fixing above error.
15
+ """
16
+ super().__init__(message)
11
17
 
12
18
 
13
19
  @dataclass
@@ -27,7 +33,9 @@ class TolerancesHit(Tolerance):
27
33
  class FileEditOutput:
28
34
  original_content: list[str]
29
35
  orig_search_blocks: list[list[str]]
30
- edited_with_tolerances: list[tuple[slice, list[TolerancesHit], list[str]]]
36
+ edited_with_tolerances: list[
37
+ tuple[slice, list[TolerancesHit], list[str]]
38
+ ] # Need not be equal to orig_search_blocks when early exit
31
39
 
32
40
  def replace_or_throw(
33
41
  self,
@@ -45,11 +53,12 @@ class FileEditOutput:
45
53
  if tol.severity_cat == "WARNING":
46
54
  warnings.add(tol.error_name)
47
55
  elif tol.severity_cat == "ERROR":
56
+ search__ = "\n".join(search_)
48
57
  errors.append(f"""
49
58
  Got error while processing the following search block:
50
59
  ---
51
60
  ```
52
- {"\n".join(search_)}
61
+ {search__}
53
62
  ```
54
63
  ---
55
64
  Error:
@@ -93,8 +102,7 @@ Error:
93
102
  best_score = hit_score
94
103
  elif abs(hit_score - best_score) < 1e-3:
95
104
  best_hits.append(output)
96
-
97
- return best_hits, best_score < 0
105
+ return best_hits, best_score > 1000
98
106
 
99
107
 
100
108
  def line_process_max_space_tolerance(line: str) -> str:
@@ -204,7 +212,7 @@ class FileEditInput:
204
212
  TolerancesHit(
205
213
  line_process=lambda x: x,
206
214
  severity_cat="ERROR",
207
- score_multiplier=float("-inf"),
215
+ score_multiplier=float("inf"),
208
216
  error_name="The blocks couldn't be matched, maybe the sequence of search blocks was incorrect?",
209
217
  count=max(1, len(search_lines)),
210
218
  )
@@ -240,6 +248,7 @@ class FileEditInput:
240
248
 
241
249
  # search for first block
242
250
  first_block = self.search_replace_blocks[self.search_replace_offset]
251
+ replace_by = first_block[1]
243
252
 
244
253
  # Try exact match
245
254
  matches = match_exact(self.file_lines, self.file_line_offset, first_block[0])
@@ -251,7 +260,6 @@ class FileEditInput:
251
260
  matches_with_tolerances = match_with_tolerance(
252
261
  self.file_lines, self.file_line_offset, first_block[0], self.tolerances
253
262
  )
254
- replace_by = first_block[1]
255
263
  if not matches_with_tolerances:
256
264
  # Try with no empty lines
257
265
  matches_with_tolerances = match_with_tolerance_empty_line(
@@ -277,8 +285,8 @@ class FileEditInput:
277
285
  TolerancesHit(
278
286
  lambda x: x,
279
287
  "ERROR",
280
- -1,
281
- "Couldn't find match. Do you mean to match the lines in the following context?\n```"
288
+ float("inf"),
289
+ "Couldn't find match. Here's the latest snippet from the file which might be relevant for you to consider:\n```"
282
290
  + sim_context
283
291
  + "\n```",
284
292
  int(len(first_block[0]) // sim_sim),
@@ -287,51 +295,40 @@ class FileEditInput:
287
295
  )
288
296
  ]
289
297
 
290
- for match, tolerances in matches_with_tolerances:
291
- if any(
292
- tolerance.error_name == REMOVE_INDENTATION
293
- for tolerance in tolerances
294
- ):
295
- replace_by = fix_indentation(
296
- self.file_lines[match.start : match.stop],
297
- first_block[0],
298
- replace_by,
299
- )
300
-
301
- file_edit_input = FileEditInput(
302
- self.file_lines,
303
- match.stop,
304
- self.search_replace_blocks,
305
- self.search_replace_offset + 1,
306
- self.tolerances,
298
+ else:
299
+ matches_with_tolerances = [(match, []) for match in matches]
300
+
301
+ for match, tolerances in matches_with_tolerances:
302
+ if any(
303
+ tolerance.error_name == REMOVE_INDENTATION for tolerance in tolerances
304
+ ):
305
+ replace_by = fix_indentation(
306
+ self.file_lines[match.start : match.stop],
307
+ first_block[0],
308
+ replace_by,
307
309
  )
308
310
 
309
- remaining_output = file_edit_input.edit_file()
310
- for rem_output in remaining_output:
311
- all_outputs.append(
312
- [
313
- (match, tolerances, replace_by),
314
- *rem_output.edited_with_tolerances,
315
- ]
316
- )
317
- else:
318
- for match in matches:
319
- file_edit_input = FileEditInput(
320
- self.file_lines,
321
- match.stop,
322
- self.search_replace_blocks,
323
- self.search_replace_offset + 1,
324
- self.tolerances,
311
+ file_edit_input = FileEditInput(
312
+ self.file_lines,
313
+ match.stop,
314
+ self.search_replace_blocks,
315
+ self.search_replace_offset + 1,
316
+ self.tolerances,
317
+ )
318
+
319
+ if any(tolerance.severity_cat == "ERROR" for tolerance in tolerances):
320
+ # Exit early
321
+ all_outputs.append(
322
+ [
323
+ (match, tolerances, replace_by),
324
+ ]
325
325
  )
326
+ else:
326
327
  remaining_output = file_edit_input.edit_file()
327
328
  for rem_output in remaining_output:
328
329
  all_outputs.append(
329
330
  [
330
- (
331
- match,
332
- [],
333
- first_block[1],
334
- ),
331
+ (match, tolerances, replace_by),
335
332
  *rem_output.edited_with_tolerances,
336
333
  ]
337
334
  )