wcgw 2.2.2__tar.gz → 2.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of wcgw might be problematic. Click here for more details.

Files changed (39)
  1. {wcgw-2.2.2 → wcgw-2.3.1}/PKG-INFO +2 -2
  2. {wcgw-2.2.2 → wcgw-2.3.1}/gpt_instructions.txt +1 -0
  3. {wcgw-2.2.2 → wcgw-2.3.1}/pyproject.toml +3 -3
  4. {wcgw-2.2.2 → wcgw-2.3.1}/src/wcgw/client/anthropic_client.py +5 -2
  5. {wcgw-2.2.2 → wcgw-2.3.1}/src/wcgw/client/mcp_server/server.py +14 -6
  6. {wcgw-2.2.2 → wcgw-2.3.1}/src/wcgw/client/openai_client.py +1 -0
  7. {wcgw-2.2.2 → wcgw-2.3.1}/src/wcgw/client/tools.py +226 -77
  8. {wcgw-2.2.2 → wcgw-2.3.1}/src/wcgw/types_.py +2 -0
  9. {wcgw-2.2.2 → wcgw-2.3.1}/uv.lock +131 -134
  10. {wcgw-2.2.2 → wcgw-2.3.1}/.github/workflows/python-publish.yml +0 -0
  11. {wcgw-2.2.2 → wcgw-2.3.1}/.github/workflows/python-tests.yml +0 -0
  12. {wcgw-2.2.2 → wcgw-2.3.1}/.github/workflows/python-types.yml +0 -0
  13. {wcgw-2.2.2 → wcgw-2.3.1}/.gitignore +0 -0
  14. {wcgw-2.2.2 → wcgw-2.3.1}/.python-version +0 -0
  15. {wcgw-2.2.2 → wcgw-2.3.1}/.vscode/settings.json +0 -0
  16. {wcgw-2.2.2 → wcgw-2.3.1}/README.md +0 -0
  17. {wcgw-2.2.2 → wcgw-2.3.1}/gpt_action_json_schema.json +0 -0
  18. {wcgw-2.2.2 → wcgw-2.3.1}/openai.md +0 -0
  19. {wcgw-2.2.2 → wcgw-2.3.1}/src/__init__.py +0 -0
  20. {wcgw-2.2.2 → wcgw-2.3.1}/src/wcgw/__init__.py +0 -0
  21. {wcgw-2.2.2 → wcgw-2.3.1}/src/wcgw/client/__init__.py +0 -0
  22. {wcgw-2.2.2 → wcgw-2.3.1}/src/wcgw/client/__main__.py +0 -0
  23. {wcgw-2.2.2 → wcgw-2.3.1}/src/wcgw/client/cli.py +0 -0
  24. {wcgw-2.2.2 → wcgw-2.3.1}/src/wcgw/client/common.py +0 -0
  25. {wcgw-2.2.2 → wcgw-2.3.1}/src/wcgw/client/computer_use.py +0 -0
  26. {wcgw-2.2.2 → wcgw-2.3.1}/src/wcgw/client/diff-instructions.txt +0 -0
  27. {wcgw-2.2.2 → wcgw-2.3.1}/src/wcgw/client/mcp_server/Readme.md +0 -0
  28. {wcgw-2.2.2 → wcgw-2.3.1}/src/wcgw/client/mcp_server/__init__.py +0 -0
  29. {wcgw-2.2.2 → wcgw-2.3.1}/src/wcgw/client/openai_utils.py +0 -0
  30. {wcgw-2.2.2 → wcgw-2.3.1}/src/wcgw/client/sys_utils.py +0 -0
  31. {wcgw-2.2.2 → wcgw-2.3.1}/src/wcgw/relay/serve.py +0 -0
  32. {wcgw-2.2.2 → wcgw-2.3.1}/src/wcgw/relay/static/privacy.txt +0 -0
  33. {wcgw-2.2.2 → wcgw-2.3.1}/static/claude-ss.jpg +0 -0
  34. {wcgw-2.2.2 → wcgw-2.3.1}/static/computer-use.jpg +0 -0
  35. {wcgw-2.2.2 → wcgw-2.3.1}/static/example.jpg +0 -0
  36. {wcgw-2.2.2 → wcgw-2.3.1}/static/rocket-icon.png +0 -0
  37. {wcgw-2.2.2 → wcgw-2.3.1}/static/ss1.png +0 -0
  38. {wcgw-2.2.2 → wcgw-2.3.1}/tests/test_basic.py +0 -0
  39. {wcgw-2.2.2 → wcgw-2.3.1}/tests/test_tools.py +0 -0
{wcgw-2.2.2 → wcgw-2.3.1}/PKG-INFO +2 -2
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: wcgw
3
- Version: 2.2.2
3
+ Version: 2.3.1
4
4
  Summary: Shell and coding agent on claude and chatgpt
5
5
  Project-URL: Homepage, https://github.com/rusiaaman/wcgw
6
6
  Author-email: Aman Rusia <gapypi@arcfu.com>
@@ -9,7 +9,6 @@ Requires-Dist: anthropic>=0.39.0
9
9
  Requires-Dist: fastapi>=0.115.0
10
10
  Requires-Dist: humanize>=4.11.0
11
11
  Requires-Dist: mcp
12
- Requires-Dist: nltk>=3.9.1
13
12
  Requires-Dist: openai>=1.46.0
14
13
  Requires-Dist: petname>=2.6
15
14
  Requires-Dist: pexpect>=4.9.0
@@ -19,6 +18,7 @@ Requires-Dist: python-dotenv>=1.0.1
19
18
  Requires-Dist: rich>=13.8.1
20
19
  Requires-Dist: semantic-version>=2.10.0
21
20
  Requires-Dist: shell>=1.0.1
21
+ Requires-Dist: syntax-checker==0.2.10
22
22
  Requires-Dist: tiktoken==0.7.0
23
23
  Requires-Dist: toml>=0.10.2
24
24
  Requires-Dist: typer>=0.12.5
{wcgw-2.2.2 → wcgw-2.3.1}/gpt_instructions.txt +1 -0
@@ -23,6 +23,7 @@ Instructions for `BashCommand`:
23
23
  Instructions for `Read File`
24
24
  - Read full content of a file.
25
25
  - Provide absolute file path only.
26
+ - Use this instead of 'cat' from BashCommand
26
27
 
27
28
  Instructions for `Write if Empty`
28
29
  - Write content to an empty or non-existent file. Provide file path and content. Use this instead of BashCommand for writing new files.
{wcgw-2.2.2 → wcgw-2.3.1}/pyproject.toml +3 -3
@@ -1,7 +1,7 @@
1
1
  [project]
2
2
  authors = [{ name = "Aman Rusia", email = "gapypi@arcfu.com" }]
3
3
  name = "wcgw"
4
- version = "2.2.2"
4
+ version = "2.3.1"
5
5
  description = "Shell and coding agent on claude and chatgpt"
6
6
  readme = "README.md"
7
7
  requires-python = ">=3.11, <3.13"
@@ -22,10 +22,10 @@ dependencies = [
22
22
  "websockets>=13.1",
23
23
  "pydantic>=2.9.2",
24
24
  "semantic-version>=2.10.0",
25
- "nltk>=3.9.1",
26
25
  "anthropic>=0.39.0",
27
- "mcp",
28
26
  "humanize>=4.11.0",
27
+ "mcp",
28
+ "syntax-checker==0.2.10",
29
29
  ]
30
30
 
31
31
  [project.urls]
{wcgw-2.2.2 → wcgw-2.3.1}/src/wcgw/client/anthropic_client.py +5 -2
@@ -173,6 +173,7 @@ def loop(
173
173
  - Always run `pwd` if you get any file or directory not found error to make sure you're not lost.
174
174
  - The control will return to you in 5 seconds regardless of the status. For heavy commands, keep checking status using BashInteraction till they are finished.
175
175
  - Run long running commands in background using screen instead of "&".
176
+ - Use longer wait_for_seconds if the command is expected to run for a long time.
176
177
  """,
177
178
  ),
178
179
  ToolParam(
@@ -185,8 +186,9 @@ def loop(
185
186
  - Send send_specials=["Enter"] to recheck status of a running program.
186
187
  - Only one of send_text, send_specials, send_ascii should be provided.
187
188
  - This returns within 5 seconds, for heavy programs keep checking status for upto 10 turns before asking user to continue checking again.
188
- - Programs don't hang easily, so most likely explanation for no output is usually that the program is still running, and you need to check status again using ["Enter"].
189
- - Do not send Ctrl-c before checking for status till 10 minutes or whatever is appropriate for the program to finish.
189
+ - Programs don't hang easily, so most likely explanation for no output is usually that the program is still running, and you need to check status again using ["Enter"].
190
+ - Do not send Ctrl-c before checking for status till 10 minutes or whatever is appropriate for the program to finish.
191
+ - Set longer wait_for_seconds when program is expected to run for a long time.
190
192
  """,
191
193
  ),
192
194
  ToolParam(
@@ -195,6 +197,7 @@ def loop(
195
197
  description="""
196
198
  - Read full file content
197
199
  - Provide absolute file path only
200
+ - Use this instead of 'cat' from BashCommand
198
201
  """,
199
202
  ),
200
203
  ToolParam(
{wcgw-2.2.2 → wcgw-2.3.1}/src/wcgw/client/mcp_server/server.py +14 -6
@@ -30,8 +30,6 @@ from ...types_ import (
30
30
  )
31
31
  from ..computer_use import SLEEP_TIME_MAX_S
32
32
 
33
- tools.TIMEOUT = SLEEP_TIME_MAX_S
34
-
35
33
  COMPUTER_USE_ON_DOCKER_ENABLED = False
36
34
 
37
35
  server = Server("wcgw")
@@ -91,8 +89,9 @@ async def handle_list_tools() -> list[types.Tool]:
91
89
  - Optionally `exit shell has restarted` is the output, in which case environment resets, you can run fresh commands.
92
90
  - The first or the last line might be `(...truncated)` if the output is too long.
93
91
  - Always run `pwd` if you get any file or directory not found error to make sure you're not lost.
94
- - The control will return to you in 5 seconds regardless of the status. For heavy commands, keep checking status using BashInteraction till they are finished.
92
+ - The control will return to you in 3 seconds regardless of the status. For heavy commands, keep checking status using BashInteraction till they are finished.
95
93
  - Run long running commands in background using screen instead of "&".
94
+ - Use longer wait_for_seconds if the command is expected to run for a long time.
96
95
  """,
97
96
  ),
98
97
  ToolParam(
@@ -105,8 +104,9 @@ async def handle_list_tools() -> list[types.Tool]:
105
104
  - Send send_specials=["Enter"] to recheck status of a running program.
106
105
  - Only one of send_text, send_specials, send_ascii should be provided.
107
106
  - This returns within 3 seconds, for heavy programs keep checking status for upto 10 turns before asking user to continue checking again.
108
- - Programs don't hang easily, so most likely explanation for no output is usually that the program is still running, and you need to check status again using ["Enter"].
109
- - Do not send Ctrl-c before checking for status till 10 minutes or whatever is appropriate for the program to finish.
107
+ - Programs don't hang easily, so most likely explanation for no output is usually that the program is still running, and you need to check status again using ["Enter"].
108
+ - Do not send Ctrl-c before checking for status till 10 minutes or whatever is appropriate for the program to finish.
109
+ - Set longer wait_for_seconds when program is expected to run for a long time.
110
110
  """,
111
111
  ),
112
112
  ToolParam(
@@ -115,6 +115,7 @@ async def handle_list_tools() -> list[types.Tool]:
115
115
  description="""
116
116
  - Read full file content
117
117
  - Provide absolute file path only
118
+ - Use this instead of 'cat' from BashCommand
118
119
  """,
119
120
  ),
120
121
  ToolParam(
@@ -247,7 +248,9 @@ async def handle_call_tool(
247
248
  - Always read relevant files before editing.
248
249
  - Do not provide code snippets unless asked by the user, instead directly add/edit the code.
249
250
  - Do not install new tools/packages before ensuring no such tools/package or an alternative already exists.
250
-
251
+ - Do not use artifacts if you have access to the repository and not asked by the user to provide artifacts/snippets. Directly create/update using shell tools.
252
+ - Do not use Ctrl-c or Ctrl-z or interrupt commands without asking the user, because often the program don't show any update but they still are running.
253
+ - Do not use echo to write multi-line files, always use FileEdit tool to update a code.
251
254
 
252
255
  Additional instructions:
253
256
  Always run `pwd` if you get any file or directory not found error to make sure you're not lost, or to get absolute cwd.
@@ -270,6 +273,11 @@ async def handle_call_tool(
270
273
 
271
274
  async def main(computer_use: bool) -> None:
272
275
  global COMPUTER_USE_ON_DOCKER_ENABLED
276
+
277
+ tools.TIMEOUT = SLEEP_TIME_MAX_S
278
+
279
+ tools.console = tools.DisableConsole()
280
+
273
281
  if computer_use:
274
282
  COMPUTER_USE_ON_DOCKER_ENABLED = True
275
283
 
{wcgw-2.2.2 → wcgw-2.3.1}/src/wcgw/client/openai_client.py +1 -0
@@ -192,6 +192,7 @@ def loop(
192
192
  description="""
193
193
  - Read full file content
194
194
  - Provide absolute file path only
195
+ - Use this instead of 'cat' from BashCommand
195
196
  """,
196
197
  ),
197
198
  openai.pydantic_function_tool(
{wcgw-2.2.2 → wcgw-2.3.1}/src/wcgw/client/tools.py +226 -77
@@ -42,7 +42,7 @@ import rich
42
42
  import pyte
43
43
  from dotenv import load_dotenv
44
44
 
45
- import openai
45
+ from syntax_checker import check_syntax
46
46
  from openai import OpenAI
47
47
  from openai.types.chat import (
48
48
  ChatCompletionMessageParam,
@@ -50,7 +50,7 @@ from openai.types.chat import (
50
50
  ChatCompletionMessage,
51
51
  ParsedChatCompletionMessage,
52
52
  )
53
- from nltk.metrics.distance import edit_distance # type: ignore[import-untyped]
53
+ from difflib import SequenceMatcher
54
54
 
55
55
  from ..types_ import (
56
56
  BashCommand,
@@ -72,7 +72,18 @@ from .common import CostData, Models, discard_input
72
72
  from .sys_utils import command_run
73
73
  from .openai_utils import get_input_cost, get_output_cost
74
74
 
75
- console = rich.console.Console(style="magenta", highlight=False, markup=False)
75
+
76
+ class DisableConsole:
77
+ def print(self, *args, **kwargs): # type: ignore
78
+ pass
79
+
80
+ def log(self, *args, **kwargs): # type: ignore
81
+ pass
82
+
83
+
84
+ console: rich.console.Console | DisableConsole = rich.console.Console(
85
+ style="magenta", highlight=False, markup=False
86
+ )
76
87
 
77
88
  TIMEOUT = 5
78
89
 
@@ -159,7 +170,7 @@ def _get_exit_code(shell: pexpect.spawn) -> int: # type: ignore
159
170
  try:
160
171
  shell.expect(PROMPT, timeout=0.2)
161
172
  except pexpect.TIMEOUT:
162
- print(f"Couldn't get exit code, before: {before}")
173
+ console.print(f"Couldn't get exit code, before: {before}")
163
174
  raise
164
175
  assert isinstance(shell.before, str)
165
176
  # Render because there could be some anscii escape sequences still set like in google colab env
@@ -183,6 +194,7 @@ class BashState:
183
194
  self._is_in_docker: Optional[str] = ""
184
195
  self._cwd: str = os.getcwd()
185
196
  self._shell = start_shell()
197
+ self._whitelist_for_overwrite: set[str] = set()
186
198
 
187
199
  # Get exit info to ensure shell is ready
188
200
  _get_exit_code(self._shell)
@@ -235,6 +247,13 @@ class BashState:
235
247
  )
236
248
  return "Not pending"
237
249
 
250
+ @property
251
+ def whitelist_for_overwrite(self) -> set[str]:
252
+ return self._whitelist_for_overwrite
253
+
254
+ def add_to_whitelist_for_overwrite(self, file_path: str) -> None:
255
+ self._whitelist_for_overwrite.add(file_path)
256
+
238
257
 
239
258
  BASH_STATE = BashState()
240
259
 
@@ -269,18 +288,18 @@ def update_repl_prompt(command: str) -> bool:
269
288
  BASH_STATE.shell.sendintr()
270
289
  index = BASH_STATE.shell.expect([PROMPT, pexpect.TIMEOUT], timeout=0.2)
271
290
  if index == 0:
272
- return False
291
+ return True
273
292
  before = BASH_STATE.shell.before or ""
274
293
  assert before, "Something went wrong updating repl prompt"
275
294
  PROMPT = before.split("\n")[-1].strip()
276
295
  # Escape all regex
277
296
  PROMPT = re.escape(PROMPT)
278
- print(f"Trying to update prompt to: {PROMPT.encode()!r}")
297
+ console.print(f"Trying to update prompt to: {PROMPT.encode()!r}")
279
298
  index = 0
280
299
  while index == 0:
281
300
  # Consume all REPL prompts till now
282
301
  index = BASH_STATE.shell.expect([PROMPT, pexpect.TIMEOUT], timeout=0.2)
283
- print(f"Prompt updated to: {PROMPT}")
302
+ console.print(f"Prompt updated to: {PROMPT}")
284
303
  return True
285
304
  return False
286
305
 
@@ -301,6 +320,34 @@ def get_status() -> str:
301
320
  return status.rstrip()
302
321
 
303
322
 
323
+ T = TypeVar("T")
324
+
325
+
326
+ def save_out_of_context(
327
+ tokens: list[T],
328
+ max_tokens: int,
329
+ suffix: str,
330
+ tokens_converted: Callable[[list[T]], str],
331
+ ) -> tuple[str, list[Path]]:
332
+ file_contents = list[str]()
333
+ for i in range(0, len(tokens), max_tokens):
334
+ file_contents.append(tokens_converted(tokens[i : i + max_tokens]))
335
+
336
+ if len(file_contents) == 1:
337
+ return file_contents[0], []
338
+
339
+ rest_paths = list[Path]()
340
+ for i, content in enumerate(file_contents):
341
+ if i == 0:
342
+ continue
343
+ file_path = NamedTemporaryFile(delete=False, suffix=suffix).name
344
+ with open(file_path, "w") as f:
345
+ f.write(content)
346
+ rest_paths.append(Path(file_path))
347
+
348
+ return file_contents[0], rest_paths
349
+
350
+
304
351
  def execute_bash(
305
352
  enc: tiktoken.Encoding,
306
353
  bash_arg: BashCommand | BashInteraction,
@@ -472,7 +519,7 @@ def serve_image_in_bg(file_path: str, client_uuid: str, name: str) -> None:
472
519
  try:
473
520
  websocket.send(json.dumps(uu))
474
521
  except websockets.ConnectionClosed:
475
- print(f"Connection closed for UUID: {client_uuid}, retrying")
522
+ console.print(f"Connection closed for UUID: {client_uuid}, retrying")
476
523
  serve_image_in_bg(file_path, client_uuid, name)
477
524
 
478
525
 
@@ -490,8 +537,6 @@ class ImageData(BaseModel):
490
537
 
491
538
  Param = ParamSpec("Param")
492
539
 
493
- T = TypeVar("T")
494
-
495
540
 
496
541
  def ensure_no_previous_output(func: Callable[Param, T]) -> Callable[Param, T]:
497
542
  def wrapper(*args: Param.args, **kwargs: Param.kwargs) -> T:
@@ -537,20 +582,19 @@ def write_file(writefile: WriteIfEmpty, error_on_exist: bool) -> str:
537
582
  else:
538
583
  path_ = writefile.file_path
539
584
 
540
- error_on_exist = (
541
- not (
542
- len(TOOL_CALLS) > 1
543
- and isinstance(TOOL_CALLS[-2], FileEdit)
544
- and TOOL_CALLS[-2].file_path == path_
545
- )
546
- and error_on_exist
547
- )
548
-
585
+ error_on_exist_ = error_on_exist and path_ not in BASH_STATE.whitelist_for_overwrite
586
+ add_overwrite_warning = ""
549
587
  if not BASH_STATE.is_in_docker:
550
- if error_on_exist and os.path.exists(path_):
551
- file_data = Path(path_).read_text()
552
- if file_data:
553
- return f"Error: can't write to existing file {path_}, use other functions to edit the file"
588
+ if (error_on_exist or error_on_exist_) and os.path.exists(path_):
589
+ content = Path(path_).read_text().strip()
590
+ if content:
591
+ if error_on_exist_:
592
+ return f"Error: can't write to existing file {path_}, use other functions to edit the file"
593
+ elif error_on_exist:
594
+ add_overwrite_warning = content
595
+
596
+ # Since we've already errored once, add this to whitelist
597
+ BASH_STATE.add_to_whitelist_for_overwrite(path_)
554
598
 
555
599
  path = Path(path_)
556
600
  path.parent.mkdir(parents=True, exist_ok=True)
@@ -561,12 +605,19 @@ def write_file(writefile: WriteIfEmpty, error_on_exist: bool) -> str:
561
605
  except OSError as e:
562
606
  return f"Error: {e}"
563
607
  else:
564
- if error_on_exist:
565
- # Check if it exists using os.system
566
- cmd = f"test -f {shlex.quote(path_)}"
567
- status = os.system(f'docker exec {BASH_STATE.is_in_docker} bash -c "{cmd}"')
568
- if status == 0:
569
- return f"Error: can't write to existing file {path_}, use other functions to edit the file"
608
+ if error_on_exist or error_on_exist_:
609
+ return_code, content, stderr = command_run(
610
+ f"docker exec {BASH_STATE.is_in_docker} cat {shlex.quote(path_)}",
611
+ timeout=TIMEOUT,
612
+ )
613
+ if return_code != 0 and content.strip():
614
+ if error_on_exist_:
615
+ return f"Error: can't write to existing file {path_}, use other functions to edit the file"
616
+ else:
617
+ add_overwrite_warning = content
618
+
619
+ # Since we've already errored once, add this to whitelist
620
+ BASH_STATE.add_to_whitelist_for_overwrite(path_)
570
621
 
571
622
  with TemporaryDirectory() as tmpdir:
572
623
  tmppath = os.path.join(tmpdir, os.path.basename(path_))
@@ -586,42 +637,75 @@ def write_file(writefile: WriteIfEmpty, error_on_exist: bool) -> str:
586
637
  if rcode != 0:
587
638
  return f"Error: Write failed with code {rcode}"
588
639
 
640
+ extension = Path(path_).suffix.lstrip(".")
641
+
589
642
  console.print(f"File written to {path_}")
590
- return "Success"
643
+
644
+ warnings = []
645
+ try:
646
+ check = check_syntax(extension, writefile.file_content)
647
+ syntax_errors = check.description
648
+ if syntax_errors:
649
+ console.print(f"W: Syntax errors encountered: {syntax_errors}")
650
+ warnings.append(f"""
651
+ ---
652
+ Warning: tree-sitter reported syntax errors, please re-read the file and fix if any errors.
653
+ Errors:
654
+ {syntax_errors}
655
+ ---
656
+ """)
657
+
658
+ except Exception:
659
+ pass
660
+
661
+ if add_overwrite_warning:
662
+ warnings.append(
663
+ "\n---\nWarning: a file already existed and it's now overwritten. Was it a mistake? If yes please revert your action."
664
+ "Here's the previous content:\n```\n" + add_overwrite_warning + "\n```"
665
+ "\n---\n"
666
+ )
667
+
668
+ return "Success" + "".join(warnings)
591
669
 
592
670
 
593
671
  def find_least_edit_distance_substring(
594
- content: str, find_str: str
595
- ) -> tuple[str, str, float]:
596
- orig_content_lines = content.split("\n")
597
- content_lines = [
598
- line.strip() for line in orig_content_lines
599
- ] # Remove trailing and leading space for calculating edit distance
672
+ orig_content_lines: list[str], find_lines: list[str]
673
+ ) -> tuple[list[str], str]:
674
+ # Prepare content lines, stripping whitespace and keeping track of original indices
675
+ content_lines = [line.strip() for line in orig_content_lines]
600
676
  new_to_original_indices = {}
601
677
  new_content_lines = []
602
- for i in range(len(content_lines)):
603
- if not content_lines[i]:
678
+ for i, line in enumerate(content_lines):
679
+ if not line:
604
680
  continue
605
- new_content_lines.append(content_lines[i])
681
+ new_content_lines.append(line)
606
682
  new_to_original_indices[len(new_content_lines) - 1] = i
607
683
  content_lines = new_content_lines
608
- find_lines = find_str.split("\n")
609
- find_lines = [
610
- line.strip() for line in find_lines if line.strip()
611
- ] # Remove trailing and leading space for calculating edit distance
612
- # Slide window and find one with sum of edit distance least
613
- min_edit_distance = float("inf")
684
+
685
+ # Prepare find lines, removing empty lines
686
+ find_lines = [line.strip() for line in find_lines if line.strip()]
687
+
688
+ # Initialize variables for best match tracking
689
+ max_similarity = 0.0
614
690
  min_edit_distance_lines = []
615
691
  context_lines = []
692
+
693
+ # For each possible starting position in content
616
694
  for i in range(max(1, len(content_lines) - len(find_lines) + 1)):
617
- edit_distance_sum = 0
695
+ # Calculate similarity for the block starting at position i
696
+ block_similarity = 0.0
618
697
  for j in range(len(find_lines)):
619
698
  if (i + j) < len(content_lines):
620
- edit_distance_sum += edit_distance(content_lines[i + j], find_lines[j])
621
- else:
622
- edit_distance_sum += len(find_lines[j])
623
- if edit_distance_sum < min_edit_distance:
624
- min_edit_distance = edit_distance_sum
699
+ # Use SequenceMatcher for more efficient similarity calculation
700
+ similarity = SequenceMatcher(
701
+ None, content_lines[i + j], find_lines[j]
702
+ ).ratio()
703
+ block_similarity += similarity
704
+
705
+ # If this block is more similar than previous best
706
+ if block_similarity > max_similarity:
707
+ max_similarity = block_similarity
708
+ # Map back to original line indices
625
709
  orig_start_index = new_to_original_indices[i]
626
710
  orig_end_index = (
627
711
  new_to_original_indices.get(
@@ -629,42 +713,79 @@ def find_least_edit_distance_substring(
629
713
  )
630
714
  + 1
631
715
  )
716
+ # Get the original lines
632
717
  min_edit_distance_lines = orig_content_lines[
633
718
  orig_start_index:orig_end_index
634
719
  ]
635
-
720
+ # Get context (10 lines before and after)
636
721
  context_lines = orig_content_lines[
637
722
  max(0, orig_start_index - 10) : (orig_end_index + 10)
638
723
  ]
724
+
639
725
  return (
640
- "\n".join(min_edit_distance_lines),
726
+ min_edit_distance_lines,
641
727
  "\n".join(context_lines),
642
- min_edit_distance,
643
728
  )
644
729
 
645
730
 
646
- def edit_content(content: str, find_lines: str, replace_with_lines: str) -> str:
647
- count = content.count(find_lines)
648
- if count == 0:
649
- closest_match, context_lines, min_edit_distance = (
650
- find_least_edit_distance_substring(content, find_lines)
651
- )
652
- if min_edit_distance == 0:
653
- return content.replace(closest_match, replace_with_lines, 1)
654
- else:
655
- print(
656
- f"Exact match not found, found with whitespace removed edit distance: {min_edit_distance}"
731
+ def lines_replacer(
732
+ orig_content_lines: list[str], search_lines: list[str], replace_lines: list[str]
733
+ ) -> str:
734
+ # Validation for empty search
735
+ search_lines = list(filter(None, [x.strip() for x in search_lines]))
736
+
737
+ # Create mapping of non-empty lines to original indices
738
+ new_to_original_indices = []
739
+ new_content_lines = []
740
+ for i, line in enumerate(orig_content_lines):
741
+ stripped = line.strip()
742
+ if not stripped:
743
+ continue
744
+ new_content_lines.append(stripped)
745
+ new_to_original_indices.append(i)
746
+
747
+ if not new_content_lines and not search_lines:
748
+ return "\n".join(replace_lines)
749
+ elif not search_lines:
750
+ raise ValueError("Search block is empty")
751
+ elif not new_content_lines:
752
+ raise ValueError("File content is empty")
753
+
754
+ # Search for matching block
755
+ for i in range(len(new_content_lines) - len(search_lines) + 1):
756
+ if all(
757
+ new_content_lines[i + j] == search_lines[j]
758
+ for j in range(len(search_lines))
759
+ ):
760
+ start_idx = new_to_original_indices[i]
761
+ end_idx = new_to_original_indices[i + len(search_lines) - 1] + 1
762
+ return "\n".join(
763
+ orig_content_lines[:start_idx]
764
+ + replace_lines
765
+ + orig_content_lines[end_idx:]
657
766
  )
658
- raise Exception(
659
- f"""Error: no match found for the provided search block.
660
- Requested search block: \n```\n{find_lines}\n```
661
- Possible relevant section in the file:\n---\n```\n{context_lines}\n```\n---\nFile not edited
662
- \nPlease retry with exact search. Re-read the file if unsure.
663
- """
664
- )
665
767
 
666
- content = content.replace(find_lines, replace_with_lines, 1)
667
- return content
768
+ raise ValueError("Search block not found in content")
769
+
770
+
771
+ def edit_content(content: str, find_lines: str, replace_with_lines: str) -> str:
772
+ replace_with_lines_ = replace_with_lines.split("\n")
773
+ find_lines_ = find_lines.split("\n")
774
+ content_lines_ = content.split("\n")
775
+ try:
776
+ return lines_replacer(content_lines_, find_lines_, replace_with_lines_)
777
+ except ValueError:
778
+ pass
779
+
780
+ _, context_lines = find_least_edit_distance_substring(content_lines_, find_lines_)
781
+
782
+ raise Exception(
783
+ f"""Error: no match found for the provided search block.
784
+ Requested search block: \n```\n{find_lines}\n```
785
+ Possible relevant section in the file:\n---\n```\n{context_lines}\n```\n---\nFile not edited
786
+ \nPlease retry with exact search. Re-read the file if unsure.
787
+ """
788
+ )
668
789
 
669
790
 
670
791
  def do_diff_edit(fedit: FileEdit) -> str:
@@ -695,6 +816,9 @@ def _do_diff_edit(fedit: FileEdit) -> str:
695
816
  else:
696
817
  path_ = fedit.file_path
697
818
 
819
+ # The LLM is now aware that the file exists
820
+ BASH_STATE.add_to_whitelist_for_overwrite(path_)
821
+
698
822
  if not BASH_STATE.is_in_docker:
699
823
  if not os.path.exists(path_):
700
824
  raise Exception(f"Error: file {path_} does not exist")
@@ -775,6 +899,22 @@ def _do_diff_edit(fedit: FileEdit) -> str:
775
899
  if rcode != 0:
776
900
  raise Exception(f"Error: Write failed with code {rcode}")
777
901
 
902
+ syntax_errors = ""
903
+ extension = Path(path_).suffix.lstrip(".")
904
+ try:
905
+ check = check_syntax(extension, apply_diff_to)
906
+ syntax_errors = check.description
907
+ if syntax_errors:
908
+ console.print(f"W: Syntax errors encountered: {syntax_errors}")
909
+ return f"""Wrote file succesfully.
910
+ ---
911
+ However, tree-sitter reported syntax errors, please re-read the file and fix if there are any errors.
912
+ Errors:
913
+ {syntax_errors}
914
+ """
915
+ except Exception:
916
+ pass
917
+
778
918
  return "Success"
779
919
 
780
920
 
@@ -885,7 +1025,7 @@ def get_tool_output(
885
1025
  output = ask_confirmation(arg), 0.0
886
1026
  elif isinstance(arg, (BashCommand | BashInteraction)):
887
1027
  console.print("Calling execute bash tool")
888
- output = execute_bash(enc, arg, max_tokens, None)
1028
+ output = execute_bash(enc, arg, max_tokens, arg.wait_for_seconds)
889
1029
  elif isinstance(arg, WriteIfEmpty):
890
1030
  console.print("Calling write file tool")
891
1031
  output = write_file(arg, True), 0
@@ -1054,6 +1194,8 @@ def read_file(readfile: ReadFile, max_tokens: Optional[int]) -> str:
1054
1194
  if not os.path.isabs(readfile.file_path):
1055
1195
  return f"Failure: file_path should be absolute path, current working directory is {BASH_STATE.cwd}"
1056
1196
 
1197
+ BASH_STATE.add_to_whitelist_for_overwrite(readfile.file_path)
1198
+
1057
1199
  if not BASH_STATE.is_in_docker:
1058
1200
  path = Path(readfile.file_path)
1059
1201
  if not path.exists():
@@ -1075,7 +1217,14 @@ def read_file(readfile: ReadFile, max_tokens: Optional[int]) -> str:
1075
1217
  if max_tokens is not None:
1076
1218
  tokens = default_enc.encode(content)
1077
1219
  if len(tokens) > max_tokens:
1078
- content = default_enc.decode(tokens[: max_tokens - 5])
1079
- content += "\n...(truncated)"
1220
+ content, rest = save_out_of_context(
1221
+ tokens,
1222
+ max_tokens - 100,
1223
+ Path(readfile.file_path).suffix,
1224
+ default_enc.decode,
1225
+ )
1226
+ if rest:
1227
+ rest_ = "\n".join(map(str, rest))
1228
+ content += f"\n(...truncated)\n---\nI've split the rest of the file into multiple files. Here are the remaining splits, please read them:\n{rest_}"
1080
1229
 
1081
1230
  return content
{wcgw-2.2.2 → wcgw-2.3.1}/src/wcgw/types_.py +2 -0
@@ -5,6 +5,7 @@ from pydantic import BaseModel
5
5
 
6
6
  class BashCommand(BaseModel):
7
7
  command: str
8
+ wait_for_seconds: Optional[int] = None
8
9
 
9
10
 
10
11
  Specials = Literal[
@@ -17,6 +18,7 @@ class BashInteraction(BaseModel):
17
18
  send_text: Optional[str] = None
18
19
  send_specials: Optional[Sequence[Specials]] = None
19
20
  send_ascii: Optional[Sequence[int]] = None
21
+ wait_for_seconds: Optional[int] = None
20
22
 
21
23
 
22
24
  class ReadImage(BaseModel):