wcgw 2.6.2__py3-none-any.whl → 2.7.0__py3-none-any.whl

This diff shows the contents of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.

Potentially problematic release: this version of wcgw might be problematic.

@@ -1,59 +1,51 @@
 import base64
 import json
 import mimetypes
-from pathlib import Path
-import sys
+import os
+import subprocess
+import tempfile
 import traceback
-from typing import Callable, DefaultDict, Optional, cast, Literal
-import anthropic
+import uuid
+from pathlib import Path
+from typing import Literal, Optional, cast
+
+import rich
 from anthropic import Anthropic
 from anthropic.types import (
-    ToolParam,
+    ImageBlockParam,
     MessageParam,
+    TextBlockParam,
+    ToolParam,
     ToolResultBlockParam,
     ToolUseBlockParam,
-    ImageBlockParam,
-    TextBlockParam,
 )
-
-import rich
-import petname  # type: ignore[import-untyped]
+from dotenv import load_dotenv
 from typer import Typer
-import uuid
 
 from ..types_ import (
     BashCommand,
     BashInteraction,
-    WriteIfEmpty,
-    FileEditFindReplace,
     FileEdit,
+    GetScreenInfo,
     Keyboard,
+    KnowledgeTransfer,
     Mouse,
     ReadFiles,
     ReadImage,
     ResetShell,
     ScreenShot,
-    GetScreenInfo,
+    WriteIfEmpty,
+)
+from .common import discard_input
+from .memory import load_memory
+from .tools import (
+    DoneFlag,
+    ImageData,
+    default_enc,
+    get_tool_output,
+    initialize,
+    which_tool_name,
 )
-
-from .common import Models, discard_input
-from .common import CostData
-from .tools import ImageData
-from .computer_use import Computer
-
-from .tools import DoneFlag, get_tool_output, which_tool_name, default_enc
-
-from urllib import parse
-import subprocess
-import os
-import tempfile
-
-import toml
-from pydantic import BaseModel
-
-
-from dotenv import load_dotenv
-
 
 History = list[MessageParam]
 
@@ -135,19 +127,23 @@ def loop(
 
     history: History = []
     waiting_for_assistant = False
+    memory = None
     if resume:
-        if resume == "latest":
-            resume_path = sorted(Path(".wcgw").iterdir(), key=os.path.getmtime)[-1]
-        else:
-            resume_path = Path(resume)
-        if not resume_path.exists():
-            raise FileNotFoundError(f"File {resume} not found")
-        with resume_path.open() as f:
-            history = json.load(f)
-        if len(history) <= 2:
-            raise ValueError("Invalid history file")
-        first_message = ""
-        waiting_for_assistant = history[-1]["role"] != "assistant"
+        try:
+            memory = load_memory(resume)
+        except OSError:
+            if resume == "latest":
+                resume_path = sorted(Path(".wcgw").iterdir(), key=os.path.getmtime)[-1]
+            else:
+                resume_path = Path(resume)
+            if not resume_path.exists():
+                raise FileNotFoundError(f"File {resume} not found")
+            with resume_path.open() as f:
+                history = json.load(f)
+            if len(history) <= 2:
+                raise ValueError("Invalid history file")
+            first_message = ""
+            waiting_for_assistant = history[-1]["role"] != "assistant"
 
     limit = 1
 
@@ -216,6 +212,25 @@ def loop(
 - Use absolute file path only.
 - Use SEARCH/REPLACE blocks to edit the file.
 - If the edit fails due to block not matching, please retry with correct block till it matches. Re-read the file to ensure you've all the lines correct.
+""",
+        ),
+        ToolParam(
+            input_schema=KnowledgeTransfer.model_json_schema(),
+            name="KnowledgeTransfer",
+            description="""
+Write detailed description in order to do a KT, if the user asks for it.
+Save all information necessary for a person to understand the task and the problems.
+
+- `all_user_instructions` should contain all instructions user shared in the conversation.
+- `current_status_of_the_task` should contain only what is already achieved, not what's remaining.
+- `all_issues_snippets` should only contain snippets of error, traceback, file snippets, commands, etc., no comments or solutions (important!).
+- Be very verbose in `all_issues_snippets` providing as much error context as possible.
+- Provide an id if the user hasn't provided one.
+- This tool will return a text file path where the information is saved.
+- After the tool completes succesfully, tell the user the task id and the generate file path. (important!)
+- Leave arguments as empty string if they aren't relevant.
+- This tool marks end of your conversation, do not run any further tools after calling this.
+- Provide absolute file paths only in `relevant_file_paths` containing all relevant files.
 """,
         ),
     ]
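
Based on the field names this description spells out, a KnowledgeTransfer call would carry arguments shaped roughly like the following (illustrative values and value types only; the actual schema comes from KnowledgeTransfer.model_json_schema()):

    args = {
        "id": "fix-login-timeout",
        "all_user_instructions": "Fix the login timeout. Don't touch the session cache.",
        "current_status_of_the_task": "Timeout reproduced; narrowed to auth/session.py.",
        "all_issues_snippets": "TimeoutError: request exceeded 30s\n  at auth/session.py:112",
        "relevant_file_paths": ["/repo/auth/session.py", "/repo/auth/config.py"],
    }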
@@ -270,9 +285,10 @@ def loop(
 """,
         ),
     ]
-    uname_sysname = os.uname().sysname
-    uname_machine = os.uname().machine
 
+    initial_info = initialize(
+        os.getcwd(), [], resume if (memory and resume) else "", 8000
+    )
     system = f"""
 You're an expert software engineer with shell and code knowledge.
 
@@ -284,10 +300,7 @@ Instructions:
 - Do not provide code snippets unless asked by the user, instead directly add/edit the code.
 - Do not install new tools/packages before ensuring no such tools/package or an alternative already exists.
 
-System information:
-- System: {uname_sysname}
-- Machine: {uname_machine}
-- Current directory: {os.getcwd()}
+{initial_info}
 """
 
     with open(os.path.join(os.path.dirname(__file__), "diff-instructions.txt")) as f:
@@ -47,7 +47,6 @@ Every *SEARCH/REPLACE block* must use this format:
 
 Every "<<<<<<< SEARCH" section must *EXACTLY MATCH* the existing file content, character for character, including all comments, docstrings, whitespaces, etc.
 
-*SEARCH/REPLACE* blocks will *only* replace the first match occurrence.
 Including multiple unique *SEARCH/REPLACE* blocks if needed.
 Include enough lines in each SEARCH section to uniquely match each set of lines that need to change.
 
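For reference, a block of the kind these instructions describe looks like the following; the `=======` and `>>>>>>> REPLACE` delimiters are assumed from the usual SEARCH/REPLACE convention, since this excerpt only shows the `<<<<<<< SEARCH` marker:

    <<<<<<< SEARCH
    def greet():
        print("hello")
    =======
    def greet() -> None:
        print("hello, world")
    >>>>>>> REPLACE

Dropping the "only replace the first match occurrence" line lines up with the new tolerant matcher added below, which considers every candidate match rather than stopping at the first.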
@@ -0,0 +1,482 @@
+import re
+from dataclasses import dataclass, field
+from difflib import SequenceMatcher
+from typing import Callable, DefaultDict, Literal, Optional
+
+TOLERANCE_TYPES = Literal["SILENT", "WARNING", "ERROR"]
+
+
+@dataclass
+class Tolerance:
+    line_process: Callable[[str], str]
+    severity_cat: TOLERANCE_TYPES
+    score_multiplier: float
+    error_name: str
+
+
+@dataclass
+class TolerancesHit(Tolerance):
+    count: int
+
+
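
Each Tolerance pairs a line-normalizing function with a severity and a score penalty, and TolerancesHit additionally records how many lines needed that normalizer to match. A hypothetical extra level (a sketch, not part of the package) would look like:

    ignore_case = Tolerance(
        line_process=str.lower,  # normalize case before comparing lines
        severity_cat="WARNING",
        score_multiplier=25,     # penalize case-insensitive matches
        error_name="Warning: matching without considering letter case.",
    )
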
+@dataclass
+class FileEditOutput:
+    original_content: list[str]
+    orig_search_blocks: list[list[str]]
+    edited_with_tolerances: list[tuple[slice, list[TolerancesHit], list[str]]]
+
+    def replace_or_throw(
+        self,
+        max_errors: int,
+    ) -> tuple[list[str], set[str]]:
+        new_lines = list[str]()
+        last_idx = 0
+        errors = []
+        warnings = set[str]()
+        for (span, tolerances, replace_with), search_ in zip(
+            self.edited_with_tolerances, self.orig_search_blocks
+        ):
+            for tol in tolerances:
+                if tol.count > 0:
+                    if tol.severity_cat == "WARNING":
+                        warnings.add(tol.error_name)
+                    elif tol.severity_cat == "ERROR":
+                        errors.append(f"""
+Got error while processing the following search block:
+---
+```
+{'\n'.join(search_)}
+```
+---
+Error:
+{tol.error_name}
+---
+""")
+                        if len(errors) >= max_errors:
+                            raise Exception("\n".join(errors))
+            if last_idx < span.start:
+                new_lines.extend(self.original_content[last_idx : span.start])
+
+            new_lines.extend(replace_with)
+            last_idx = span.stop
+
+        if last_idx < len(self.original_content):
+            new_lines.extend(self.original_content[last_idx:])
+
+        if errors:
+            raise Exception("\n".join(errors))
+
+        return new_lines, set(warnings)
+
+    @staticmethod
+    def get_best_match(
+        outputs: list["FileEditOutput"],
+    ) -> tuple[list["FileEditOutput"], bool]:
+        best_hits: list[FileEditOutput] = []
+        best_score = float("-inf")
+        assert outputs
+        for output in outputs:
+            hit_score = 0.0
+            for _, tols, _ in output.edited_with_tolerances:
+                for tol in tols:
+                    hit_score += tol.count * tol.score_multiplier
+            if not best_hits:
+                best_hits.append(output)
+                best_score = hit_score
+            else:
+                if hit_score < best_score:
+                    best_hits = [output]
+                    best_score = hit_score
+                elif abs(hit_score - best_score) < 1e-3:
+                    best_hits.append(output)
+
+        return best_hits, best_score < 0
+
+
+def line_process_max_space_tolerance(line: str) -> str:
+    line = line.strip()
+    return re.sub(r"\s", "", line)
+
+
+DEFAULT_TOLERANCES = [
+    Tolerance(
+        line_process=str.rstrip,
+        severity_cat="SILENT",
+        score_multiplier=1,
+        error_name="",
+    ),
+    Tolerance(
+        line_process=str.lstrip,
+        severity_cat="WARNING",
+        score_multiplier=10,
+        error_name="Warning: matching without considering indentation (leading spaces).",
+    ),
+    Tolerance(
+        line_process=line_process_max_space_tolerance,
+        severity_cat="WARNING",
+        score_multiplier=50,
+        error_name="Warning: matching after removing all spaces in lines.",
+    ),
+]
+
+
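
The three defaults relax whitespace progressively: trailing spaces are forgiven silently, ignored indentation warns with a 10x score penalty, and fully space-insensitive matching warns with a 50x penalty. Their normalizers behave like this (illustrative):

    str.rstrip("    x = 1  ")                      # "    x = 1"
    str.lstrip("    x = 1  ")                      # "x = 1  "
    line_process_max_space_tolerance("  x = 1  ")  # "x=1"
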
+def remove_leading_trailing_empty_lines(lines: list[str]) -> list[str]:
+    start = 0
+    end = len(lines) - 1
+    if end < start:
+        return lines
+    while not lines[start].strip():
+        start += 1
+        if start >= len(lines):
+            break
+    while not lines[end].strip():
+        end -= 1
+        if end < 0:
+            break
+    return lines[start : end + 1]
+
+
+@dataclass
+class FileEditInput:
+    file_lines: list[str]
+    file_line_offset: int
+    search_replace_blocks: list[tuple[list[str], list[str]]]
+    search_replace_offset: int
+    tolerances: list["Tolerance"] = field(default_factory=lambda: DEFAULT_TOLERANCES)
+
+    def edit_file(self) -> list[FileEditOutput]:
+        n_file_lines = len(self.file_lines)
+        n_blocks = len(self.search_replace_blocks)
+
+        # Boundary conditions
+        no_match_output = FileEditOutput(
+            original_content=self.file_lines,
+            orig_search_blocks=[x[0] for x in self.search_replace_blocks],
+            edited_with_tolerances=[
+                (
+                    slice(0, 0),
+                    [
+                        TolerancesHit(
+                            line_process=lambda x: x,
+                            severity_cat="ERROR",
+                            score_multiplier=float("-inf"),
+                            error_name="The blocks couldn't be matched, maybe the sequence of search blocks was incorrect?",
+                            count=max(1, len(search_lines)),
+                        )
+                        for search_lines, _ in self.search_replace_blocks[
+                            self.search_replace_offset :
+                        ]
+                    ],
+                    [],
+                )
+            ],
+        )
+        if (
+            self.file_line_offset >= n_file_lines
+            and self.search_replace_offset < n_blocks
+        ):
+            return [no_match_output]
+        elif self.file_line_offset >= n_file_lines:
+            return [
+                FileEditOutput(
+                    self.file_lines,
+                    [x[0] for x in self.search_replace_blocks],
+                    [(slice(0, 0), [], [])],
+                )
+            ]
+        elif self.search_replace_offset >= n_blocks:
+            return [
+                FileEditOutput(
+                    self.file_lines,
+                    [x[0] for x in self.search_replace_blocks],
+                    [(slice(0, 0), [], [])],
+                )
+            ]
+
+        # search for first block
+        first_block = self.search_replace_blocks[self.search_replace_offset]
+
+        # Try exact match
+        matches = match_exact(self.file_lines, self.file_line_offset, first_block[0])
+
+        all_outputs = list[list[tuple[slice, list[TolerancesHit], list[str]]]]()
+
+        if not matches:
+            # Try tolerances
+            matches_with_tolerances = match_with_tolerance(
+                self.file_lines, self.file_line_offset, first_block[0], self.tolerances
+            )
+            replace_by = first_block[1]
+            if not matches_with_tolerances:
+                # Try with no empty lines
+                matches_with_tolerances = match_with_tolerance_empty_line(
+                    self.file_lines,
+                    self.file_line_offset,
+                    first_block[0],
+                    self.tolerances,
+                )
+                replace_by = remove_leading_trailing_empty_lines(first_block[1])
+
+                if not matches_with_tolerances:
+                    # Report edit distance
+                    sim_match, sim_sim, sim_context = (
+                        find_least_edit_distance_substring(
+                            self.file_lines, self.file_line_offset, first_block[0]
+                        )
+                    )
+                    if sim_match:
+                        matches_with_tolerances = [
+                            (
+                                sim_match,
+                                [
+                                    TolerancesHit(
+                                        lambda x: x,
+                                        "ERROR",
+                                        -1,
+                                        "Couldn't find match. Do you mean to match the lines in the following context?\n```"
+                                        + sim_context
+                                        + "\n```",
+                                        int(len(first_block[0]) // sim_sim),
+                                    )
+                                ],
+                            )
+                        ]
+
+            for match, tolerances in matches_with_tolerances:
+                file_edit_input = FileEditInput(
+                    self.file_lines,
+                    match.stop,
+                    self.search_replace_blocks,
+                    self.search_replace_offset + 1,
+                    self.tolerances,
+                )
+
+                remaining_output = file_edit_input.edit_file()
+                for rem_output in remaining_output:
+                    all_outputs.append(
+                        [
+                            (match, tolerances, replace_by),
+                            *rem_output.edited_with_tolerances,
+                        ]
+                    )
+        else:
+            for match in matches:
+                file_edit_input = FileEditInput(
+                    self.file_lines,
+                    match.stop,
+                    self.search_replace_blocks,
+                    self.search_replace_offset + 1,
+                    self.tolerances,
+                )
+                remaining_output = file_edit_input.edit_file()
+                for rem_output in remaining_output:
+                    all_outputs.append(
+                        [
+                            (
+                                match,
+                                [],
+                                first_block[1],
+                            ),
+                            *rem_output.edited_with_tolerances,
+                        ]
+                    )
+
+        if not all_outputs:
+            return [no_match_output]
+
+        return [
+            FileEditOutput(
+                self.file_lines, [x[0] for x in self.search_replace_blocks], output
+            )
+            for output in all_outputs
+        ]
+
+
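
Taken together, the pieces compose as a recursive matcher: each FileEditInput consumes one search/replace block, recursing on the remainder of the file, and the candidate outputs are ranked afterwards. A minimal end-to-end sketch (illustrative values; assumes the definitions in this file are importable):

    file_lines = ["def add(a, b):", "    return a + b", ""]
    blocks = [
        (
            ["def add(a, b):", "    return a + b"],       # SEARCH lines
            ["def add(a: int, b: int) -> int:", "    return a + b"],  # REPLACE lines
        )
    ]
    outputs = FileEditInput(file_lines, 0, blocks, 0).edit_file()
    best, has_errors = FileEditOutput.get_best_match(outputs)
    new_lines, warnings = best[0].replace_or_throw(max_errors=5)
    # new_lines holds the edited file; warnings is empty here since the match was exact
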
+def find_contiguous_match(search_line_positions: list[set[int]]) -> list[slice]:
+    n_search_lines = len(search_line_positions)
+
+    def search_in_dictionary(search_offset: int, search_index: int) -> bool:
+        if search_offset >= n_search_lines:
+            return True
+
+        if search_index in search_line_positions[search_offset]:
+            return search_in_dictionary(search_offset + 1, search_index + 1)
+        return False
+
+    matched_slices = []
+    for index in search_line_positions[0]:
+        if search_in_dictionary(1, index + 1):
+            matched_slices.append(slice(index, index + n_search_lines, 1))
+    return matched_slices
+
+
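
find_contiguous_match receives, for each search line, the set of file-line indices where that line can match, and keeps only runs of consecutive indices. For example (sketch):

    # search line 0 matches file lines {2, 7}, line 1 matches {3}, line 2 matches {4};
    # only the run starting at index 2 is contiguous, so one slice comes back
    find_contiguous_match([{2, 7}, {3}, {4}])  # -> [slice(2, 5, 1)]
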
+def match_exact(
+    content: list[str], content_offset: int, search: list[str]
+) -> list[slice]:
+    n_search_lines = len(search)
+    n_content = len(content) - content_offset
+    if n_search_lines > n_content:
+        return []
+    if n_search_lines == 0:
+        return []
+    if n_content == 0:
+        return []
+    content_positions = DefaultDict[str, set[int]](set)
+    for i in range(content_offset, n_content):
+        content_positions[content[i]].add(i)
+    search_line_positions = [content_positions[line] for line in search]
+
+    matched_slices = find_contiguous_match(search_line_positions)
+
+    return matched_slices
+
+
+def match_with_tolerance(
+    content: list[str],
+    content_offset: int,
+    search: list[str],
+    tolerances: list[Tolerance],
+) -> list[tuple[slice, list[TolerancesHit]]]:
+    n_search_lines = len(search)
+    n_content = len(content) - content_offset
+    if n_search_lines > n_content:
+        return []
+    if n_search_lines == 0:
+        return []
+    if n_content == 0:
+        return []
+    content_positions = DefaultDict[str, set[int]](set)
+    for i in range(content_offset, n_content):
+        content_positions[content[i]].add(i)
+    search_line_positions = [content_positions[line] for line in search]
+
+    tolerance_index_by_content_line: list[dict[int, int]] = [
+        {} for _ in range(len(search))
+    ]
+    for tidx, tolerance in enumerate(tolerances):
+        content_positions = DefaultDict[str, set[int]](set)
+        for i in range(content_offset, n_content):
+            line = content[i]
+            content_positions[tolerance.line_process(line)].add(i)
+        for i, line in enumerate(search):
+            new_lines = content_positions[tolerance.line_process(line)]
+            new_indices = new_lines - search_line_positions[i]
+            search_line_positions[i].update(new_indices)
+            tolerance_index_by_content_line[i].update(
+                {idx: tidx for idx in new_indices}
+            )
+    matched_slices = find_contiguous_match(search_line_positions)
+
+    tolerances_counts: list[list[TolerancesHit]] = [
+        [
+            TolerancesHit(
+                line_process=tol.line_process,
+                severity_cat=tol.severity_cat,
+                score_multiplier=tol.score_multiplier,
+                count=0,
+                error_name=tol.error_name,
+            )
+            for tol in tolerances
+        ]
+        for _ in range(len(matched_slices))
+    ]
+    for sidx, slice in enumerate(matched_slices):
+        for search_idx, content_idx in enumerate(
+            range(slice.start, slice.stop, slice.step)
+        ):
+            if content_idx in tolerance_index_by_content_line[search_idx]:
+                tolerances_counts[sidx][
+                    tolerance_index_by_content_line[search_idx][content_idx]
+                ].count += 1
+
+    return list(zip(matched_slices, tolerances_counts))
+
+
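
match_with_tolerance first indexes file lines verbatim, then re-indexes them under each tolerance's normalizer, recording which tolerance unlocked each extra position; the per-slice hit counts feed the scoring in get_best_match. For instance, matching an unindented search line against indented file content succeeds with one hit on the indentation tolerance (sketch):

    hits = match_with_tolerance(["    x = 1"], 0, ["x = 1"], DEFAULT_TOLERANCES)
    # -> [(slice(0, 1, 1), [...])] where the lstrip tolerance's count == 1
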
+def match_with_tolerance_empty_line(
+    content: list[str],
+    content_offset: int,
+    search: list[str],
+    tolerances: list[Tolerance],
+) -> list[tuple[slice, list[TolerancesHit]]]:
+    new_content = list[str]()
+    new_to_original = dict[int, int]()
+    for i in range(content_offset, len(content)):
+        line = content[i]
+        if line.strip():
+            new_to_original[len(new_content)] = i
+            new_content.append(line)
+
+    search = [line for line in search if line.strip()]
+
+    matches_with_tolerancs = match_with_tolerance(new_content, 0, search, tolerances)
+
+    new_matches_with_tolerances = list[tuple[slice, list[TolerancesHit]]]()
+    for matches, tolerance_counts in matches_with_tolerancs:
+        matches = slice(
+            new_to_original[matches.start], new_to_original[matches.stop - 1] + 1, 1
+        )
+        new_matches_with_tolerances.append((matches, tolerance_counts))
+    return new_matches_with_tolerances
+
+
+def find_least_edit_distance_substring(
+    orig_content_lines: list[str], offset: int, find_lines: list[str]
+) -> tuple[Optional[slice], float, str]:
+    # Prepare content lines, stripping whitespace and keeping track of original indices
+    content_lines = [
+        orig_content_lines[i].strip() for i in range(offset, len(orig_content_lines))
+    ]
+    new_to_original_indices = {}
+    new_content_lines = []
+    for i, line in enumerate(content_lines):
+        if not line:
+            continue
+        new_content_lines.append(line)
+        new_to_original_indices[len(new_content_lines) - 1] = i
+    content_lines = new_content_lines
+
+    # Prepare find lines, removing empty lines
+    find_lines = [line.strip() for line in find_lines if line.strip()]
+
+    # Initialize variables for best match tracking
+    max_similarity = 0.0
+    min_edit_distance_lines = None
+    context_lines = []
+
+    # For each possible starting position in content
+    for i in range(max(1, len(content_lines) - len(find_lines) + 1)):
+        # Calculate similarity for the block starting at position i
+        block_similarity = 0.0
+        for j in range(len(find_lines)):
+            if (i + j) < len(content_lines):
+                # Use SequenceMatcher for more efficient similarity calculation
+                similarity = SequenceMatcher(
+                    None, content_lines[i + j], find_lines[j]
+                ).ratio()
+                block_similarity += similarity
+
+        # If this block is more similar than previous best
+        if block_similarity > max_similarity:
+            max_similarity = block_similarity
+            # Map back to original line indices
+            orig_start_index = new_to_original_indices[i]
+            orig_end_index = (
+                new_to_original_indices.get(
+                    i + len(find_lines) - 1, len(orig_content_lines) - 1
+                )
+                + 1
+            )
+            # Get the original lines
+            min_edit_distance_lines = slice(
+                orig_start_index + offset, orig_end_index + offset
+            )
+            # Get context (10 lines before and after)
+            context_lines = orig_content_lines[
+                max(0, orig_start_index - 10 + offset) : (orig_end_index + 10 + offset)
+            ]
+
+    return (
+        min_edit_distance_lines,
+        max_similarity,
+        "\n".join(context_lines),
+    )
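
When nothing matches even tolerantly, find_least_edit_distance_substring locates the closest region by summing per-line SequenceMatcher ratios, so the error can point at the likely-intended lines. For example (sketch):

    span, score, context = find_least_edit_distance_substring(
        ["alpha", "beta", "gamma"], 0, ["betta"]
    )
    # span -> slice(1, 2): "beta" is the nearest line; context carries up to
    # ten surrounding lines for the error message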