wcgw 5.5.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wcgw/__init__.py +4 -0
- wcgw/client/__init__.py +0 -0
- wcgw/client/bash_state/bash_state.py +1426 -0
- wcgw/client/bash_state/parser/__init__.py +7 -0
- wcgw/client/bash_state/parser/bash_statement_parser.py +181 -0
- wcgw/client/common.py +51 -0
- wcgw/client/diff-instructions.txt +73 -0
- wcgw/client/encoder/__init__.py +47 -0
- wcgw/client/file_ops/diff_edit.py +619 -0
- wcgw/client/file_ops/extensions.py +137 -0
- wcgw/client/file_ops/search_replace.py +212 -0
- wcgw/client/mcp_server/Readme.md +3 -0
- wcgw/client/mcp_server/__init__.py +32 -0
- wcgw/client/mcp_server/server.py +184 -0
- wcgw/client/memory.py +103 -0
- wcgw/client/modes.py +240 -0
- wcgw/client/repo_ops/display_tree.py +116 -0
- wcgw/client/repo_ops/file_stats.py +152 -0
- wcgw/client/repo_ops/path_prob.py +58 -0
- wcgw/client/repo_ops/paths_model.vocab +20000 -0
- wcgw/client/repo_ops/paths_tokens.model +80042 -0
- wcgw/client/repo_ops/repo_context.py +289 -0
- wcgw/client/schema_generator.py +63 -0
- wcgw/client/tool_prompts.py +98 -0
- wcgw/client/tools.py +1432 -0
- wcgw/py.typed +0 -0
- wcgw/types_.py +318 -0
- wcgw-5.5.4.dist-info/METADATA +339 -0
- wcgw-5.5.4.dist-info/RECORD +38 -0
- wcgw-5.5.4.dist-info/WHEEL +4 -0
- wcgw-5.5.4.dist-info/entry_points.txt +4 -0
- wcgw-5.5.4.dist-info/licenses/LICENSE +213 -0
- wcgw_cli/__init__.py +1 -0
- wcgw_cli/__main__.py +3 -0
- wcgw_cli/anthropic_client.py +486 -0
- wcgw_cli/cli.py +40 -0
- wcgw_cli/openai_client.py +404 -0
- wcgw_cli/openai_utils.py +67 -0
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Bash Statement Parser
|
|
4
|
+
|
|
5
|
+
This script parses bash scripts and identifies individual statements using tree-sitter.
|
|
6
|
+
It correctly handles multi-line strings, command chains with && and ||, and semicolon-separated statements.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import sys
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from typing import Any, List, Optional
|
|
12
|
+
|
|
13
|
+
import tree_sitter_bash
|
|
14
|
+
from tree_sitter import Language, Parser
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class Statement:
    """One bash statement together with its location in the source text."""

    text: str  # raw source text of the statement
    start_line: int  # 1-indexed first line
    end_line: int  # 1-indexed last line
    start_byte: int  # byte offset where the statement begins
    end_byte: int  # byte offset just past the statement's end
    node_type: str  # tree-sitter node type that produced this statement
    parent_type: Optional[str] = None  # node type of the enclosing statement, if any

    def __str__(self) -> str:
        # Present the statement without its surrounding whitespace.
        stripped_text = self.text.strip()
        return stripped_text
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class BashStatementParser:
    """Splits bash source into individual statements using the tree-sitter grammar."""

    def __init__(self) -> None:
        # The precompiled bash grammar shipped with the tree_sitter_bash wheel.
        self.language = Language(tree_sitter_bash.language())
        self.parser = Parser(self.language)

    def parse_file(self, file_path: str) -> List[Statement]:
        """Parse a bash script file and return a list of statements."""
        with open(file_path, "r", encoding="utf-8") as f:
            return self.parse_string(f.read())

    def parse_string(self, content: str) -> List[Statement]:
        """Parse a string containing bash script and return a list of statements."""
        tree = self.parser.parse(bytes(content, "utf-8"))

        found: List[Statement] = []
        self._extract_statements(tree.root_node, content, found, None)

        # Collapse nested matches so only the outermost statements remain.
        return self._post_process_statements(found, content)

    def _print_tree(self, node: Any, content: str, indent: str = "") -> None:
        """Debug helper to print the entire syntax tree."""
        snippet = content[node.start_byte : node.end_byte]
        if len(snippet) > 40:
            snippet = snippet[:37] + "..."
        print(f"{indent}{node.type}: {repr(snippet)}")
        for child in node.children:
            self._print_tree(child, content, indent + " ")

    def _extract_statements(
        self,
        node: Any,
        content: str,
        statements: List[Statement],
        parent_type: Optional[str],
    ) -> None:
        """Recursively collect Statement objects from the syntax tree."""
        # Grammar node types that represent bash statements.
        recognized = {
            # Basic statements
            "command",
            "variable_assignment",
            "declaration_command",
            "unset_command",
            # Control flow statements
            "for_statement",
            "c_style_for_statement",
            "while_statement",
            "if_statement",
            "case_statement",
            # Function definition
            "function_definition",
            # Command chains and groups
            "pipeline",  # For command chains with | and |&
            "list",  # For command chains with && and ||
            "compound_statement",
            "subshell",
            "redirected_statement",
        }

        if node.type in recognized:
            # tree-sitter points are 0-indexed; report 1-indexed lines.
            statements.append(
                Statement(
                    text=content[node.start_byte : node.end_byte],
                    start_line=node.start_point[0] + 1,
                    end_line=node.end_point[0] + 1,
                    start_byte=node.start_byte,
                    end_byte=node.end_byte,
                    node_type=node.type,
                    parent_type=parent_type,
                )
            )
            # Children are nested under this statement from here down.
            parent_type = node.type

        for child in node.children:
            self._extract_statements(child, content, statements, parent_type)

    def _contained_in(self, stmt: Statement, other: Statement) -> bool:
        """Return True when `stmt` lies entirely inside `other`.

        Semicolon-joined `list` nodes are exempt so their parts survive
        the containment filter.
        """
        if other.node_type == "list" and ";" in other.text:
            return False
        return (
            other.start_line <= stmt.start_line
            and other.end_line >= stmt.end_line
            and len(other.text) > len(stmt.text)
            and stmt.text in other.text
        )

    def _post_process_statements(
        self, statements: List[Statement], content: str
    ) -> List[Statement]:
        """Drop statements nested inside larger ones and order the survivors."""
        if not statements:
            return []

        outermost = [
            stmt
            for stmt in statements
            if not any(
                self._contained_in(stmt, other)
                for other in statements
                if other is not stmt
            )
        ]

        # Stable, position-based ordering for deterministic output.
        outermost.sort(key=lambda s: (s.start_line, s.text))
        return outermost
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def main() -> None:
    """CLI entry point: parse the bash file named on argv and print each statement."""
    if len(sys.argv) < 2:
        print("Usage: python bash_statement_parser.py <bash_script_file>")
        sys.exit(1)

    statement_parser = BashStatementParser()
    found = statement_parser.parse_file(sys.argv[1])

    print(f"Found {len(found)} statements:")
    for idx, stmt in enumerate(found, 1):
        print(f"\n--- Statement {idx} (Lines {stmt.start_line}-{stmt.end_line}) ---")
        print(stmt)


if __name__ == "__main__":
    main()
|
wcgw/client/common.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import select
|
|
2
|
+
import sys
|
|
3
|
+
import termios
|
|
4
|
+
import tty
|
|
5
|
+
from typing import Literal
|
|
6
|
+
from pydantic import BaseModel
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class CostData(BaseModel):
    """Per-model token pricing, in USD per one million tokens."""

    # Price charged per 1M prompt (input) tokens.
    cost_per_1m_input_tokens: float
    # Price charged per 1M completion (output) tokens.
    cost_per_1m_output_tokens: float
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# NOTE(review): these imports sit mid-file rather than in the top-of-file
# import block — confirm there is no circular-import reason before moving them.
from openai.types.chat import (
    ChatCompletionMessageParam,
    ChatCompletionAssistantMessageParam,
    ChatCompletionMessage,
    ParsedChatCompletionMessage,
)

# A conversation transcript as passed to the OpenAI chat-completions API.
History = list[ChatCompletionMessageParam]
# Model identifiers this client accepts.
Models = Literal["gpt-4o-2024-08-06", "gpt-4o-mini"]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def discard_input() -> None:
    """Flush any pending characters from stdin without blocking.

    Temporarily switches the terminal into cbreak (non-canonical) mode so
    buffered keystrokes become readable immediately, drains them one byte at
    a time, then restores the previous terminal settings. When stdin is not
    a terminal (or has no usable file descriptor) a warning is printed and
    the function returns normally.
    """
    try:
        fd = sys.stdin.fileno()
        # Remember the terminal state so it can be restored afterwards.
        old_settings = termios.tcgetattr(fd)
        try:
            # Non-canonical mode: input is delivered without waiting for Enter.
            tty.setcbreak(fd)
            # Drain one character at a time while data is ready (0 timeout).
            while sys.stdin in select.select([sys.stdin], [], [], 0)[0]:
                sys.stdin.read(1)
        finally:
            termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)
    except (termios.error, ValueError) as e:
        # Not a terminal / no fd: degrade gracefully instead of crashing.
        print(f"Warning: Unable to discard input. Error: {e}")
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
|
|
2
|
+
Instructions for editing files.
|
|
3
|
+
# Example
|
|
4
|
+
## Input file
|
|
5
|
+
```
|
|
6
|
+
import numpy as np
|
|
7
|
+
from impls import impl1, impl2
|
|
8
|
+
|
|
9
|
+
def hello():
|
|
10
|
+
"print a greeting"
|
|
11
|
+
|
|
12
|
+
print("hello")
|
|
13
|
+
|
|
14
|
+
def call_hello():
|
|
15
|
+
"call hello"
|
|
16
|
+
|
|
17
|
+
hello()
|
|
18
|
+
print("Called")
|
|
19
|
+
impl1()
|
|
20
|
+
hello()
|
|
21
|
+
impl2()
|
|
22
|
+
|
|
23
|
+
```
|
|
24
|
+
## Edit format on the input file
|
|
25
|
+
```
|
|
26
|
+
<<<<<<< SEARCH
|
|
27
|
+
from impls import impl1, impl2
|
|
28
|
+
=======
|
|
29
|
+
from impls import impl1, impl2
|
|
30
|
+
from hello import hello as hello_renamed
|
|
31
|
+
>>>>>>> REPLACE
|
|
32
|
+
<<<<<<< SEARCH
|
|
33
|
+
def hello():
|
|
34
|
+
"print a greeting"
|
|
35
|
+
|
|
36
|
+
print("hello")
|
|
37
|
+
=======
|
|
38
|
+
>>>>>>> REPLACE
|
|
39
|
+
<<<<<<< SEARCH
|
|
40
|
+
def call_hello():
|
|
41
|
+
"call hello"
|
|
42
|
+
|
|
43
|
+
hello()
|
|
44
|
+
=======
|
|
45
|
+
def call_hello_renamed():
|
|
46
|
+
"call hello renamed"
|
|
47
|
+
|
|
48
|
+
hello_renamed()
|
|
49
|
+
>>>>>>> REPLACE
|
|
50
|
+
<<<<<<< SEARCH
|
|
51
|
+
impl1()
|
|
52
|
+
hello()
|
|
53
|
+
impl2()
|
|
54
|
+
=======
|
|
55
|
+
impl1()
|
|
56
|
+
hello_renamed()
|
|
57
|
+
impl2()
|
|
58
|
+
>>>>>>> REPLACE
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
# *SEARCH/REPLACE block* Rules:
|
|
62
|
+
Every "<<<<<<< SEARCH" section must *EXACTLY MATCH* the existing file content, character for character, including all comments, docstrings, whitespaces, etc.
|
|
63
|
+
|
|
64
|
+
Include multiple unique *SEARCH/REPLACE* blocks if needed.
|
|
65
|
+
Include enough and only enough lines in each SEARCH section to uniquely match each set of lines that need to change.
|
|
66
|
+
|
|
67
|
+
Keep *SEARCH/REPLACE* blocks concise.
|
|
68
|
+
Break large *SEARCH/REPLACE* blocks into a series of smaller blocks that each change a small portion of the file.
|
|
69
|
+
Include just the changing lines, and a few surrounding lines (0-3 lines) if needed for uniqueness.
|
|
70
|
+
Other than for uniqueness, avoid including lines that do not change in the SEARCH (and REPLACE) blocks. Target 0-3 non-trivial extra lines per block.
|
|
71
|
+
|
|
72
|
+
Preserve leading spaces and indentations in both SEARCH and REPLACE blocks.
|
|
73
|
+
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import threading
|
|
2
|
+
from typing import Callable, Protocol, TypeVar, cast
|
|
3
|
+
|
|
4
|
+
import tokenizers # type: ignore[import-untyped]
|
|
5
|
+
|
|
6
|
+
T = TypeVar("T")
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class EncoderDecoder(Protocol[T]):
    """Structural interface for anything that can tokenize and detokenize text."""

    # Convert text into a sequence of tokens.
    def encoder(self, text: str) -> list[T]: ...

    # Convert a sequence of tokens back into text.
    def decoder(self, tokens: list[T]) -> str: ...
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class LazyEncoder:
    """Tokenizer wrapper that loads the HF tokenizer on a background thread.

    Construction kicks off the (slow, possibly network-bound) tokenizer load
    on a daemon thread; the first call to `encoder`/`decoder` blocks until
    the load has finished.
    """

    def __init__(self) -> None:
        self._tokenizer: tokenizers.Tokenizer | None = None
        self._init_lock = threading.Lock()
        self._init_thread = threading.Thread(target=self._initialize, daemon=True)
        self._init_thread.start()

    def _initialize(self) -> None:
        """Load the tokenizer exactly once; safe to call from any thread."""
        with self._init_lock:
            if self._tokenizer is None:
                self._tokenizer = tokenizers.Tokenizer.from_pretrained(
                    "Xenova/claude-tokenizer"
                )

    def _ensure_initialized(self) -> None:
        """Block until the background load has completed.

        BUGFIX: the previous version called `self._init_thread.join()` while
        holding `_init_lock`. `_initialize` needs that same lock, so if the
        background thread had not yet acquired it, `join()` waited forever —
        a deadlock. Join outside the lock, then fall back to a synchronous
        load if the background thread died before finishing.
        """
        if self._tokenizer is None:
            self._init_thread.join()
            if self._tokenizer is None:
                # Background thread failed (e.g. network error): retry here
                # so the real exception propagates to the caller.
                self._initialize()

    def encoder(self, text: str) -> list[int]:
        """Encode `text` into token ids."""
        self._ensure_initialized()
        assert self._tokenizer is not None, "Couldn't initialize tokenizer"
        return cast(list[int], self._tokenizer.encode(text).ids)

    def decoder(self, tokens: list[int]) -> str:
        """Decode token ids back into text."""
        self._ensure_initialized()
        assert self._tokenizer is not None, "Couldn't initialize tokenizer"
        return cast(str, self._tokenizer.decode(tokens))
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def get_default_encoder() -> EncoderDecoder[int]:
    """Return the default token encoder/decoder (lazily-loaded tokenizer)."""
    default_encoder: EncoderDecoder[int] = LazyEncoder()
    return default_encoder
|