wcgw 5.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ """
2
+ Parser for bash statements using tree-sitter.
3
+
4
+ This module provides functionality to parse and identify individual bash statements.
5
+ """
6
+
7
+ from .bash_statement_parser import BashStatementParser, Statement
@@ -0,0 +1,181 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Bash Statement Parser
4
+
5
+ This script parses bash scripts and identifies individual statements using tree-sitter.
6
+ It correctly handles multi-line strings, command chains with && and ||, and semicolon-separated statements.
7
+ """
8
+
9
+ import sys
10
+ from dataclasses import dataclass
11
+ from typing import Any, List, Optional
12
+
13
+ import tree_sitter_bash
14
+ from tree_sitter import Language, Parser
15
+
16
+
17
+ @dataclass
18
+ class Statement:
19
+ """A bash statement with its source code and position information."""
20
+
21
+ text: str
22
+ start_line: int
23
+ end_line: int
24
+ start_byte: int
25
+ end_byte: int
26
+ node_type: str
27
+ parent_type: Optional[str] = None
28
+
29
+ def __str__(self) -> str:
30
+ return self.text.strip()
31
+
32
+
33
class BashStatementParser:
    """Identify individual statements in bash source using tree-sitter.

    Handles multi-line strings, command chains with && / || / |, and
    semicolon-separated statements.
    """

    def __init__(self) -> None:
        # Use the precompiled bash grammar bundled with tree_sitter_bash.
        self.language = Language(tree_sitter_bash.language())
        self.parser = Parser(self.language)

    def parse_file(self, file_path: str) -> List[Statement]:
        """Parse a bash script file and return a list of statements."""
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()
        return self.parse_string(content)

    def parse_string(self, content: str) -> List[Statement]:
        """Parse a string containing bash script and return a list of statements."""
        # BUG FIX: tree-sitter reports *byte* offsets into the UTF-8 encoding
        # of the source.  The previous version sliced the `str` with those
        # offsets, which silently corrupts statement text as soon as the
        # script contains a multi-byte character.  Keep a bytes copy, slice
        # that, and decode.
        data = bytes(content, "utf-8")
        tree = self.parser.parse(data)
        root_node = tree.root_node

        # For debugging: Uncomment to print the tree structure
        # self._print_tree(root_node, data)

        statements: List[Statement] = []
        self._extract_statements(root_node, data, statements, None)

        # Post-process statements to handle multi-line statements correctly
        return self._post_process_statements(statements, content)

    def _print_tree(self, node: Any, data: bytes, indent: str = "") -> None:
        """Debug helper to print the entire syntax tree."""
        node_text = data[node.start_byte : node.end_byte].decode("utf-8", "replace")
        if len(node_text) > 40:
            node_text = node_text[:37] + "..."
        print(f"{indent}{node.type}: {repr(node_text)}")
        for child in node.children:
            self._print_tree(child, data, indent + "  ")

    def _extract_statements(
        self,
        node: Any,
        data: bytes,
        statements: List[Statement],
        parent_type: Optional[str],
    ) -> None:
        """Recursively collect a Statement for every recognized node type."""
        # Node types that represent bash statements
        statement_node_types = {
            # Basic statements
            "command",
            "variable_assignment",
            "declaration_command",
            "unset_command",
            # Control flow statements
            "for_statement",
            "c_style_for_statement",
            "while_statement",
            "if_statement",
            "case_statement",
            # Function definition
            "function_definition",
            # Command chains and groups
            "pipeline",  # For command chains with | and |&
            "list",  # For command chains with && and ||
            "compound_statement",
            "subshell",
            "redirected_statement",
        }

        # Create a Statement object for this node if it's a recognized statement type
        if node.type in statement_node_types:
            start_byte = node.start_byte
            end_byte = node.end_byte
            # Slice the UTF-8 bytes (offsets are byte offsets), then decode.
            statement_text = data[start_byte:end_byte].decode("utf-8", "replace")

            # tree-sitter uses 0-indexed line numbers; report 1-indexed.
            start_line = node.start_point[0] + 1
            end_line = node.end_point[0] + 1

            statements.append(
                Statement(
                    text=statement_text,
                    start_line=start_line,
                    end_line=end_line,
                    start_byte=start_byte,
                    end_byte=end_byte,
                    node_type=node.type,
                    parent_type=parent_type,
                )
            )

            # Children report this node's type as their parent.
            parent_type = node.type

        # Recursively process all children
        for child in node.children:
            self._extract_statements(child, data, statements, parent_type)

    def _post_process_statements(
        self, statements: List[Statement], content: str
    ) -> List[Statement]:
        """Drop statements fully contained in a larger one; sort by position."""
        if not statements:
            return []

        # Filter out statements that are nested inside another collected
        # statement.  Semicolon-separated "list" nodes are treated as already
        # split, so they do not swallow their component statements.
        top_statements = []
        for stmt in statements:
            is_contained = False
            for other in statements:
                if other is stmt:
                    continue

                # Check if completely contained (except for lists we've split)
                if other.node_type != "list" or ";" not in other.text:
                    if (
                        other.start_line <= stmt.start_line
                        and other.end_line >= stmt.end_line
                        and len(other.text) > len(stmt.text)
                        and stmt.text in other.text
                    ):
                        is_contained = True
                        break

            if not is_contained:
                top_statements.append(stmt)

        # Sort by position in file for consistent output
        top_statements.sort(key=lambda s: (s.start_line, s.text))

        return top_statements
164
+
165
+
166
def main() -> None:
    """CLI entry point: parse the bash script named on argv and print each statement."""
    if len(sys.argv) < 2:
        print("Usage: python bash_statement_parser.py <bash_script_file>")
        sys.exit(1)

    statement_parser = BashStatementParser()
    found = statement_parser.parse_file(sys.argv[1])

    print(f"Found {len(found)} statements:")
    for index, statement in enumerate(found, 1):
        print(f"\n--- Statement {index} (Lines {statement.start_line}-{statement.end_line}) ---")
        print(statement)


if __name__ == "__main__":
    main()
wcgw/client/common.py ADDED
@@ -0,0 +1,51 @@
1
+ import select
2
+ import sys
3
+ import termios
4
+ import tty
5
+ from typing import Literal
6
+ from pydantic import BaseModel
7
+
8
+
9
class CostData(BaseModel):
    """Per-model pricing used to compute API usage cost."""

    # Cost per one million input (prompt) tokens.
    cost_per_1m_input_tokens: float
    # Cost per one million output (completion) tokens.
    cost_per_1m_output_tokens: float
12
+
13
+
14
# NOTE(review): this import sits mid-file (after CostData); consider moving it
# to the top-of-file import block.
from openai.types.chat import (
    ChatCompletionMessageParam,
    ChatCompletionAssistantMessageParam,
    ChatCompletionMessage,
    ParsedChatCompletionMessage,
)

# A chat transcript in OpenAI chat-completion message format.
History = list[ChatCompletionMessageParam]
# Model identifiers accepted by this client.
Models = Literal["gpt-4o-2024-08-06", "gpt-4o-mini"]
23
+
24
+
25
def discard_input() -> None:
    """Drain any pending characters from stdin without echoing them.

    Temporarily switches the terminal to cbreak (non-canonical) mode, reads
    until select() reports nothing ready, then restores the saved terminal
    state.  If stdin is not a real terminal (termios.error) or has no usable
    file descriptor (ValueError), a warning is printed instead of raising.
    """
    try:
        stdin_fd = sys.stdin.fileno()

        # Remember the terminal state so it can be restored afterwards.
        saved_state = termios.tcgetattr(stdin_fd)

        try:
            # Non-canonical mode: characters become readable immediately.
            tty.setcbreak(stdin_fd)

            # Keep consuming one character at a time while select() (with a
            # zero timeout) says stdin still has input ready.
            while sys.stdin in select.select([sys.stdin], [], [], 0)[0]:
                sys.stdin.read(1)
        finally:
            # Always restore the original terminal settings.
            termios.tcsetattr(stdin_fd, termios.TCSADRAIN, saved_state)
    except (termios.error, ValueError) as e:
        # Handle the error gracefully
        print(f"Warning: Unable to discard input. Error: {e}")
@@ -0,0 +1,73 @@
1
+
2
+ Instructions for editing files.
3
+ # Example
4
+ ## Input file
5
+ ```
6
+ import numpy as np
7
+ from impls import impl1, impl2
8
+
9
+ def hello():
10
+ "print a greeting"
11
+
12
+ print("hello")
13
+
14
+ def call_hello():
15
+ "call hello"
16
+
17
+ hello()
18
+ print("Called")
19
+ impl1()
20
+ hello()
21
+ impl2()
22
+
23
+ ```
24
+ ## Edit format on the input file
25
+ ```
26
+ <<<<<<< SEARCH
27
+ from impls import impl1, impl2
28
+ =======
29
+ from impls import impl1, impl2
30
+ from hello import hello as hello_renamed
31
+ >>>>>>> REPLACE
32
+ <<<<<<< SEARCH
33
+ def hello():
34
+ "print a greeting"
35
+
36
+ print("hello")
37
+ =======
38
+ >>>>>>> REPLACE
39
+ <<<<<<< SEARCH
40
+ def call_hello():
41
+ "call hello"
42
+
43
+ hello()
44
+ =======
45
+ def call_hello_renamed():
46
+ "call hello renamed"
47
+
48
+ hello_renamed()
49
+ >>>>>>> REPLACE
50
+ <<<<<<< SEARCH
51
+ impl1()
52
+ hello()
53
+ impl2()
54
+ =======
55
+ impl1()
56
+ hello_renamed()
57
+ impl2()
58
+ >>>>>>> REPLACE
59
+ ```
60
+
61
+ # *SEARCH/REPLACE block* Rules:
62
+ Every "<<<<<<< SEARCH" section must *EXACTLY MATCH* the existing file content, character for character, including all comments, docstrings, whitespaces, etc.
63
+
64
+ Include multiple unique *SEARCH/REPLACE* blocks if needed.
65
+ Include enough and only enough lines in each SEARCH section to uniquely match each set of lines that need to change.
66
+
67
+ Keep *SEARCH/REPLACE* blocks concise.
68
+ Break large *SEARCH/REPLACE* blocks into a series of smaller blocks that each change a small portion of the file.
69
+ Include just the changing lines, and a few surrounding lines (0-3 lines) if needed for uniqueness.
70
+ Other than for uniqueness, avoid including lines which do not change in SEARCH (and REPLACE) blocks. Target 0-3 non-trivial extra lines per block.
71
+
72
+ Preserve leading spaces and indentations in both SEARCH and REPLACE blocks.
73
+
@@ -0,0 +1,47 @@
1
+ import threading
2
+ from typing import Callable, Protocol, TypeVar, cast
3
+
4
+ import tokenizers # type: ignore[import-untyped]
5
+
6
+ T = TypeVar("T")
7
+
8
+
9
class EncoderDecoder(Protocol[T]):
    """Structural interface for a tokenizer over token type T."""

    # Encode text into a sequence of tokens.
    def encoder(self, text: str) -> list[T]: ...

    # Decode a sequence of tokens back into text.
    def decoder(self, tokens: list[T]) -> str: ...
13
+
14
+
15
class LazyEncoder:
    """Tokenizer wrapper that loads "Xenova/claude-tokenizer" in a background
    thread so construction does not block on the (potentially slow) load.

    encoder()/decoder() block until the tokenizer is ready.
    """

    def __init__(self) -> None:
        self._tokenizer: tokenizers.Tokenizer | None = None
        self._init_lock = threading.Lock()
        self._init_thread = threading.Thread(target=self._initialize, daemon=True)
        self._init_thread.start()

    def _initialize(self) -> None:
        # Runs on the background thread; the lock guards the write and
        # prevents double-loading.
        with self._init_lock:
            if self._tokenizer is None:
                self._tokenizer = tokenizers.Tokenizer.from_pretrained(
                    "Xenova/claude-tokenizer"
                )

    def _ensure_initialized(self) -> None:
        # BUG FIX: the previous version called self._init_thread.join()
        # *while holding* _init_lock.  _initialize needs that same lock, so
        # if the init thread had not yet acquired it, join() waited forever
        # — a deadlock.  Joining without the lock is safe: once join()
        # returns, _initialize has completed and its write is visible.
        if self._tokenizer is None:
            self._init_thread.join()

    def encoder(self, text: str) -> list[int]:
        self._ensure_initialized()
        assert self._tokenizer is not None, "Couldn't initialize tokenizer"
        return cast(list[int], self._tokenizer.encode(text).ids)

    def decoder(self, tokens: list[int]) -> str:
        self._ensure_initialized()
        assert self._tokenizer is not None, "Couldn't initialize tokenizer"
        return cast(str, self._tokenizer.decode(tokens))
44
+
45
+
46
def get_default_encoder() -> EncoderDecoder[int]:
    """Return the default integer-token encoder/decoder (a lazily loaded LazyEncoder)."""
    default_encoder: EncoderDecoder[int] = LazyEncoder()
    return default_encoder