wcgw 5.5.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wcgw/__init__.py +4 -0
- wcgw/client/__init__.py +0 -0
- wcgw/client/bash_state/bash_state.py +1426 -0
- wcgw/client/bash_state/parser/__init__.py +7 -0
- wcgw/client/bash_state/parser/bash_statement_parser.py +181 -0
- wcgw/client/common.py +51 -0
- wcgw/client/diff-instructions.txt +73 -0
- wcgw/client/encoder/__init__.py +47 -0
- wcgw/client/file_ops/diff_edit.py +619 -0
- wcgw/client/file_ops/extensions.py +137 -0
- wcgw/client/file_ops/search_replace.py +212 -0
- wcgw/client/mcp_server/Readme.md +3 -0
- wcgw/client/mcp_server/__init__.py +32 -0
- wcgw/client/mcp_server/server.py +184 -0
- wcgw/client/memory.py +103 -0
- wcgw/client/modes.py +240 -0
- wcgw/client/repo_ops/display_tree.py +116 -0
- wcgw/client/repo_ops/file_stats.py +152 -0
- wcgw/client/repo_ops/path_prob.py +58 -0
- wcgw/client/repo_ops/paths_model.vocab +20000 -0
- wcgw/client/repo_ops/paths_tokens.model +80042 -0
- wcgw/client/repo_ops/repo_context.py +289 -0
- wcgw/client/schema_generator.py +63 -0
- wcgw/client/tool_prompts.py +98 -0
- wcgw/client/tools.py +1432 -0
- wcgw/py.typed +0 -0
- wcgw/types_.py +318 -0
- wcgw-5.5.4.dist-info/METADATA +339 -0
- wcgw-5.5.4.dist-info/RECORD +38 -0
- wcgw-5.5.4.dist-info/WHEEL +4 -0
- wcgw-5.5.4.dist-info/entry_points.txt +4 -0
- wcgw-5.5.4.dist-info/licenses/LICENSE +213 -0
- wcgw_cli/__init__.py +1 -0
- wcgw_cli/__main__.py +3 -0
- wcgw_cli/anthropic_client.py +486 -0
- wcgw_cli/cli.py +40 -0
- wcgw_cli/openai_client.py +404 -0
- wcgw_cli/openai_utils.py +67 -0
wcgw/client/modes.py
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import Any, Literal, NamedTuple
|
|
3
|
+
|
|
4
|
+
from ..types_ import Modes, ModesConfig
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class BashCommandMode(NamedTuple):
    """Immutable pair describing how the shell runs and whether commands are permitted."""

    bash_mode: Literal["normal_mode", "restricted_mode"]
    allowed_commands: Literal["all", "none"]

    def serialize(self) -> dict[str, Any]:
        """Return the two fields as a plain dict keyed by field name."""
        return self._asdict()

    @classmethod
    def deserialize(cls, data: dict[str, Any]) -> "BashCommandMode":
        """Rebuild an instance from a dict produced by ``serialize``.

        Raises:
            KeyError: if ``bash_mode`` or ``allowed_commands`` is missing.
        """
        bash_mode = data["bash_mode"]
        allowed_commands = data["allowed_commands"]
        return cls(bash_mode=bash_mode, allowed_commands=allowed_commands)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class FileEditMode(NamedTuple):
|
|
20
|
+
allowed_globs: Literal["all"] | list[str]
|
|
21
|
+
|
|
22
|
+
def serialize(self) -> dict[str, Any]:
|
|
23
|
+
return {"allowed_globs": self.allowed_globs}
|
|
24
|
+
|
|
25
|
+
@classmethod
|
|
26
|
+
def deserialize(cls, data: dict[str, Any]) -> "FileEditMode":
|
|
27
|
+
return cls(data["allowed_globs"])
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class WriteIfEmptyMode(NamedTuple):
|
|
31
|
+
allowed_globs: Literal["all"] | list[str]
|
|
32
|
+
|
|
33
|
+
def serialize(self) -> dict[str, Any]:
|
|
34
|
+
return {"allowed_globs": self.allowed_globs}
|
|
35
|
+
|
|
36
|
+
@classmethod
|
|
37
|
+
def deserialize(cls, data: dict[str, Any]) -> "WriteIfEmptyMode":
|
|
38
|
+
return cls(data["allowed_globs"])
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass
class ModeImpl:
    """Bundle of the three permission settings that together define one mode."""

    # Shell policy: the bash mode plus whether commands are allowed.
    bash_command_mode: BashCommandMode
    # Globs (or "all") that limit edits to files.
    file_edit_mode: FileEditMode
    # Globs (or "all") that limit file writes; presumably applies to creating
    # new/empty files, going by the name -- confirm against the callers.
    write_if_empty_mode: WriteIfEmptyMode
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def code_writer_prompt(
|
|
49
|
+
allowed_file_edit_globs: Literal["all"] | list[str],
|
|
50
|
+
all_write_new_globs: Literal["all"] | list[str],
|
|
51
|
+
allowed_commands: Literal["all"] | list[str],
|
|
52
|
+
) -> str:
|
|
53
|
+
base = """
|
|
54
|
+
You are now running in "code_writer" mode.
|
|
55
|
+
"""
|
|
56
|
+
|
|
57
|
+
path_prompt = """
|
|
58
|
+
- You are allowed to edit files in the provided repository only.
|
|
59
|
+
"""
|
|
60
|
+
|
|
61
|
+
if allowed_file_edit_globs != "all":
|
|
62
|
+
if allowed_file_edit_globs:
|
|
63
|
+
path_prompt = f"""
|
|
64
|
+
- You are allowed to edit files for files matching only the following globs: {", ".join(allowed_file_edit_globs)}
|
|
65
|
+
"""
|
|
66
|
+
else:
|
|
67
|
+
path_prompt = """
|
|
68
|
+
- You are not allowed to edit files.
|
|
69
|
+
"""
|
|
70
|
+
base += path_prompt
|
|
71
|
+
|
|
72
|
+
path_prompt = """
|
|
73
|
+
- You are allowed to write files in the provided repository only.
|
|
74
|
+
"""
|
|
75
|
+
|
|
76
|
+
if all_write_new_globs != "all":
|
|
77
|
+
if all_write_new_globs:
|
|
78
|
+
path_prompt = f"""
|
|
79
|
+
- You are allowed to write files files matching only the following globs: {", ".join(allowed_file_edit_globs)}
|
|
80
|
+
"""
|
|
81
|
+
else:
|
|
82
|
+
path_prompt = """
|
|
83
|
+
- You are not allowed to write files.
|
|
84
|
+
"""
|
|
85
|
+
base += path_prompt
|
|
86
|
+
|
|
87
|
+
run_command_common = """
|
|
88
|
+
- Do not use Ctrl-c interrupt commands without asking the user, because often the programs don't show any update but they still are running.
|
|
89
|
+
- Do not use echo/cat to write any file, always use FileWriteOrEdit tool to create/update files.
|
|
90
|
+
- Do not provide code snippets unless asked by the user, instead directly add/edit the code.
|
|
91
|
+
- You should use the provided bash execution, reading and writing file tools to complete objective.
|
|
92
|
+
- Do not use artifacts if you have access to the repository and not asked by the user to provide artifacts/snippets. Directly create/update using wcgw tools.
|
|
93
|
+
"""
|
|
94
|
+
|
|
95
|
+
command_prompt = f"""
|
|
96
|
+
- You are only allowed to run commands for project setup, code writing, editing, updating, testing, running and debugging related to the project.
|
|
97
|
+
- Do not run anything that adds or removes packages, changes system configuration or environment.
|
|
98
|
+
{run_command_common}
|
|
99
|
+
"""
|
|
100
|
+
if allowed_commands != "all":
|
|
101
|
+
if allowed_commands:
|
|
102
|
+
command_prompt = f"""
|
|
103
|
+
- You are only allowed to run the following commands: {", ".join(allowed_commands)}
|
|
104
|
+
{run_command_common}
|
|
105
|
+
"""
|
|
106
|
+
else:
|
|
107
|
+
command_prompt = """
|
|
108
|
+
- You are not allowed to run any commands.
|
|
109
|
+
"""
|
|
110
|
+
|
|
111
|
+
base += command_prompt
|
|
112
|
+
return base
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
WCGW_PROMPT = """
|
|
116
|
+
# Instructions
|
|
117
|
+
|
|
118
|
+
- You should use the provided bash execution, reading and writing file tools to complete objective.
|
|
119
|
+
- Do not provide code snippets unless asked by the user, instead directly add/edit the code.
|
|
120
|
+
- Do not install new tools/packages before ensuring no such tools/package or an alternative already exists.
|
|
121
|
+
- Do not use artifacts if you have access to the repository and not asked by the user to provide artifacts/snippets. Directly create/update using wcgw tools
|
|
122
|
+
- Do not use Ctrl-c or interrupt commands without asking the user, because often the programs don't show any update but they still are running.
|
|
123
|
+
- Do not use echo/cat to write any file, always use FileWriteOrEdit tool to create/update files.
|
|
124
|
+
- You can share task summary directly without creating any file.
|
|
125
|
+
- Provide as many file paths as you need in ReadFiles in one go.
|
|
126
|
+
|
|
127
|
+
Additional instructions:
|
|
128
|
+
Always run `pwd` if you get any file or directory not found error to make sure you're not lost, or to get absolute cwd.
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
"""
|
|
132
|
+
ARCHITECT_PROMPT = """
|
|
133
|
+
# Instructions
|
|
134
|
+
You are now running in "architect" mode. This means
|
|
135
|
+
- You are not allowed to edit or update any file. You are not allowed to create any file.
|
|
136
|
+
- You are not allowed to run any commands that may change disk, system configuration, packages or environment. Only read-only commands are allowed.
|
|
137
|
+
- Only run commands that allows you to explore the repository, understand the system or read anything of relevance.
|
|
138
|
+
- Do not use Ctrl-c or interrupt commands without asking the user, because often the programs don't show any update but they still are running.
|
|
139
|
+
- You are not allowed to change directory (bash will run in -r mode)
|
|
140
|
+
- Share only snippets when any implementation is requested.
|
|
141
|
+
- Provide as many file paths as you need in ReadFiles in one go.
|
|
142
|
+
|
|
143
|
+
# Disallowed tools (important!)
|
|
144
|
+
- FileWriteOrEdit
|
|
145
|
+
|
|
146
|
+
# Response instructions
|
|
147
|
+
Respond only after doing the following:
|
|
148
|
+
- Read as many relevant files as possible.
|
|
149
|
+
- Be comprehensive in your understanding and search of relevant files.
|
|
150
|
+
- First understand about the project by getting the folder structure (ignoring .git, node_modules, venv, etc.)
|
|
151
|
+
- Share minimal snippets higlighting the changes (avoid large number of lines in the snippets, use ... comments)
|
|
152
|
+
"""
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
# Built-in permission presets, keyed by mode name.
# "wcgw" and "code_writer" are unrestricted by default; "architect" runs bash
# in restricted mode and has empty glob lists for both edits and writes.
DEFAULT_MODES: dict[Modes, ModeImpl] = {
    "wcgw": ModeImpl(
        bash_command_mode=BashCommandMode(
            bash_mode="normal_mode", allowed_commands="all"
        ),
        file_edit_mode=FileEditMode(allowed_globs="all"),
        write_if_empty_mode=WriteIfEmptyMode(allowed_globs="all"),
    ),
    "architect": ModeImpl(
        bash_command_mode=BashCommandMode(
            bash_mode="restricted_mode", allowed_commands="all"
        ),
        file_edit_mode=FileEditMode(allowed_globs=[]),
        write_if_empty_mode=WriteIfEmptyMode(allowed_globs=[]),
    ),
    "code_writer": ModeImpl(
        bash_command_mode=BashCommandMode(
            bash_mode="normal_mode", allowed_commands="all"
        ),
        file_edit_mode=FileEditMode(allowed_globs="all"),
        write_if_empty_mode=WriteIfEmptyMode(allowed_globs="all"),
    ),
}
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def modes_to_state(
    mode: ModesConfig,
) -> tuple[BashCommandMode, FileEditMode, WriteIfEmptyMode, Modes]:
    """Resolve a mode configuration into its concrete permission settings.

    Args:
        mode: Either a mode name (a string key of ``DEFAULT_MODES``) or a
            configuration object exposing ``allowed_commands`` and
            ``allowed_globs``.

    Returns:
        ``(bash_command_mode, file_edit_mode, write_if_empty_mode, mode_name)``.

    Raises:
        KeyError: if ``mode`` is a string that is not a key of ``DEFAULT_MODES``.
    """
    # A plain mode name maps straight onto its preset.
    if isinstance(mode, str):
        preset = DEFAULT_MODES[mode]
        return (
            preset.bash_command_mode,
            preset.file_edit_mode,
            preset.write_if_empty_mode,
            mode,
        )

    # Anything else is treated as a custom code-writer configuration: the
    # bash mode comes from the "code_writer" preset, everything else from the
    # configuration object.
    code_writer_defaults = DEFAULT_MODES["code_writer"]
    bash_settings = BashCommandMode(
        code_writer_defaults.bash_command_mode.bash_mode,
        # Any truthy value ("all" or a non-empty list) collapses to "all".
        "all" if mode.allowed_commands else "none",
    )
    # The same globs govern both edits and writes.
    edit_settings = FileEditMode(mode.allowed_globs)
    write_settings = WriteIfEmptyMode(mode.allowed_globs)
    return (bash_settings, edit_settings, write_settings, "code_writer")
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
WCGW_KT = """Use `ContextSave` tool to do a knowledge transfer of the task in hand.
|
|
203
|
+
Write detailed description in order to do a KT.
|
|
204
|
+
Save all information necessary for a person to understand the task and the problems.
|
|
205
|
+
|
|
206
|
+
Format the `description` field using Markdown with the following sections.
|
|
207
|
+
- "# Objective" section containing project and task objective.
|
|
208
|
+
- "# All user instructions" section should be provided containing all instructions user shared in the conversation.
|
|
209
|
+
- "# Current status of the task" should be provided containing only what is already achieved, not what's remaining.
|
|
210
|
+
- "# Pending issues with snippets" section containing snippets of pending errors, traceback, file snippets, commands, etc. But no comments or solutions.
|
|
211
|
+
- Be very verbose in the all issues with snippets section providing as much error context as possible.
|
|
212
|
+
- "# Build and development instructions" section containing instructions to build or run project or run tests, or envrionment related information. Only include what's known. Leave empty if unknown.
|
|
213
|
+
- Any other relevant sections following the above.
|
|
214
|
+
- After the tool completes succesfully, tell me the task id and the file path the tool generated (important!)
|
|
215
|
+
- This tool marks end of your conversation, do not run any further tools after calling this.
|
|
216
|
+
|
|
217
|
+
Provide all relevant file paths in order to understand and solve the the task. Err towards providing more file paths than fewer.
|
|
218
|
+
|
|
219
|
+
(Note to self: this conversation can then be resumed later asking "Resume wcgw task `<generated id>`" which should call Initialize tool)
|
|
220
|
+
"""
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
ARCHITECT_KT = """Use `ContextSave` tool to do a knowledge transfer of the task in hand.
|
|
224
|
+
Write detailed description in order to do a KT.
|
|
225
|
+
Save all information necessary for a person to understand the task and the problems.
|
|
226
|
+
|
|
227
|
+
Format the `description` field using Markdown with the following sections.
|
|
228
|
+
- "# Objective" section containing project and task objective.
|
|
229
|
+
- "# All user instructions" section should be provided containing all instructions user shared in the conversation.
|
|
230
|
+
- "# Designed plan" should be provided containing the designed plan as discussed.
|
|
231
|
+
- Any other relevant sections following the above.
|
|
232
|
+
- After the tool completes succesfully, tell me the task id and the file path the tool generated (important!)
|
|
233
|
+
- This tool marks end of your conversation, do not run any further tools after calling this.
|
|
234
|
+
|
|
235
|
+
Provide all relevant file paths in order to understand and solve the the task. Err towards providing more file paths than fewer.
|
|
236
|
+
|
|
237
|
+
(Note to self: this conversation can then be resumed later asking "Resume wcgw task `<generated id>`" which should call Initialize tool)
|
|
238
|
+
"""
|
|
239
|
+
|
|
240
|
+
# Knowledge-transfer prompt per mode name; "code_writer" shares the "wcgw" text.
KTS = dict(wcgw=WCGW_KT, architect=ARCHITECT_KT, code_writer=WCGW_KT)
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import io
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import List, Set
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class DirectoryTree:
    """Render a directory as an indented text tree that reveals only selected files.

    Files registered through ``expand`` and the directories leading to them
    are printed; everything else in a visited directory is summarised by a
    single ``...`` line.
    """

    def __init__(self, root: Path, max_files: int = 10):
        """
        Initialize the DirectoryTree with a root path and maximum number of files to display

        Args:
            root: The root directory path to start from
            max_files: Maximum number of files to display in unexpanded directories.
                The value is stored on the instance; no method of this class reads it.

        Raises:
            ValueError: if ``root`` does not exist or is not a directory.
        """
        self.root = root
        self.max_files = max_files
        self.expanded_files: Set[Path] = set()
        self.expanded_dirs: Set[Path] = set()

        if not self.root.exists():
            raise ValueError(f"Root path {root} does not exist")

        if not self.root.is_dir():
            raise ValueError(f"Root path {root} is not a directory")

    def expand(self, rel_path: str) -> None:
        """
        Expand a specific file in the tree

        The call is silently ignored when the path does not name an existing
        regular file located under the root.

        Args:
            rel_path: Relative path from root to the file to expand
        """
        candidate = self.root / rel_path

        # Containment is decided on path components, not on string prefixes:
        # a prefix test accepts "root/../sibling/x" and rejects everything
        # when the root is ".".
        try:
            rel_parts = candidate.relative_to(self.root).parts
        except ValueError:
            return

        # Collapse ".." lexically so the stored path has the same spelling as
        # the entries produced by iterdir() during display.  Symlinked
        # directories are not resolved here.
        normalized: List[str] = []
        for part in rel_parts:
            if part == "..":
                if not normalized:
                    return  # would climb above the root
                normalized.pop()
            else:
                normalized.append(part)

        abs_path = self.root.joinpath(*normalized)
        if not abs_path.is_file():  # also False when the path does not exist
            return

        self.expanded_files.add(abs_path)

        # Mark every ancestor up to and including the root as expanded.
        for parent in abs_path.parents:
            self.expanded_dirs.add(parent)
            if parent == self.root:
                break

    def _list_directory(self, dir_path: Path) -> List[Path]:
        """List contents of a directory, sorted with directories first"""
        contents = list(dir_path.iterdir())
        # False sorts before True, so directories lead; names compare case-insensitively.
        return sorted(contents, key=lambda x: (not x.is_dir(), x.name.lower()))

    def _count_hidden_items(
        self, dir_path: Path, shown_items: List[Path]
    ) -> tuple[int, int]:
        """Count hidden files and directories in a directory

        Returns:
            ``(hidden_files, hidden_dirs)``: entries of ``dir_path`` missing
            from ``shown_items``.  Entries that are neither a file nor a
            directory are not counted.
        """
        all_items = set(self._list_directory(dir_path))
        shown_items_set = set(shown_items)
        hidden_items = all_items - shown_items_set

        hidden_files = sum(1 for p in hidden_items if p.is_file())
        hidden_dirs = sum(1 for p in hidden_items if p.is_dir())

        return hidden_files, hidden_dirs

    def display(self) -> str:
        """Display the directory tree with expanded state

        Returns:
            The tree as text: the root path, then expanded entries indented
            two spaces per level, with ``...`` wherever a visited directory
            has entries that are not shown.
        """
        writer = io.StringIO()

        def _display_recursive(
            current_path: Path, indent: int = 0, depth: int = 0
        ) -> None:
            # Print current directory name with a trailing slash for directories
            if current_path == self.root:
                writer.write(f"{current_path}/\n")
            else:
                writer.write(f"{' ' * indent}{current_path.name}/\n")

            # Don't recurse beyond depth 1 unless path contains expanded files
            if depth > 0 and current_path not in self.expanded_dirs:
                return

            # Get directory contents
            contents = self._list_directory(current_path)
            shown_items = []

            for item in contents:
                # Show items only if:
                # 1. They are expanded files
                # 2. They are parents of expanded items
                should_show = item in self.expanded_files or item in self.expanded_dirs

                if should_show:
                    shown_items.append(item)
                    if item.is_dir():
                        _display_recursive(item, indent + 2, depth + 1)
                    else:
                        writer.write(f"{' ' * (indent + 2)}{item.name}\n")

            # Show hidden items count if any items were hidden
            hidden_files, hidden_dirs = self._count_hidden_items(
                current_path, shown_items
            )
            if hidden_files > 0 or hidden_dirs > 0:
                writer.write(f"{' ' * (indent + 2)}...\n")

        _display_recursive(self.root, depth=0)

        return writer.getvalue()
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import json
|
|
3
|
+
import os
|
|
4
|
+
import sys
|
|
5
|
+
from typing import Any, Callable, Dict, TypeVar, cast
|
|
6
|
+
|
|
7
|
+
# Unconstrained type variable; not referenced in the visible part of this module.
T = TypeVar("T") # Type variable for generic functions
# Bound to callables so that a decorator can declare it returns the same
# callable type it was given.
F = TypeVar("F", bound=Callable[..., Any]) # Type variable for decorated functions
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class FileStats:
    """Per-file tally of read, edit and write operations."""

    def __init__(self) -> None:
        # All three counters start at zero.
        self.read_count: int = 0
        self.edit_count: int = 0
        self.write_count: int = 0

    def increment_read(self) -> None:
        """Record one more read."""
        self.read_count += 1

    def increment_edit(self) -> None:
        """Record one more edit."""
        self.edit_count += 1

    def increment_write(self) -> None:
        """Record one more write."""
        self.write_count += 1

    def to_dict(self) -> Dict[str, int]:
        """Return the counters as a plain dict suitable for serialization."""
        counter_names = ("read_count", "edit_count", "write_count")
        return {name: getattr(self, name) for name in counter_names}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "FileStats":
        """Build an instance from a serialized dict; missing counters default to 0."""
        instance = cls()
        for name in ("read_count", "edit_count", "write_count"):
            setattr(instance, name, data.get(name, 0))
        return instance
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class WorkspaceStats:
    """Collection of per-file operation counters for one workspace."""

    def __init__(self) -> None:
        # Maps a file path to the FileStats recorded for it.
        self.files: Dict[str, FileStats] = {}

    def to_dict(self) -> Dict[str, Any]:
        """Return ``{"files": {path: counters}}`` for serialization."""
        serialized: Dict[str, Any] = {}
        for path, file_stats in self.files.items():
            serialized[path] = file_stats.to_dict()
        return {"files": serialized}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "WorkspaceStats":
        """Build an instance from a serialized dict; a missing ``"files"`` key means no files."""
        workspace = cls()
        raw_files = data.get("files", {})
        workspace.files = {
            path: FileStats.from_dict(entry) for path, entry in raw_files.items()
        }
        return workspace
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def safe_stats_operation(func: F) -> F:
    """
    Decorator to safely perform stats operations without affecting core functionality.
    If an exception occurs, it logs the error but allows the program to continue.

    Args:
        func: The function to guard.

    Returns:
        A wrapper with the same name, docstring and signature metadata as
        ``func``.  It returns whatever ``func`` returns, or ``None`` when
        ``func`` raises an ``Exception``; callers must therefore be prepared
        for ``None``.
    """
    import functools

    # Preserve __name__/__doc__/__wrapped__ so tracebacks and introspection
    # show the decorated function rather than "wrapper".
    @functools.wraps(func)
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        try:
            return func(*args, **kwargs)
        except Exception as e:
            # Deliberate best-effort: report on stderr and carry on, because
            # stats tracking must never break the operation being tracked.
            print(f"Warning: Stats tracking error - {e}", file=sys.stderr)
            return None

    # This is a workaround for proper typing with decorators
    return cast(F, wrapper)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def get_stats_path(workspace_path: str) -> str:
    """
    Get the path to the stats file for a workspace using a hash-based approach.

    As a side effect, the directory that holds the stats files is created if
    it does not exist yet.

    Args:
        workspace_path: The full path of the workspace directory.

    Returns:
        The path to the stats file, of the form
        ``<data dir>/wcgw/workspace_stats/<basename>_<md5 of path>.json``.

    Raises:
        OSError: if the stats directory cannot be created.
    """
    # Normalize so that equivalent spellings (trailing slash, "~") map to one file.
    workspace_path = os.path.normpath(os.path.expanduser(workspace_path))

    # Get the basename of the workspace path for readability
    workspace_name = os.path.basename(workspace_path) or "root"  # "" for the root directory

    # The digest only disambiguates workspaces that share a basename; it is
    # not a security measure.  Declaring that keeps md5 usable on
    # FIPS-restricted Python builds without changing the digest.
    path_hash = hashlib.md5(
        workspace_path.encode(), usedforsecurity=False
    ).hexdigest()

    # Combine to create a unique identifier that's still somewhat readable
    filename = f"{workspace_name}_{path_hash}.json"

    # An unset *or empty* XDG_DATA_HOME falls back to the default; an empty
    # value would otherwise yield a directory relative to the current
    # working directory.
    xdg_data_dir = os.environ.get("XDG_DATA_HOME") or os.path.expanduser(
        "~/.local/share"
    )
    stats_dir = os.path.join(xdg_data_dir, "wcgw/workspace_stats")
    os.makedirs(stats_dir, exist_ok=True)

    return os.path.join(stats_dir, filename)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
@safe_stats_operation
def load_workspace_stats(workspace_path: str) -> WorkspaceStats:
    """
    Load the stats for a workspace, or create empty stats if not exists.

    A stats file that is missing, is not valid JSON, or holds JSON of an
    unexpected shape yields empty stats.

    Args:
        workspace_path: The full path of the workspace directory.

    Returns:
        WorkspaceStats object containing file operation statistics.
    """
    stats_path = get_stats_path(workspace_path)
    try:
        # Open directly instead of checking for existence first, so a file
        # removed between the check and the open cannot raise here.
        with open(stats_path, "r", encoding="utf-8") as f:
            return WorkspaceStats.from_dict(json.load(f))
    except FileNotFoundError:
        # No stats recorded yet for this workspace.
        return WorkspaceStats()
    except (json.JSONDecodeError, KeyError, ValueError, AttributeError, TypeError):
        # Handle corrupted file.  AttributeError/TypeError cover JSON that
        # parses but is not the expected mapping of mappings (e.g. a list).
        return WorkspaceStats()
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
@safe_stats_operation
def save_workspace_stats(workspace_path: str, stats: WorkspaceStats) -> None:
    """
    Write the stats of a workspace to its stats file as indented JSON.

    Args:
        workspace_path: The full path of the workspace directory.
        stats: WorkspaceStats object to save.
    """
    destination = get_stats_path(workspace_path)
    with open(destination, "w") as handle:
        serialized = stats.to_dict()
        json.dump(serialized, handle, indent=2)
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
from typing import Dict, List, Tuple
|
|
2
|
+
|
|
3
|
+
import tokenizers # type: ignore[import-untyped]
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class FastPathAnalyzer:
    """Score file paths by summing per-token log probabilities from a vocabulary file."""

    def __init__(self, model_path: str, vocab_path: str) -> None:
        """Initialize with vocabulary.

        Args:
            model_path: Tokenizer file loaded with ``tokenizers.Tokenizer.from_file``.
            vocab_path: Text file with one ``<token> <value>`` pair per line,
                separated by whitespace.  Lines that do not split into exactly
                two fields, or whose value is not a float, are skipped.
        """
        # Load vocabulary and probabilities
        self.vocab_probs: Dict[str, float] = {}
        # Decode explicitly as UTF-8 so non-ASCII tokens load the same way on
        # every platform instead of depending on the locale's default
        # encoding.  NOTE(review): assumes the vocab file is UTF-8 -- confirm.
        with open(vocab_path, "r", encoding="utf-8") as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) != 2:
                    continue
                token, prob = parts
                try:
                    self.vocab_probs[token] = float(prob)
                except ValueError:
                    continue

        self.encoder = tokenizers.Tokenizer.from_file(model_path)

    def tokenize_batch(self, texts: List[str]) -> List[List[str]]:
        """Tokenize multiple texts at once."""
        encodings = self.encoder.encode_batch(texts)
        return [encoding.tokens for encoding in encodings]

    def detokenize(self, tokens: List[str]) -> str:
        """Convert tokens back to text, handling special tokens.

        ``Tokenizer.decode`` takes integer ids, so token strings are mapped
        to ids first; tokens unknown to the tokenizer are dropped.  Integer
        entries are passed through unchanged.
        """
        ids: List[int] = []
        for token in tokens:
            if isinstance(token, int):
                ids.append(token)
                continue
            token_id = self.encoder.token_to_id(token)
            if token_id is not None:
                ids.append(token_id)
        return self.encoder.decode(ids)  # type: ignore[no-any-return]

    def calculate_path_probabilities_batch(
        self, paths: List[str]
    ) -> List[Tuple[float, List[str], List[str]]]:
        """Calculate log probability for multiple paths at once.

        Returns:
            One ``(log_prob_sum, tokens, unknown_tokens)`` tuple per input
            path, in input order.  Tokens missing from the vocabulary add
            nothing to the sum and are listed in ``unknown_tokens``.
        """
        # Batch tokenize all paths
        all_tokens = self.tokenize_batch(paths)

        results = []
        for tokens in all_tokens:
            # Calculate sum of log probabilities for each path
            log_prob_sum = 0.0
            unknown_tokens = []
            for token in tokens:
                if token in self.vocab_probs:
                    log_prob_sum += self.vocab_probs[token]
                else:
                    unknown_tokens.append(token)

            results.append((log_prob_sum, tokens, unknown_tokens))

        return results

    def calculate_path_probability(
        self, path: str
    ) -> Tuple[float, List[str], List[str]]:
        """Calculate log probability for a single path."""
        return self.calculate_path_probabilities_batch([path])[0]
|