tunacode-cli 0.1.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tunacode-cli might be problematic. Click here for more details.
- tunacode/__init__.py +0 -0
- tunacode/cli/textual_repl.tcss +283 -0
- tunacode/configuration/__init__.py +1 -0
- tunacode/configuration/defaults.py +45 -0
- tunacode/configuration/models.py +147 -0
- tunacode/configuration/models_registry.json +1 -0
- tunacode/configuration/pricing.py +74 -0
- tunacode/configuration/settings.py +35 -0
- tunacode/constants.py +227 -0
- tunacode/core/__init__.py +6 -0
- tunacode/core/agents/__init__.py +39 -0
- tunacode/core/agents/agent_components/__init__.py +48 -0
- tunacode/core/agents/agent_components/agent_config.py +441 -0
- tunacode/core/agents/agent_components/agent_helpers.py +290 -0
- tunacode/core/agents/agent_components/message_handler.py +99 -0
- tunacode/core/agents/agent_components/node_processor.py +477 -0
- tunacode/core/agents/agent_components/response_state.py +129 -0
- tunacode/core/agents/agent_components/result_wrapper.py +51 -0
- tunacode/core/agents/agent_components/state_transition.py +112 -0
- tunacode/core/agents/agent_components/streaming.py +271 -0
- tunacode/core/agents/agent_components/task_completion.py +40 -0
- tunacode/core/agents/agent_components/tool_buffer.py +44 -0
- tunacode/core/agents/agent_components/tool_executor.py +101 -0
- tunacode/core/agents/agent_components/truncation_checker.py +37 -0
- tunacode/core/agents/delegation_tools.py +109 -0
- tunacode/core/agents/main.py +545 -0
- tunacode/core/agents/prompts.py +66 -0
- tunacode/core/agents/research_agent.py +231 -0
- tunacode/core/compaction.py +218 -0
- tunacode/core/prompting/__init__.py +27 -0
- tunacode/core/prompting/loader.py +66 -0
- tunacode/core/prompting/prompting_engine.py +98 -0
- tunacode/core/prompting/sections.py +50 -0
- tunacode/core/prompting/templates.py +69 -0
- tunacode/core/state.py +409 -0
- tunacode/exceptions.py +313 -0
- tunacode/indexing/__init__.py +5 -0
- tunacode/indexing/code_index.py +432 -0
- tunacode/indexing/constants.py +86 -0
- tunacode/lsp/__init__.py +112 -0
- tunacode/lsp/client.py +351 -0
- tunacode/lsp/diagnostics.py +19 -0
- tunacode/lsp/servers.py +101 -0
- tunacode/prompts/default_prompt.md +952 -0
- tunacode/prompts/research/sections/agent_role.xml +5 -0
- tunacode/prompts/research/sections/constraints.xml +14 -0
- tunacode/prompts/research/sections/output_format.xml +57 -0
- tunacode/prompts/research/sections/tool_use.xml +23 -0
- tunacode/prompts/sections/advanced_patterns.xml +255 -0
- tunacode/prompts/sections/agent_role.xml +8 -0
- tunacode/prompts/sections/completion.xml +10 -0
- tunacode/prompts/sections/critical_rules.xml +37 -0
- tunacode/prompts/sections/examples.xml +220 -0
- tunacode/prompts/sections/output_style.xml +94 -0
- tunacode/prompts/sections/parallel_exec.xml +105 -0
- tunacode/prompts/sections/search_pattern.xml +100 -0
- tunacode/prompts/sections/system_info.xml +6 -0
- tunacode/prompts/sections/tool_use.xml +84 -0
- tunacode/prompts/sections/user_instructions.xml +3 -0
- tunacode/py.typed +0 -0
- tunacode/templates/__init__.py +5 -0
- tunacode/templates/loader.py +15 -0
- tunacode/tools/__init__.py +10 -0
- tunacode/tools/authorization/__init__.py +29 -0
- tunacode/tools/authorization/context.py +32 -0
- tunacode/tools/authorization/factory.py +20 -0
- tunacode/tools/authorization/handler.py +58 -0
- tunacode/tools/authorization/notifier.py +35 -0
- tunacode/tools/authorization/policy.py +19 -0
- tunacode/tools/authorization/requests.py +119 -0
- tunacode/tools/authorization/rules.py +72 -0
- tunacode/tools/bash.py +222 -0
- tunacode/tools/decorators.py +213 -0
- tunacode/tools/glob.py +353 -0
- tunacode/tools/grep.py +468 -0
- tunacode/tools/grep_components/__init__.py +9 -0
- tunacode/tools/grep_components/file_filter.py +93 -0
- tunacode/tools/grep_components/pattern_matcher.py +158 -0
- tunacode/tools/grep_components/result_formatter.py +87 -0
- tunacode/tools/grep_components/search_result.py +34 -0
- tunacode/tools/list_dir.py +205 -0
- tunacode/tools/prompts/bash_prompt.xml +10 -0
- tunacode/tools/prompts/glob_prompt.xml +7 -0
- tunacode/tools/prompts/grep_prompt.xml +10 -0
- tunacode/tools/prompts/list_dir_prompt.xml +7 -0
- tunacode/tools/prompts/read_file_prompt.xml +9 -0
- tunacode/tools/prompts/todoclear_prompt.xml +12 -0
- tunacode/tools/prompts/todoread_prompt.xml +16 -0
- tunacode/tools/prompts/todowrite_prompt.xml +28 -0
- tunacode/tools/prompts/update_file_prompt.xml +9 -0
- tunacode/tools/prompts/web_fetch_prompt.xml +11 -0
- tunacode/tools/prompts/write_file_prompt.xml +7 -0
- tunacode/tools/react.py +111 -0
- tunacode/tools/read_file.py +68 -0
- tunacode/tools/todo.py +222 -0
- tunacode/tools/update_file.py +62 -0
- tunacode/tools/utils/__init__.py +1 -0
- tunacode/tools/utils/ripgrep.py +311 -0
- tunacode/tools/utils/text_match.py +352 -0
- tunacode/tools/web_fetch.py +245 -0
- tunacode/tools/write_file.py +34 -0
- tunacode/tools/xml_helper.py +34 -0
- tunacode/types/__init__.py +166 -0
- tunacode/types/base.py +94 -0
- tunacode/types/callbacks.py +53 -0
- tunacode/types/dataclasses.py +121 -0
- tunacode/types/pydantic_ai.py +31 -0
- tunacode/types/state.py +122 -0
- tunacode/ui/__init__.py +6 -0
- tunacode/ui/app.py +542 -0
- tunacode/ui/commands/__init__.py +430 -0
- tunacode/ui/components/__init__.py +1 -0
- tunacode/ui/headless/__init__.py +5 -0
- tunacode/ui/headless/output.py +72 -0
- tunacode/ui/main.py +252 -0
- tunacode/ui/renderers/__init__.py +41 -0
- tunacode/ui/renderers/errors.py +197 -0
- tunacode/ui/renderers/panels.py +550 -0
- tunacode/ui/renderers/search.py +314 -0
- tunacode/ui/renderers/tools/__init__.py +21 -0
- tunacode/ui/renderers/tools/bash.py +247 -0
- tunacode/ui/renderers/tools/diagnostics.py +186 -0
- tunacode/ui/renderers/tools/glob.py +226 -0
- tunacode/ui/renderers/tools/grep.py +228 -0
- tunacode/ui/renderers/tools/list_dir.py +198 -0
- tunacode/ui/renderers/tools/read_file.py +226 -0
- tunacode/ui/renderers/tools/research.py +294 -0
- tunacode/ui/renderers/tools/update_file.py +237 -0
- tunacode/ui/renderers/tools/web_fetch.py +182 -0
- tunacode/ui/repl_support.py +226 -0
- tunacode/ui/screens/__init__.py +16 -0
- tunacode/ui/screens/model_picker.py +303 -0
- tunacode/ui/screens/session_picker.py +181 -0
- tunacode/ui/screens/setup.py +218 -0
- tunacode/ui/screens/theme_picker.py +90 -0
- tunacode/ui/screens/update_confirm.py +69 -0
- tunacode/ui/shell_runner.py +129 -0
- tunacode/ui/styles/layout.tcss +98 -0
- tunacode/ui/styles/modals.tcss +38 -0
- tunacode/ui/styles/panels.tcss +81 -0
- tunacode/ui/styles/theme-nextstep.tcss +303 -0
- tunacode/ui/styles/widgets.tcss +33 -0
- tunacode/ui/styles.py +18 -0
- tunacode/ui/widgets/__init__.py +23 -0
- tunacode/ui/widgets/command_autocomplete.py +62 -0
- tunacode/ui/widgets/editor.py +402 -0
- tunacode/ui/widgets/file_autocomplete.py +47 -0
- tunacode/ui/widgets/messages.py +46 -0
- tunacode/ui/widgets/resource_bar.py +182 -0
- tunacode/ui/widgets/status_bar.py +98 -0
- tunacode/utils/__init__.py +0 -0
- tunacode/utils/config/__init__.py +13 -0
- tunacode/utils/config/user_configuration.py +91 -0
- tunacode/utils/messaging/__init__.py +10 -0
- tunacode/utils/messaging/message_utils.py +34 -0
- tunacode/utils/messaging/token_counter.py +77 -0
- tunacode/utils/parsing/__init__.py +13 -0
- tunacode/utils/parsing/command_parser.py +55 -0
- tunacode/utils/parsing/json_utils.py +188 -0
- tunacode/utils/parsing/retry.py +146 -0
- tunacode/utils/parsing/tool_parser.py +267 -0
- tunacode/utils/security/__init__.py +15 -0
- tunacode/utils/security/command.py +106 -0
- tunacode/utils/system/__init__.py +25 -0
- tunacode/utils/system/gitignore.py +155 -0
- tunacode/utils/system/paths.py +190 -0
- tunacode/utils/ui/__init__.py +9 -0
- tunacode/utils/ui/file_filter.py +135 -0
- tunacode/utils/ui/helpers.py +24 -0
- tunacode_cli-0.1.21.dist-info/METADATA +170 -0
- tunacode_cli-0.1.21.dist-info/RECORD +174 -0
- tunacode_cli-0.1.21.dist-info/WHEEL +4 -0
- tunacode_cli-0.1.21.dist-info/entry_points.txt +2 -0
- tunacode_cli-0.1.21.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""Status bar widget for TunaCode REPL."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import subprocess
|
|
7
|
+
from typing import TYPE_CHECKING
|
|
8
|
+
|
|
9
|
+
from textual.app import ComposeResult
|
|
10
|
+
from textual.containers import Horizontal
|
|
11
|
+
from textual.widgets import Static
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from tunacode.types import ToolProgress
|
|
15
|
+
|
|
16
|
+
# Longest operation label shown in the status bar before it is ellipsized.
MAX_STATUS_OPERATION_LEN = 25


class StatusBar(Horizontal):
    """Bottom status bar made of three zones.

    Left: ``<branch> ● <cwd name>`` (optionally prefixed by a mode tag).
    Middle: names of edited files. Right: last/running tool.
    """

    def __init__(self) -> None:
        super().__init__()
        # Base names of files reported through add_edited_file().
        self._edited_files: set[str] = set()
        # Last computed location string; reused by set_mode().
        self._location_text: str = ""

    def compose(self) -> ComposeResult:
        """Yield the three zone widgets with their placeholder text."""
        placeholders = (
            ("status-left", "main ● ~/proj"),
            ("status-mid", "edited: -"),
            ("status-right", "last: -"),
        )
        for zone_id, initial_text in placeholders:
            yield Static(initial_text, id=zone_id)

    def on_mount(self) -> None:
        """Replace the left-zone placeholder with the real location."""
        self._refresh_location()

    def _refresh_location(self) -> None:
        """Recompute the branch/directory label and show it in the left zone."""
        try:
            git_head = subprocess.run(
                ["git", "rev-parse", "--abbrev-ref", "HEAD"],
                capture_output=True,
                text=True,
                timeout=1,
            ).stdout.strip()
        except Exception:
            # Best effort: any failure running git falls back to the default.
            git_head = ""
        branch = git_head or "main"

        cwd_name = os.path.basename(os.getcwd()) or "~"
        self._location_text = f"{branch} ● {cwd_name}"
        left_zone = self.query_one("#status-left", Static)
        left_zone.update(self._location_text)

    def update_last_action(self, tool_name: str) -> None:
        """Show ``last: <tool_name>`` in the right zone."""
        right_zone = self.query_one("#status-right", Static)
        right_zone.update(f"last: {tool_name}")

    def update_running_action(self, tool_name: str) -> None:
        """Show ``running: <tool_name>`` in the right zone."""
        right_zone = self.query_one("#status-right", Static)
        right_zone.update(f"running: {tool_name}")

    def update_subagent_progress(self, progress: ToolProgress) -> None:
        """Show subagent progress in the right zone.

        Rendered as ``running: <subagent> [<current>] <operation>``; the
        operation is cut to MAX_STATUS_OPERATION_LEN characters, the last
        three being ``...`` when truncation happens.
        """
        label = progress.operation
        if len(label) > MAX_STATUS_OPERATION_LEN:
            keep = MAX_STATUS_OPERATION_LEN - 3
            label = label[:keep] + "..."

        right_zone = self.query_one("#status-right", Static)
        right_zone.update(f"running: {progress.subagent} [{progress.current}] {label}")

    def add_edited_file(self, filepath: str) -> None:
        """Record the base name of ``filepath`` and refresh the middle zone."""
        self._edited_files.add(os.path.basename(filepath))
        self._update_edited_display()

    def _update_edited_display(self) -> None:
        """Render the edited-file summary into the middle zone.

        Up to three names are listed in full; beyond that the first two
        (sorted) are shown followed by ``+<remaining count>``.
        """
        names = sorted(self._edited_files)
        total = len(names)
        if total == 0:
            summary = "edited: -"
        elif total > 3:
            leading = ", ".join(names[:2])
            summary = f"edited: {leading} +{total - 2}"
        else:
            summary = f"edited: {', '.join(names)}"
        self.query_one("#status-mid", Static).update(summary)

    def set_mode(self, mode: str | None) -> None:
        """Show or clear the ``[mode]`` prefix on the left zone."""
        left_zone = self.query_one("#status-left", Static)
        if not mode:
            left_zone.remove_class("mode-active")
            left_zone.update(self._location_text)
            return
        left_zone.add_class("mode-active")
        left_zone.update(f"[{mode}] {self._location_text}")
|
|
File without changes
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Module: tunacode.utils.config.user_configuration
|
|
3
|
+
|
|
4
|
+
Provides user configuration file management.
|
|
5
|
+
Handles loading, saving, and updating user preferences including model selection.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
from json import JSONDecodeError
|
|
10
|
+
from typing import TYPE_CHECKING
|
|
11
|
+
|
|
12
|
+
from tunacode.configuration.settings import ApplicationSettings
|
|
13
|
+
from tunacode.exceptions import ConfigurationError
|
|
14
|
+
from tunacode.types import ModelName, UserConfig
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from tunacode.core.state import StateManager
|
|
18
|
+
|
|
19
|
+
import hashlib
|
|
20
|
+
|
|
21
|
+
# Fingerprint (truncated SHA-1 hex digest) of the raw text behind _config_cache.
_config_fingerprint: str | None = None
# Parsed config object from the most recent successful load.
_config_cache = None


def load_config() -> UserConfig | None:
    """Load user config from file, using fingerprint fast path if available.

    The raw file text is hashed first; when the hash matches the previous
    load, the cached object is returned without re-parsing the JSON.

    Returns:
        The parsed configuration, or None when the config file does not
        exist. On a cache hit the *same* object as the previous call is
        returned, so mutations made by callers remain visible.

    Raises:
        ConfigurationError: If the file holds invalid JSON or any other
            error occurs while reading or preparing the configuration.
    """
    global _config_fingerprint, _config_cache
    app_settings = ApplicationSettings()
    try:
        with open(app_settings.paths.config_file) as f:
            raw = f.read()

        # Compare fingerprints before parsing so a cache hit skips json.loads.
        new_fp = hashlib.sha1(raw.encode()).hexdigest()[:12]
        if new_fp == _config_fingerprint and _config_cache is not None:
            return _config_cache

        loaded = json.loads(raw)
        _config_fingerprint = new_fp
        _config_cache = loaded

        # Initialize onboarding defaults for new configurations
        _ensure_onboarding_defaults(loaded)

        return loaded
    except FileNotFoundError:
        return None
    except JSONDecodeError as err:
        msg = f"Invalid JSON in config file at {app_settings.paths.config_file}"
        raise ConfigurationError(msg) from err
    except Exception as err:
        raise ConfigurationError(f"Failed to load configuration: {err}") from err
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def save_config(state_manager: "StateManager") -> bool:
    """Save user config to file.

    The configuration is serialized to a string *before* the file is opened
    for writing, so a value that cannot be JSON-encoded never leaves a
    truncated config file behind.

    Args:
        state_manager: Provides ``session.user_config``, the mapping to save.

    Returns:
        True when the file was written.

    Raises:
        ConfigurationError: On permission problems, other OS errors, or any
            unexpected failure (including serialization errors).
    """
    app_settings = ApplicationSettings()
    try:
        # Serialize first: open(..., "w") truncates, so encoding must not fail
        # after that point.
        serialized = json.dumps(state_manager.session.user_config, indent=4)

        # Ensure config directory exists
        app_settings.paths.config_dir.mkdir(mode=0o700, parents=True, exist_ok=True)

        # Write config file
        with open(app_settings.paths.config_file, "w") as f:
            f.write(serialized)
        return True
    except PermissionError as e:
        raise ConfigurationError(
            f"Permission denied writing to {app_settings.paths.config_file}: {e}"
        ) from e
    except OSError as e:
        raise ConfigurationError(
            f"Failed to save configuration to {app_settings.paths.config_file}: {e}"
        ) from e
    except Exception as e:
        raise ConfigurationError(f"Unexpected error saving configuration: {e}") from e
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def set_default_model(model_name: ModelName, state_manager: "StateManager") -> bool:
    """Store ``model_name`` as the default model and persist the config.

    Args:
        model_name: Value written to ``user_config["default_model"]``.
        state_manager: Owner of the session whose config is updated.

    Returns:
        True once the configuration has been saved.

    Raises:
        ConfigurationError: Propagated unchanged from save_config().
    """
    user_config = state_manager.session.user_config
    user_config["default_model"] = model_name
    # Any ConfigurationError from saving is left to the caller to handle.
    save_config(state_manager)
    return True
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _ensure_onboarding_defaults(config: UserConfig) -> None:
    """Add an empty ``"settings"`` entry to ``config`` when it has none.

    The mapping is modified in place; an existing entry is left untouched.
    """
    if "settings" in config:
        return
    config["settings"] = {}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""Messaging utilities: content extraction and token counting."""
|
|
2
|
+
|
|
3
|
+
from tunacode.utils.messaging.message_utils import get_message_content
|
|
4
|
+
from tunacode.utils.messaging.token_counter import estimate_tokens, get_encoding
|
|
5
|
+
|
|
6
|
+
__all__ = [
|
|
7
|
+
"get_message_content",
|
|
8
|
+
"estimate_tokens",
|
|
9
|
+
"get_encoding",
|
|
10
|
+
]
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""Utilities for processing message history."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def _render_message_field(value: Any) -> str:
    """Flatten one ``content``/``parts``/``thought`` value into text.

    Lists are rendered item by item and joined with single spaces; None
    becomes an empty string instead of the literal text "None".
    """
    if value is None:
        return ""
    if isinstance(value, list):
        return " ".join(get_message_content(item) for item in value)
    return str(value)


def get_message_content(message: Any) -> str:
    """Extract the text content from a message object of any type.

    Lookup order: a plain string is returned as is; for dicts the keys
    "content", "parts", then "thought" are tried; otherwise the attributes
    ``content`` then ``parts`` are tried. The first match wins.

    Args:
        message: A string, a dict, or an object exposing ``content`` or
            ``parts``.

    Returns:
        The extracted text, or "" when nothing usable is found or the
        matched value is None.
    """
    if isinstance(message, str):
        return message

    if isinstance(message, dict):
        for key in ("content", "parts"):
            if key in message:
                return _render_message_field(message[key])
        if "thought" in message:
            thought = message["thought"]
            return "" if thought is None else str(thought)
        # A dict without any known key falls through to the attribute checks.

    for attr in ("content", "parts"):
        if hasattr(message, attr):
            return _render_message_field(getattr(message, attr))

    return ""
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""Token counting utility using tiktoken for accurate, offline token estimation, with a character-based fallback when no tiktoken encoding is available."""
|
|
2
|
+
|
|
3
|
+
from functools import lru_cache
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
# Cache for tokenizer encodings (get_encoding itself memoizes via lru_cache).
_encoding_cache: dict[str, Any] = {}


@lru_cache(maxsize=8)
def get_encoding(model_name: str):
    """Return the tiktoken encoding to use for ``model_name``.

    Args:
        model_name: The model name in format "provider:model"; a name
            without a colon is treated as provider "unknown".

    Returns:
        A tiktoken encoding instance, or None when tiktoken cannot be
        imported or the encoding cannot be loaded.
    """
    try:
        import tiktoken
    except ImportError:
        return None

    # Only the provider part of "provider:model" takes part in the lookup.
    provider = model_name.split(":", 1)[0] if ":" in model_name else "unknown"

    # Every provider currently resolves to the same encoding; the table keeps
    # the per-provider choice explicit.
    fallback_encoding = "cl100k_base"
    encoding_by_provider = {
        "openai": "cl100k_base",
        "anthropic": "cl100k_base",
    }
    encoding_name = encoding_by_provider.get(provider, fallback_encoding)

    try:
        return tiktoken.get_encoding(encoding_name)
    except Exception:
        return None
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def estimate_tokens(text: str, model_name: str | None = None) -> int:
|
|
53
|
+
"""
|
|
54
|
+
Estimate token count using tiktoken for accurate results.
|
|
55
|
+
|
|
56
|
+
Args:
|
|
57
|
+
text: The text to count tokens for.
|
|
58
|
+
model_name: Optional model name for model-specific tokenization.
|
|
59
|
+
|
|
60
|
+
Returns:
|
|
61
|
+
The estimated number of tokens.
|
|
62
|
+
"""
|
|
63
|
+
if not text:
|
|
64
|
+
return 0
|
|
65
|
+
|
|
66
|
+
# Try tiktoken first if model is specified
|
|
67
|
+
if model_name:
|
|
68
|
+
encoding = get_encoding(model_name)
|
|
69
|
+
if encoding:
|
|
70
|
+
try:
|
|
71
|
+
return len(encoding.encode(text))
|
|
72
|
+
except Exception:
|
|
73
|
+
pass
|
|
74
|
+
|
|
75
|
+
# Fallback to character-based estimation
|
|
76
|
+
# This is roughly accurate for English text
|
|
77
|
+
return len(text) // 4
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from .command_parser import parse_args
|
|
2
|
+
from .tool_parser import (
|
|
3
|
+
ParsedToolCall,
|
|
4
|
+
has_potential_tool_call,
|
|
5
|
+
parse_tool_calls_from_text,
|
|
6
|
+
)
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"parse_args",
|
|
10
|
+
"ParsedToolCall",
|
|
11
|
+
"has_potential_tool_call",
|
|
12
|
+
"parse_tool_calls_from_text",
|
|
13
|
+
]
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""Module: tunacode.utils.parsing.command_parser
|
|
2
|
+
|
|
3
|
+
Command parsing utilities for the Textual REPL."""
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
|
|
7
|
+
from tunacode.constants import (
|
|
8
|
+
JSON_PARSE_BASE_DELAY,
|
|
9
|
+
JSON_PARSE_MAX_DELAY,
|
|
10
|
+
JSON_PARSE_MAX_RETRIES,
|
|
11
|
+
)
|
|
12
|
+
from tunacode.exceptions import ValidationError
|
|
13
|
+
from tunacode.types import ToolArgs
|
|
14
|
+
from tunacode.utils.parsing.json_utils import safe_json_parse
|
|
15
|
+
from tunacode.utils.parsing.retry import retry_json_parse
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def parse_args(args) -> ToolArgs:
    """
    Turn tool arguments into a dictionary.

    Args:
        args (str or dict): A JSON-formatted string or a dictionary containing tool arguments.

    Returns:
        dict: ``args`` itself when it is already a dictionary, otherwise the
        parsed JSON. For "Extra data" decode errors, the result of
        safe_json_parse() is used when it is a dict, or its first element
        when it is a non-empty list.

    Raises:
        ValidationError: If 'args' is not a string or dictionary, or if the string
            is not valid JSON.
    """
    if isinstance(args, dict):
        return args
    if not isinstance(args, str):
        raise ValidationError(f"Invalid args type: {type(args)}")

    try:
        return retry_json_parse(
            args,
            max_retries=JSON_PARSE_MAX_RETRIES,
            base_delay=JSON_PARSE_BASE_DELAY,
            max_delay=JSON_PARSE_MAX_DELAY,
        )
    except json.JSONDecodeError as e:
        looks_concatenated = "Extra data" in str(e)
        if looks_concatenated:
            try:
                recovered = safe_json_parse(args, allow_concatenated=True)
                if isinstance(recovered, dict):
                    return recovered
                if isinstance(recovered, list) and recovered:
                    return recovered[0]
            except Exception:
                # Recovery is best effort; report the original decode error.
                pass

        raise ValidationError(f"Invalid JSON: {args}") from e
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Module: tunacode.utils.parsing.json_utils
|
|
3
|
+
|
|
4
|
+
JSON parsing utilities with enhanced error handling and concatenated object support.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from tunacode.constants import READ_ONLY_TOOLS
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ConcatenatedJSONError(Exception):
|
|
14
|
+
"""Raised when concatenated JSON objects are detected but cannot be safely handled."""
|
|
15
|
+
|
|
16
|
+
def __init__(self, message: str, objects_found: int, tool_name: str | None = None):
|
|
17
|
+
self.message = message
|
|
18
|
+
self.objects_found = objects_found
|
|
19
|
+
self.tool_name = tool_name
|
|
20
|
+
super().__init__(message)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def split_concatenated_json(json_string: str) -> list[dict[str, Any]]:
    """
    Split concatenated JSON objects like {"a": 1}{"b": 2} into separate objects.

    The string is scanned once, tracking brace depth while skipping braces
    that appear inside string literals. Each balanced top-level ``{...}``
    span is parsed on its own; spans that fail to parse are skipped.
    Unmatched closing braces are ignored so they cannot hide later objects.

    Args:
        json_string: String containing potentially concatenated JSON objects

    Returns:
        List of parsed JSON objects

    Raises:
        json.JSONDecodeError: If no valid JSON objects can be extracted
    """
    objects: list[dict[str, Any]] = []
    brace_count = 0
    start_pos = 0
    in_string = False
    escape_next = False

    for i, char in enumerate(json_string):
        if escape_next:
            # The character after a backslash is never structural.
            escape_next = False
            continue

        if char == "\\":
            escape_next = True
            continue

        if char == '"':
            in_string = not in_string
            continue

        if in_string:
            continue

        if char == "{":
            if brace_count == 0:
                start_pos = i
            brace_count += 1
        elif char == "}":
            if brace_count == 0:
                # Stray closer with nothing open: skip it rather than letting
                # the depth go negative and desynchronize the scan.
                continue
            brace_count -= 1
            if brace_count == 0:
                try:
                    parsed = json.loads(json_string[start_pos : i + 1])
                except json.JSONDecodeError:
                    continue

                if isinstance(parsed, dict):
                    objects.append(parsed)

    if not objects:
        raise json.JSONDecodeError("No valid JSON objects found", json_string, 0)

    return objects
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def validate_tool_args_safety(objects: list[dict[str, Any]], tool_name: str | None = None) -> bool:
|
|
81
|
+
"""
|
|
82
|
+
Validate whether it's safe to execute multiple JSON objects for a given tool.
|
|
83
|
+
|
|
84
|
+
Args:
|
|
85
|
+
objects: List of JSON objects to validate
|
|
86
|
+
tool_name: Name of the tool (if known)
|
|
87
|
+
|
|
88
|
+
Returns:
|
|
89
|
+
bool: True if safe to execute, False otherwise
|
|
90
|
+
|
|
91
|
+
Raises:
|
|
92
|
+
ConcatenatedJSONError: If multiple objects detected for unsafe tool
|
|
93
|
+
"""
|
|
94
|
+
if len(objects) <= 1:
|
|
95
|
+
return True
|
|
96
|
+
|
|
97
|
+
# Check if tool is read-only (safer to execute multiple times)
|
|
98
|
+
if tool_name and tool_name in READ_ONLY_TOOLS:
|
|
99
|
+
return True
|
|
100
|
+
|
|
101
|
+
# For write/execute tools, multiple objects are potentially dangerous
|
|
102
|
+
if tool_name is None:
|
|
103
|
+
return False
|
|
104
|
+
|
|
105
|
+
raise ConcatenatedJSONError(
|
|
106
|
+
f"Multiple JSON objects not safe for tool {tool_name}",
|
|
107
|
+
objects_found=len(objects),
|
|
108
|
+
tool_name=tool_name,
|
|
109
|
+
)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def safe_json_parse(
|
|
113
|
+
json_string: str, tool_name: str | None = None, allow_concatenated: bool = False
|
|
114
|
+
) -> dict[str, Any] | list[dict[str, Any]]:
|
|
115
|
+
"""
|
|
116
|
+
Safely parse JSON with optional concatenated object support.
|
|
117
|
+
|
|
118
|
+
Args:
|
|
119
|
+
json_string: JSON string to parse
|
|
120
|
+
tool_name: Name of the tool (for safety validation)
|
|
121
|
+
allow_concatenated: Whether to attempt splitting concatenated objects
|
|
122
|
+
|
|
123
|
+
Returns:
|
|
124
|
+
Single dict if one object, or list of dicts if multiple objects
|
|
125
|
+
|
|
126
|
+
Raises:
|
|
127
|
+
json.JSONDecodeError: If parsing fails
|
|
128
|
+
ConcatenatedJSONError: If concatenated objects are unsafe
|
|
129
|
+
"""
|
|
130
|
+
try:
|
|
131
|
+
# First, try normal JSON parsing
|
|
132
|
+
result = json.loads(json_string)
|
|
133
|
+
except json.JSONDecodeError as e:
|
|
134
|
+
if not allow_concatenated or "Extra data" not in str(e):
|
|
135
|
+
raise
|
|
136
|
+
|
|
137
|
+
# Try to split concatenated objects
|
|
138
|
+
objects = split_concatenated_json(json_string)
|
|
139
|
+
|
|
140
|
+
# Validate safety - fail loud if multiple objects would be discarded
|
|
141
|
+
if not validate_tool_args_safety(objects, tool_name):
|
|
142
|
+
if len(objects) > 1:
|
|
143
|
+
raise ConcatenatedJSONError(
|
|
144
|
+
"Multiple JSON objects detected but tool safety unknown",
|
|
145
|
+
objects_found=len(objects),
|
|
146
|
+
tool_name=tool_name,
|
|
147
|
+
) from None
|
|
148
|
+
return objects[0]
|
|
149
|
+
|
|
150
|
+
if len(objects) == 1:
|
|
151
|
+
return objects[0]
|
|
152
|
+
|
|
153
|
+
return objects
|
|
154
|
+
|
|
155
|
+
if not isinstance(result, dict):
|
|
156
|
+
raise json.JSONDecodeError(f"Expected dict, got {type(result)}", json_string, 0)
|
|
157
|
+
|
|
158
|
+
return result
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def merge_json_objects(objects: list[dict[str, Any]], strategy: str = "first") -> dict[str, Any]:
|
|
162
|
+
"""
|
|
163
|
+
Merge multiple JSON objects using different strategies.
|
|
164
|
+
|
|
165
|
+
Args:
|
|
166
|
+
objects: List of JSON objects to merge
|
|
167
|
+
strategy: Merge strategy ("first", "last", "combine")
|
|
168
|
+
|
|
169
|
+
Returns:
|
|
170
|
+
Single merged JSON object
|
|
171
|
+
"""
|
|
172
|
+
if not objects:
|
|
173
|
+
return {}
|
|
174
|
+
|
|
175
|
+
if len(objects) == 1:
|
|
176
|
+
return objects[0]
|
|
177
|
+
|
|
178
|
+
if strategy == "first":
|
|
179
|
+
return objects[0]
|
|
180
|
+
if strategy == "last":
|
|
181
|
+
return objects[-1]
|
|
182
|
+
if strategy == "combine":
|
|
183
|
+
result: dict[str, Any] = {}
|
|
184
|
+
for obj in objects:
|
|
185
|
+
result.update(obj)
|
|
186
|
+
return result
|
|
187
|
+
|
|
188
|
+
raise ValueError(f"Unknown merge strategy: {strategy}")
|