tunacode-cli 0.0.40__py3-none-any.whl → 0.0.42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tunacode-cli might be problematic. Click here for more details.

Files changed (50) hide show
  1. tunacode/cli/commands/__init__.py +2 -0
  2. tunacode/cli/commands/implementations/__init__.py +3 -0
  3. tunacode/cli/commands/implementations/debug.py +1 -1
  4. tunacode/cli/commands/implementations/todo.py +217 -0
  5. tunacode/cli/commands/registry.py +2 -0
  6. tunacode/cli/main.py +12 -5
  7. tunacode/cli/repl.py +205 -136
  8. tunacode/configuration/defaults.py +2 -0
  9. tunacode/configuration/models.py +6 -0
  10. tunacode/constants.py +27 -3
  11. tunacode/context.py +7 -3
  12. tunacode/core/agents/dspy_integration.py +223 -0
  13. tunacode/core/agents/dspy_tunacode.py +458 -0
  14. tunacode/core/agents/main.py +182 -12
  15. tunacode/core/agents/utils.py +54 -6
  16. tunacode/core/recursive/__init__.py +18 -0
  17. tunacode/core/recursive/aggregator.py +467 -0
  18. tunacode/core/recursive/budget.py +414 -0
  19. tunacode/core/recursive/decomposer.py +398 -0
  20. tunacode/core/recursive/executor.py +467 -0
  21. tunacode/core/recursive/hierarchy.py +487 -0
  22. tunacode/core/setup/config_setup.py +5 -0
  23. tunacode/core/state.py +91 -1
  24. tunacode/core/token_usage/api_response_parser.py +44 -0
  25. tunacode/core/token_usage/cost_calculator.py +58 -0
  26. tunacode/core/token_usage/usage_tracker.py +98 -0
  27. tunacode/exceptions.py +23 -0
  28. tunacode/prompts/dspy_task_planning.md +45 -0
  29. tunacode/prompts/dspy_tool_selection.md +58 -0
  30. tunacode/prompts/system.md +69 -5
  31. tunacode/tools/todo.py +343 -0
  32. tunacode/types.py +20 -1
  33. tunacode/ui/console.py +1 -1
  34. tunacode/ui/input.py +1 -1
  35. tunacode/ui/output.py +38 -1
  36. tunacode/ui/panels.py +4 -1
  37. tunacode/ui/recursive_progress.py +380 -0
  38. tunacode/ui/tool_ui.py +24 -6
  39. tunacode/ui/utils.py +1 -1
  40. tunacode/utils/message_utils.py +17 -0
  41. tunacode/utils/retry.py +163 -0
  42. tunacode/utils/token_counter.py +78 -8
  43. {tunacode_cli-0.0.40.dist-info → tunacode_cli-0.0.42.dist-info}/METADATA +4 -1
  44. {tunacode_cli-0.0.40.dist-info → tunacode_cli-0.0.42.dist-info}/RECORD +48 -32
  45. tunacode/cli/textual_app.py +0 -420
  46. tunacode/cli/textual_bridge.py +0 -161
  47. {tunacode_cli-0.0.40.dist-info → tunacode_cli-0.0.42.dist-info}/WHEEL +0 -0
  48. {tunacode_cli-0.0.40.dist-info → tunacode_cli-0.0.42.dist-info}/entry_points.txt +0 -0
  49. {tunacode_cli-0.0.40.dist-info → tunacode_cli-0.0.42.dist-info}/licenses/LICENSE +0 -0
  50. {tunacode_cli-0.0.40.dist-info → tunacode_cli-0.0.42.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,58 @@
1
+ """
2
+ Module: tunacode.core.token_usage.cost_calculator
3
+ Provides a utility for calculating the cost of model usage based on token counts.
4
+ """
5
+
6
+ from tunacode.configuration.models import ModelRegistry
7
+ from tunacode.types import CostAmount, ModelName, TokenCount
8
+
9
+
class CostCalculator:
    """
    Calculates the cost of a model interaction based on prompt and completion tokens.

    Rates are read from the ``pricing`` attribute of the registry entry for a
    model; ``pricing.input`` and ``pricing.output`` are applied per one million
    tokens.
    """

    # Divisor that converts a raw token count into "millions of tokens",
    # the unit the pricing rates are multiplied against.
    TOKENS_PER_MILLION = 1_000_000

    def __init__(self, registry: "ModelRegistry"):
        """
        Initializes the CostCalculator with a model registry.

        Args:
            registry (ModelRegistry): Object whose ``get_model(name)`` returns
                the configuration (including ``pricing``) for a model, or a
                falsy value when the model is unknown.
        """
        self._registry = registry

    def calculate_cost(
        self,
        model_name: "ModelName",
        prompt_tokens: "TokenCount",
        completion_tokens: "TokenCount",
    ) -> "CostAmount":
        """
        Calculates the total cost for a given model and token usage.

        Args:
            model_name (ModelName): The identifier for the model (e.g., "openai:gpt-4o").
            prompt_tokens (TokenCount): The number of tokens in the input/prompt.
                ``None`` is treated as zero.
            completion_tokens (TokenCount): The number of tokens in the output/completion.
                ``None`` is treated as zero.

        Returns:
            CostAmount: The calculated cost as a float. Returns 0.0 if the model
            is not found in the registry.
        """
        model_config = self._registry.get_model(model_name)
        if not model_config:
            return 0.0

        pricing = model_config.pricing

        # A usage payload may carry an explicit null for a count; charge
        # nothing for that side instead of raising TypeError on None / int.
        input_cost = ((prompt_tokens or 0) / self.TOKENS_PER_MILLION) * pricing.input
        output_cost = ((completion_tokens or 0) / self.TOKENS_PER_MILLION) * pricing.output

        return input_cost + output_cost
@@ -0,0 +1,98 @@
1
+ from typing import Any
2
+
3
+ from tunacode.core.state import StateManager
4
+ from tunacode.core.token_usage.api_response_parser import ApiResponseParser
5
+ from tunacode.core.token_usage.cost_calculator import CostCalculator
6
+ from tunacode.types import UsageTrackerProtocol
7
+ from tunacode.ui import console as ui # Import the ui console directly
8
+
9
+
class UsageTracker(UsageTrackerProtocol):
    """
    Handles parsing, calculating, storing, and displaying token usage and cost.

    The tracker reads the current model and the ``show_thoughts`` flag from
    ``state_manager.session`` and writes per-call and cumulative figures to
    ``session.last_call_usage`` and ``session.session_total_usage``.
    """

    def __init__(
        self,
        parser: ApiResponseParser,
        calculator: CostCalculator,
        state_manager: StateManager,
    ):
        """
        Args:
            parser: Turns a raw model response into a dict of usage data.
            calculator: Converts token counts into a cost.
            state_manager: Owner of the session whose usage counters are updated.
        """
        self.parser = parser
        self.calculator = calculator
        self.state_manager = state_manager

    async def track_and_display(self, response_obj: Any):
        """
        Main method to process a model response for usage tracking.

        Parses the response, computes its cost, updates the session counters,
        and prints a summary when ``session.show_thoughts`` is enabled. Any
        failure is reported (only when ``show_thoughts`` is on) and never
        propagated to the caller.
        """
        try:
            # 1. Parse the response to get token data
            requested_model = self.state_manager.session.current_model
            parsed_data = self.parser.parse(model=requested_model, response_obj=response_obj)

            if not parsed_data:
                return

            # 2. Calculate the cost
            cost = self._calculate_cost(parsed_data)

            # 3. Update the session state
            self._update_state(parsed_data, cost)

            # 4. Display the summary if enabled
            if self.state_manager.session.show_thoughts:
                await self._display_summary()

        except Exception as e:
            # Usage tracking is best-effort: a failure here must not break
            # the request that produced the response.
            if self.state_manager.session.show_thoughts:
                await ui.error(f"Error during cost calculation: {e}")

    @staticmethod
    def _token_count(parsed_data: dict, key: str) -> int:
        """Returns ``parsed_data[key]``, treating a missing or null entry as 0."""
        # dict.get's default only applies to a *missing* key; ``or 0`` also
        # covers a key that is present with a None value.
        return parsed_data.get(key) or 0

    def _calculate_cost(self, parsed_data: dict) -> float:
        """
        Calculates the cost for the given parsed data.

        The model name reported in ``parsed_data`` is used when present,
        falling back to the session's current model. If the session model has
        a ``provider:`` prefix that the reported name lacks, the prefix is
        re-attached before the price lookup.
        """
        requested_model = self.state_manager.session.current_model
        # Fall back when the key is missing, null, or empty so the
        # ``startswith`` call below always receives a usable string.
        api_model_name = parsed_data.get("model_name") or requested_model
        final_model_name = api_model_name

        # Logic to preserve the provider prefix
        if requested_model and api_model_name and ":" in requested_model:
            provider_prefix = requested_model.split(":", 1)[0]
            if not api_model_name.startswith(provider_prefix + ":"):
                final_model_name = f"{provider_prefix}:{api_model_name}"

        return self.calculator.calculate_cost(
            prompt_tokens=self._token_count(parsed_data, "prompt_tokens"),
            completion_tokens=self._token_count(parsed_data, "completion_tokens"),
            model_name=final_model_name,
        )

    def _update_state(self, parsed_data: dict, cost: float):
        """Updates the last_call and session_total usage in the state."""
        session = self.state_manager.session
        # Normalise before writing anything, so a null count can no longer
        # leave last_call_usage updated while the totals raise on ``+=``.
        prompt_tokens = self._token_count(parsed_data, "prompt_tokens")
        completion_tokens = self._token_count(parsed_data, "completion_tokens")

        # Update last call usage
        session.last_call_usage["prompt_tokens"] = prompt_tokens
        session.last_call_usage["completion_tokens"] = completion_tokens
        session.last_call_usage["cost"] = cost

        # Accumulate session totals
        session.session_total_usage["prompt_tokens"] += prompt_tokens
        session.session_total_usage["completion_tokens"] += completion_tokens
        session.session_total_usage["cost"] += cost

    async def _display_summary(self):
        """Formats and prints the usage summary to the console."""
        session = self.state_manager.session
        prompt = session.last_call_usage["prompt_tokens"]
        completion = session.last_call_usage["completion_tokens"]
        last_cost = session.last_call_usage["cost"]
        session_cost = session.session_total_usage["cost"]

        usage_summary = (
            f"[ Tokens: {prompt + completion:,} (P: {prompt:,}, C: {completion:,}) | "
            f"Cost: ${last_cost:.4f} | "
            f"Session Total: ${session_cost:.4f} ]"
        )
        await ui.muted(usage_summary)
tunacode/exceptions.py CHANGED
@@ -114,3 +114,26 @@ class TooBroadPatternError(ToolExecutionError):
114
114
  f"Pattern '{pattern}' is too broad - no matches found within {timeout_seconds}s. "
115
115
  "Please use a more specific pattern.",
116
116
  )
117
+
118
+
class ToolBatchingJSONError(TunaCodeError):
    """Raised when JSON parsing fails during tool batching after all retries are exhausted.

    Attributes:
        json_content: The full, untruncated text that failed to parse.
        retry_count: Number of retries reported in the error message.
        original_error: The underlying error, if one was supplied.
    """

    def __init__(
        self,
        json_content: str,
        retry_count: int,
        original_error: OriginalError = None,
    ):
        self.json_content = json_content
        self.retry_count = retry_count
        self.original_error = original_error

        # Only the first 100 characters go into the message; the complete
        # payload stays available on ``self.json_content``.
        if len(json_content) > 100:
            preview = json_content[:100] + "..."
        else:
            preview = json_content

        message = (
            "The model is having issues with tool batching. "
            f"JSON parsing failed after {retry_count} retries. "
            f"Invalid JSON: {preview}"
        )
        super().__init__(message)
@@ -0,0 +1,45 @@
1
+ # DSPy Task Planning Prompt
2
+
3
+ This prompt is automatically generated and optimized by DSPy for TunaCode's complex task planning.
4
+
5
+ ## Format
6
+
7
+ Given the fields `complex_request`, produce the fields `subtasks_with_tools`, `total_tool_calls`, `requires_todo`, `parallelization_opportunities`.
8
+
9
+ ---
10
+
11
+ Follow the following format.
12
+
13
+ Complex Request: A complex task that needs breakdown
14
+ Reasoning: Let's think step by step in order to ${produce the answer}. We need to break this down into manageable subtasks...
15
+ Subtasks With Tools: JSON array of {task, tools, priority} objects
16
+ Total Tool Calls: Estimated total number of tool calls
17
+ Requires Todo: Whether todo tool should be used
18
+ Parallelization Opportunities: Number of parallel execution opportunities
19
+
20
+ ---
21
+
22
+ ## Learned Example (Selected by DSPy)
23
+
24
+ ### Example: Authentication System Implementation
25
+ Complex Request: Implement user authentication system with JWT tokens
26
+ Reasoning: Let's think step by step. This is a complex implementation task that requires multiple components. First, I need to analyze the current application structure to understand how to integrate authentication. Then I'll design the user model and database schema. Next, I'll create the authentication endpoints for registration and login. I'll need to implement JWT token generation and validation. Finally, I'll add tests to ensure everything works correctly. Several of these tasks can be done in parallel once the initial analysis is complete.
27
+ Subtasks With Tools: [{"task": "Analyze current app structure", "tools": ["list_dir", "grep", "read_file"], "priority": "high"}, {"task": "Design user model", "tools": ["write_file"], "priority": "high"}, {"task": "Create auth endpoints", "tools": ["write_file", "update_file"], "priority": "high"}, {"task": "Add JWT tokens", "tools": ["write_file", "grep"], "priority": "high"}, {"task": "Write tests", "tools": ["write_file", "run_command"], "priority": "medium"}]
28
+ Total Tool Calls: 15
29
+ Requires Todo: true
30
+ Parallelization Opportunities: 3
31
+
32
+ ---
33
+
34
+ ## Key Patterns for Complex Tasks
35
+
36
+ 1. **Break Down First**: Start with analysis/exploration before implementation
37
+ 2. **Priority Levels**: High for core functionality, medium for tests/docs, low for nice-to-haves
38
+ 3. **Tool Grouping**: Group related tools together for each subtask
39
+ 4. **Todo Usage**: Use todo tool for tasks with 5+ subtasks
40
+ 5. **Parallelization**: Identify independent subtasks that can run concurrently
41
+
42
+ ---
43
+
44
+ Complex Request: ${complex_request}
45
+ Reasoning: Let's think step by step...
@@ -0,0 +1,58 @@
1
+ # DSPy Tool Selection Prompt
2
+
3
+ This prompt is automatically generated and optimized by DSPy for TunaCode's tool selection.
4
+
5
+ ## Format
6
+
7
+ Given the fields `user_request`, `current_directory`, produce the fields `tools_json`, `requires_confirmation`, `reasoning`.
8
+
9
+ ---
10
+
11
+ Follow the following format.
12
+
13
+ User Request: The user's request or task
14
+ Current Directory: Current working directory context
15
+ Reasoning: Let's think step by step in order to ${produce the answer}. We ...
16
+ Tools Json: JSON array of tool calls with batch grouping, e.g. [[tool1, tool2, tool3], [tool4]]
17
+ Requires Confirmation: Whether any tools require user confirmation
18
+
19
+ ---
20
+
21
+ ## Learned Examples (Automatically Selected by DSPy)
22
+
23
+ ### Example 1: Searching for Implementation
24
+ User Request: Show me the authentication system implementation
25
+ Current Directory: .
26
+ Reasoning: Let's think step by step. To show the authentication implementation, I need to search for auth-related files across the codebase. I'll use grep to find files containing 'auth', list the auth directory if it exists, and use glob to find all auth-related Python files. These are all read-only operations that can be executed in parallel.
27
+ Tools Json: [["grep(\"auth\", \"src/\")", "list_dir(\"src/auth/\")", "glob(\"**/*auth*.py\")"]]
28
+ Requires Confirmation: false
29
+
30
+ ### Example 2: Reading Multiple Files (Optimal Batching)
31
+ User Request: Read all config files and the main module
32
+ Current Directory: .
33
+ Reasoning: Let's think step by step. I need to read multiple specific files. All of these are read operations that can be batched together for parallel execution. I'll batch them in a group of 4 for optimal performance.
34
+ Tools Json: [["read_file(\"config.json\")", "read_file(\"settings.py\")", "read_file(\".env\")", "read_file(\"main.py\")"]]
35
+ Requires Confirmation: false
36
+
37
+ ### Example 3: Search, Read, then Modify Pattern
38
+ User Request: Find the bug in validation and fix it
39
+ Current Directory: .
40
+ Reasoning: Let's think step by step. First, I need to search for validation-related code and errors. I'll use grep to search for error patterns and validation code, and list the validators directory. These search operations can be parallelized. After finding the issue, I'll need to read the specific file and then update it to fix the bug.
41
+ Tools Json: [["grep(\"error\", \"logs/\")", "grep(\"validation\", \"src/\")", "list_dir(\"src/validators/\")"], ["read_file(\"src/validators/user.py\")"], ["update_file(\"src/validators/user.py\", \"old\", \"new\")"]]
42
+ Requires Confirmation: true
43
+
44
+ ---
45
+
46
+ ## Key Patterns Learned by DSPy
47
+
48
+ 1. **3-4 Tool Batching**: Optimal batch size for parallel read-only operations
49
+ 2. **Read-Only Parallelization**: grep, list_dir, glob, read_file can run in parallel
50
+ 3. **Sequential Writes**: write_file, update_file, run_command, bash must run sequentially
51
+ 4. **Confirmation Required**: Any write/execute operation needs confirmation
52
+ 5. **Search → Read → Modify**: Common pattern for debugging and fixes
53
+
54
+ ---
55
+
56
+ User Request: ${user_request}
57
+ Current Directory: ${current_directory}
58
+ Reasoning: Let's think step by step...
@@ -12,7 +12,7 @@ You MUST follow these rules:
12
12
 
13
13
  \###Tool Access Rules###
14
14
 
15
- You have 8 powerful tools at your disposal. Understanding their categories is CRITICAL for performance:
15
+ You have 9 powerful tools at your disposal. Understanding their categories is CRITICAL for performance:
16
16
 
17
17
  ** READ-ONLY TOOLS (Safe, Parallel-Executable)**
18
18
  These tools can and SHOULD be executed in parallel batches for 3x-10x performance gains:
@@ -30,19 +30,28 @@ These tools can and SHOULD be executed in parallel batches for 3x-10x performanc
30
30
  - Returns: Sorted list of matching file paths
31
31
  - Use for: Finding all \*.py files, configs, etc.
32
32
 
33
+ ** TASK MANAGEMENT TOOLS (Fast, Sequential)**
34
+ These tools help organize and track complex multi-step tasks:
35
+
36
+ 5. `todo(action: str, content: str = None, todo_id: str = None, status: str = None, priority: str = None, todos: list = None)` — Manage task lists
37
+ - Actions: "add", "add_multiple", "update", "complete", "list", "remove"
38
+ - Use for: Breaking down complex tasks, tracking progress, organizing work
39
+ - **IMPORTANT**: Use this tool when tackling multi-step problems or complex implementations
40
+ - **Multiple todos**: Use `todo("add_multiple", todos=[{"content": "task1", "priority": "high"}, {"content": "task2", "priority": "medium"}])` to add many todos at once
41
+
33
42
  ** WRITE/EXECUTE TOOLS (Require Confirmation, Sequential)**
34
43
  These tools modify state and MUST run one at a time with user confirmation:
35
44
 
36
- 5. `write_file(filepath: str, content: str)` — Create new files
45
+ 6. `write_file(filepath: str, content: str)` — Create new files
37
46
  - Safety: Fails if file exists (no overwrites)
38
47
  - Use for: Creating new modules, configs, tests
39
- 6. `update_file(filepath: str, target: str, patch: str)` — Modify existing files
48
+ 7. `update_file(filepath: str, target: str, patch: str)` — Modify existing files
40
49
  - Safety: Shows diff before applying changes
41
50
  - Use for: Fixing bugs, updating imports, refactoring
42
- 7. `run_command(command: str)` — Execute shell commands
51
+ 8. `run_command(command: str)` — Execute shell commands
43
52
  - Safety: Full command confirmation required
44
53
  - Use for: Running tests, git operations, installs
45
- 8. `bash(command: str)` — Advanced shell with environment control
54
+ 9. `bash(command: str)` — Advanced shell with environment control
46
55
  - Safety: Enhanced security, output limits (5KB)
47
56
  - Use for: Complex scripts, interactive commands
48
57
 
@@ -85,12 +94,65 @@ These tools modify state and MUST run one at a time with user confirmation:
85
94
  - Need to see file content? → `read_file`
86
95
  - Need to find something? → `grep` (content) or `glob` (filenames)
87
96
  - Need to explore? → `list_dir`
97
+ - Need to track tasks? → `todo` (for complex multi-step work)
88
98
  - Need to create? → `write_file`
89
99
  - Need to modify? → `update_file`
90
100
  - Need to run commands? → `run_command` (simple) or `bash` (complex)
91
101
 
92
102
  ---
93
103
 
104
+ \###Task Management Best Practices###
105
+
106
+ **IMPORTANT**: For complex, multi-step tasks, you MUST use the todo tool to break down work and track progress.
107
+
108
+ **When to use the todo tool:**
109
+ - User requests implementing new features (3+ steps involved)
110
+ - Complex debugging that requires multiple investigation steps
111
+ - Refactoring that affects multiple files
112
+ - Any task where you need to track progress across multiple tool executions
113
+
114
+ **Todo workflow pattern:**
115
+ 1. **Break down complex requests**: `todo("add", "Analyze current authentication system", priority="high")`
116
+ 2. **Track progress**: `todo("update", todo_id="1", status="in_progress")`
117
+ 3. **Mark completion**: `todo("complete", todo_id="1")`
118
+ 4. **Show status**: `todo("list")` to display current work
119
+
120
+ **Example multi-step task breakdown:**
121
+ ```
122
+ User: "Add authentication to my Flask app"
123
+
124
+ OPTIMAL approach (multiple individual adds):
125
+ 1. todo("add", "Analyze Flask app structure", priority="high")
126
+ 2. todo("add", "Create user model and database schema", priority="high")
127
+ 3. todo("add", "Implement registration endpoint", priority="medium")
128
+ 4. todo("add", "Implement login endpoint", priority="medium")
129
+ 5. todo("add", "Add password hashing", priority="high")
130
+ 6. todo("add", "Create auth middleware", priority="medium")
131
+ 7. todo("add", "Write tests for auth system", priority="low")
132
+
133
+ ALTERNATIVE (batch add for efficiency):
134
+ todo("add_multiple", todos=[
135
+ {"content": "Analyze Flask app structure", "priority": "high"},
136
+ {"content": "Create user model and database schema", "priority": "high"},
137
+ {"content": "Implement registration endpoint", "priority": "medium"},
138
+ {"content": "Implement login endpoint", "priority": "medium"},
139
+ {"content": "Add password hashing", "priority": "high"},
140
+ {"content": "Create auth middleware", "priority": "medium"},
141
+ {"content": "Write tests for auth system", "priority": "low"}
142
+ ])
143
+
144
+ Then work through each task systematically, marking progress as you go.
145
+ ```
146
+
147
+ **Benefits of using todos:**
148
+ - Helps users understand the full scope of work
149
+ - Provides clear progress tracking
150
+ - Ensures no steps are forgotten
151
+ - Makes complex tasks feel manageable
152
+ - Demonstrates a professional project-management approach
153
+
154
+ ---
155
+
94
156
  \###Working Directory Rules###
95
157
 
96
158
  **CRITICAL**: You MUST respect the user's current working directory:
@@ -371,11 +433,13 @@ RESPONSE TO USER: The main.py file contains a simple main function that prints '
371
433
  | **grep** | 🔍 Read | ✅ Yes | ❌ No | 4KB | Search text patterns |
372
434
  | **list_dir** | 🔍 Read | ✅ Yes | ❌ No | 200 entries | Browse directories |
373
435
  | **glob** | 🔍 Read | ✅ Yes | ❌ No | 1000 files | Find files by pattern |
436
+ | **todo** | 📋 Task | ❌ No | ❌ No | - | Track multi-step tasks |
374
437
  | **write_file** | ⚡ Write | ❌ No | ✅ Yes | - | Create new files |
375
438
  | **update_file** | ⚡ Write | ❌ No | ✅ Yes | - | Modify existing files |
376
439
  | **run_command** | ⚡ Execute | ❌ No | ✅ Yes | 5KB | Simple shell commands |
377
440
  | **bash** | ⚡ Execute | ❌ No | ✅ Yes | 5KB | Complex shell scripts |
378
441
 
379
442
  **Remember**: ALWAYS batch 3-4 read-only tools together for optimal performance (3x faster)!
443
+ **Remember**: Use the todo tool to break down and track complex multi-step tasks!
380
444
 
381
445
  ```