tunacode-cli 0.0.39__py3-none-any.whl → 0.0.41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tunacode-cli might be problematic. Click here for more details.

Files changed (33)
  1. tunacode/cli/commands/__init__.py +2 -0
  2. tunacode/cli/commands/implementations/__init__.py +3 -0
  3. tunacode/cli/commands/implementations/debug.py +1 -1
  4. tunacode/cli/commands/implementations/todo.py +217 -0
  5. tunacode/cli/commands/registry.py +2 -0
  6. tunacode/cli/main.py +12 -5
  7. tunacode/cli/repl.py +197 -132
  8. tunacode/configuration/defaults.py +1 -0
  9. tunacode/configuration/models.py +6 -0
  10. tunacode/constants.py +32 -3
  11. tunacode/context.py +7 -3
  12. tunacode/core/agents/main.py +52 -9
  13. tunacode/core/setup/config_setup.py +5 -0
  14. tunacode/core/state.py +50 -1
  15. tunacode/core/token_usage/api_response_parser.py +44 -0
  16. tunacode/core/token_usage/cost_calculator.py +58 -0
  17. tunacode/core/token_usage/usage_tracker.py +98 -0
  18. tunacode/prompts/system.md +69 -5
  19. tunacode/tools/todo.py +343 -0
  20. tunacode/types.py +20 -1
  21. tunacode/ui/input.py +1 -1
  22. tunacode/ui/output.py +36 -0
  23. tunacode/utils/message_utils.py +17 -0
  24. tunacode/utils/text_utils.py +131 -25
  25. tunacode/utils/token_counter.py +78 -8
  26. {tunacode_cli-0.0.39.dist-info → tunacode_cli-0.0.41.dist-info}/METADATA +3 -1
  27. {tunacode_cli-0.0.39.dist-info → tunacode_cli-0.0.41.dist-info}/RECORD +31 -27
  28. tunacode/cli/textual_app.py +0 -420
  29. tunacode/cli/textual_bridge.py +0 -161
  30. {tunacode_cli-0.0.39.dist-info → tunacode_cli-0.0.41.dist-info}/WHEEL +0 -0
  31. {tunacode_cli-0.0.39.dist-info → tunacode_cli-0.0.41.dist-info}/entry_points.txt +0 -0
  32. {tunacode_cli-0.0.39.dist-info → tunacode_cli-0.0.41.dist-info}/licenses/LICENSE +0 -0
  33. {tunacode_cli-0.0.39.dist-info → tunacode_cli-0.0.41.dist-info}/top_level.txt +0 -0
tunacode/context.py CHANGED
@@ -1,3 +1,4 @@
1
+ import logging
1
2
  import subprocess
2
3
  from pathlib import Path
3
4
  from typing import Dict, List
@@ -5,6 +6,8 @@ from typing import Dict, List
5
6
  from tunacode.utils.ripgrep import ripgrep
6
7
  from tunacode.utils.system import list_cwd
7
8
 
9
+ logger = logging.getLogger(__name__)
10
+
8
11
 
9
12
  async def get_git_status() -> Dict[str, object]:
10
13
  """Return git branch and dirty state information."""
@@ -29,7 +32,8 @@ async def get_git_status() -> Dict[str, object]:
29
32
  behind = int(part.split("behind")[1].strip().strip(" ]"))
30
33
  dirty = any(line for line in lines[1:])
31
34
  return {"branch": branch, "ahead": ahead, "behind": behind, "dirty": dirty}
32
- except Exception:
35
+ except Exception as e:
36
+ logger.warning(f"Failed to get git status: {e}")
33
37
  return {}
34
38
 
35
39
 
@@ -54,8 +58,8 @@ async def get_code_style() -> str:
54
58
  if file.exists():
55
59
  try:
56
60
  parts.append(file.read_text(encoding="utf-8"))
57
- except Exception:
58
- pass
61
+ except Exception as e:
62
+ logger.debug(f"Failed to read TUNACODE.md at {file}: {e}")
59
63
  if current == current.parent:
60
64
  break
61
65
  current = current.parent
tunacode/core/agents/main.py CHANGED
@@ -6,6 +6,7 @@ Handles agent creation, configuration, and request processing.
6
6
 
7
7
  import asyncio
8
8
  import json
9
+ import logging
9
10
  import os
10
11
  import re
11
12
  from datetime import datetime, timezone
@@ -30,6 +31,8 @@ except ImportError:
30
31
 
31
32
  from tunacode.constants import READ_ONLY_TOOLS
32
33
  from tunacode.core.state import StateManager
34
+ from tunacode.core.token_usage.api_response_parser import ApiResponseParser
35
+ from tunacode.core.token_usage.cost_calculator import CostCalculator
33
36
  from tunacode.services.mcp import get_mcp_servers
34
37
  from tunacode.tools.bash import bash
35
38
  from tunacode.tools.glob import glob
@@ -37,6 +40,7 @@ from tunacode.tools.grep import grep
37
40
  from tunacode.tools.list_dir import list_dir
38
41
  from tunacode.tools.read_file import read_file
39
42
  from tunacode.tools.run_command import run_command
43
+ from tunacode.tools.todo import TodoTool
40
44
  from tunacode.tools.update_file import update_file
41
45
  from tunacode.tools.write_file import write_file
42
46
  from tunacode.types import (
@@ -50,8 +54,12 @@ from tunacode.types import (
50
54
  ToolCallback,
51
55
  ToolCallId,
52
56
  ToolName,
57
+ UsageTrackerProtocol,
53
58
  )
54
59
 
60
+ # Configure logging
61
+ logger = logging.getLogger(__name__)
62
+
55
63
 
56
64
  class ToolBuffer:
57
65
  """Buffer for collecting read-only tool calls to execute in parallel."""
@@ -110,6 +118,7 @@ async def execute_tools_parallel(
110
118
  try:
111
119
  return await callback(part, node)
112
120
  except Exception as e:
121
+ logger.error(f"Error executing parallel tool: {e}", exc_info=True)
113
122
  return e
114
123
 
115
124
  # If we have more tools than max_parallel, execute in batches
@@ -214,6 +223,7 @@ async def _process_node(
214
223
  state_manager: StateManager,
215
224
  tool_buffer: Optional[ToolBuffer] = None,
216
225
  streaming_callback: Optional[callable] = None,
226
+ usage_tracker: Optional[UsageTrackerProtocol] = None,
217
227
  ):
218
228
  from tunacode.ui import console as ui
219
229
  from tunacode.utils.token_counter import estimate_tokens
@@ -233,6 +243,9 @@ async def _process_node(
233
243
  if hasattr(node, "model_response"):
234
244
  state_manager.session.messages.append(node.model_response)
235
245
 
246
+ if usage_tracker:
247
+ await usage_tracker.track_and_display(node.model_response)
248
+
236
249
  # Stream content to callback if provided
237
250
  # Use this as fallback when true token streaming is not available
238
251
  if streaming_callback and not STREAMING_AVAILABLE:
@@ -313,8 +326,8 @@ async def _process_node(
313
326
  thought_obj = json.loads(content)
314
327
  if "thought" in thought_obj:
315
328
  await ui.muted(f"REASONING: {thought_obj['thought']}")
316
- except (json.JSONDecodeError, KeyError):
317
- pass
329
+ except (json.JSONDecodeError, KeyError) as e:
330
+ logger.debug(f"Failed to parse thought JSON: {e}")
318
331
 
319
332
  # Pattern 3: Multi-line thoughts with context
320
333
  multiline_pattern = r'\{"thought":\s*"([^"]+(?:\\.[^"]*)*?)"\}'
@@ -442,8 +455,9 @@ async def _process_node(
442
455
  # Handle tool returns
443
456
  for part in node.model_response.parts:
444
457
  if part.part_kind == "tool-return":
445
- obs_msg = f"OBSERVATION[{part.tool_name}]: {part.content[:2_000]}"
446
- state_manager.session.messages.append(obs_msg)
458
+ state_manager.session.messages.append(
459
+ f"OBSERVATION[{part.tool_name}]: {part.content}"
460
+ )
447
461
 
448
462
  # Display tool return when thoughts are enabled
449
463
  if state_manager.session.show_thoughts:
@@ -510,9 +524,22 @@ def get_or_create_agent(model: ModelName, state_manager: StateManager) -> Pydant
510
524
  else:
511
525
  # Log that TUNACODE.md was not found
512
526
  print("📄 TUNACODE.md not found: Using default context")
513
- except Exception:
514
- # Ignore errors loading TUNACODE.md
515
- pass
527
+ except Exception as e:
528
+ # Log errors loading TUNACODE.md at debug level
529
+ logger.debug(f"Error loading TUNACODE.md: {e}")
530
+
531
+ todo_tool = TodoTool(state_manager=state_manager)
532
+
533
+ try:
534
+ # Only add todo section if there are actual todos
535
+ current_todos = todo_tool.get_current_todos_sync()
536
+ if current_todos != "No todos found":
537
+ system_prompt += f'\n\n# Current Todo List\n\nYou have existing todos that need attention:\n\n{current_todos}\n\nRemember to check progress on these todos and update them as you work. Use todo("list") to see current status anytime.'
538
+ except Exception as e:
539
+ # Log error but don't fail agent creation
540
+ import sys
541
+
542
+ print(f"Warning: Failed to load todos: {e}", file=sys.stderr)
516
543
 
517
544
  state_manager.session.agents[model] = Agent(
518
545
  model=model,
@@ -524,6 +551,7 @@ def get_or_create_agent(model: ModelName, state_manager: StateManager) -> Pydant
524
551
  Tool(list_dir, max_retries=max_retries),
525
552
  Tool(read_file, max_retries=max_retries),
526
553
  Tool(run_command, max_retries=max_retries),
554
+ Tool(todo_tool._execute, max_retries=max_retries),
527
555
  Tool(update_file, max_retries=max_retries),
528
556
  Tool(write_file, max_retries=max_retries),
529
557
  ],
@@ -622,7 +650,9 @@ async def parse_json_tool_calls(
622
650
  if isinstance(parsed, dict) and "tool" in parsed and "args" in parsed:
623
651
  potential_jsons.append((parsed["tool"], parsed["args"]))
624
652
  except json.JSONDecodeError:
625
- pass
653
+ logger.debug(
654
+ f"Failed to parse potential JSON tool call: {potential_json[:50]}..."
655
+ )
626
656
  start_pos = -1
627
657
 
628
658
  matches = potential_jsons
@@ -719,7 +749,13 @@ async def process_request(
719
749
  fallback_enabled = state_manager.session.user_config.get("settings", {}).get(
720
750
  "fallback_response", True
721
751
  )
752
+ from tunacode.configuration.models import ModelRegistry
753
+ from tunacode.core.token_usage.usage_tracker import UsageTracker
722
754
 
755
+ parser = ApiResponseParser()
756
+ registry = ModelRegistry()
757
+ calculator = CostCalculator(registry)
758
+ usage_tracker = UsageTracker(parser, calculator, state_manager)
723
759
  response_state = ResponseState()
724
760
 
725
761
  # Reset iteration tracking for this request
@@ -763,7 +799,14 @@ async def process_request(
763
799
  if event.delta.content_delta:
764
800
  await streaming_callback(event.delta.content_delta)
765
801
 
766
- await _process_node(node, tool_callback, state_manager, tool_buffer, streaming_callback)
802
+ await _process_node(
803
+ node,
804
+ tool_callback,
805
+ state_manager,
806
+ tool_buffer,
807
+ streaming_callback,
808
+ usage_tracker,
809
+ )
767
810
  if hasattr(node, "result") and node.result and hasattr(node.result, "output"):
768
811
  if node.result.output:
769
812
  response_state.has_user_response = True
tunacode/core/setup/config_setup.py CHANGED
@@ -318,6 +318,11 @@ class ConfigSetup(BaseSetup):
318
318
 
319
319
  self.state_manager.session.user_config["default_model"] = model
320
320
 
321
+ if self.cli_config.get("custom_context_window"):
322
+ self.state_manager.session.user_config["context_window_size"] = self.cli_config[
323
+ "custom_context_window"
324
+ ]
325
+
321
326
  # Set current model
322
327
  self.state_manager.session.current_model = self.state_manager.session.user_config[
323
328
  "default_model"
tunacode/core/state.py CHANGED
@@ -14,9 +14,12 @@ from tunacode.types import (
14
14
  MessageHistory,
15
15
  ModelName,
16
16
  SessionId,
17
+ TodoItem,
17
18
  ToolName,
18
19
  UserConfig,
19
20
  )
21
+ from tunacode.utils.message_utils import get_message_content
22
+ from tunacode.utils.token_counter import estimate_tokens
20
23
 
21
24
 
22
25
  @dataclass
@@ -37,6 +40,7 @@ class SessionState:
37
40
  device_id: Optional[DeviceId] = None
38
41
  input_sessions: InputSessions = field(default_factory=dict)
39
42
  current_task: Optional[Any] = None
43
+ todos: list[TodoItem] = field(default_factory=list)
40
44
  # Enhanced tracking for thoughts display
41
45
  files_in_context: set[str] = field(default_factory=set)
42
46
  tool_calls: list[dict[str, Any]] = field(default_factory=list)
@@ -46,6 +50,31 @@ class SessionState:
46
50
  is_streaming_active: bool = False
47
51
  # Track streaming panel reference for tool handler access
48
52
  streaming_panel: Optional[Any] = None
53
+ # Context window tracking (estimation based)
54
+ total_tokens: int = 0
55
+ max_tokens: int = 0
56
+ # API usage tracking (actual from providers)
57
+ last_call_usage: dict = field(
58
+ default_factory=lambda: {
59
+ "prompt_tokens": 0,
60
+ "completion_tokens": 0,
61
+ "cost": 0.0,
62
+ }
63
+ )
64
+ session_total_usage: dict = field(
65
+ default_factory=lambda: {
66
+ "prompt_tokens": 0,
67
+ "completion_tokens": 0,
68
+ "cost": 0.0,
69
+ }
70
+ )
71
+
72
+ def update_token_count(self):
73
+ """Calculates the total token count from messages and files in context."""
74
+ message_contents = [get_message_content(msg) for msg in self.messages]
75
+ message_content = " ".join(c for c in message_contents if c)
76
+ file_content = " ".join(self.files_in_context)
77
+ self.total_tokens = estimate_tokens(message_content + file_content, self.current_model)
49
78
 
50
79
 
51
80
  class StateManager:
@@ -56,5 +85,25 @@ class StateManager:
56
85
  def session(self) -> SessionState:
57
86
  return self._session
58
87
 
59
- def reset_session(self):
88
+ def add_todo(self, todo: TodoItem) -> None:
89
+ self._session.todos.append(todo)
90
+
91
+ def update_todo(self, todo_id: str, status: str) -> None:
92
+ from datetime import datetime
93
+
94
+ for todo in self._session.todos:
95
+ if todo.id == todo_id:
96
+ todo.status = status
97
+ if status == "completed" and not todo.completed_at:
98
+ todo.completed_at = datetime.now()
99
+ break
100
+
101
+ def remove_todo(self, todo_id: str) -> None:
102
+ self._session.todos = [todo for todo in self._session.todos if todo.id != todo_id]
103
+
104
+ def clear_todos(self) -> None:
105
+ self._session.todos = []
106
+
107
+ def reset_session(self) -> None:
108
+ """Reset the session to a fresh state."""
60
109
  self._session = SessionState()
tunacode/core/token_usage/api_response_parser.py ADDED
@@ -0,0 +1,44 @@
1
+ """
2
+ Module: tunacode.core.token_usage.api_response_parser
3
+ Provides a parser to standardize token usage information from various LLM API responses.
4
+ """
5
+
6
+ from typing import Any, Dict
7
+
8
+ from tunacode.types import ModelName
9
+
10
+
11
+ class ApiResponseParser:
12
+ """
13
+ Parses LLM API response objects to extract token usage and the actual model name used.
14
+ This version works directly with the pydantic-ai ModelResponse object.
15
+ """
16
+
17
+ def parse(self, model: ModelName, response_obj: Any) -> Dict[str, Any]:
18
+ """
19
+ Parses the standardized API response object.
20
+
21
+ Args:
22
+ model (ModelName): The model name that was requested. Used as a fallback.
23
+ response_obj (Any): The raw ModelResponse object from the agent.
24
+
25
+ Returns:
26
+ Dict[str, Any]: A standardized dictionary with 'prompt_tokens',
27
+ 'completion_tokens', and 'model_name'.
28
+ """
29
+ # --- FIX: Access attributes directly from the object ---
30
+ # Fall back to an empty dict when the usage attribute is missing or None
31
+ usage = getattr(response_obj, "usage", None) or {}
32
+
33
+ # Extract the actual model name, falling back to the requested model.
34
+ actual_model_name = getattr(response_obj, "model_name", model)
35
+
36
+ # The pydantic-ai Usage object standardizes keys to 'request_tokens'
37
+ # and 'response_tokens'. We access them as attributes.
38
+ parsed_data = {
39
+ "prompt_tokens": getattr(usage, "request_tokens", 0),
40
+ "completion_tokens": getattr(usage, "response_tokens", 0),
41
+ "model_name": actual_model_name,
42
+ }
43
+
44
+ return parsed_data
tunacode/core/token_usage/cost_calculator.py ADDED
@@ -0,0 +1,58 @@
1
+ """
2
+ Module: tunacode.core.token_usage.cost_calculator
3
+ Provides a utility for calculating the cost of model usage based on token counts.
4
+ """
5
+
6
+ from tunacode.configuration.models import ModelRegistry
7
+ from tunacode.types import CostAmount, ModelName, TokenCount
8
+
9
+
10
+ class CostCalculator:
11
+ """
12
+ Calculates the cost of a model interaction based on prompt and completion tokens.
13
+ """
14
+
15
+ def __init__(self, registry: ModelRegistry):
16
+ """
17
+ Initializes the CostCalculator with a model registry.
18
+
19
+ Args:
20
+ registry (ModelRegistry): An instance of ModelRegistry that contains
21
+ the pricing information for various models.
22
+ """
23
+ self._registry = registry
24
+
25
+ def calculate_cost(
26
+ self,
27
+ model_name: ModelName,
28
+ prompt_tokens: TokenCount,
29
+ completion_tokens: TokenCount,
30
+ ) -> CostAmount:
31
+ """
32
+ Calculates the total cost for a given model and token usage.
33
+
34
+ Args:
35
+ model_name (ModelName): The identifier for the model (e.g., "openai:gpt-4o").
36
+ prompt_tokens (TokenCount): The number of tokens in the input/prompt.
37
+ completion_tokens (TokenCount): The number of tokens in the output/completion.
38
+
39
+ Returns:
40
+ CostAmount: The calculated cost as a float. Returns 0.0 if the model
41
+ is not found in the registry.
42
+ """
43
+ model_config = self._registry.get_model(model_name)
44
+
45
+ if not model_config:
46
+ return 0.0
47
+
48
+ TOKENS_PER_MILLION = 1_000_000
49
+
50
+ pricing = model_config.pricing
51
+
52
+ input_cost = (prompt_tokens / TOKENS_PER_MILLION) * pricing.input
53
+
54
+ output_cost = (completion_tokens / TOKENS_PER_MILLION) * pricing.output
55
+
56
+ total_cost = input_cost + output_cost
57
+
58
+ return total_cost
tunacode/core/token_usage/usage_tracker.py ADDED
@@ -0,0 +1,98 @@
1
+ from typing import Any
2
+
3
+ from tunacode.core.state import StateManager
4
+ from tunacode.core.token_usage.api_response_parser import ApiResponseParser
5
+ from tunacode.core.token_usage.cost_calculator import CostCalculator
6
+ from tunacode.types import UsageTrackerProtocol
7
+ from tunacode.ui import console as ui # Import the ui console directly
8
+
9
+
10
+ class UsageTracker(UsageTrackerProtocol):
11
+ """
12
+ Handles parsing, calculating, storing, and displaying token usage and cost.
13
+ """
14
+
15
+ def __init__(
16
+ self,
17
+ parser: ApiResponseParser,
18
+ calculator: CostCalculator,
19
+ state_manager: StateManager,
20
+ ):
21
+ self.parser = parser
22
+ self.calculator = calculator
23
+ self.state_manager = state_manager
24
+
25
+ async def track_and_display(self, response_obj: Any):
26
+ """
27
+ Main method to process a model response for usage tracking.
28
+ """
29
+ try:
30
+ # 1. Parse the response to get token data
31
+ requested_model = self.state_manager.session.current_model
32
+ parsed_data = self.parser.parse(model=requested_model, response_obj=response_obj)
33
+
34
+ if not parsed_data:
35
+ return
36
+
37
+ # 2. Calculate the cost
38
+ cost = self._calculate_cost(parsed_data)
39
+
40
+ # 3. Update the session state
41
+ self._update_state(parsed_data, cost)
42
+
43
+ # 4. Display the summary if enabled
44
+ if self.state_manager.session.show_thoughts:
45
+ await self._display_summary()
46
+
47
+ except Exception as e:
48
+ if self.state_manager.session.show_thoughts:
49
+ await ui.error(f"Error during cost calculation: {e}")
50
+
51
+ def _calculate_cost(self, parsed_data: dict) -> float:
52
+ """Calculates the cost for the given parsed data."""
53
+ requested_model = self.state_manager.session.current_model
54
+ api_model_name = parsed_data.get("model_name", requested_model)
55
+ final_model_name = api_model_name
56
+
57
+ # Logic to preserve the provider prefix
58
+ if ":" in requested_model:
59
+ provider_prefix = requested_model.split(":", 1)[0]
60
+ if not api_model_name.startswith(provider_prefix + ":"):
61
+ final_model_name = f"{provider_prefix}:{api_model_name}"
62
+
63
+ return self.calculator.calculate_cost(
64
+ prompt_tokens=parsed_data.get("prompt_tokens", 0),
65
+ completion_tokens=parsed_data.get("completion_tokens", 0),
66
+ model_name=final_model_name,
67
+ )
68
+
69
+ def _update_state(self, parsed_data: dict, cost: float):
70
+ """Updates the last_call and session_total usage in the state."""
71
+ session = self.state_manager.session
72
+ prompt_tokens = parsed_data.get("prompt_tokens", 0)
73
+ completion_tokens = parsed_data.get("completion_tokens", 0)
74
+
75
+ # Update last call usage
76
+ session.last_call_usage["prompt_tokens"] = prompt_tokens
77
+ session.last_call_usage["completion_tokens"] = completion_tokens
78
+ session.last_call_usage["cost"] = cost
79
+
80
+ # Accumulate session totals
81
+ session.session_total_usage["prompt_tokens"] += prompt_tokens
82
+ session.session_total_usage["completion_tokens"] += completion_tokens
83
+ session.session_total_usage["cost"] += cost
84
+
85
+ async def _display_summary(self):
86
+ """Formats and prints the usage summary to the console."""
87
+ session = self.state_manager.session
88
+ prompt = session.last_call_usage["prompt_tokens"]
89
+ completion = session.last_call_usage["completion_tokens"]
90
+ last_cost = session.last_call_usage["cost"]
91
+ session_cost = session.session_total_usage["cost"]
92
+
93
+ usage_summary = (
94
+ f"[ Tokens: {prompt + completion:,} (P: {prompt:,}, C: {completion:,}) | "
95
+ f"Cost: ${last_cost:.4f} | "
96
+ f"Session Total: ${session_cost:.4f} ]"
97
+ )
98
+ await ui.muted(usage_summary)
tunacode/prompts/system.md CHANGED
@@ -12,7 +12,7 @@ You MUST follow these rules:
12
12
 
13
13
  \###Tool Access Rules###
14
14
 
15
- You have 8 powerful tools at your disposal. Understanding their categories is CRITICAL for performance:
15
+ You have 9 powerful tools at your disposal. Understanding their categories is CRITICAL for performance:
16
16
 
17
17
  ** READ-ONLY TOOLS (Safe, Parallel-Executable)**
18
18
  These tools can and SHOULD be executed in parallel batches for 3x-10x performance gains:
@@ -30,19 +30,28 @@ These tools can and SHOULD be executed in parallel batches for 3x-10x performanc
30
30
  - Returns: Sorted list of matching file paths
31
31
  - Use for: Finding all \*.py files, configs, etc.
32
32
 
33
+ ** TASK MANAGEMENT TOOLS (Fast, Sequential)**
34
+ These tools help organize and track complex multi-step tasks:
35
+
36
+ 5. `todo(action: str, content: str = None, todo_id: str = None, status: str = None, priority: str = None, todos: list = None)` — Manage task lists
37
+ - Actions: "add", "add_multiple", "update", "complete", "list", "remove"
38
+ - Use for: Breaking down complex tasks, tracking progress, organizing work
39
+ - **IMPORTANT**: Use this tool when tackling multi-step problems or complex implementations
40
+ - **Multiple todos**: Use `todo("add_multiple", todos=[{"content": "task1", "priority": "high"}, {"content": "task2", "priority": "medium"}])` to add many todos at once
41
+
33
42
  ** WRITE/EXECUTE TOOLS (Require Confirmation, Sequential)**
34
43
  These tools modify state and MUST run one at a time with user confirmation:
35
44
 
36
- 5. `write_file(filepath: str, content: str)` — Create new files
45
+ 6. `write_file(filepath: str, content: str)` — Create new files
37
46
  - Safety: Fails if file exists (no overwrites)
38
47
  - Use for: Creating new modules, configs, tests
39
- 6. `update_file(filepath: str, target: str, patch: str)` — Modify existing files
48
+ 7. `update_file(filepath: str, target: str, patch: str)` — Modify existing files
40
49
  - Safety: Shows diff before applying changes
41
50
  - Use for: Fixing bugs, updating imports, refactoring
42
- 7. `run_command(command: str)` — Execute shell commands
51
+ 8. `run_command(command: str)` — Execute shell commands
43
52
  - Safety: Full command confirmation required
44
53
  - Use for: Running tests, git operations, installs
45
- 8. `bash(command: str)` — Advanced shell with environment control
54
+ 9. `bash(command: str)` — Advanced shell with environment control
46
55
  - Safety: Enhanced security, output limits (5KB)
47
56
  - Use for: Complex scripts, interactive commands
48
57
 
@@ -85,12 +94,65 @@ These tools modify state and MUST run one at a time with user confirmation:
85
94
  - Need to see file content? → `read_file`
86
95
  - Need to find something? → `grep` (content) or `glob` (filenames)
87
96
  - Need to explore? → `list_dir`
97
+ - Need to track tasks? → `todo` (for complex multi-step work)
88
98
  - Need to create? → `write_file`
89
99
  - Need to modify? → `update_file`
90
100
  - Need to run commands? → `run_command` (simple) or `bash` (complex)
91
101
 
92
102
  ---
93
103
 
104
+ \###Task Management Best Practices###
105
+
106
+ **IMPORTANT**: For complex, multi-step tasks, you MUST use the todo tool to break down work and track progress.
107
+
108
+ **When to use the todo tool:**
109
+ - User requests implementing new features (3+ steps involved)
110
+ - Complex debugging that requires multiple investigation steps
111
+ - Refactoring that affects multiple files
112
+ - Any task where you need to track progress across multiple tool executions
113
+
114
+ **Todo workflow pattern:**
115
+ 1. **Break down complex requests**: `todo("add", "Analyze current authentication system", priority="high")`
116
+ 2. **Track progress**: `todo("update", todo_id="1", status="in_progress")`
117
+ 3. **Mark completion**: `todo("complete", todo_id="1")`
118
+ 4. **Show status**: `todo("list")` to display current work
119
+
120
+ **Example multi-step task breakdown:**
121
+ ```
122
+ User: "Add authentication to my Flask app"
123
+
124
+ OPTIMAL approach (multiple individual adds):
125
+ 1. todo("add", "Analyze Flask app structure", priority="high")
126
+ 2. todo("add", "Create user model and database schema", priority="high")
127
+ 3. todo("add", "Implement registration endpoint", priority="medium")
128
+ 4. todo("add", "Implement login endpoint", priority="medium")
129
+ 5. todo("add", "Add password hashing", priority="high")
130
+ 6. todo("add", "Create auth middleware", priority="medium")
131
+ 7. todo("add", "Write tests for auth system", priority="low")
132
+
133
+ ALTERNATIVE (batch add for efficiency):
134
+ todo("add_multiple", todos=[
135
+ {"content": "Analyze Flask app structure", "priority": "high"},
136
+ {"content": "Create user model and database schema", "priority": "high"},
137
+ {"content": "Implement registration endpoint", "priority": "medium"},
138
+ {"content": "Implement login endpoint", "priority": "medium"},
139
+ {"content": "Add password hashing", "priority": "high"},
140
+ {"content": "Create auth middleware", "priority": "medium"},
141
+ {"content": "Write tests for auth system", "priority": "low"}
142
+ ])
143
+
144
+ Then work through each task systematically, marking progress as you go.
145
+ ```
146
+
147
+ **Benefits of using todos:**
148
+ - Helps users understand the full scope of work
149
+ - Provides clear progress tracking
150
+ - Ensures no steps are forgotten
151
+ - Makes complex tasks feel manageable
152
+ - Shows professional project management approach
153
+
154
+ ---
155
+
94
156
  \###Working Directory Rules###
95
157
 
96
158
  **CRITICAL**: You MUST respect the user's current working directory:
@@ -371,11 +433,13 @@ RESPONSE TO USER: The main.py file contains a simple main function that prints '
371
433
  | **grep** | 🔍 Read | ✅ Yes | ❌ No | 4KB | Search text patterns |
372
434
  | **list_dir** | 🔍 Read | ✅ Yes | ❌ No | 200 entries | Browse directories |
373
435
  | **glob** | 🔍 Read | ✅ Yes | ❌ No | 1000 files | Find files by pattern |
436
+ | **todo** | 📋 Task | ❌ No | ❌ No | - | Track multi-step tasks |
374
437
  | **write_file** | ⚡ Write | ❌ No | ✅ Yes | - | Create new files |
375
438
  | **update_file** | ⚡ Write | ❌ No | ✅ Yes | - | Modify existing files |
376
439
  | **run_command** | ⚡ Execute | ❌ No | ✅ Yes | 5KB | Simple shell commands |
377
440
  | **bash** | ⚡ Execute | ❌ No | ✅ Yes | 5KB | Complex shell scripts |
378
441
 
379
442
  **Remember**: ALWAYS batch 3-4 read-only tools together for optimal performance (3x faster)!
443
+ **Remember**: Use the todo tool to break down and track complex multi-step tasks!
380
444
 
381
445
  ```