vision-agent 0.2.193__py3-none-any.whl → 0.2.196__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/.sim_tools/df.csv +640 -0
- vision_agent/.sim_tools/embs.npy +0 -0
- vision_agent/agent/__init__.py +2 -0
- vision_agent/agent/agent_utils.py +211 -3
- vision_agent/agent/vision_agent_coder.py +5 -113
- vision_agent/agent/vision_agent_coder_prompts_v2.py +119 -0
- vision_agent/agent/vision_agent_coder_v2.py +341 -0
- vision_agent/agent/vision_agent_planner.py +2 -2
- vision_agent/agent/vision_agent_planner_prompts.py +1 -1
- vision_agent/agent/vision_agent_planner_prompts_v2.py +748 -0
- vision_agent/agent/vision_agent_planner_v2.py +432 -0
- vision_agent/lmm/lmm.py +4 -0
- vision_agent/tools/__init__.py +2 -1
- vision_agent/tools/planner_tools.py +246 -0
- vision_agent/tools/tool_utils.py +65 -1
- vision_agent/tools/tools.py +76 -22
- vision_agent/utils/image_utils.py +12 -6
- vision_agent/utils/sim.py +65 -14
- {vision_agent-0.2.193.dist-info → vision_agent-0.2.196.dist-info}/METADATA +2 -1
- vision_agent-0.2.196.dist-info/RECORD +42 -0
- vision_agent-0.2.193.dist-info/RECORD +0 -35
- {vision_agent-0.2.193.dist-info → vision_agent-0.2.196.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.193.dist-info → vision_agent-0.2.196.dist-info}/WHEEL +0 -0
| @@ -0,0 +1,341 @@ | |
| 1 | 
            +
            import copy
         | 
| 2 | 
            +
            from pathlib import Path
         | 
| 3 | 
            +
            from typing import Any, Callable, Dict, List, Optional, Sequence, Union, cast
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            from rich.console import Console
         | 
| 6 | 
            +
            from rich.markup import escape
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            import vision_agent.tools as T
         | 
| 9 | 
            +
            from vision_agent.agent import Agent
         | 
| 10 | 
            +
            from vision_agent.agent.agent_utils import (
         | 
| 11 | 
            +
                CodeContext,
         | 
| 12 | 
            +
                DefaultImports,
         | 
| 13 | 
            +
                PlanContext,
         | 
| 14 | 
            +
                add_media_to_chat,
         | 
| 15 | 
            +
                capture_media_from_exec,
         | 
| 16 | 
            +
                extract_tag,
         | 
| 17 | 
            +
                format_feedback,
         | 
| 18 | 
            +
                format_plan_v2,
         | 
| 19 | 
            +
                print_code,
         | 
| 20 | 
            +
                strip_function_calls,
         | 
| 21 | 
            +
            )
         | 
| 22 | 
            +
            from vision_agent.agent.vision_agent_coder_prompts_v2 import CODE, FIX_BUG, TEST
         | 
| 23 | 
            +
            from vision_agent.agent.vision_agent_planner_v2 import VisionAgentPlannerV2
         | 
| 24 | 
            +
            from vision_agent.lmm import LMM, AnthropicLMM
         | 
| 25 | 
            +
            from vision_agent.lmm.types import Message
         | 
| 26 | 
            +
            from vision_agent.tools.meta_tools import get_diff
         | 
| 27 | 
            +
            from vision_agent.utils.execute import (
         | 
| 28 | 
            +
                CodeInterpreter,
         | 
| 29 | 
            +
                CodeInterpreterFactory,
         | 
| 30 | 
            +
                Execution,
         | 
| 31 | 
            +
            )
         | 
| 32 | 
            +
            from vision_agent.utils.sim import Sim, load_cached_sim
         | 
| 33 | 
            +
             | 
| 34 | 
            +
            # Module-wide rich console used by the functions below for verbose and error output.
            _CONSOLE = Console()
         | 
| 35 | 
            +
             | 
| 36 | 
            +
             | 
def retrieve_tools(
    plan: List[str],
    tool_recommender: "Sim",
) -> str:
    """Collect the documentation of the best-matching tool for each plan step.

    Every instruction in ``plan`` is sent to the recommender as
    ``top_k(inst, k=1, thresh=0.3)`` and the ``"doc"`` entry of each hit is
    collected.

    Args:
        plan: Plan instructions; each one is used as a separate query.
        tool_recommender: Recommender exposing
            ``top_k(query, k=..., thresh=...)`` whose results are mappings
            containing a ``"doc"`` key.

    Returns:
        The distinct tool docs joined by blank lines, in order of first
        appearance. An empty string when no docs were retrieved.
    """
    tool_docs: List[str] = []
    for inst in plan:
        tools = tool_recommender.top_k(inst, k=1, thresh=0.3)
        tool_docs.extend(e["doc"] for e in tools)

    # dict.fromkeys removes duplicates while keeping insertion order. A plain
    # set() iterates strings in a hash-dependent order, which made the
    # resulting prompt text differ from run to run.
    return "\n\n".join(dict.fromkeys(tool_docs))
| 48 | 
            +
             | 
| 49 | 
            +
             | 
def write_code(
    coder: LMM,
    chat: List[Message],
    tool_docs: str,
    plan: str,
) -> str:
    """Ask the coder model to produce code for the user's request.

    The caller's chat is left untouched: a deep copy is made and the content
    of its final message is replaced by the ``CODE`` prompt, filled in with
    the tool docs, the original user request and the plan.

    Args:
        coder: Model called with the rewritten chat (``stream=False``).
        chat: Conversation whose last message must have role ``"user"``.
        tool_docs: Tool documentation inserted as ``docstring`` in the prompt.
        plan: Plan text inserted as ``plan`` in the prompt.

    Returns:
        Whatever ``extract_tag`` yields for the ``code`` tag of the response.

    Raises:
        ValueError: If the last chat message is not from the user.
    """
    messages = copy.deepcopy(chat)
    last_message = messages[-1]
    if last_message["role"] != "user":
        raise ValueError("Last chat message must be from the user.")

    # Swap the raw user request for the fully formatted coding prompt.
    last_message["content"] = CODE.format(
        docstring=tool_docs,
        question=last_message["content"],
        plan=plan,
    )
    return extract_tag(coder(messages, stream=False), "code")  # type: ignore
| 69 | 
            +
             | 
| 70 | 
            +
             | 
def write_test(
    tester: LMM,
    chat: List[Message],
    tool_util_docs: str,
    code: str,
    media_list: Optional[Sequence[Union[str, Path]]] = None,
) -> str:
    """Ask the tester model to produce test code for already generated code.

    The caller's chat is left untouched: a deep copy is made and the content
    of its final message is replaced by the ``TEST`` prompt, filled in with
    the utility docs, the original user request, the code and the media list.

    Args:
        tester: Model called with the rewritten chat (``stream=False``).
        chat: Conversation whose last message must have role ``"user"``.
        tool_util_docs: Documentation inserted as ``docstring`` in the prompt.
        code: Code under test, inserted as ``code`` in the prompt.
        media_list: Media references inserted as ``media`` in the prompt.

    Returns:
        Whatever ``extract_tag`` yields for the ``code`` tag of the response.

    Raises:
        ValueError: If the last chat message is not from the user.
    """
    messages = copy.deepcopy(chat)
    last_message = messages[-1]
    if last_message["role"] != "user":
        raise ValueError("Last chat message must be from the user.")

    # Swap the raw user request for the fully formatted testing prompt.
    last_message["content"] = TEST.format(
        docstring=tool_util_docs,
        question=last_message["content"],
        code=code,
        media=media_list,
    )
    return extract_tag(tester(messages, stream=False), "code")  # type: ignore
| 92 | 
            +
             | 
| 93 | 
            +
             | 
def debug_code(
    debugger: LMM,
    tool_docs: str,
    plan: str,
    code: str,
    test: str,
    result: Execution,
    debug_info: str,
    verbose: bool,
) -> tuple[str, str, str]:
    """Ask the debugger model to repair failing code and/or tests.

    The debugger is queried with the ``FIX_BUG`` prompt up to three times,
    stopping as soon as a response contains a ``code`` or a ``test`` tag.
    Exceptions raised during an attempt are printed and count as a failed
    attempt. A fix is only applied when its tag is present and not blank;
    otherwise the corresponding input is kept.

    Args:
        debugger: Model called with the formatted prompt (``stream=False``).
        tool_docs: Tool documentation inserted as ``docstring`` in the prompt.
        plan: Plan text inserted as ``plan`` in the prompt.
        code: Current code.
        test: Current test code.
        result: Execution result of running ``code`` and ``test``; only the
            last 50 lines of its text are shown to the model.
        debug_info: Feedback accumulated from earlier debugging rounds.
        verbose: When true, print the model's thoughts on the fix.

    Returns:
        A ``(code, test, debug_info)`` tuple where ``debug_info`` has the
        formatted feedback for this round appended.
    """
    fixed_code = None
    fixed_test = None
    thoughts = ""
    success = False
    count = 0
    while not success and count < 3:
        try:
            # The response is parsed for separate `thoughts`, `code` and
            # `test` tags rather than a JSON payload.
            fixed_code_and_test_str = debugger(
                FIX_BUG.format(
                    docstring=tool_docs,
                    plan=plan,
                    code=code,
                    tests=test,
                    # Because of the way we trace function calls the trace information
                    # ends up in the results. We don't want to show this info to the
                    # LLM so we don't include it in the tool_output_str.
                    result="\n".join(
                        result.text(include_results=False).splitlines()[-50:]
                    ),
                    debug=debug_info,
                ),
                stream=False,
            )
            fixed_code_and_test_str = cast(str, fixed_code_and_test_str)
            thoughts_tag = extract_tag(fixed_code_and_test_str, "thoughts")
            thoughts = thoughts_tag if thoughts_tag is not None else ""
            fixed_code = extract_tag(fixed_code_and_test_str, "code")
            fixed_test = extract_tag(fixed_code_and_test_str, "test")

            # One of the two tags is enough to count the attempt as usable.
            success = not (fixed_code is None and fixed_test is None)

        except Exception as e:
            # Escape the exception text: it is arbitrary and may contain
            # square brackets that rich would otherwise treat as markup.
            _CONSOLE.print(
                f"[bold red]Error while generating fix:[/bold red] {escape(str(e))}"
            )

        count += 1

    old_code = code
    old_test = test

    if fixed_code is not None and fixed_code.strip() != "":
        code = fixed_code
    if fixed_test is not None and fixed_test.strip() != "":
        test = fixed_test

    debug_info_i = format_feedback(
        [
            {
                "code": f"{code}\n{test}",
                "feedback": thoughts,
                "edits": get_diff(f"{old_code}\n{old_test}", f"{code}\n{test}"),
            }
        ]
    )
    debug_info += f"\n{debug_info_i}"

    if verbose:
        # Model-written text is escaped for the same reason as above.
        _CONSOLE.print(
            f"[bold cyan]Thoughts on attempted fix:[/bold cyan] [green]{escape(thoughts)}[/green]"
        )

    return code, test, debug_info
| 167 | 
            +
             | 
| 168 | 
            +
             | 
def write_and_test_code(
    coder: LMM,
    tester: LMM,
    debugger: LMM,
    chat: List[Message],
    plan: str,
    tool_docs: str,
    code_interpreter: CodeInterpreter,
    media_list: List[Union[str, Path]],
    update_callback: Callable[[Dict[str, Any]], None],
    verbose: bool,
) -> CodeContext:
    """Generate code and tests, run them, and debug until they pass.

    Code is written by ``coder`` (then passed through
    ``strip_function_calls``), tests by ``tester``, and both are executed
    together in ``code_interpreter``. While the execution fails or prints
    nothing to stdout, ``debugger`` is asked for a fix and the result is
    re-run, for at most three rounds. The final code and test are then
    reported through ``update_callback``.

    Args:
        coder: Model that writes the code.
        tester: Model that writes the tests.
        debugger: Model that repairs failing code/tests.
        chat: Conversation forwarded to the coder and tester.
        plan: Plan text given to the coder and debugger.
        tool_docs: Tool documentation given to the coder and debugger.
        code_interpreter: Interpreter used to execute code and tests.
        media_list: Media references given to the tester.
        update_callback: Receives an assistant message with the final
            code, test and captured media.
        verbose: When true, print code, tests and execution results.

    Returns:
        A ``CodeContext`` with the final code and test (each prefixed with
        the default imports), the success flag and the last execution result.
    """

    def with_imports(body: str) -> str:
        # Prefix a source snippet with the default import block.
        return f"{DefaultImports.to_code_string()}\n{body}"

    def run(source: str, tests: str) -> Execution:
        # Execute code and tests together in the interpreter.
        return code_interpreter.exec_isolation(with_imports(f"{source}\n{tests}"))

    def show_result(title: str, execution: Execution) -> None:
        # Print an execution result; its text is escaped for rich markup.
        _CONSOLE.print(
            f"[bold cyan]{title}[/bold cyan] [yellow]{escape(execution.text(include_logs=True))}[/yellow]"
        )

    code = strip_function_calls(
        write_code(coder=coder, chat=chat, tool_docs=tool_docs, plan=plan)
    )
    test = write_test(
        tester=tester,
        chat=chat,
        tool_util_docs=T.UTILITIES_DOCSTRING,
        code=code,
        media_list=media_list,
    )
    if verbose:
        print_code("Code:", code)
        print_code("Test:", test)

    result = run(code, test)
    if verbose:
        show_result("Code execution result:", result)

    debug_info = ""
    debugger_docs = T.UTILITIES_DOCSTRING + "\n" + tool_docs
    for _ in range(3):
        # Stop once the run succeeded and actually printed something.
        if result.success and len(result.logs.stdout) != 0:
            break
        code, test, debug_info = debug_code(
            debugger,
            debugger_docs,
            plan,
            code,
            test,
            result,
            debug_info,
            verbose,
        )
        result = run(code, test)
        if verbose:
            print_code("Code and test after attempted fix:", code, test)
            show_result("Code execution result after attempted fix:", result)

    final_message = {
        "role": "assistant",
        "content": f"<final_code>{with_imports(code)}</final_code>\n<final_test>{with_imports(test)}</final_test>",
        "media": capture_media_from_exec(result),
    }
    update_callback(final_message)

    return CodeContext(
        code=with_imports(code),
        test=with_imports(test),
        success=result.success,
        test_result=result,
    )
| 243 | 
            +
             | 
| 244 | 
            +
             | 
class VisionAgentCoderV2(Agent):
    """Agent that plans a task and then writes, tests and debugs code for it.

    A planner produces a plan from the chat; tool documentation matching the
    plan instructions is retrieved from a tool recommender; coder, tester and
    debugger models then generate and repair the code.
    """

    def __init__(
        self,
        planner: Optional[Agent] = None,
        coder: Optional[LMM] = None,
        tester: Optional[LMM] = None,
        debugger: Optional[LMM] = None,
        tool_recommender: Optional[Union[str, Sim]] = None,
        verbose: bool = False,
        code_sandbox_runtime: Optional[str] = None,
        update_callback: Callable[[Dict[str, Any]], None] = lambda _: None,
    ) -> None:
        """Set up the agent, filling in defaults for omitted components.

        Args:
            planner: Planning agent. Defaults to ``VisionAgentPlannerV2`` built
                with the same ``verbose`` and ``update_callback``.
            coder: Model that writes code. Defaults to ``AnthropicLMM`` with
                ``claude-3-5-sonnet-20241022`` at temperature 0.0.
            tester: Model that writes tests. Same default as ``coder``.
            debugger: Model that fixes code. Same default as ``coder``.
            tool_recommender: A ``Sim`` instance, a string passed to
                ``Sim.load``, or ``None`` to use
                ``load_cached_sim(T.TOOLS_DF)``.
            verbose: Whether to print intermediate output.
            code_sandbox_runtime: Passed to
                ``CodeInterpreterFactory.new_instance`` when an interpreter
                is created.
            update_callback: Called with progress messages.

        Raises:
            TypeError: If ``tool_recommender`` is not ``None``, a string or a
                ``Sim``.
        """
        self.planner = (
            planner
            if planner is not None
            else VisionAgentPlannerV2(verbose=verbose, update_callback=update_callback)
        )
        self.coder = (
            coder
            if coder is not None
            else AnthropicLMM(model_name="claude-3-5-sonnet-20241022", temperature=0.0)
        )
        self.tester = (
            tester
            if tester is not None
            else AnthropicLMM(model_name="claude-3-5-sonnet-20241022", temperature=0.0)
        )
        self.debugger = (
            debugger
            if debugger is not None
            else AnthropicLMM(model_name="claude-3-5-sonnet-20241022", temperature=0.0)
        )
        if tool_recommender is None:
            self.tool_recommender = load_cached_sim(T.TOOLS_DF)
        elif isinstance(tool_recommender, str):
            self.tool_recommender = Sim.load(tool_recommender)
        elif isinstance(tool_recommender, Sim):
            self.tool_recommender = tool_recommender
        else:
            # Fail here instead of leaving the attribute unset, which would
            # only surface later as an AttributeError during code generation.
            raise TypeError(
                "tool_recommender must be a str, a Sim instance or None, "
                f"got {type(tool_recommender).__name__}"
            )

        self.verbose = verbose
        self.code_sandbox_runtime = code_sandbox_runtime
        self.update_callback = update_callback

    def __call__(
        self,
        input: Union[str, List[Message]],
        media: Optional[Union[str, Path]] = None,
    ) -> Union[str, List[Message]]:
        """Generate code for a request and return it.

        Args:
            input: Either the user request as a string or a list of chat
                messages.
            media: Optional media reference attached to the first message.

        Returns:
            The ``code`` attribute of the generated ``CodeContext``.
        """
        if isinstance(input, str):
            input = [{"role": "user", "content": input}]
        else:
            # Work on a copy so attaching media does not modify the caller's
            # message list.
            input = copy.deepcopy(input)
        if media is not None:
            input[0]["media"] = [media]
        return self.generate_code(input).code

    def generate_code(self, chat: List[Message]) -> CodeContext:
        """Plan the task described by ``chat`` and generate code for the plan.

        A single code interpreter is opened for the duration of the call and
        used for both planning and code generation.

        Args:
            chat: Conversation to generate code for; it is deep-copied first.

        Returns:
            The ``CodeContext`` produced by ``generate_code_from_plan``.
        """
        chat = copy.deepcopy(chat)
        with CodeInterpreterFactory.new_instance(
            self.code_sandbox_runtime
        ) as code_interpreter:
            int_chat, orig_chat, _ = add_media_to_chat(chat, code_interpreter)
            plan_context = self.planner.generate_plan(int_chat, code_interpreter)  # type: ignore
            code_context = self.generate_code_from_plan(
                orig_chat,
                plan_context,
                code_interpreter,
            )
        return code_context

    def generate_code_from_plan(
        self,
        chat: List[Message],
        plan_context: PlanContext,
        code_interpreter: Optional[CodeInterpreter] = None,
    ) -> CodeContext:
        """Write, test and debug code for an existing plan.

        Args:
            chat: Conversation to generate code for; it is deep-copied first.
            plan_context: Plan whose instructions drive tool retrieval and
                whose formatted text is given to the models.
            code_interpreter: Interpreter to run the code in. When given it is
                used as-is and left open for the caller to close; when
                ``None`` a new one is created and closed by this call.

        Returns:
            The ``CodeContext`` produced by ``write_and_test_code``.
        """
        # Local import: the module's import block does not bring in contextlib.
        from contextlib import nullcontext

        chat = copy.deepcopy(chat)
        # Honour a caller-supplied interpreter instead of always opening a
        # second one; nullcontext leaves its lifetime with the caller.
        interpreter_cm = (
            CodeInterpreterFactory.new_instance(self.code_sandbox_runtime)
            if code_interpreter is None
            else nullcontext(code_interpreter)
        )
        with interpreter_cm as interpreter:
            int_chat, _, media_list = add_media_to_chat(chat, interpreter)
            tool_docs = retrieve_tools(plan_context.instructions, self.tool_recommender)
            code_context = write_and_test_code(
                coder=self.coder,
                tester=self.tester,
                debugger=self.debugger,
                chat=int_chat,
                plan=format_plan_v2(plan_context),
                tool_docs=tool_docs,
                code_interpreter=interpreter,
                media_list=media_list,  # type: ignore
                update_callback=self.update_callback,
                verbose=self.verbose,
            )
        return code_context

    def log_progress(self, data: Dict[str, Any]) -> None:
        """Do nothing; progress is not logged by this agent."""
        pass
| @@ -14,7 +14,7 @@ from vision_agent.agent.agent_utils import ( | |
| 14 14 | 
             
                DefaultImports,
         | 
| 15 15 | 
             
                extract_code,
         | 
| 16 16 | 
             
                extract_json,
         | 
| 17 | 
            -
                 | 
| 17 | 
            +
                format_feedback,
         | 
| 18 18 | 
             
                format_plans,
         | 
| 19 19 | 
             
                print_code,
         | 
| 20 20 | 
             
            )
         | 
| @@ -423,7 +423,7 @@ class VisionAgentPlanner(Agent): | |
| 423 423 | 
             
                            T.get_tool_descriptions_by_names(
         | 
| 424 424 | 
             
                                custom_tool_names, T.FUNCTION_TOOLS, T.UTIL_TOOLS  # type: ignore
         | 
| 425 425 | 
             
                            ),
         | 
| 426 | 
            -
                             | 
| 426 | 
            +
                            format_feedback(working_memory),
         | 
| 427 427 | 
             
                            self.planner,
         | 
| 428 428 | 
             
                        )
         | 
| 429 429 | 
             
                        if self.verbosity >= 1:
         | 
| @@ -190,7 +190,7 @@ PICK_PLAN = """ | |
| 190 190 | 
             
            1. Re-read the user request, plans, tool outputs and examine the image.
         | 
| 191 191 | 
             
            2. Solve the problem yourself given the image and pick the most accurate plan that matches your solution the best.
         | 
| 192 192 | 
             
            3. Add modifications to improve the plan including: changing a tool, adding thresholds, string matching.
         | 
| 193 | 
            -
             | 
| 193 | 
            +
            4. Output a JSON object with the following format:
         | 
| 194 194 | 
             
            {{
         | 
| 195 195 | 
             
                "predicted_answer": str # the answer you would expect from the best plan
         | 
| 196 196 | 
             
                "thoughts": str # your thought process for choosing the best plan over other plans and any modifications you made
         |