PyPI - vision-agent - Versions diffs - 0.2.82__py3-none-any.whl → 0.2.84__py3-none-any.whl - Mend

vision-agent 0.2.82__py3-none-any.whl → 0.2.84__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

vision_agent/agent/vision_agent.py CHANGED Viewed

@@ -176,6 +176,7 @@ def pick_plan(
     model: LMM,
     code_interpreter: CodeInterpreter,
     verbosity: int = 0,
+    max_retries: int = 3,
 ) -> Tuple[str, str]:
     chat = copy.deepcopy(chat)
     if chat[-1]["role"] != "user":
@@ -192,13 +193,13 @@ def pick_plan(
     if len(tool_output.logs.stdout) > 0:
         tool_output_str = tool_output.logs.stdout[0]
-    if verbosity >= 1:
+    if verbosity == 2:
         _print_code("Initial code and tests:", code)
         _LOGGER.info(f"Initial code execution result:\n{tool_output.text()}")
     # retry if the tool output is empty or code fails
-    count = 1
-    while (not tool_output.success or tool_output_str == "") and count < 3:
+    count = 0
+    while (not tool_output.success or tool_output_str == "") and count < max_retries:
         prompt = TEST_PLANS.format(
             docstring=tool_info,
             plans=plan_str,
@@ -214,12 +215,15 @@ def pick_plan(
         if len(tool_output.logs.stdout) > 0:
             tool_output_str = tool_output.logs.stdout[0]
-        if verbosity == 1:
+        if verbosity == 2:
             _print_code("Code and test after attempted fix:", code)
             _LOGGER.info(f"Code execution result after attempte {count}")
         count += 1
+    if verbosity >= 1:
+        _print_code("Final code:", code)
     user_req = chat[-1]["content"]
     context = USER_REQ.format(user_request=user_req)
     # because the tool picker model gets the image as well, we have to be careful with
@@ -408,7 +412,7 @@ def debug_code(
                     FIX_BUG.format(
                         code=code,
                         tests=test,
-                        result="\n".join(result.text().splitlines()[-50:]),
+                        result="\n".join(result.text().splitlines()[-100:]),
                         feedback=format_memory(working_memory + new_working_memory),
                     )
                 )
@@ -673,92 +677,85 @@ class VisionAgent(Agent):
             working_memory: List[Dict[str, str]] = []
             results = {"code": "", "test": "", "plan": []}
             plan = []
-            success = False
-            retries = 0
-            while not success and retries < self.max_retries:
-                self.log_progress(
-                    {
-                        "type": "plans",
-                        "status": "started",
-                    }
-                )
-                plans = write_plans(
-                    int_chat,
-                    T.TOOL_DESCRIPTIONS,
-                    format_memory(working_memory),
-                    self.planner,
-                )
-                if self.verbosity >= 1:
-                    for p in plans:
-                        _LOGGER.info(
-                            f"\n{tabulate(tabular_data=plans[p], headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
-                        )
-                tool_infos = retrieve_tools(
-                    plans,
-                    self.tool_recommender,
-                    self.log_progress,
-                    self.verbosity,
-                )
-                best_plan, tool_output_str = pick_plan(
-                    int_chat,
-                    plans,
-                    tool_infos["all"],
-                    self.coder,
-                    code_interpreter,
-                    verbosity=self.verbosity,
-                )
+            self.log_progress(
+                {
+                    "type": "plans",
+                    "status": "started",
+                }
+            )
+            plans = write_plans(
+                int_chat,
+                T.TOOL_DESCRIPTIONS,
+                format_memory(working_memory),
+                self.planner,
+            )
-                if best_plan in plans and best_plan in tool_infos:
-                    plan_i = plans[best_plan]
-                    tool_info = tool_infos[best_plan]
-                else:
-                    if self.verbosity >= 1:
-                        _LOGGER.warning(
-                            f"Best plan {best_plan} not found in plans or tool_infos. Using the first plan and tool info."
-                        )
-                    k = list(plans.keys())[0]
-                    plan_i = plans[k]
-                    tool_info = tool_infos[k]
-                self.log_progress(
-                    {
-                        "type": "plans",
-                        "status": "completed",
-                        "payload": plan_i,
-                    }
-                )
-                if self.verbosity >= 1:
+            if self.verbosity >= 1:
+                for p in plans:
                     _LOGGER.info(
-                        f"Picked best plan:\n{tabulate(tabular_data=plan_i, headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
+                        f"\n{tabulate(tabular_data=plans[p], headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
                     )
-                results = write_and_test_code(
-                    chat=[
-                        {"role": c["role"], "content": c["content"]} for c in int_chat
-                    ],
-                    plan="\n-" + "\n-".join([e["instructions"] for e in plan_i]),
-                    tool_info=tool_info,
-                    tool_output=tool_output_str,
-                    tool_utils=T.UTILITIES_DOCSTRING,
-                    working_memory=working_memory,
-                    coder=self.coder,
-                    tester=self.tester,
-                    debugger=self.debugger,
-                    code_interpreter=code_interpreter,
-                    log_progress=self.log_progress,
-                    verbosity=self.verbosity,
-                    media=media_list,
+            tool_infos = retrieve_tools(
+                plans,
+                self.tool_recommender,
+                self.log_progress,
+                self.verbosity,
+            )
+            best_plan, tool_output_str = pick_plan(
+                int_chat,
+                plans,
+                tool_infos["all"],
+                self.coder,
+                code_interpreter,
+                verbosity=self.verbosity,
+            )
+            if best_plan in plans and best_plan in tool_infos:
+                plan_i = plans[best_plan]
+                tool_info = tool_infos[best_plan]
+            else:
+                if self.verbosity >= 1:
+                    _LOGGER.warning(
+                        f"Best plan {best_plan} not found in plans or tool_infos. Using the first plan and tool info."
+                    )
+                k = list(plans.keys())[0]
+                plan_i = plans[k]
+                tool_info = tool_infos[k]
+            self.log_progress(
+                {
+                    "type": "plans",
+                    "status": "completed",
+                    "payload": plan_i,
+                }
+            )
+            if self.verbosity >= 1:
+                _LOGGER.info(
+                    f"Picked best plan:\n{tabulate(tabular_data=plan_i, headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
                 )
-                success = cast(bool, results["success"])
-                code = cast(str, results["code"])
-                test = cast(str, results["test"])
-                working_memory.extend(results["working_memory"])  # type: ignore
-                plan.append({"code": code, "test": test, "plan": plan_i})
-                retries += 1
+            results = write_and_test_code(
+                chat=[{"role": c["role"], "content": c["content"]} for c in int_chat],
+                plan="\n-" + "\n-".join([e["instructions"] for e in plan_i]),
+                tool_info=tool_info,
+                tool_output=tool_output_str,
+                tool_utils=T.UTILITIES_DOCSTRING,
+                working_memory=working_memory,
+                coder=self.coder,
+                tester=self.tester,
+                debugger=self.debugger,
+                code_interpreter=code_interpreter,
+                log_progress=self.log_progress,
+                verbosity=self.verbosity,
+                media=media_list,
+            )
+            success = cast(bool, results["success"])
+            code = cast(str, results["code"])
+            test = cast(str, results["test"])
+            working_memory.extend(results["working_memory"])  # type: ignore
+            plan.append({"code": code, "test": test, "plan": plan_i})
             execution_result = cast(Execution, results["test_result"])
             self.log_progress(

vision_agent/lmm/__init__.py CHANGED Viewed

	@@ -1 +1 @@
1	- from .lmm import LMM, AzureOpenAILMM, Message, OllamaLMM, OpenAILMM
1	+ from .lmm import LMM, AzureOpenAILMM, ClaudeSonnetLMM, Message, OllamaLMM, OpenAILMM

vision_agent/lmm/lmm.py CHANGED Viewed

@@ -7,7 +7,9 @@ from abc import ABC, abstractmethod
 from pathlib import Path
 from typing import Any, Callable, Dict, List, Optional, Union, cast
+import anthropic
 import requests
+from anthropic.types import ImageBlockParam, MessageParam, TextBlockParam
 from openai import AzureOpenAI, OpenAI
 from PIL import Image
@@ -375,3 +377,92 @@ class OllamaLMM(LMM):
         response = response.json()
         return response["response"]  # type: ignore
+class ClaudeSonnetLMM(LMM):
+    r"""An LMM class for Anthropic's Claude Sonnet model."""
+    def __init__(
+        self,
+        api_key: Optional[str] = None,
+        model_name: str = "claude-3-sonnet-20240229",
+        max_tokens: int = 4096,
+        temperature: float = 0.7,
+        **kwargs: Any,
+    ):
+        self.client = anthropic.Anthropic(api_key=api_key)
+        self.model_name = model_name
+        self.max_tokens = max_tokens
+        self.temperature = temperature
+        self.kwargs = kwargs
+    def __call__(
+        self,
+        input: Union[str, List[Dict[str, Any]]],
+    ) -> str:
+        if isinstance(input, str):
+            return self.generate(input)
+        return self.chat(input)
+    def chat(
+        self,
+        chat: List[Dict[str, Any]],
+    ) -> str:
+        messages: List[MessageParam] = []
+        for msg in chat:
+            content: List[Union[TextBlockParam, ImageBlockParam]] = [
+                TextBlockParam(type="text", text=msg["content"])
+            ]
+            if "media" in msg:
+                for media_path in msg["media"]:
+                    encoded_media = encode_media(media_path)
+                    content.append(
+                        ImageBlockParam(
+                            type="image",
+                            source={
+                                "type": "base64",
+                                "media_type": "image/png",
+                                "data": encoded_media,
+                            },
+                        )
+                    )
+            messages.append({"role": msg["role"], "content": content})
+        response = self.client.messages.create(
+            model=self.model_name,
+            max_tokens=self.max_tokens,
+            temperature=self.temperature,
+            messages=messages,
+            **self.kwargs,
+        )
+        return cast(str, response.content[0].text)
+    def generate(
+        self,
+        prompt: str,
+        media: Optional[List[Union[str, Path]]] = None,
+    ) -> str:
+        content: List[Union[TextBlockParam, ImageBlockParam]] = [
+            TextBlockParam(type="text", text=prompt)
+        ]
+        if media:
+            for m in media:
+                encoded_media = encode_media(m)
+                content.append(
+                    ImageBlockParam(
+                        type="image",
+                        source={
+                            "type": "base64",
+                            "media_type": "image/png",
+                            "data": encoded_media,
+                        },
+                    )
+                )
+        response = self.client.messages.create(
+            model=self.model_name,
+            max_tokens=self.max_tokens,
+            temperature=self.temperature,
+            messages=[{"role": "user", "content": content}],
+            **self.kwargs,
+        )
+        return cast(str, response.content[0].text)

{vision_agent-0.2.82.dist-info → vision_agent-0.2.84.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.82
+Version: 0.2.84
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai
@@ -9,6 +9,7 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
+Requires-Dist: anthropic (>=0.31.0,<0.32.0)
 Requires-Dist: e2b (>=0.17.1,<0.18.0)
 Requires-Dist: e2b-code-interpreter (==0.0.11a2)
 Requires-Dist: ipykernel (>=6.29.4,<7.0.0)

{vision_agent-0.2.82.dist-info → vision_agent-0.2.84.dist-info}/RECORD RENAMED Viewed

@@ -1,12 +1,12 @@
 vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
 vision_agent/agent/__init__.py,sha256=IUwfbPMcT8X_rnXMLmI8gJ4ltsHy_XSs9eLiKURJxeY,81
 vision_agent/agent/agent.py,sha256=ZK-5lOtd9-eD9aWcXssJpnOyvZuO7_5hAmnb-6sWVe8,569
-vision_agent/agent/vision_agent.py,sha256=2yQcwYoGF4-NsjD6OY1_XjisYJxr2K1871mnwyWioKo,29148
+vision_agent/agent/vision_agent.py,sha256=fLCkqYJzk9SNtu8TzKBk0TLZrXDMTCqgI3FI-zkc-qs,28768
 vision_agent/agent/vision_agent_prompts.py,sha256=brBV-SmzyzTG5M9nfV3R5xdYT_BUYOKzxNFmTa2Sp-o,11049
 vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
-vision_agent/lmm/__init__.py,sha256=bw24xyQJHGzmph5e-bKCiTh9AX6tRFI2OUd0mofxjZI,68
-vision_agent/lmm/lmm.py,sha256=UtUl3k2TiN4gbdlqE16rexQ72WFE7FGru0yguyJ4jAE,12129
+vision_agent/lmm/__init__.py,sha256=j9mQsIXQOYfW6nFd47uTwuBe1ranpEbwW308qLfCWN0,85
+vision_agent/lmm/lmm.py,sha256=035uONyp6_jD3PVdNdSg2PMHOG1voqnpsn2IyybUENs,15147
 vision_agent/tools/__init__.py,sha256=k69hvcy2FWjDqVA0klzybKeoToOH_bom5NTVSliA0Og,1838
 vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
 vision_agent/tools/tool_utils.py,sha256=6z0jrvUnesJEFqDHZoAvbXPic8rzh0KfILL07tu0uRo,2205
@@ -18,7 +18,7 @@ vision_agent/utils/image_utils.py,sha256=_cdiS5YrLzqkq_ZgFUO897m5M4_SCIThwUy4lOk
 vision_agent/utils/sim.py,sha256=1HTaiVaBiKeyXIy21IYGXlPw0TipOyw9FPOJDfyLI94,4409
 vision_agent/utils/type_defs.py,sha256=QeQRRIlklZMWzxROcCn5ELxP89nYdXGydy1rAiSpZZw,1384
 vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
-vision_agent-0.2.82.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-vision_agent-0.2.82.dist-info/METADATA,sha256=hvZlgdZ55jCzin2ZHECYtMLH6n6yTa3yhnXDU8Nvjcc,9433
-vision_agent-0.2.82.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
-vision_agent-0.2.82.dist-info/RECORD,,
+vision_agent-0.2.84.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.84.dist-info/METADATA,sha256=mZM17x03oCnI8tp4g7psZzonwNlS0fqN0f78dWbob-o,9477
+vision_agent-0.2.84.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.84.dist-info/RECORD,,

{vision_agent-0.2.82.dist-info → vision_agent-0.2.84.dist-info}/LICENSE RENAMED Viewed

File without changes

{vision_agent-0.2.82.dist-info → vision_agent-0.2.84.dist-info}/WHEEL RENAMED Viewed

File without changes

vision-agent 0.2.82__py3-none-any.whl → 0.2.84__py3-none-any.whl

vision-agent 0.2.82__py3-none-any.whl → 0.2.84__py3-none-any.whl