PyPI - vision-agent - Versions diffs - 0.2.48__py3-none-any.whl → 0.2.50__py3-none-any.whl - Mend

vision-agent 0.2.48__py3-none-any.whl → 0.2.50__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

vision_agent/agent/vision_agent.py CHANGED Viewed

@@ -36,11 +36,25 @@ logging.basicConfig(stream=sys.stdout)
 _LOGGER = logging.getLogger(__name__)
 _MAX_TABULATE_COL_WIDTH = 80
 _CONSOLE = Console()
-_DEFAULT_IMPORT = "\n".join(T.__new_tools__) + "\n".join(
-    [
+class DefaultImports:
+    """Container for default imports used in the code execution."""
+    common_imports = [
         "from typing import *",
     ]
-)
+    @staticmethod
+    def to_code_string() -> str:
+        return "\n".join(DefaultImports.common_imports + T.__new_tools__)
+    @staticmethod
+    def prepend_imports(code: str) -> str:
+        """Run this method to prepend the default imports to the code.
+        NOTE: be sure to run this method after the custom tools have been registered.
+        """
+        return DefaultImports.to_code_string() + "\n\n" + code
 def get_diff(before: str, after: str) -> str:
@@ -202,18 +216,20 @@ def write_and_test_code(
             "type": "code",
             "status": "running",
             "payload": {
-                "code": code,
+                "code": DefaultImports.prepend_imports(code),
                 "test": test,
             },
         }
     )
-    result = code_interpreter.exec_isolation(f"{_DEFAULT_IMPORT}\n{code}\n{test}")
+    result = code_interpreter.exec_isolation(
+        f"{DefaultImports.to_code_string()}\n{code}\n{test}"
+    )
     log_progress(
         {
             "type": "code",
             "status": "completed" if result.success else "failed",
             "payload": {
-                "code": code,
+                "code": DefaultImports.prepend_imports(code),
                 "test": test,
                 "result": result.to_json(),
             },
@@ -264,19 +280,21 @@ def write_and_test_code(
                 "type": "code",
                 "status": "running",
                 "payload": {
-                    "code": code,
+                    "code": DefaultImports.prepend_imports(code),
                     "test": test,
                 },
             }
         )
-        result = code_interpreter.exec_isolation(f"{_DEFAULT_IMPORT}\n{code}\n{test}")
+        result = code_interpreter.exec_isolation(
+            f"{DefaultImports.to_code_string()}\n{code}\n{test}"
+        )
         log_progress(
             {
                 "type": "code",
                 "status": "completed" if result.success else "failed",
                 "payload": {
-                    "code": code,
+                    "code": DefaultImports.prepend_imports(code),
                     "test": test,
                     "result": result.to_json(),
                 },
@@ -307,7 +325,14 @@ def write_and_test_code(
 def _print_code(title: str, code: str, test: Optional[str] = None) -> None:
     _CONSOLE.print(title, style=Style(bgcolor="dark_orange3", bold=True))
     _CONSOLE.print("=" * 30 + " Code " + "=" * 30)
-    _CONSOLE.print(Syntax(code, "python", theme="gruvbox-dark", line_numbers=True))
+    _CONSOLE.print(
+        Syntax(
+            DefaultImports.prepend_imports(code),
+            "python",
+            theme="gruvbox-dark",
+            line_numbers=True,
+        )
+    )
     if test:
         _CONSOLE.print("=" * 30 + " Test " + "=" * 30)
         _CONSOLE.print(Syntax(test, "python", theme="gruvbox-dark", line_numbers=True))
@@ -464,10 +489,6 @@ class VisionAgent(Agent):
                     if chat_i["role"] == "user":
                         chat_i["content"] += f" Image name {media}"
-            # re-grab custom tools
-            global _DEFAULT_IMPORT
-            _DEFAULT_IMPORT = "\n".join(T.__new_tools__)
             code = ""
             test = ""
             working_memory: List[Dict[str, str]] = []
@@ -531,38 +552,35 @@ class VisionAgent(Agent):
                 working_memory.extend(results["working_memory"])  # type: ignore
                 plan.append({"code": code, "test": test, "plan": plan_i})
-                if self_reflection:
-                    self.log_progress(
-                        {
-                            "type": "self_reflection",
-                            "status": "started",
-                        }
-                    )
-                    reflection = reflect(
-                        chat,
-                        FULL_TASK.format(
-                            user_request=chat[0]["content"], subtasks=plan_i_str
-                        ),
-                        code,
-                        self.planner,
-                    )
-                    if self.verbosity > 0:
-                        _LOGGER.info(f"Reflection: {reflection}")
-                    feedback = cast(str, reflection["feedback"])
-                    success = cast(bool, reflection["success"])
-                    self.log_progress(
-                        {
-                            "type": "self_reflection",
-                            "status": "completed" if success else "failed",
-                            "payload": reflection,
-                        }
-                    )
-                    working_memory.append(
-                        {"code": f"{code}\n{test}", "feedback": feedback}
-                    )
-                else:
+                if not self_reflection:
                     break
+                self.log_progress(
+                    {
+                        "type": "self_reflection",
+                        "status": "started",
+                    }
+                )
+                reflection = reflect(
+                    chat,
+                    FULL_TASK.format(
+                        user_request=chat[0]["content"], subtasks=plan_i_str
+                    ),
+                    code,
+                    self.planner,
+                )
+                if self.verbosity > 0:
+                    _LOGGER.info(f"Reflection: {reflection}")
+                feedback = cast(str, reflection["feedback"])
+                success = cast(bool, reflection["success"])
+                self.log_progress(
+                    {
+                        "type": "self_reflection",
+                        "status": "completed" if success else "failed",
+                        "payload": reflection,
+                    }
+                )
+                working_memory.append({"code": f"{code}\n{test}", "feedback": feedback})
                 retries += 1
             execution_result = cast(Execution, results["test_result"])
@@ -571,7 +589,7 @@ class VisionAgent(Agent):
                     "type": "final_code",
                     "status": "completed" if success else "failed",
                     "payload": {
-                        "code": code,
+                        "code": DefaultImports.prepend_imports(code),
                         "test": test,
                         "result": execution_result.to_json(),
                     },
@@ -586,7 +604,7 @@ class VisionAgent(Agent):
                         play_video(res.mp4)
             return {
-                "code": code,
+                "code": DefaultImports.prepend_imports(code),
                 "test": test,
                 "test_result": execution_result,
                 "plan": plan,

vision_agent/tools/tools.py CHANGED Viewed

@@ -187,7 +187,7 @@ def extract_frames(
     Returns:
         List[Tuple[np.ndarray, float]]: A list of tuples containing the extracted frame
-        and the timestamp in seconds.
+        as a numpy array and the timestamp in seconds.
     Example
     -------
@@ -515,7 +515,7 @@ def save_json(data: Any, file_path: str) -> None:
 def load_image(image_path: str) -> np.ndarray:
-    """'load_image' is a utility function that loads an image from the given path.
+    """'load_image' is a utility function that loads an image from the given file path string.
     Parameters:
         image_path (str): The path to the image.
@@ -527,7 +527,9 @@ def load_image(image_path: str) -> np.ndarray:
     -------
     >>> load_image("path/to/image.jpg")
     """
+    # NOTE: sometimes the generated code pass in a NumPy array
+    if isinstance(image_path, np.ndarray):
+        return image_path
     image = Image.open(image_path).convert("RGB")
     return np.array(image)
@@ -647,12 +649,9 @@ def overlay_bounding_boxes(
         box = elt["bbox"]
         scores = elt["score"]
-        box = [
-            int(box[0] * width),
-            int(box[1] * height),
-            int(box[2] * width),
-            int(box[3] * height),
-        ]
+        # denormalize the box if it is normalized
+        box = denormalize_bbox(box, (height, width))
         draw.rectangle(box, outline=color[label], width=4)
         text = f"{label}: {scores:.2f}"
         text_box = draw.textbbox((box[0], box[1]), text=text, font=font)

{vision_agent-0.2.48.dist-info → vision_agent-0.2.50.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.48
+Version: 0.2.50
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai

{vision_agent-0.2.48.dist-info → vision_agent-0.2.50.dist-info}/RECORD RENAMED Viewed

@@ -11,7 +11,7 @@ vision_agent/agent/easytool_v2.py,sha256=CjY-sSj3abxnSq3ZHZMt-7YvRWDXEZsC6RN8FFI
 vision_agent/agent/easytool_v2_prompts.py,sha256=MZSIwovYgB-f-kdJ6btaNDVXptJn47bfOL3-Zn6NiC0,8573
 vision_agent/agent/reflexion.py,sha256=AlM5AvBJvCslXlYQdZiadq4oVHsNBm3IF_03DglTxRo,10506
 vision_agent/agent/reflexion_prompts.py,sha256=G7UAeNz_g2qCb2yN6OaIC7bQVUkda4m3z42EG8wAyfE,9342
-vision_agent/agent/vision_agent.py,sha256=X_LF2wRXVYAr8xMuJs3Omi8n06uVgLNgtF25sidKtfM,20424
+vision_agent/agent/vision_agent.py,sha256=0EqpLyyzpRGmT7fhS2XvLeUlktgCXTE5k1KGMQ8z3_s,20963
 vision_agent/agent/vision_agent_prompts.py,sha256=hgnTlaYp2HMBHLi3e4faPb-DI5jQL9jfhKq9jyEUEgY,8370
 vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
@@ -23,14 +23,14 @@ vision_agent/tools/__init__.py,sha256=Sng6dChynJJCYWjraXXM0tep_VPdnYl3L9vb0HMy_P
 vision_agent/tools/easytool_tools.py,sha256=pZc5dQlYINlV4nYbbzsDi3-wauA-fCeD2iGmJUMoUfE,47373
 vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
 vision_agent/tools/tool_utils.py,sha256=wzRacbUpqk9hhfX_Y08rL8qP0XCN2w-8IZoYLi3Upn4,869
-vision_agent/tools/tools.py,sha256=Vpn2SxtjEcnztovat6qMiH52gFsDHo3ikEPrAT4e5yc,26639
+vision_agent/tools/tools.py,sha256=Sc6tAYbH03TbrPKAT8XIj1YZIwhd9j2k4ia8iKHhxzM,26743
 vision_agent/utils/__init__.py,sha256=Ce4yPhoWanRsnTy3X7YzZNBYYRJsrJeT7N59WUf8GZM,209
 vision_agent/utils/execute.py,sha256=GqoAodxtwTPBr1nujPTsWiZO2rBGvWVXTe8lgxY4d_g,20603
 vision_agent/utils/image_utils.py,sha256=_cdiS5YrLzqkq_ZgFUO897m5M4_SCIThwUy4lOklfB8,7700
 vision_agent/utils/sim.py,sha256=oUZ-6eu8Io-UNt9GXJ0XRKtP-Wc0sPWVzYGVpB2yDFk,3001
 vision_agent/utils/type_defs.py,sha256=BlI8ywWHAplC7kYWLvt4AOdnKpEW3qWEFm-GEOSkrFQ,1792
 vision_agent/utils/video.py,sha256=BJ9fomy2giAl038JThQP1WQZ-u4J4J_nsZB7QEWvlcQ,8767
-vision_agent-0.2.48.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-vision_agent-0.2.48.dist-info/METADATA,sha256=sJSWNAHN2-JMNb5hi4iA-HTzKNskLioIse9sdrMDuy4,6817
-vision_agent-0.2.48.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
-vision_agent-0.2.48.dist-info/RECORD,,
+vision_agent-0.2.50.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.50.dist-info/METADATA,sha256=nLyeSFYnn4Bv_RyKzrP5iqnCRRkwCZT_d3euN1zgBOA,6817
+vision_agent-0.2.50.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.50.dist-info/RECORD,,

{vision_agent-0.2.48.dist-info → vision_agent-0.2.50.dist-info}/LICENSE RENAMED Viewed

File without changes

{vision_agent-0.2.48.dist-info → vision_agent-0.2.50.dist-info}/WHEEL RENAMED Viewed

File without changes

vision-agent 0.2.48__py3-none-any.whl → 0.2.50__py3-none-any.whl

vision-agent 0.2.48__py3-none-any.whl → 0.2.50__py3-none-any.whl