vision-agent 0.2.83__tar.gz → 0.2.85__tar.gz

This diff shows the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (25)
  1. {vision_agent-0.2.83 → vision_agent-0.2.85}/PKG-INFO +2 -1
  2. {vision_agent-0.2.83 → vision_agent-0.2.85}/pyproject.toml +2 -1
  3. {vision_agent-0.2.83 → vision_agent-0.2.85}/vision_agent/agent/vision_agent.py +15 -9
  4. vision_agent-0.2.85/vision_agent/lmm/__init__.py +1 -0
  5. {vision_agent-0.2.83 → vision_agent-0.2.85}/vision_agent/lmm/lmm.py +91 -0
  6. vision_agent-0.2.83/vision_agent/lmm/__init__.py +0 -1
  7. {vision_agent-0.2.83 → vision_agent-0.2.85}/LICENSE +0 -0
  8. {vision_agent-0.2.83 → vision_agent-0.2.85}/README.md +0 -0
  9. {vision_agent-0.2.83 → vision_agent-0.2.85}/vision_agent/__init__.py +0 -0
  10. {vision_agent-0.2.83 → vision_agent-0.2.85}/vision_agent/agent/__init__.py +0 -0
  11. {vision_agent-0.2.83 → vision_agent-0.2.85}/vision_agent/agent/agent.py +0 -0
  12. {vision_agent-0.2.83 → vision_agent-0.2.85}/vision_agent/agent/vision_agent_prompts.py +0 -0
  13. {vision_agent-0.2.83 → vision_agent-0.2.85}/vision_agent/fonts/__init__.py +0 -0
  14. {vision_agent-0.2.83 → vision_agent-0.2.85}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  15. {vision_agent-0.2.83 → vision_agent-0.2.85}/vision_agent/tools/__init__.py +0 -0
  16. {vision_agent-0.2.83 → vision_agent-0.2.85}/vision_agent/tools/prompts.py +0 -0
  17. {vision_agent-0.2.83 → vision_agent-0.2.85}/vision_agent/tools/tool_utils.py +0 -0
  18. {vision_agent-0.2.83 → vision_agent-0.2.85}/vision_agent/tools/tools.py +0 -0
  19. {vision_agent-0.2.83 → vision_agent-0.2.85}/vision_agent/utils/__init__.py +0 -0
  20. {vision_agent-0.2.83 → vision_agent-0.2.85}/vision_agent/utils/exceptions.py +0 -0
  21. {vision_agent-0.2.83 → vision_agent-0.2.85}/vision_agent/utils/execute.py +0 -0
  22. {vision_agent-0.2.83 → vision_agent-0.2.85}/vision_agent/utils/image_utils.py +0 -0
  23. {vision_agent-0.2.83 → vision_agent-0.2.85}/vision_agent/utils/sim.py +0 -0
  24. {vision_agent-0.2.83 → vision_agent-0.2.85}/vision_agent/utils/type_defs.py +0 -0
  25. {vision_agent-0.2.83 → vision_agent-0.2.85}/vision_agent/utils/video.py +0 -0
{vision_agent-0.2.83 → vision_agent-0.2.85}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: vision-agent
- Version: 0.2.83
+ Version: 0.2.85
  Summary: Toolset for Vision Agent
  Author: Landing AI
  Author-email: dev@landing.ai
@@ -9,6 +9,7 @@ Classifier: Programming Language :: Python :: 3
  Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
+ Requires-Dist: anthropic (>=0.31.0,<0.32.0)
  Requires-Dist: e2b (>=0.17.1,<0.18.0)
  Requires-Dist: e2b-code-interpreter (==0.0.11a2)
  Requires-Dist: ipykernel (>=6.29.4,<7.0.0)
{vision_agent-0.2.83 → vision_agent-0.2.85}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
  
  [tool.poetry]
  name = "vision-agent"
- version = "0.2.83"
+ version = "0.2.85"
  description = "Toolset for Vision Agent"
  authors = ["Landing AI <dev@landing.ai>"]
  readme = "README.md"
@@ -39,6 +39,7 @@ e2b-code-interpreter = "0.0.11a2"
  tenacity = "^8.3.0"
  pillow-heif = "^0.16.0"
  pytube = "15.0.0"
+ anthropic = "^0.31.0"
  
  [tool.poetry.group.dev.dependencies]
  autoflake = "1.*"
{vision_agent-0.2.83 → vision_agent-0.2.85}/vision_agent/agent/vision_agent.py

@@ -622,6 +622,7 @@ class VisionAgent(Agent):
      def chat_with_workflow(
          self,
          chat: List[Message],
+         test_multi_plan: bool = True,
          display_visualization: bool = False,
      ) -> Dict[str, Any]:
          """Chat with Vision Agent and return intermediate information regarding the task.
@@ -691,7 +692,7 @@ class VisionAgent(Agent):
              self.planner,
          )
  
-         if self.verbosity >= 1:
+         if self.verbosity >= 1 and test_multi_plan:
              for p in plans:
                  _LOGGER.info(
                      f"\n{tabulate(tabular_data=plans[p], headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
@@ -703,14 +704,19 @@ class VisionAgent(Agent):
              self.log_progress,
              self.verbosity,
          )
-         best_plan, tool_output_str = pick_plan(
-             int_chat,
-             plans,
-             tool_infos["all"],
-             self.coder,
-             code_interpreter,
-             verbosity=self.verbosity,
-         )
+ 
+         if test_multi_plan:
+             best_plan, tool_output_str = pick_plan(
+                 int_chat,
+                 plans,
+                 tool_infos["all"],
+                 self.coder,
+                 code_interpreter,
+                 verbosity=self.verbosity,
+             )
+         else:
+             best_plan = list(plans.keys())[0]
+             tool_output_str = ""
  
          if best_plan in plans and best_plan in tool_infos:
              plan_i = plans[best_plan]
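The new test_multi_plan flag controls whether chat_with_workflow compares the candidate plans with pick_plan (the default) or simply runs the first generated plan. A minimal usage sketch, assuming a default-constructed VisionAgent and a hypothetical image path:

```python
from vision_agent.agent import VisionAgent

agent = VisionAgent(verbosity=1)  # default construction assumed

# With test_multi_plan=False the agent skips pick_plan() and executes the
# first generated plan directly, trading plan comparison for a faster run.
result = agent.chat_with_workflow(
    [
        {
            "role": "user",
            "content": "Count the cars in the image",
            "media": ["cars.jpg"],  # hypothetical image path
        }
    ],
    test_multi_plan=False,
)
```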
vision_agent-0.2.85/vision_agent/lmm/__init__.py (new file)

@@ -0,0 +1 @@
+ from .lmm import LMM, AzureOpenAILMM, ClaudeSonnetLMM, Message, OllamaLMM, OpenAILMM
{vision_agent-0.2.83 → vision_agent-0.2.85}/vision_agent/lmm/lmm.py

@@ -7,7 +7,9 @@ from abc import ABC, abstractmethod
  from pathlib import Path
  from typing import Any, Callable, Dict, List, Optional, Union, cast
  
+ import anthropic
  import requests
+ from anthropic.types import ImageBlockParam, MessageParam, TextBlockParam
  from openai import AzureOpenAI, OpenAI
  from PIL import Image
  
@@ -375,3 +377,92 @@ class OllamaLMM(LMM):
  
          response = response.json()
          return response["response"]  # type: ignore
+ 
+ 
+ class ClaudeSonnetLMM(LMM):
+     r"""An LMM class for Anthropic's Claude Sonnet model."""
+ 
+     def __init__(
+         self,
+         api_key: Optional[str] = None,
+         model_name: str = "claude-3-sonnet-20240229",
+         max_tokens: int = 4096,
+         temperature: float = 0.7,
+         **kwargs: Any,
+     ):
+         self.client = anthropic.Anthropic(api_key=api_key)
+         self.model_name = model_name
+         self.max_tokens = max_tokens
+         self.temperature = temperature
+         self.kwargs = kwargs
+ 
+     def __call__(
+         self,
+         input: Union[str, List[Dict[str, Any]]],
+     ) -> str:
+         if isinstance(input, str):
+             return self.generate(input)
+         return self.chat(input)
+ 
+     def chat(
+         self,
+         chat: List[Dict[str, Any]],
+     ) -> str:
+         messages: List[MessageParam] = []
+         for msg in chat:
+             content: List[Union[TextBlockParam, ImageBlockParam]] = [
+                 TextBlockParam(type="text", text=msg["content"])
+             ]
+             if "media" in msg:
+                 for media_path in msg["media"]:
+                     encoded_media = encode_media(media_path)
+                     content.append(
+                         ImageBlockParam(
+                             type="image",
+                             source={
+                                 "type": "base64",
+                                 "media_type": "image/png",
+                                 "data": encoded_media,
+                             },
+                         )
+                     )
+             messages.append({"role": msg["role"], "content": content})
+ 
+         response = self.client.messages.create(
+             model=self.model_name,
+             max_tokens=self.max_tokens,
+             temperature=self.temperature,
+             messages=messages,
+             **self.kwargs,
+         )
+         return cast(str, response.content[0].text)
+ 
+     def generate(
+         self,
+         prompt: str,
+         media: Optional[List[Union[str, Path]]] = None,
+     ) -> str:
+         content: List[Union[TextBlockParam, ImageBlockParam]] = [
+             TextBlockParam(type="text", text=prompt)
+         ]
+         if media:
+             for m in media:
+                 encoded_media = encode_media(m)
+                 content.append(
+                     ImageBlockParam(
+                         type="image",
+                         source={
+                             "type": "base64",
+                             "media_type": "image/png",
+                             "data": encoded_media,
+                         },
+                     )
+                 )
+         response = self.client.messages.create(
+             model=self.model_name,
+             max_tokens=self.max_tokens,
+             temperature=self.temperature,
+             messages=[{"role": "user", "content": content}],
+             **self.kwargs,
+         )
+         return cast(str, response.content[0].text)
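For reference, a short usage sketch of the new ClaudeSonnetLMM class (the API key and image path below are placeholders; the import path comes from the updated vision_agent/lmm/__init__.py shown above):

```python
from vision_agent.lmm import ClaudeSonnetLMM

lmm = ClaudeSonnetLMM(api_key="sk-ant-...", temperature=0.0)  # placeholder key

# Text-only generation goes through generate().
print(lmm.generate("Describe what a vision agent does."))

# Chat-style calls accept the same message dicts as the other LMM classes;
# files listed under "media" are base64-encoded and sent as image blocks.
print(
    lmm(
        [
            {
                "role": "user",
                "content": "What objects are in this image?",
                "media": ["example.jpg"],  # placeholder image path
            }
        ]
    )
)
```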
vision_agent-0.2.83/vision_agent/lmm/__init__.py (removed)

@@ -1 +0,0 @@
- from .lmm import LMM, AzureOpenAILMM, Message, OllamaLMM, OpenAILMM