vision-agent 0.2.58__tar.gz → 0.2.78__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vision_agent-0.2.58 → vision_agent-0.2.78}/PKG-INFO +38 -7
- {vision_agent-0.2.58 → vision_agent-0.2.78}/README.md +33 -4
- {vision_agent-0.2.58 → vision_agent-0.2.78}/pyproject.toml +5 -3
- {vision_agent-0.2.58 → vision_agent-0.2.78}/vision_agent/agent/vision_agent.py +118 -66
- {vision_agent-0.2.58 → vision_agent-0.2.78}/vision_agent/agent/vision_agent_prompts.py +2 -0
- vision_agent-0.2.78/vision_agent/lmm/__init__.py +1 -0
- {vision_agent-0.2.58 → vision_agent-0.2.78}/vision_agent/lmm/lmm.py +97 -6
- {vision_agent-0.2.58 → vision_agent-0.2.78}/vision_agent/tools/__init__.py +16 -4
- vision_agent-0.2.78/vision_agent/tools/tool_utils.py +67 -0
- {vision_agent-0.2.58 → vision_agent-0.2.78}/vision_agent/tools/tools.py +517 -35
- {vision_agent-0.2.58 → vision_agent-0.2.78}/vision_agent/utils/execute.py +40 -27
- {vision_agent-0.2.58 → vision_agent-0.2.78}/vision_agent/utils/sim.py +7 -1
- vision_agent-0.2.58/vision_agent/lmm/__init__.py +0 -1
- vision_agent-0.2.58/vision_agent/tools/tool_utils.py +0 -30
- {vision_agent-0.2.58 → vision_agent-0.2.78}/LICENSE +0 -0
- {vision_agent-0.2.58 → vision_agent-0.2.78}/vision_agent/__init__.py +0 -0
- {vision_agent-0.2.58 → vision_agent-0.2.78}/vision_agent/agent/__init__.py +0 -0
- {vision_agent-0.2.58 → vision_agent-0.2.78}/vision_agent/agent/agent.py +0 -0
- {vision_agent-0.2.58 → vision_agent-0.2.78}/vision_agent/fonts/__init__.py +0 -0
- {vision_agent-0.2.58 → vision_agent-0.2.78}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
- {vision_agent-0.2.58 → vision_agent-0.2.78}/vision_agent/tools/prompts.py +0 -0
- {vision_agent-0.2.58 → vision_agent-0.2.78}/vision_agent/utils/__init__.py +0 -0
- {vision_agent-0.2.58 → vision_agent-0.2.78}/vision_agent/utils/image_utils.py +0 -0
- {vision_agent-0.2.58 → vision_agent-0.2.78}/vision_agent/utils/type_defs.py +0 -0
- {vision_agent-0.2.58 → vision_agent-0.2.78}/vision_agent/utils/video.py +0 -0
--- vision_agent-0.2.58/PKG-INFO
+++ vision_agent-0.2.78/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.58
+Version: 0.2.78
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai
@@ -9,8 +9,8 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
-Requires-Dist: e2b (>=0.17.
-Requires-Dist: e2b-code-interpreter (
+Requires-Dist: e2b (>=0.17.1,<0.18.0)
+Requires-Dist: e2b-code-interpreter (==0.0.11a1)
 Requires-Dist: ipykernel (>=6.29.4,<7.0.0)
 Requires-Dist: langsmith (>=0.1.58,<0.2.0)
 Requires-Dist: moviepy (>=1.0.0,<2.0.0)
@@ -21,7 +21,9 @@ Requires-Dist: openai (>=1.0.0,<2.0.0)
 Requires-Dist: opencv-python (>=4.0.0,<5.0.0)
 Requires-Dist: pandas (>=2.0.0,<3.0.0)
 Requires-Dist: pillow (>=10.0.0,<11.0.0)
+Requires-Dist: pillow-heif (>=0.16.0,<0.17.0)
 Requires-Dist: pydantic-settings (>=2.2.1,<3.0.0)
+Requires-Dist: pytube (==15.0.0)
 Requires-Dist: requests (>=2.0.0,<3.0.0)
 Requires-Dist: rich (>=13.7.1,<14.0.0)
 Requires-Dist: scipy (>=1.13.0,<1.14.0)
@@ -76,6 +78,9 @@ using Azure OpenAI please see the Azure setup section):
 export OPENAI_API_KEY="your-api-key"
 ```
 
+### Important Note on API Usage
+Please be aware that using the API in this project requires you to have API credits (minimum of five US dollars). This is different from the OpenAI subscription used in this chatbot. If you don't have credit, further information can be found [here](https://github.com/landing-ai/vision-agent?tab=readme-ov-file#how-to-get-started-with-openai-api-credits)
+
 ### Vision Agent
 #### Basic Usage
 You can interact with the agent as you would with any LLM or LMM model:
@@ -178,8 +183,8 @@ you. For example:
 
 ```python
 >>> import vision_agent as va
->>>
->>> detector =
+>>> lmm = va.lmm.OpenAILMM()
+>>> detector = lmm.generate_detector("Can you build a jar detector for me?")
 >>> detector(va.tools.load_image("jar.jpg"))
 [{"labels": ["jar",],
  "scores": [0.99],
@@ -218,18 +223,44 @@ ensure the documentation is in the same format above with description, `Paramete
 `Returns:`, and `Example\n-------`. You can find an example use case [here](examples/custom_tools/).
 
 ### Azure Setup
-If you want to use Azure OpenAI models, you
+If you want to use Azure OpenAI models, you need to have two OpenAI model deployments:
+
+1. OpenAI GPT-4o model
+2. OpenAI text embedding model
+
+<img width="1201" alt="Screenshot 2024-06-12 at 5 54 48 PM" src="https://github.com/landing-ai/vision-agent/assets/2736300/da125592-b01d-45bc-bc99-d48c9dcdfa32">
+
+Then you can set the following environment variables:
 
 ```bash
 export AZURE_OPENAI_API_KEY="your-api-key"
 export AZURE_OPENAI_ENDPOINT="your-endpoint"
+# The deployment name of your Azure OpenAI chat model
+export AZURE_OPENAI_CHAT_MODEL_DEPLOYMENT_NAME="your_gpt4o_model_deployment_name"
+# The deployment name of your Azure OpenAI text embedding model
+export AZURE_OPENAI_EMBEDDING_MODEL_DEPLOYMENT_NAME="your_embedding_model_deployment_name"
 ```
 
+> NOTE: make sure your Azure model deployment have enough quota (token per minute) to support it. The default value 8000TPM is not enough.
+
 You can then run Vision Agent using the Azure OpenAI models:
 
 ```python
 import vision_agent as va
-import vision_agent.tools as T
 agent = va.agent.AzureVisionAgent()
 ```
 
+******************************************************************************************************************************
+
+### Q&A
+
+#### How to get started with OpenAI API credits
+
+1. Visit the[OpenAI API platform](https://beta.openai.com/signup/) to sign up for an API key.
+2. Follow the instructions to purchase and manage your API credits.
+3. Ensure your API key is correctly configured in your project settings.
+
+Failure to have sufficient API credits may result in limited or no functionality for the features that rely on the OpenAI API.
+
+For more details on managing your API usage and credits, please refer to the OpenAI API documentation.
--- vision_agent-0.2.58/README.md
+++ vision_agent-0.2.78/README.md
@@ -40,6 +40,9 @@ using Azure OpenAI please see the Azure setup section):
 export OPENAI_API_KEY="your-api-key"
 ```
 
+### Important Note on API Usage
+Please be aware that using the API in this project requires you to have API credits (minimum of five US dollars). This is different from the OpenAI subscription used in this chatbot. If you don't have credit, further information can be found [here](https://github.com/landing-ai/vision-agent?tab=readme-ov-file#how-to-get-started-with-openai-api-credits)
+
 ### Vision Agent
 #### Basic Usage
 You can interact with the agent as you would with any LLM or LMM model:
@@ -142,8 +145,8 @@ you. For example:
 
 ```python
 >>> import vision_agent as va
->>>
->>> detector =
+>>> lmm = va.lmm.OpenAILMM()
+>>> detector = lmm.generate_detector("Can you build a jar detector for me?")
 >>> detector(va.tools.load_image("jar.jpg"))
 [{"labels": ["jar",],
  "scores": [0.99],
@@ -182,17 +185,43 @@ ensure the documentation is in the same format above with description, `Paramete
 `Returns:`, and `Example\n-------`. You can find an example use case [here](examples/custom_tools/).
 
 ### Azure Setup
-If you want to use Azure OpenAI models, you
+If you want to use Azure OpenAI models, you need to have two OpenAI model deployments:
+
+1. OpenAI GPT-4o model
+2. OpenAI text embedding model
+
+<img width="1201" alt="Screenshot 2024-06-12 at 5 54 48 PM" src="https://github.com/landing-ai/vision-agent/assets/2736300/da125592-b01d-45bc-bc99-d48c9dcdfa32">
+
+Then you can set the following environment variables:
 
 ```bash
 export AZURE_OPENAI_API_KEY="your-api-key"
 export AZURE_OPENAI_ENDPOINT="your-endpoint"
+# The deployment name of your Azure OpenAI chat model
+export AZURE_OPENAI_CHAT_MODEL_DEPLOYMENT_NAME="your_gpt4o_model_deployment_name"
+# The deployment name of your Azure OpenAI text embedding model
+export AZURE_OPENAI_EMBEDDING_MODEL_DEPLOYMENT_NAME="your_embedding_model_deployment_name"
 ```
 
+> NOTE: make sure your Azure model deployment have enough quota (token per minute) to support it. The default value 8000TPM is not enough.
+
 You can then run Vision Agent using the Azure OpenAI models:
 
 ```python
 import vision_agent as va
-import vision_agent.tools as T
 agent = va.agent.AzureVisionAgent()
 ```
+
+******************************************************************************************************************************
+
+### Q&A
+
+#### How to get started with OpenAI API credits
+
+1. Visit the[OpenAI API platform](https://beta.openai.com/signup/) to sign up for an API key.
+2. Follow the instructions to purchase and manage your API credits.
+3. Ensure your API key is correctly configured in your project settings.
+
+Failure to have sufficient API credits may result in limited or no functionality for the features that rely on the OpenAI API.
+
+For more details on managing your API usage and credits, please refer to the OpenAI API documentation.
--- vision_agent-0.2.58/pyproject.toml
+++ vision_agent-0.2.78/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "vision-agent"
-version = "0.2.58"
+version = "0.2.78"
 description = "Toolset for Vision Agent"
 authors = ["Landing AI <dev@landing.ai>"]
 readme = "README.md"
@@ -34,9 +34,11 @@ nbformat = "^5.10.4"
 rich = "^13.7.1"
 langsmith = "^0.1.58"
 ipykernel = "^6.29.4"
-e2b = "^0.17.
-e2b-code-interpreter = "
+e2b = "^0.17.1"
+e2b-code-interpreter = "0.0.11a1"
 tenacity = "^8.3.0"
+pillow-heif = "^0.16.0"
+pytube = "15.0.0"
 
 [tool.poetry.group.dev.dependencies]
 autoflake = "1.*"
--- vision_agent-0.2.58/vision_agent/agent/vision_agent.py
+++ vision_agent-0.2.78/vision_agent/agent/vision_agent.py
@@ -7,6 +7,7 @@ import tempfile
 from pathlib import Path
 from typing import Any, Callable, Dict, List, Optional, Sequence, Union, cast
 
+from langsmith import traceable
 from PIL import Image
 from rich.console import Console
 from rich.style import Style
@@ -42,6 +43,8 @@ class DefaultImports:
 
     common_imports = [
        "from typing import *",
+       "from pillow_heif import register_heif_opener",
+       "register_heif_opener()",
     ]
 
     @staticmethod
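Registering the HEIF opener makes Pillow treat HEIC/HEIF files like any other image format, which is why those two lines are now prepended to all generated code. A minimal sketch of what the injected lines do (the file name is hypothetical):

```python
from PIL import Image
from pillow_heif import register_heif_opener

# After registration, PIL.Image.open can decode .heic/.heif files directly.
register_heif_opener()

img = Image.open("photo.heic")  # hypothetical input file
print(img.size, img.mode)
```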
@@ -96,6 +99,7 @@ def extract_json(json_str: str) -> Dict[str, Any]:
     try:
         json_dict = json.loads(json_str)
     except json.JSONDecodeError:
+        input_json_str = json_str
         if "```json" in json_str:
             json_str = json_str[json_str.find("```json") + len("```json") :]
             json_str = json_str[: json_str.find("```")]
@@ -103,7 +107,12 @@ def extract_json(json_str: str) -> Dict[str, Any]:
             json_str = json_str[json_str.find("```") + len("```") :]
             # get the last ``` not one from an intermediate string
             json_str = json_str[: json_str.find("}```")]
-        json_dict = json.loads(json_str)
+        try:
+            json_dict = json.loads(json_str)
+        except json.JSONDecodeError as e:
+            error_msg = f"Could not extract JSON from the given str: {json_str}.\nFunction input:\n{input_json_str}"
+            _LOGGER.exception(error_msg)
+            raise ValueError(error_msg) from e
     return json_dict  # type: ignore
 
 
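The change means a second parse failure now surfaces both the cleaned string and the original input instead of an unhandled `JSONDecodeError`. A quick illustration of the fence-stripping fallback path (a standalone sketch, not the library function itself):

```python
import json

raw = 'Here is the plan:\n```json\n{"plan": ["load image", "detect jars"]}\n```'

# The first parse fails (the fences are not JSON), so the fenced block is cut
# out and parsed on its own -- mirroring extract_json's fallback above.
try:
    parsed = json.loads(raw)
except json.JSONDecodeError:
    inner = raw[raw.find("```json") + len("```json"):]
    inner = inner[: inner.find("```")]
    parsed = json.loads(inner)

print(parsed["plan"])  # ['load image', 'detect jars']
```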
@@ -130,6 +139,7 @@ def extract_image(
     return new_media
 
 
+@traceable
 def write_plan(
     chat: List[Message],
     tool_desc: str,
@@ -147,6 +157,7 @@ def write_plan(
     return extract_json(model.chat(chat))["plan"]  # type: ignore
 
 
+@traceable
 def write_code(
     coder: LMM,
     chat: List[Message],
@@ -167,6 +178,7 @@ def write_code(
     return extract_code(coder(chat))
 
 
+@traceable
 def write_test(
     tester: LMM,
     chat: List[Message],
@@ -191,6 +203,7 @@ def write_test(
     return extract_code(tester(chat))
 
 
+@traceable
 def reflect(
     chat: List[Message],
     plan: str,
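`langsmith.traceable` wraps a function so each call is recorded as a run in LangSmith when tracing is configured; without credentials it is close to a pass-through. A hedged sketch of the decorator in isolation (the environment variable names are LangSmith's own conventions, not part of this diff, and the function is a stand-in):

```python
import os
from langsmith import traceable

# Tracing activates when these are set; otherwise the decorator simply
# calls through to the wrapped function.
os.environ.setdefault("LANGCHAIN_TRACING_V2", "true")
# os.environ["LANGCHAIN_API_KEY"] = "..."  # required for real uploads

@traceable
def write_plan_stub(user_request: str) -> list[str]:
    # stand-in for the decorated agent steps in the diff above
    return [f"step for: {user_request}"]

print(write_plan_stub("count the jars"))
```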
@@ -266,70 +279,19 @@ def write_and_test_code(
     count = 0
     new_working_memory: List[Dict[str, str]] = []
     while not result.success and count < max_retries:
-        log_progress(
-            {
-                "type": "code",
-                "status": "started",
-            }
-        )
-        fixed_code_and_test = extract_json(
-            debugger(
-                FIX_BUG.format(
-                    code=code,
-                    tests=test,
-                    result="\n".join(result.text().splitlines()[-50:]),
-                    feedback=format_memory(working_memory + new_working_memory),
-                )
-            )
-        )
-        old_code = code
-        old_test = test
-
-        if fixed_code_and_test["code"].strip() != "":
-            code = extract_code(fixed_code_and_test["code"])
-        if fixed_code_and_test["test"].strip() != "":
-            test = extract_code(fixed_code_and_test["test"])
-
-        new_working_memory.append(
-            {
-                "code": f"{code}\n{test}",
-                "feedback": fixed_code_and_test["reflections"],
-                "edits": get_diff(f"{old_code}\n{old_test}", f"{code}\n{test}"),
-            }
-        )
-        log_progress(
-            {
-                "type": "code",
-                "status": "running",
-                "payload": {
-                    "code": DefaultImports.prepend_imports(code),
-                    "test": test,
-                },
-            }
-        )
-
-        result = code_interpreter.exec_isolation(
-            f"{DefaultImports.to_code_string()}\n{code}\n{test}"
-        )
-        log_progress(
-            {
-                "type": "code",
-                "status": "completed" if result.success else "failed",
-                "payload": {
-                    "code": DefaultImports.prepend_imports(code),
-                    "test": test,
-                    "result": result.to_json(),
-                },
-            }
-        )
         if verbosity == 2:
-            _LOGGER.info(
-
-
-
-
-
-
+            _LOGGER.info(f"Start debugging attempt {count + 1}")
+        code, test, result = debug_code(
+            working_memory,
+            debugger,
+            code_interpreter,
+            code,
+            test,
+            result,
+            new_working_memory,
+            log_progress,
+            verbosity,
+        )
         count += 1
 
     if verbosity >= 1:
@@ -344,6 +306,95 @@ def write_and_test_code(
     }
 
 
+@traceable
+def debug_code(
+    working_memory: List[Dict[str, str]],
+    debugger: LMM,
+    code_interpreter: CodeInterpreter,
+    code: str,
+    test: str,
+    result: Execution,
+    new_working_memory: List[Dict[str, str]],
+    log_progress: Callable[[Dict[str, Any]], None],
+    verbosity: int = 0,
+) -> tuple[str, str, Execution]:
+    log_progress(
+        {
+            "type": "code",
+            "status": "started",
+        }
+    )
+
+    fixed_code_and_test = {"code": "", "test": "", "reflections": ""}
+    success = False
+    count = 0
+    while not success and count < 3:
+        try:
+            fixed_code_and_test = extract_json(
+                debugger(
+                    FIX_BUG.format(
+                        code=code,
+                        tests=test,
+                        result="\n".join(result.text().splitlines()[-50:]),
+                        feedback=format_memory(working_memory + new_working_memory),
+                    )
+                )
+            )
+            success = True
+        except Exception as e:
+            _LOGGER.exception(f"Error while extracting JSON: {e}")
+
+        count += 1
+
+    old_code = code
+    old_test = test
+
+    if fixed_code_and_test["code"].strip() != "":
+        code = extract_code(fixed_code_and_test["code"])
+    if fixed_code_and_test["test"].strip() != "":
+        test = extract_code(fixed_code_and_test["test"])
+
+    new_working_memory.append(
+        {
+            "code": f"{code}\n{test}",
+            "feedback": fixed_code_and_test["reflections"],
+            "edits": get_diff(f"{old_code}\n{old_test}", f"{code}\n{test}"),
+        }
+    )
+    log_progress(
+        {
+            "type": "code",
+            "status": "running",
+            "payload": {
+                "code": DefaultImports.prepend_imports(code),
+                "test": test,
+            },
+        }
+    )
+
+    result = code_interpreter.exec_isolation(
+        f"{DefaultImports.to_code_string()}\n{code}\n{test}"
+    )
+    log_progress(
+        {
+            "type": "code",
+            "status": "completed" if result.success else "failed",
+            "payload": {
+                "code": DefaultImports.prepend_imports(code),
+                "test": test,
+                "result": result.to_json(),
+            },
+        }
+    )
+    if verbosity == 2:
+        _print_code("Code and test after attempted fix:", code, test)
+        _LOGGER.info(
+            f"Reflection: {fixed_code_and_test['reflections']}\nCode execution result after attempted fix: {result.text(include_logs=True)}"
+        )
+
+    return code, test, result
+
+
 def _print_code(title: str, code: str, test: Optional[str] = None) -> None:
     _CONSOLE.print(title, style=Style(bgcolor="dark_orange3", bold=True))
     _CONSOLE.print("=" * 30 + " Code " + "=" * 30)
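The notable behavioral change in the extracted `debug_code` helper is the bounded retry around `extract_json`: a malformed LMM reply no longer aborts the whole debugging pass. The pattern in isolation (a sketch; `ask_debugger` is a hypothetical stand-in for the `debugger(FIX_BUG.format(...))` call):

```python
import json
import logging

log = logging.getLogger(__name__)

def ask_debugger() -> str:
    # hypothetical stand-in for the LMM debugger call
    return '{"code": "", "test": "", "reflections": "retry worked"}'

# Same shape as debug_code: up to three attempts, defaults kept on total failure.
fixed = {"code": "", "test": "", "reflections": ""}
for attempt in range(3):
    try:
        fixed = json.loads(ask_debugger())
        break
    except Exception:
        log.exception("Could not parse debugger reply (attempt %d)", attempt + 1)

print(fixed["reflections"])
```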
@@ -386,12 +437,12 @@ def retrieve_tools(
         {
             "type": "tools",
             "status": "completed",
-            "payload": tool_list,
+            "payload": list({v["description"]: v for v in tool_list}.values()),
         }
     )
 
     if verbosity == 2:
-        tool_desc_str = "\n".join(tool_desc)
+        tool_desc_str = "\n".join(set(tool_desc))
         _LOGGER.info(f"Tools Description:\n{tool_desc_str}")
     tool_info_set = set(tool_info)
     return "\n\n".join(tool_info_set)
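The dict-comprehension idiom deduplicates tools by description while preserving insertion order (later duplicates overwrite earlier ones), which a plain `set` could not do on unhashable dicts. Illustrated on toy data:

```python
tool_list = [
    {"description": "detect objects", "name": "owl_v2"},
    {"description": "caption image", "name": "blip_image_caption"},
    {"description": "detect objects", "name": "owl_v2"},  # duplicate entry
]

# Keyed by description: one entry per description, order preserved.
unique = list({t["description"]: t for t in tool_list}.values())
print([t["name"] for t in unique])  # ['owl_v2', 'blip_image_caption']
```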
@@ -481,6 +532,7 @@ class VisionAgent(Agent):
         results.pop("working_memory")
         return results  # type: ignore
 
+    @traceable
     def chat_with_workflow(
         self,
         chat: List[Message],
--- vision_agent-0.2.58/vision_agent/agent/vision_agent_prompts.py
+++ vision_agent-0.2.78/vision_agent/agent/vision_agent_prompts.py
@@ -179,6 +179,8 @@ This is the documentation for the functions you have access to. You may call any
 8. DO NOT use try except block to handle the error, let the error be raised if the code is incorrect.
 9. DO NOT import the testing function as it will available in the testing environment.
 10. Print the output of the function that is being tested.
+11. Use the output of the function that is being tested as the return value of the testing function.
+12. Run the testing function in the end and don't assign a variable to its output.
 """
 
 
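Rules 10-12 pin down the exact shape of the generated test: print the result, return it, and invoke the test bare so the final expression's value is the output. A toy test following all three rules (function names hypothetical, not from the package):

```python
def detect_jars(image_path: str) -> list[dict]:
    # hypothetical function under test
    return [{"label": "jar", "score": 0.99}]

def test_detect_jars():
    result = detect_jars("jar.jpg")
    print(result)      # rule 10: print the output
    return result      # rule 11: return the output

test_detect_jars()     # rule 12: call it last, no assignment
```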
--- /dev/null
+++ vision_agent-0.2.78/vision_agent/lmm/__init__.py
@@ -0,0 +1 @@
+from .lmm import LMM, AzureOpenAILMM, Message, OllamaLMM, OpenAILMM
--- vision_agent-0.2.58/vision_agent/lmm/lmm.py
+++ vision_agent-0.2.78/vision_agent/lmm/lmm.py
@@ -6,6 +6,7 @@ from abc import ABC, abstractmethod
 from pathlib import Path
 from typing import Any, Callable, Dict, List, Optional, Union, cast
 
+import requests
 from openai import AzureOpenAI, OpenAI
 
 import vision_agent.tools as T
@@ -163,6 +164,7 @@ class OpenAILMM(LMM):
                 {"role": "system", "content": SYSTEM_PROMPT},
                 {"role": "user", "content": prompt},
             ],
+            response_format={"type": "json_object"},
         )
 
         try:
@@ -178,7 +180,7 @@ class OpenAILMM(LMM):
         return lambda x: T.clip(x, params["prompt"])
 
     def generate_detector(self, question: str) -> Callable:
-        api_doc = T.get_tool_documentation([T.
+        api_doc = T.get_tool_documentation([T.owl_v2])
         prompt = CHOOSE_PARAMS.format(api_doc=api_doc, question=question)
         response = self.client.chat.completions.create(
             model=self.model_name,
@@ -186,6 +188,7 @@ class OpenAILMM(LMM):
                 {"role": "system", "content": SYSTEM_PROMPT},
                 {"role": "user", "content": prompt},
             ],
+            response_format={"type": "json_object"},
         )
 
         try:
@@ -198,7 +201,7 @@ class OpenAILMM(LMM):
             )
             raise ValueError("Failed to decode response")
 
-        return lambda x: T.
+        return lambda x: T.owl_v2(params["prompt"], x)
 
     def generate_segmentor(self, question: str) -> Callable:
         api_doc = T.get_tool_documentation([T.grounding_sam])
@@ -209,6 +212,7 @@ class OpenAILMM(LMM):
                 {"role": "system", "content": SYSTEM_PROMPT},
                 {"role": "user", "content": prompt},
             ],
+            response_format={"type": "json_object"},
         )
 
         try:
@@ -224,16 +228,16 @@ class OpenAILMM(LMM):
         return lambda x: T.grounding_sam(params["prompt"], x)
 
     def generate_zero_shot_counter(self, question: str) -> Callable:
-        return T.
+        return T.loca_zero_shot_counting
 
     def generate_image_qa_tool(self, question: str) -> Callable:
-        return lambda x: T.
+        return lambda x: T.git_vqa_v2(question, x)
 
 
 class AzureOpenAILMM(OpenAILMM):
     def __init__(
         self,
-        model_name: str =
+        model_name: Optional[str] = None,
         api_key: Optional[str] = None,
         api_version: str = "2024-02-01",
         azure_endpoint: Optional[str] = None,
@@ -245,14 +249,20 @@ class AzureOpenAILMM(OpenAILMM):
             api_key = os.getenv("AZURE_OPENAI_API_KEY")
         if not azure_endpoint:
             azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
+        if not model_name:
+            model_name = os.getenv("AZURE_OPENAI_CHAT_MODEL_DEPLOYMENT_NAME")
 
         if not api_key:
             raise ValueError("OpenAI API key is required.")
         if not azure_endpoint:
             raise ValueError("Azure OpenAI endpoint is required.")
+        if not model_name:
+            raise ValueError("Azure OpenAI chat model deployment name is required.")
 
         self.client = AzureOpenAI(
-            api_key=api_key,
+            api_key=api_key,
+            api_version=api_version,
+            azure_endpoint=azure_endpoint,
         )
         self.model_name = model_name
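`response_format={"type": "json_object"}` asks the OpenAI chat API to emit syntactically valid JSON, removing one class of decode failures in the `generate_*` helpers. A hedged sketch of the call shape (the model name is an assumption; the diff uses `self.model_name`):

```python
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment
response = client.chat.completions.create(
    model="gpt-4o",  # assumption
    messages=[
        # JSON mode requires the word "json" to appear in a message
        {"role": "system", "content": "Reply in JSON."},
        {"role": "user", "content": "Pick a prompt for a jar detector."},
    ],
    response_format={"type": "json_object"},
)
print(response.choices[0].message.content)  # parseable JSON
```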
@@ -261,3 +271,84 @@ class AzureOpenAILMM(OpenAILMM):
         if json_mode:
             kwargs["response_format"] = {"type": "json_object"}
         self.kwargs = kwargs
+
+
+class OllamaLMM(LMM):
+    r"""An LMM class for the ollama."""
+
+    def __init__(
+        self,
+        model_name: str = "llava",
+        base_url: Optional[str] = "http://localhost:11434/api",
+        json_mode: bool = False,
+        **kwargs: Any,
+    ):
+        self.url = base_url
+        self.model_name = model_name
+        self.json_mode = json_mode
+        self.stream = False
+
+    def __call__(
+        self,
+        input: Union[str, List[Message]],
+    ) -> str:
+        if isinstance(input, str):
+            return self.generate(input)
+        return self.chat(input)
+
+    def chat(
+        self,
+        chat: List[Message],
+    ) -> str:
+        """Chat with the LMM model.
+
+        Parameters:
+            chat (List[Dict[str, str]]): A list of dictionaries containing the chat
+                messages. The messages can be in the format:
+                [{"role": "user", "content": "Hello!"}, ...]
+                or if it contains media, it should be in the format:
+                [{"role": "user", "content": "Hello!", "media": ["image1.jpg", ...]}, ...]
+        """
+        fixed_chat = []
+        for message in chat:
+            if "media" in message:
+                message["images"] = [encode_image(m) for m in message["media"]]
+                del message["media"]
+            fixed_chat.append(message)
+        url = f"{self.url}/chat"
+        model = self.model_name
+        messages = fixed_chat
+        data = {"model": model, "messages": messages, "stream": self.stream}
+        json_data = json.dumps(data)
+        response = requests.post(url, data=json_data)
+        if response.status_code != 200:
+            raise ValueError(f"Request failed with status code {response.status_code}")
+        response = response.json()
+        return response["message"]["content"]  # type: ignore
+
+    def generate(
+        self,
+        prompt: str,
+        media: Optional[List[Union[str, Path]]] = None,
+    ) -> str:
+
+        url = f"{self.url}/generate"
+        data = {
+            "model": self.model_name,
+            "prompt": prompt,
+            "images": [],
+            "stream": self.stream,
+        }
+
+        json_data = json.dumps(data)
+        if media and len(media) > 0:
+            for m in media:
+                data["images"].append(encode_image(m))  # type: ignore
+
+        response = requests.post(url, data=json_data)
+
+        if response.status_code != 200:
+            raise ValueError(f"Request failed with status code {response.status_code}")
+
+        response = response.json()
+        return response["response"]  # type: ignore
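`OllamaLMM` talks to Ollama's plain HTTP API, so everything runs locally with no cloud key. A usage sketch, assuming an Ollama server on the default port with the `llava` model already pulled:

```python
from vision_agent.lmm import OllamaLMM

# assumes `ollama serve` is running and `ollama pull llava` has been done
lmm = OllamaLMM(model_name="llava", base_url="http://localhost:11434/api")

# plain text generation via __call__ -> generate
print(lmm("Describe what a jar detector should look for."))

# grounding on an image; media paths are base64-encoded into the request
print(lmm.generate("What objects are on the table?", media=["table.jpg"]))
```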
--- vision_agent-0.2.58/vision_agent/tools/__init__.py
+++ vision_agent-0.2.78/vision_agent/tools/__init__.py
@@ -7,25 +7,37 @@ from .tools import (
     TOOLS,
     TOOLS_DF,
     UTILITIES_DOCSTRING,
+    blip_image_caption,
     clip,
     closest_box_distance,
     closest_mask_distance,
     extract_frames,
+    florencev2_image_caption,
     get_tool_documentation,
+    florencev2_object_detection,
+    detr_segmentation,
+    depth_anything_v2,
+    generate_soft_edge_image,
+    dpt_hybrid_midas,
+    generate_pose_image,
+    git_vqa_v2,
     grounding_dino,
     grounding_sam,
-
-    image_question_answering,
+    florencev2_roberta_vqa,
     load_image,
+    loca_visual_prompt_counting,
+    loca_zero_shot_counting,
     ocr,
     overlay_bounding_boxes,
     overlay_heat_map,
     overlay_segmentation_masks,
+    owl_v2,
     save_image,
     save_json,
     save_video,
-
-
+    template_match,
+    vit_image_classification,
+    vit_nsfw_classification,
 )
 
 __new_tools__ = [