PyPI - vision-agent - Versions diffs - 0.2.189__py3-none-any.whl → 0.2.191__py3-none-any.whl - Mend

vision-agent 0.2.189__py3-none-any.whl → 0.2.191__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

vision_agent/agent/vision_agent_coder.py CHANGED Viewed

@@ -527,9 +527,6 @@ class VisionAgentCoder(Agent):
                 [{"role": "user", "content": "describe your task here..."}].
             plan_context (PlanContext): The context of the plan, including the plans,
                 best_plan, plan_thoughts, tool_doc, and tool_output.
-            test_multi_plan (bool): Whether to test multiple plans or just the best plan.
-            custom_tool_names (Optional[List[str]]): A list of custom tool names to use
-                for the planner.
         Returns:
             Dict[str, Any]: A dictionary containing the code output by the

vision_agent/agent/vision_agent_planner.py CHANGED Viewed

@@ -519,11 +519,7 @@ class OpenAIVisionAgentPlanner(VisionAgentPlanner):
         code_interpreter: Optional[Union[str, CodeInterpreter]] = None,
     ) -> None:
         super().__init__(
-            planner=(
-                OpenAILMM(temperature=0.0, json_mode=True)
-                if planner is None
-                else planner
-            ),
+            planner=(OpenAILMM(temperature=0.0) if planner is None else planner),
             tool_recommender=tool_recommender,
             verbosity=verbosity,
             report_progress_callback=report_progress_callback,
@@ -567,11 +563,7 @@ class AzureVisionAgentPlanner(VisionAgentPlanner):
         code_interpreter: Optional[Union[str, CodeInterpreter]] = None,
     ) -> None:
         super().__init__(
-            planner=(
-                AzureOpenAILMM(temperature=0.0, json_mode=True)
-                if planner is None
-                else planner
-            ),
+            planner=(AzureOpenAILMM(temperature=0.0) if planner is None else planner),
             tool_recommender=(
                 AzureSim(T.TOOLS_DF, sim_key="desc")
                 if tool_recommender is None

vision_agent/tools/__init__.py CHANGED Viewed

@@ -40,6 +40,7 @@ from .tools import (
     florence2_roberta_vqa,
     florence2_sam2_image,
     florence2_sam2_video_tracking,
+    flux_image_inpainting,
     generate_pose_image,
     generate_soft_edge_image,
     get_tool_documentation,
@@ -59,16 +60,16 @@ from .tools import (
     overlay_segmentation_masks,
     owl_v2_image,
     owl_v2_video,
+    qwen2_vl_images_vqa,
+    qwen2_vl_video_vqa,
     save_image,
     save_json,
     save_video,
+    siglip_classification,
     template_match,
+    video_temporal_localization,
     vit_image_classification,
     vit_nsfw_classification,
-    qwen2_vl_images_vqa,
-    qwen2_vl_video_vqa,
-    video_temporal_localization,
-    flux_image_inpainting,
 )
 __new_tools__ = [

vision_agent/tools/tools.py CHANGED Viewed

@@ -27,10 +27,7 @@ from vision_agent.tools.tool_utils import (
     send_inference_request,
     send_task_inference_request,
 )
-from vision_agent.tools.tools_types import (
-    JobStatus,
-    ODResponseData,
-)
+from vision_agent.tools.tools_types import JobStatus, ODResponseData
 from vision_agent.utils.exceptions import FineTuneModelIsNotReady
 from vision_agent.utils.execute import FileSerializer, MimeType
 from vision_agent.utils.image_utils import (
@@ -641,8 +638,8 @@ def loca_visual_prompt_counting(
     Parameters:
         image (np.ndarray): The image that contains lot of instances of a single object
-        visual_prompt (Dict[str, List[float]]): Bounding box of the object in format
-        [xmin, ymin, xmax, ymax]. Only 1 bounding box can be provided.
+            visual_prompt (Dict[str, List[float]]): Bounding box of the object in
+            format [xmin, ymin, xmax, ymax]. Only 1 bounding box can be provided.
     Returns:
         Dict[str, Any]: A dictionary containing the key 'count' and the count as a
@@ -750,10 +747,10 @@ def countgd_example_based_counting(
     Parameters:
         visual_prompts (List[List[float]]): Bounding boxes of the object in format
-        [xmin, ymin, xmax, ymax]. Upto 3 bounding boxes can be provided.
-        image (np.ndarray): The image that contains multiple instances of the object.
-        box_threshold (float, optional): The threshold for detection. Defaults
-            to 0.23.
+            [xmin, ymin, xmax, ymax]. Upto 3 bounding boxes can be provided. image
+            (np.ndarray): The image that contains multiple instances of the object.
+            box_threshold (float, optional): The threshold for detection. Defaults to
+            0.23.
     Returns:
         List[Dict[str, Any]]: A list of dictionaries containing the score, label, and
@@ -1845,6 +1842,48 @@ def flux_image_inpainting(
     return output_image
+def siglip_classification(image: np.ndarray, labels: List[str]) -> Dict[str, Any]:
+    """'siglip_classification' is a tool that can classify an image or a cropped detection given a list
+    of input labels or tags. It returns the same list of the input labels along with
+    their probability scores based on image content.
+    Parameters:
+        image (np.ndarray): The image to classify or tag
+        labels (List[str]): The list of labels or tags that is associated with the image
+    Returns:
+        Dict[str, Any]: A dictionary containing the labels and scores. One dictionary
+            contains a list of given labels and other a list of scores.
+    Example
+    -------
+        >>> siglip_classification(image, ['dog', 'cat', 'bird'])
+        {"labels": ["dog", "cat", "bird"], "scores": [0.68, 0.30, 0.02]},
+    """
+    if image.shape[0] < 1 or image.shape[1] < 1:
+        return {"labels": [], "scores": []}
+    image_file = numpy_to_bytes(image)
+    files = [("image", image_file)]
+    payload = {
+        "model": "siglip",
+        "labels": labels,
+    }
+    response: dict[str, Any] = send_inference_request(
+        payload=payload,
+        endpoint_name="classification",
+        files=files,
+        v2=True,
+        metadata_payload={"function_name": "siglip_classification"},
+    )
+    return response
 # Utility and visualization functions

{vision_agent-0.2.189.dist-info → vision_agent-0.2.191.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.189
+Version: 0.2.191
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai
@@ -54,11 +54,7 @@ Description-Content-Type: text/markdown
 </div>
 VisionAgent is a library that helps you utilize agent frameworks to generate code to
-solve your vision task. Many current vision problems can easily take hours or days to
-solve, you need to find the right model, figure out how to use it and program it to
-accomplish the task you want. VisionAgent aims to provide an in-seconds experience by
-allowing users to describe their problem in text and have the agent framework generate
-code to solve the task for them. Check out our discord for updates and roadmaps!
+solve your vision task. Check out our discord for updates and roadmaps!
 ## Table of Contents
 - [🚀Quick Start](#quick-start)
@@ -82,19 +78,19 @@ To get started with the python library, you can install it using pip:
 pip install vision-agent
 ```
-Ensure you have an Anthropic key and an OpenAI API key and set in your environment
+Ensure you have both an Anthropic key and an OpenAI API key and set in your environment
 variables (if you are using Azure OpenAI please see the Azure setup section):
 ```bash
-export ANTHROPIC_API_KEY="your-api-key"
-export OPENAI_API_KEY="your-api-key"
+export ANTHROPIC_API_KEY="your-api-key" # needed for VisionAgent and VisionAgentCoder
+export OPENAI_API_KEY="your-api-key" # needed for ToolRecommender
 ```
 ### Basic Usage
 To get started you can just import the `VisionAgent` and start chatting with it:
 ```python
 >>> from vision_agent.agent import VisionAgent
->>> agent = VisionAgent()
+>>> agent = VisionAgent(verbosity=2)
 >>> resp = agent("Hello")
 >>> print(resp)
 [{"role": "user", "content": "Hello"}, {"role": "assistant", "content": "{'thoughts': 'The user has greeted me. I will respond with a greeting and ask how I can assist them.', 'response': 'Hello! How can I assist you today?', 'let_user_respond': True}"}]
@@ -103,7 +99,7 @@ To get started you can just import the `VisionAgent` and start chatting with it:
 ```
 The chat messages are similar to `OpenAI`'s format with `role` and `content` keys but
-in addition to those you can add `medai` which is a list of media files that can either
+in addition to those you can add `media` which is a list of media files that can either
 be images or video files.
 ## Documentation

{vision_agent-0.2.189.dist-info → vision_agent-0.2.191.dist-info}/RECORD RENAMED Viewed

@@ -3,9 +3,9 @@ vision_agent/agent/__init__.py,sha256=RRMPhH8mgm_pCtEKiVFSjJyDi4lCr4F7k05AhK01xl
 vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
 vision_agent/agent/agent_utils.py,sha256=WYJF11PfKXlRMPnogGz3s7c2TlWoxoGzuLiIptVYE1s,5524
 vision_agent/agent/vision_agent.py,sha256=rr1P9iTbr7OsjgMYWCeIxQYI4cLwPWia3NIMJNi-9Yo,26110
-vision_agent/agent/vision_agent_coder.py,sha256=3Q1VWrN-BNUoSD4OAqKazvXkP2c04PXDYu2Z1f5dQb0,31960
+vision_agent/agent/vision_agent_coder.py,sha256=7Ko1c41dvdDbSP_Yw2yz_SlE3sO6hhlpf_oCjW0we2w,31749
 vision_agent/agent/vision_agent_coder_prompts.py,sha256=gPLVXQMNSzYnQYpNm0wlH_5FPkOTaFDV24bqzK3jQ40,12221
-vision_agent/agent/vision_agent_planner.py,sha256=mjmnXG9CvYf_ZA7ZJ3ri4H-2U_Km55gF1sZYRSOlxpY,19027
+vision_agent/agent/vision_agent_planner.py,sha256=Hy4vKiae7zIIKVPgLetGArbsjGRNVOXlxY9xhFgW-A0,18871
 vision_agent/agent/vision_agent_planner_prompts.py,sha256=JDARUzko2HZdxkBtcy6wuP9DCCmbqhK_gnVgrjr6l1k,6691
 vision_agent/agent/vision_agent_prompts.py,sha256=4329ll0kqCznRALIMl-rlKWGjN92p3bcRrz8R-cO744,13748
 vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -16,11 +16,11 @@ vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1r
 vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,100
 vision_agent/lmm/lmm.py,sha256=B5ClgwvbybVCWkf9opDMLjTtJZemUU4KUkQoRxGh43I,16787
 vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
-vision_agent/tools/__init__.py,sha256=KVP4_6qxOb2lpFdQgQtyDfdkMLL1O6wVZNK19MXp-xo,2798
+vision_agent/tools/__init__.py,sha256=UrpGFB1ACOZZCAyj8vNw0IHhKm9wGp0qHOtci2cqAMU,2825
 vision_agent/tools/meta_tools.py,sha256=by7TIbH7lsLIayX_Pe2mS1iw8aeLn2T8yqAo8SkB9Kg,32074
 vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
 vision_agent/tools/tool_utils.py,sha256=VPGqGJ2ZYEJA6AW7K9X7hQv6vRlMtAQcybE4izdToCw,8196
-vision_agent/tools/tools.py,sha256=LwpFnHRPvnGaRrzHFAs9CojcbKLyhaYnJYDk7l9fGlw,83609
+vision_agent/tools/tools.py,sha256=72Ml8kxtaqIqrh4hiZQ81f5Mrl-7z-W1a6bCjIMBvoA,84952
 vision_agent/tools/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
 vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
 vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
@@ -29,7 +29,7 @@ vision_agent/utils/image_utils.py,sha256=rm9GfXvD4JrjnqKrP_f2gfq4SzmqYC0IdC1kKwd
 vision_agent/utils/sim.py,sha256=ZuSS07TUXFGjipmiQoY8TKRmSes7XXCdtU9PI8PC1sw,5609
 vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
 vision_agent/utils/video.py,sha256=tRcGp4vEnaDycigL1hBO9k0FBPtDH35fCQciVr9GqYI,6013
-vision_agent-0.2.189.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-vision_agent-0.2.189.dist-info/METADATA,sha256=8ZrD4pcM8kLhBGOhLnIITMVYwd02L84J7a_xMvYltPo,18328
-vision_agent-0.2.189.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
-vision_agent-0.2.189.dist-info/RECORD,,
+vision_agent-0.2.191.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.191.dist-info/METADATA,sha256=eZGSUWuHBTmyStliR_oxFyoWMeLW0_0qP2ULx8y_-1E,18067
+vision_agent-0.2.191.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.191.dist-info/RECORD,,

{vision_agent-0.2.189.dist-info → vision_agent-0.2.191.dist-info}/LICENSE RENAMED Viewed

File without changes

{vision_agent-0.2.189.dist-info → vision_agent-0.2.191.dist-info}/WHEEL RENAMED Viewed

File without changes

vision-agent 0.2.189__py3-none-any.whl → 0.2.191__py3-none-any.whl

vision-agent 0.2.189__py3-none-any.whl → 0.2.191__py3-none-any.whl