vision-agent 0.2.229__py3-none-any.whl → 0.2.231__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/.sim_tools/df.csv +10 -8
- vision_agent/agent/agent_utils.py +10 -9
- vision_agent/agent/types.py +1 -0
- vision_agent/agent/vision_agent.py +3 -4
- vision_agent/agent/vision_agent_coder_prompts.py +6 -6
- vision_agent/agent/vision_agent_coder_v2.py +41 -26
- vision_agent/agent/vision_agent_planner_prompts.py +6 -6
- vision_agent/agent/vision_agent_planner_prompts_v2.py +16 -50
- vision_agent/agent/vision_agent_planner_v2.py +11 -12
- vision_agent/agent/vision_agent_prompts.py +11 -11
- vision_agent/agent/vision_agent_prompts_v2.py +18 -3
- vision_agent/agent/vision_agent_v2.py +29 -30
- vision_agent/configs/__init__.py +1 -0
- vision_agent/configs/anthropic_config.py +150 -0
- vision_agent/configs/anthropic_openai_config.py +150 -0
- vision_agent/configs/config.py +150 -0
- vision_agent/configs/openai_config.py +160 -0
- vision_agent/lmm/__init__.py +1 -1
- vision_agent/lmm/lmm.py +63 -9
- vision_agent/tools/__init__.py +4 -4
- vision_agent/tools/planner_tools.py +74 -48
- vision_agent/tools/tool_utils.py +3 -0
- vision_agent/tools/tools.py +49 -31
- vision_agent/utils/sim.py +33 -12
- vision_agent-0.2.231.dist-info/METADATA +148 -0
- vision_agent-0.2.231.dist-info/RECORD +52 -0
- vision_agent-0.2.229.dist-info/METADATA +0 -562
- vision_agent-0.2.229.dist-info/RECORD +0 -47
- {vision_agent-0.2.229.dist-info → vision_agent-0.2.231.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.229.dist-info → vision_agent-0.2.231.dist-info}/WHEEL +0 -0
 
    
vision_agent/tools/tools.py CHANGED

```diff
@@ -222,7 +222,7 @@ def sam2(
     ret = _sam2(image, detections, image_size)
     _display_tool_trace(
         sam2.__name__,
-        {},
+        {"detections": detections},
         ret["display_data"],
         ret["files"],
     )
```
```diff
@@ -314,18 +314,29 @@ def od_sam2_video_tracking(
 
     # Process each segment and collect detections
     detections_per_segment: List[Any] = []
-
-
-
-
-
-
-
-
-
-
-
-
+    with ThreadPoolExecutor() as executor:
+        futures = {
+            executor.submit(
+                process_segment,
+                segment_frames=segment,
+                od_model=od_model,
+                prompt=prompt,
+                fine_tune_id=fine_tune_id,
+                chunk_length=chunk_length,
+                image_size=image_size,
+                segment_index=segment_index,
+                object_detection_tool=_apply_object_detection,
+            ): segment_index
+            for segment_index, segment in enumerate(segments)
+        }
+
+        for future in as_completed(futures):
+            segment_index = futures[future]
+            detections_per_segment.append((segment_index, future.result()))
+
+    detections_per_segment = [
+        x[1] for x in sorted(detections_per_segment, key=lambda x: x[0])
+    ]
 
     merged_detections = merge_segments(detections_per_segment)
     post_processed = post_process(merged_detections, image_size)
```
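The sequential per-segment loop becomes a `ThreadPoolExecutor` fan-out. Because `as_completed` yields futures in completion order, each result is keyed by its segment index and re-sorted before merging. A minimal, runnable sketch of that pattern (with a stand-in `process_segment`, not the library's helper):

```python
from concurrent.futures import ThreadPoolExecutor, as_completed

def process_segment(segment_frames, segment_index):
    # stand-in for the real per-segment object-detection call
    return f"detections for segment {segment_index}"

segments = [["f0", "f1"], ["f2", "f3"], ["f4"]]
results = []
with ThreadPoolExecutor() as executor:
    futures = {
        executor.submit(process_segment, segment_frames=seg, segment_index=i): i
        for i, seg in enumerate(segments)
    }
    for future in as_completed(futures):
        results.append((futures[future], future.result()))

# completion order is arbitrary, so restore segment order before merging
results = [r for _, r in sorted(results, key=lambda x: x[0])]
assert results[0] == "detections for segment 0"
```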
```diff
@@ -390,7 +401,7 @@ def _owlv2_object_detection(
         {
             "label": bbox["label"],
             "bbox": normalize_bbox(bbox["bounding_box"], image_size),
-            "score": bbox["score"],
+            "score": round(bbox["score"], 2),
         }
         for bbox in bboxes
     ]
@@ -398,7 +409,7 @@ def _owlv2_object_detection(
         {
             "label": bbox["label"],
             "bbox": bbox["bounding_box"],
-            "score": bbox["score"],
+            "score": round(bbox["score"], 2),
         }
         for bbox in bboxes
     ]
```
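Both the normalized and pixel-space detection dicts now round scores to two decimal places, which keeps tool traces and model-facing output compact, e.g.:

```python
# illustrative input in the shape the hunks above operate on
bboxes = [{"label": "person", "bounding_box": [10, 20, 110, 220], "score": 0.87654321}]
compact = [
    {"label": b["label"], "bbox": b["bounding_box"], "score": round(b["score"], 2)}
    for b in bboxes
]
print(compact[0]["score"])  # 0.88
```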
```diff
@@ -582,7 +593,7 @@ def owlv2_sam2_video_tracking(
     )
     _display_tool_trace(
         owlv2_sam2_video_tracking.__name__,
-        {},
+        {"prompt": prompt, "chunk_length": chunk_length},
         ret["display_data"],
         ret["files"],
     )
```
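As in the `sam2` hunk above, the trace call now receives the actual invocation parameters instead of an empty dict, so trace consumers can see what the tool was called with. A hypothetical stand-in for `_display_tool_trace` (the real helper is internal to `vision_agent.tools.tools`) illustrates the call shape:

```python
def _display_tool_trace(name, parameters, display_data, files):
    # hypothetical stand-in: the real helper renders the trace for the agent
    print(f"{name} called with {parameters}")

_display_tool_trace(
    "owlv2_sam2_video_tracking",
    {"prompt": "person", "chunk_length": 10},
    None,  # display_data
    [],    # files
)
```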
```diff
@@ -595,14 +606,14 @@ def owlv2_sam2_video_tracking(
 def florence2_object_detection(
     prompt: str, image: np.ndarray, fine_tune_id: Optional[str] = None
 ) -> List[Dict[str, Any]]:
-    """'florence2_object_detection' is a tool that can detect multiple
-
-
-
-    confidence scores of 1.0.
+    """'florence2_object_detection' is a tool that can detect multiple objects given a
+    text prompt which can be object names or caption. You can optionally separate the
+    object names in the text with commas. It returns a list of bounding boxes with
+    normalized coordinates, label names and associated confidence scores of 1.0.
 
     Parameters:
-        prompt (str): The prompt to ground to the image.
+        prompt (str): The prompt to ground to the image. Use exclusive categories that
+            do not overlap such as 'person, car' and NOT 'person, athlete'.
         image (np.ndarray): The image to used to detect objects
         fine_tune_id (Optional[str]): If you have a fine-tuned model, you can pass the
             fine-tuned model ID here to use it.
@@ -681,7 +692,8 @@ def florence2_sam2_instance_segmentation(
     1.0.
 
     Parameters:
-        prompt (str): The prompt to ground to the image.
+        prompt (str): The prompt to ground to the image. Use exclusive categories that
+            do not overlap such as 'person, car' and NOT 'person, athlete'.
         image (np.ndarray): The image to ground the prompt to.
         fine_tune_id (Optional[str]): If you have a fine-tuned model, you can pass the
             fine-tuned model ID here to use it.
@@ -769,7 +781,8 @@ def florence2_sam2_video_tracking(
     is useful for tracking and counting without duplicating counts.
 
     Parameters:
-        prompt (str): The prompt to ground to the
+        prompt (str): The prompt to ground to the image. Use exclusive categories that
+            do not overlap such as 'person, car' and NOT 'person, athlete'.
         frames (List[np.ndarray]): The list of frames to ground the prompt to.
         chunk_length (Optional[int]): The number of frames to re-run florence2 to find
             new objects.
```
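The same guidance lands in three Florence-2 docstrings: prompt categories should be mutually exclusive so that boxes do not compete for the same objects. A hedged usage sketch (assumes a real image array; follows the `import vision_agent.tools as T` style of the package README):

```python
import numpy as np
import vision_agent.tools as T

image = np.zeros((480, 640, 3), dtype=np.uint8)  # placeholder image

# good: 'person' and 'car' are disjoint categories
dets = T.florence2_object_detection("person, car", image)

# avoid: 'athlete' overlaps 'person', so the model gets ambiguous labels
# dets = T.florence2_object_detection("person, athlete", image)
```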
```diff
@@ -1679,7 +1692,7 @@ def video_temporal_localization(
     prompt: str,
     frames: List[np.ndarray],
     model: str = "qwen2vl",
-    chunk_length_frames:
+    chunk_length_frames: int = 2,
 ) -> List[float]:
     """'video_temporal_localization' will run qwen2vl on each chunk_length_frames
     value selected for the video. It can detect multiple objects independently per
@@ -1693,7 +1706,7 @@ def video_temporal_localization(
         frames (List[np.ndarray]): The reference frames used for the question
         model (str): The model to use for the inference. Valid values are
             'qwen2vl', 'gpt4o'.
-        chunk_length_frames (
+        chunk_length_frames (int): length of each chunk in frames
 
     Returns:
         List[float]: A list of floats with a value of 1.0 if the objects to be found
@@ -1712,8 +1725,7 @@ def video_temporal_localization(
         "model": model,
         "function_name": "video_temporal_localization",
     }
-
-        payload["chunk_length_frames"] = chunk_length_frames
+    payload["chunk_length_frames"] = chunk_length_frames
 
     data = send_inference_request(
         payload, "video-temporal-localization", files=files, v2=True
@@ -1724,7 +1736,13 @@ def video_temporal_localization(
         data,
         files,
     )
-
+    chunked_data = [cast(float, value) for value in data]
+
+    full_data = []
+    for value in chunked_data:
+        full_data.extend([value] * chunk_length_frames)
+
+    return full_data[: len(frames)]
 
 
 def vit_image_classification(image: np.ndarray) -> Dict[str, Any]:
```
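`chunk_length_frames` becomes a plain `int` (default 2), the payload field is always sent, and the per-chunk scores returned by the backend are now expanded into per-frame scores, truncated to the frame count. The expansion in isolation:

```python
# one score per chunk, as returned by the temporal-localization backend
chunked_data = [1.0, 0.0, 1.0]
chunk_length_frames = 2
num_frames = 5  # len(frames)

full_data = []
for value in chunked_data:
    full_data.extend([value] * chunk_length_frames)

print(full_data[:num_frames])  # [1.0, 1.0, 0.0, 0.0, 1.0]
```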
```diff
@@ -2148,7 +2166,7 @@ def siglip_classification(image: np.ndarray, labels: List[str]) -> Dict[str, Any
     return response
 
 
-#
+# Agentic OD Tools
 
 
 def _agentic_object_detection(
@@ -2644,7 +2662,7 @@ def save_image(image: np.ndarray, file_path: str) -> None:
 
 
 def save_video(
-    frames: List[np.ndarray], output_video_path: Optional[str] = None, fps: float =
+    frames: List[np.ndarray], output_video_path: Optional[str] = None, fps: float = 5
 ) -> str:
     """'save_video' is a utility function that saves a list of frames as a mp4 video file on disk.
```
         | 
    
        vision_agent/utils/sim.py
    CHANGED
    
    | 
         @@ -98,10 +98,12 @@ class Sim: 
     | 
|
| 
       98 
98 
     | 
    
         
             
                        raise ValueError("key is required if no column 'embs' is present.")
         
     | 
| 
       99 
99 
     | 
    
         | 
| 
       100 
100 
     | 
    
         
             
                    if sim_key is not None:
         
     | 
| 
       101 
     | 
    
         
            -
                        self.df 
     | 
| 
       102 
     | 
    
         
            -
                             
     | 
| 
       103 
     | 
    
         
            -
                                 
     | 
| 
       104 
     | 
    
         
            -
             
     | 
| 
      
 101 
     | 
    
         
            +
                        self.df = self.df.assign(
         
     | 
| 
      
 102 
     | 
    
         
            +
                            embs=self.df[sim_key].apply(
         
     | 
| 
      
 103 
     | 
    
         
            +
                                lambda x: get_embedding(
         
     | 
| 
      
 104 
     | 
    
         
            +
                                    self.emb_call,
         
     | 
| 
      
 105 
     | 
    
         
            +
                                    x,
         
     | 
| 
      
 106 
     | 
    
         
            +
                                )
         
     | 
| 
       105 
107 
     | 
    
         
             
                            )
         
     | 
| 
       106 
108 
     | 
    
         
             
                        )
         
     | 
| 
       107 
109 
     | 
    
         | 
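Every in-place column write in `Sim` is replaced with `DataFrame.assign`, which attaches the column to a new frame and sidesteps pandas chained-assignment pitfalls; the identical rewrite is repeated below for `AzureSim`, `OllamaSim`, and `StellaSim`. The pattern in isolation, with a stand-in embedding function:

```python
import pandas as pd

def get_embedding(emb_call, text):
    # stand-in for the real embedding call (OpenAI/Azure/Ollama/Stella)
    return [float(len(text))]

df = pd.DataFrame({"doc": ["a tool description", "another tool"]})
# instead of df["embs"] = ..., build a new frame with the column attached
df = df.assign(embs=df["doc"].apply(lambda x: get_embedding(None, x)))
print(df["embs"].tolist())  # [[18.0], [12.0]]
```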
```diff
@@ -141,7 +143,9 @@ class Sim:
 
         df_load = pd.read_csv(load_dir / "df.csv")
         if platform.system() == "Windows":
-            df_load
+            df_load = df_load.assign(
+                doc=df_load.doc.apply(lambda x: x.replace("\r", ""))
+            )
         return df.equals(df_load)  # type: ignore
 
     @lru_cache(maxsize=256)
@@ -166,7 +170,9 @@ class Sim:
             self.emb_call,
             query,
         )
-        self.df
+        self.df = self.df.assign(
+            sim=self.df.embs.apply(lambda x: 1 - cosine(x, embedding))
+        )
         res = self.df.sort_values("sim", ascending=False).head(k)
         if thresh is not None:
             res = res[res.sim > thresh]
@@ -214,8 +220,13 @@ class AzureSim(Sim):
             raise ValueError("key is required if no column 'embs' is present.")
 
         if sim_key is not None:
-            self.df
-
+            self.df = self.df.assign(
+                embs=self.df[sim_key].apply(
+                    lambda x: get_embedding(
+                        self.emb_call,
+                        x,
+                    )
+                )
             )
 
 
@@ -245,8 +256,13 @@ class OllamaSim(Sim):
             raise ValueError("key is required if no column 'embs' is present.")
 
         if sim_key is not None:
-            self.df
-
+            self.df = self.df.assign(
+                embs=self.df[sim_key].apply(
+                    lambda x: get_embedding(
+                        self.emb_call,
+                        x,
+                    )
+                )
             )
 
 
@@ -267,8 +283,13 @@ class StellaSim(Sim):
             raise ValueError("key is required if no column 'embs' is present.")
 
         if sim_key is not None:
-            self.df
-
+            self.df = self.df.assign(
+                embs=self.df[sim_key].apply(
+                    lambda x: get_embedding(
+                        self.emb_call,
+                        x,
+                    )
+                )
             )
 
     @staticmethod
```
vision_agent-0.2.231.dist-info/METADATA ADDED

````diff
@@ -0,0 +1,148 @@
+Metadata-Version: 2.1
+Name: vision-agent
+Version: 0.2.231
+Summary: Toolset for Vision Agent
+Author: Landing AI
+Author-email: dev@landing.ai
+Requires-Python: >=3.9,<4.0
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Requires-Dist: anthropic (>=0.31.0,<0.32.0)
+Requires-Dist: av (>=11.0.0,<12.0.0)
+Requires-Dist: e2b (>=0.17.2a50,<0.18.0)
+Requires-Dist: e2b-code-interpreter (==0.0.11a37)
+Requires-Dist: flake8 (>=7.0.0,<8.0.0)
+Requires-Dist: ipykernel (>=6.29.4,<7.0.0)
+Requires-Dist: langsmith (>=0.1.58,<0.2.0)
+Requires-Dist: libcst (>=1.5.0,<2.0.0)
+Requires-Dist: matplotlib (>=3.9.2,<4.0.0)
+Requires-Dist: nbclient (>=0.10.0,<0.11.0)
+Requires-Dist: nbformat (>=5.10.4,<6.0.0)
+Requires-Dist: numpy (>=1.21.0,<2.0.0)
+Requires-Dist: openai (>=1.0.0,<2.0.0)
+Requires-Dist: opencv-python (>=4.0.0,<5.0.0)
+Requires-Dist: opentelemetry-api (>=1.29.0,<2.0.0)
+Requires-Dist: pandas (>=2.0.0,<3.0.0)
+Requires-Dist: pillow (>=10.0.0,<11.0.0)
+Requires-Dist: pillow-heif (>=0.16.0,<0.17.0)
+Requires-Dist: pydantic (==2.7.4)
+Requires-Dist: pydantic-settings (>=2.2.1,<3.0.0)
+Requires-Dist: pytube (==15.0.0)
+Requires-Dist: requests (>=2.0.0,<3.0.0)
+Requires-Dist: rich (>=13.7.1,<14.0.0)
+Requires-Dist: scikit-learn (>=1.5.2,<2.0.0)
+Requires-Dist: scipy (>=1.13.0,<1.14.0)
+Requires-Dist: tabulate (>=0.9.0,<0.10.0)
+Requires-Dist: tenacity (>=8.3.0,<9.0.0)
+Requires-Dist: tqdm (>=4.64.0,<5.0.0)
+Requires-Dist: typing_extensions (>=4.0.0,<5.0.0)
+Project-URL: Homepage, https://landing.ai
+Project-URL: documentation, https://github.com/landing-ai/vision-agent
+Project-URL: repository, https://github.com/landing-ai/vision-agent
+Description-Content-Type: text/markdown
+
+<div align="center">
+    <picture>
+        <source media="(prefers-color-scheme: dark)" srcset="https://github.com/landing-ai/vision-agent/blob/main/assets/logo_light.svg?raw=true">
+        <source media="(prefers-color-scheme: light)" srcset="https://github.com/landing-ai/vision-agent/blob/main/assets/logo_dark.svg?raw=true">
+        <img alt="VisionAgent" height="200px" src="https://github.com/landing-ai/vision-agent/blob/main/assets/logo_light.svg?raw=true">
+    </picture>
+
+[](https://discord.gg/wPdN8RCYew)
+
+[](https://badge.fury.io/py/vision-agent)
+
+</div>
+
+## VisionAgent
+VisionAgent is a library that helps you utilize agent frameworks to generate code to
+solve your vision task. Check out our discord for updates and roadmaps! The fastest
+way to test out VisionAgent is to use our web application which you can find [here](https://va.landing.ai/).
+
+## Installation
+```bash
+pip install vision-agent
+```
+
+```bash
+export ANTHROPIC_API_KEY="your-api-key"
+export OPENAI_API_KEY="your-api-key"
+```
+
+> **_NOTE:_** We found using both Anthropic Claude-3.5 and OpenAI o1 to be provide the best performance for VisionAgent. If you want to use a different LLM provider or only one, see 'Using Other LLM Providers' below.
+
+## Documentation
+
+[VisionAgent Library Docs](https://landing-ai.github.io/vision-agent/)
+
+## Examples
+### Counting cans in an image
+You can run VisionAgent in a local Jupyter Notebook [Counting cans in an image](https://github.com/landing-ai/vision-agent/blob/main/examples/notebooks/counting_cans.ipynb)
+
+### Generating code
+You can use VisionAgent to generate code to count the number of people in an image:
+```python
+from vision_agent.agent import VisionAgentCoderV2
+from vision_agent.agent.types import AgentMessage
+
+agent = VisionAgentCoderV2(verbose=True)
+code_context = agent.generate_code(
+    [
+        AgentMessage(
+            role="user",
+            content="Count the number of people in this image",
+            media=["people.png"]
+        )
+    ]
+)
+
+with open("generated_code.py", "w") as f:
+    f.write(code_context.code + "\n" + code_context.test)
+```
+
+### Using the tools directly
+VisionAgent produces code that utilizes our tools. You can also use the tools directly.
+For example if you wanted to detect people in an image and visualize the results:
+```python
+import vision_agent.tools as T
+import matplotlib.pyplot as plt
+
+image = T.load_image("people.png")
+dets = T.countgd_object_detection("person", image)
+# visualize the countgd bounding boxes on the image
+viz = T.overlay_bounding_boxes(image, dets)
+
+# save the visualization to a file
+T.save_image(viz, "people_detected.png")
+
+# display the visualization
+plt.imshow(viz)
+plt.show()
+```
+
+You can also use the tools for running on video files:
+```python
+import vision_agent.tools as T
+
+frames_and_ts = T.extract_frames_and_timestamps("people.mp4")
+# extract the frames from the frames_and_ts list
+frames = [f["frame"] for f in frames_and_ts]
+
+# run the countgd tracking on the frames
+tracks = T.countgd_sam2_video_tracking("person", frames)
+# visualize the countgd tracking results on the frames and save the video
+viz = T.overlay_segmentation_masks(frames, tracks)
+T.save_video(viz, "people_detected.mp4")
+```
+
+## Using Other LLM Providers
+You can use other LLM providers by changing `config.py` in the `vision_agent/configs`
+directory. For example to change to Anthropic simply just run:
+```bash
+cp vision_agent/configs/anthropic_config.py vision_agent/configs/config.py
+```
+
+> **_NOTE:_** VisionAgent moves fast and we are constantly updating and changing the library. If you have any questions or need help, please reach out to us on our discord channel.
+
````
vision_agent-0.2.231.dist-info/RECORD ADDED

```diff
@@ -0,0 +1,52 @@
+vision_agent/.sim_tools/df.csv,sha256=XdcgkjC7CjF_CoJnXmFkYOPUBwHemiwsauh62b1eh1M,42472
+vision_agent/.sim_tools/embs.npy,sha256=YJe8EcKVNmeX_75CS2T1sbY-sUS_1HQAMT-34zc18a0,254080
+vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
+vision_agent/agent/README.md,sha256=Q4w7FWw38qaWosQYAZ7NqWx8Q5XzuWrlv7nLhjUd1-8,5527
+vision_agent/agent/__init__.py,sha256=M8CffavdIh8Zh-skznLHIaQkYGCGK7vk4dq1FaVkbs4,617
+vision_agent/agent/agent.py,sha256=_1tHWAs7Jm5tqDzEcPfCRvJV3uRRveyh4n9_9pd6I1w,1565
+vision_agent/agent/agent_utils.py,sha256=IXxN9XruaeNTreUrdztb3kWJhimpsdH6hjv6xT4jg1Q,14062
+vision_agent/agent/types.py,sha256=dIdxATH_PP76pD5Wfo0oofWt6iPQh0vpf48QbEQSzhs,2472
+vision_agent/agent/vision_agent.py,sha256=fH9NOLk7twL1fPr9vLSqkaYhah-gfDWfTOVF2FfMyzI,23461
+vision_agent/agent/vision_agent_coder.py,sha256=flUxOibyGZK19BCSK5mhaD3HjCxHw6c6FtKom6N2q1E,27359
+vision_agent/agent/vision_agent_coder_prompts.py,sha256=_kkPLezUVnBXieNPlxMQab_6J6P7F-aa6ItF5NhZZsM,12281
+vision_agent/agent/vision_agent_coder_prompts_v2.py,sha256=idmSMfxebPULqqvllz3gqRzGDchEvS5dkGngvBs4PGo,4872
+vision_agent/agent/vision_agent_coder_v2.py,sha256=ZR2PQoMqNM6yK3vn_0rrCJf_EplRKye7t7bVjyl51ls,16476
+vision_agent/agent/vision_agent_planner.py,sha256=fFzjNkZBKkh8Y_oS06ATI4qz31xmIJvixb_tV1kX8KA,18590
+vision_agent/agent/vision_agent_planner_prompts.py,sha256=rYRdJthc-sQN57VgCBKrF09Sd73BSxcBdjNe6C4WNZ8,6837
+vision_agent/agent/vision_agent_planner_prompts_v2.py,sha256=5xTx93lNpoyT4eAD9jicwDyDAkuW7eQqicr17zCjrQw,33337
+vision_agent/agent/vision_agent_planner_v2.py,sha256=7hBQdg9y4oCLDiQ54Kh12_uIMywedKKNPWiKPRA01cQ,20568
+vision_agent/agent/vision_agent_prompts.py,sha256=KaJwYPUP7_GvQsCPPs6Fdawmi3AQWmWajBUuzj7gTG4,13812
+vision_agent/agent/vision_agent_prompts_v2.py,sha256=AW_bW1boGiCLyLFd3h4GQenfDACttQagDHwpBkSW4Xo,2518
+vision_agent/agent/vision_agent_v2.py,sha256=335VT0hk0jkB14y4W3cJo5ueEu1wY_jjN-R_m2xaQ30,10752
+vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+vision_agent/clients/http.py,sha256=k883i6M_4nl7zwwHSI-yP5sAgQZIDPM1nrKD6YFJ3Xs,2009
+vision_agent/clients/landing_public_api.py,sha256=lU2ev6E8NICmR8DMUljuGcVFy5VNJQ4WQkWC8WnnJEc,1503
+vision_agent/configs/__init__.py,sha256=Iu75-w9_nlPmnB_qKA7nYaaaHf7xtTrDmK8N4v2WV34,27
+vision_agent/configs/anthropic_config.py,sha256=T1UuESgiY8913A6wA42P7-cg8FTk9-LkJpyywo7OnIQ,4298
+vision_agent/configs/anthropic_openai_config.py,sha256=YQjFxmlxppn5L55dJjK_v1myBJQ_V5J4q25pmUtwTOU,4310
+vision_agent/configs/config.py,sha256=YQjFxmlxppn5L55dJjK_v1myBJQ_V5J4q25pmUtwTOU,4310
+vision_agent/configs/openai_config.py,sha256=v2_AIY89d7LKWn4uqA2G047U2IdmnqZrGH2Iww9gRIw,4498
+vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
+vision_agent/lmm/__init__.py,sha256=xk2Rn8Zgpy2xwYaOGHzy4tXxnxo2aj6SkpNjeJ8yxcY,111
+vision_agent/lmm/lmm.py,sha256=arwfYPWme_RxCxSpEQ0ZkpHO22GFPCwVeoSvXqLPOAk,19288
+vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
+vision_agent/tools/__init__.py,sha256=zopUrANPx7p0NGy6BxmEaYhDrj8DX8w7BLfgmCbz-mU,2897
+vision_agent/tools/meta_tools.py,sha256=TPeS7QWnc_PmmU_ndiDT03dXbQ5yDSP33E7U8cSj7Ls,28660
+vision_agent/tools/planner_tools.py,sha256=Mk3N-I-Qs4ezeyv8EL9BxdxmJG5oWiH5bFkvgwJKB0s,14660
+vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
+vision_agent/tools/tool_utils.py,sha256=xJRWF96Ge9RvhhVHrOtifjUYoc4HIJ2y7c2VOQ2Lp8s,10152
+vision_agent/tools/tools.py,sha256=3B3xWFVA3qfAO6ySSQ2yUPUAiTrgJomL48hLO_VP6RQ,106015
+vision_agent/tools/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
+vision_agent/utils/__init__.py,sha256=QKk4zVjMwGxQI0MQ-aZZA50N-qItxRY4EB9CwQkZ2HY,185
+vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
+vision_agent/utils/execute.py,sha256=vOEP5Ys7S2lc0_7pOJbgk7OaWi85hrCNu9_8Bo3zk6I,29356
+vision_agent/utils/image_utils.py,sha256=z_ONgcza125B10NkoGwPOzXnL470bpTWZbkB16NeeH0,12188
+vision_agent/utils/sim.py,sha256=DYya76dYVtifFyXilMLxBzGgyfyeqhEwU4RJ4894lCI,9796
+vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
+vision_agent/utils/video.py,sha256=e1VwKhXzzlC5LcFMyrcQYrPnpnX4wxDpnQ-76sB4jgM,6001
+vision_agent/utils/video_tracking.py,sha256=wK5dOutqV2t2aeaxedstCBa7xy-NNQE0-QZqKu1QUds,9498
+vision_agent-0.2.231.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.231.dist-info/METADATA,sha256=N8t9F4hZ4bgyZeDhrVepMZzO5dtRmzRB8VI6fq1fFAA,5760
+vision_agent-0.2.231.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.231.dist-info/RECORD,,
```