vision-agent 0.2.178__tar.gz → 0.2.179__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {vision_agent-0.2.178 → vision_agent-0.2.179}/PKG-INFO +1 -1
- {vision_agent-0.2.178 → vision_agent-0.2.179}/pyproject.toml +1 -1
- {vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/tools/__init__.py +1 -0
- {vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/tools/tools.py +33 -0
- {vision_agent-0.2.178 → vision_agent-0.2.179}/LICENSE +0 -0
- {vision_agent-0.2.178 → vision_agent-0.2.179}/README.md +0 -0
- {vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/__init__.py +0 -0
- {vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/agent/__init__.py +0 -0
- {vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/agent/agent.py +0 -0
- {vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/agent/agent_utils.py +0 -0
- {vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/agent/vision_agent.py +0 -0
- {vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/agent/vision_agent_coder.py +0 -0
- {vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
- {vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/agent/vision_agent_planner.py +0 -0
- {vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/agent/vision_agent_planner_prompts.py +0 -0
- {vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/agent/vision_agent_prompts.py +0 -0
- {vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/clients/__init__.py +0 -0
- {vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/clients/http.py +0 -0
- {vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/clients/landing_public_api.py +0 -0
- {vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/fonts/__init__.py +0 -0
- {vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
- {vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/lmm/__init__.py +0 -0
- {vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/lmm/lmm.py +0 -0
- {vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/lmm/types.py +0 -0
- {vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/tools/meta_tools.py +0 -0
- {vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/tools/prompts.py +0 -0
- {vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/tools/tool_utils.py +0 -0
- {vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/tools/tools_types.py +0 -0
- {vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/utils/__init__.py +0 -0
- {vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/utils/exceptions.py +0 -0
- {vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/utils/execute.py +0 -0
- {vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/utils/image_utils.py +0 -0
- {vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/utils/sim.py +0 -0
- {vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/utils/type_defs.py +0 -0
- {vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/utils/video.py +0 -0
@@ -852,6 +852,39 @@ def ixc25_image_vqa(prompt: str, image: np.ndarray) -> str:
|
|
852
852
|
return cast(str, data["answer"])
|
853
853
|
|
854
854
|
|
855
|
+
def qwen2_vl_images_vqa(prompt: str, images: List[np.ndarray]) -> str:
|
856
|
+
"""'qwen2_vl_images_vqa' is a tool that can answer any questions about arbitrary images
|
857
|
+
including regular images or images of documents or presentations. It returns text
|
858
|
+
as an answer to the question.
|
859
|
+
|
860
|
+
Parameters:
|
861
|
+
prompt (str): The question about the document image
|
862
|
+
images (List[np.ndarray]): The reference images used for the question
|
863
|
+
|
864
|
+
Returns:
|
865
|
+
str: A string which is the answer to the given prompt.
|
866
|
+
|
867
|
+
Example
|
868
|
+
-------
|
869
|
+
>>> qwen2_vl_images_vqa('Give a summary of the document', images)
|
870
|
+
'The document talks about the history of the United States of America and its...'
|
871
|
+
"""
|
872
|
+
for image in images:
|
873
|
+
if image.shape[0] < 1 or image.shape[1] < 1:
|
874
|
+
raise ValueError(f"Image is empty, image shape: {image.shape}")
|
875
|
+
|
876
|
+
files = [("images", numpy_to_bytes(image)) for image in images]
|
877
|
+
payload = {
|
878
|
+
"prompt": prompt,
|
879
|
+
"model": "qwen2vl",
|
880
|
+
"function_name": "qwen2_vl_images_vqa",
|
881
|
+
}
|
882
|
+
data: Dict[str, Any] = send_inference_request(
|
883
|
+
payload, "image-to-text", files=files, v2=True
|
884
|
+
)
|
885
|
+
return cast(str, data)
|
886
|
+
|
887
|
+
|
855
888
|
def ixc25_video_vqa(prompt: str, frames: List[np.ndarray]) -> str:
|
856
889
|
"""'ixc25_video_vqa' is a tool that can answer any questions about arbitrary videos
|
857
890
|
including regular videos or videos of documents or presentations. It returns text
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/agent/vision_agent_coder_prompts.py
RENAMED
File without changes
|
File without changes
|
{vision_agent-0.2.178 → vision_agent-0.2.179}/vision_agent/agent/vision_agent_planner_prompts.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|