vision_agent-0.2.182.tar.gz → vision_agent-0.2.183.tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {vision_agent-0.2.182 → vision_agent-0.2.183}/PKG-INFO +1 -1
- {vision_agent-0.2.182 → vision_agent-0.2.183}/pyproject.toml +1 -1
- {vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/tools/__init__.py +1 -0
- {vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/tools/tools.py +33 -2
- {vision_agent-0.2.182 → vision_agent-0.2.183}/LICENSE +0 -0
- {vision_agent-0.2.182 → vision_agent-0.2.183}/README.md +0 -0
- {vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/__init__.py +0 -0
- {vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/agent/__init__.py +0 -0
- {vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/agent/agent.py +0 -0
- {vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/agent/agent_utils.py +0 -0
- {vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/agent/vision_agent.py +0 -0
- {vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/agent/vision_agent_coder.py +0 -0
- {vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
- {vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/agent/vision_agent_planner.py +0 -0
- {vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/agent/vision_agent_planner_prompts.py +0 -0
- {vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/agent/vision_agent_prompts.py +0 -0
- {vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/clients/__init__.py +0 -0
- {vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/clients/http.py +0 -0
- {vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/clients/landing_public_api.py +0 -0
- {vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/fonts/__init__.py +0 -0
- {vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
- {vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/lmm/__init__.py +0 -0
- {vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/lmm/lmm.py +0 -0
- {vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/lmm/types.py +0 -0
- {vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/tools/meta_tools.py +0 -0
- {vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/tools/prompts.py +0 -0
- {vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/tools/tool_utils.py +0 -0
- {vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/tools/tools_types.py +0 -0
- {vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/utils/__init__.py +0 -0
- {vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/utils/exceptions.py +0 -0
- {vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/utils/execute.py +0 -0
- {vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/utils/image_utils.py +0 -0
- {vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/utils/sim.py +0 -0
- {vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/utils/type_defs.py +0 -0
- {vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/utils/video.py +0 -0
@@ -930,6 +930,37 @@ def ixc25_video_vqa(prompt: str, frames: List[np.ndarray]) -> str:
|
|
930
930
|
return cast(str, data["answer"])
|
931
931
|
|
932
932
|
|
933
|
+
def qwen2_vl_video_vqa(prompt: str, frames: List[np.ndarray]) -> str:
|
934
|
+
"""'qwen2_vl_video_vqa' is a tool that can answer any questions about arbitrary videos
|
935
|
+
including regular videos or videos of documents or presentations. It returns text
|
936
|
+
as an answer to the question.
|
937
|
+
|
938
|
+
Parameters:
|
939
|
+
prompt (str): The question about the video
|
940
|
+
frames (List[np.ndarray]): The reference frames used for the question
|
941
|
+
|
942
|
+
Returns:
|
943
|
+
str: A string which is the answer to the given prompt.
|
944
|
+
|
945
|
+
Example
|
946
|
+
-------
|
947
|
+
>>> qwen2_vl_video_vqa('Which football player made the goal?', frames)
|
948
|
+
'Lionel Messi'
|
949
|
+
"""
|
950
|
+
|
951
|
+
buffer_bytes = frames_to_bytes(frames)
|
952
|
+
files = [("video", buffer_bytes)]
|
953
|
+
payload = {
|
954
|
+
"prompt": prompt,
|
955
|
+
"model": "qwen2vl",
|
956
|
+
"function_name": "qwen2_vl_video_vqa",
|
957
|
+
}
|
958
|
+
data: Dict[str, Any] = send_inference_request(
|
959
|
+
payload, "image-to-text", files=files, v2=True
|
960
|
+
)
|
961
|
+
return cast(str, data)
|
962
|
+
|
963
|
+
|
933
964
|
def gpt4o_image_vqa(prompt: str, image: np.ndarray) -> str:
|
934
965
|
"""'gpt4o_image_vqa' is a tool that can answer any questions about arbitrary images
|
935
966
|
including regular images or images of documents or presentations. It returns text
|
@@ -2238,13 +2269,13 @@ FUNCTION_TOOLS = [
|
|
2238
2269
|
florence2_sam2_image,
|
2239
2270
|
florence2_sam2_video_tracking,
|
2240
2271
|
florence2_phrase_grounding,
|
2241
|
-
ixc25_image_vqa,
|
2242
|
-
ixc25_video_vqa,
|
2243
2272
|
detr_segmentation,
|
2244
2273
|
depth_anything_v2,
|
2245
2274
|
generate_pose_image,
|
2246
2275
|
closest_mask_distance,
|
2247
2276
|
closest_box_distance,
|
2277
|
+
qwen2_vl_images_vqa,
|
2278
|
+
qwen2_vl_video_vqa,
|
2248
2279
|
]
|
2249
2280
|
|
2250
2281
|
UTIL_TOOLS = [
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/agent/vision_agent_coder_prompts.py
RENAMED
File without changes
|
File without changes
|
{vision_agent-0.2.182 → vision_agent-0.2.183}/vision_agent/agent/vision_agent_planner_prompts.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|