vision-agent 0.2.177__tar.gz → 0.2.178__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35)
  1. {vision_agent-0.2.177 → vision_agent-0.2.178}/PKG-INFO +1 -1
  2. {vision_agent-0.2.177 → vision_agent-0.2.178}/pyproject.toml +1 -1
  3. {vision_agent-0.2.177 → vision_agent-0.2.178}/vision_agent/tools/__init__.py +1 -0
  4. {vision_agent-0.2.177 → vision_agent-0.2.178}/vision_agent/tools/tools.py +48 -0
  5. {vision_agent-0.2.177 → vision_agent-0.2.178}/LICENSE +0 -0
  6. {vision_agent-0.2.177 → vision_agent-0.2.178}/README.md +0 -0
  7. {vision_agent-0.2.177 → vision_agent-0.2.178}/vision_agent/__init__.py +0 -0
  8. {vision_agent-0.2.177 → vision_agent-0.2.178}/vision_agent/agent/__init__.py +0 -0
  9. {vision_agent-0.2.177 → vision_agent-0.2.178}/vision_agent/agent/agent.py +0 -0
  10. {vision_agent-0.2.177 → vision_agent-0.2.178}/vision_agent/agent/agent_utils.py +0 -0
  11. {vision_agent-0.2.177 → vision_agent-0.2.178}/vision_agent/agent/vision_agent.py +0 -0
  12. {vision_agent-0.2.177 → vision_agent-0.2.178}/vision_agent/agent/vision_agent_coder.py +0 -0
  13. {vision_agent-0.2.177 → vision_agent-0.2.178}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
  14. {vision_agent-0.2.177 → vision_agent-0.2.178}/vision_agent/agent/vision_agent_planner.py +0 -0
  15. {vision_agent-0.2.177 → vision_agent-0.2.178}/vision_agent/agent/vision_agent_planner_prompts.py +0 -0
  16. {vision_agent-0.2.177 → vision_agent-0.2.178}/vision_agent/agent/vision_agent_prompts.py +0 -0
  17. {vision_agent-0.2.177 → vision_agent-0.2.178}/vision_agent/clients/__init__.py +0 -0
  18. {vision_agent-0.2.177 → vision_agent-0.2.178}/vision_agent/clients/http.py +0 -0
  19. {vision_agent-0.2.177 → vision_agent-0.2.178}/vision_agent/clients/landing_public_api.py +0 -0
  20. {vision_agent-0.2.177 → vision_agent-0.2.178}/vision_agent/fonts/__init__.py +0 -0
  21. {vision_agent-0.2.177 → vision_agent-0.2.178}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  22. {vision_agent-0.2.177 → vision_agent-0.2.178}/vision_agent/lmm/__init__.py +0 -0
  23. {vision_agent-0.2.177 → vision_agent-0.2.178}/vision_agent/lmm/lmm.py +0 -0
  24. {vision_agent-0.2.177 → vision_agent-0.2.178}/vision_agent/lmm/types.py +0 -0
  25. {vision_agent-0.2.177 → vision_agent-0.2.178}/vision_agent/tools/meta_tools.py +0 -0
  26. {vision_agent-0.2.177 → vision_agent-0.2.178}/vision_agent/tools/prompts.py +0 -0
  27. {vision_agent-0.2.177 → vision_agent-0.2.178}/vision_agent/tools/tool_utils.py +0 -0
  28. {vision_agent-0.2.177 → vision_agent-0.2.178}/vision_agent/tools/tools_types.py +0 -0
  29. {vision_agent-0.2.177 → vision_agent-0.2.178}/vision_agent/utils/__init__.py +0 -0
  30. {vision_agent-0.2.177 → vision_agent-0.2.178}/vision_agent/utils/exceptions.py +0 -0
  31. {vision_agent-0.2.177 → vision_agent-0.2.178}/vision_agent/utils/execute.py +0 -0
  32. {vision_agent-0.2.177 → vision_agent-0.2.178}/vision_agent/utils/image_utils.py +0 -0
  33. {vision_agent-0.2.177 → vision_agent-0.2.178}/vision_agent/utils/sim.py +0 -0
  34. {vision_agent-0.2.177 → vision_agent-0.2.178}/vision_agent/utils/type_defs.py +0 -0
  35. {vision_agent-0.2.177 → vision_agent-0.2.178}/vision_agent/utils/video.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.177
3
+ Version: 0.2.178
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "vision-agent"
7
- version = "0.2.177"
7
+ version = "0.2.178"
8
8
  description = "Toolset for Vision Agent"
9
9
  authors = ["Landing AI <dev@landing.ai>"]
10
10
  readme = "README.md"
@@ -65,6 +65,7 @@ from .tools import (
65
65
  template_match,
66
66
  vit_image_classification,
67
67
  vit_nsfw_classification,
68
+ video_temporal_localization,
68
69
  )
69
70
 
70
71
  __new_tools__ = [
@@ -975,6 +975,54 @@ def git_vqa_v2(prompt: str, image: np.ndarray) -> str:
975
975
  return answer["text"][0] # type: ignore
976
976
 
977
977
 
978
+ def video_temporal_localization(
979
+ prompt: str,
980
+ frames: List[np.ndarray],
981
+ model: str = "qwen2vl",
982
+ chunk_length: Optional[float] = None,
983
+ chunk_length_seconds: Optional[float] = None,
984
+ chunk_length_frames: Optional[int] = 2,
985
+ ) -> List[float]:
986
+ """'video_temporal_localization' is a tool that can find objects in a video given a question about it.
987
+ It returns a list of floats with a value of 1.0 if the object to be found is present in the chunk of video being analyzed.
988
+
989
+ Parameters:
990
+ prompt (str): The question about the video
991
+ frames (List[np.ndarray]): The reference frames used for the question
992
+ model (str): The model to use for the inference. Valid values are 'qwen2vl', 'gpt4o', 'internlm-xcomposer'
993
+ chunk_length (Optional[float]): length of each chunk in seconds
994
+ chunk_length_seconds (Optional[float]): alternative length for chunk in seconds
995
+ chunk_length_frames (Optional[int]): length of each chunk in frames
996
+
997
+ Returns:
998
+ List[float]: A list of floats with a value of 1.0 if the object to be found is present in the chunk of video
999
+
1000
+ Example
1001
+ -------
1002
+ >>> video_temporal_localization('Did a goal happen?', frames)
1003
+ [0.0, 0.0, 0.0, 1.0, 1.0, 0.0]
1004
+ """
1005
+
1006
+ buffer_bytes = frames_to_bytes(frames)
1007
+ files = [("video", buffer_bytes)]
1008
+ payload: Dict[str, Any] = {
1009
+ "prompt": prompt,
1010
+ "model": model,
1011
+ "function_name": "video_temporal_localization",
1012
+ }
1013
+ if chunk_length is not None:
1014
+ payload["chunk_length"] = chunk_length
1015
+ if chunk_length_seconds is not None:
1016
+ payload["chunk_length_seconds"] = chunk_length_seconds
1017
+ if chunk_length_frames is not None:
1018
+ payload["chunk_length_frames"] = chunk_length_frames
1019
+
1020
+ data = send_inference_request(
1021
+ payload, "video-temporal-localization", files=files, v2=True
1022
+ )
1023
+ return [cast(float, value) for value in data]
1024
+
1025
+
978
1026
  def clip(image: np.ndarray, classes: List[str]) -> Dict[str, Any]:
979
1027
  """'clip' is a tool that can classify an image or a cropped detection given a list
980
1028
  of input classes or tags. It returns the same list of the input classes along with
File without changes
File without changes