vision-agent 0.2.95.tar.gz → 0.2.97.tar.gz
- {vision_agent-0.2.95 → vision_agent-0.2.97}/PKG-INFO +2 -2
- {vision_agent-0.2.95 → vision_agent-0.2.97}/pyproject.toml +2 -2
- {vision_agent-0.2.95 → vision_agent-0.2.97}/vision_agent/tools/tools.py +29 -14
- {vision_agent-0.2.95 → vision_agent-0.2.97}/LICENSE +0 -0
- {vision_agent-0.2.95 → vision_agent-0.2.97}/README.md +0 -0
- {vision_agent-0.2.95 → vision_agent-0.2.97}/vision_agent/__init__.py +0 -0
- {vision_agent-0.2.95 → vision_agent-0.2.97}/vision_agent/agent/__init__.py +0 -0
- {vision_agent-0.2.95 → vision_agent-0.2.97}/vision_agent/agent/agent.py +0 -0
- {vision_agent-0.2.95 → vision_agent-0.2.97}/vision_agent/agent/agent_utils.py +0 -0
- {vision_agent-0.2.95 → vision_agent-0.2.97}/vision_agent/agent/vision_agent.py +0 -0
- {vision_agent-0.2.95 → vision_agent-0.2.97}/vision_agent/agent/vision_agent_coder.py +0 -0
- {vision_agent-0.2.95 → vision_agent-0.2.97}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
- {vision_agent-0.2.95 → vision_agent-0.2.97}/vision_agent/agent/vision_agent_prompts.py +0 -0
- {vision_agent-0.2.95 → vision_agent-0.2.97}/vision_agent/fonts/__init__.py +0 -0
- {vision_agent-0.2.95 → vision_agent-0.2.97}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
- {vision_agent-0.2.95 → vision_agent-0.2.97}/vision_agent/lmm/__init__.py +0 -0
- {vision_agent-0.2.95 → vision_agent-0.2.97}/vision_agent/lmm/lmm.py +0 -0
- {vision_agent-0.2.95 → vision_agent-0.2.97}/vision_agent/lmm/types.py +0 -0
- {vision_agent-0.2.95 → vision_agent-0.2.97}/vision_agent/tools/__init__.py +0 -0
- {vision_agent-0.2.95 → vision_agent-0.2.97}/vision_agent/tools/meta_tools.py +0 -0
- {vision_agent-0.2.95 → vision_agent-0.2.97}/vision_agent/tools/prompts.py +0 -0
- {vision_agent-0.2.95 → vision_agent-0.2.97}/vision_agent/tools/tool_utils.py +0 -0
- {vision_agent-0.2.95 → vision_agent-0.2.97}/vision_agent/utils/__init__.py +0 -0
- {vision_agent-0.2.95 → vision_agent-0.2.97}/vision_agent/utils/exceptions.py +0 -0
- {vision_agent-0.2.95 → vision_agent-0.2.97}/vision_agent/utils/execute.py +0 -0
- {vision_agent-0.2.95 → vision_agent-0.2.97}/vision_agent/utils/image_utils.py +0 -0
- {vision_agent-0.2.95 → vision_agent-0.2.97}/vision_agent/utils/sim.py +0 -0
- {vision_agent-0.2.95 → vision_agent-0.2.97}/vision_agent/utils/type_defs.py +0 -0
- {vision_agent-0.2.95 → vision_agent-0.2.97}/vision_agent/utils/video.py +0 -0
{vision_agent-0.2.95 → vision_agent-0.2.97}/PKG-INFO CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.95
+Version: 0.2.97
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai
@@ -11,7 +11,7 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Requires-Dist: anthropic (>=0.31.0,<0.32.0)
 Requires-Dist: e2b (>=0.17.1,<0.18.0)
-Requires-Dist: e2b-code-interpreter (==0.0.
+Requires-Dist: e2b-code-interpreter (==0.0.11a27)
 Requires-Dist: ipykernel (>=6.29.4,<7.0.0)
 Requires-Dist: langsmith (>=0.1.58,<0.2.0)
 Requires-Dist: moviepy (>=1.0.0,<2.0.0)
{vision_agent-0.2.95 → vision_agent-0.2.97}/pyproject.toml CHANGED
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "vision-agent"
-version = "0.2.95"
+version = "0.2.97"
 description = "Toolset for Vision Agent"
 authors = ["Landing AI <dev@landing.ai>"]
 readme = "README.md"
@@ -35,7 +35,7 @@ rich = "^13.7.1"
 langsmith = "^0.1.58"
 ipykernel = "^6.29.4"
 e2b = "^0.17.1"
-e2b-code-interpreter = "0.0.
+e2b-code-interpreter = "0.0.11a27"
 tenacity = "^8.3.0"
 pillow-heif = "^0.16.0"
 pytube = "15.0.0"
{vision_agent-0.2.95 → vision_agent-0.2.97}/vision_agent/tools/tools.py CHANGED
@@ -9,6 +9,7 @@ from typing import Any, Dict, List, Optional, Tuple, Union, cast
 import cv2
 import numpy as np
 import requests
+from moviepy.editor import ImageSequenceClip
 from PIL import Image, ImageDraw, ImageFont
 from pillow_heif import register_heif_opener  # type: ignore
 from pytube import YouTube  # type: ignore
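
The new moviepy import backs the save_video rewrite at the bottom of this diff. For context, ImageSequenceClip builds a clip from a list of RGB numpy arrays and encodes it through ffmpeg. A minimal standalone sketch with synthetic frames, assuming moviepy 1.x (as pinned in pyproject.toml above) and an ffmpeg binary on the system:

import numpy as np
from moviepy.editor import ImageSequenceClip

# Four solid-gray 32x32 RGB frames, stepping from black toward white.
frames = [np.full((32, 32, 3), c, dtype=np.uint8) for c in (0, 85, 170, 255)]

# The clip is a context manager, mirroring how tools.py now uses it.
with ImageSequenceClip(frames, fps=2) as clip:
    clip.write_videofile("demo.mp4", codec="libx264")  # encoded via ffmpeg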
@@ -106,6 +107,7 @@ def grounding_dino(
             "visual_grounding" if model_size == "large" else "visual_grounding_tiny"
         ),
         "kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
+        "function_name": "grounding_dino",
     }
     data: Dict[str, Any] = send_inference_request(request_data, "tools")
     return_data = []
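
The same one-line change repeats for every tool below: each request payload now carries a function_name field naming the public tool that issued it, presumably so the backend can attribute inference requests per function. A sketch of the resulting payload shape with placeholder values; the transport helper here is a hypothetical stand-in, not the package's actual send_inference_request:

import requests

def send_inference_request(payload: dict, endpoint: str) -> dict:
    # Hypothetical transport; the real helper lives in vision_agent/tools/tool_utils.py.
    url = f"https://example.com/inference/{endpoint}"  # illustrative URL only
    resp = requests.post(url, json=payload)
    resp.raise_for_status()
    return resp.json()

request_data = {
    "prompt": "person",
    "image": "<base64-encoded image>",  # placeholder
    "tool": "visual_grounding",
    "kwargs": {"box_threshold": 0.20, "iou_threshold": 0.20},
    "function_name": "grounding_dino",  # the new field added throughout this diff
}
# data = send_inference_request(request_data, "tools")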
@@ -161,6 +163,7 @@ def owl_v2(
         "image": image_b64,
         "tool": "open_vocab_detection",
         "kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
+        "function_name": "owl_v2",
     }
     data: Dict[str, Any] = send_inference_request(request_data, "tools")
     return_data = []
@@ -225,6 +228,7 @@ def grounding_sam(
         "image": image_b64,
         "tool": "visual_grounding_segment",
         "kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
+        "function_name": "grounding_sam",
     }
     data: Dict[str, Any] = send_inference_request(request_data, "tools")
     return_data = []
@@ -364,6 +368,7 @@ def loca_zero_shot_counting(image: np.ndarray) -> Dict[str, Any]:
     data = {
         "image": image_b64,
         "tool": "zero_shot_counting",
+        "function_name": "loca_zero_shot_counting",
     }
     resp_data = send_inference_request(data, "tools")
     resp_data["heat_map"] = np.array(b64_to_pil(resp_data["heat_map"][0]))
@@ -399,6 +404,7 @@ def loca_visual_prompt_counting(
         "image": image_b64,
         "prompt": bbox_str,
         "tool": "few_shot_counting",
+        "function_name": "loca_visual_prompt_counting",
     }
     resp_data = send_inference_request(data, "tools")
     resp_data["heat_map"] = np.array(b64_to_pil(resp_data["heat_map"][0]))
@@ -428,6 +434,7 @@ def florencev2_roberta_vqa(prompt: str, image: np.ndarray) -> str:
         "image": image_b64,
         "prompt": prompt,
         "tool": "image_question_answering_with_context",
+        "function_name": "florencev2_roberta_vqa",
     }
 
     answer = send_inference_request(data, "tools")
@@ -457,6 +464,7 @@ def git_vqa_v2(prompt: str, image: np.ndarray) -> str:
         "image": image_b64,
         "prompt": prompt,
         "tool": "image_question_answering",
+        "function_name": "git_vqa_v2",
     }
 
     answer = send_inference_request(data, "tools")
@@ -487,6 +495,7 @@ def clip(image: np.ndarray, classes: List[str]) -> Dict[str, Any]:
         "prompt": ",".join(classes),
         "image": image_b64,
         "tool": "closed_set_image_classification",
+        "function_name": "clip",
     }
     resp_data = send_inference_request(data, "tools")
     resp_data["scores"] = [round(prob, 4) for prob in resp_data["scores"]]
@@ -514,6 +523,7 @@ def vit_image_classification(image: np.ndarray) -> Dict[str, Any]:
     data = {
         "image": image_b64,
         "tool": "image_classification",
+        "function_name": "vit_image_classification",
     }
     resp_data = send_inference_request(data, "tools")
     resp_data["scores"] = [round(prob, 4) for prob in resp_data["scores"]]
@@ -541,6 +551,7 @@ def vit_nsfw_classification(image: np.ndarray) -> Dict[str, Any]:
     data = {
         "image": image_b64,
         "tool": "nsfw_image_classification",
+        "function_name": "vit_nsfw_classification",
     }
     resp_data = send_inference_request(data, "tools")
     resp_data["scores"] = round(resp_data["scores"], 4)
@@ -567,6 +578,7 @@ def blip_image_caption(image: np.ndarray) -> str:
     data = {
         "image": image_b64,
         "tool": "image_captioning",
+        "function_name": "blip_image_caption",
     }
 
     answer = send_inference_request(data, "tools")
@@ -595,6 +607,7 @@ def florencev2_image_caption(image: np.ndarray, detail_caption: bool = True) ->
         "image": image_b64,
         "tool": "florence2_image_captioning",
         "detail_caption": detail_caption,
+        "function_name": "florencev2_image_caption",
     }
 
     answer = send_inference_request(data, "tools")
@@ -630,6 +643,7 @@ def florencev2_object_detection(image: np.ndarray) -> List[Dict[str, Any]]:
     data = {
         "image": image_b64,
         "tool": "object_detection",
+        "function_name": "florencev2_object_detection",
     }
 
     answer = send_inference_request(data, "tools")
@@ -686,6 +700,7 @@ def detr_segmentation(image: np.ndarray) -> List[Dict[str, Any]]:
     data = {
         "image": image_b64,
         "tool": "panoptic_segmentation",
+        "function_name": "detr_segmentation",
     }
 
     answer = send_inference_request(data, "tools")
@@ -728,6 +743,7 @@ def depth_anything_v2(image: np.ndarray) -> np.ndarray:
     data = {
         "image": image_b64,
         "tool": "generate_depth",
+        "function_name": "depth_anything_v2",
     }
 
     answer = send_inference_request(data, "tools")
@@ -759,6 +775,7 @@ def generate_soft_edge_image(image: np.ndarray) -> np.ndarray:
     data = {
         "image": image_b64,
         "tool": "generate_hed",
+        "function_name": "generate_soft_edge_image",
     }
 
     answer = send_inference_request(data, "tools")
@@ -791,6 +808,7 @@ def dpt_hybrid_midas(image: np.ndarray) -> np.ndarray:
     data = {
         "image": image_b64,
         "tool": "generate_normal",
+        "function_name": "dpt_hybrid_midas",
     }
 
     answer = send_inference_request(data, "tools")
@@ -822,6 +840,7 @@ def generate_pose_image(image: np.ndarray) -> np.ndarray:
     data = {
         "image": image_b64,
         "tool": "generate_pose",
+        "function_name": "generate_pose_image",
     }
 
     answer = send_inference_request(data, "tools")
@@ -862,6 +881,7 @@ def template_match(
         "image": image_b64,
         "template": template_image_b64,
         "tool": "template_match",
+        "function_name": "template_match",
     }
 
     answer = send_inference_request(data, "tools")
@@ -1044,20 +1064,15 @@ def save_video(
         _LOGGER.warning(f"Invalid fps value: {fps}. Setting fps to 4 (default value).")
         fps = 4
 
-
-    output_video_path
-
-
-
-
-
-
-
-        video.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
-    video.release()
-
-    _save_video_to_result(output_video_path)
-    return output_video_path
+    with ImageSequenceClip(frames, fps=fps) as video:
+        if output_video_path:
+            f = open(output_video_path, "wb")
+        else:
+            f = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)  # type: ignore
+        video.write_videofile(f.name, codec="libx264")
+        f.close()
+        _save_video_to_result(f.name)
+        return f.name
 
 
 def _save_video_to_result(video_uri: str) -> None:
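
This rewrite drops the old cv2.VideoWriter path in favor of moviepy, encoding frames with libx264 (H.264), which generally yields more widely playable MP4s than OpenCV's built-in fourcc codecs. A usage sketch, assuming save_video is re-exported from vision_agent.tools and keeps the signature the hunk implies, save_video(frames, output_video_path=None, fps=4):

import numpy as np
from vision_agent.tools import save_video

# Ten synthetic 64x64 RGB frames fading from black toward white.
frames = [np.full((64, 64, 3), i * 25, dtype=np.uint8) for i in range(10)]

# With no output path, the new implementation writes to a NamedTemporaryFile
# (suffix=".mp4", delete=False) and returns that file's name.
path = save_video(frames, fps=4)
print(path)  # e.g. /tmp/tmpab12cd34.mp4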