vision_agent-0.2.95-py3-none-any.whl → vision_agent-0.2.96-py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- vision_agent/tools/tools.py +19 -0
- {vision_agent-0.2.95.dist-info → vision_agent-0.2.96.dist-info}/METADATA +1 -1
- {vision_agent-0.2.95.dist-info → vision_agent-0.2.96.dist-info}/RECORD +5 -5
- {vision_agent-0.2.95.dist-info → vision_agent-0.2.96.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.95.dist-info → vision_agent-0.2.96.dist-info}/WHEEL +0 -0
vision_agent/tools/tools.py
CHANGED
@@ -106,6 +106,7 @@ def grounding_dino(
|
|
106
106
|
"visual_grounding" if model_size == "large" else "visual_grounding_tiny"
|
107
107
|
),
|
108
108
|
"kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
|
109
|
+
"function_name": "grounding_dino",
|
109
110
|
}
|
110
111
|
data: Dict[str, Any] = send_inference_request(request_data, "tools")
|
111
112
|
return_data = []
|
@@ -161,6 +162,7 @@ def owl_v2(
|
|
161
162
|
"image": image_b64,
|
162
163
|
"tool": "open_vocab_detection",
|
163
164
|
"kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
|
165
|
+
"function_name": "owl_v2",
|
164
166
|
}
|
165
167
|
data: Dict[str, Any] = send_inference_request(request_data, "tools")
|
166
168
|
return_data = []
|
@@ -225,6 +227,7 @@ def grounding_sam(
|
|
225
227
|
"image": image_b64,
|
226
228
|
"tool": "visual_grounding_segment",
|
227
229
|
"kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
|
230
|
+
"function_name": "grounding_sam",
|
228
231
|
}
|
229
232
|
data: Dict[str, Any] = send_inference_request(request_data, "tools")
|
230
233
|
return_data = []
|
@@ -364,6 +367,7 @@ def loca_zero_shot_counting(image: np.ndarray) -> Dict[str, Any]:
|
|
364
367
|
data = {
|
365
368
|
"image": image_b64,
|
366
369
|
"tool": "zero_shot_counting",
|
370
|
+
"function_name": "loca_zero_shot_counting",
|
367
371
|
}
|
368
372
|
resp_data = send_inference_request(data, "tools")
|
369
373
|
resp_data["heat_map"] = np.array(b64_to_pil(resp_data["heat_map"][0]))
|
@@ -399,6 +403,7 @@ def loca_visual_prompt_counting(
|
|
399
403
|
"image": image_b64,
|
400
404
|
"prompt": bbox_str,
|
401
405
|
"tool": "few_shot_counting",
|
406
|
+
"function_name": "loca_visual_prompt_counting",
|
402
407
|
}
|
403
408
|
resp_data = send_inference_request(data, "tools")
|
404
409
|
resp_data["heat_map"] = np.array(b64_to_pil(resp_data["heat_map"][0]))
|
@@ -428,6 +433,7 @@ def florencev2_roberta_vqa(prompt: str, image: np.ndarray) -> str:
|
|
428
433
|
"image": image_b64,
|
429
434
|
"prompt": prompt,
|
430
435
|
"tool": "image_question_answering_with_context",
|
436
|
+
"function_name": "florencev2_roberta_vqa",
|
431
437
|
}
|
432
438
|
|
433
439
|
answer = send_inference_request(data, "tools")
|
@@ -457,6 +463,7 @@ def git_vqa_v2(prompt: str, image: np.ndarray) -> str:
|
|
457
463
|
"image": image_b64,
|
458
464
|
"prompt": prompt,
|
459
465
|
"tool": "image_question_answering",
|
466
|
+
"function_name": "git_vqa_v2",
|
460
467
|
}
|
461
468
|
|
462
469
|
answer = send_inference_request(data, "tools")
|
@@ -487,6 +494,7 @@ def clip(image: np.ndarray, classes: List[str]) -> Dict[str, Any]:
|
|
487
494
|
"prompt": ",".join(classes),
|
488
495
|
"image": image_b64,
|
489
496
|
"tool": "closed_set_image_classification",
|
497
|
+
"function_name": "clip",
|
490
498
|
}
|
491
499
|
resp_data = send_inference_request(data, "tools")
|
492
500
|
resp_data["scores"] = [round(prob, 4) for prob in resp_data["scores"]]
|
@@ -514,6 +522,7 @@ def vit_image_classification(image: np.ndarray) -> Dict[str, Any]:
|
|
514
522
|
data = {
|
515
523
|
"image": image_b64,
|
516
524
|
"tool": "image_classification",
|
525
|
+
"function_name": "vit_image_classification",
|
517
526
|
}
|
518
527
|
resp_data = send_inference_request(data, "tools")
|
519
528
|
resp_data["scores"] = [round(prob, 4) for prob in resp_data["scores"]]
|
@@ -541,6 +550,7 @@ def vit_nsfw_classification(image: np.ndarray) -> Dict[str, Any]:
|
|
541
550
|
data = {
|
542
551
|
"image": image_b64,
|
543
552
|
"tool": "nsfw_image_classification",
|
553
|
+
"function_name": "vit_nsfw_classification",
|
544
554
|
}
|
545
555
|
resp_data = send_inference_request(data, "tools")
|
546
556
|
resp_data["scores"] = round(resp_data["scores"], 4)
|
@@ -567,6 +577,7 @@ def blip_image_caption(image: np.ndarray) -> str:
|
|
567
577
|
data = {
|
568
578
|
"image": image_b64,
|
569
579
|
"tool": "image_captioning",
|
580
|
+
"function_name": "blip_image_caption",
|
570
581
|
}
|
571
582
|
|
572
583
|
answer = send_inference_request(data, "tools")
|
@@ -595,6 +606,7 @@ def florencev2_image_caption(image: np.ndarray, detail_caption: bool = True) ->
|
|
595
606
|
"image": image_b64,
|
596
607
|
"tool": "florence2_image_captioning",
|
597
608
|
"detail_caption": detail_caption,
|
609
|
+
"function_name": "florencev2_image_caption",
|
598
610
|
}
|
599
611
|
|
600
612
|
answer = send_inference_request(data, "tools")
|
@@ -630,6 +642,7 @@ def florencev2_object_detection(image: np.ndarray) -> List[Dict[str, Any]]:
|
|
630
642
|
data = {
|
631
643
|
"image": image_b64,
|
632
644
|
"tool": "object_detection",
|
645
|
+
"function_name": "florencev2_object_detection",
|
633
646
|
}
|
634
647
|
|
635
648
|
answer = send_inference_request(data, "tools")
|
@@ -686,6 +699,7 @@ def detr_segmentation(image: np.ndarray) -> List[Dict[str, Any]]:
|
|
686
699
|
data = {
|
687
700
|
"image": image_b64,
|
688
701
|
"tool": "panoptic_segmentation",
|
702
|
+
"function_name": "detr_segmentation",
|
689
703
|
}
|
690
704
|
|
691
705
|
answer = send_inference_request(data, "tools")
|
@@ -728,6 +742,7 @@ def depth_anything_v2(image: np.ndarray) -> np.ndarray:
|
|
728
742
|
data = {
|
729
743
|
"image": image_b64,
|
730
744
|
"tool": "generate_depth",
|
745
|
+
"function_name": "depth_anything_v2",
|
731
746
|
}
|
732
747
|
|
733
748
|
answer = send_inference_request(data, "tools")
|
@@ -759,6 +774,7 @@ def generate_soft_edge_image(image: np.ndarray) -> np.ndarray:
|
|
759
774
|
data = {
|
760
775
|
"image": image_b64,
|
761
776
|
"tool": "generate_hed",
|
777
|
+
"function_name": "generate_soft_edge_image",
|
762
778
|
}
|
763
779
|
|
764
780
|
answer = send_inference_request(data, "tools")
|
@@ -791,6 +807,7 @@ def dpt_hybrid_midas(image: np.ndarray) -> np.ndarray:
|
|
791
807
|
data = {
|
792
808
|
"image": image_b64,
|
793
809
|
"tool": "generate_normal",
|
810
|
+
"function_name": "dpt_hybrid_midas",
|
794
811
|
}
|
795
812
|
|
796
813
|
answer = send_inference_request(data, "tools")
|
@@ -822,6 +839,7 @@ def generate_pose_image(image: np.ndarray) -> np.ndarray:
|
|
822
839
|
data = {
|
823
840
|
"image": image_b64,
|
824
841
|
"tool": "generate_pose",
|
842
|
+
"function_name": "generate_pose_image",
|
825
843
|
}
|
826
844
|
|
827
845
|
answer = send_inference_request(data, "tools")
|
@@ -862,6 +880,7 @@ def template_match(
|
|
862
880
|
"image": image_b64,
|
863
881
|
"template": template_image_b64,
|
864
882
|
"tool": "template_match",
|
883
|
+
"function_name": "template_match",
|
865
884
|
}
|
866
885
|
|
867
886
|
answer = send_inference_request(data, "tools")
|
@@ -15,7 +15,7 @@ vision_agent/tools/__init__.py,sha256=UNiaJAOt1C709gaJ-a9h9BzKnY5JmoEUpgKftsOnyP
|
|
15
15
|
vision_agent/tools/meta_tools.py,sha256=rmxgVzj-vJKeewHbue3qHru4sYsFLxlSZV-YH-eyH5w,13366
|
16
16
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
17
17
|
vision_agent/tools/tool_utils.py,sha256=XoB-iae8hHrBQgJd3fV6-UjZAkClysobUaOM17IcHuE,4597
|
18
|
-
vision_agent/tools/tools.py,sha256=
|
18
|
+
vision_agent/tools/tools.py,sha256=wiMLnhH2pSelWv-XtIzI8DL1MtVk8ISGMk_f17GmQi0,43339
|
19
19
|
vision_agent/utils/__init__.py,sha256=CW84HnhqI6XQVuxf2KifkLnSuO7EOhmuL09-gAymAak,219
|
20
20
|
vision_agent/utils/exceptions.py,sha256=isVH-SVL4vHj3q5kK4z7cy5_aOapAqHXWkpibfSNbUs,1659
|
21
21
|
vision_agent/utils/execute.py,sha256=s43aUtuq7ZNjil2mxrddiz8EvvqlJwttkYlIiZouXqM,25125
|
@@ -23,7 +23,7 @@ vision_agent/utils/image_utils.py,sha256=y69wtNla0xHZ1h1x0-vv7nOyKUq69jtjSJBiDCn
|
|
23
23
|
vision_agent/utils/sim.py,sha256=7JvtWGN0Ik5ife3qQYWs7Fm3T8AnAXGFd5HnvDC15mQ,4433
|
24
24
|
vision_agent/utils/type_defs.py,sha256=oVFJcicB-s_09lqvn61u0A5ncZsTqZArZledXWbrrg0,1384
|
25
25
|
vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
|
26
|
-
vision_agent-0.2.95.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
27
|
-
vision_agent-0.2.95.dist-info/METADATA,sha256=
|
28
|
-
vision_agent-0.2.95.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
29
|
-
vision_agent-0.2.95.dist-info/RECORD,,
|
26
|
+
vision_agent-0.2.96.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
27
|
+
vision_agent-0.2.96.dist-info/METADATA,sha256=CHUkz8xYiyCJGAlk043s1d0WS86PzLoLyaHK19iBZm0,10728
|
28
|
+
vision_agent-0.2.96.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
29
|
+
vision_agent-0.2.96.dist-info/RECORD,,
|
File without changes
|
File without changes
|