vision-agent 0.2.95__py3-none-any.whl → 0.2.96__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/tools/tools.py +19 -0
- {vision_agent-0.2.95.dist-info → vision_agent-0.2.96.dist-info}/METADATA +1 -1
- {vision_agent-0.2.95.dist-info → vision_agent-0.2.96.dist-info}/RECORD +5 -5
- {vision_agent-0.2.95.dist-info → vision_agent-0.2.96.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.95.dist-info → vision_agent-0.2.96.dist-info}/WHEEL +0 -0
vision_agent/tools/tools.py
CHANGED
@@ -106,6 +106,7 @@ def grounding_dino(
             "visual_grounding" if model_size == "large" else "visual_grounding_tiny"
         ),
         "kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
+        "function_name": "grounding_dino",
     }
     data: Dict[str, Any] = send_inference_request(request_data, "tools")
     return_data = []
@@ -161,6 +162,7 @@ def owl_v2(
         "image": image_b64,
         "tool": "open_vocab_detection",
         "kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
+        "function_name": "owl_v2",
     }
     data: Dict[str, Any] = send_inference_request(request_data, "tools")
     return_data = []
@@ -225,6 +227,7 @@ def grounding_sam(
         "image": image_b64,
         "tool": "visual_grounding_segment",
         "kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
+        "function_name": "grounding_sam",
     }
     data: Dict[str, Any] = send_inference_request(request_data, "tools")
     return_data = []
@@ -364,6 +367,7 @@ def loca_zero_shot_counting(image: np.ndarray) -> Dict[str, Any]:
     data = {
         "image": image_b64,
         "tool": "zero_shot_counting",
+        "function_name": "loca_zero_shot_counting",
     }
     resp_data = send_inference_request(data, "tools")
     resp_data["heat_map"] = np.array(b64_to_pil(resp_data["heat_map"][0]))
@@ -399,6 +403,7 @@ def loca_visual_prompt_counting(
         "image": image_b64,
         "prompt": bbox_str,
         "tool": "few_shot_counting",
+        "function_name": "loca_visual_prompt_counting",
     }
     resp_data = send_inference_request(data, "tools")
     resp_data["heat_map"] = np.array(b64_to_pil(resp_data["heat_map"][0]))
@@ -428,6 +433,7 @@ def florencev2_roberta_vqa(prompt: str, image: np.ndarray) -> str:
         "image": image_b64,
         "prompt": prompt,
         "tool": "image_question_answering_with_context",
+        "function_name": "florencev2_roberta_vqa",
     }

     answer = send_inference_request(data, "tools")
@@ -457,6 +463,7 @@ def git_vqa_v2(prompt: str, image: np.ndarray) -> str:
         "image": image_b64,
         "prompt": prompt,
         "tool": "image_question_answering",
+        "function_name": "git_vqa_v2",
     }

     answer = send_inference_request(data, "tools")
@@ -487,6 +494,7 @@ def clip(image: np.ndarray, classes: List[str]) -> Dict[str, Any]:
         "prompt": ",".join(classes),
         "image": image_b64,
         "tool": "closed_set_image_classification",
+        "function_name": "clip",
     }
     resp_data = send_inference_request(data, "tools")
     resp_data["scores"] = [round(prob, 4) for prob in resp_data["scores"]]
@@ -514,6 +522,7 @@ def vit_image_classification(image: np.ndarray) -> Dict[str, Any]:
     data = {
         "image": image_b64,
         "tool": "image_classification",
+        "function_name": "vit_image_classification",
     }
     resp_data = send_inference_request(data, "tools")
     resp_data["scores"] = [round(prob, 4) for prob in resp_data["scores"]]
@@ -541,6 +550,7 @@ def vit_nsfw_classification(image: np.ndarray) -> Dict[str, Any]:
     data = {
         "image": image_b64,
         "tool": "nsfw_image_classification",
+        "function_name": "vit_nsfw_classification",
     }
     resp_data = send_inference_request(data, "tools")
     resp_data["scores"] = round(resp_data["scores"], 4)
@@ -567,6 +577,7 @@ def blip_image_caption(image: np.ndarray) -> str:
     data = {
         "image": image_b64,
         "tool": "image_captioning",
+        "function_name": "blip_image_caption",
     }

     answer = send_inference_request(data, "tools")
@@ -595,6 +606,7 @@ def florencev2_image_caption(image: np.ndarray, detail_caption: bool = True) ->
         "image": image_b64,
         "tool": "florence2_image_captioning",
         "detail_caption": detail_caption,
+        "function_name": "florencev2_image_caption",
     }

     answer = send_inference_request(data, "tools")
@@ -630,6 +642,7 @@ def florencev2_object_detection(image: np.ndarray) -> List[Dict[str, Any]]:
     data = {
         "image": image_b64,
         "tool": "object_detection",
+        "function_name": "florencev2_object_detection",
     }

     answer = send_inference_request(data, "tools")
@@ -686,6 +699,7 @@ def detr_segmentation(image: np.ndarray) -> List[Dict[str, Any]]:
     data = {
         "image": image_b64,
         "tool": "panoptic_segmentation",
+        "function_name": "detr_segmentation",
     }

     answer = send_inference_request(data, "tools")
@@ -728,6 +742,7 @@ def depth_anything_v2(image: np.ndarray) -> np.ndarray:
     data = {
         "image": image_b64,
         "tool": "generate_depth",
+        "function_name": "depth_anything_v2",
     }

     answer = send_inference_request(data, "tools")
@@ -759,6 +774,7 @@ def generate_soft_edge_image(image: np.ndarray) -> np.ndarray:
     data = {
         "image": image_b64,
         "tool": "generate_hed",
+        "function_name": "generate_soft_edge_image",
     }

     answer = send_inference_request(data, "tools")
@@ -791,6 +807,7 @@ def dpt_hybrid_midas(image: np.ndarray) -> np.ndarray:
     data = {
         "image": image_b64,
         "tool": "generate_normal",
+        "function_name": "dpt_hybrid_midas",
     }

     answer = send_inference_request(data, "tools")
@@ -822,6 +839,7 @@ def generate_pose_image(image: np.ndarray) -> np.ndarray:
     data = {
         "image": image_b64,
         "tool": "generate_pose",
+        "function_name": "generate_pose_image",
     }

     answer = send_inference_request(data, "tools")
@@ -862,6 +880,7 @@ def template_match(
         "image": image_b64,
         "template": template_image_b64,
         "tool": "template_match",
+        "function_name": "template_match",
     }

     answer = send_inference_request(data, "tools")
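Every tools.py hunk above makes the same one-line change: the request payload each tool builds now carries a "function_name" key naming the calling tool before it is passed to send_inference_request(data, "tools"). Below is a minimal illustrative sketch of the resulting payload shape, using the clip tool's fields from the diff; the wrapper function and the placeholder base64 string are not part of the package.

# Illustrative sketch only (not vision_agent code): payload shape after 0.2.96.
from typing import Any, Dict, List


def build_clip_payload(image_b64: str, classes: List[str]) -> Dict[str, Any]:
    # Fields mirror the clip hunk above; image_b64 stands in for the
    # base64-encoded image the real tool produces.
    return {
        "prompt": ",".join(classes),
        "image": image_b64,
        "tool": "closed_set_image_classification",
        "function_name": "clip",  # new in 0.2.96: identifies the calling tool
    }


if __name__ == "__main__":
    print(build_clip_payload("<base64 image data>", ["cat", "dog"]))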
{vision_agent-0.2.95.dist-info → vision_agent-0.2.96.dist-info}/RECORD
CHANGED
@@ -15,7 +15,7 @@ vision_agent/tools/__init__.py,sha256=UNiaJAOt1C709gaJ-a9h9BzKnY5JmoEUpgKftsOnyP
 vision_agent/tools/meta_tools.py,sha256=rmxgVzj-vJKeewHbue3qHru4sYsFLxlSZV-YH-eyH5w,13366
 vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
 vision_agent/tools/tool_utils.py,sha256=XoB-iae8hHrBQgJd3fV6-UjZAkClysobUaOM17IcHuE,4597
-vision_agent/tools/tools.py,sha256=
+vision_agent/tools/tools.py,sha256=wiMLnhH2pSelWv-XtIzI8DL1MtVk8ISGMk_f17GmQi0,43339
 vision_agent/utils/__init__.py,sha256=CW84HnhqI6XQVuxf2KifkLnSuO7EOhmuL09-gAymAak,219
 vision_agent/utils/exceptions.py,sha256=isVH-SVL4vHj3q5kK4z7cy5_aOapAqHXWkpibfSNbUs,1659
 vision_agent/utils/execute.py,sha256=s43aUtuq7ZNjil2mxrddiz8EvvqlJwttkYlIiZouXqM,25125
@@ -23,7 +23,7 @@ vision_agent/utils/image_utils.py,sha256=y69wtNla0xHZ1h1x0-vv7nOyKUq69jtjSJBiDCn
 vision_agent/utils/sim.py,sha256=7JvtWGN0Ik5ife3qQYWs7Fm3T8AnAXGFd5HnvDC15mQ,4433
 vision_agent/utils/type_defs.py,sha256=oVFJcicB-s_09lqvn61u0A5ncZsTqZArZledXWbrrg0,1384
 vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
-vision_agent-0.2.
-vision_agent-0.2.
-vision_agent-0.2.
-vision_agent-0.2.
+vision_agent-0.2.96.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.96.dist-info/METADATA,sha256=CHUkz8xYiyCJGAlk043s1d0WS86PzLoLyaHK19iBZm0,10728
+vision_agent-0.2.96.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.96.dist-info/RECORD,,
{vision_agent-0.2.95.dist-info → vision_agent-0.2.96.dist-info}/LICENSE
File without changes
{vision_agent-0.2.95.dist-info → vision_agent-0.2.96.dist-info}/WHEEL
File without changes
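For reference, each row in the RECORD diff follows the standard wheel format path,sha256=<urlsafe-base64 digest>,<size in bytes>, with the base64 padding stripped. Below is a minimal sketch of recomputing such an entry; the path used is only an example.

# Sketch of recomputing a wheel RECORD entry (path,hash,size).
import base64
import hashlib
from pathlib import Path


def record_entry(path: str) -> str:
    data = Path(path).read_bytes()
    # sha256 digest, urlsafe-base64 encoded with '=' padding stripped.
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest())
    return f"{path},sha256={digest.rstrip(b'=').decode('ascii')},{len(data)}"


if __name__ == "__main__":
    # Example path; point this at a file from an unpacked wheel to compare
    # against the RECORD rows above.
    print(record_entry("vision_agent/tools/tools.py"))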