vision-agent 0.2.95__py3-none-any.whl → 0.2.96__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -106,6 +106,7 @@ def grounding_dino(
106
106
  "visual_grounding" if model_size == "large" else "visual_grounding_tiny"
107
107
  ),
108
108
  "kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
109
+ "function_name": "grounding_dino",
109
110
  }
110
111
  data: Dict[str, Any] = send_inference_request(request_data, "tools")
111
112
  return_data = []
@@ -161,6 +162,7 @@ def owl_v2(
161
162
  "image": image_b64,
162
163
  "tool": "open_vocab_detection",
163
164
  "kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
165
+ "function_name": "owl_v2",
164
166
  }
165
167
  data: Dict[str, Any] = send_inference_request(request_data, "tools")
166
168
  return_data = []
@@ -225,6 +227,7 @@ def grounding_sam(
225
227
  "image": image_b64,
226
228
  "tool": "visual_grounding_segment",
227
229
  "kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
230
+ "function_name": "grounding_sam",
228
231
  }
229
232
  data: Dict[str, Any] = send_inference_request(request_data, "tools")
230
233
  return_data = []
@@ -364,6 +367,7 @@ def loca_zero_shot_counting(image: np.ndarray) -> Dict[str, Any]:
364
367
  data = {
365
368
  "image": image_b64,
366
369
  "tool": "zero_shot_counting",
370
+ "function_name": "loca_zero_shot_counting",
367
371
  }
368
372
  resp_data = send_inference_request(data, "tools")
369
373
  resp_data["heat_map"] = np.array(b64_to_pil(resp_data["heat_map"][0]))
@@ -399,6 +403,7 @@ def loca_visual_prompt_counting(
399
403
  "image": image_b64,
400
404
  "prompt": bbox_str,
401
405
  "tool": "few_shot_counting",
406
+ "function_name": "loca_visual_prompt_counting",
402
407
  }
403
408
  resp_data = send_inference_request(data, "tools")
404
409
  resp_data["heat_map"] = np.array(b64_to_pil(resp_data["heat_map"][0]))
@@ -428,6 +433,7 @@ def florencev2_roberta_vqa(prompt: str, image: np.ndarray) -> str:
428
433
  "image": image_b64,
429
434
  "prompt": prompt,
430
435
  "tool": "image_question_answering_with_context",
436
+ "function_name": "florencev2_roberta_vqa",
431
437
  }
432
438
 
433
439
  answer = send_inference_request(data, "tools")
@@ -457,6 +463,7 @@ def git_vqa_v2(prompt: str, image: np.ndarray) -> str:
457
463
  "image": image_b64,
458
464
  "prompt": prompt,
459
465
  "tool": "image_question_answering",
466
+ "function_name": "git_vqa_v2",
460
467
  }
461
468
 
462
469
  answer = send_inference_request(data, "tools")
@@ -487,6 +494,7 @@ def clip(image: np.ndarray, classes: List[str]) -> Dict[str, Any]:
487
494
  "prompt": ",".join(classes),
488
495
  "image": image_b64,
489
496
  "tool": "closed_set_image_classification",
497
+ "function_name": "clip",
490
498
  }
491
499
  resp_data = send_inference_request(data, "tools")
492
500
  resp_data["scores"] = [round(prob, 4) for prob in resp_data["scores"]]
@@ -514,6 +522,7 @@ def vit_image_classification(image: np.ndarray) -> Dict[str, Any]:
514
522
  data = {
515
523
  "image": image_b64,
516
524
  "tool": "image_classification",
525
+ "function_name": "vit_image_classification",
517
526
  }
518
527
  resp_data = send_inference_request(data, "tools")
519
528
  resp_data["scores"] = [round(prob, 4) for prob in resp_data["scores"]]
@@ -541,6 +550,7 @@ def vit_nsfw_classification(image: np.ndarray) -> Dict[str, Any]:
541
550
  data = {
542
551
  "image": image_b64,
543
552
  "tool": "nsfw_image_classification",
553
+ "function_name": "vit_nsfw_classification",
544
554
  }
545
555
  resp_data = send_inference_request(data, "tools")
546
556
  resp_data["scores"] = round(resp_data["scores"], 4)
@@ -567,6 +577,7 @@ def blip_image_caption(image: np.ndarray) -> str:
567
577
  data = {
568
578
  "image": image_b64,
569
579
  "tool": "image_captioning",
580
+ "function_name": "blip_image_caption",
570
581
  }
571
582
 
572
583
  answer = send_inference_request(data, "tools")
@@ -595,6 +606,7 @@ def florencev2_image_caption(image: np.ndarray, detail_caption: bool = True) ->
595
606
  "image": image_b64,
596
607
  "tool": "florence2_image_captioning",
597
608
  "detail_caption": detail_caption,
609
+ "function_name": "florencev2_image_caption",
598
610
  }
599
611
 
600
612
  answer = send_inference_request(data, "tools")
@@ -630,6 +642,7 @@ def florencev2_object_detection(image: np.ndarray) -> List[Dict[str, Any]]:
630
642
  data = {
631
643
  "image": image_b64,
632
644
  "tool": "object_detection",
645
+ "function_name": "florencev2_object_detection",
633
646
  }
634
647
 
635
648
  answer = send_inference_request(data, "tools")
@@ -686,6 +699,7 @@ def detr_segmentation(image: np.ndarray) -> List[Dict[str, Any]]:
686
699
  data = {
687
700
  "image": image_b64,
688
701
  "tool": "panoptic_segmentation",
702
+ "function_name": "detr_segmentation",
689
703
  }
690
704
 
691
705
  answer = send_inference_request(data, "tools")
@@ -728,6 +742,7 @@ def depth_anything_v2(image: np.ndarray) -> np.ndarray:
728
742
  data = {
729
743
  "image": image_b64,
730
744
  "tool": "generate_depth",
745
+ "function_name": "depth_anything_v2",
731
746
  }
732
747
 
733
748
  answer = send_inference_request(data, "tools")
@@ -759,6 +774,7 @@ def generate_soft_edge_image(image: np.ndarray) -> np.ndarray:
759
774
  data = {
760
775
  "image": image_b64,
761
776
  "tool": "generate_hed",
777
+ "function_name": "generate_soft_edge_image",
762
778
  }
763
779
 
764
780
  answer = send_inference_request(data, "tools")
@@ -791,6 +807,7 @@ def dpt_hybrid_midas(image: np.ndarray) -> np.ndarray:
791
807
  data = {
792
808
  "image": image_b64,
793
809
  "tool": "generate_normal",
810
+ "function_name": "dpt_hybrid_midas",
794
811
  }
795
812
 
796
813
  answer = send_inference_request(data, "tools")
@@ -822,6 +839,7 @@ def generate_pose_image(image: np.ndarray) -> np.ndarray:
822
839
  data = {
823
840
  "image": image_b64,
824
841
  "tool": "generate_pose",
842
+ "function_name": "generate_pose_image",
825
843
  }
826
844
 
827
845
  answer = send_inference_request(data, "tools")
@@ -862,6 +880,7 @@ def template_match(
862
880
  "image": image_b64,
863
881
  "template": template_image_b64,
864
882
  "tool": "template_match",
883
+ "function_name": "template_match",
865
884
  }
866
885
 
867
886
  answer = send_inference_request(data, "tools")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.95
3
+ Version: 0.2.96
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -15,7 +15,7 @@ vision_agent/tools/__init__.py,sha256=UNiaJAOt1C709gaJ-a9h9BzKnY5JmoEUpgKftsOnyP
15
15
  vision_agent/tools/meta_tools.py,sha256=rmxgVzj-vJKeewHbue3qHru4sYsFLxlSZV-YH-eyH5w,13366
16
16
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
17
17
  vision_agent/tools/tool_utils.py,sha256=XoB-iae8hHrBQgJd3fV6-UjZAkClysobUaOM17IcHuE,4597
18
- vision_agent/tools/tools.py,sha256=CWQY1sD-xtWchPrg_AJNAGH-k7UxrKIkiog8r0sx1Do,42446
18
+ vision_agent/tools/tools.py,sha256=wiMLnhH2pSelWv-XtIzI8DL1MtVk8ISGMk_f17GmQi0,43339
19
19
  vision_agent/utils/__init__.py,sha256=CW84HnhqI6XQVuxf2KifkLnSuO7EOhmuL09-gAymAak,219
20
20
  vision_agent/utils/exceptions.py,sha256=isVH-SVL4vHj3q5kK4z7cy5_aOapAqHXWkpibfSNbUs,1659
21
21
  vision_agent/utils/execute.py,sha256=s43aUtuq7ZNjil2mxrddiz8EvvqlJwttkYlIiZouXqM,25125
@@ -23,7 +23,7 @@ vision_agent/utils/image_utils.py,sha256=y69wtNla0xHZ1h1x0-vv7nOyKUq69jtjSJBiDCn
23
23
  vision_agent/utils/sim.py,sha256=7JvtWGN0Ik5ife3qQYWs7Fm3T8AnAXGFd5HnvDC15mQ,4433
24
24
  vision_agent/utils/type_defs.py,sha256=oVFJcicB-s_09lqvn61u0A5ncZsTqZArZledXWbrrg0,1384
25
25
  vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
26
- vision_agent-0.2.95.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
27
- vision_agent-0.2.95.dist-info/METADATA,sha256=-OCOFe_UAKyI5sjDr6nYklJq5jwKZbLjwFkFMO-wrV8,10728
28
- vision_agent-0.2.95.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
29
- vision_agent-0.2.95.dist-info/RECORD,,
26
+ vision_agent-0.2.96.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
27
+ vision_agent-0.2.96.dist-info/METADATA,sha256=CHUkz8xYiyCJGAlk043s1d0WS86PzLoLyaHK19iBZm0,10728
28
+ vision_agent-0.2.96.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
29
+ vision_agent-0.2.96.dist-info/RECORD,,