vision-agent 0.2.75__tar.gz → 0.2.76__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23) hide show
  1. {vision_agent-0.2.75 → vision_agent-0.2.76}/PKG-INFO +2 -1
  2. {vision_agent-0.2.75 → vision_agent-0.2.76}/pyproject.toml +2 -1
  3. {vision_agent-0.2.75 → vision_agent-0.2.76}/vision_agent/tools/__init__.py +9 -0
  4. {vision_agent-0.2.75 → vision_agent-0.2.76}/vision_agent/tools/tools.py +373 -12
  5. {vision_agent-0.2.75 → vision_agent-0.2.76}/LICENSE +0 -0
  6. {vision_agent-0.2.75 → vision_agent-0.2.76}/README.md +0 -0
  7. {vision_agent-0.2.75 → vision_agent-0.2.76}/vision_agent/__init__.py +0 -0
  8. {vision_agent-0.2.75 → vision_agent-0.2.76}/vision_agent/agent/__init__.py +0 -0
  9. {vision_agent-0.2.75 → vision_agent-0.2.76}/vision_agent/agent/agent.py +0 -0
  10. {vision_agent-0.2.75 → vision_agent-0.2.76}/vision_agent/agent/vision_agent.py +0 -0
  11. {vision_agent-0.2.75 → vision_agent-0.2.76}/vision_agent/agent/vision_agent_prompts.py +0 -0
  12. {vision_agent-0.2.75 → vision_agent-0.2.76}/vision_agent/fonts/__init__.py +0 -0
  13. {vision_agent-0.2.75 → vision_agent-0.2.76}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  14. {vision_agent-0.2.75 → vision_agent-0.2.76}/vision_agent/lmm/__init__.py +0 -0
  15. {vision_agent-0.2.75 → vision_agent-0.2.76}/vision_agent/lmm/lmm.py +0 -0
  16. {vision_agent-0.2.75 → vision_agent-0.2.76}/vision_agent/tools/prompts.py +0 -0
  17. {vision_agent-0.2.75 → vision_agent-0.2.76}/vision_agent/tools/tool_utils.py +0 -0
  18. {vision_agent-0.2.75 → vision_agent-0.2.76}/vision_agent/utils/__init__.py +0 -0
  19. {vision_agent-0.2.75 → vision_agent-0.2.76}/vision_agent/utils/execute.py +0 -0
  20. {vision_agent-0.2.75 → vision_agent-0.2.76}/vision_agent/utils/image_utils.py +0 -0
  21. {vision_agent-0.2.75 → vision_agent-0.2.76}/vision_agent/utils/sim.py +0 -0
  22. {vision_agent-0.2.75 → vision_agent-0.2.76}/vision_agent/utils/type_defs.py +0 -0
  23. {vision_agent-0.2.75 → vision_agent-0.2.76}/vision_agent/utils/video.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.75
3
+ Version: 0.2.76
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -23,6 +23,7 @@ Requires-Dist: pandas (>=2.0.0,<3.0.0)
23
23
  Requires-Dist: pillow (>=10.0.0,<11.0.0)
24
24
  Requires-Dist: pillow-heif (>=0.16.0,<0.17.0)
25
25
  Requires-Dist: pydantic-settings (>=2.2.1,<3.0.0)
26
+ Requires-Dist: pytube (==15.0.0)
26
27
  Requires-Dist: requests (>=2.0.0,<3.0.0)
27
28
  Requires-Dist: rich (>=13.7.1,<14.0.0)
28
29
  Requires-Dist: scipy (>=1.13.0,<1.14.0)
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "vision-agent"
7
- version = "0.2.75"
7
+ version = "0.2.76"
8
8
  description = "Toolset for Vision Agent"
9
9
  authors = ["Landing AI <dev@landing.ai>"]
10
10
  readme = "README.md"
@@ -38,6 +38,7 @@ e2b = "^0.17.1"
38
38
  e2b-code-interpreter = "^0.0.9"
39
39
  tenacity = "^8.3.0"
40
40
  pillow-heif = "^0.16.0"
41
+ pytube = "15.0.0"
41
42
 
42
43
  [tool.poetry.group.dev.dependencies]
43
44
  autoflake = "1.*"
@@ -12,10 +12,18 @@ from .tools import (
12
12
  closest_box_distance,
13
13
  closest_mask_distance,
14
14
  extract_frames,
15
+ florencev2_image_caption,
15
16
  get_tool_documentation,
17
+ florencev2_object_detection,
18
+ detr_segmentation,
19
+ depth_anything_v2,
20
+ generate_soft_edge_image,
21
+ dpt_hybrid_midas,
22
+ generate_pose_image,
16
23
  git_vqa_v2,
17
24
  grounding_dino,
18
25
  grounding_sam,
26
+ florencev2_roberta_vqa,
19
27
  load_image,
20
28
  loca_visual_prompt_counting,
21
29
  loca_zero_shot_counting,
@@ -27,6 +35,7 @@ from .tools import (
27
35
  save_image,
28
36
  save_json,
29
37
  save_video,
38
+ template_match,
30
39
  vit_image_classification,
31
40
  vit_nsfw_classification,
32
41
  )
@@ -14,6 +14,7 @@ import requests
14
14
  from moviepy.editor import ImageSequenceClip
15
15
  from PIL import Image, ImageDraw, ImageFont
16
16
  from pillow_heif import register_heif_opener # type: ignore
17
+ from pytube import YouTube # type: ignore
17
18
 
18
19
  from vision_agent.tools.tool_utils import send_inference_request
19
20
  from vision_agent.utils import extract_frames_from_video
@@ -126,7 +127,7 @@ def owl_v2(
126
127
  ) -> List[Dict[str, Any]]:
127
128
  """'owl_v2' is a tool that can detect and count multiple objects given a text
128
129
  prompt such as category names or referring expressions. The categories in text prompt
129
- are separated by commas or periods. It returns a list of bounding boxes with
130
+ are separated by commas. It returns a list of bounding boxes with
130
131
  normalized coordinates, label names and associated probability scores.
131
132
 
132
133
  Parameters:
@@ -136,7 +137,6 @@ def owl_v2(
136
137
  to 0.10.
137
138
  iou_threshold (float, optional): The threshold for the Intersection over Union
138
139
  (IoU). Defaults to 0.10.
139
- model_size (str, optional): The size of the model to use.
140
140
 
141
141
  Returns:
142
142
  List[Dict[str, Any]]: A list of dictionaries containing the score, label, and
@@ -180,7 +180,7 @@ def grounding_sam(
180
180
  box_threshold: float = 0.20,
181
181
  iou_threshold: float = 0.20,
182
182
  ) -> List[Dict[str, Any]]:
183
- """'grounding_sam' is a tool that can detect and segment multiple objects given a
183
+ """'grounding_sam' is a tool that can segment multiple objects given a
184
184
  text prompt such as category names or referring expressions. The categories in text
185
185
  prompt are separated by commas or periods. It returns a list of bounding boxes,
186
186
  label names, mask file names and associated probability scores.
@@ -242,12 +242,12 @@ def grounding_sam(
242
242
  def extract_frames(
243
243
  video_uri: Union[str, Path], fps: float = 0.5
244
244
  ) -> List[Tuple[np.ndarray, float]]:
245
- """'extract_frames' extracts frames from a video, returns a list of tuples (frame,
246
- timestamp), where timestamp is the relative time in seconds where the frame was
247
- captured. The frame is a numpy array.
245
+ """'extract_frames' extracts frames from a video which can be a file path or youtube
246
+ link, returns a list of tuples (frame, timestamp), where timestamp is the relative
247
+ time in seconds where the frame was captured. The frame is a numpy array.
248
248
 
249
249
  Parameters:
250
- video_uri (Union[str, Path]): The path to the video file.
250
+ video_uri (Union[str, Path]): The path to the video file or youtube link
251
251
  fps (float, optional): The frame rate per second to extract the frames. Defaults
252
252
  to 0.5.
253
253
 
@@ -261,6 +261,29 @@ def extract_frames(
261
261
  [(frame1, 0.0), (frame2, 0.5), ...]
262
262
  """
263
263
 
264
+ if str(video_uri).startswith(
265
+ (
266
+ "http://www.youtube.com/",
267
+ "https://www.youtube.com/",
268
+ "http://youtu.be/",
269
+ "https://youtu.be/",
270
+ )
271
+ ):
272
+ with tempfile.TemporaryDirectory() as temp_dir:
273
+ yt = YouTube(str(video_uri))
274
+ # Download the highest resolution video
275
+ video = (
276
+ yt.streams.filter(progressive=True, file_extension="mp4")
277
+ .order_by("resolution")
278
+ .desc()
279
+ .first()
280
+ )
281
+ if not video:
282
+ raise Exception("No suitable video stream found")
283
+ video_file_path = video.download(output_path=temp_dir)
284
+
285
+ return extract_frames_from_video(video_file_path, fps)
286
+
264
287
  return extract_frames_from_video(str(video_uri), fps)
265
288
 
266
289
 
@@ -381,6 +404,35 @@ def loca_visual_prompt_counting(
381
404
  return resp_data
382
405
 
383
406
 
407
+ def florencev2_roberta_vqa(prompt: str, image: np.ndarray) -> str:
408
+ """'florencev2_roberta_vqa' is a tool that takes an image and analyzes
409
+ its contents, generates detailed captions and then tries to answer the given
410
+ question using the generated context. It returns text as an answer to the question.
411
+
412
+ Parameters:
413
+ prompt (str): The question about the image
414
+ image (np.ndarray): The reference image used for the question
415
+
416
+ Returns:
417
+ str: A string which is the answer to the given prompt.
418
+
419
+ Example
420
+ -------
421
+ >>> florencev2_roberta_vqa('What is the top left animal in this image ?', image)
422
+ 'white tiger'
423
+ """
424
+
425
+ image_b64 = convert_to_b64(image)
426
+ data = {
427
+ "image": image_b64,
428
+ "prompt": prompt,
429
+ "tool": "image_question_answering_with_context",
430
+ }
431
+
432
+ answer = send_inference_request(data, "tools")
433
+ return answer["text"][0] # type: ignore
434
+
435
+
384
436
  def git_vqa_v2(prompt: str, image: np.ndarray) -> str:
385
437
  """'git_vqa_v2' is a tool that can answer questions about the visual
386
438
  contents of an image given a question and an image. It returns an answer to the
@@ -391,8 +443,7 @@ def git_vqa_v2(prompt: str, image: np.ndarray) -> str:
391
443
  image (np.ndarray): The reference image used for the question
392
444
 
393
445
  Returns:
394
- str: A string which is the answer to the given prompt. E.g. {'text': 'This
395
- image contains a cat sitting on a table with a bowl of milk.'}.
446
+ str: A string which is the answer to the given prompt.
396
447
 
397
448
  Example
398
449
  -------
@@ -521,6 +572,309 @@ def blip_image_caption(image: np.ndarray) -> str:
521
572
  return answer["text"][0] # type: ignore
522
573
 
523
574
 
575
+ def florencev2_image_caption(image: np.ndarray, detail_caption: bool = True) -> str:
576
+ """'florencev2_image_caption' is a tool that can caption or describe an image based
577
+ on its contents. It returns a text describing the image.
578
+
579
+ Parameters:
580
+ image (np.ndarray): The image to caption
581
+ detail_caption (bool): If True, the caption will be as detailed as possible else
582
+ the caption will be a brief description.
583
+
584
+ Returns:
585
+ str: A string which is the caption for the given image.
586
+
587
+ Example
588
+ -------
589
+ >>> florencev2_image_caption(image, False)
590
+ 'This image contains a cat sitting on a table with a bowl of milk.'
591
+ """
592
+ image_b64 = convert_to_b64(image)
593
+ data = {
594
+ "image": image_b64,
595
+ "tool": "florence2_image_captioning",
596
+ "detail_caption": detail_caption,
597
+ }
598
+
599
+ answer = send_inference_request(data, "tools")
600
+ return answer["text"][0] # type: ignore
601
+
602
+
603
+ def florencev2_object_detection(image: np.ndarray) -> List[Dict[str, Any]]:
604
+ """'florencev2_object_detection' is a tool that can detect common objects in an
605
+ image without any text prompt or thresholding. It returns a list of detected objects
606
+ as labels and their location as bounding boxes.
607
+
608
+ Parameters:
609
+ image (np.ndarray): The image used to detect objects
610
+
611
+ Returns:
612
+ List[Dict[str, Any]]: A list of dictionaries containing the score, label, and
613
+ bounding box of the detected objects with normalized coordinates between 0
614
+ and 1 (xmin, ymin, xmax, ymax). xmin and ymin are the coordinates of the
615
+ top-left and xmax and ymax are the coordinates of the bottom-right of the
616
+ bounding box. The scores are always 1.0 and cannot be thresholded
617
+
618
+ Example
619
+ -------
620
+ >>> florencev2_object_detection(image)
621
+ [
622
+ {'score': 1.0, 'label': 'window', 'bbox': [0.1, 0.11, 0.35, 0.4]},
623
+ {'score': 1.0, 'label': 'car', 'bbox': [0.2, 0.21, 0.45, 0.5]},
624
+ {'score': 1.0, 'label': 'person', 'bbox': [0.34, 0.21, 0.85, 0.5]},
625
+ ]
626
+ """
627
+ image_size = image.shape[:2]
628
+ image_b64 = convert_to_b64(image)
629
+ data = {
630
+ "image": image_b64,
631
+ "tool": "object_detection",
632
+ }
633
+
634
+ answer = send_inference_request(data, "tools")
635
+ return_data = []
636
+ for i in range(len(answer["bboxes"])):
637
+ return_data.append(
638
+ {
639
+ "score": round(answer["scores"][i], 2),
640
+ "label": answer["labels"][i],
641
+ "bbox": normalize_bbox(answer["bboxes"][i], image_size),
642
+ }
643
+ )
644
+ return return_data
645
+
646
+
647
+ def detr_segmentation(image: np.ndarray) -> List[Dict[str, Any]]:
648
+ """'detr_segmentation' is a tool that can segment common objects in an
649
+ image without any text prompt. It returns a list of detected objects
650
+ as labels, their regions as masks and their scores.
651
+
652
+ Parameters:
653
+ image (np.ndarray): The image used to segment things and objects
654
+
655
+ Returns:
656
+ List[Dict[str, Any]]: A list of dictionaries containing the score, label
657
+ and mask of the detected objects. The mask is binary 2D numpy array where 1
658
+ indicates the object and 0 indicates the background.
659
+
660
+ Example
661
+ -------
662
+ >>> detr_segmentation(image)
663
+ [
664
+ {
665
+ 'score': 0.45,
666
+ 'label': 'window',
667
+ 'mask': array([[0, 0, 0, ..., 0, 0, 0],
668
+ [0, 0, 0, ..., 0, 0, 0],
669
+ ...,
670
+ [0, 0, 0, ..., 0, 0, 0],
671
+ [0, 0, 0, ..., 0, 0, 0]], dtype=uint8),
672
+ },
673
+ {
674
+ 'score': 0.70,
675
+ 'label': 'bird',
676
+ 'mask': array([[0, 0, 0, ..., 0, 0, 0],
677
+ [0, 0, 0, ..., 0, 0, 0],
678
+ ...,
679
+ [0, 0, 0, ..., 0, 0, 0],
680
+ [0, 0, 0, ..., 0, 0, 0]], dtype=uint8),
681
+ },
682
+ ]
683
+ """
684
+ image_b64 = convert_to_b64(image)
685
+ data = {
686
+ "image": image_b64,
687
+ "tool": "panoptic_segmentation",
688
+ }
689
+
690
+ answer = send_inference_request(data, "tools")
691
+ return_data = []
692
+
693
+ for i in range(len(answer["scores"])):
694
+ return_data.append(
695
+ {
696
+ "score": round(answer["scores"][i], 2),
697
+ "label": answer["labels"][i],
698
+ "mask": rle_decode(
699
+ mask_rle=answer["masks"][i], shape=answer["mask_shape"][0]
700
+ ),
701
+ }
702
+ )
703
+ return return_data
704
+
705
+
706
+ def depth_anything_v2(image: np.ndarray) -> np.ndarray:
707
+ """'depth_anything_v2' is a tool that runs depth_anythingv2 model to generate a
708
+ depth image from a given RGB image. The returned depth image is monochrome and
709
+ represents depth values as pixel intensities with pixel values ranging from 0 to 255.
710
+
711
+ Parameters:
712
+ image (np.ndarray): The image used to generate the depth image
713
+
714
+ Returns:
715
+ np.ndarray: A grayscale depth image with pixel values ranging from 0 to 255.
716
+
717
+ Example
718
+ -------
719
+ >>> depth_anything_v2(image)
720
+ array([[0, 0, 0, ..., 0, 0, 0],
721
+ [0, 20, 24, ..., 0, 100, 103],
722
+ ...,
723
+ [10, 11, 15, ..., 202, 202, 205],
724
+ [10, 10, 10, ..., 200, 200, 200]], dtype=uint8),
725
+ """
726
+ image_b64 = convert_to_b64(image)
727
+ data = {
728
+ "image": image_b64,
729
+ "tool": "generate_depth",
730
+ }
731
+
732
+ answer = send_inference_request(data, "tools")
733
+ return_data = np.array(b64_to_pil(answer["masks"][0]).convert("L"))
734
+ return return_data
735
+
736
+
737
+ def generate_soft_edge_image(image: np.ndarray) -> np.ndarray:
738
+ """'generate_soft_edge_image' is a tool that runs Holistically Nested edge detection
739
+ to generate a soft edge image (HED) from a given RGB image. The returned image is
740
+ monochrome and represents object boundaries as soft white edges on a black background.
741
+
742
+ Parameters:
743
+ image (np.ndarray): The image used to generate the soft edge image
744
+
745
+ Returns:
746
+ np.ndarray: A soft edge image with pixel values ranging from 0 to 255.
747
+
748
+ Example
749
+ -------
750
+ >>> generate_soft_edge_image(image)
751
+ array([[0, 0, 0, ..., 0, 0, 0],
752
+ [0, 20, 24, ..., 0, 100, 103],
753
+ ...,
754
+ [10, 11, 15, ..., 202, 202, 205],
755
+ [10, 10, 10, ..., 200, 200, 200]], dtype=uint8),
756
+ """
757
+ image_b64 = convert_to_b64(image)
758
+ data = {
759
+ "image": image_b64,
760
+ "tool": "generate_hed",
761
+ }
762
+
763
+ answer = send_inference_request(data, "tools")
764
+ return_data = np.array(b64_to_pil(answer["masks"][0]).convert("L"))
765
+ return return_data
766
+
767
+
768
+ def dpt_hybrid_midas(image: np.ndarray) -> np.ndarray:
769
+ """'dpt_hybrid_midas' is a tool that generates a normal map from a given RGB
770
+ image. The returned RGB image is texture mapped image of the surface normals and the
771
+ RGB values represent the surface normals in the x, y, z directions.
772
+
773
+ Parameters:
774
+ image (np.ndarray): The image used to generate the normal image
775
+
776
+ Returns:
777
+ np.ndarray: A mapped normal image with RGB pixel values indicating surface
778
+ normals in x, y, z directions.
779
+
780
+ Example
781
+ -------
782
+ >>> dpt_hybrid_midas(image)
783
+ array([[0, 0, 0, ..., 0, 0, 0],
784
+ [0, 20, 24, ..., 0, 100, 103],
785
+ ...,
786
+ [10, 11, 15, ..., 202, 202, 205],
787
+ [10, 10, 10, ..., 200, 200, 200]], dtype=uint8),
788
+ """
789
+ image_b64 = convert_to_b64(image)
790
+ data = {
791
+ "image": image_b64,
792
+ "tool": "generate_normal",
793
+ }
794
+
795
+ answer = send_inference_request(data, "tools")
796
+ return_data = np.array(b64_to_pil(answer["masks"][0]).convert("RGB"))
797
+ return return_data
798
+
799
+
800
+ def generate_pose_image(image: np.ndarray) -> np.ndarray:
801
+ """'generate_pose_image' is a tool that generates an open pose bone/stick image from
802
+ a given RGB image. The returned bone image is RGB with the pose and keypoints colored
803
+ and background as black.
804
+
805
+ Parameters:
806
+ image (np.ndarray): The image used to generate the pose image
807
+
808
+ Returns:
809
+ np.ndarray: A bone or pose image indicating the pose and keypoints
810
+
811
+ Example
812
+ -------
813
+ >>> generate_pose_image(image)
814
+ array([[0, 0, 0, ..., 0, 0, 0],
815
+ [0, 20, 24, ..., 0, 100, 103],
816
+ ...,
817
+ [10, 11, 15, ..., 202, 202, 205],
818
+ [10, 10, 10, ..., 200, 200, 200]], dtype=uint8),
819
+ """
820
+ image_b64 = convert_to_b64(image)
821
+ data = {
822
+ "image": image_b64,
823
+ "tool": "generate_pose",
824
+ }
825
+
826
+ answer = send_inference_request(data, "tools")
827
+ return_data = np.array(b64_to_pil(answer["masks"][0]).convert("RGB"))
828
+ return return_data
829
+
830
+
831
+ def template_match(
832
+ image: np.ndarray, template_image: np.ndarray
833
+ ) -> List[Dict[str, Any]]:
834
+ """'template_match' is a tool that can detect all instances of a template in
835
+ a given image. It returns the locations of the detected template and a
836
+ corresponding similarity score for each detection.
837
+
838
+ Parameters:
839
+ image (np.ndarray): The image used for searching the template
840
+ template_image (np.ndarray): The template image or crop to search in the image
841
+
842
+ Returns:
843
+ List[Dict[str, Any]]: A list of dictionaries containing the score and
844
+ bounding box of the detected template with normalized coordinates between 0
845
+ and 1 (xmin, ymin, xmax, ymax). xmin and ymin are the coordinates of the
846
+ top-left and xmax and ymax are the coordinates of the bottom-right of the
847
+ bounding box.
848
+
849
+ Example
850
+ -------
851
+ >>> template_match(image, template)
852
+ [
853
+ {'score': 0.79, 'bbox': [0.1, 0.11, 0.35, 0.4]},
854
+ {'score': 0.38, 'bbox': [0.2, 0.21, 0.45, 0.5]},
855
+ ]
856
+ """
857
+ image_size = image.shape[:2]
858
+ image_b64 = convert_to_b64(image)
859
+ template_image_b64 = convert_to_b64(template_image)
860
+ data = {
861
+ "image": image_b64,
862
+ "template": template_image_b64,
863
+ "tool": "template_match",
864
+ }
865
+
866
+ answer = send_inference_request(data, "tools")
867
+ return_data = []
868
+ for i in range(len(answer["bboxes"])):
869
+ return_data.append(
870
+ {
871
+ "score": round(answer["scores"][i], 2),
872
+ "bbox": normalize_bbox(answer["bboxes"][i], image_size),
873
+ }
874
+ )
875
+ return return_data
876
+
877
+
524
878
  def closest_mask_distance(mask1: np.ndarray, mask2: np.ndarray) -> float:
525
879
  """'closest_mask_distance' calculates the closest distance between two masks.
526
880
 
@@ -733,7 +1087,7 @@ def overlay_bounding_boxes(
733
1087
  image, [{'score': 0.99, 'label': 'dinosaur', 'bbox': [0.1, 0.11, 0.35, 0.4]}],
734
1088
  )
735
1089
  """
736
- pil_image = Image.fromarray(image.astype(np.uint8))
1090
+ pil_image = Image.fromarray(image.astype(np.uint8)).convert("RGB")
737
1091
 
738
1092
  if len(set([box["label"] for box in bboxes])) > len(COLORS):
739
1093
  _LOGGER.warning(
@@ -920,8 +1274,14 @@ TOOLS = [
920
1274
  vit_nsfw_classification,
921
1275
  loca_zero_shot_counting,
922
1276
  loca_visual_prompt_counting,
923
- git_vqa_v2,
924
- blip_image_caption,
1277
+ florencev2_roberta_vqa,
1278
+ florencev2_image_caption,
1279
+ florencev2_object_detection,
1280
+ detr_segmentation,
1281
+ depth_anything_v2,
1282
+ generate_soft_edge_image,
1283
+ dpt_hybrid_midas,
1284
+ generate_pose_image,
925
1285
  closest_mask_distance,
926
1286
  closest_box_distance,
927
1287
  save_json,
@@ -931,6 +1291,7 @@ TOOLS = [
931
1291
  overlay_bounding_boxes,
932
1292
  overlay_segmentation_masks,
933
1293
  overlay_heat_map,
1294
+ template_match,
934
1295
  ]
935
1296
  TOOLS_DF = get_tools_df(TOOLS) # type: ignore
936
1297
  TOOL_DESCRIPTIONS = get_tool_descriptions(TOOLS) # type: ignore
File without changes
File without changes